Statistics
| Branch: | Tag: | Revision:

root / libsylph / codeconv.c @ aebfd4cc

History | View | Annotate | Download (59.7 kB)

1
/*
2
 * LibSylph -- E-Mail client library
3
 * Copyright (C) 1999-2011 Hiroyuki Yamamoto
4
 */
5
6
#ifdef HAVE_CONFIG_H
7
#  include "config.h"
8
#endif
9
10
#include "defs.h"
11
12
#include <glib.h>
13
#include <string.h>
14
#include <ctype.h>
15
#include <stdlib.h>
16
#include <errno.h>
17
18
#if HAVE_LOCALE_H
19
#  include <locale.h>
20
#endif
21
22
#include <iconv.h>
23
24
#include "codeconv.h"
25
#include "unmime.h"
26
#include "base64.h"
27
#include "quoted-printable.h"
28
#include "utils.h"
29
30
/*
 * Character set currently selected while walking or producing a
 * JIS (escape-sequence switched) byte stream.
 */
typedef enum {
        JIS_ASCII,      /* single-byte text: ESC ( B, ESC ( J, or SI */
        JIS_KANJI,      /* two-byte kanji: ESC $ @ or ESC $ B */
        JIS_HWKANA,     /* half-width kana: ESC ( I, or SO */
        JIS_AUXKANJI,   /* auxiliary kanji: ESC $ ( D */
        JIS_UDC         /* user-defined characters: ESC $ ( ? */
} JISState;
38
39
/* Written to the output in place of bytes that cannot be converted. */
#define SUBST_CHAR      '_'

/* Control bytes the converters in this file look for. */
#define ESC             '\033'  /* starts a JIS escape sequence */
#define SO              0x0e    /* JIS decoder: switch to half-width kana */
#define SI              0x0f    /* JIS decoder: switch back to ASCII */
#define SS2             0x8e    /* EUC prefix byte of a half-width kana */
#define SS3             0x8f    /* EUC prefix byte of an auxiliary kanji */
45
46
/*
 * EUC byte classifiers.  The argument is masked to its low 8 bits, so
 * plain (possibly signed) char values may be passed.  The range tests
 * evaluate the argument twice.
 */
/* either byte of a two-byte kanji: 0xa1 - 0xfe */
#define iseuckanji(c)   (((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xfe)
/* SS2 prefix that announces a half-width kana */
#define iseuchwkana1(c) (((c) & 0xff) == SS2)
/* the kana byte following SS2: 0xa1 - 0xdf */
#define iseuchwkana2(c) (((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xdf)
/* SS3 prefix that announces an auxiliary kanji */
#define iseucaux(c)     (((c) & 0xff) == SS3)
54
55
/*
 * Shift_JIS byte classifiers.  Arguments are masked to their low 8 bits
 * and may be evaluated more than once.
 */
/* lead byte of a two-byte character: 0x81 - 0x9f or 0xe0 - 0xef */
#define issjiskanji1(c)                                         \
        ((((c) & 0xff) >= 0x81 && ((c) & 0xff) <= 0x9f) ||      \
         (((c) & 0xff) >= 0xe0 && ((c) & 0xff) <= 0xef))
/* trail byte of a two-byte character: 0x40 - 0x7e or 0x80 - 0xfc */
#define issjiskanji2(c)                                         \
        ((((c) & 0xff) >= 0x40 && ((c) & 0xff) <= 0x7e) ||      \
         (((c) & 0xff) >= 0x80 && ((c) & 0xff) <= 0xfc))
/* single-byte half-width kana: 0xa1 - 0xdf */
#define issjishwkana(c)                                         \
        (((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xdf)
/* lead byte of the extension area: 0xf0 - 0xfc */
#define issjisext(c)                                            \
        (((c) & 0xff) >= 0xf0 && ((c) & 0xff) <= 0xfc)
/* lead byte of the user-defined character area: 0xf0 - 0xf9 */
#define issjisudc(c)                                            \
        (((c) & 0xff) >= 0xf0 && ((c) & 0xff) <= 0xf9)
/* IBM extension pair: lead 0xfa - 0xfb with any valid trail byte,
 * or lead 0xfc with trail 0x40 - 0x4b */
#define issjisibmext(c1, c2)                                    \
        ((((c1) & 0xff) >= 0xfa && ((c1) & 0xff) <= 0xfb &&     \
          issjiskanji2(c2)) ||                                  \
         (((c1) & 0xff) == 0xfc &&                              \
          ((c2) & 0xff) >= 0x40 && ((c2) & 0xff) <= 0x4b))
72
73
/*
 * 7-bit JIS byte classifiers (argument masked to 8 bits, evaluated twice).
 */
/* either byte of a two-byte character: 0x21 - 0x7e */
#define isjiskanji(c)   (((c) & 0xff) >= 0x21 && ((c) & 0xff) <= 0x7e)
/* half-width kana: 0x21 - 0x5f */
#define isjishwkana(c)  (((c) & 0xff) >= 0x21 && ((c) & 0xff) <= 0x5f)
/* first byte of a user-defined character: 0x21 - 0x34 */
#define isjisudc(c)     (((c) & 0xff) >= 0x21 && ((c) & 0xff) <= 0x34)
/* lower part of the user-defined range: 0x21 - 0x2a */
#define isjisudclow(c)  (((c) & 0xff) >= 0x21 && ((c) & 0xff) <= 0x2a)
/* upper part of the user-defined range: 0x2b - 0x34 */
#define isjisudchigh(c) (((c) & 0xff) >= 0x2b && ((c) & 0xff) <= 0x34)
83
84
/*
 * UTF-8 byte classifiers.  The _N_M suffix reads "byte M of an N-byte
 * sequence".
 */
/* two-byte sequences, U+0080 - U+07FF: 110xxxxx 10xxxxxx */
#define isutf8_2_1(c)   (((c) & 0xe0) == 0xc0)
#define isutf8_2_2(c)   (((c) & 0xc0) == 0x80)
/* three-byte sequences, U+0800 - U+FFFF: 1110xxxx 10xxxxxx ... */
#define isutf8_3_1(c)   (((c) & 0xf0) == 0xe0)
#define isutf8_3_2(c)   (((c) & 0xc0) == 0x80)
94
95
/*
 * Nonzero when the three bytes starting at s are EF BB BF (the UTF-8
 * byte order mark).
 *
 * The && chain stops at the first mismatching byte, so a NUL-terminated
 * string shorter than three bytes is never read past its terminator.
 * s is fully parenthesized, so any pointer expression may be passed; it
 * is evaluated up to three times and must not have side effects.
 */
#define isutf8bom(s)                                                    \
        ((((s)[0]) & 0xff) == 0xef && (((s)[1]) & 0xff) == 0xbb &&      \
         (((s)[2]) & 0xff) == 0xbf)
98
99
/*
 * Escape-sequence emitters for the *tojis encoders.
 *
 * Each macro expects two locals in the calling function: `out`, the
 * output write cursor, and `state`, the JISState currently selected in
 * the output.  When the requested character set is not already active
 * the macro appends the escape sequence that selects it and updates
 * `state`; otherwise it does nothing.
 *
 * The bodies are wrapped in do { } while (0) so that an invocation
 * followed by ';' forms exactly one statement and can be used as the
 * unbraced body of an if/else.
 */

/* select two-byte kanji: ESC $ B */
#define K_IN()                                          \
        do {                                            \
                if (state != JIS_KANJI) {               \
                        *out++ = ESC;                   \
                        *out++ = '$';                   \
                        *out++ = 'B';                   \
                        state = JIS_KANJI;              \
                }                                       \
        } while (0)

/* return to ASCII: ESC ( B */
#define K_OUT()                                         \
        do {                                            \
                if (state != JIS_ASCII) {               \
                        *out++ = ESC;                   \
                        *out++ = '(';                   \
                        *out++ = 'B';                   \
                        state = JIS_ASCII;              \
                }                                       \
        } while (0)

/* select half-width kana: ESC ( I */
#define HW_IN()                                         \
        do {                                            \
                if (state != JIS_HWKANA) {              \
                        *out++ = ESC;                   \
                        *out++ = '(';                   \
                        *out++ = 'I';                   \
                        state = JIS_HWKANA;             \
                }                                       \
        } while (0)

/* select auxiliary kanji: ESC $ ( D */
#define AUX_IN()                                        \
        do {                                            \
                if (state != JIS_AUXKANJI) {            \
                        *out++ = ESC;                   \
                        *out++ = '$';                   \
                        *out++ = '(';                   \
                        *out++ = 'D';                   \
                        state = JIS_AUXKANJI;           \
                }                                       \
        } while (0)

/* select user-defined characters: ESC $ ( ? */
#define UDC_IN()                                        \
        do {                                            \
                if (state != JIS_UDC) {                 \
                        *out++ = ESC;                   \
                        *out++ = '$';                   \
                        *out++ = '(';                   \
                        *out++ = '?';                   \
                        state = JIS_UDC;                \
                }                                       \
        } while (0)
140
141
static ConvADType conv_ad_type = C_AD_BY_LOCALE;
142
static gboolean allow_jisx0201_kana = FALSE;
143
144
static gchar *conv_jistoeuc(const gchar *inbuf, gint *error);
145
static gchar *conv_jistosjis(const gchar *inbuf, gint *error);
146
static gchar *conv_euctojis(const gchar *inbuf, gint *error);
147
static gchar *conv_sjistojis(const gchar *inbuf, gint *error);
148
static gchar *conv_sjistoeuc(const gchar *inbuf, gint *error);
149
150
static gchar *conv_jistoutf8(const gchar *inbuf, gint *error);
151
static gchar *conv_sjistoutf8(const gchar *inbuf, gint *error);
152
static gchar *conv_euctoutf8(const gchar *inbuf, gint *error);
153
static gchar *conv_anytoutf8(const gchar *inbuf, gint *error);
154
155
static gchar *conv_utf8toeuc(const gchar *inbuf, gint *error);
156
static gchar *conv_utf8tojis(const gchar *inbuf, gint *error);
157
static gchar *conv_utf8tosjis(const gchar *inbuf, gint *error);
158
159
/* static void conv_unreadable_eucjp(gchar *str); */
160
static void conv_unreadable_8bit(gchar *str);
161
/* static void conv_unreadable_latin(gchar *str); */
162
163
static gchar *conv_jistodisp(const gchar *inbuf, gint *error);
164
static gchar *conv_sjistodisp(const gchar *inbuf, gint *error);
165
static gchar *conv_euctodisp(const gchar *inbuf, gint *error);
166
167
static gchar *conv_anytodisp(const gchar *inbuf, gint *error);
168
static gchar *conv_ustodisp(const gchar *inbuf, gint *error);
169
static gchar *conv_noconv(const gchar *inbuf, gint *error);
170
171
/*
 * Convert a NUL-terminated JIS (escape-sequence switched) string to EUC.
 *
 * Recognised escape sequences are ESC $ @ and ESC $ B (kanji),
 * ESC $ ( D (auxiliary kanji), ESC ( B and ESC ( J (ASCII) and ESC ( I
 * (half-width kana); the bytes 0x0e / 0x0f also switch half-width kana
 * on / off.  After any other ESC the state falls back to ASCII, the
 * bytes that follow the ESC are copied as ordinary text and an error is
 * recorded.
 *
 * inbuf: input string, must not be NULL.
 * error: if non-NULL, receives 0 on success or -1 when an unknown escape
 *        sequence was seen.
 *
 * Returns a buffer obtained from g_malloc(); the caller releases it with
 * g_free().
 */
static gchar *conv_jistoeuc(const gchar *inbuf, gint *error)
{
        const guchar *src = (const guchar *)inbuf;
        gchar *outbuf;
        guchar *dst;
        JISState state = JIS_ASCII;
        gint err = 0;

        /* worst case is half-width kana: every input byte becomes two */
        outbuf = g_malloc(strlen(inbuf) * 2 + 1);
        dst = (guchar *)outbuf;

        while (*src != '\0') {
                if (*src == ESC) {
                        src++;  /* look at what follows the ESC */
                        if (src[0] == '$' &&
                            (src[1] == '@' || src[1] == 'B')) {
                                state = JIS_KANJI;
                                src += 2;
                        } else if (src[0] == '$' &&
                                   src[1] == '(' && src[2] == 'D') {
                                state = JIS_AUXKANJI;
                                src += 3;
                        } else if (src[0] == '(' &&
                                   (src[1] == 'B' || src[1] == 'J')) {
                                state = JIS_ASCII;
                                src += 2;
                        } else if (src[0] == '(' && src[1] == 'I') {
                                state = JIS_HWKANA;
                                src += 2;
                        } else {
                                /* unknown escape sequence: only the ESC
                                 * itself has been consumed */
                                err = -1;
                                state = JIS_ASCII;
                        }
                        continue;
                }

                if (*src == 0x0e) {             /* shift out */
                        state = JIS_HWKANA;
                        src++;
                        continue;
                }
                if (*src == 0x0f) {             /* shift in */
                        state = JIS_ASCII;
                        src++;
                        continue;
                }

                switch (state) {
                case JIS_KANJI:
                case JIS_AUXKANJI:
                        if (state == JIS_AUXKANJI)
                                *dst++ = 0x8f;
                        *dst++ = *src++ | 0x80;
                        /* a truncated character at the very end of the
                         * input yields only its first byte */
                        if (*src != '\0')
                                *dst++ = *src++ | 0x80;
                        break;
                case JIS_HWKANA:
                        *dst++ = 0x8e;
                        *dst++ = *src++ | 0x80;
                        break;
                case JIS_ASCII:
                default:
                        *dst++ = *src++;
                        break;
                }
        }

        *dst = '\0';

        if (error != NULL)
                *error = err;

        return outbuf;
}
255
256
static gchar *conv_jistosjis(const gchar *inbuf, gint *error)
257
{
258
        gchar *outbuf;
259
        const guchar *in = (guchar *)inbuf;
260
        guchar *out;
261
        JISState state = JIS_ASCII;
262
        gint error_ = 0;
263
264
        outbuf = g_malloc(strlen(inbuf) * 2 + 1);
265
        out = (guchar *)outbuf;
266
267
        while (*in != '\0') {
268
                if (*in == ESC) {
269
                        in++;
270
                        if (*in == '$') {
271
                                if (*(in + 1) == '@' || *(in + 1) == 'B') {
272
                                        state = JIS_KANJI;
273
                                        in += 2;
274
                                } else if (*(in + 1) == '(' &&
275
                                           *(in + 2) == '?') {
276
                                        /* ISO-2022-JP-MS extention */
277
                                        state = JIS_UDC;
278
                                        in += 3;
279
                                } else {
280
                                        /* unknown escape sequence */
281
                                        error_ = -1;
282
                                        state = JIS_ASCII;
283
                                }
284
                        } else if (*in == '(') {
285
                                if (*(in + 1) == 'B' || *(in + 1) == 'J') {
286
                                        state = JIS_ASCII;
287
                                        in += 2;
288
                                } else if (*(in + 1) == 'I') {
289
                                        state = JIS_HWKANA;
290
                                        in += 2;
291
                                } else {
292
                                        /* unknown escape sequence */
293
                                        error_ = -1;
294
                                        state = JIS_ASCII;
295
                                }
296
                        } else {
297
                                /* unknown escape sequence */
298
                                error_ = -1;
299
                                state = JIS_ASCII;
300
                        }
301
                } else if (*in == SO) {
302
                        state = JIS_HWKANA;
303
                        in++;
304
                } else if (*in == SI) {
305
                        state = JIS_ASCII;
306
                        in++;
307
                } else {
308
                        switch (state) {
309
                        case JIS_ASCII:
310
                                *out++ = *in++;
311
                                break;
312
                        case JIS_HWKANA:
313
                                *out++ = *in++ | 0x80;
314
                                break;
315
                        case JIS_KANJI:
316
                                if ((isjiskanji(*in) ||
317
                                     (*in >= 0x7f && *in <= 0x97)) &&
318
                                    isjiskanji(*(in + 1))) {
319
                                        *out++ = ((*in < 0x5f)
320
                                                 ? (((*in - 0x21) / 2) + 0x81)
321
                                                 : (((*in - 0x21) / 2) + 0xc1));
322
                                        *out++ = ((*in % 2)
323
                                                 ? ((*(in + 1) + ((*(in + 1) < 0x60)
324
                                                   ? 0x1f : 0x20)))
325
                                                 : *(in + 1) + 0x7e);
326
                                        in += 2;
327
                                } else {
328
                                        error_ = -1;
329
                                        *out++ = SUBST_CHAR;
330
                                        in++;
331
                                        if (*in != '\0') {
332
                                                *out++ = SUBST_CHAR;
333
                                                in++;
334
                                        }
335
                                }
336
                                break;
337
                        case JIS_UDC:
338
                                if (isjisudc(*in) && isjiskanji(*(in + 1))) {
339
                                        *out++ = (((*in - 0x21) / 2) + 0xf0);
340
                                        *out++ = ((*in % 2)
341
                                                 ? ((*(in + 1) + ((*(in + 1) < 0x60)
342
                                                   ? 0x1f : 0x20)))
343
                                                 : *(in + 1) + 0x7e);
344
                                        in += 2;
345
                                } else {
346
                                        error_ = -1;
347
                                        *out++ = SUBST_CHAR;
348
                                        in++;
349
                                        if (*in != '\0') {
350
                                                *out++ = SUBST_CHAR;
351
                                                in++;
352
                                        }
353
                                }
354
                                break;
355
                        default:
356
                                *out++ = *in++;
357
                                break;
358
                        }
359
                }
360
        }
361
362
        *out = '\0';
363
364
        if (error)
365
                *error = error_;
366
367
        return outbuf;
368
}
369
370
#define JIS_HWDAKUTEN                0x5e
371
#define JIS_HWHANDAKUTEN        0x5f
372
373
static gint conv_jis_hantozen(guchar *outbuf, guchar jis_code, guchar sound_sym)
374
{
375
        static guint16 h2z_tbl[] = {
376
                /* 0x20 - 0x2f */
377
                0x0000, 0x2123, 0x2156, 0x2157, 0x2122, 0x2126, 0x2572, 0x2521,
378
                0x2523, 0x2525, 0x2527, 0x2529, 0x2563, 0x2565, 0x2567, 0x2543,
379
                /* 0x30 - 0x3f */
380
                0x213c, 0x2522, 0x2524, 0x2526, 0x2528, 0x252a, 0x252b, 0x252d,
381
                0x252f, 0x2531, 0x2533, 0x2535, 0x2537, 0x2539, 0x253b, 0x253d,
382
                /* 0x40 - 0x4f */
383
                0x253f, 0x2541, 0x2544, 0x2546, 0x2548, 0x254a, 0x254b, 0x254c,
384
                0x254d, 0x254e, 0x254f, 0x2552, 0x2555, 0x2558, 0x255b, 0x255e,
385
                /* 0x50 - 0x5f */
386
                0x255f, 0x2560, 0x2561, 0x2562, 0x2564, 0x2566, 0x2568, 0x2569,
387
                0x256a, 0x256b, 0x256c, 0x256d, 0x256f, 0x2573, 0x212b, 0x212c
388
        };
389
390
        static guint16 dakuten_tbl[] = {
391
                /* 0x30 - 0x3f */
392
                0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x252c, 0x252e,
393
                0x2530, 0x2532, 0x2534, 0x2536, 0x2538, 0x253a, 0x253c, 0x253e,
394
                /* 0x40 - 0x4f */
395
                0x2540, 0x2542, 0x2545, 0x2547, 0x2549, 0x0000, 0x0000, 0x0000,
396
                0x0000, 0x0000, 0x2550, 0x2553, 0x2556, 0x2559, 0x255c, 0x0000
397
        };
398
399
        static guint16 handakuten_tbl[] = {
400
                /* 0x4a - 0x4e */
401
                0x2551, 0x2554, 0x2557, 0x255a, 0x255d
402
        };
403
404
        guint16 out_code;
405
406
        jis_code &= 0x7f;
407
        sound_sym &= 0x7f;
408
409
        if (jis_code < 0x21 || jis_code > 0x5f)
410
                return 0;
411
412
        if (sound_sym == JIS_HWDAKUTEN &&
413
            jis_code >= 0x36 && jis_code <= 0x4e) {
414
                out_code = dakuten_tbl[jis_code - 0x30];
415
                if (out_code != 0) {
416
                        *outbuf = out_code >> 8;
417
                        *(outbuf + 1) = out_code & 0xff;
418
                        return 2;
419
                }
420
        }
421
422
        if (sound_sym == JIS_HWHANDAKUTEN &&
423
            jis_code >= 0x4a && jis_code <= 0x4e) {
424
                out_code = handakuten_tbl[jis_code - 0x4a];
425
                *outbuf = out_code >> 8;
426
                *(outbuf + 1) = out_code & 0xff;
427
                return 2;
428
        }
429
430
        out_code = h2z_tbl[jis_code - 0x20];
431
        *outbuf = out_code >> 8;
432
        *(outbuf + 1) = out_code & 0xff;
433
        return 1;
434
}
435
436
/*
 * Convert a NUL-terminated EUC string to JIS (escape-sequence switched).
 *
 * ASCII is copied after ESC ( B, two-byte kanji after ESC $ B and
 * SS3-prefixed auxiliary kanji after ESC $ ( D.  SS2-prefixed half-width
 * kana are emitted after ESC ( I when allow_jisx0201_kana is set;
 * otherwise they are replaced by full-width kana via
 * conv_jis_hantozen().  Malformed input is replaced by SUBST_CHAR, and
 * the output always ends in the ASCII state.
 *
 * inbuf: input string, must not be NULL.
 * error: if non-NULL, receives 0 on success or -1 when malformed input
 *        was seen.
 *
 * Returns a buffer obtained from g_malloc(); the caller releases it with
 * g_free().
 */
static gchar *conv_euctojis(const gchar *inbuf, gint *error)
{
        const guchar *src = (const guchar *)inbuf;
        gchar *outbuf;
        guchar *out;                    /* name used by K_IN() and friends */
        JISState state = JIS_ASCII;     /* name used by K_IN() and friends */
        gint err = 0;

        outbuf = g_malloc(strlen(inbuf) * 3 + 4);
        out = (guchar *)outbuf;

        while (*src != '\0') {
                if (isascii(*src)) {
                        K_OUT();
                        *out++ = *src++;
                        continue;
                }

                if (iseuckanji(*src)) {
                        if (iseuckanji(src[1])) {
                                K_IN();
                                *out++ = *src++ & 0x7f;
                                *out++ = *src++ & 0x7f;
                        } else {
                                /* lead byte without a valid second byte */
                                err = -1;
                                K_OUT();
                                *out++ = SUBST_CHAR;
                                src++;
                                if (*src != '\0' && !isascii(*src)) {
                                        *out++ = SUBST_CHAR;
                                        src++;
                                }
                        }
                        continue;
                }

                if (iseuchwkana1(*src)) {
                        if (!iseuchwkana2(src[1])) {
                                /* SS2 without a kana byte: the SS2 itself
                                 * is dropped without a substitute */
                                err = -1;
                                K_OUT();
                                src++;
                                if (*src != '\0' && !isascii(*src)) {
                                        *out++ = SUBST_CHAR;
                                        src++;
                                }
                        } else if (allow_jisx0201_kana) {
                                HW_IN();
                                src++;
                                *out++ = *src++ & 0x7f;
                        } else {
                                guchar jis_ch[2];
                                guchar next_kana = '\0';
                                gint len;

                                /* offer the following half-width kana, if
                                 * any, as a possible sound mark */
                                if (iseuchwkana1(src[2]) &&
                                    iseuchwkana2(src[3]))
                                        next_kana = src[3];

                                len = conv_jis_hantozen(jis_ch, src[1],
                                                        next_kana);
                                if (len == 0) {
                                        src += 2;
                                } else {
                                        K_IN();
                                        /* each kana is SS2 + one byte */
                                        src += len * 2;
                                        *out++ = jis_ch[0];
                                        *out++ = jis_ch[1];
                                }
                        }
                        continue;
                }

                if (iseucaux(*src)) {
                        src++;  /* the SS3 prefix itself */
                        if (iseuckanji(src[0]) && iseuckanji(src[1])) {
                                AUX_IN();
                                *out++ = *src++ & 0x7f;
                                *out++ = *src++ & 0x7f;
                        } else {
                                err = -1;
                                K_OUT();
                                if (*src != '\0' && !isascii(*src)) {
                                        *out++ = SUBST_CHAR;
                                        src++;
                                        if (*src != '\0' && !isascii(*src)) {
                                                *out++ = SUBST_CHAR;
                                                src++;
                                        }
                                }
                        }
                        continue;
                }

                /* any other 8-bit byte */
                err = -1;
                K_OUT();
                *out++ = SUBST_CHAR;
                src++;
        }

        K_OUT();
        *out = '\0';

        if (error != NULL)
                *error = err;

        return outbuf;
}
537
538
/*
 * sjistoidx(): linear index of the Shift_JIS pair (c1, c2).  Every lead
 * byte spans 188 consecutive indices; lead bytes above 0x9f continue the
 * numbering that ends at 0x9f, and trail bytes above 0x7e are shifted
 * down by one because 0x7f is not a trail byte.
 *
 * idxtojis1() / idxtojis2(): first / second JIS byte for such an index
 * (94 indices per first byte, both bytes starting at 0x21).
 */
#define sjistoidx(c1, c2)                                               \
        ((((c1) > 0x9f) ? ((c1) - 0xc1) : ((c1) - 0x81)) * 188 +        \
         (c2) - (((c2) > 0x7e) ? 0x41 : 0x40))
#define idxtojis1(c)    ((c) / 94 + 0x21)
#define idxtojis2(c)    ((c) % 94 + 0x21)
544
545
/*
 * Relocate a sjistoidx() index from the IBM extension area (Shift_JIS
 * lead byte 0xfa and above) to the area with lead bytes 0xed / 0xee.
 *
 *   index of 0xfa5c and above  ->  moved so that 0xfa5c lands on 0xed40
 *   index of 0xfa55 - 0xfa5b   ->  moved so that 0xfa55 lands on 0xeefa
 *   index of 0xfa40 - 0xfa54   ->  moved so that 0xfa40 lands on 0xeeef
 *
 * Indices below that of 0xfa40 are returned unchanged.
 */
static guint conv_idx_ibmtonec(guint idx)
{
        if (idx >= sjistoidx(0xfa, 0x5c))
                return idx - (sjistoidx(0xfa, 0x5c) - sjistoidx(0xed, 0x40));

        /* disabled single-character mappings:
         *   0xfa5b -> 0x81e6
         *   0xfa5a -> 0x8784
         *   0xfa59 -> 0x8782
         *   0xfa58 -> 0x878a
         */

        if (idx >= sjistoidx(0xfa, 0x55))
                return idx - (sjistoidx(0xfa, 0x55) - sjistoidx(0xee, 0xfa));

        /* disabled mappings:
         *   0xfa54            -> 0x81ca
         *   0xfa4a and above  -> relative to 0x8754
         */

        if (idx >= sjistoidx(0xfa, 0x40))
                return idx - (sjistoidx(0xfa, 0x40) - sjistoidx(0xee, 0xef));

        return idx;
}
571
572
/*
 * Convert a NUL-terminated Shift_JIS string to JIS (escape-sequence
 * switched).
 *
 * ASCII is copied after ESC ( B and two-byte characters are emitted
 * after ESC $ B.  Half-width kana are emitted after ESC ( I when
 * allow_jisx0201_kana is set; otherwise they are replaced by full-width
 * kana via conv_jis_hantozen().  IBM extension characters are relocated
 * with conv_idx_ibmtonec() first.  Anything else is replaced by
 * SUBST_CHAR, and the output always ends in the ASCII state.
 *
 * inbuf: input string, must not be NULL.
 * error: if non-NULL, receives 0 on success or -1 when unconvertible
 *        input was seen.
 *
 * Returns a buffer obtained from g_malloc(); the caller releases it with
 * g_free().
 */
static gchar *conv_sjistojis(const gchar *inbuf, gint *error)
{
        const guchar *src = (const guchar *)inbuf;
        gchar *outbuf;
        guchar *out;                    /* name used by K_IN() and friends */
        JISState state = JIS_ASCII;     /* name used by K_IN() and friends */
        gint err = 0;
        guint idx;

        outbuf = g_malloc(strlen(inbuf) * 5 + 4);
        out = (guchar *)outbuf;

        while (*src != '\0') {
                if (isascii(*src)) {
                        K_OUT();
                        *out++ = *src++;
                        continue;
                }

                if (issjiskanji1(*src)) {
                        if (issjiskanji2(src[1])) {
                                K_IN();
                                idx = sjistoidx(src[0], src[1]);
                                *out++ = idxtojis1(idx);
                                *out++ = idxtojis2(idx);
                                src += 2;
                        } else {
                                /* lead byte without a valid trail byte */
                                err = -1;
                                K_OUT();
                                *out++ = SUBST_CHAR;
                                src++;
                                if (*src != '\0' && !isascii(*src)) {
                                        *out++ = SUBST_CHAR;
                                        src++;
                                }
                        }
                        continue;
                }

                if (issjishwkana(*src)) {
                        if (allow_jisx0201_kana) {
                                HW_IN();
                                *out++ = *src++ & 0x7f;
                        } else {
                                guchar jis_ch[2];
                                guchar next_kana = '\0';
                                gint len;

                                /* offer the following half-width kana, if
                                 * any, as a possible sound mark */
                                if (issjishwkana(src[1]))
                                        next_kana = src[1];

                                len = conv_jis_hantozen(jis_ch, src[0],
                                                        next_kana);
                                if (len == 0) {
                                        src++;
                                } else {
                                        K_IN();
                                        src += len;
                                        *out++ = jis_ch[0];
                                        *out++ = jis_ch[1];
                                }
                        }
                        continue;
                }

                if (issjisibmext(src[0], src[1])) {
                        K_IN();
                        idx = sjistoidx(src[0], src[1]);
                        idx = conv_idx_ibmtonec(idx);
                        *out++ = idxtojis1(idx);
                        *out++ = idxtojis2(idx);
                        src += 2;
                        continue;
                }

#if 0
                if (issjisudc(*src)) {
                        UDC_IN();
                        idx = sjistoidx(src[0], src[1])
                              - sjistoidx(0xf0, 0x40);
                        *out++ = idxtojis1(idx);
                        *out++ = idxtojis2(idx);
                        src += 2;
                        continue;
                }
#endif

                if (issjisext(*src)) {
                        /* extension lead byte that is not converted */
                        err = -1;
                        K_OUT();
                        *out++ = SUBST_CHAR;
                        src++;
                        if (*src != '\0' && !isascii(*src)) {
                                *out++ = SUBST_CHAR;
                                src++;
                        }
                        continue;
                }

                /* any other 8-bit byte */
                err = -1;
                K_OUT();
                *out++ = SUBST_CHAR;
                src++;
        }

        K_OUT();
        *out = '\0';

        if (error != NULL)
                *error = err;

        return outbuf;
}
671
672
/*
 * Convert a NUL-terminated Shift_JIS string to EUC.
 *
 * ASCII is copied unchanged, two-byte characters with lead bytes
 * 0x81 - 0x9f / 0xe0 - 0xef are converted arithmetically, and half-width
 * kana are prefixed with SS2.  Lead bytes in the extension area
 * (0xf0 - 0xfc) and any other 8-bit input are replaced by SUBST_CHAR.
 *
 * inbuf: input string, must not be NULL.
 * error: if non-NULL, receives 0 on success or -1 when unconvertible
 *        input was seen.
 *
 * Returns a buffer obtained from g_malloc(); the caller releases it with
 * g_free().
 */
static gchar *conv_sjistoeuc(const gchar *inbuf, gint *error)
{
        const guchar *src = (const guchar *)inbuf;
        gchar *outbuf;
        guchar *dst;
        gint err = 0;

        /* worst case is half-width kana: every input byte becomes two */
        outbuf = g_malloc(strlen(inbuf) * 2 + 1);
        dst = (guchar *)outbuf;

        while (*src != '\0') {
                if (isascii(*src)) {
                        *dst++ = *src++;
                        continue;
                }

                if (issjiskanji1(*src) && issjiskanji2(src[1])) {
                        guchar hi = src[0];
                        guchar lo = src[1];
                        const guchar row = (hi < 0xa0) ? 0x70 : 0xb0;

                        /* trail bytes below 0x9f belong to the first of
                         * the two rows that share a lead byte */
                        if (lo < 0x9f) {
                                hi = (hi - row) * 2 - 1;
                                lo -= (lo > 0x7f) ? 0x20 : 0x1f;
                        } else {
                                hi = (hi - row) * 2;
                                lo -= 0x7e;
                        }

                        *dst++ = hi | 0x80;
                        *dst++ = lo | 0x80;
                        src += 2;
                        continue;
                }

                if (issjishwkana(*src)) {
                        *dst++ = SS2;
                        *dst++ = *src++;
                        continue;
                }

                err = -1;
                *dst++ = SUBST_CHAR;

                if (issjiskanji1(*src) || issjisext(*src)) {
                        /* broken or unsupported two-byte character: also
                         * substitute the byte after it when it is 8-bit */
                        src++;
                        if (*src != '\0' && !isascii(*src)) {
                                *dst++ = SUBST_CHAR;
                                src++;
                        }
                } else {
                        src++;
                }
        }

        *dst = '\0';

        if (error != NULL)
                *error = err;

        return outbuf;
}
737
738
/*
 * Convert a NUL-terminated JIS (escape-sequence switched) string to
 * UTF-8 in two steps.
 *
 * Input that contains the ESC $ ( D escape sequence goes through EUC
 * (conv_jistoeuc() + conv_euctoutf8()); everything else goes through
 * Shift_JIS (conv_jistosjis() + conv_sjistoutf8()).
 *
 * inbuf: input string, must not be NULL.
 * error: if non-NULL, receives the bitwise OR of the error values
 *        reported by the two steps (0 when both succeeded).
 *
 * Returns the string produced by the second step; the intermediate
 * string is freed here.
 */
static gchar *conv_jistoutf8(const gchar *inbuf, gint *error)
{
        gchar *intermediate;
        gchar *result;
        gint first_err = 0;
        gint second_err = 0;
        const gboolean has_aux_kanji = (strstr(inbuf, "\033$(D") != NULL);

        if (has_aux_kanji) {
                intermediate = conv_jistoeuc(inbuf, &first_err);
                result = conv_euctoutf8(intermediate, &second_err);
        } else {
                intermediate = conv_jistosjis(inbuf, &first_err);
                result = conv_sjistoutf8(intermediate, &second_err);
        }
        g_free(intermediate);

        if (error != NULL)
                *error = (first_err | second_err);

        return result;
}
757
758
/*
 * Locking helpers: map onto the GLib G_LOCK family when USE_THREADS is
 * set, and expand to nothing otherwise.
 */
#if USE_THREADS
#  define S_LOCK_DEFINE_STATIC(name)        G_LOCK_DEFINE_STATIC(name)
#  define S_LOCK(name)                        G_LOCK(name)
#  define S_UNLOCK(name)                G_UNLOCK(name)
#else
#  define S_LOCK_DEFINE_STATIC(name)
#  define S_LOCK(name)
#  define S_UNLOCK(name)
#endif
767
768
/*
 * Convert a Shift_JIS string to UTF-8.
 *
 * The iconv descriptor is opened on first use and kept in a
 * function-static variable; access is bracketed by S_LOCK/S_UNLOCK.
 * CS_CP932 is tried first, then CS_SHIFT_JIS.  If neither can be
 * opened a warning is logged, the failure is remembered so that no
 * further open attempts are made, and a plain copy of `inbuf' is
 * returned with *error set to -1.
 */
static gchar *conv_sjistoutf8(const gchar *inbuf, gint *error)
{
        static iconv_t cd = (iconv_t)-1;
        static gboolean can_open = TRUE;
        S_LOCK_DEFINE_STATIC(cd);
        gchar *result;

        S_LOCK(cd);

        /* lazily open the descriptor unless an earlier attempt failed */
        if (cd == (iconv_t)-1 && can_open) {
                cd = iconv_open(CS_UTF_8, CS_CP932);
                if (cd == (iconv_t)-1)
                        cd = iconv_open(CS_UTF_8, CS_SHIFT_JIS);
                if (cd == (iconv_t)-1) {
                        g_warning("conv_sjistoutf8(): %s\n",
                                  g_strerror(errno));
                        can_open = FALSE;
                }
        }

        if (cd == (iconv_t)-1) {
                /* no converter available: hand back the input unchanged */
                S_UNLOCK(cd);
                if (error)
                        *error = -1;
                return g_strdup(inbuf);
        }

        result = conv_iconv_strdup_with_cd(inbuf, cd, error);
        S_UNLOCK(cd);

        return result;
}
804
805
/*
 * Convert an EUC-JP string to UTF-8.
 *
 * The iconv descriptor is opened on first use and kept in a
 * function-static variable; access is bracketed by S_LOCK/S_UNLOCK.
 * CS_EUC_JP_MS is tried first, then CS_EUC_JP.  If neither can be
 * opened a warning is logged, the failure is remembered so that no
 * further open attempts are made, and a plain copy of `inbuf' is
 * returned with *error set to -1.
 */
static gchar *conv_euctoutf8(const gchar *inbuf, gint *error)
{
        static iconv_t cd = (iconv_t)-1;
        static gboolean can_open = TRUE;
        S_LOCK_DEFINE_STATIC(cd);
        gchar *result;

        S_LOCK(cd);

        /* lazily open the descriptor unless an earlier attempt failed */
        if (cd == (iconv_t)-1 && can_open) {
                cd = iconv_open(CS_UTF_8, CS_EUC_JP_MS);
                if (cd == (iconv_t)-1)
                        cd = iconv_open(CS_UTF_8, CS_EUC_JP);
                if (cd == (iconv_t)-1) {
                        g_warning("conv_euctoutf8(): %s\n",
                                  g_strerror(errno));
                        can_open = FALSE;
                }
        }

        if (cd == (iconv_t)-1) {
                /* no converter available: hand back the input unchanged */
                S_UNLOCK(cd);
                if (error)
                        *error = -1;
                return g_strdup(inbuf);
        }

        result = conv_iconv_strdup_with_cd(inbuf, cd, error);
        S_UNLOCK(cd);

        return result;
}
841
842
/*
 * Convert a string of unknown Japanese encoding to UTF-8.
 *
 * The encoding is guessed with conv_guess_ja_encoding().  ISO-2022-JP,
 * Shift_JIS and EUC-JP input is passed to the matching converter.  Any
 * other guess yields a plain copy of the input with *error set to 0;
 * when the guess is UTF-8 and isutf8bom() matches, the first three
 * bytes are skipped before copying.
 */
static gchar *conv_anytoutf8(const gchar *inbuf, gint *error)
{
        const CharSet guessed = conv_guess_ja_encoding(inbuf);

        if (guessed == C_ISO_2022_JP)
                return conv_jistoutf8(inbuf, error);
        if (guessed == C_SHIFT_JIS)
                return conv_sjistoutf8(inbuf, error);
        if (guessed == C_EUC_JP)
                return conv_euctoutf8(inbuf, error);

        /* nothing to convert: copy the input as it is */
        if (error)
                *error = 0;
        if (guessed == C_UTF_8 && isutf8bom(inbuf))
                inbuf += 3;

        return g_strdup(inbuf);
}
863
864
/*
 * Convert a UTF-8 string to Shift_JIS.
 *
 * The iconv descriptor is opened on first use and kept in a
 * function-static variable; access is bracketed by S_LOCK/S_UNLOCK.
 * CS_CP932 is tried as the target first, then CS_SHIFT_JIS.  If
 * neither can be opened a warning is logged, the failure is
 * remembered, and a plain copy of `inbuf' is returned with *error set
 * to -1.  On the normal path the three bytes matched by isutf8bom()
 * are skipped before conversion.
 */
static gchar *conv_utf8tosjis(const gchar *inbuf, gint *error)
{
        static iconv_t cd = (iconv_t)-1;
        static gboolean can_open = TRUE;
        S_LOCK_DEFINE_STATIC(cd);
        gchar *result;

        S_LOCK(cd);

        /* lazily open the descriptor unless an earlier attempt failed */
        if (cd == (iconv_t)-1 && can_open) {
                cd = iconv_open(CS_CP932, CS_UTF_8);
                if (cd == (iconv_t)-1)
                        cd = iconv_open(CS_SHIFT_JIS, CS_UTF_8);
                if (cd == (iconv_t)-1) {
                        g_warning("conv_utf8tosjis(): %s\n",
                                  g_strerror(errno));
                        can_open = FALSE;
                }
        }

        if (cd == (iconv_t)-1) {
                /* no converter available: hand back the input unchanged */
                S_UNLOCK(cd);
                if (error)
                        *error = -1;
                return g_strdup(inbuf);
        }

        if (isutf8bom(inbuf))
                inbuf += 3;
        result = conv_iconv_strdup_with_cd(inbuf, cd, error);
        S_UNLOCK(cd);

        return result;
}
902
903
/*
 * Convert a UTF-8 string to EUC-JP.
 *
 * The iconv descriptor is opened on first use and kept in a
 * function-static variable; access is bracketed by S_LOCK/S_UNLOCK.
 * CS_EUC_JP_MS is tried as the target first, then CS_EUC_JP.  If
 * neither can be opened a warning is logged, the failure is
 * remembered, and a plain copy of `inbuf' is returned with *error set
 * to -1.  On the normal path the three bytes matched by isutf8bom()
 * are skipped before conversion.
 */
static gchar *conv_utf8toeuc(const gchar *inbuf, gint *error)
{
        static iconv_t cd = (iconv_t)-1;
        static gboolean can_open = TRUE;
        S_LOCK_DEFINE_STATIC(cd);
        gchar *result;

        S_LOCK(cd);

        /* lazily open the descriptor unless an earlier attempt failed */
        if (cd == (iconv_t)-1 && can_open) {
                cd = iconv_open(CS_EUC_JP_MS, CS_UTF_8);
                if (cd == (iconv_t)-1)
                        cd = iconv_open(CS_EUC_JP, CS_UTF_8);
                if (cd == (iconv_t)-1) {
                        g_warning("conv_utf8toeuc(): %s\n",
                                  g_strerror(errno));
                        can_open = FALSE;
                }
        }

        if (cd == (iconv_t)-1) {
                /* no converter available: hand back the input unchanged */
                S_UNLOCK(cd);
                if (error)
                        *error = -1;
                return g_strdup(inbuf);
        }

        if (isutf8bom(inbuf))
                inbuf += 3;
        result = conv_iconv_strdup_with_cd(inbuf, cd, error);
        S_UNLOCK(cd);

        return result;
}
941
942
/*
 * Convert a UTF-8 string to ISO-2022-JP in two stages.
 *
 * The active path goes through Shift_JIS (conv_utf8tosjis() followed
 * by conv_sjistojis()); the EUC-JP path is kept as a compile-time
 * alternative.  If `error' is non-NULL it receives the bitwise OR of
 * both stages' error codes.
 */
static gchar *conv_utf8tojis(const gchar *inbuf, gint *error)
{
        gint stage1_err = 0, stage2_err = 0;
        gchar *intermediate;
        gchar *result;

#if 1
        intermediate = conv_utf8tosjis(inbuf, &stage1_err);
        result = conv_sjistojis(intermediate, &stage2_err);
#else
        intermediate = conv_utf8toeuc(inbuf, &stage1_err);
        result = conv_euctojis(intermediate, &stage2_err);
#endif

        /* the intermediate buffer is only needed between the stages */
        g_free(intermediate);

        if (error != NULL)
                *error = stage1_err | stage2_err;

        return result;
}
961
962
#if 0
963
/*
 * Lookup table used by isprintableeuckanji(): one row per first byte
 * 0xa2 .. 0xa8, indexed by (second byte - 0xa0).  A non-zero entry
 * makes isprintableeuckanji() report the two-byte code as printable.
 */
static const gchar valid_eucjp_tbl[][96] = {
        /* 0xa2a0 - 0xa2ff */
        { 0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 0,        /* a0-af */
          0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 1, 1, 1, 1, 1, 1,        /* b0-bf */
          1, 1, 0, 0, 0, 0, 0, 0,  0, 0, 1, 1, 1, 1, 1, 1,        /* c0-cf */
          1, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 1, 1, 1, 1,        /* d0-df */
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 0, 0, 0, 0, 0,        /* e0-ef */
          0, 0, 1, 1, 1, 1, 1, 1,  1, 1, 0, 0, 0, 0, 1, 0 },        /* f0-ff */

        /* 0xa3a0 - 0xa3ff */
        { 0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,        /* a0-af */
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 0, 0, 0, 0, 0, 0,        /* b0-bf */
          0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,        /* c0-cf */
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 0, 0, 0, 0, 0,        /* d0-df */
          0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,        /* e0-ef */
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 0, 0, 0, 0, 0 },        /* f0-ff */

        /* 0xa4a0 - 0xa4ff */
        { 0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,        /* a0-af */
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,        /* b0-bf */
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,        /* c0-cf */
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,        /* d0-df */
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,        /* e0-ef */
          1, 1, 1, 1, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0 },        /* f0-ff */

        /* 0xa5a0 - 0xa5ff */
        { 0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,        /* a0-af */
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,        /* b0-bf */
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,        /* c0-cf */
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,        /* d0-df */
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,        /* e0-ef */
          1, 1, 1, 1, 1, 1, 1, 0,  0, 0, 0, 0, 0, 0, 0, 0 },        /* f0-ff */

        /* 0xa6a0 - 0xa6ff */
        { 0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,        /* a0-af */
          1, 1, 1, 1, 1, 1, 1, 1,  1, 0, 0, 0, 0, 0, 0, 0,        /* b0-bf */
          0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,        /* c0-cf */
          1, 1, 1, 1, 1, 1, 1, 1,  1, 0, 0, 0, 0, 0, 0, 0,        /* d0-df */
          0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,        /* e0-ef */
          0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0 },        /* f0-ff */

        /* 0xa7a0 - 0xa7ff */
        { 0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,        /* a0-af */
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,        /* b0-bf */
          1, 1, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,        /* c0-cf */
          0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,        /* d0-df */
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,        /* e0-ef */
          1, 1, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0 },        /* f0-ff */

        /* 0xa8a0 - 0xa8ff */
        { 0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,        /* a0-af */
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,        /* b0-bf */
          1, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,        /* c0-cf */
          0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,        /* d0-df */
          0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,        /* e0-ef */
          0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0 }        /* f0-ff */
};
1020
1021
/*
 * Report whether the two-byte code (c1, c2) is accepted as printable.
 *
 * Codes with c1 outside 0xa1..0xf4 or c2 outside 0xa1..0xfe are
 * rejected, as are first bytes 0xa9..0xaf.  First bytes 0xa2..0xa8
 * are looked up in valid_eucjp_tbl; the tails of the 0xcf and 0xf4
 * rows (from 0xd4 and 0xa7 respectively) are rejected as well.
 */
static gboolean isprintableeuckanji(guchar c1, guchar c2)
{
        if (c1 < 0xa1 || c1 > 0xf4 || c2 < 0xa1 || c2 > 0xfe)
                return FALSE;

        if (c1 >= 0xa2 && c1 <= 0xa8)
                return (gboolean)valid_eucjp_tbl[c1 - 0xa2][c2 - 0xa0];

        if (c1 >= 0xa9 && c1 <= 0xaf)
                return FALSE;

        /* c2 <= 0xfe is already guaranteed by the range check above */
        if (c1 == 0xcf && c2 >= 0xd4)
                return FALSE;
        if (c1 == 0xf4 && c2 >= 0xa7)
                return FALSE;

        return TRUE;
}
1044
1045
/*
 * Sanitize `str' in place, treating it as EUC-JP:
 *  - CR+LF pairs are collapsed to LF;
 *  - two-byte codes rejected by isprintableeuckanji() have their first
 *    byte replaced with SUBST_CHAR, and the following byte too unless
 *    it is ASCII or the terminator;
 *  - SS2 + half-width kana and SS3 + two kanji bytes are kept;
 *  - every other non-ASCII byte is replaced with SUBST_CHAR.
 */
static void conv_unreadable_eucjp(gchar *str)
{
        guchar *cur = (guchar *)str;

        while (*cur != '\0') {
                if (isascii(*cur)) {
                        /* convert CR+LF -> LF */
                        if (cur[0] == '\r' && cur[1] == '\n')
                                memmove(cur, cur + 1, strlen((gchar *)cur));
                        cur++;
                        continue;
                }

                if (iseuckanji(*cur)) {
                        if (isprintableeuckanji(cur[0], cur[1])) {
                                cur += 2;
                                continue;
                        }
                        /* unprintable code: blank the lead byte ... */
                        *cur++ = SUBST_CHAR;
                        /* ... and the trail byte unless it is ASCII */
                        if (*cur != '\0') {
                                if (!isascii(*cur))
                                        *cur = SUBST_CHAR;
                                cur++;
                        }
                        continue;
                }

                if (iseuchwkana1(*cur) && iseuchwkana2(cur[1])) {
                        /* euc-jp half-width kana */
                        cur += 2;
                        continue;
                }

                if (iseucaux(*cur) &&
                    iseuckanji(cur[1]) && iseuckanji(cur[2])) {
                        /* auxiliary kanji */
                        cur += 3;
                        continue;
                }

                /* anything else is an unprintable single byte */
                *cur++ = SUBST_CHAR;
        }
}
1087
#endif
1088
1089
/*
 * Sanitize `str' in place for 7-bit display:
 *  - every CR that is directly followed by LF is dropped (CR+LF -> LF);
 *  - every non-ASCII byte is replaced with SUBST_CHAR.
 *
 * The string is compacted in a single pass with separate read and
 * write positions, so the cost stays linear even for input made up
 * mostly of CR+LF line ends (the previous strlen()+memmove() per line
 * end was quadratic).  The result is never longer than the input.
 */
static void conv_unreadable_8bit(gchar *str)
{
        const gchar *src = str;
        gchar *dst = str;

        for (; *src != '\0'; src++) {
                /* drop the CR of a CR+LF pair; the LF is copied next turn */
                if (src[0] == '\r' && src[1] == '\n')
                        continue;

                *dst++ = isascii(*(const guchar *)src) ? *src : SUBST_CHAR;
        }

        *dst = '\0';
}
1101
1102
#if 0
1103
/*
 * Sanitize `str' in place: collapse CR+LF to LF and replace bytes in
 * the range 0x7f..0x9f with SUBST_CHAR.
 */
static void conv_unreadable_latin(gchar *str)
{
        guchar *cur;

        for (cur = (guchar *)str; *cur != '\0'; cur++) {
                if (cur[0] == '\r' && cur[1] == '\n') {
                        /* convert CR+LF -> LF */
                        memmove(cur, cur + 1, strlen((gchar *)cur));
                } else if (*cur >= 0x7f && *cur <= 0x9f) {
                        *cur = SUBST_CHAR;
                }
        }
}
1116
#endif
1117
1118
#define NCV        '\0'
1119
1120
void conv_mb_alnum(gchar *str)
1121
{
1122
        static guchar char_tbl[] = {
1123
                /* 0xa0 - 0xaf */
1124
                NCV, ' ', NCV, NCV, ',', '.', NCV, ':',
1125
                ';', '?', '!', NCV, NCV, NCV, NCV, NCV,
1126
                /* 0xb0 - 0xbf */
1127
                NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
1128
                NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
1129
                /* 0xc0 - 0xcf */
1130
                NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
1131
                NCV, NCV, '(', ')', NCV, NCV, '[', ']',
1132
                /* 0xd0 - 0xdf */
1133
                '{', '}', NCV, NCV, NCV, NCV, NCV, NCV,
1134
                NCV, NCV, NCV, NCV, '+', '-', NCV, NCV,
1135
                /* 0xe0 - 0xef */
1136
                NCV, '=', NCV, '<', '>', NCV, NCV, NCV,
1137
                NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV
1138
        };
1139
1140
        register guchar *p = (guchar *)str;
1141
        register gint len;
1142
1143
        len = strlen(str);
1144
1145
        while (len > 1) {
1146
                if (*p == 0xa3) {
1147
                        register guchar ch = *(p + 1);
1148
1149
                        if (ch >= 0xb0 && ch <= 0xfa) {
1150
                                /* [a-zA-Z] */
1151
                                *p = ch & 0x7f;
1152
                                p++;
1153
                                len--;
1154
                                memmove(p, p + 1, len);
1155
                                len--;
1156
                        } else  {
1157
                                p += 2;
1158
                                len -= 2;
1159
                        }
1160
                } else if (*p == 0xa1) {
1161
                        register guchar ch = *(p + 1);
1162
1163
                        if (ch >= 0xa0 && ch <= 0xef &&
1164
                            NCV != char_tbl[ch - 0xa0]) {
1165
                                *p = char_tbl[ch - 0xa0];
1166
                                p++;
1167
                                len--;
1168
                                memmove(p, p + 1, len);
1169
                                len--;
1170
                        } else {
1171
                                p += 2;
1172
                                len -= 2;
1173
                        }
1174
                } else if (iseuckanji(*p)) {
1175
                        p += 2;
1176
                        len -= 2;
1177
                } else {
1178
                        p++;
1179
                        len--;
1180
                }
1181
        }
1182
}
1183
1184
CharSet conv_guess_ja_encoding(const gchar *str)
1185
{
1186
        const guchar *p = (const guchar *)str;
1187
        CharSet guessed = C_US_ASCII;
1188
1189
        while (*p != '\0') {
1190
                if (*p == ESC && (*(p + 1) == '$' || *(p + 1) == '(')) {
1191
                        if (guessed == C_US_ASCII)
1192
                                return C_ISO_2022_JP;
1193
                        p += 2;
1194
                } else if (isascii(*p)) {
1195
                        p++;
1196
                } else if (iseuckanji(*p) && iseuckanji(*(p + 1))) {
1197
                        if (*p >= 0xfd && *p <= 0xfe)
1198
                                return C_EUC_JP;
1199
                        else if (guessed == C_SHIFT_JIS) {
1200
                                if ((issjiskanji1(*p) &&
1201
                                     issjiskanji2(*(p + 1))) ||
1202
                                    issjishwkana(*p))
1203
                                        guessed = C_SHIFT_JIS;
1204
                                else
1205
                                        guessed = C_EUC_JP;
1206
                        } else
1207
                                guessed = C_EUC_JP;
1208
                        p += 2;
1209
                } else if (issjiskanji1(*p) && issjiskanji2(*(p + 1))) {
1210
                        guessed = C_SHIFT_JIS;
1211
                        p += 2;
1212
                } else if (issjishwkana(*p)) {
1213
                        guessed = C_SHIFT_JIS;
1214
                        p++;
1215
                } else {
1216
                        if (guessed == C_US_ASCII)
1217
                                guessed = C_AUTO;
1218
                        p++;
1219
                }
1220
        }
1221
1222
        if (guessed != C_US_ASCII) {
1223
                p = (const guchar *)str;
1224
1225
                while (*p != '\0') {
1226
                        if (isascii(*p)) {
1227
                                p++;
1228
                        } else if (isutf8_3_1(*p) &&
1229
                                   isutf8_3_2(*(p + 1)) &&
1230
                                   isutf8_3_2(*(p + 2))) {
1231
                                p += 3;
1232
                        } else {
1233
                                return guessed;
1234
                        }
1235
                }
1236
1237
                return C_UTF_8;
1238
        }
1239
1240
        return guessed;
1241
}
1242
1243
/* Convert ISO-2022-JP for display; forwards to conv_jistoutf8(). */
static gchar *conv_jistodisp(const gchar *inbuf, gint *error)
{
        gchar *disp = conv_jistoutf8(inbuf, error);

        return disp;
}
1247
1248
/* Convert Shift_JIS for display; forwards to conv_sjistoutf8(). */
static gchar *conv_sjistodisp(const gchar *inbuf, gint *error)
{
        gchar *disp = conv_sjistoutf8(inbuf, error);

        return disp;
}
1252
1253
/* Convert EUC-JP for display; forwards to conv_euctoutf8(). */
static gchar *conv_euctodisp(const gchar *inbuf, gint *error)
{
        gchar *disp = conv_euctoutf8(inbuf, error);

        return disp;
}
1257
1258
/*
 * Prepare a string that is supposed to be UTF-8 for display.
 *
 * Input that passes g_utf8_validate() is copied (without the three
 * bytes matched by isutf8bom(), if present) and *error is set to 0.
 * Anything else is handed to conv_ustodisp().
 */
gchar *conv_utf8todisp(const gchar *inbuf, gint *error)
{
        if (g_utf8_validate(inbuf, -1, NULL) != TRUE)
                return conv_ustodisp(inbuf, error);

        if (error)
                *error = 0;
        if (isutf8bom(inbuf))
                inbuf += 3;

        return g_strdup(inbuf);
}
1269
1270
/*
 * Convert a string of unknown encoding for display.
 *
 * The result of conv_anytoutf8() is returned as it is when it passes
 * g_utf8_validate().  Otherwise *error is set to -1 and the result is
 * cleaned with conv_unreadable_8bit() before being returned.
 */
static gchar *conv_anytodisp(const gchar *inbuf, gint *error)
{
        gchar *converted = conv_anytoutf8(inbuf, error);

        if (g_utf8_validate(converted, -1, NULL) == TRUE)
                return converted;

        /* still not valid UTF-8: flag it and strip the 8-bit bytes */
        if (error)
                *error = -1;
        conv_unreadable_8bit(converted);

        return converted;
}
1283
1284
/*
 * Return a copy of `inbuf' cleaned with conv_unreadable_8bit().
 * *error, if requested, is always set to 0.
 */
static gchar *conv_ustodisp(const gchar *inbuf, gint *error)
{
        gchar *copy;

        if (error)
                *error = 0;

        copy = g_strdup(inbuf);
        conv_unreadable_8bit(copy);

        return copy;
}
1295
1296
/*
 * Convert a string in the locale's charset to the internal encoding
 * for display.
 *
 * inbuf: string to convert; NULL yields NULL with *error set to 0,
 *        matching conv_codeset_strdup_full().
 * error: optional; receives the status of conv_iconv_strdup().
 *
 * If conv_iconv_strdup() returns NULL (e.g. the converter cannot be
 * opened), the input is treated as UTF-8 and passed through
 * conv_utf8todisp() instead; `error' keeps the value written by the
 * failed conversion in that case.
 */
gchar *conv_localetodisp(const gchar *inbuf, gint *error)
{
        gchar *str;

        /* the fallback below would hand NULL to g_utf8_validate() */
        if (!inbuf) {
                if (error)
                        *error = 0;
                return NULL;
        }

        str = conv_iconv_strdup(inbuf, conv_get_locale_charset_str(),
                                CS_INTERNAL, error);
        if (!str)
                str = conv_utf8todisp(inbuf, NULL);

        return str;
}
1307
1308
/*
 * Identity "conversion": returns a copy of `inbuf' and reports
 * success (0) through `error' when it is non-NULL.
 */
static gchar *conv_noconv(const gchar *inbuf, gint *error)
{
        gchar *copy = g_strdup(inbuf);

        if (error != NULL)
                *error = 0;

        return copy;
}
1314
1315
static const gchar *
1316
conv_get_fallback_for_private_encoding(const gchar *encoding)
1317
{
1318
        if (encoding) {
1319
                if ((encoding[0] == 'X' || encoding[0] == 'x') &&
1320
                    encoding[1] == '-') {
1321
                        if (!g_ascii_strcasecmp(encoding, CS_X_GBK))
1322
                                return CS_GBK;
1323
                        else if (!g_ascii_strcasecmp(encoding, CS_X_SJIS))
1324
                                return CS_SHIFT_JIS;
1325
                } else if ((encoding[0] == 'K' || encoding[0] == 'k') &&
1326
                           (encoding[1] == 'S' || encoding[1] == 's')) {
1327
                        if (!g_ascii_strcasecmp(encoding, CS_KS_C_5601_1987))
1328
                                return CS_EUC_KR;
1329
                }
1330
        }
1331
1332
        return encoding;
1333
}
1334
1335
CodeConverter *conv_code_converter_new(const gchar *src_encoding,
1336
                                       const gchar *dest_encoding)
1337
{
1338
        CodeConverter *conv;
1339
1340
        src_encoding = conv_get_fallback_for_private_encoding(src_encoding);
1341
1342
        conv = g_new0(CodeConverter, 1);
1343
        conv->code_conv_func =
1344
                conv_get_code_conv_func(src_encoding, dest_encoding);
1345
        conv->src_encoding = g_strdup(src_encoding);
1346
        conv->dest_encoding = g_strdup(dest_encoding);
1347
1348
        return conv;
1349
}
1350
1351
/*
 * Free a CodeConverter together with the encoding names it owns.
 * Passing NULL is allowed and does nothing, like g_free().
 */
void conv_code_converter_destroy(CodeConverter *conv)
{
        if (!conv)
                return;

        g_free(conv->src_encoding);
        g_free(conv->dest_encoding);
        g_free(conv);
}
1357
1358
/*
 * Convert `inbuf' with the given converter.
 *
 * Returns NULL for a NULL input.  When the converter's function is
 * conv_noconv, the conversion is done by conv_iconv_strdup() using the
 * stored encoding names; otherwise the stored function is called.
 * No error code is reported in either case.
 */
gchar *conv_convert(CodeConverter *conv, const gchar *inbuf)
{
        if (!inbuf)
                return NULL;

        if (conv->code_conv_func == conv_noconv)
                return conv_iconv_strdup(inbuf, conv->src_encoding,
                                         conv->dest_encoding, NULL);

        return conv->code_conv_func(inbuf, NULL);
}
1368
1369
gchar *conv_codeset_strdup_full(const gchar *inbuf,
1370
                                const gchar *src_encoding,
1371
                                const gchar *dest_encoding,
1372
                                gint *error)
1373
{
1374
        CodeConvFunc conv_func;
1375
1376
        if (!inbuf) {
1377
                if (error)
1378
                        *error = 0;
1379
                return NULL;
1380
        }
1381
1382
        src_encoding = conv_get_fallback_for_private_encoding(src_encoding);
1383
1384
        conv_func = conv_get_code_conv_func(src_encoding, dest_encoding);
1385
        if (conv_func != conv_noconv)
1386
                return conv_func(inbuf, error);
1387
1388
        return conv_iconv_strdup(inbuf, src_encoding, dest_encoding, error);
1389
}
1390
1391
CodeConvFunc conv_get_code_conv_func(const gchar *src_encoding,
1392
                                     const gchar *dest_encoding)
1393
{
1394
        CodeConvFunc code_conv = conv_noconv;
1395
        CharSet src_charset;
1396
        CharSet dest_charset;
1397
1398
        if (!src_encoding)
1399
                src_charset = conv_get_locale_charset();
1400
        else
1401
                src_charset = conv_get_charset_from_str(src_encoding);
1402
1403
        /* auto detection mode */
1404
        if (!src_encoding && !dest_encoding) {
1405
                if (conv_ad_type == C_AD_JAPANESE ||
1406
                    (conv_ad_type == C_AD_BY_LOCALE && conv_is_ja_locale()))
1407
                        return conv_anytodisp;
1408
                else
1409
                        return conv_noconv;
1410
        }
1411
1412
        dest_charset = conv_get_charset_from_str(dest_encoding);
1413
1414
        if (dest_charset == C_US_ASCII)
1415
                return conv_ustodisp;
1416
1417
        switch (src_charset) {
1418
        case C_US_ASCII:
1419
        case C_ISO_8859_1:
1420
        case C_ISO_8859_2:
1421
        case C_ISO_8859_3:
1422
        case C_ISO_8859_4:
1423
        case C_ISO_8859_5:
1424
        case C_ISO_8859_6:
1425
        case C_ISO_8859_7:
1426
        case C_ISO_8859_8:
1427
        case C_ISO_8859_9:
1428
        case C_ISO_8859_10:
1429
        case C_ISO_8859_11:
1430
        case C_ISO_8859_13:
1431
        case C_ISO_8859_14:
1432
        case C_ISO_8859_15:
1433
        case C_ISO_8859_16:
1434
                break;
1435
        case C_ISO_2022_JP:
1436
        case C_ISO_2022_JP_2:
1437
        case C_ISO_2022_JP_3:
1438
                if (dest_charset == C_AUTO)
1439
                        code_conv = conv_jistodisp;
1440
                else if (dest_charset == C_EUC_JP)
1441
                        code_conv = conv_jistoeuc;
1442
                else if (dest_charset == C_SHIFT_JIS ||
1443
                         dest_charset == C_CP932)
1444
                        code_conv = conv_jistosjis;
1445
                else if (dest_charset == C_UTF_8)
1446
                        code_conv = conv_jistoutf8;
1447
                break;
1448
        case C_SHIFT_JIS:
1449
        case C_CP932:
1450
                if (dest_charset == C_AUTO)
1451
                        code_conv = conv_sjistodisp;
1452
                else if (dest_charset == C_ISO_2022_JP   ||
1453
                         dest_charset == C_ISO_2022_JP_2 ||
1454
                         dest_charset == C_ISO_2022_JP_3)
1455
                        code_conv = conv_sjistojis;
1456
                else if (dest_charset == C_EUC_JP)
1457
                        code_conv = conv_sjistoeuc;
1458
                else if (dest_charset == C_UTF_8)
1459
                        code_conv = conv_sjistoutf8;
1460
                break;
1461
        case C_EUC_JP:
1462
                if (dest_charset == C_AUTO)
1463
                        code_conv = conv_euctodisp;
1464
                else if (dest_charset == C_ISO_2022_JP   ||
1465
                         dest_charset == C_ISO_2022_JP_2 ||
1466
                         dest_charset == C_ISO_2022_JP_3)
1467
                        code_conv = conv_euctojis;
1468
                else if (dest_charset == C_UTF_8)
1469
                        code_conv = conv_euctoutf8;
1470
                break;
1471
        case C_UTF_8:
1472
                if (dest_charset == C_EUC_JP)
1473
                        code_conv = conv_utf8toeuc;
1474
                else if (dest_charset == C_ISO_2022_JP   ||
1475
                         dest_charset == C_ISO_2022_JP_2 ||
1476
                         dest_charset == C_ISO_2022_JP_3)
1477
                        code_conv = conv_utf8tojis;
1478
                else if (dest_charset == C_SHIFT_JIS ||
1479
                         dest_charset == C_CP932)
1480
                        code_conv = conv_utf8tosjis;
1481
                break;
1482
        default:
1483
                break;
1484
        }
1485
1486
        return code_conv;
1487
}
1488
1489
gchar *conv_iconv_strdup(const gchar *inbuf,
1490
                         const gchar *src_code, const gchar *dest_code,
1491
                         gint *error)
1492
{
1493
        iconv_t cd;
1494
        gchar *outbuf;
1495
1496
        if (!src_code)
1497
                src_code = conv_get_locale_charset_str();
1498
        if (!dest_code)
1499
                dest_code = CS_INTERNAL;
1500
1501
        cd = iconv_open(dest_code, src_code);
1502
        if (cd == (iconv_t)-1) {
1503
                if (error)
1504
                        *error = -1;
1505
                return NULL;
1506
        }
1507
1508
        outbuf = conv_iconv_strdup_with_cd(inbuf, cd, error);
1509
1510
        iconv_close(cd);
1511
1512
        return outbuf;
1513
}
1514
1515
/*
 * Convert `inbuf' with an already opened iconv descriptor and return
 * the result as a NUL-terminated string allocated with g_malloc().
 *
 * A NULL input yields NULL with *error set to 0.  Otherwise *error is
 * 0 on a clean conversion and -1 if anything went wrong:
 *  - EILSEQ: the offending input byte is skipped and SUBST_CHAR is
 *    written in its place, then conversion continues;
 *  - EINVAL: conversion stops at that point;
 *  - E2BIG:  the output buffer is doubled and conversion continues
 *    (not counted as an error);
 *  - any other errno: a warning is logged and conversion stops.
 * Afterwards iconv() is called with a NULL input to flush the
 * descriptor's state into the output.
 */
gchar *conv_iconv_strdup_with_cd(const gchar *inbuf, iconv_t cd, gint *error)
{
        const gchar *src;
        gchar *dest;
        gchar *dest_p;
        size_t src_left;
        size_t dest_size;
        size_t dest_left;
        size_t used;
        gint status = 0;

        if (!inbuf) {
                if (error)
                        *error = 0;
                return NULL;
        }

        src = inbuf;
        src_left = strlen(inbuf);
        dest_size = (src_left + 1) * 2;
        dest = g_malloc(dest_size);
        dest_p = dest;
        dest_left = dest_size;

/* double the output buffer, keeping the write position */
#define GROW_OUTPUT()                                        \
        do {                                                \
                used = dest_p - dest;                        \
                dest_size *= 2;                                \
                dest = g_realloc(dest, dest_size);        \
                dest_p = dest + used;                        \
                dest_left = dest_size - used;                \
        } while (0)

        for (;;) {
                const size_t rc = iconv(cd, (ICONV_CONST gchar **)&src,
                                        &src_left, &dest_p, &dest_left);

                if (rc != (size_t)-1)
                        break;

                if (errno == E2BIG) {
                        GROW_OUTPUT();
                        continue;
                }

                /* every remaining failure marks the result as lossy */
                status = -1;

                if (errno == EILSEQ) {
                        /* skip the bad byte and emit a placeholder */
                        src++;
                        src_left--;
                        if (dest_left == 0)
                                GROW_OUTPUT();
                        *dest_p++ = SUBST_CHAR;
                        dest_left--;
                        continue;
                }

                if (errno != EINVAL)
                        g_warning("conv_iconv_strdup(): %s\n",
                                  g_strerror(errno));
                break;
        }

        /* flush the descriptor's state into the output */
        while (iconv(cd, NULL, NULL, &dest_p, &dest_left) == (size_t)-1) {
                if (errno != E2BIG) {
                        g_warning("conv_iconv_strdup(): %s\n",
                                  g_strerror(errno));
                        status = -1;
                        break;
                }
                GROW_OUTPUT();
        }

#undef GROW_OUTPUT

        /* shrink to fit and terminate */
        used = dest_p - dest;
        dest = g_realloc(dest, used + 1);
        dest[used] = '\0';

        if (error)
                *error = status;

        return dest;
}
1599
1600
static const struct {
1601
        CharSet charset;
1602
        gchar *const name;
1603
} charsets[] = {
1604
        {C_US_ASCII,                CS_US_ASCII},
1605
        {C_US_ASCII,                CS_ANSI_X3_4_1968},
1606
        {C_UTF_8,                CS_UTF_8},
1607
        {C_UTF_7,                CS_UTF_7},
1608
        {C_ISO_8859_1,                CS_ISO_8859_1},
1609
        {C_ISO_8859_2,                CS_ISO_8859_2},
1610
        {C_ISO_8859_3,                CS_ISO_8859_3},
1611
        {C_ISO_8859_4,                CS_ISO_8859_4},
1612
        {C_ISO_8859_5,                CS_ISO_8859_5},
1613
        {C_ISO_8859_6,                CS_ISO_8859_6},
1614
        {C_ISO_8859_7,                CS_ISO_8859_7},
1615
        {C_ISO_8859_8,                CS_ISO_8859_8},
1616
        {C_ISO_8859_9,                CS_ISO_8859_9},
1617
        {C_ISO_8859_10,                CS_ISO_8859_10},
1618
        {C_ISO_8859_11,                CS_ISO_8859_11},
1619
        {C_ISO_8859_13,                CS_ISO_8859_13},
1620
        {C_ISO_8859_14,                CS_ISO_8859_14},
1621
        {C_ISO_8859_15,                CS_ISO_8859_15},
1622
        {C_BALTIC,                CS_BALTIC},
1623
        {C_CP932,                CS_CP932},
1624
        {C_CP1250,                CS_CP1250},
1625
        {C_CP1251,                CS_CP1251},
1626
        {C_CP1252,                CS_CP1252},
1627
        {C_CP1253,                CS_CP1253},
1628
        {C_CP1254,                CS_CP1254},
1629
        {C_CP1255,                CS_CP1255},
1630
        {C_CP1256,                CS_CP1256},
1631
        {C_CP1257,                CS_CP1257},
1632
        {C_CP1258,                CS_CP1258},
1633
        {C_WINDOWS_932,                CS_WINDOWS_932},
1634
        {C_WINDOWS_1250,        CS_WINDOWS_1250},
1635
        {C_WINDOWS_1251,        CS_WINDOWS_1251},
1636
        {C_WINDOWS_1252,        CS_WINDOWS_1252},
1637
        {C_WINDOWS_1253,        CS_WINDOWS_1253},
1638
        {C_WINDOWS_1254,        CS_WINDOWS_1254},
1639
        {C_WINDOWS_1255,        CS_WINDOWS_1255},
1640
        {C_WINDOWS_1256,        CS_WINDOWS_1256},
1641
        {C_WINDOWS_1257,        CS_WINDOWS_1257},
1642
        {C_WINDOWS_1258,        CS_WINDOWS_1258},
1643
        {C_KOI8_R,                CS_KOI8_R},
1644
        {C_KOI8_T,                CS_KOI8_T},
1645
        {C_KOI8_U,                CS_KOI8_U},
1646
        {C_ISO_2022_JP,                CS_ISO_2022_JP},
1647
        {C_ISO_2022_JP_2,        CS_ISO_2022_JP_2},
1648
        {C_ISO_2022_JP_3,        CS_ISO_2022_JP_3},
1649
        {C_EUC_JP,                CS_EUC_JP},
1650
        {C_EUC_JP,                CS_EUCJP},
1651
        {C_EUC_JP_MS,                CS_EUC_JP_MS},
1652
        {C_SHIFT_JIS,                CS_SHIFT_JIS},
1653
        {C_SHIFT_JIS,                CS_SHIFT__JIS},
1654
        {C_SHIFT_JIS,                CS_SJIS},
1655
        {C_ISO_2022_KR,                CS_ISO_2022_KR},
1656
        {C_EUC_KR,                CS_EUC_KR},
1657
        {C_ISO_2022_CN,                CS_ISO_2022_CN},
1658
        {C_EUC_CN,                CS_EUC_CN},
1659
        {C_GB2312,                CS_GB2312},
1660
        {C_GBK,                        CS_GBK},
1661
        {C_EUC_TW,                CS_EUC_TW},
1662
        {C_BIG5,                CS_BIG5},
1663
        {C_BIG5_HKSCS,                CS_BIG5_HKSCS},
1664
        {C_TIS_620,                CS_TIS_620},
1665
        {C_WINDOWS_874,                CS_WINDOWS_874},
1666
        {C_GEORGIAN_PS,                CS_GEORGIAN_PS},
1667
        {C_TCVN5712_1,                CS_TCVN5712_1},
1668
        {C_ISO_8859_16,                CS_ISO_8859_16},
1669
};
1670
1671
static const struct {
1672
        gchar *const locale;
1673
        CharSet charset;
1674
        CharSet out_charset;
1675
} locale_table[] = {
1676
        {"ja_JP.eucJP"        , C_EUC_JP        , C_ISO_2022_JP},
1677
        {"ja_JP.EUC-JP"        , C_EUC_JP        , C_ISO_2022_JP},
1678
        {"ja_JP.EUC"        , C_EUC_JP        , C_ISO_2022_JP},
1679
        {"ja_JP.ujis"        , C_EUC_JP        , C_ISO_2022_JP},
1680
        {"ja_JP.SJIS"        , C_SHIFT_JIS        , C_ISO_2022_JP},
1681
        {"ja_JP.JIS"        , C_ISO_2022_JP        , C_ISO_2022_JP},
1682
#ifdef G_OS_WIN32
1683
        {"ja_JP"        , C_CP932        , C_ISO_2022_JP},
1684
#elif defined(__APPLE__)
1685
        {"ja_JP"        , C_UTF_8        , C_ISO_2022_JP},
1686
#else
1687
        {"ja_JP"        , C_EUC_JP        , C_ISO_2022_JP},
1688
#endif
1689
        {"ko_KR.EUC-KR"        , C_EUC_KR        , C_EUC_KR},
1690
        {"ko_KR"        , C_EUC_KR        , C_EUC_KR},
1691
        {"zh_CN.GB2312"        , C_GB2312        , C_GB2312},
1692
        {"zh_CN.GBK"        , C_GBK                , C_GBK},
1693
        {"zh_CN"        , C_GB2312        , C_GB2312},
1694
        {"zh_HK"        , C_BIG5_HKSCS        , C_BIG5_HKSCS},
1695
        {"zh_TW.eucTW"        , C_EUC_TW        , C_BIG5},
1696
        {"zh_TW.EUC-TW"        , C_EUC_TW        , C_BIG5},
1697
        {"zh_TW.Big5"        , C_BIG5        , C_BIG5},
1698
        {"zh_TW"        , C_BIG5        , C_BIG5},
1699
1700
        {"ru_RU.KOI8-R"        , C_KOI8_R        , C_KOI8_R},
1701
        {"ru_RU.KOI8R"        , C_KOI8_R        , C_KOI8_R},
1702
        {"ru_RU.CP1251"        , C_WINDOWS_1251, C_KOI8_R},
1703
        {"ru_RU"        , C_ISO_8859_5        , C_KOI8_R},
1704
        {"tg_TJ"        , C_KOI8_T        , C_KOI8_T},
1705
        {"ru_UA"        , C_KOI8_U        , C_KOI8_U},
1706
        {"uk_UA.CP1251"        , C_WINDOWS_1251, C_KOI8_U},
1707
        {"uk_UA"        , C_KOI8_U        , C_KOI8_U},
1708
1709
        {"be_BY"        , C_WINDOWS_1251, C_WINDOWS_1251},
1710
        {"bg_BG"        , C_WINDOWS_1251, C_WINDOWS_1251},
1711
1712
        {"yi_US"        , C_WINDOWS_1255, C_WINDOWS_1255},
1713
1714
        {"af_ZA"        , C_ISO_8859_1  , C_ISO_8859_1},
1715
        {"br_FR"        , C_ISO_8859_1        , C_ISO_8859_1},
1716
        {"ca_ES"        , C_ISO_8859_1        , C_ISO_8859_1},
1717
        {"da_DK"        , C_ISO_8859_1        , C_ISO_8859_1},
1718
        {"de_AT"        , C_ISO_8859_1        , C_ISO_8859_1},
1719
        {"de_BE"        , C_ISO_8859_1        , C_ISO_8859_1},
1720
        {"de_CH"        , C_ISO_8859_1        , C_ISO_8859_1},
1721
        {"de_DE"        , C_ISO_8859_1        , C_ISO_8859_1},
1722
        {"de_LU"        , C_ISO_8859_1        , C_ISO_8859_1},
1723
        {"en_AU"        , C_ISO_8859_1        , C_ISO_8859_1},
1724
        {"en_BW"        , C_ISO_8859_1        , C_ISO_8859_1},
1725
        {"en_CA"        , C_ISO_8859_1        , C_ISO_8859_1},
1726
        {"en_DK"        , C_ISO_8859_1        , C_ISO_8859_1},
1727
        {"en_GB"        , C_ISO_8859_1        , C_ISO_8859_1},
1728
        {"en_HK"        , C_ISO_8859_1        , C_ISO_8859_1},
1729
        {"en_IE"        , C_ISO_8859_1        , C_ISO_8859_1},
1730
        {"en_NZ"        , C_ISO_8859_1        , C_ISO_8859_1},
1731
        {"en_PH"        , C_ISO_8859_1        , C_ISO_8859_1},
1732
        {"en_SG"        , C_ISO_8859_1        , C_ISO_8859_1},
1733
        {"en_US"        , C_ISO_8859_1        , C_ISO_8859_1},
1734
        {"en_ZA"        , C_ISO_8859_1        , C_ISO_8859_1},
1735
        {"en_ZW"        , C_ISO_8859_1        , C_ISO_8859_1},
1736
        {"es_AR"        , C_ISO_8859_1        , C_ISO_8859_1},
1737
        {"es_BO"        , C_ISO_8859_1        , C_ISO_8859_1},
1738
        {"es_CL"        , C_ISO_8859_1        , C_ISO_8859_1},
1739
        {"es_CO"        , C_ISO_8859_1        , C_ISO_8859_1},
1740
        {"es_CR"        , C_ISO_8859_1        , C_ISO_8859_1},
1741
        {"es_DO"        , C_ISO_8859_1        , C_ISO_8859_1},
1742
        {"es_EC"        , C_ISO_8859_1        , C_ISO_8859_1},
1743
        {"es_ES"        , C_ISO_8859_1        , C_ISO_8859_1},
1744
        {"es_GT"        , C_ISO_8859_1        , C_ISO_8859_1},
1745
        {"es_HN"        , C_ISO_8859_1        , C_ISO_8859_1},
1746
        {"es_MX"        , C_ISO_8859_1        , C_ISO_8859_1},
1747
        {"es_NI"        , C_ISO_8859_1        , C_ISO_8859_1},
1748
        {"es_PA"        , C_ISO_8859_1        , C_ISO_8859_1},
1749
        {"es_PE"        , C_ISO_8859_1        , C_ISO_8859_1},
1750
        {"es_PR"        , C_ISO_8859_1        , C_ISO_8859_1},
1751
        {"es_PY"        , C_ISO_8859_1        , C_ISO_8859_1},
1752
        {"es_SV"        , C_ISO_8859_1        , C_ISO_8859_1},
1753
        {"es_US"        , C_ISO_8859_1        , C_ISO_8859_1},
1754
        {"es_UY"        , C_ISO_8859_1        , C_ISO_8859_1},
1755
        {"es_VE"        , C_ISO_8859_1        , C_ISO_8859_1},
1756
        {"et_EE"        , C_ISO_8859_1        , C_ISO_8859_1},
1757
        {"eu_ES"        , C_ISO_8859_1        , C_ISO_8859_1},
1758
        {"fi_FI"        , C_ISO_8859_1        , C_ISO_8859_1},
1759
        {"fo_FO"        , C_ISO_8859_1        , C_ISO_8859_1},
1760
        {"fr_BE"        , C_ISO_8859_1        , C_ISO_8859_1},
1761
        {"fr_CA"        , C_ISO_8859_1        , C_ISO_8859_1},
1762
        {"fr_CH"        , C_ISO_8859_1        , C_ISO_8859_1},
1763
        {"fr_FR"        , C_ISO_8859_1        , C_ISO_8859_1},
1764
        {"fr_LU"        , C_ISO_8859_1        , C_ISO_8859_1},
1765
        {"ga_IE"        , C_ISO_8859_1        , C_ISO_8859_1},
1766
        {"gl_ES"        , C_ISO_8859_1        , C_ISO_8859_1},
1767
        {"gv_GB"        , C_ISO_8859_1        , C_ISO_8859_1},
1768
        {"id_ID"        , C_ISO_8859_1        , C_ISO_8859_1},
1769
        {"is_IS"        , C_ISO_8859_1        , C_ISO_8859_1},
1770
        {"it_CH"        , C_ISO_8859_1        , C_ISO_8859_1},
1771
        {"it_IT"        , C_ISO_8859_1        , C_ISO_8859_1},
1772
        {"kl_GL"        , C_ISO_8859_1        , C_ISO_8859_1},
1773
        {"kw_GB"        , C_ISO_8859_1        , C_ISO_8859_1},
1774
        {"ms_MY"        , C_ISO_8859_1        , C_ISO_8859_1},
1775
        {"nl_BE"        , C_ISO_8859_1        , C_ISO_8859_1},
1776
        {"nl_NL"        , C_ISO_8859_1        , C_ISO_8859_1},
1777
        {"nn_NO"        , C_ISO_8859_1        , C_ISO_8859_1},
1778
        {"no_NO"        , C_ISO_8859_1        , C_ISO_8859_1},
1779
        {"oc_FR"        , C_ISO_8859_1        , C_ISO_8859_1},
1780
        {"pt_BR"        , C_ISO_8859_1        , C_ISO_8859_1},
1781
        {"pt_PT"        , C_ISO_8859_1        , C_ISO_8859_1},
1782
        {"sq_AL"        , C_ISO_8859_1        , C_ISO_8859_1},
1783
        {"sv_FI"        , C_ISO_8859_1        , C_ISO_8859_1},
1784
        {"sv_SE"        , C_ISO_8859_1        , C_ISO_8859_1},
1785
        {"tl_PH"        , C_ISO_8859_1        , C_ISO_8859_1},
1786
        {"uz_UZ"        , C_ISO_8859_1        , C_ISO_8859_1},
1787
        {"wa_BE"        , C_ISO_8859_1        , C_ISO_8859_1},
1788
1789
        {"bs_BA"        , C_ISO_8859_2        , C_ISO_8859_2},
1790
        {"cs_CZ"        , C_ISO_8859_2        , C_ISO_8859_2},
1791
        {"hr_HR"        , C_ISO_8859_2        , C_ISO_8859_2},
1792
        {"hu_HU"        , C_ISO_8859_2        , C_ISO_8859_2},
1793
        {"pl_PL"        , C_ISO_8859_2        , C_ISO_8859_2},
1794
        {"ro_RO"        , C_ISO_8859_2        , C_ISO_8859_2},
1795
        {"sk_SK"        , C_ISO_8859_2        , C_ISO_8859_2},
1796
        {"sl_SI"        , C_ISO_8859_2        , C_ISO_8859_2},
1797
1798
        {"sr_YU@cyrillic"        , C_ISO_8859_5        , C_ISO_8859_5},
1799
        {"sr_YU"                , C_ISO_8859_2        , C_ISO_8859_2},
1800
1801
        {"mt_MT"                , C_ISO_8859_3        , C_ISO_8859_3},
1802
1803
        {"lt_LT.iso88594"        , C_ISO_8859_4        , C_ISO_8859_4},
1804
        {"lt_LT.ISO8859-4"        , C_ISO_8859_4        , C_ISO_8859_4},
1805
        {"lt_LT.ISO_8859-4"        , C_ISO_8859_4        , C_ISO_8859_4},
1806
        {"lt_LT"                , C_ISO_8859_13        , C_ISO_8859_13},
1807
1808
        {"mk_MK"        , C_ISO_8859_5        , C_ISO_8859_5},
1809
1810
        {"ar_AE"        , C_ISO_8859_6        , C_ISO_8859_6},
1811
        {"ar_BH"        , C_ISO_8859_6        , C_ISO_8859_6},
1812
        {"ar_DZ"        , C_ISO_8859_6        , C_ISO_8859_6},
1813
        {"ar_EG"        , C_ISO_8859_6        , C_ISO_8859_6},
1814
        {"ar_IQ"        , C_ISO_8859_6        , C_ISO_8859_6},
1815
        {"ar_JO"        , C_ISO_8859_6        , C_ISO_8859_6},
1816
        {"ar_KW"        , C_ISO_8859_6        , C_ISO_8859_6},
1817
        {"ar_LB"        , C_ISO_8859_6        , C_ISO_8859_6},
1818
        {"ar_LY"        , C_ISO_8859_6        , C_ISO_8859_6},
1819
        {"ar_MA"        , C_ISO_8859_6        , C_ISO_8859_6},
1820
        {"ar_OM"        , C_ISO_8859_6        , C_ISO_8859_6},
1821
        {"ar_QA"        , C_ISO_8859_6        , C_ISO_8859_6},
1822
        {"ar_SA"        , C_ISO_8859_6        , C_ISO_8859_6},
1823
        {"ar_SD"        , C_ISO_8859_6        , C_ISO_8859_6},
1824
        {"ar_SY"        , C_ISO_8859_6        , C_ISO_8859_6},
1825
        {"ar_TN"        , C_ISO_8859_6        , C_ISO_8859_6},
1826
        {"ar_YE"        , C_ISO_8859_6        , C_ISO_8859_6},
1827
1828
        {"el_GR"        , C_ISO_8859_7        , C_ISO_8859_7},
1829
        {"he_IL"        , C_ISO_8859_8        , C_ISO_8859_8},
1830
        {"iw_IL"        , C_ISO_8859_8        , C_ISO_8859_8},
1831
        {"tr_TR"        , C_ISO_8859_9        , C_ISO_8859_9},
1832
1833
        {"lv_LV"        , C_ISO_8859_13        , C_ISO_8859_13},
1834
        {"mi_NZ"        , C_ISO_8859_13        , C_ISO_8859_13},
1835
1836
        {"cy_GB"        , C_ISO_8859_14        , C_ISO_8859_14},
1837
1838
        {"ar_IN"        , C_UTF_8        , C_UTF_8},
1839
        {"en_IN"        , C_UTF_8        , C_UTF_8},
1840
        {"se_NO"        , C_UTF_8        , C_UTF_8},
1841
        {"ta_IN"        , C_UTF_8        , C_UTF_8},
1842
        {"te_IN"        , C_UTF_8        , C_UTF_8},
1843
        {"ur_PK"        , C_UTF_8        , C_UTF_8},
1844
1845
        {"th_TH"        , C_TIS_620        , C_TIS_620},
1846
        /* {"th_TH"        , C_WINDOWS_874}, */
1847
        /* {"th_TH"        , C_ISO_8859_11}, */
1848
1849
        {"ka_GE"        , C_GEORGIAN_PS        , C_GEORGIAN_PS},
1850
        {"vi_VN.TCVN"        , C_TCVN5712_1        , C_TCVN5712_1},
1851
1852
        {"C"                        , C_US_ASCII        , C_US_ASCII},
1853
        {"POSIX"                , C_US_ASCII        , C_US_ASCII},
1854
        {"ANSI_X3.4-1968"        , C_US_ASCII        , C_US_ASCII},
1855
};
1856
1857
static GHashTable *conv_get_charset_to_str_table(void)
1858
{
1859
        static GHashTable *table;
1860
        gint i;
1861
        S_LOCK_DEFINE_STATIC(table);
1862
1863
        S_LOCK(table);
1864
1865
        if (table) {
1866
                S_UNLOCK(table);
1867
                return table;
1868
        }
1869
1870
        table = g_hash_table_new(NULL, g_direct_equal);
1871
1872
        for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
1873
                if (g_hash_table_lookup(table, GUINT_TO_POINTER(charsets[i].charset))
1874
                    == NULL) {
1875
                        g_hash_table_insert
1876
                                (table, GUINT_TO_POINTER(charsets[i].charset),
1877
                                 charsets[i].name);
1878
                }
1879
        }
1880
1881
        S_UNLOCK(table);
1882
        return table;
1883
}
1884
1885
static GHashTable *conv_get_charset_from_str_table(void)
1886
{
1887
        static GHashTable *table;
1888
        S_LOCK_DEFINE_STATIC(table);
1889
1890
        gint i;
1891
1892
        S_LOCK(table);
1893
1894
        if (table) {
1895
                S_UNLOCK(table);
1896
                return table;
1897
        }
1898
1899
        table = g_hash_table_new(str_case_hash, str_case_equal);
1900
1901
        for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
1902
                g_hash_table_insert(table, charsets[i].name,
1903
                                    GUINT_TO_POINTER(charsets[i].charset));
1904
        }
1905
1906
        S_UNLOCK(table);
1907
        return table;
1908
}
1909
1910
/*
 * Look up the textual name of a CharSet.
 *
 * Returns the first name registered for `charset` in charsets[], or
 * NULL when the CharSet has no entry.  The string is owned by the table.
 */
const gchar *conv_get_charset_str(CharSet charset)
{
        return g_hash_table_lookup(conv_get_charset_to_str_table(),
                                   GUINT_TO_POINTER(charset));
}
1917
1918
CharSet conv_get_charset_from_str(const gchar *charset)
1919
{
1920
        GHashTable *table;
1921
1922
        if (!charset) return C_AUTO;
1923
1924
        table = conv_get_charset_from_str_table();
1925
        return GPOINTER_TO_UINT(g_hash_table_lookup(table, charset));
1926
}
1927
1928
CharSet conv_get_locale_charset(void)
1929
{
1930
        static CharSet cur_charset = -1;
1931
        const gchar *cur_locale;
1932
        const gchar *p;
1933
#if !defined(G_OS_WIN32) && !defined(__APPLE__)
1934
        gint i;
1935
#endif
1936
        S_LOCK_DEFINE_STATIC(cur_charset);
1937
1938
        S_LOCK(cur_charset);
1939
1940
        if (cur_charset != -1) {
1941
                S_UNLOCK(cur_charset);
1942
                return cur_charset;
1943
        }
1944
1945
        cur_locale = conv_get_current_locale();
1946
        if (!cur_locale) {
1947
                cur_charset = C_US_ASCII;
1948
                S_UNLOCK(cur_charset);
1949
                return cur_charset;
1950
        }
1951
1952
        if (strcasestr(cur_locale, "UTF-8") || strcasestr(cur_locale, "utf8")) {
1953
                cur_charset = C_UTF_8;
1954
                S_UNLOCK(cur_charset);
1955
                return cur_charset;
1956
        }
1957
1958
        if ((p = strcasestr(cur_locale, "@euro")) && p[5] == '\0') {
1959
                cur_charset = C_ISO_8859_15;
1960
                S_UNLOCK(cur_charset);
1961
                return cur_charset;
1962
        }
1963
1964
#if defined(G_OS_WIN32) || defined(__APPLE__)
1965
        cur_charset = conv_get_charset_from_str(conv_get_locale_charset_str());
1966
1967
        S_UNLOCK(cur_charset);
1968
        return cur_charset;
1969
#else
1970
        for (i = 0; i < sizeof(locale_table) / sizeof(locale_table[0]); i++) {
1971
                const gchar *p;
1972
1973
                /* "ja_JP.EUC" matches with "ja_JP.eucJP", "ja_JP.EUC" and
1974
                   "ja_JP". "ja_JP" matches with "ja_JP.xxxx" and "ja" */
1975
                if (!g_ascii_strncasecmp(cur_locale, locale_table[i].locale,
1976
                                         strlen(locale_table[i].locale))) {
1977
                        cur_charset = locale_table[i].charset;
1978
                        S_UNLOCK(cur_charset);
1979
                        return cur_charset;
1980
                } else if ((p = strchr(locale_table[i].locale, '_')) &&
1981
                         !strchr(p + 1, '.')) {
1982
                        if (strlen(cur_locale) == 2 &&
1983
                            !g_ascii_strncasecmp(cur_locale,
1984
                                                 locale_table[i].locale, 2)) {
1985
                                cur_charset = locale_table[i].charset;
1986
                                S_UNLOCK(cur_charset);
1987
                                return cur_charset;
1988
                        }
1989
                }
1990
        }
1991
1992
        cur_charset = C_AUTO;
1993
        S_UNLOCK(cur_charset);
1994
        return cur_charset;
1995
#endif
1996
}
1997
1998
const gchar *conv_get_locale_charset_str(void)
1999
{
2000
        static const gchar *codeset = NULL;
2001
        S_LOCK_DEFINE_STATIC(codeset);
2002
2003
        S_LOCK(codeset);
2004
2005
        if (!codeset) {
2006
#if defined(G_OS_WIN32) || defined(__APPLE__)
2007
                g_get_charset(&codeset);
2008
                if (!strcmp(codeset, CS_US_ASCII) ||
2009
                    !strcmp(codeset, CS_ANSI_X3_4_1968))
2010
                        codeset = CS_INTERNAL;
2011
#else
2012
                codeset = conv_get_charset_str(conv_get_locale_charset());
2013
#endif
2014
        }
2015
2016
        if (codeset) {
2017
                S_UNLOCK(codeset);
2018
                return codeset;
2019
        }
2020
2021
        S_UNLOCK(codeset);
2022
        return CS_INTERNAL;
2023
}
2024
2025
CharSet conv_get_internal_charset(void)
2026
{
2027
        return C_INTERNAL;
2028
}
2029
2030
const gchar *conv_get_internal_charset_str(void)
2031
{
2032
        return CS_INTERNAL;
2033
}
2034
2035
CharSet conv_get_outgoing_charset(void)
2036
{
2037
        static CharSet out_charset = -1;
2038
        const gchar *cur_locale;
2039
        const gchar *p;
2040
        gint i;
2041
        S_LOCK_DEFINE_STATIC(out_charset);
2042
2043
        S_LOCK(out_charset);
2044
2045
        if (out_charset != -1) {
2046
                S_UNLOCK(out_charset);
2047
                return out_charset;
2048
        }
2049
2050
        cur_locale = conv_get_current_locale();
2051
        if (!cur_locale) {
2052
                out_charset = C_AUTO;
2053
                S_UNLOCK(out_charset);
2054
                return out_charset;
2055
        }
2056
2057
        if ((p = strcasestr(cur_locale, "@euro")) && p[5] == '\0') {
2058
                out_charset = C_ISO_8859_15;
2059
                S_UNLOCK(out_charset);
2060
                return out_charset;
2061
        }
2062
2063
        for (i = 0; i < sizeof(locale_table) / sizeof(locale_table[0]); i++) {
2064
                const gchar *p;
2065
2066
                if (!g_ascii_strncasecmp(cur_locale, locale_table[i].locale,
2067
                                         strlen(locale_table[i].locale))) {
2068
                        out_charset = locale_table[i].out_charset;
2069
                        break;
2070
                } else if ((p = strchr(locale_table[i].locale, '_')) &&
2071
                         !strchr(p + 1, '.')) {
2072
                        if (strlen(cur_locale) == 2 &&
2073
                            !g_ascii_strncasecmp(cur_locale,
2074
                                                 locale_table[i].locale, 2)) {
2075
                                out_charset = locale_table[i].out_charset;
2076
                                break;
2077
                        }
2078
                }
2079
        }
2080
2081
        S_UNLOCK(out_charset);
2082
        return out_charset;
2083
}
2084
2085
const gchar *conv_get_outgoing_charset_str(void)
2086
{
2087
        CharSet out_charset;
2088
        const gchar *str;
2089
2090
        out_charset = conv_get_outgoing_charset();
2091
        str = conv_get_charset_str(out_charset);
2092
2093
        return str ? str : CS_UTF_8;
2094
}
2095
2096
/*
 * Tell whether `encoding` is one of the multibyte charsets known to
 * this module.
 *
 * Returns TRUE for the CJK encodings and for UTF-8 / UTF-7, FALSE for
 * everything else.  C_WINDOWS_932 (listed in charsets[] next to
 * C_CP932) and C_BIG5_HKSCS (listed next to C_BIG5) are included so
 * that they are treated like their siblings.
 */
gboolean conv_is_multibyte_encoding(CharSet encoding)
{
        switch (encoding) {
        case C_EUC_JP:
        case C_EUC_JP_MS:
        case C_EUC_KR:
        case C_EUC_TW:
        case C_EUC_CN:
        case C_ISO_2022_JP:
        case C_ISO_2022_JP_2:
        case C_ISO_2022_JP_3:
        case C_ISO_2022_KR:
        case C_ISO_2022_CN:
        case C_SHIFT_JIS:
        case C_CP932:
        case C_WINDOWS_932:
        case C_GB2312:
        case C_GBK:
        case C_BIG5:
        case C_BIG5_HKSCS:
        case C_UTF_8:
        case C_UTF_7:
                return TRUE;
        default:
                return FALSE;
        }
}
2121
2122
/*
 * Return the name of the current locale, or NULL if none can be found.
 *
 * On Win32 the name comes from g_win32_getlocale().  Elsewhere the
 * first non-empty value of LC_ALL, LC_CTYPE and LANG is used, then
 * (when <locale.h> is available) setlocale(LC_CTYPE, NULL).
 *
 * A non-NULL result is cached for the lifetime of the process.  The
 * strings handed out by g_getenv() and setlocale() may be overwritten
 * or invalidated by later calls to those APIs, so a private copy is
 * stored instead of the borrowed pointer.  The copy is intentionally
 * never freed.
 */
const gchar *conv_get_current_locale(void)
{
        static const gchar *cur_locale;
        S_LOCK_DEFINE_STATIC(cur_locale);

        S_LOCK(cur_locale);

        if (!cur_locale) {
#ifdef G_OS_WIN32
                cur_locale = g_win32_getlocale();
#else
                const gchar *env;

                env = g_getenv("LC_ALL");
                if (!env || *env == '\0')
                        env = g_getenv("LC_CTYPE");
                if (!env || *env == '\0')
                        env = g_getenv("LANG");
#ifdef HAVE_LOCALE_H
                if (!env || *env == '\0')
                        env = setlocale(LC_CTYPE, NULL);
#endif /* HAVE_LOCALE_H */

                /* g_strdup(NULL) is NULL, so "not found" stays NULL */
                cur_locale = g_strdup(env);
#endif /* G_OS_WIN32 */

                debug_print("current locale: %s\n",
                            cur_locale ? cur_locale : "(none)");
        }

        S_UNLOCK(cur_locale);
        return cur_locale;
}
2151
2152
gboolean conv_is_ja_locale(void)
2153
{
2154
        static gint is_ja_locale = -1;
2155
        const gchar *cur_locale;
2156
        S_LOCK_DEFINE_STATIC(is_ja_locale);
2157
2158
        S_LOCK(is_ja_locale);
2159
2160
        if (is_ja_locale != -1) {
2161
                S_UNLOCK(is_ja_locale);
2162
                return is_ja_locale != 0;
2163
        }
2164
2165
        is_ja_locale = 0;
2166
        cur_locale = conv_get_current_locale();
2167
        if (cur_locale) {
2168
                if (g_ascii_strncasecmp(cur_locale, "ja", 2) == 0)
2169
                        is_ja_locale = 1;
2170
        }
2171
2172
        S_UNLOCK(is_ja_locale);
2173
        return is_ja_locale != 0;
2174
}
2175
2176
/* Store `type` as the module-wide encoding auto-detection mode. */
void conv_set_autodetect_type(ConvADType type)
{
        conv_ad_type = type;
}
2180
2181
ConvADType conv_get_autodetect_type(void)
2182
{
2183
        return conv_ad_type;
2184
}
2185
2186
gchar *conv_unmime_header(const gchar *str, const gchar *default_encoding)
2187
{
2188
        gchar *buf;
2189
        gchar *decoded_str;
2190
2191
        if (is_ascii_str(str))
2192
                return unmime_header(str);
2193
2194
        if (default_encoding) {
2195
                buf = conv_codeset_strdup
2196
                        (str, default_encoding, CS_INTERNAL);
2197
                if (buf) {
2198
                        decoded_str = unmime_header(buf);
2199
                        g_free(buf);
2200
                        return decoded_str;
2201
                }
2202
        }
2203
2204
        if (conv_ad_type == C_AD_JAPANESE ||
2205
            (conv_ad_type == C_AD_BY_LOCALE && conv_is_ja_locale()))
2206
                buf = conv_anytodisp(str, NULL);
2207
        else
2208
                buf = conv_localetodisp(str, NULL);
2209
2210
        decoded_str = unmime_header(buf);
2211
        g_free(buf);
2212
2213
        return decoded_str;
2214
}
2215
2216
/* Column budget of one output line when folding encoded headers. */
#define MAX_LINELEN                76
2217
/* NOTE(review): presumably an absolute upper bound on line length;
   not referenced in the code visible near this definition -- confirm. */
#define MAX_HARD_LINELEN        996
2218
/* Leading delimiter of a MIME encoded-word; counted in mimestr_len. */
#define MIMESEP_BEGIN                "=?"
2219
/* Trailing delimiter of a MIME encoded-word; counted in mimestr_len. */
#define MIMESEP_END                "?="
2220
2221
/* Base64 output size for `len` input bytes: 4 chars per (partial) 3-byte group. */
#define B64LEN(len)        (((len) / 3 + ((len) % 3 != 0)) * 4)
2222
2223
/*
 * Folding helper for the header encoder.  It operates on the locals
 * `dest`, `destp`, `len`, `srcp` and `left` of the enclosing function.
 *
 *  - If fewer than MAX_LINELEN + 2 bytes remain in the destination
 *    buffer, the output is NUL-terminated and the enclosing function
 *    returns.
 *  - Otherwise, when `cond` holds, input remains, something has been
 *    written and the current line is not fresh
 *    (left < MAX_LINELEN - 1): a trailing whitespace byte is dropped
 *    from the output or, for plain text, a whitespace byte is skipped
 *    in the input; then, if input still remains, "\n " is emitted and
 *    `left` is reset to MAX_LINELEN - 1.
 *
 * The expansion is a bare brace block containing `return`, so it can
 * only be used as a statement inside a function returning void.
 */
#define LBREAK_IF_REQUIRED(cond, is_plain_text)                         \
{                                                                       \
        if (len - (destp - dest) < MAX_LINELEN + 2) {                   \
                *destp = '\0';                                          \
                return;                                                 \
        }                                                               \
                                                                        \
        if ((cond) && *srcp &&                                          \
            destp > dest && left < MAX_LINELEN - 1) {                   \
                if (g_ascii_isspace(*(destp - 1)))                      \
                        destp--;                                        \
                else if (is_plain_text &&                               \
                         g_ascii_isspace(*srcp))                        \
                        srcp++;                                         \
                if (*srcp) {                                            \
                        *destp++ = '\n';                                \
                        *destp++ = ' ';                                 \
                        left = MAX_LINELEN - 1;                         \
                }                                                       \
        }                                                               \
}
2245
2246
void conv_encode_header(gchar *dest, gint len, const gchar *src,
2247
                        gint header_len, gboolean addr_field,
2248
                        const gchar *out_encoding)
2249
{
2250
        const gchar *src_encoding;
2251
        gint mimestr_len;
2252
        gchar *mimesep_enc;
2253
        gint left;
2254
        const gchar *srcp = src;
2255
        gchar *destp = dest;
2256
        gboolean use_base64;
2257
2258
        g_return_if_fail(g_utf8_validate(src, -1, NULL) == TRUE);
2259
2260
        src_encoding = CS_INTERNAL;
2261
        if (!out_encoding)
2262
                out_encoding = conv_get_outgoing_charset_str();
2263
        if (!strcmp(out_encoding, CS_US_ASCII))
2264
                out_encoding = CS_ISO_8859_1;
2265
2266
        if (!g_ascii_strncasecmp(out_encoding, "ISO-8859-", 9) ||
2267
            !g_ascii_strncasecmp(out_encoding, "KOI8-", 5) ||
2268
            !g_ascii_strncasecmp(out_encoding, "Windows-", 8)) {
2269
                use_base64 = FALSE;
2270
                mimesep_enc = "?Q?";
2271
        } else {
2272
                use_base64 = TRUE;
2273
                mimesep_enc = "?B?";
2274
        }
2275
2276
        mimestr_len = strlen(MIMESEP_BEGIN) + strlen(mimesep_enc) +
2277
                strlen(MIMESEP_END);
2278
2279
        left = MAX_LINELEN - header_len;
2280
2281
        while (*srcp) {
2282
                gboolean in_quote = FALSE;
2283
2284
                LBREAK_IF_REQUIRED(left <= 0, TRUE);
2285
2286
                while (g_ascii_isspace(*srcp)) {
2287
                        *destp++ = *srcp++;
2288
                        left--;
2289
                        LBREAK_IF_REQUIRED(left <= 0, TRUE);
2290
                }
2291
2292
                /* output as it is if the next word is ASCII string */
2293
                if (!is_next_nonascii(srcp)) {
2294
                        gint word_len;
2295
2296
                        word_len = get_next_word_len(srcp);
2297
                        LBREAK_IF_REQUIRED(left < word_len, TRUE);
2298
                        while (word_len > 0) {
2299
                                LBREAK_IF_REQUIRED(left + (MAX_HARD_LINELEN - MAX_LINELEN) <= 0, TRUE)
2300
                                *destp++ = *srcp++;
2301
                                left--;
2302
                                word_len--;
2303
                        }
2304
2305
                        continue;
2306
                }
2307
2308
                /* don't include parentheses in encoded strings */
2309
                if (addr_field && (*srcp == '(' || *srcp == ')')) {
2310
                        LBREAK_IF_REQUIRED(left < 2, FALSE);
2311
                        *destp++ = *srcp++;
2312
                        left--;
2313
                }
2314
2315
                while (1) {
2316
                        gint mb_len = 0;
2317
                        gint cur_len = 0;
2318
                        gchar *part_str;
2319
                        gchar *out_str;
2320
                        gchar *enc_str;
2321
                        const gchar *p = srcp;
2322
                        const gchar *block_encoding = out_encoding;
2323
                        gint out_str_len;
2324
                        gint out_enc_str_len;
2325
                        gint mime_block_len;
2326
                        gint error = 0;
2327
                        gboolean cont = FALSE;
2328
2329
                        while (*p != '\0') {
2330
                                if (*p == '"')
2331
                                        in_quote ^= TRUE;
2332
                                else if (!in_quote) {
2333
                                        if (g_ascii_isspace(*p) &&
2334
                                            !is_next_nonascii(p + 1))
2335
                                                break;
2336
                                        /* don't include parentheses in encoded
2337
                                           strings */
2338
                                        if (addr_field &&
2339
                                            (*p == '(' || *p == ')'))
2340
                                                break;
2341
                                }
2342
2343
                                mb_len = g_utf8_skip[*(guchar *)p];
2344
2345
                                part_str = g_strndup(srcp, cur_len + mb_len);
2346
                                out_str = conv_codeset_strdup_full
2347
                                        (part_str, src_encoding, block_encoding,
2348
                                         &error);
2349
                                if (!out_str || error != 0) {
2350
                                        g_warning("conv_encode_header(): code conversion failed. Keeping UTF-8.\n");
2351
                                        out_str = g_strdup(part_str);
2352
                                        block_encoding = CS_UTF_8;
2353
                                }
2354
                                out_str_len = strlen(out_str);
2355
2356
                                if (use_base64)
2357
                                        out_enc_str_len = B64LEN(out_str_len);
2358
                                else
2359
                                        out_enc_str_len =
2360
                                                qp_get_q_encoding_len
2361
                                                        ((guchar *)out_str);
2362
2363
                                g_free(out_str);
2364
                                g_free(part_str);
2365
2366
                                if (mimestr_len + strlen(block_encoding) + out_enc_str_len <= left) {
2367
                                        cur_len += mb_len;
2368
                                        p += mb_len;
2369
                                } else if (cur_len == 0) {
2370
                                        LBREAK_IF_REQUIRED(1, FALSE);
2371
                                        if (*p == '"')
2372
                                                in_quote ^= TRUE;
2373
                                        continue;
2374
                                } else {
2375
                                        cont = TRUE;
2376
                                        if (*p == '"')
2377
                                                in_quote ^= TRUE;
2378
                                        break;
2379
                                }
2380
                        }
2381
2382
                        if (cur_len > 0) {
2383
                                error = 0;
2384
                                part_str = g_strndup(srcp, cur_len);
2385
                                out_str = conv_codeset_strdup_full
2386
                                        (part_str, src_encoding, block_encoding,
2387
                                         &error);
2388
                                if (!out_str || error != 0) {
2389
                                        g_warning("conv_encode_header(): code conversion failed\n");
2390
                                        out_str = g_strdup(part_str);
2391
                                        block_encoding = CS_UTF_8;
2392
                                }
2393
                                out_str_len = strlen(out_str);
2394
2395
                                if (use_base64)
2396
                                        out_enc_str_len = B64LEN(out_str_len);
2397
                                else
2398
                                        out_enc_str_len =
2399
                                                qp_get_q_encoding_len
2400
                                                        ((guchar *)out_str);
2401
2402
                                enc_str = g_malloc(out_enc_str_len + 1);
2403
                                if (use_base64)
2404
                                        base64_encode(enc_str,
2405
                                                      (guchar *)out_str,
2406
                                                      out_str_len);
2407
                                else
2408
                                        qp_q_encode(enc_str, (guchar *)out_str);
2409
2410
                                /* output MIME-encoded string block */
2411
                                mime_block_len = mimestr_len +
2412
                                        strlen(block_encoding) +
2413
                                        strlen(enc_str);
2414
                                g_snprintf(destp, mime_block_len + 1,
2415
                                           MIMESEP_BEGIN "%s%s%s" MIMESEP_END,
2416
                                           block_encoding, mimesep_enc,
2417
                                           enc_str);
2418
                                destp += mime_block_len;
2419
                                srcp += cur_len;
2420
2421
                                left -= mime_block_len;
2422
2423
                                g_free(enc_str);
2424
                                g_free(out_str);
2425
                                g_free(part_str);
2426
                        }
2427
2428
                        LBREAK_IF_REQUIRED(cont, FALSE);
2429
2430
                        if (cur_len == 0)
2431
                                break;
2432
                }
2433
        }
2434
2435
        *destp = '\0';
2436
}
2437
2438
#undef LBREAK_IF_REQUIRED
2439
2440
/*
 * Store the upper-case hexadecimal digit for val (0..15) at *outp.
 * outp itself is not advanced.
 */
#define INT_TO_HEX_UPPER(outp, val)                \
do {                                                \
        if ((val) < 10)                                \
                *(outp) = '0' + (val);                \
        else                                        \
                *(outp) = 'A' + (val) - 10;        \
} while (0)

/*
 * Non-zero if byte value c has to be written as "%XX": control characters
 * (including DEL, 0x7f), bytes above the ASCII range, and the listed
 * special characters.
 */
#define IS_ESCAPE_CHAR(c)                                        \
        ((c) < 0x20 || (c) >= 0x7f ||                                \
         strchr("\t \r\n*'%!#$&~`,{}|()<>@,;:\\\"/[]?=", (c)) != NULL)
2451
2452
static gchar *encode_rfc2231_filename(const gchar *str)
2453
{
2454
        const gchar *p;
2455
        gchar *out;
2456
        gchar *outp;
2457
2458
        outp = out = g_malloc(strlen(str) * 3 + 1);
2459
2460
        for (p = str; *p != '\0'; ++p) {
2461
                guchar ch = *(guchar *)p;
2462
2463
                if (IS_ESCAPE_CHAR(ch)) {
2464
                        *outp++ = '%';
2465
                        INT_TO_HEX_UPPER(outp, ch >> 4);
2466
                        ++outp;
2467
                        INT_TO_HEX_UPPER(outp, ch & 0x0f);
2468
                        ++outp;
2469
                } else
2470
                        *outp++ = ch;
2471
        }
2472
2473
        *outp = '\0';
2474
        return out;
2475
}
2476
2477
gchar *conv_encode_filename(const gchar *src, const gchar *param_name,
2478
                            const gchar *out_encoding)
2479
{
2480
        gint name_len, max_linelen;
2481
        gchar *out_str, *enc_str;
2482
        gchar cur_param[80];
2483
        GString *string;
2484
        gint count = 0;
2485
        gint cur_left_len;
2486
        gchar *p;
2487
2488
        g_return_val_if_fail(src != NULL, NULL);
2489
        g_return_val_if_fail(param_name != NULL, NULL);
2490
2491
        if (is_ascii_str(src))
2492
                return g_strdup_printf(" %s=\"%s\"", param_name, src);
2493
2494
        name_len = strlen(param_name);
2495
        max_linelen = MAX_LINELEN - name_len - 3;
2496
2497
        if (!out_encoding)
2498
                out_encoding = conv_get_outgoing_charset_str();
2499
        if (!strcmp(out_encoding, CS_US_ASCII))
2500
                out_encoding = CS_ISO_8859_1;
2501
2502
        out_str = conv_codeset_strdup(src, CS_INTERNAL, out_encoding);
2503
        if (!out_str)
2504
                return NULL;
2505
        enc_str = encode_rfc2231_filename(out_str);
2506
        g_free(out_str);
2507
2508
        if (strlen(enc_str) <= max_linelen) {
2509
                gchar *ret;
2510
                ret = g_strdup_printf(" %s*=%s''%s",
2511
                                      param_name, out_encoding, enc_str);
2512
                g_free(enc_str);
2513
                return ret;
2514
        }
2515
2516
        string = g_string_new(NULL);
2517
        g_string_printf(string, " %s*0*=%s''", param_name, out_encoding);
2518
        cur_left_len = MAX_LINELEN - string->len;
2519
2520
        p = enc_str;
2521
2522
        while (*p != '\0') {
2523
                if ((*p == '%' && cur_left_len < 4) ||
2524
                    (*p != '%' && cur_left_len < 2)) {
2525
                        gint len;
2526
2527
                        g_string_append(string, ";\n");
2528
                        ++count;
2529
                        len = g_snprintf(cur_param, sizeof(cur_param),
2530
                                         " %s*%d*=", param_name, count);
2531
                        g_string_append(string, cur_param);
2532
                        cur_left_len = MAX_LINELEN - len;
2533
                }
2534
2535
                if (*p == '%') {
2536
                        g_string_append_len(string, p, 3);
2537
                        p += 3;
2538
                        cur_left_len -= 3;
2539
                } else {
2540
                        g_string_append_c(string, *p);
2541
                        ++p;
2542
                        --cur_left_len;
2543
                }
2544
        }
2545
2546
        g_free(enc_str);
2547
2548
        return g_string_free(string, FALSE);
2549
}
2550
2551
CharSet conv_check_file_encoding(const gchar *file)
2552
{
2553
        FILE *fp;
2554
        gchar buf[BUFFSIZE];
2555
        CharSet enc;
2556
        const gchar *enc_str;
2557
        gboolean is_locale = TRUE, is_utf8 = TRUE;
2558
2559
        g_return_val_if_fail(file != NULL, C_AUTO);
2560
2561
        enc = conv_get_locale_charset();
2562
        enc_str = conv_get_locale_charset_str();
2563
        if (enc == C_UTF_8)
2564
                is_locale = FALSE;
2565
2566
        if ((fp = g_fopen(file, "rb")) == NULL) {
2567
                FILE_OP_ERROR(file, "fopen");
2568
                return C_AUTO;
2569
        }
2570
2571
        while (fgets(buf, sizeof(buf), fp) != NULL) {
2572
                gchar *str;
2573
                gint error = 0;
2574
2575
                if (is_locale) {
2576
                        str = conv_codeset_strdup_full(buf, enc_str,
2577
                                                       CS_INTERNAL, &error);
2578
                        if (!str || error != 0)
2579
                                is_locale = FALSE;
2580
                        g_free(str);
2581
                }
2582
2583
                if (is_utf8 && g_utf8_validate(buf, -1, NULL) == FALSE) {
2584
                        is_utf8 = FALSE;
2585
                }
2586
2587
                if (!is_locale && !is_utf8)
2588
                        break;
2589
        }
2590
2591
        fclose(fp);
2592
2593
        if (is_locale)
2594
                return enc;
2595
        else if (is_utf8)
2596
                return C_UTF_8;
2597
        else
2598
                return C_AUTO;
2599
}
2600
2601
gchar *conv_filename_from_utf8(const gchar *utf8_file)
2602
{
2603
        gchar *fs_file;
2604
        GError *error = NULL;
2605
2606
        g_return_val_if_fail(utf8_file != NULL, NULL);
2607
2608
        fs_file = g_filename_from_utf8(utf8_file, -1, NULL, NULL, &error);
2609
        if (error) {
2610
                g_warning("failed to convert encoding of file name: %s\n",
2611
                          error->message);
2612
                g_error_free(error);
2613
        }
2614
        if (!fs_file)
2615
                fs_file = g_strdup(utf8_file);
2616
2617
        return fs_file;
2618
}
2619
2620
gchar *conv_filename_to_utf8(const gchar *fs_file)
2621
{
2622
        gchar *utf8_file;
2623
        GError *error = NULL;
2624
2625
        g_return_val_if_fail(fs_file != NULL, NULL);
2626
2627
        utf8_file = g_filename_to_utf8(fs_file, -1, NULL, NULL, &error);
2628
        if (error) {
2629
                g_warning("failed to convert encoding of file name: %s\n",
2630
                          error->message);
2631
                g_error_free(error);
2632
        }
2633
        if (!utf8_file)
2634
                utf8_file = g_strdup(fs_file);
2635
2636
        return utf8_file;
2637
}