Statistics
| Branch: | Tag: | Revision:

root / libsylph / codeconv.c @ aebfd4cc

History | View | Annotate | Download (59.7 KB)

1
/*
2
 * LibSylph -- E-Mail client library
3
 * Copyright (C) 1999-2011 Hiroyuki Yamamoto
4
 */
5

    
6
#ifdef HAVE_CONFIG_H
7
#  include "config.h"
8
#endif
9

    
10
#include "defs.h"
11

    
12
#include <glib.h>
13
#include <string.h>
14
#include <ctype.h>
15
#include <stdlib.h>
16
#include <errno.h>
17

    
18
#if HAVE_LOCALE_H
19
#  include <locale.h>
20
#endif
21

    
22
#include <iconv.h>
23

    
24
#include "codeconv.h"
25
#include "unmime.h"
26
#include "base64.h"
27
#include "quoted-printable.h"
28
#include "utils.h"
29

    
30
/* Character set currently selected while reading or writing ISO-2022-JP. */
typedef enum
{
	JIS_ASCII,	/* ESC ( B or ESC ( J */
	JIS_KANJI,	/* ESC $ @ or ESC $ B */
	JIS_HWKANA,	/* ESC ( I, or SO ... SI */
	JIS_AUXKANJI,	/* ESC $ ( D */
	JIS_UDC		/* ESC $ ( ? (ISO-2022-JP-MS extension) */
} JISState;
38

    
39
/* Character written in place of bytes that cannot be converted */
#define SUBST_CHAR	'_'

/* Control bytes used by the ISO-2022-JP / EUC-JP converters */
#define ESC		'\033'
#define SO		0x0e
#define SI		0x0f
#define SS2		0x8e
#define SS3		0x8f

/*
 * Byte classification helpers.  Every macro masks its argument with 0xff,
 * so both signed and unsigned char values may be passed.
 */

/* EUC-JP */
#define iseuckanji(c) \
	(((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xfe)
#define iseuchwkana1(c) \
	(((c) & 0xff) == SS2)
#define iseuchwkana2(c) \
	(((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xdf)
#define iseucaux(c) \
	(((c) & 0xff) == SS3)

/* Shift_JIS */
#define issjiskanji1(c) \
	((((c) & 0xff) >= 0x81 && ((c) & 0xff) <= 0x9f) || \
	 (((c) & 0xff) >= 0xe0 && ((c) & 0xff) <= 0xef))
#define issjiskanji2(c) \
	((((c) & 0xff) >= 0x40 && ((c) & 0xff) <= 0x7e) || \
	 (((c) & 0xff) >= 0x80 && ((c) & 0xff) <= 0xfc))
#define issjishwkana(c) \
	(((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xdf)
#define issjisext(c) \
	(((c) & 0xff) >= 0xf0 && ((c) & 0xff) <= 0xfc)
#define issjisudc(c) \
	(((c) & 0xff) >= 0xf0 && ((c) & 0xff) <= 0xf9)
#define issjisibmext(c1, c2) \
	((((c1) & 0xff) >= 0xfa && ((c1) & 0xff) <= 0xfb && \
	  issjiskanji2(c2)) ||                              \
	 (((c1) & 0xff) == 0xfc &&                          \
	  ((c2) & 0xff) >= 0x40 && ((c2) & 0xff) <= 0x4b))

/* ISO-2022-JP (7-bit JIS) */
#define isjiskanji(c) \
	(((c) & 0xff) >= 0x21 && ((c) & 0xff) <= 0x7e)
#define isjishwkana(c) \
	(((c) & 0xff) >= 0x21 && ((c) & 0xff) <= 0x5f)
#define isjisudc(c) \
	(((c) & 0xff) >= 0x21 && ((c) & 0xff) <= 0x34)
#define isjisudclow(c) \
	(((c) & 0xff) >= 0x21 && ((c) & 0xff) <= 0x2a)
#define isjisudchigh(c) \
	(((c) & 0xff) >= 0x2b && ((c) & 0xff) <= 0x34)

/* U+0080 - U+07FF */
#define isutf8_2_1(c) \
	(((c) & 0xe0) == 0xc0)
#define isutf8_2_2(c) \
	(((c) & 0xc0) == 0x80)
/* U+0800 - U+FFFF */
#define isutf8_3_1(c) \
	(((c) & 0xf0) == 0xe0)
#define isutf8_3_2(c) \
	(((c) & 0xc0) == 0x80)

/*
 * True when s points at the UTF-8 byte order mark (EF BB BF).
 * The argument is fully parenthesized so any pointer expression is safe;
 * bytes are examined left to right and evaluation stops at the first
 * mismatch, so a short string is never read past its terminator.
 */
#define isutf8bom(s) \
	((((s)[0]) & 0xff) == 0xef && (((s)[1]) & 0xff) == 0xbb && \
	 (((s)[2]) & 0xff) == 0xbf)
98

    
99
/*
 * Escape-sequence emitters for the *tojis converters.
 *
 * Each macro expects two variables in the calling scope:
 *   state - the JISState currently selected in the output
 *   out   - byte pointer to the next free output position
 * If the requested character set is not already selected, the matching
 * escape sequence is appended at out and state is updated; otherwise
 * nothing is written.
 *
 * The bodies are wrapped in do { } while (0) so that an invocation behaves
 * as a single statement, e.g. inside an unbraced if/else.
 */

/* ESC $ B : select kanji */
#define K_IN()					\
	do {					\
		if (state != JIS_KANJI) {	\
			*out++ = ESC;		\
			*out++ = '$';		\
			*out++ = 'B';		\
			state = JIS_KANJI;	\
		}				\
	} while (0)

/* ESC ( B : return to ASCII */
#define K_OUT()					\
	do {					\
		if (state != JIS_ASCII) {	\
			*out++ = ESC;		\
			*out++ = '(';		\
			*out++ = 'B';		\
			state = JIS_ASCII;	\
		}				\
	} while (0)

/* ESC ( I : select half-width katakana */
#define HW_IN()					\
	do {					\
		if (state != JIS_HWKANA) {	\
			*out++ = ESC;		\
			*out++ = '(';		\
			*out++ = 'I';		\
			state = JIS_HWKANA;	\
		}				\
	} while (0)

/* ESC $ ( D : select auxiliary kanji */
#define AUX_IN()				\
	do {					\
		if (state != JIS_AUXKANJI) {	\
			*out++ = ESC;		\
			*out++ = '$';		\
			*out++ = '(';		\
			*out++ = 'D';		\
			state = JIS_AUXKANJI;	\
		}				\
	} while (0)

/* ESC $ ( ? : select user-defined characters */
#define UDC_IN()				\
	do {					\
		if (state != JIS_UDC) {		\
			*out++ = ESC;		\
			*out++ = '$';		\
			*out++ = '(';		\
			*out++ = '?';		\
			state = JIS_UDC;	\
		}				\
	} while (0)
140

    
141
static ConvADType conv_ad_type = C_AD_BY_LOCALE;
142
static gboolean allow_jisx0201_kana = FALSE;
143

    
144
static gchar *conv_jistoeuc(const gchar *inbuf, gint *error);
145
static gchar *conv_jistosjis(const gchar *inbuf, gint *error);
146
static gchar *conv_euctojis(const gchar *inbuf, gint *error);
147
static gchar *conv_sjistojis(const gchar *inbuf, gint *error);
148
static gchar *conv_sjistoeuc(const gchar *inbuf, gint *error);
149

    
150
static gchar *conv_jistoutf8(const gchar *inbuf, gint *error);
151
static gchar *conv_sjistoutf8(const gchar *inbuf, gint *error);
152
static gchar *conv_euctoutf8(const gchar *inbuf, gint *error);
153
static gchar *conv_anytoutf8(const gchar *inbuf, gint *error);
154

    
155
static gchar *conv_utf8toeuc(const gchar *inbuf, gint *error);
156
static gchar *conv_utf8tojis(const gchar *inbuf, gint *error);
157
static gchar *conv_utf8tosjis(const gchar *inbuf, gint *error);
158

    
159
/* static void conv_unreadable_eucjp(gchar *str); */
160
static void conv_unreadable_8bit(gchar *str);
161
/* static void conv_unreadable_latin(gchar *str); */
162

    
163
static gchar *conv_jistodisp(const gchar *inbuf, gint *error);
164
static gchar *conv_sjistodisp(const gchar *inbuf, gint *error);
165
static gchar *conv_euctodisp(const gchar *inbuf, gint *error);
166

    
167
static gchar *conv_anytodisp(const gchar *inbuf, gint *error);
168
static gchar *conv_ustodisp(const gchar *inbuf, gint *error);
169
static gchar *conv_noconv(const gchar *inbuf, gint *error);
170

    
171
/*
 * Convert an ISO-2022-JP (JIS) string to EUC-JP.
 *
 * inbuf: NUL-terminated input string.
 * error: if non-NULL, set to 0 on success or to -1 when an unknown escape
 *        sequence was found (conversion continues in ASCII state).
 *
 * Returns a newly allocated (g_malloc) NUL-terminated string.
 */
static gchar *conv_jistoeuc(const gchar *inbuf, gint *error)
{
	gchar *outbuf;
	const guchar *in = (const guchar *)inbuf;
	guchar *out;
	JISState state = JIS_ASCII;
	gint error_ = 0;

	/* an input byte expands to at most two output bytes (SS2 + kana) */
	outbuf = g_malloc(strlen(inbuf) * 2 + 1);
	out = (guchar *)outbuf;

	while (*in != '\0') {
		if (*in == ESC) {
			in++;
			if (*in == '$') {
				if (*(in + 1) == '@' || *(in + 1) == 'B') {
					state = JIS_KANJI;
					in += 2;
				} else if (*(in + 1) == '(' &&
					   *(in + 2) == 'D') {
					state = JIS_AUXKANJI;
					in += 3;
				} else {
					/* unknown escape sequence */
					error_ = -1;
					state = JIS_ASCII;
				}
			} else if (*in == '(') {
				if (*(in + 1) == 'B' || *(in + 1) == 'J') {
					state = JIS_ASCII;
					in += 2;
				} else if (*(in + 1) == 'I') {
					state = JIS_HWKANA;
					in += 2;
				} else {
					/* unknown escape sequence */
					error_ = -1;
					state = JIS_ASCII;
				}
			} else {
				/* unknown escape sequence */
				error_ = -1;
				state = JIS_ASCII;
			}
		} else if (*in == SO) {
			state = JIS_HWKANA;
			in++;
		} else if (*in == SI) {
			state = JIS_ASCII;
			in++;
		} else {
			switch (state) {
			case JIS_ASCII:
				*out++ = *in++;
				break;
			case JIS_KANJI:
				*out++ = *in++ | 0x80;
				/* truncated two-byte character at end of input */
				if (*in == '\0') break;
				*out++ = *in++ | 0x80;
				break;
			case JIS_HWKANA:
				*out++ = SS2;
				*out++ = *in++ | 0x80;
				break;
			case JIS_AUXKANJI:
				*out++ = SS3;
				*out++ = *in++ | 0x80;
				/* truncated two-byte character at end of input */
				if (*in == '\0') break;
				*out++ = *in++ | 0x80;
				break;
			default:
				*out++ = *in++;
				break;
			}
		}
	}

	*out = '\0';

	if (error)
		*error = error_;

	return outbuf;
}
255

    
256
/*
 * Convert an ISO-2022-JP (JIS) string to Shift_JIS.
 *
 * Besides the standard escape sequences, ESC $ ( ? (ISO-2022-JP-MS
 * user-defined characters) is recognized.
 *
 * inbuf: NUL-terminated input string.
 * error: if non-NULL, set to 0 on success or to -1 when an unknown escape
 *        sequence or an invalid double-byte sequence was found.  Invalid
 *        bytes are replaced with SUBST_CHAR.
 *
 * Returns a newly allocated (g_malloc) NUL-terminated string.
 */
static gchar *conv_jistosjis(const gchar *inbuf, gint *error)
{
	gchar *outbuf;
	const guchar *in = (const guchar *)inbuf;
	guchar *out;
	JISState state = JIS_ASCII;
	gint error_ = 0;

	outbuf = g_malloc(strlen(inbuf) * 2 + 1);
	out = (guchar *)outbuf;

	while (*in != '\0') {
		if (*in == ESC) {
			in++;
			if (*in == '$') {
				if (*(in + 1) == '@' || *(in + 1) == 'B') {
					state = JIS_KANJI;
					in += 2;
				} else if (*(in + 1) == '(' &&
					   *(in + 2) == '?') {
					/* ISO-2022-JP-MS extension */
					state = JIS_UDC;
					in += 3;
				} else {
					/* unknown escape sequence */
					error_ = -1;
					state = JIS_ASCII;
				}
			} else if (*in == '(') {
				if (*(in + 1) == 'B' || *(in + 1) == 'J') {
					state = JIS_ASCII;
					in += 2;
				} else if (*(in + 1) == 'I') {
					state = JIS_HWKANA;
					in += 2;
				} else {
					/* unknown escape sequence */
					error_ = -1;
					state = JIS_ASCII;
				}
			} else {
				/* unknown escape sequence */
				error_ = -1;
				state = JIS_ASCII;
			}
		} else if (*in == SO) {
			state = JIS_HWKANA;
			in++;
		} else if (*in == SI) {
			state = JIS_ASCII;
			in++;
		} else {
			switch (state) {
			case JIS_ASCII:
				*out++ = *in++;
				break;
			case JIS_HWKANA:
				*out++ = *in++ | 0x80;
				break;
			case JIS_KANJI:
				/* lead bytes 0x7f-0x97 are accepted in addition
				   to the regular 0x21-0x7e range */
				if ((isjiskanji(*in) ||
				     (*in >= 0x7f && *in <= 0x97)) &&
				    isjiskanji(*(in + 1))) {
					*out++ = ((*in < 0x5f)
						 ? (((*in - 0x21) / 2) + 0x81)
						 : (((*in - 0x21) / 2) + 0xc1));
					*out++ = ((*in % 2)
						 ? ((*(in + 1) + ((*(in + 1) < 0x60)
						   ? 0x1f : 0x20)))
						 : *(in + 1) + 0x7e);
					in += 2;
				} else {
					error_ = -1;
					*out++ = SUBST_CHAR;
					in++;
					if (*in != '\0') {
						*out++ = SUBST_CHAR;
						in++;
					}
				}
				break;
			case JIS_UDC:
				if (isjisudc(*in) && isjiskanji(*(in + 1))) {
					*out++ = (((*in - 0x21) / 2) + 0xf0);
					*out++ = ((*in % 2)
						 ? ((*(in + 1) + ((*(in + 1) < 0x60)
						   ? 0x1f : 0x20)))
						 : *(in + 1) + 0x7e);
					in += 2;
				} else {
					error_ = -1;
					*out++ = SUBST_CHAR;
					in++;
					if (*in != '\0') {
						*out++ = SUBST_CHAR;
						in++;
					}
				}
				break;
			default:
				*out++ = *in++;
				break;
			}
		}
	}

	*out = '\0';

	if (error)
		*error = error_;

	return outbuf;
}
369

    
370
#define JIS_HWDAKUTEN                0x5e
371
#define JIS_HWHANDAKUTEN        0x5f
372

    
373
static gint conv_jis_hantozen(guchar *outbuf, guchar jis_code, guchar sound_sym)
374
{
375
        static guint16 h2z_tbl[] = {
376
                /* 0x20 - 0x2f */
377
                0x0000, 0x2123, 0x2156, 0x2157, 0x2122, 0x2126, 0x2572, 0x2521,
378
                0x2523, 0x2525, 0x2527, 0x2529, 0x2563, 0x2565, 0x2567, 0x2543,
379
                /* 0x30 - 0x3f */
380
                0x213c, 0x2522, 0x2524, 0x2526, 0x2528, 0x252a, 0x252b, 0x252d,
381
                0x252f, 0x2531, 0x2533, 0x2535, 0x2537, 0x2539, 0x253b, 0x253d,
382
                /* 0x40 - 0x4f */
383
                0x253f, 0x2541, 0x2544, 0x2546, 0x2548, 0x254a, 0x254b, 0x254c,
384
                0x254d, 0x254e, 0x254f, 0x2552, 0x2555, 0x2558, 0x255b, 0x255e,
385
                /* 0x50 - 0x5f */
386
                0x255f, 0x2560, 0x2561, 0x2562, 0x2564, 0x2566, 0x2568, 0x2569,
387
                0x256a, 0x256b, 0x256c, 0x256d, 0x256f, 0x2573, 0x212b, 0x212c
388
        };
389

    
390
        static guint16 dakuten_tbl[] = {
391
                /* 0x30 - 0x3f */
392
                0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x252c, 0x252e,
393
                0x2530, 0x2532, 0x2534, 0x2536, 0x2538, 0x253a, 0x253c, 0x253e,
394
                /* 0x40 - 0x4f */
395
                0x2540, 0x2542, 0x2545, 0x2547, 0x2549, 0x0000, 0x0000, 0x0000,
396
                0x0000, 0x0000, 0x2550, 0x2553, 0x2556, 0x2559, 0x255c, 0x0000
397
        };
398

    
399
        static guint16 handakuten_tbl[] = {
400
                /* 0x4a - 0x4e */
401
                0x2551, 0x2554, 0x2557, 0x255a, 0x255d
402
        };
403

    
404
        guint16 out_code;
405

    
406
        jis_code &= 0x7f;
407
        sound_sym &= 0x7f;
408

    
409
        if (jis_code < 0x21 || jis_code > 0x5f)
410
                return 0;
411

    
412
        if (sound_sym == JIS_HWDAKUTEN &&
413
            jis_code >= 0x36 && jis_code <= 0x4e) {
414
                out_code = dakuten_tbl[jis_code - 0x30];
415
                if (out_code != 0) {
416
                        *outbuf = out_code >> 8;
417
                        *(outbuf + 1) = out_code & 0xff;
418
                        return 2;
419
                }
420
        }
421

    
422
        if (sound_sym == JIS_HWHANDAKUTEN &&
423
            jis_code >= 0x4a && jis_code <= 0x4e) {
424
                out_code = handakuten_tbl[jis_code - 0x4a];
425
                *outbuf = out_code >> 8;
426
                *(outbuf + 1) = out_code & 0xff;
427
                return 2;
428
        }
429

    
430
        out_code = h2z_tbl[jis_code - 0x20];
431
        *outbuf = out_code >> 8;
432
        *(outbuf + 1) = out_code & 0xff;
433
        return 1;
434
}
435

    
436
/*
 * Convert an EUC-JP string to ISO-2022-JP (JIS).
 *
 * Half-width katakana (SS2 + byte) is emitted with ESC ( I when
 * allow_jisx0201_kana is set; otherwise it is converted to full-width
 * characters by conv_jis_hantozen().  The output always ends in ASCII
 * state.
 *
 * inbuf: NUL-terminated input string.
 * error: if non-NULL, set to 0 on success or to -1 when an invalid byte
 *        sequence was found.
 *
 * Returns a newly allocated (g_malloc) NUL-terminated string.
 */
static gchar *conv_euctojis(const gchar *inbuf, gint *error)
{
	gchar *outbuf;
	const guchar *in = (const guchar *)inbuf;
	guchar *out;
	JISState state = JIS_ASCII;
	gint error_ = 0;

	outbuf = g_malloc(strlen(inbuf) * 3 + 4);
	out = (guchar *)outbuf;

	while (*in != '\0') {
		if (isascii(*in)) {
			K_OUT();
			*out++ = *in++;
		} else if (iseuckanji(*in)) {
			if (iseuckanji(*(in + 1))) {
				K_IN();
				*out++ = *in++ & 0x7f;
				*out++ = *in++ & 0x7f;
			} else {
				error_ = -1;
				K_OUT();
				*out++ = SUBST_CHAR;
				in++;
				if (*in != '\0' && !isascii(*in)) {
					*out++ = SUBST_CHAR;
					in++;
				}
			}
		} else if (iseuchwkana1(*in)) {
			if (iseuchwkana2(*(in + 1))) {
				if (allow_jisx0201_kana) {
					HW_IN();
					in++;
					*out++ = *in++ & 0x7f;
				} else {
					guchar jis_ch[2];
					gint len;

					/* pass the next half-width kana (if any)
					   so a sound mark can be merged */
					if (iseuchwkana1(*(in + 2)) &&
					    iseuchwkana2(*(in + 3)))
						len = conv_jis_hantozen
							(jis_ch,
							 *(in + 1), *(in + 3));
					else
						len = conv_jis_hantozen
							(jis_ch,
							 *(in + 1), '\0');
					if (len == 0)
						in += 2;
					else {
						K_IN();
						/* each half-width kana occupies
						   two bytes in EUC-JP */
						in += len * 2;
						*out++ = jis_ch[0];
						*out++ = jis_ch[1];
					}
				}
			} else {
				error_ = -1;
				K_OUT();
				in++;
				if (*in != '\0' && !isascii(*in)) {
					*out++ = SUBST_CHAR;
					in++;
				}
			}
		} else if (iseucaux(*in)) {
			in++;
			if (iseuckanji(*in) && iseuckanji(*(in + 1))) {
				AUX_IN();
				*out++ = *in++ & 0x7f;
				*out++ = *in++ & 0x7f;
			} else {
				error_ = -1;
				K_OUT();
				if (*in != '\0' && !isascii(*in)) {
					*out++ = SUBST_CHAR;
					in++;
					if (*in != '\0' && !isascii(*in)) {
						*out++ = SUBST_CHAR;
						in++;
					}
				}
			}
		} else {
			error_ = -1;
			K_OUT();
			*out++ = SUBST_CHAR;
			in++;
		}
	}

	K_OUT();
	*out = '\0';

	if (error)
		*error = error_;

	return outbuf;
}
537

    
538
/*
 * Map a Shift_JIS double-byte character (lead byte c1, trail byte c2) to a
 * linear index: 188 positions per lead byte, with lead bytes above 0x9f
 * based at 0xc1 instead of 0x81 and trail bytes above 0x7e shifted down by
 * one to close the gap at 0x7f.
 */
#define sjistoidx(c1, c2) \
	(((c1) - (((c1) > 0x9f) ? 0xc1 : 0x81)) * 188 + \
	 (c2) - (((c2) > 0x7e) ? 0x41 : 0x40))
/* Split a linear index into the two JIS bytes (94 positions per row) */
#define idxtojis1(c) (((c) / 94) + 0x21)
#define idxtojis2(c) (((c) % 94) + 0x21)
544

    
545
/*
 * Remap a linear Shift_JIS index (see sjistoidx()) from the area starting
 * at lead byte 0xfa to the equivalent position in the 0xed/0xee area.
 * Judging by the name, this is the IBM extension -> NEC selected IBM
 * extension mapping (NOTE(review): confirm against the code tables).
 *
 * Indexes below sjistoidx(0xfa, 0x40), and those in 0xfa54-0xfa54 /
 * 0xfa58-0xfa5b whose mappings are commented out below, are returned
 * unchanged.
 */
static guint conv_idx_ibmtonec(guint idx)
{
	if      (idx >= sjistoidx(0xfa, 0x5c))
		idx -=  sjistoidx(0xfa, 0x5c)
		      - sjistoidx(0xed, 0x40);
/*	else if (idx == sjistoidx(0xfa, 0x5b)) */
/*		idx =   sjistoidx(0x81, 0xe6); */
/*	else if (idx == sjistoidx(0xfa, 0x5a)) */
/*		idx =   sjistoidx(0x87, 0x84); */
/*	else if (idx == sjistoidx(0xfa, 0x59)) */
/*		idx =   sjistoidx(0x87, 0x82); */
/*	else if (idx == sjistoidx(0xfa, 0x58)) */
/*		idx =   sjistoidx(0x87, 0x8a); */
	else if (idx >= sjistoidx(0xfa, 0x55))
		idx -=  sjistoidx(0xfa, 0x55)
		      - sjistoidx(0xee, 0xfa);
/*	else if (idx == sjistoidx(0xfa, 0x54)) */
/*		idx =   sjistoidx(0x81, 0xca); */
/*	else if (idx >= sjistoidx(0xfa, 0x4a)) */
/*		idx -=  sjistoidx(0xfa, 0x4a)  */
/*		      - sjistoidx(0x87, 0x54); */
	else if (idx >= sjistoidx(0xfa, 0x40))
		idx -=  sjistoidx(0xfa, 0x40)
		      - sjistoidx(0xee, 0xef);
	return idx;
}
571

    
572
/*
 * Convert a Shift_JIS string to ISO-2022-JP (JIS).
 *
 * Half-width katakana is emitted with ESC ( I when allow_jisx0201_kana is
 * set; otherwise it is converted to full-width characters by
 * conv_jis_hantozen().  Characters accepted by issjisibmext() are remapped
 * with conv_idx_ibmtonec(); any other extension lead byte (0xf0-0xfc) is
 * replaced with SUBST_CHAR.  The output always ends in ASCII state.
 *
 * inbuf: NUL-terminated input string.
 * error: if non-NULL, set to 0 on success or to -1 when an invalid or
 *        unconvertible byte sequence was found.
 *
 * Returns a newly allocated (g_malloc) NUL-terminated string.
 */
static gchar *conv_sjistojis(const gchar *inbuf, gint *error)
{
	gchar *outbuf;
	const guchar *in = (const guchar *)inbuf;
	guchar *out;
	JISState state = JIS_ASCII;
	gint error_ = 0;
	guint idx;

	outbuf = g_malloc(strlen(inbuf) * 5 + 4);
	out = (guchar *)outbuf;

	while (*in != '\0') {
		if (isascii(*in)) {
			K_OUT();
			*out++ = *in++;
		} else if (issjiskanji1(*in)) {
			if (issjiskanji2(*(in + 1))) {
				K_IN();
				idx = sjistoidx(*in, *(in + 1));
				*out++ = idxtojis1(idx);
				*out++ = idxtojis2(idx);
				in += 2;
			} else {
				error_ = -1;
				K_OUT();
				*out++ = SUBST_CHAR;
				in++;
				if (*in != '\0' && !isascii(*in)) {
					*out++ = SUBST_CHAR;
					in++;
				}
			}
		} else if (issjishwkana(*in)) {
			if (allow_jisx0201_kana) {
				HW_IN();
				*out++ = *in++ & 0x7f;
			} else {
				guchar jis_ch[2];
				gint len;

				/* pass the next half-width kana (if any)
				   so a sound mark can be merged */
				if (issjishwkana(*(in + 1)))
					len = conv_jis_hantozen
						(jis_ch,
						 *in, *(in + 1));
				else
					len = conv_jis_hantozen
						(jis_ch,
						 *in, '\0');
				if (len == 0)
					in++;
				else {
					K_IN();
					in += len;
					*out++ = jis_ch[0];
					*out++ = jis_ch[1];
				}
			}
		} else if (issjisibmext(*in, *(in + 1))) {
			K_IN();
			idx = sjistoidx(*in, *(in + 1));
			idx = conv_idx_ibmtonec(idx);
			*out++ = idxtojis1(idx);
			*out++ = idxtojis2(idx);
			in += 2;
#if 0
		} else if (issjisudc(*in)) {
			UDC_IN();
			idx = sjistoidx(*in, *(in + 1))
			      - sjistoidx(0xf0, 0x40);
			*out++ = idxtojis1(idx);
			*out++ = idxtojis2(idx);
			in += 2;
#endif
		} else if (issjisext(*in)) {
			error_ = -1;
			K_OUT();
			*out++ = SUBST_CHAR;
			in++;
			if (*in != '\0' && !isascii(*in)) {
				*out++ = SUBST_CHAR;
				in++;
			}
		} else {
			error_ = -1;
			K_OUT();
			*out++ = SUBST_CHAR;
			in++;
		}
	}

	K_OUT();
	*out = '\0';

	if (error)
		*error = error_;

	return outbuf;
}
671

    
672
/*
 * Convert a Shift_JIS string to EUC-JP.
 *
 * Half-width katakana is written as SS2 followed by the original byte.
 * Extension lead bytes (0xf0-0xfc) and other invalid bytes are replaced
 * with SUBST_CHAR.
 *
 * inbuf: NUL-terminated input string.
 * error: if non-NULL, set to 0 on success or to -1 when an invalid or
 *        unconvertible byte sequence was found.
 *
 * Returns a newly allocated (g_malloc) NUL-terminated string.
 */
static gchar *conv_sjistoeuc(const gchar *inbuf, gint *error)
{
	gchar *outbuf;
	const guchar *in = (const guchar *)inbuf;
	guchar *out;
	gint error_ = 0;

	/* an input byte expands to at most two output bytes (SS2 + kana) */
	outbuf = g_malloc(strlen(inbuf) * 2 + 1);
	out = (guchar *)outbuf;

	while (*in != '\0') {
		if (isascii(*in)) {
			*out++ = *in++;
		} else if (issjiskanji1(*in)) {
			if (issjiskanji2(*(in + 1))) {
				guchar out1 = *in;
				guchar out2 = *(in + 1);
				guchar row;

				row = out1 < 0xa0 ? 0x70 : 0xb0;
				/* each lead byte covers two JIS rows; the
				   trail byte selects which one */
				if (out2 < 0x9f) {
					out1 = (out1 - row) * 2 - 1;
					out2 -= out2 > 0x7f ? 0x20 : 0x1f;
				} else {
					out1 = (out1 - row) * 2;
					out2 -= 0x7e;
				}

				*out++ = out1 | 0x80;
				*out++ = out2 | 0x80;
				in += 2;
			} else {
				error_ = -1;
				*out++ = SUBST_CHAR;
				in++;
				if (*in != '\0' && !isascii(*in)) {
					*out++ = SUBST_CHAR;
					in++;
				}
			}
		} else if (issjishwkana(*in)) {
			*out++ = SS2;
			*out++ = *in++;
		} else if (issjisext(*in)) {
			error_ = -1;
			*out++ = SUBST_CHAR;
			in++;
			if (*in != '\0' && !isascii(*in)) {
				*out++ = SUBST_CHAR;
				in++;
			}
		} else {
			error_ = -1;
			*out++ = SUBST_CHAR;
			in++;
		}
	}

	*out = '\0';

	if (error)
		*error = error_;

	return outbuf;
}
737

    
738
/*
 * Convert an ISO-2022-JP string to UTF-8 through an intermediate encoding.
 *
 * Input containing the "ESC $ ( D" designator is routed through EUC-JP;
 * everything else is routed through Shift_JIS.
 *
 * inbuf: NUL-terminated source string.
 * error: optional; receives the OR of the error codes of both steps.
 *
 * Returns a newly allocated string owned by the caller.
 */
static gchar *conv_jistoutf8(const gchar *inbuf, gint *error)
{
        gchar *intermediate;
        gchar *result;
        gint first_err = 0;
        gint second_err = 0;

        if (strstr(inbuf, "\033$(D") != NULL) {
                intermediate = conv_jistoeuc(inbuf, &first_err);
                result = conv_euctoutf8(intermediate, &second_err);
        } else {
                intermediate = conv_jistosjis(inbuf, &first_err);
                result = conv_sjistoutf8(intermediate, &second_err);
        }

        /* the intermediate buffer is only needed for the second step */
        g_free(intermediate);

        if (error)
                *error = (first_err | second_err);

        return result;
}
757

    
758
/*
 * Locking helpers for the cached iconv descriptors below.  They map onto
 * the GLib G_LOCK macros when USE_THREADS is set and expand to nothing
 * otherwise.
 */
#if USE_THREADS
#  define S_LOCK_DEFINE_STATIC(name)        G_LOCK_DEFINE_STATIC(name)
#  define S_LOCK(name)                      G_LOCK(name)
#  define S_UNLOCK(name)                    G_UNLOCK(name)
#else
#  define S_LOCK_DEFINE_STATIC(name)
#  define S_LOCK(name)
#  define S_UNLOCK(name)
#endif
767

    
768
/*
 * Convert a Shift_JIS string to UTF-8 using a cached iconv descriptor.
 *
 * The descriptor is opened on first use, trying CS_CP932 first and
 * CS_SHIFT_JIS second.  If neither can be opened the failure is remembered
 * and this and every later call returns a plain copy of the input with
 * *error set to -1.
 *
 * inbuf: NUL-terminated source string.
 * error: optional; receives the conversion status.
 *
 * Returns a newly allocated string owned by the caller.
 */
static gchar *conv_sjistoutf8(const gchar *inbuf, gint *error)
{
        static iconv_t cd = (iconv_t)-1;
        static gboolean iconv_ok = TRUE;
        gchar *ret;
        S_LOCK_DEFINE_STATIC(cd);

        S_LOCK(cd);

        /* lazily open the descriptor unless a previous attempt failed */
        if (cd == (iconv_t)-1 && iconv_ok) {
                cd = iconv_open(CS_UTF_8, CS_CP932);
                if (cd == (iconv_t)-1)
                        cd = iconv_open(CS_UTF_8, CS_SHIFT_JIS);
                if (cd == (iconv_t)-1) {
                        g_warning("conv_sjistoutf8(): %s\n",
                                  g_strerror(errno));
                        iconv_ok = FALSE;
                }
        }

        /* no usable converter: hand back the input unchanged */
        if (cd == (iconv_t)-1) {
                S_UNLOCK(cd);
                if (error)
                        *error = -1;
                return g_strdup(inbuf);
        }

        ret = conv_iconv_strdup_with_cd(inbuf, cd, error);
        S_UNLOCK(cd);
        return ret;
}
804

    
805
/*
 * Convert an EUC-JP string to UTF-8 using a cached iconv descriptor.
 *
 * The descriptor is opened on first use, trying CS_EUC_JP_MS first and
 * CS_EUC_JP second.  If neither can be opened the failure is remembered
 * and this and every later call returns a plain copy of the input with
 * *error set to -1.
 *
 * inbuf: NUL-terminated source string.
 * error: optional; receives the conversion status.
 *
 * Returns a newly allocated string owned by the caller.
 */
static gchar *conv_euctoutf8(const gchar *inbuf, gint *error)
{
        static iconv_t cd = (iconv_t)-1;
        static gboolean iconv_ok = TRUE;
        gchar *ret;
        S_LOCK_DEFINE_STATIC(cd);

        S_LOCK(cd);

        /* lazily open the descriptor unless a previous attempt failed */
        if (cd == (iconv_t)-1 && iconv_ok) {
                cd = iconv_open(CS_UTF_8, CS_EUC_JP_MS);
                if (cd == (iconv_t)-1)
                        cd = iconv_open(CS_UTF_8, CS_EUC_JP);
                if (cd == (iconv_t)-1) {
                        g_warning("conv_euctoutf8(): %s\n",
                                  g_strerror(errno));
                        iconv_ok = FALSE;
                }
        }

        /* no usable converter: hand back the input unchanged */
        if (cd == (iconv_t)-1) {
                S_UNLOCK(cd);
                if (error)
                        *error = -1;
                return g_strdup(inbuf);
        }

        ret = conv_iconv_strdup_with_cd(inbuf, cd, error);
        S_UNLOCK(cd);
        return ret;
}
841

    
842
/*
 * Guess the Japanese encoding of inbuf and convert it to UTF-8.
 *
 * ISO-2022-JP, Shift_JIS and EUC-JP guesses are passed to the matching
 * converter.  Any other guess yields a plain copy with *error set to 0;
 * when the guess is UTF-8 a leading byte order mark is skipped first.
 *
 * Returns a newly allocated string owned by the caller.
 */
static gchar *conv_anytoutf8(const gchar *inbuf, gint *error)
{
        const CharSet guessed = conv_guess_ja_encoding(inbuf);

        if (guessed == C_ISO_2022_JP)
                return conv_jistoutf8(inbuf, error);
        if (guessed == C_SHIFT_JIS)
                return conv_sjistoutf8(inbuf, error);
        if (guessed == C_EUC_JP)
                return conv_euctoutf8(inbuf, error);

        /* nothing to convert */
        if (error)
                *error = 0;
        if (guessed == C_UTF_8 && isutf8bom(inbuf))
                inbuf += 3;

        return g_strdup(inbuf);
}
863

    
864
/*
 * Convert a UTF-8 string to Shift_JIS using a cached iconv descriptor.
 *
 * The descriptor is opened on first use, trying CS_CP932 first and
 * CS_SHIFT_JIS second.  If neither can be opened the failure is remembered
 * and this and every later call returns a plain copy of the input with
 * *error set to -1.  On the normal path a leading UTF-8 byte order mark is
 * skipped before converting.
 *
 * inbuf: NUL-terminated source string.
 * error: optional; receives the conversion status.
 *
 * Returns a newly allocated string owned by the caller.
 */
static gchar *conv_utf8tosjis(const gchar *inbuf, gint *error)
{
        static iconv_t cd = (iconv_t)-1;
        static gboolean iconv_ok = TRUE;
        gchar *ret;
        S_LOCK_DEFINE_STATIC(cd);

        S_LOCK(cd);

        /* lazily open the descriptor unless a previous attempt failed */
        if (cd == (iconv_t)-1 && iconv_ok) {
                cd = iconv_open(CS_CP932, CS_UTF_8);
                if (cd == (iconv_t)-1)
                        cd = iconv_open(CS_SHIFT_JIS, CS_UTF_8);
                if (cd == (iconv_t)-1) {
                        g_warning("conv_utf8tosjis(): %s\n",
                                  g_strerror(errno));
                        iconv_ok = FALSE;
                }
        }

        /* no usable converter: hand back the input unchanged */
        if (cd == (iconv_t)-1) {
                S_UNLOCK(cd);
                if (error)
                        *error = -1;
                return g_strdup(inbuf);
        }

        if (isutf8bom(inbuf))
                inbuf += 3;
        ret = conv_iconv_strdup_with_cd(inbuf, cd, error);
        S_UNLOCK(cd);
        return ret;
}
902

    
903
/*
 * Convert a UTF-8 string to EUC-JP using a cached iconv descriptor.
 *
 * The descriptor is opened on first use, trying CS_EUC_JP_MS first and
 * CS_EUC_JP second.  If neither can be opened the failure is remembered
 * and this and every later call returns a plain copy of the input with
 * *error set to -1.  On the normal path a leading UTF-8 byte order mark is
 * skipped before converting.
 *
 * inbuf: NUL-terminated source string.
 * error: optional; receives the conversion status.
 *
 * Returns a newly allocated string owned by the caller.
 */
static gchar *conv_utf8toeuc(const gchar *inbuf, gint *error)
{
        static iconv_t cd = (iconv_t)-1;
        static gboolean iconv_ok = TRUE;
        gchar *ret;
        S_LOCK_DEFINE_STATIC(cd);

        S_LOCK(cd);

        /* lazily open the descriptor unless a previous attempt failed */
        if (cd == (iconv_t)-1 && iconv_ok) {
                cd = iconv_open(CS_EUC_JP_MS, CS_UTF_8);
                if (cd == (iconv_t)-1)
                        cd = iconv_open(CS_EUC_JP, CS_UTF_8);
                if (cd == (iconv_t)-1) {
                        g_warning("conv_utf8toeuc(): %s\n",
                                  g_strerror(errno));
                        iconv_ok = FALSE;
                }
        }

        /* no usable converter: hand back the input unchanged */
        if (cd == (iconv_t)-1) {
                S_UNLOCK(cd);
                if (error)
                        *error = -1;
                return g_strdup(inbuf);
        }

        if (isutf8bom(inbuf))
                inbuf += 3;
        ret = conv_iconv_strdup_with_cd(inbuf, cd, error);
        S_UNLOCK(cd);
        return ret;
}
941

    
942
/*
 * Convert a UTF-8 string to ISO-2022-JP in two steps, going through
 * Shift_JIS (the EUC-JP route is kept below but compiled out).
 *
 * inbuf: NUL-terminated source string.
 * error: optional; receives the OR of the error codes of both steps.
 *
 * Returns a newly allocated string owned by the caller.
 */
static gchar *conv_utf8tojis(const gchar *inbuf, gint *error)
{
        gchar *intermediate;
        gchar *result;
        gint first_err = 0;
        gint second_err = 0;

#if 1
        intermediate = conv_utf8tosjis(inbuf, &first_err);
        result = conv_sjistojis(intermediate, &second_err);
#else
        intermediate = conv_utf8toeuc(inbuf, &first_err);
        result = conv_euctojis(intermediate, &second_err);
#endif
        /* the intermediate buffer is only needed for the second step */
        g_free(intermediate);

        if (error)
                *error = (first_err | second_err);

        return result;
}
961

    
962
#if 0
963
/*
 * Lookup table used by isprintableeuckanji(): one row per lead byte
 * 0xa2..0xa8, one column per trail byte 0xa0..0xff.  A non-zero entry
 * marks the two-byte code as printable.  The table is never written, so
 * it is declared const.
 */
static const gchar valid_eucjp_tbl[][96] = {
        /* 0xa2a0 - 0xa2ff */
        { 0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 0,
          0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 1, 1, 1, 1, 1, 1,
          1, 1, 0, 0, 0, 0, 0, 0,  0, 0, 1, 1, 1, 1, 1, 1,
          1, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 0, 0, 0, 0, 0,
          0, 0, 1, 1, 1, 1, 1, 1,  1, 1, 0, 0, 0, 0, 1, 0 },

        /* 0xa3a0 - 0xa3ff */
        { 0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 0, 0, 0, 0, 0, 0,
          0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 0, 0, 0, 0, 0,
          0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 0, 0, 0, 0, 0 },

        /* 0xa4a0 - 0xa4ff */
        { 0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0 },

        /* 0xa5a0 - 0xa5ff */
        { 0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 0,  0, 0, 0, 0, 0, 0, 0, 0 },

        /* 0xa6a0 - 0xa6ff */
        { 0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 0, 0, 0, 0, 0, 0, 0,
          0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0 },

        /* 0xa7a0 - 0xa7ff */
        { 0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
          0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0 },

        /* 0xa8a0 - 0xa8ff */
        { 0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0 }
};
1020

1021
/*
 * Tell whether the two-byte code (c1, c2) counts as printable.
 *
 * c1: lead byte, c2: trail byte.
 * Returns TRUE when printable, FALSE otherwise.
 */
static gboolean isprintableeuckanji(guchar c1, guchar c2)
{
        /* lead byte must be 0xa1..0xf4, trail byte 0xa1..0xfe */
        if (c1 <= 0xa0 || c1 >= 0xf5 || c2 <= 0xa0 || c2 == 0xff)
                return FALSE;

        /* lead bytes 0xa9..0xaf are rejected as a whole */
        if (c1 >= 0xa9 && c1 <= 0xaf)
                return FALSE;

        /* lead bytes 0xa2..0xa8 are decided per code by the table */
        if (c1 >= 0xa2 && c1 <= 0xa8)
                return (gboolean)valid_eucjp_tbl[c1 - 0xa2][c2 - 0xa0];

        /* the tails of rows 0xcf and 0xf4 are rejected */
        if (c1 == 0xcf && c2 >= 0xd4 && c2 <= 0xfe)
                return FALSE;
        if (c1 == 0xf4 && c2 >= 0xa7 && c2 <= 0xfe)
                return FALSE;

        return TRUE;
}
1044

1045
/*
 * Rewrite str in place so that only printable EUC-JP remains: CR+LF is
 * collapsed to LF and every byte that does not form an accepted sequence
 * is replaced by SUBST_CHAR.
 *
 * str: NUL-terminated buffer, modified in place.
 */
static void conv_unreadable_eucjp(gchar *str)
{
        /* work on unsigned bytes; the casts keep the pointer types compatible */
        register guchar *p = (guchar *)str;

        while (*p != '\0') {
                if (isascii(*p)) {
                        /* convert CR+LF -> LF */
                        if (*p == '\r' && *(p + 1) == '\n')
                                memmove(p, p + 1, strlen((const gchar *)p));
                        /* printable 7 bit code */
                        p++;
                } else if (iseuckanji(*p)) {
                        if (isprintableeuckanji(*p, *(p + 1))) {
                                /* printable euc-jp code */
                                p += 2;
                        } else {
                                /* substitute unprintable code */
                                *p++ = SUBST_CHAR;
                                if (*p != '\0') {
                                        /* an ASCII trail byte is kept as is */
                                        if (isascii(*p))
                                                p++;
                                        else
                                                *p++ = SUBST_CHAR;
                                }
                        }
                } else if (iseuchwkana1(*p)) {
                        if (iseuchwkana2(*(p + 1)))
                                /* euc-jp hankaku kana */
                                p += 2;
                        else
                                *p++ = SUBST_CHAR;
                } else if (iseucaux(*p)) {
                        if (iseuckanji(*(p + 1)) && iseuckanji(*(p + 2))) {
                                /* auxiliary kanji */
                                p += 3;
                        } else
                                *p++ = SUBST_CHAR;
                } else
                        /* substitute unprintable 1 byte code */
                        *p++ = SUBST_CHAR;
        }
}
1087
#endif
1088

    
1089
/*
 * Make str 7-bit clean in place: every CR+LF pair is collapsed to LF and
 * every non-ASCII byte is replaced by SUBST_CHAR.
 *
 * The buffer is compacted in a single pass with separate read and write
 * cursors, so a text with many CR+LF pairs no longer costs one memmove()
 * of the remaining tail per pair.
 *
 * str: NUL-terminated buffer, modified in place (it can only shrink).
 */
static void conv_unreadable_8bit(gchar *str)
{
        const gchar *src = str;
        gchar *dst = str;

        while (*src != '\0') {
                if (*src == '\r' && *(src + 1) == '\n') {
                        /* drop the CR; the LF is copied by the next round */
                        src++;
                        continue;
                }

                if (isascii(*(const guchar *)src))
                        *dst = *src;
                else
                        *dst = SUBST_CHAR;
                dst++;
                src++;
        }

        *dst = '\0';
}
1101

    
1102
#if 0
1103
/*
 * Rewrite str in place: CR+LF is collapsed to LF and every byte in the
 * range 0x7f..0x9f is replaced by SUBST_CHAR.
 *
 * str: NUL-terminated buffer, modified in place.
 */
static void conv_unreadable_latin(gchar *str)
{
        /* work on unsigned bytes; the casts keep the pointer types compatible */
        register guchar *p = (guchar *)str;

        while (*p != '\0') {
                /* convert CR+LF -> LF */
                if (*p == '\r' && *(p + 1) == '\n')
                        memmove(p, p + 1, strlen((const gchar *)p));
                else if ((*p & 0xff) >= 0x7f && (*p & 0xff) <= 0x9f)
                        *p = SUBST_CHAR;
                p++;
        }
}
1116
#endif
1117

    
1118
#define NCV        '\0'
1119

    
1120
void conv_mb_alnum(gchar *str)
1121
{
1122
        static guchar char_tbl[] = {
1123
                /* 0xa0 - 0xaf */
1124
                NCV, ' ', NCV, NCV, ',', '.', NCV, ':',
1125
                ';', '?', '!', NCV, NCV, NCV, NCV, NCV,
1126
                /* 0xb0 - 0xbf */
1127
                NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
1128
                NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
1129
                /* 0xc0 - 0xcf */
1130
                NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
1131
                NCV, NCV, '(', ')', NCV, NCV, '[', ']',
1132
                /* 0xd0 - 0xdf */
1133
                '{', '}', NCV, NCV, NCV, NCV, NCV, NCV,
1134
                NCV, NCV, NCV, NCV, '+', '-', NCV, NCV,
1135
                /* 0xe0 - 0xef */
1136
                NCV, '=', NCV, '<', '>', NCV, NCV, NCV,
1137
                NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV
1138
        };
1139

    
1140
        register guchar *p = (guchar *)str;
1141
        register gint len;
1142

    
1143
        len = strlen(str);
1144

    
1145
        while (len > 1) {
1146
                if (*p == 0xa3) {
1147
                        register guchar ch = *(p + 1);
1148

    
1149
                        if (ch >= 0xb0 && ch <= 0xfa) {
1150
                                /* [a-zA-Z] */
1151
                                *p = ch & 0x7f;
1152
                                p++;
1153
                                len--;
1154
                                memmove(p, p + 1, len);
1155
                                len--;
1156
                        } else  {
1157
                                p += 2;
1158
                                len -= 2;
1159
                        }
1160
                } else if (*p == 0xa1) {
1161
                        register guchar ch = *(p + 1);
1162

    
1163
                        if (ch >= 0xa0 && ch <= 0xef &&
1164
                            NCV != char_tbl[ch - 0xa0]) {
1165
                                *p = char_tbl[ch - 0xa0];
1166
                                p++;
1167
                                len--;
1168
                                memmove(p, p + 1, len);
1169
                                len--;
1170
                        } else {
1171
                                p += 2;
1172
                                len -= 2;
1173
                        }
1174
                } else if (iseuckanji(*p)) {
1175
                        p += 2;
1176
                        len -= 2;
1177
                } else {
1178
                        p++;
1179
                        len--;
1180
                }
1181
        }
1182
}
1183

    
1184
CharSet conv_guess_ja_encoding(const gchar *str)
1185
{
1186
        const guchar *p = (const guchar *)str;
1187
        CharSet guessed = C_US_ASCII;
1188

    
1189
        while (*p != '\0') {
1190
                if (*p == ESC && (*(p + 1) == '$' || *(p + 1) == '(')) {
1191
                        if (guessed == C_US_ASCII)
1192
                                return C_ISO_2022_JP;
1193
                        p += 2;
1194
                } else if (isascii(*p)) {
1195
                        p++;
1196
                } else if (iseuckanji(*p) && iseuckanji(*(p + 1))) {
1197
                        if (*p >= 0xfd && *p <= 0xfe)
1198
                                return C_EUC_JP;
1199
                        else if (guessed == C_SHIFT_JIS) {
1200
                                if ((issjiskanji1(*p) &&
1201
                                     issjiskanji2(*(p + 1))) ||
1202
                                    issjishwkana(*p))
1203
                                        guessed = C_SHIFT_JIS;
1204
                                else
1205
                                        guessed = C_EUC_JP;
1206
                        } else
1207
                                guessed = C_EUC_JP;
1208
                        p += 2;
1209
                } else if (issjiskanji1(*p) && issjiskanji2(*(p + 1))) {
1210
                        guessed = C_SHIFT_JIS;
1211
                        p += 2;
1212
                } else if (issjishwkana(*p)) {
1213
                        guessed = C_SHIFT_JIS;
1214
                        p++;
1215
                } else {
1216
                        if (guessed == C_US_ASCII)
1217
                                guessed = C_AUTO;
1218
                        p++;
1219
                }
1220
        }
1221

    
1222
        if (guessed != C_US_ASCII) {
1223
                p = (const guchar *)str;
1224

    
1225
                while (*p != '\0') {
1226
                        if (isascii(*p)) {
1227
                                p++;
1228
                        } else if (isutf8_3_1(*p) &&
1229
                                   isutf8_3_2(*(p + 1)) &&
1230
                                   isutf8_3_2(*(p + 2))) {
1231
                                p += 3;
1232
                        } else {
1233
                                return guessed;
1234
                        }
1235
                }
1236

    
1237
                return C_UTF_8;
1238
        }
1239

    
1240
        return guessed;
1241
}
1242

    
1243
/*
 * ISO-2022-JP to display encoding; simply forwards to conv_jistoutf8().
 * Returns a newly allocated string owned by the caller.
 */
static gchar *conv_jistodisp(const gchar *inbuf, gint *error)
{
        gchar *disp = conv_jistoutf8(inbuf, error);

        return disp;
}
1247

    
1248
/*
 * Shift_JIS to display encoding; simply forwards to conv_sjistoutf8().
 * Returns a newly allocated string owned by the caller.
 */
static gchar *conv_sjistodisp(const gchar *inbuf, gint *error)
{
        gchar *disp = conv_sjistoutf8(inbuf, error);

        return disp;
}
1252

    
1253
/*
 * EUC-JP to display encoding; simply forwards to conv_euctoutf8().
 * Returns a newly allocated string owned by the caller.
 */
static gchar *conv_euctodisp(const gchar *inbuf, gint *error)
{
        gchar *disp = conv_euctoutf8(inbuf, error);

        return disp;
}
1257

    
1258
/*
 * Prepare a supposedly UTF-8 string for display.
 *
 * Valid UTF-8 is copied as is, minus a leading byte order mark, with
 * *error set to 0.  Invalid input is handed to conv_ustodisp().
 *
 * Returns a newly allocated string owned by the caller.
 */
gchar *conv_utf8todisp(const gchar *inbuf, gint *error)
{
        if (g_utf8_validate(inbuf, -1, NULL) != TRUE)
                return conv_ustodisp(inbuf, error);

        if (error)
                *error = 0;
        if (isutf8bom(inbuf))
                inbuf += 3;

        return g_strdup(inbuf);
}
1269

    
1270
/*
 * Auto-detect the encoding of inbuf and convert it for display.
 *
 * If the result of conv_anytoutf8() is not valid UTF-8, *error is set to
 * -1 and the result is made 7-bit clean with conv_unreadable_8bit().
 *
 * Returns a newly allocated string owned by the caller.
 */
static gchar *conv_anytodisp(const gchar *inbuf, gint *error)
{
        gchar *converted = conv_anytoutf8(inbuf, error);

        if (g_utf8_validate(converted, -1, NULL) == TRUE)
                return converted;

        if (error)
                *error = -1;
        conv_unreadable_8bit(converted);

        return converted;
}
1283

    
1284
/*
 * Produce a 7-bit clean copy of inbuf for display: the copy is passed
 * through conv_unreadable_8bit().  *error is always set to 0.
 *
 * Returns a newly allocated string owned by the caller.
 */
static gchar *conv_ustodisp(const gchar *inbuf, gint *error)
{
        gchar *copy;

        if (error)
                *error = 0;

        copy = g_strdup(inbuf);
        conv_unreadable_8bit(copy);

        return copy;
}
1295

    
1296
/*
 * Convert a string in the locale charset to the internal encoding for
 * display.
 *
 * If conv_iconv_strdup() yields no result, the input is treated as UTF-8
 * and passed to conv_utf8todisp() instead; *error keeps whatever the
 * failed conversion stored.
 *
 * inbuf: NUL-terminated source string.  NULL is accepted and yields NULL
 *        with *error set to 0, matching conv_codeset_strdup_full();
 *        without this guard the fallback would pass NULL on to
 *        g_utf8_validate().
 * error: optional; receives the conversion status.
 *
 * Returns a newly allocated string owned by the caller, or NULL for NULL
 * input.
 */
gchar *conv_localetodisp(const gchar *inbuf, gint *error)
{
        gchar *str;

        if (!inbuf) {
                if (error)
                        *error = 0;
                return NULL;
        }

        str = conv_iconv_strdup(inbuf, conv_get_locale_charset_str(),
                                CS_INTERNAL, error);
        if (!str)
                str = conv_utf8todisp(inbuf, NULL);

        return str;
}
1307

    
1308
/*
 * Identity "conversion": returns a copy of inbuf and reports success.
 * Other functions in this file compare against this function's address to
 * detect that no built-in converter applies.
 *
 * Returns a newly allocated string owned by the caller.
 */
static gchar *conv_noconv(const gchar *inbuf, gint *error)
{
        gchar *copy = g_strdup(inbuf);

        if (error)
                *error = 0;

        return copy;
}
1314

    
1315
static const gchar *
1316
conv_get_fallback_for_private_encoding(const gchar *encoding)
1317
{
1318
        if (encoding) {
1319
                if ((encoding[0] == 'X' || encoding[0] == 'x') &&
1320
                    encoding[1] == '-') {
1321
                        if (!g_ascii_strcasecmp(encoding, CS_X_GBK))
1322
                                return CS_GBK;
1323
                        else if (!g_ascii_strcasecmp(encoding, CS_X_SJIS))
1324
                                return CS_SHIFT_JIS;
1325
                } else if ((encoding[0] == 'K' || encoding[0] == 'k') &&
1326
                           (encoding[1] == 'S' || encoding[1] == 's')) {
1327
                        if (!g_ascii_strcasecmp(encoding, CS_KS_C_5601_1987))
1328
                                return CS_EUC_KR;
1329
                }
1330
        }
1331

    
1332
        return encoding;
1333
}
1334

    
1335
CodeConverter *conv_code_converter_new(const gchar *src_encoding,
1336
                                       const gchar *dest_encoding)
1337
{
1338
        CodeConverter *conv;
1339

    
1340
        src_encoding = conv_get_fallback_for_private_encoding(src_encoding);
1341

    
1342
        conv = g_new0(CodeConverter, 1);
1343
        conv->code_conv_func =
1344
                conv_get_code_conv_func(src_encoding, dest_encoding);
1345
        conv->src_encoding = g_strdup(src_encoding);
1346
        conv->dest_encoding = g_strdup(dest_encoding);
1347

    
1348
        return conv;
1349
}
1350

    
1351
/*
 * Release a converter created by conv_code_converter_new(), together with
 * the encoding names it owns.
 *
 * conv: converter to free; NULL is accepted and ignored, as with g_free().
 */
void conv_code_converter_destroy(CodeConverter *conv)
{
        if (!conv)
                return;

        g_free(conv->src_encoding);
        g_free(conv->dest_encoding);
        g_free(conv);
}
1357

    
1358
/*
 * Convert inbuf with the given converter.
 *
 * When the converter holds a built-in conversion function it is used;
 * otherwise (the function is conv_noconv) the conversion is delegated to
 * conv_iconv_strdup() with the stored encoding names.  No error code is
 * reported to the caller.
 *
 * Returns a newly allocated string, or NULL when inbuf is NULL.
 */
gchar *conv_convert(CodeConverter *conv, const gchar *inbuf)
{
        if (!inbuf)
                return NULL;

        if (conv->code_conv_func == conv_noconv)
                return conv_iconv_strdup(inbuf, conv->src_encoding,
                                         conv->dest_encoding, NULL);

        return conv->code_conv_func(inbuf, NULL);
}
1368

    
1369
gchar *conv_codeset_strdup_full(const gchar *inbuf,
1370
                                const gchar *src_encoding,
1371
                                const gchar *dest_encoding,
1372
                                gint *error)
1373
{
1374
        CodeConvFunc conv_func;
1375

    
1376
        if (!inbuf) {
1377
                if (error)
1378
                        *error = 0;
1379
                return NULL;
1380
        }
1381

    
1382
        src_encoding = conv_get_fallback_for_private_encoding(src_encoding);
1383

    
1384
        conv_func = conv_get_code_conv_func(src_encoding, dest_encoding);
1385
        if (conv_func != conv_noconv)
1386
                return conv_func(inbuf, error);
1387

    
1388
        return conv_iconv_strdup(inbuf, src_encoding, dest_encoding, error);
1389
}
1390

    
1391
CodeConvFunc conv_get_code_conv_func(const gchar *src_encoding,
1392
                                     const gchar *dest_encoding)
1393
{
1394
        CodeConvFunc code_conv = conv_noconv;
1395
        CharSet src_charset;
1396
        CharSet dest_charset;
1397

    
1398
        if (!src_encoding)
1399
                src_charset = conv_get_locale_charset();
1400
        else
1401
                src_charset = conv_get_charset_from_str(src_encoding);
1402

    
1403
        /* auto detection mode */
1404
        if (!src_encoding && !dest_encoding) {
1405
                if (conv_ad_type == C_AD_JAPANESE ||
1406
                    (conv_ad_type == C_AD_BY_LOCALE && conv_is_ja_locale()))
1407
                        return conv_anytodisp;
1408
                else
1409
                        return conv_noconv;
1410
        }
1411

    
1412
        dest_charset = conv_get_charset_from_str(dest_encoding);
1413

    
1414
        if (dest_charset == C_US_ASCII)
1415
                return conv_ustodisp;
1416

    
1417
        switch (src_charset) {
1418
        case C_US_ASCII:
1419
        case C_ISO_8859_1:
1420
        case C_ISO_8859_2:
1421
        case C_ISO_8859_3:
1422
        case C_ISO_8859_4:
1423
        case C_ISO_8859_5:
1424
        case C_ISO_8859_6:
1425
        case C_ISO_8859_7:
1426
        case C_ISO_8859_8:
1427
        case C_ISO_8859_9:
1428
        case C_ISO_8859_10:
1429
        case C_ISO_8859_11:
1430
        case C_ISO_8859_13:
1431
        case C_ISO_8859_14:
1432
        case C_ISO_8859_15:
1433
        case C_ISO_8859_16:
1434
                break;
1435
        case C_ISO_2022_JP:
1436
        case C_ISO_2022_JP_2:
1437
        case C_ISO_2022_JP_3:
1438
                if (dest_charset == C_AUTO)
1439
                        code_conv = conv_jistodisp;
1440
                else if (dest_charset == C_EUC_JP)
1441
                        code_conv = conv_jistoeuc;
1442
                else if (dest_charset == C_SHIFT_JIS ||
1443
                         dest_charset == C_CP932)
1444
                        code_conv = conv_jistosjis;
1445
                else if (dest_charset == C_UTF_8)
1446
                        code_conv = conv_jistoutf8;
1447
                break;
1448
        case C_SHIFT_JIS:
1449
        case C_CP932:
1450
                if (dest_charset == C_AUTO)
1451
                        code_conv = conv_sjistodisp;
1452
                else if (dest_charset == C_ISO_2022_JP   ||
1453
                         dest_charset == C_ISO_2022_JP_2 ||
1454
                         dest_charset == C_ISO_2022_JP_3)
1455
                        code_conv = conv_sjistojis;
1456
                else if (dest_charset == C_EUC_JP)
1457
                        code_conv = conv_sjistoeuc;
1458
                else if (dest_charset == C_UTF_8)
1459
                        code_conv = conv_sjistoutf8;
1460
                break;
1461
        case C_EUC_JP:
1462
                if (dest_charset == C_AUTO)
1463
                        code_conv = conv_euctodisp;
1464
                else if (dest_charset == C_ISO_2022_JP   ||
1465
                         dest_charset == C_ISO_2022_JP_2 ||
1466
                         dest_charset == C_ISO_2022_JP_3)
1467
                        code_conv = conv_euctojis;
1468
                else if (dest_charset == C_UTF_8)
1469
                        code_conv = conv_euctoutf8;
1470
                break;
1471
        case C_UTF_8:
1472
                if (dest_charset == C_EUC_JP)
1473
                        code_conv = conv_utf8toeuc;
1474
                else if (dest_charset == C_ISO_2022_JP   ||
1475
                         dest_charset == C_ISO_2022_JP_2 ||
1476
                         dest_charset == C_ISO_2022_JP_3)
1477
                        code_conv = conv_utf8tojis;
1478
                else if (dest_charset == C_SHIFT_JIS ||
1479
                         dest_charset == C_CP932)
1480
                        code_conv = conv_utf8tosjis;
1481
                break;
1482
        default:
1483
                break;
1484
        }
1485

    
1486
        return code_conv;
1487
}
1488

    
1489
gchar *conv_iconv_strdup(const gchar *inbuf,
1490
                         const gchar *src_code, const gchar *dest_code,
1491
                         gint *error)
1492
{
1493
        iconv_t cd;
1494
        gchar *outbuf;
1495

    
1496
        if (!src_code)
1497
                src_code = conv_get_locale_charset_str();
1498
        if (!dest_code)
1499
                dest_code = CS_INTERNAL;
1500

    
1501
        cd = iconv_open(dest_code, src_code);
1502
        if (cd == (iconv_t)-1) {
1503
                if (error)
1504
                        *error = -1;
1505
                return NULL;
1506
        }
1507

    
1508
        outbuf = conv_iconv_strdup_with_cd(inbuf, cd, error);
1509

    
1510
        iconv_close(cd);
1511

    
1512
        return outbuf;
1513
}
1514

    
1515
/*
 * Convert the NUL-terminated string inbuf with an already opened iconv
 * descriptor and return the result in a buffer obtained from
 * g_malloc()/g_realloc().
 *
 * inbuf: input string; when NULL, *error is set to 0 and NULL is returned.
 * cd:    conversion descriptor; it is used but not closed here.
 * error: optional out-parameter; receives 0 when everything converted
 *        cleanly and -1 when any input was substituted or the conversion
 *        stopped early.
 *
 * Error handling per iconv() failure:
 *   EILSEQ - the offending input byte is skipped and SUBST_CHAR is
 *            written in its place, then conversion continues.
 *   EINVAL - conversion stops; what was converted so far is kept.
 *   E2BIG  - the output buffer is doubled and the call is retried.
 *   other  - a warning is logged and conversion stops.
 *
 * The returned buffer is always NUL-terminated and shrunk to fit.
 */
gchar *conv_iconv_strdup_with_cd(const gchar *inbuf, iconv_t cd, gint *error)
{
        const gchar *src;
        gchar *dest;
        gchar *dest_p;
        size_t src_left;
        size_t dest_size;
        size_t dest_left;
        size_t used;
        gint status = 0;

        if (inbuf == NULL) {
                if (error != NULL)
                        *error = 0;
                return NULL;
        }

        src = inbuf;
        src_left = strlen(inbuf);

        /* initial guess: twice the input size (including the terminator) */
        dest_size = (src_left + 1) * 2;
        dest = g_malloc(dest_size);
        dest_p = dest;
        dest_left = dest_size;

/* Double the output buffer, keeping the write position. */
#define GROW_DEST()                                     \
do {                                                    \
        used = dest_p - dest;                           \
        dest_size *= 2;                                 \
        dest = g_realloc(dest, dest_size);              \
        dest_p = dest + used;                           \
        dest_left = dest_size - used;                   \
} while (0)

        /* main conversion: retry until iconv() stops reporting errors */
        for (;;) {
                if (iconv(cd, (ICONV_CONST gchar **)&src, &src_left,
                          &dest_p, &dest_left) != (size_t)-1)
                        break;

                if (errno == E2BIG) {
                        GROW_DEST();
                        continue;
                }

                if (errno == EILSEQ) {
                        /* g_print("iconv(): at %d: %s\n", in_size - in_left, g_strerror(errno)); */
                        status = -1;
                        /* drop the invalid byte and emit a placeholder */
                        src++;
                        src_left--;
                        if (dest_left == 0)
                                GROW_DEST();
                        *dest_p++ = SUBST_CHAR;
                        dest_left--;
                        continue;
                }

                /* EINVAL ends the conversion silently, anything else
                   is reported first */
                if (errno != EINVAL)
                        g_warning("conv_iconv_strdup(): %s\n",
                                  g_strerror(errno));
                status = -1;
                break;
        }

        /* flush: let iconv write any pending shift sequence */
        while (iconv(cd, NULL, NULL, &dest_p, &dest_left) == (size_t)-1) {
                if (errno != E2BIG) {
                        g_warning("conv_iconv_strdup(): %s\n",
                                  g_strerror(errno));
                        status = -1;
                        break;
                }
                GROW_DEST();
        }

#undef GROW_DEST

        /* trim the buffer to the converted length plus terminator */
        used = dest_p - dest;
        dest = g_realloc(dest, used + 1);
        dest[used] = '\0';

        if (error != NULL)
                *error = status;

        return dest;
}
1599

    
1600
static const struct {
1601
        CharSet charset;
1602
        gchar *const name;
1603
} charsets[] = {
1604
        {C_US_ASCII,                CS_US_ASCII},
1605
        {C_US_ASCII,                CS_ANSI_X3_4_1968},
1606
        {C_UTF_8,                CS_UTF_8},
1607
        {C_UTF_7,                CS_UTF_7},
1608
        {C_ISO_8859_1,                CS_ISO_8859_1},
1609
        {C_ISO_8859_2,                CS_ISO_8859_2},
1610
        {C_ISO_8859_3,                CS_ISO_8859_3},
1611
        {C_ISO_8859_4,                CS_ISO_8859_4},
1612
        {C_ISO_8859_5,                CS_ISO_8859_5},
1613
        {C_ISO_8859_6,                CS_ISO_8859_6},
1614
        {C_ISO_8859_7,                CS_ISO_8859_7},
1615
        {C_ISO_8859_8,                CS_ISO_8859_8},
1616
        {C_ISO_8859_9,                CS_ISO_8859_9},
1617
        {C_ISO_8859_10,                CS_ISO_8859_10},
1618
        {C_ISO_8859_11,                CS_ISO_8859_11},
1619
        {C_ISO_8859_13,                CS_ISO_8859_13},
1620
        {C_ISO_8859_14,                CS_ISO_8859_14},
1621
        {C_ISO_8859_15,                CS_ISO_8859_15},
1622
        {C_BALTIC,                CS_BALTIC},
1623
        {C_CP932,                CS_CP932},
1624
        {C_CP1250,                CS_CP1250},
1625
        {C_CP1251,                CS_CP1251},
1626
        {C_CP1252,                CS_CP1252},
1627
        {C_CP1253,                CS_CP1253},
1628
        {C_CP1254,                CS_CP1254},
1629
        {C_CP1255,                CS_CP1255},
1630
        {C_CP1256,                CS_CP1256},
1631
        {C_CP1257,                CS_CP1257},
1632
        {C_CP1258,                CS_CP1258},
1633
        {C_WINDOWS_932,                CS_WINDOWS_932},
1634
        {C_WINDOWS_1250,        CS_WINDOWS_1250},
1635
        {C_WINDOWS_1251,        CS_WINDOWS_1251},
1636
        {C_WINDOWS_1252,        CS_WINDOWS_1252},
1637
        {C_WINDOWS_1253,        CS_WINDOWS_1253},
1638
        {C_WINDOWS_1254,        CS_WINDOWS_1254},
1639
        {C_WINDOWS_1255,        CS_WINDOWS_1255},
1640
        {C_WINDOWS_1256,        CS_WINDOWS_1256},
1641
        {C_WINDOWS_1257,        CS_WINDOWS_1257},
1642
        {C_WINDOWS_1258,        CS_WINDOWS_1258},
1643
        {C_KOI8_R,                CS_KOI8_R},
1644
        {C_KOI8_T,                CS_KOI8_T},
1645
        {C_KOI8_U,                CS_KOI8_U},
1646
        {C_ISO_2022_JP,                CS_ISO_2022_JP},
1647
        {C_ISO_2022_JP_2,        CS_ISO_2022_JP_2},
1648
        {C_ISO_2022_JP_3,        CS_ISO_2022_JP_3},
1649
        {C_EUC_JP,                CS_EUC_JP},
1650
        {C_EUC_JP,                CS_EUCJP},
1651
        {C_EUC_JP_MS,                CS_EUC_JP_MS},
1652
        {C_SHIFT_JIS,                CS_SHIFT_JIS},
1653
        {C_SHIFT_JIS,                CS_SHIFT__JIS},
1654
        {C_SHIFT_JIS,                CS_SJIS},
1655
        {C_ISO_2022_KR,                CS_ISO_2022_KR},
1656
        {C_EUC_KR,                CS_EUC_KR},
1657
        {C_ISO_2022_CN,                CS_ISO_2022_CN},
1658
        {C_EUC_CN,                CS_EUC_CN},
1659
        {C_GB2312,                CS_GB2312},
1660
        {C_GBK,                        CS_GBK},
1661
        {C_EUC_TW,                CS_EUC_TW},
1662
        {C_BIG5,                CS_BIG5},
1663
        {C_BIG5_HKSCS,                CS_BIG5_HKSCS},
1664
        {C_TIS_620,                CS_TIS_620},
1665
        {C_WINDOWS_874,                CS_WINDOWS_874},
1666
        {C_GEORGIAN_PS,                CS_GEORGIAN_PS},
1667
        {C_TCVN5712_1,                CS_TCVN5712_1},
1668
        {C_ISO_8859_16,                CS_ISO_8859_16},
1669
};
1670

    
1671
static const struct {
1672
        gchar *const locale;
1673
        CharSet charset;
1674
        CharSet out_charset;
1675
} locale_table[] = {
1676
        {"ja_JP.eucJP"        , C_EUC_JP        , C_ISO_2022_JP},
1677
        {"ja_JP.EUC-JP"        , C_EUC_JP        , C_ISO_2022_JP},
1678
        {"ja_JP.EUC"        , C_EUC_JP        , C_ISO_2022_JP},
1679
        {"ja_JP.ujis"        , C_EUC_JP        , C_ISO_2022_JP},
1680
        {"ja_JP.SJIS"        , C_SHIFT_JIS        , C_ISO_2022_JP},
1681
        {"ja_JP.JIS"        , C_ISO_2022_JP        , C_ISO_2022_JP},
1682
#ifdef G_OS_WIN32
1683
        {"ja_JP"        , C_CP932        , C_ISO_2022_JP},
1684
#elif defined(__APPLE__)
1685
        {"ja_JP"        , C_UTF_8        , C_ISO_2022_JP},
1686
#else
1687
        {"ja_JP"        , C_EUC_JP        , C_ISO_2022_JP},
1688
#endif
1689
        {"ko_KR.EUC-KR"        , C_EUC_KR        , C_EUC_KR},
1690
        {"ko_KR"        , C_EUC_KR        , C_EUC_KR},
1691
        {"zh_CN.GB2312"        , C_GB2312        , C_GB2312},
1692
        {"zh_CN.GBK"        , C_GBK                , C_GBK},
1693
        {"zh_CN"        , C_GB2312        , C_GB2312},
1694
        {"zh_HK"        , C_BIG5_HKSCS        , C_BIG5_HKSCS},
1695
        {"zh_TW.eucTW"        , C_EUC_TW        , C_BIG5},
1696
        {"zh_TW.EUC-TW"        , C_EUC_TW        , C_BIG5},
1697
        {"zh_TW.Big5"        , C_BIG5        , C_BIG5},
1698
        {"zh_TW"        , C_BIG5        , C_BIG5},
1699

    
1700
        {"ru_RU.KOI8-R"        , C_KOI8_R        , C_KOI8_R},
1701
        {"ru_RU.KOI8R"        , C_KOI8_R        , C_KOI8_R},
1702
        {"ru_RU.CP1251"        , C_WINDOWS_1251, C_KOI8_R},
1703
        {"ru_RU"        , C_ISO_8859_5        , C_KOI8_R},
1704
        {"tg_TJ"        , C_KOI8_T        , C_KOI8_T},
1705
        {"ru_UA"        , C_KOI8_U        , C_KOI8_U},
1706
        {"uk_UA.CP1251"        , C_WINDOWS_1251, C_KOI8_U},
1707
        {"uk_UA"        , C_KOI8_U        , C_KOI8_U},
1708

    
1709
        {"be_BY"        , C_WINDOWS_1251, C_WINDOWS_1251},
1710
        {"bg_BG"        , C_WINDOWS_1251, C_WINDOWS_1251},
1711

    
1712
        {"yi_US"        , C_WINDOWS_1255, C_WINDOWS_1255},
1713

    
1714
        {"af_ZA"        , C_ISO_8859_1  , C_ISO_8859_1},
1715
        {"br_FR"        , C_ISO_8859_1        , C_ISO_8859_1},
1716
        {"ca_ES"        , C_ISO_8859_1        , C_ISO_8859_1},
1717
        {"da_DK"        , C_ISO_8859_1        , C_ISO_8859_1},
1718
        {"de_AT"        , C_ISO_8859_1        , C_ISO_8859_1},
1719
        {"de_BE"        , C_ISO_8859_1        , C_ISO_8859_1},
1720
        {"de_CH"        , C_ISO_8859_1        , C_ISO_8859_1},
1721
        {"de_DE"        , C_ISO_8859_1        , C_ISO_8859_1},
1722
        {"de_LU"        , C_ISO_8859_1        , C_ISO_8859_1},
1723
        {"en_AU"        , C_ISO_8859_1        , C_ISO_8859_1},
1724
        {"en_BW"        , C_ISO_8859_1        , C_ISO_8859_1},
1725
        {"en_CA"        , C_ISO_8859_1        , C_ISO_8859_1},
1726
        {"en_DK"        , C_ISO_8859_1        , C_ISO_8859_1},
1727
        {"en_GB"        , C_ISO_8859_1        , C_ISO_8859_1},
1728
        {"en_HK"        , C_ISO_8859_1        , C_ISO_8859_1},
1729
        {"en_IE"        , C_ISO_8859_1        , C_ISO_8859_1},
1730
        {"en_NZ"        , C_ISO_8859_1        , C_ISO_8859_1},
1731
        {"en_PH"        , C_ISO_8859_1        , C_ISO_8859_1},
1732
        {"en_SG"        , C_ISO_8859_1        , C_ISO_8859_1},
1733
        {"en_US"        , C_ISO_8859_1        , C_ISO_8859_1},
1734
        {"en_ZA"        , C_ISO_8859_1        , C_ISO_8859_1},
1735
        {"en_ZW"        , C_ISO_8859_1        , C_ISO_8859_1},
1736
        {"es_AR"        , C_ISO_8859_1        , C_ISO_8859_1},
1737
        {"es_BO"        , C_ISO_8859_1        , C_ISO_8859_1},
1738
        {"es_CL"        , C_ISO_8859_1        , C_ISO_8859_1},
1739
        {"es_CO"        , C_ISO_8859_1        , C_ISO_8859_1},
1740
        {"es_CR"        , C_ISO_8859_1        , C_ISO_8859_1},
1741
        {"es_DO"        , C_ISO_8859_1        , C_ISO_8859_1},
1742
        {"es_EC"        , C_ISO_8859_1        , C_ISO_8859_1},
1743
        {"es_ES"        , C_ISO_8859_1        , C_ISO_8859_1},
1744
        {"es_GT"        , C_ISO_8859_1        , C_ISO_8859_1},
1745
        {"es_HN"        , C_ISO_8859_1        , C_ISO_8859_1},
1746
        {"es_MX"        , C_ISO_8859_1        , C_ISO_8859_1},
1747
        {"es_NI"        , C_ISO_8859_1        , C_ISO_8859_1},
1748
        {"es_PA"        , C_ISO_8859_1        , C_ISO_8859_1},
1749
        {"es_PE"        , C_ISO_8859_1        , C_ISO_8859_1},
1750
        {"es_PR"        , C_ISO_8859_1        , C_ISO_8859_1},
1751
        {"es_PY"        , C_ISO_8859_1        , C_ISO_8859_1},
1752
        {"es_SV"        , C_ISO_8859_1        , C_ISO_8859_1},
1753
        {"es_US"        , C_ISO_8859_1        , C_ISO_8859_1},
1754
        {"es_UY"        , C_ISO_8859_1        , C_ISO_8859_1},
1755
        {"es_VE"        , C_ISO_8859_1        , C_ISO_8859_1},
1756
        {"et_EE"        , C_ISO_8859_1        , C_ISO_8859_1},
1757
        {"eu_ES"        , C_ISO_8859_1        , C_ISO_8859_1},
1758
        {"fi_FI"        , C_ISO_8859_1        , C_ISO_8859_1},
1759
        {"fo_FO"        , C_ISO_8859_1        , C_ISO_8859_1},
1760
        {"fr_BE"        , C_ISO_8859_1        , C_ISO_8859_1},
1761
        {"fr_CA"        , C_ISO_8859_1        , C_ISO_8859_1},
1762
        {"fr_CH"        , C_ISO_8859_1        , C_ISO_8859_1},
1763
        {"fr_FR"        , C_ISO_8859_1        , C_ISO_8859_1},
1764
        {"fr_LU"        , C_ISO_8859_1        , C_ISO_8859_1},
1765
        {"ga_IE"        , C_ISO_8859_1        , C_ISO_8859_1},
1766
        {"gl_ES"        , C_ISO_8859_1        , C_ISO_8859_1},
1767
        {"gv_GB"        , C_ISO_8859_1        , C_ISO_8859_1},
1768
        {"id_ID"        , C_ISO_8859_1        , C_ISO_8859_1},
1769
        {"is_IS"        , C_ISO_8859_1        , C_ISO_8859_1},
1770
        {"it_CH"        , C_ISO_8859_1        , C_ISO_8859_1},
1771
        {"it_IT"        , C_ISO_8859_1        , C_ISO_8859_1},
1772
        {"kl_GL"        , C_ISO_8859_1        , C_ISO_8859_1},
1773
        {"kw_GB"        , C_ISO_8859_1        , C_ISO_8859_1},
1774
        {"ms_MY"        , C_ISO_8859_1        , C_ISO_8859_1},
1775
        {"nl_BE"        , C_ISO_8859_1        , C_ISO_8859_1},
1776
        {"nl_NL"        , C_ISO_8859_1        , C_ISO_8859_1},
1777
        {"nn_NO"        , C_ISO_8859_1        , C_ISO_8859_1},
1778
        {"no_NO"        , C_ISO_8859_1        , C_ISO_8859_1},
1779
        {"oc_FR"        , C_ISO_8859_1        , C_ISO_8859_1},
1780
        {"pt_BR"        , C_ISO_8859_1        , C_ISO_8859_1},
1781
        {"pt_PT"        , C_ISO_8859_1        , C_ISO_8859_1},
1782
        {"sq_AL"        , C_ISO_8859_1        , C_ISO_8859_1},
1783
        {"sv_FI"        , C_ISO_8859_1        , C_ISO_8859_1},
1784
        {"sv_SE"        , C_ISO_8859_1        , C_ISO_8859_1},
1785
        {"tl_PH"        , C_ISO_8859_1        , C_ISO_8859_1},
1786
        {"uz_UZ"        , C_ISO_8859_1        , C_ISO_8859_1},
1787
        {"wa_BE"        , C_ISO_8859_1        , C_ISO_8859_1},
1788

    
1789
        {"bs_BA"        , C_ISO_8859_2        , C_ISO_8859_2},
1790
        {"cs_CZ"        , C_ISO_8859_2        , C_ISO_8859_2},
1791
        {"hr_HR"        , C_ISO_8859_2        , C_ISO_8859_2},
1792
        {"hu_HU"        , C_ISO_8859_2        , C_ISO_8859_2},
1793
        {"pl_PL"        , C_ISO_8859_2        , C_ISO_8859_2},
1794
        {"ro_RO"        , C_ISO_8859_2        , C_ISO_8859_2},
1795
        {"sk_SK"        , C_ISO_8859_2        , C_ISO_8859_2},
1796
        {"sl_SI"        , C_ISO_8859_2        , C_ISO_8859_2},
1797

    
1798
        {"sr_YU@cyrillic"        , C_ISO_8859_5        , C_ISO_8859_5},
1799
        {"sr_YU"                , C_ISO_8859_2        , C_ISO_8859_2},
1800

    
1801
        {"mt_MT"                , C_ISO_8859_3        , C_ISO_8859_3},
1802

    
1803
        {"lt_LT.iso88594"        , C_ISO_8859_4        , C_ISO_8859_4},
1804
        {"lt_LT.ISO8859-4"        , C_ISO_8859_4        , C_ISO_8859_4},
1805
        {"lt_LT.ISO_8859-4"        , C_ISO_8859_4        , C_ISO_8859_4},
1806
        {"lt_LT"                , C_ISO_8859_13        , C_ISO_8859_13},
1807

    
1808
        {"mk_MK"        , C_ISO_8859_5        , C_ISO_8859_5},
1809

    
1810
        {"ar_AE"        , C_ISO_8859_6        , C_ISO_8859_6},
1811
        {"ar_BH"        , C_ISO_8859_6        , C_ISO_8859_6},
1812
        {"ar_DZ"        , C_ISO_8859_6        , C_ISO_8859_6},
1813
        {"ar_EG"        , C_ISO_8859_6        , C_ISO_8859_6},
1814
        {"ar_IQ"        , C_ISO_8859_6        , C_ISO_8859_6},
1815
        {"ar_JO"        , C_ISO_8859_6        , C_ISO_8859_6},
1816
        {"ar_KW"        , C_ISO_8859_6        , C_ISO_8859_6},
1817
        {"ar_LB"        , C_ISO_8859_6        , C_ISO_8859_6},
1818
        {"ar_LY"        , C_ISO_8859_6        , C_ISO_8859_6},
1819
        {"ar_MA"        , C_ISO_8859_6        , C_ISO_8859_6},
1820
        {"ar_OM"        , C_ISO_8859_6        , C_ISO_8859_6},
1821
        {"ar_QA"        , C_ISO_8859_6        , C_ISO_8859_6},
1822
        {"ar_SA"        , C_ISO_8859_6        , C_ISO_8859_6},
1823
        {"ar_SD"        , C_ISO_8859_6        , C_ISO_8859_6},
1824
        {"ar_SY"        , C_ISO_8859_6        , C_ISO_8859_6},
1825
        {"ar_TN"        , C_ISO_8859_6        , C_ISO_8859_6},
1826
        {"ar_YE"        , C_ISO_8859_6        , C_ISO_8859_6},
1827

    
1828
        {"el_GR"        , C_ISO_8859_7        , C_ISO_8859_7},
1829
        {"he_IL"        , C_ISO_8859_8        , C_ISO_8859_8},
1830
        {"iw_IL"        , C_ISO_8859_8        , C_ISO_8859_8},
1831
        {"tr_TR"        , C_ISO_8859_9        , C_ISO_8859_9},
1832

    
1833
        {"lv_LV"        , C_ISO_8859_13        , C_ISO_8859_13},
1834
        {"mi_NZ"        , C_ISO_8859_13        , C_ISO_8859_13},
1835

    
1836
        {"cy_GB"        , C_ISO_8859_14        , C_ISO_8859_14},
1837

    
1838
        {"ar_IN"        , C_UTF_8        , C_UTF_8},
1839
        {"en_IN"        , C_UTF_8        , C_UTF_8},
1840
        {"se_NO"        , C_UTF_8        , C_UTF_8},
1841
        {"ta_IN"        , C_UTF_8        , C_UTF_8},
1842
        {"te_IN"        , C_UTF_8        , C_UTF_8},
1843
        {"ur_PK"        , C_UTF_8        , C_UTF_8},
1844

    
1845
        {"th_TH"        , C_TIS_620        , C_TIS_620},
1846
        /* {"th_TH"        , C_WINDOWS_874}, */
1847
        /* {"th_TH"        , C_ISO_8859_11}, */
1848

    
1849
        {"ka_GE"        , C_GEORGIAN_PS        , C_GEORGIAN_PS},
1850
        {"vi_VN.TCVN"        , C_TCVN5712_1        , C_TCVN5712_1},
1851

    
1852
        {"C"                        , C_US_ASCII        , C_US_ASCII},
1853
        {"POSIX"                , C_US_ASCII        , C_US_ASCII},
1854
        {"ANSI_X3.4-1968"        , C_US_ASCII        , C_US_ASCII},
1855
};
1856

    
1857
static GHashTable *conv_get_charset_to_str_table(void)
1858
{
1859
        static GHashTable *table;
1860
        gint i;
1861
        S_LOCK_DEFINE_STATIC(table);
1862

    
1863
        S_LOCK(table);
1864

    
1865
        if (table) {
1866
                S_UNLOCK(table);
1867
                return table;
1868
        }
1869

    
1870
        table = g_hash_table_new(NULL, g_direct_equal);
1871

    
1872
        for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
1873
                if (g_hash_table_lookup(table, GUINT_TO_POINTER(charsets[i].charset))
1874
                    == NULL) {
1875
                        g_hash_table_insert
1876
                                (table, GUINT_TO_POINTER(charsets[i].charset),
1877
                                 charsets[i].name);
1878
                }
1879
        }
1880

    
1881
        S_UNLOCK(table);
1882
        return table;
1883
}
1884

    
1885
static GHashTable *conv_get_charset_from_str_table(void)
1886
{
1887
        static GHashTable *table;
1888
        S_LOCK_DEFINE_STATIC(table);
1889

    
1890
        gint i;
1891

    
1892
        S_LOCK(table);
1893

    
1894
        if (table) {
1895
                S_UNLOCK(table);
1896
                return table;
1897
        }
1898

    
1899
        table = g_hash_table_new(str_case_hash, str_case_equal);
1900

    
1901
        for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
1902
                g_hash_table_insert(table, charsets[i].name,
1903
                                    GUINT_TO_POINTER(charsets[i].charset));
1904
        }
1905

    
1906
        S_UNLOCK(table);
1907
        return table;
1908
}
1909

    
1910
/*
 * Look up the name registered for a CharSet value.
 * Returns NULL when the value has no entry in the charset table.
 */
const gchar *conv_get_charset_str(CharSet charset)
{
        return g_hash_table_lookup(conv_get_charset_to_str_table(),
                                   GUINT_TO_POINTER(charset));
}
1917

    
1918
CharSet conv_get_charset_from_str(const gchar *charset)
1919
{
1920
        GHashTable *table;
1921

    
1922
        if (!charset) return C_AUTO;
1923

    
1924
        table = conv_get_charset_from_str_table();
1925
        return GPOINTER_TO_UINT(g_hash_table_lookup(table, charset));
1926
}
1927

    
1928
CharSet conv_get_locale_charset(void)
1929
{
1930
        static CharSet cur_charset = -1;
1931
        const gchar *cur_locale;
1932
        const gchar *p;
1933
#if !defined(G_OS_WIN32) && !defined(__APPLE__)
1934
        gint i;
1935
#endif
1936
        S_LOCK_DEFINE_STATIC(cur_charset);
1937

    
1938
        S_LOCK(cur_charset);
1939

    
1940
        if (cur_charset != -1) {
1941
                S_UNLOCK(cur_charset);
1942
                return cur_charset;
1943
        }
1944

    
1945
        cur_locale = conv_get_current_locale();
1946
        if (!cur_locale) {
1947
                cur_charset = C_US_ASCII;
1948
                S_UNLOCK(cur_charset);
1949
                return cur_charset;
1950
        }
1951

    
1952
        if (strcasestr(cur_locale, "UTF-8") || strcasestr(cur_locale, "utf8")) {
1953
                cur_charset = C_UTF_8;
1954
                S_UNLOCK(cur_charset);
1955
                return cur_charset;
1956
        }
1957

    
1958
        if ((p = strcasestr(cur_locale, "@euro")) && p[5] == '\0') {
1959
                cur_charset = C_ISO_8859_15;
1960
                S_UNLOCK(cur_charset);
1961
                return cur_charset;
1962
        }
1963

    
1964
#if defined(G_OS_WIN32) || defined(__APPLE__)
1965
        cur_charset = conv_get_charset_from_str(conv_get_locale_charset_str());
1966

    
1967
        S_UNLOCK(cur_charset);
1968
        return cur_charset;
1969
#else
1970
        for (i = 0; i < sizeof(locale_table) / sizeof(locale_table[0]); i++) {
1971
                const gchar *p;
1972

    
1973
                /* "ja_JP.EUC" matches with "ja_JP.eucJP", "ja_JP.EUC" and
1974
                   "ja_JP". "ja_JP" matches with "ja_JP.xxxx" and "ja" */
1975
                if (!g_ascii_strncasecmp(cur_locale, locale_table[i].locale,
1976
                                         strlen(locale_table[i].locale))) {
1977
                        cur_charset = locale_table[i].charset;
1978
                        S_UNLOCK(cur_charset);
1979
                        return cur_charset;
1980
                } else if ((p = strchr(locale_table[i].locale, '_')) &&
1981
                         !strchr(p + 1, '.')) {
1982
                        if (strlen(cur_locale) == 2 &&
1983
                            !g_ascii_strncasecmp(cur_locale,
1984
                                                 locale_table[i].locale, 2)) {
1985
                                cur_charset = locale_table[i].charset;
1986
                                S_UNLOCK(cur_charset);
1987
                                return cur_charset;
1988
                        }
1989
                }
1990
        }
1991

    
1992
        cur_charset = C_AUTO;
1993
        S_UNLOCK(cur_charset);
1994
        return cur_charset;
1995
#endif
1996
}
1997

    
1998
const gchar *conv_get_locale_charset_str(void)
1999
{
2000
        static const gchar *codeset = NULL;
2001
        S_LOCK_DEFINE_STATIC(codeset);
2002

    
2003
        S_LOCK(codeset);
2004

    
2005
        if (!codeset) {
2006
#if defined(G_OS_WIN32) || defined(__APPLE__)
2007
                g_get_charset(&codeset);
2008
                if (!strcmp(codeset, CS_US_ASCII) ||
2009
                    !strcmp(codeset, CS_ANSI_X3_4_1968))
2010
                        codeset = CS_INTERNAL;
2011
#else
2012
                codeset = conv_get_charset_str(conv_get_locale_charset());
2013
#endif
2014
        }
2015

    
2016
        if (codeset) {
2017
                S_UNLOCK(codeset);
2018
                return codeset;
2019
        }
2020

    
2021
        S_UNLOCK(codeset);
2022
        return CS_INTERNAL;
2023
}
2024

    
2025
CharSet conv_get_internal_charset(void)
2026
{
2027
        return C_INTERNAL;
2028
}
2029

    
2030
const gchar *conv_get_internal_charset_str(void)
2031
{
2032
        return CS_INTERNAL;
2033
}
2034

    
2035
CharSet conv_get_outgoing_charset(void)
2036
{
2037
        static CharSet out_charset = -1;
2038
        const gchar *cur_locale;
2039
        const gchar *p;
2040
        gint i;
2041
        S_LOCK_DEFINE_STATIC(out_charset);
2042

    
2043
        S_LOCK(out_charset);
2044

    
2045
        if (out_charset != -1) {
2046
                S_UNLOCK(out_charset);
2047
                return out_charset;
2048
        }
2049

    
2050
        cur_locale = conv_get_current_locale();
2051
        if (!cur_locale) {
2052
                out_charset = C_AUTO;
2053
                S_UNLOCK(out_charset);
2054
                return out_charset;
2055
        }
2056

    
2057
        if ((p = strcasestr(cur_locale, "@euro")) && p[5] == '\0') {
2058
                out_charset = C_ISO_8859_15;
2059
                S_UNLOCK(out_charset);
2060
                return out_charset;
2061
        }
2062

    
2063
        for (i = 0; i < sizeof(locale_table) / sizeof(locale_table[0]); i++) {
2064
                const gchar *p;
2065

    
2066
                if (!g_ascii_strncasecmp(cur_locale, locale_table[i].locale,
2067
                                         strlen(locale_table[i].locale))) {
2068
                        out_charset = locale_table[i].out_charset;
2069
                        break;
2070
                } else if ((p = strchr(locale_table[i].locale, '_')) &&
2071
                         !strchr(p + 1, '.')) {
2072
                        if (strlen(cur_locale) == 2 &&
2073
                            !g_ascii_strncasecmp(cur_locale,
2074
                                                 locale_table[i].locale, 2)) {
2075
                                out_charset = locale_table[i].out_charset;
2076
                                break;
2077
                        }
2078
                }
2079
        }
2080

    
2081
        S_UNLOCK(out_charset);
2082
        return out_charset;
2083
}
2084

    
2085
const gchar *conv_get_outgoing_charset_str(void)
2086
{
2087
        CharSet out_charset;
2088
        const gchar *str;
2089

    
2090
        out_charset = conv_get_outgoing_charset();
2091
        str = conv_get_charset_str(out_charset);
2092

    
2093
        return str ? str : CS_UTF_8;
2094
}
2095

    
2096
/*
 * Tell whether the given CharSet is one of the encodings this module
 * treats as multibyte. Returns TRUE for the listed values and FALSE for
 * everything else.
 */
gboolean conv_is_multibyte_encoding(CharSet encoding)
{
        static const CharSet multibyte[] = {
                C_EUC_JP,
                C_EUC_JP_MS,
                C_EUC_KR,
                C_EUC_TW,
                C_EUC_CN,
                C_ISO_2022_JP,
                C_ISO_2022_JP_2,
                C_ISO_2022_JP_3,
                C_ISO_2022_KR,
                C_ISO_2022_CN,
                C_SHIFT_JIS,
                C_CP932,
                C_GB2312,
                C_GBK,
                C_BIG5,
                C_UTF_8,
                C_UTF_7
        };
        guint idx;

        for (idx = 0; idx < sizeof(multibyte) / sizeof(multibyte[0]); idx++) {
                if (multibyte[idx] == encoding)
                        return TRUE;
        }

        return FALSE;
}
2121

    
2122
/*
 * Return the name of the current locale, or NULL if none can be found.
 *
 * On Win32 the name comes from g_win32_getlocale(). Elsewhere the first
 * non-empty value among the environment variables LC_ALL, LC_CTYPE and
 * LANG is used, falling back to setlocale(LC_CTYPE, NULL) when
 * HAVE_LOCALE_H is defined.
 *
 * The result is resolved once under the "cur_locale" lock and cached.
 * The cache holds a private copy of the string: the pointers handed out
 * by g_getenv() and setlocale() may be overwritten or invalidated by
 * later calls into those APIs, so storing them directly in a static
 * would leave callers with a dangling pointer. The copy lives for the
 * rest of the process and must not be freed by callers.
 */
const gchar *conv_get_current_locale(void)
{
        static const gchar *cur_locale;
        S_LOCK_DEFINE_STATIC(cur_locale);

        S_LOCK(cur_locale);

        if (!cur_locale) {
#ifdef G_OS_WIN32
                cur_locale = g_win32_getlocale();
#else
                const gchar *name;

                name = g_getenv("LC_ALL");
                if (!name || *name == '\0')
                        name = g_getenv("LC_CTYPE");
                if (!name || *name == '\0')
                        name = g_getenv("LANG");
#ifdef HAVE_LOCALE_H
                if (!name || *name == '\0')
                        name = setlocale(LC_CTYPE, NULL);
#endif /* HAVE_LOCALE_H */

                /* keep our own copy; the source buffer is not ours */
                if (name)
                        cur_locale = g_strdup(name);
#endif /* G_OS_WIN32 */

                debug_print("current locale: %s\n",
                            cur_locale ? cur_locale : "(none)");
        }

        S_UNLOCK(cur_locale);
        return cur_locale;
}
2151

    
2152
gboolean conv_is_ja_locale(void)
2153
{
2154
        static gint is_ja_locale = -1;
2155
        const gchar *cur_locale;
2156
        S_LOCK_DEFINE_STATIC(is_ja_locale);
2157

    
2158
        S_LOCK(is_ja_locale);
2159

    
2160
        if (is_ja_locale != -1) {
2161
                S_UNLOCK(is_ja_locale);
2162
                return is_ja_locale != 0;
2163
        }
2164

    
2165
        is_ja_locale = 0;
2166
        cur_locale = conv_get_current_locale();
2167
        if (cur_locale) {
2168
                if (g_ascii_strncasecmp(cur_locale, "ja", 2) == 0)
2169
                        is_ja_locale = 1;
2170
        }
2171

    
2172
        S_UNLOCK(is_ja_locale);
2173
        return is_ja_locale != 0;
2174
}
2175

    
2176
/* Store the encoding auto-detection mode in the file-level conv_ad_type. */
void conv_set_autodetect_type(ConvADType type)
{
        conv_ad_type = type;
}
2180

    
2181
ConvADType conv_get_autodetect_type(void)
2182
{
2183
        return conv_ad_type;
2184
}
2185

    
2186
gchar *conv_unmime_header(const gchar *str, const gchar *default_encoding)
2187
{
2188
        gchar *buf;
2189
        gchar *decoded_str;
2190

    
2191
        if (is_ascii_str(str))
2192
                return unmime_header(str);
2193

    
2194
        if (default_encoding) {
2195
                buf = conv_codeset_strdup
2196
                        (str, default_encoding, CS_INTERNAL);
2197
                if (buf) {
2198
                        decoded_str = unmime_header(buf);
2199
                        g_free(buf);
2200
                        return decoded_str;
2201
                }
2202
        }
2203

    
2204
        if (conv_ad_type == C_AD_JAPANESE ||
2205
            (conv_ad_type == C_AD_BY_LOCALE && conv_is_ja_locale()))
2206
                buf = conv_anytodisp(str, NULL);
2207
        else
2208
                buf = conv_localetodisp(str, NULL);
2209

    
2210
        decoded_str = unmime_header(buf);
2211
        g_free(buf);
2212

    
2213
        return decoded_str;
2214
}
2215

    
2216
/* Line length at which encoded header output is folded. */
#define MAX_LINELEN             76
/* NOTE(review): presumably the absolute per-line limit; its users are
   outside this part of the file. */
#define MAX_HARD_LINELEN        996
/* Delimiters that open and close a MIME encoded-word. */
#define MIMESEP_BEGIN           "=?"
#define MIMESEP_END             "?="

/* Base64 output size for len input bytes: 4 characters for every
   (possibly partial) group of 3 bytes. */
#define B64LEN(len)     ((len) / 3 * 4 + ((len) % 3 ? 4 : 0))
2222

    
2223
/*
 * Fold the header line being written, if needed.
 *
 * The macro works on the caller's locals: dest / destp (output buffer
 * and write position), len (output buffer size), srcp (read position)
 * and left (room remaining on the current line). It contains a bare
 * "return", so it can only be used inside a function returning void.
 *
 * 1. If fewer than MAX_LINELEN + 2 bytes remain in dest, the output is
 *    NUL-terminated and the enclosing function returns.
 * 2. Otherwise, when cond holds, input remains, something has already
 *    been written and left is below MAX_LINELEN - 1:
 *      - a whitespace character just written is dropped, or else (for
 *        plain text) a whitespace character at srcp is skipped;
 *      - if input still remains, "\n " is emitted and left is reset to
 *        MAX_LINELEN - 1.
 */
#define LBREAK_IF_REQUIRED(cond, is_plain_text)                 \
{                                                               \
        if (len - (destp - dest) < MAX_LINELEN + 2) {           \
                *destp = '\0';                                  \
                return;                                         \
        }                                                       \
                                                                \
        if ((cond) && *srcp) {                                  \
                if (destp > dest && left < MAX_LINELEN - 1) {   \
                        if (g_ascii_isspace(*(destp - 1)))      \
                                destp--;                        \
                        else if (is_plain_text &&               \
                                 g_ascii_isspace(*srcp))        \
                                srcp++;                         \
                        if (*srcp) {                            \
                                *destp++ = '\n';                \
                                *destp++ = ' ';                 \
                                left = MAX_LINELEN - 1;         \
                        }                                       \
                }                                               \
        }                                                       \
}
2245

    
2246
void conv_encode_header(gchar *dest, gint len, const gchar *src,
2247
                        gint header_len, gboolean addr_field,
2248
                        const gchar *out_encoding)
2249
{
2250
        const gchar *src_encoding;
2251
        gint mimestr_len;
2252
        gchar *mimesep_enc;
2253
        gint left;
2254
        const gchar *srcp = src;
2255
        gchar *destp = dest;
2256
        gboolean use_base64;
2257

    
2258
        g_return_if_fail(g_utf8_validate(src, -1, NULL) == TRUE);
2259

    
2260
        src_encoding = CS_INTERNAL;
2261
        if (!out_encoding)
2262
                out_encoding = conv_get_outgoing_charset_str();
2263
        if (!strcmp(out_encoding, CS_US_ASCII))
2264
                out_encoding = CS_ISO_8859_1;
2265

    
2266
        if (!g_ascii_strncasecmp(out_encoding, "ISO-8859-", 9) ||
2267
            !g_ascii_strncasecmp(out_encoding, "KOI8-", 5) ||
2268
            !g_ascii_strncasecmp(out_encoding, "Windows-", 8)) {
2269
                use_base64 = FALSE;
2270
                mimesep_enc = "?Q?";
2271
        } else {
2272
                use_base64 = TRUE;
2273
                mimesep_enc = "?B?";
2274
        }
2275

    
2276
        mimestr_len = strlen(MIMESEP_BEGIN) + strlen(mimesep_enc) +
2277
                strlen(MIMESEP_END);
2278

    
2279
        left = MAX_LINELEN - header_len;
2280

    
2281
        while (*srcp) {
2282
                gboolean in_quote = FALSE;
2283

    
2284
                LBREAK_IF_REQUIRED(left <= 0, TRUE);
2285

    
2286
                while (g_ascii_isspace(*srcp)) {
2287
                        *destp++ = *srcp++;
2288
                        left--;
2289
                        LBREAK_IF_REQUIRED(left <= 0, TRUE);
2290
                }
2291

    
2292
                /* output as it is if the next word is ASCII string */
2293
                if (!is_next_nonascii(srcp)) {
2294
                        gint word_len;
2295

    
2296
                        word_len = get_next_word_len(srcp);
2297
                        LBREAK_IF_REQUIRED(left < word_len, TRUE);
2298
                        while (word_len > 0) {
2299
                                LBREAK_IF_REQUIRED(left + (MAX_HARD_LINELEN - MAX_LINELEN) <= 0, TRUE)
2300
                                *destp++ = *srcp++;
2301
                                left--;
2302
                                word_len--;
2303
                        }
2304

    
2305
                        continue;
2306
                }
2307

    
2308
                /* don't include parentheses in encoded strings */
2309
                if (addr_field && (*srcp == '(' || *srcp == ')')) {
2310
                        LBREAK_IF_REQUIRED(left < 2, FALSE);
2311
                        *destp++ = *srcp++;
2312
                        left--;
2313
                }
2314

    
2315
                while (1) {
2316
                        gint mb_len = 0;
2317
                        gint cur_len = 0;
2318
                        gchar *part_str;
2319
                        gchar *out_str;
2320
                        gchar *enc_str;
2321
                        const gchar *p = srcp;
2322
                        const gchar *block_encoding = out_encoding;
2323
                        gint out_str_len;
2324
                        gint out_enc_str_len;
2325
                        gint mime_block_len;
2326
                        gint error = 0;
2327
                        gboolean cont = FALSE;
2328

    
2329
                        while (*p != '\0') {
2330
                                if (*p == '"')
2331
                                        in_quote ^= TRUE;
2332
                                else if (!in_quote) {
2333
                                        if (g_ascii_isspace(*p) &&
2334
                                            !is_next_nonascii(p + 1))
2335
                                                break;
2336
                                        /* don't include parentheses in encoded
2337
                                           strings */
2338
                                        if (addr_field &&
2339
                                            (*p == '(' || *p == ')'))
2340
                                                break;
2341
                                }
2342

    
2343
                                mb_len = g_utf8_skip[*(guchar *)p];
2344

    
2345
                                part_str = g_strndup(srcp, cur_len + mb_len);
2346
                                out_str = conv_codeset_strdup_full
2347
                                        (part_str, src_encoding, block_encoding,
2348
                                         &error);
2349
                                if (!out_str || error != 0) {
2350
                                        g_warning("conv_encode_header(): code conversion failed. Keeping UTF-8.\n");
2351
                                        out_str = g_strdup(part_str);
2352
                                        block_encoding = CS_UTF_8;
2353
                                }
2354
                                out_str_len = strlen(out_str);
2355

    
2356
                                if (use_base64)
2357
                                        out_enc_str_len = B64LEN(out_str_len);
2358
                                else
2359
                                        out_enc_str_len =
2360
                                                qp_get_q_encoding_len
2361
                                                        ((guchar *)out_str);
2362

    
2363
                                g_free(out_str);
2364
                                g_free(part_str);
2365

    
2366
                                if (mimestr_len + strlen(block_encoding) + out_enc_str_len <= left) {
2367
                                        cur_len += mb_len;
2368
                                        p += mb_len;
2369
                                } else if (cur_len == 0) {
2370
                                        LBREAK_IF_REQUIRED(1, FALSE);
2371
                                        if (*p == '"')
2372
                                                in_quote ^= TRUE;
2373
                                        continue;
2374
                                } else {
2375
                                        cont = TRUE;
2376
                                        if (*p == '"')
2377
                                                in_quote ^= TRUE;
2378
                                        break;
2379
                                }
2380
                        }
2381

    
2382
                        if (cur_len > 0) {
2383
                                error = 0;
2384
                                part_str = g_strndup(srcp, cur_len);
2385
                                out_str = conv_codeset_strdup_full
2386
                                        (part_str, src_encoding, block_encoding,
2387
                                         &error);
2388
                                if (!out_str || error != 0) {
2389
                                        g_warning("conv_encode_header(): code conversion failed\n");
2390
                                        out_str = g_strdup(part_str);
2391
                                        block_encoding = CS_UTF_8;
2392
                                }
2393
                                out_str_len = strlen(out_str);
2394

    
2395
                                if (use_base64)
2396
                                        out_enc_str_len = B64LEN(out_str_len);
2397
                                else
2398
                                        out_enc_str_len =
2399
                                                qp_get_q_encoding_len
2400
                                                        ((guchar *)out_str);
2401

    
2402
                                enc_str = g_malloc(out_enc_str_len + 1);
2403
                                if (use_base64)
2404
                                        base64_encode(enc_str,
2405
                                                      (guchar *)out_str,
2406
                                                      out_str_len);
2407
                                else
2408
                                        qp_q_encode(enc_str, (guchar *)out_str);
2409

    
2410
                                /* output MIME-encoded string block */
2411
                                mime_block_len = mimestr_len +
2412
                                        strlen(block_encoding) +
2413
                                        strlen(enc_str);
2414
                                g_snprintf(destp, mime_block_len + 1,
2415
                                           MIMESEP_BEGIN "%s%s%s" MIMESEP_END,
2416
                                           block_encoding, mimesep_enc,
2417
                                           enc_str);
2418
                                destp += mime_block_len;
2419
                                srcp += cur_len;
2420

    
2421
                                left -= mime_block_len;
2422

    
2423
                                g_free(enc_str);
2424
                                g_free(out_str);
2425
                                g_free(part_str);
2426
                        }
2427

    
2428
                        LBREAK_IF_REQUIRED(cont, FALSE);
2429

    
2430
                        if (cur_len == 0)
2431
                                break;
2432
                }
2433
        }
2434

    
2435
        *destp = '\0';
2436
}
2437

    
2438
#undef LBREAK_IF_REQUIRED
2439

    
2440
/* Store the uppercase hexadecimal digit for `val' (expected 0..15)
 * at *outp.  The pointer itself is not advanced. */
#define INT_TO_HEX_UPPER(outp, val)                \
do {                                                \
        if ((val) < 10)                                \
                *(outp) = '0' + (val);                \
        else                                        \
                *(outp) = 'A' + (val) - 10;        \
} while (0)

/* Non-zero when byte `c' has to be written as %XX in an RFC 2231
 * parameter value: control characters (including DEL, 0x7f), bytes
 * with the high bit set, whitespace, and the listed punctuation. */
#define IS_ESCAPE_CHAR(c)                                        \
        ((c) < 0x20 || (c) >= 0x7f ||                                \
         strchr("\t \r\n*'%!#$&~`,{}|()<>@,;:\\\"/[]?=", (c)) != NULL)
2451

    
2452
static gchar *encode_rfc2231_filename(const gchar *str)
2453
{
2454
        const gchar *p;
2455
        gchar *out;
2456
        gchar *outp;
2457

    
2458
        outp = out = g_malloc(strlen(str) * 3 + 1);
2459

    
2460
        for (p = str; *p != '\0'; ++p) {
2461
                guchar ch = *(guchar *)p;
2462

    
2463
                if (IS_ESCAPE_CHAR(ch)) {
2464
                        *outp++ = '%';
2465
                        INT_TO_HEX_UPPER(outp, ch >> 4);
2466
                        ++outp;
2467
                        INT_TO_HEX_UPPER(outp, ch & 0x0f);
2468
                        ++outp;
2469
                } else
2470
                        *outp++ = ch;
2471
        }
2472

    
2473
        *outp = '\0';
2474
        return out;
2475
}
2476

    
2477
gchar *conv_encode_filename(const gchar *src, const gchar *param_name,
2478
                            const gchar *out_encoding)
2479
{
2480
        gint name_len, max_linelen;
2481
        gchar *out_str, *enc_str;
2482
        gchar cur_param[80];
2483
        GString *string;
2484
        gint count = 0;
2485
        gint cur_left_len;
2486
        gchar *p;
2487

    
2488
        g_return_val_if_fail(src != NULL, NULL);
2489
        g_return_val_if_fail(param_name != NULL, NULL);
2490

    
2491
        if (is_ascii_str(src))
2492
                return g_strdup_printf(" %s=\"%s\"", param_name, src);
2493

    
2494
        name_len = strlen(param_name);
2495
        max_linelen = MAX_LINELEN - name_len - 3;
2496

    
2497
        if (!out_encoding)
2498
                out_encoding = conv_get_outgoing_charset_str();
2499
        if (!strcmp(out_encoding, CS_US_ASCII))
2500
                out_encoding = CS_ISO_8859_1;
2501

    
2502
        out_str = conv_codeset_strdup(src, CS_INTERNAL, out_encoding);
2503
        if (!out_str)
2504
                return NULL;
2505
        enc_str = encode_rfc2231_filename(out_str);
2506
        g_free(out_str);
2507

    
2508
        if (strlen(enc_str) <= max_linelen) {
2509
                gchar *ret;
2510
                ret = g_strdup_printf(" %s*=%s''%s",
2511
                                      param_name, out_encoding, enc_str);
2512
                g_free(enc_str);
2513
                return ret;
2514
        }
2515

    
2516
        string = g_string_new(NULL);
2517
        g_string_printf(string, " %s*0*=%s''", param_name, out_encoding);
2518
        cur_left_len = MAX_LINELEN - string->len;
2519

    
2520
        p = enc_str;
2521

    
2522
        while (*p != '\0') {
2523
                if ((*p == '%' && cur_left_len < 4) ||
2524
                    (*p != '%' && cur_left_len < 2)) {
2525
                        gint len;
2526

    
2527
                        g_string_append(string, ";\n");
2528
                        ++count;
2529
                        len = g_snprintf(cur_param, sizeof(cur_param),
2530
                                         " %s*%d*=", param_name, count);
2531
                        g_string_append(string, cur_param);
2532
                        cur_left_len = MAX_LINELEN - len;
2533
                }
2534

    
2535
                if (*p == '%') {
2536
                        g_string_append_len(string, p, 3);
2537
                        p += 3;
2538
                        cur_left_len -= 3;
2539
                } else {
2540
                        g_string_append_c(string, *p);
2541
                        ++p;
2542
                        --cur_left_len;
2543
                }
2544
        }
2545

    
2546
        g_free(enc_str);
2547

    
2548
        return g_string_free(string, FALSE);
2549
}
2550

    
2551
CharSet conv_check_file_encoding(const gchar *file)
2552
{
2553
        FILE *fp;
2554
        gchar buf[BUFFSIZE];
2555
        CharSet enc;
2556
        const gchar *enc_str;
2557
        gboolean is_locale = TRUE, is_utf8 = TRUE;
2558

    
2559
        g_return_val_if_fail(file != NULL, C_AUTO);
2560

    
2561
        enc = conv_get_locale_charset();
2562
        enc_str = conv_get_locale_charset_str();
2563
        if (enc == C_UTF_8)
2564
                is_locale = FALSE;
2565

    
2566
        if ((fp = g_fopen(file, "rb")) == NULL) {
2567
                FILE_OP_ERROR(file, "fopen");
2568
                return C_AUTO;
2569
        }
2570

    
2571
        while (fgets(buf, sizeof(buf), fp) != NULL) {
2572
                gchar *str;
2573
                gint error = 0;
2574

    
2575
                if (is_locale) {
2576
                        str = conv_codeset_strdup_full(buf, enc_str,
2577
                                                       CS_INTERNAL, &error);
2578
                        if (!str || error != 0)
2579
                                is_locale = FALSE;
2580
                        g_free(str);
2581
                }
2582

    
2583
                if (is_utf8 && g_utf8_validate(buf, -1, NULL) == FALSE) {
2584
                        is_utf8 = FALSE;
2585
                }
2586

    
2587
                if (!is_locale && !is_utf8)
2588
                        break;
2589
        }
2590

    
2591
        fclose(fp);
2592

    
2593
        if (is_locale)
2594
                return enc;
2595
        else if (is_utf8)
2596
                return C_UTF_8;
2597
        else
2598
                return C_AUTO;
2599
}
2600

    
2601
gchar *conv_filename_from_utf8(const gchar *utf8_file)
2602
{
2603
        gchar *fs_file;
2604
        GError *error = NULL;
2605

    
2606
        g_return_val_if_fail(utf8_file != NULL, NULL);
2607

    
2608
        fs_file = g_filename_from_utf8(utf8_file, -1, NULL, NULL, &error);
2609
        if (error) {
2610
                g_warning("failed to convert encoding of file name: %s\n",
2611
                          error->message);
2612
                g_error_free(error);
2613
        }
2614
        if (!fs_file)
2615
                fs_file = g_strdup(utf8_file);
2616

    
2617
        return fs_file;
2618
}
2619

    
2620
gchar *conv_filename_to_utf8(const gchar *fs_file)
2621
{
2622
        gchar *utf8_file;
2623
        GError *error = NULL;
2624

    
2625
        g_return_val_if_fail(fs_file != NULL, NULL);
2626

    
2627
        utf8_file = g_filename_to_utf8(fs_file, -1, NULL, NULL, &error);
2628
        if (error) {
2629
                g_warning("failed to convert encoding of file name: %s\n",
2630
                          error->message);
2631
                g_error_free(error);
2632
        }
2633
        if (!utf8_file)
2634
                utf8_file = g_strdup(fs_file);
2635

    
2636
        return utf8_file;
2637
}