Statistics
| Branch: | Tag: | Revision:

root / libsylph / codeconv.c @ 8d7dcace

History | View | Annotate | Download (62.1 KB)

/*
 * LibSylph -- E-Mail client library
 * Copyright (C) 1999-2011 Hiroyuki Yamamoto
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
19

    
20
#ifdef HAVE_CONFIG_H
21
#  include "config.h"
22
#endif
23

    
24
#include "defs.h"
25

    
26
#include <glib.h>
27
#include <string.h>
28
#include <ctype.h>
29
#include <stdlib.h>
30
#include <errno.h>
31

    
32
#if HAVE_LOCALE_H
33
#  include <locale.h>
34
#endif
35

    
36
#include <iconv.h>
37

    
38
#include "codeconv.h"
39
#include "unmime.h"
40
#include "base64.h"
41
#include "quoted-printable.h"
42
#include "utils.h"
43

    
44
/*
 * Character set currently selected while reading or writing an
 * ISO-2022-JP style byte stream.  The escape sequences noted below are
 * the ones the converters in this file recognise or emit for each state.
 */
typedef enum
{
        JIS_ASCII,      /* ESC ( B  or  ESC ( J  (also selected by SI) */
        JIS_KANJI,      /* ESC $ @  or  ESC $ B */
        JIS_HWKANA,     /* ESC ( I  (also selected by SO) */
        JIS_AUXKANJI,   /* ESC $ ( D */
        JIS_UDC         /* ESC $ ( ?  (ISO-2022-JP-MS extension) */
} JISState;
52

    
53
/* Written to the output in place of bytes that cannot be converted. */
#define SUBST_CHAR        '_'
/* Control bytes used by the converters below. */
#define ESC                '\033'        /* introduces an escape sequence */
#define SO                0x0e        /* shift out: switches to half-width kana */
#define SI                0x0f        /* shift in: switches back to ASCII */
#define SS2                0x8e        /* EUC-JP prefix for a half-width kana byte */
#define SS3                0x8f        /* EUC-JP prefix for an auxiliary kanji pair */
59

    
60
/*
 * EUC-JP byte classification.  Every macro masks its argument with 0xff
 * so it gives the same answer for signed and unsigned char values.
 */

/* byte of a two-byte kanji sequence (0xa1 - 0xfe) */
#define iseuckanji(c) \
        (((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xfe)
/* SS2 prefix that precedes a half-width kana byte */
#define iseuchwkana1(c) \
        (((c) & 0xff) == SS2)
/* half-width kana byte that follows SS2 (0xa1 - 0xdf) */
#define iseuchwkana2(c) \
        (((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xdf)
/* SS3 prefix that precedes an auxiliary kanji pair */
#define iseucaux(c) \
        (((c) & 0xff) == SS3)
68

    
69
/*
 * Shift_JIS byte classification.  Arguments are masked with 0xff so
 * signed and unsigned char values are treated alike.
 */

/* lead byte of a two-byte kanji (0x81 - 0x9f, 0xe0 - 0xef) */
#define issjiskanji1(c) \
        ((((c) & 0xff) >= 0x81 && ((c) & 0xff) <= 0x9f) || \
         (((c) & 0xff) >= 0xe0 && ((c) & 0xff) <= 0xef))
/* trail byte of a two-byte kanji (0x40 - 0x7e, 0x80 - 0xfc) */
#define issjiskanji2(c) \
        ((((c) & 0xff) >= 0x40 && ((c) & 0xff) <= 0x7e) || \
         (((c) & 0xff) >= 0x80 && ((c) & 0xff) <= 0xfc))
/* single-byte half-width kana (0xa1 - 0xdf) */
#define issjishwkana(c) \
        (((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xdf)
/* lead byte in the extension area (0xf0 - 0xfc) */
#define issjisext(c) \
        (((c) & 0xff) >= 0xf0 && ((c) & 0xff) <= 0xfc)
/* lead byte in the user-defined character area (0xf0 - 0xf9) */
#define issjisudc(c) \
        (((c) & 0xff) >= 0xf0 && ((c) & 0xff) <= 0xf9)
/*
 * c1/c2 form a two-byte code in the IBM extension area: lead byte
 * 0xfa - 0xfb with any valid trail byte, or lead byte 0xfc with a trail
 * byte in 0x40 - 0x4b.
 */
#define issjisibmext(c1, c2) \
        ((((c1) & 0xff) >= 0xfa && ((c1) & 0xff) <= 0xfb && \
          issjiskanji2(c2)) ||                              \
         (((c1) & 0xff) == 0xfc &&                          \
          ((c2) & 0xff) >= 0x40 && ((c2) & 0xff) <= 0x4b))
86

    
87
/*
 * 7-bit JIS byte classification.  Arguments are masked with 0xff so
 * signed and unsigned char values are treated alike.
 */

/* byte of a two-byte kanji (0x21 - 0x7e) */
#define isjiskanji(c) \
        (((c) & 0xff) >= 0x21 && ((c) & 0xff) <= 0x7e)
/* half-width kana byte (0x21 - 0x5f) */
#define isjishwkana(c) \
        (((c) & 0xff) >= 0x21 && ((c) & 0xff) <= 0x5f)
/* first byte of a user-defined character (0x21 - 0x34) */
#define isjisudc(c) \
        (((c) & 0xff) >= 0x21 && ((c) & 0xff) <= 0x34)
/* lower half of the user-defined range (0x21 - 0x2a) */
#define isjisudclow(c) \
        (((c) & 0xff) >= 0x21 && ((c) & 0xff) <= 0x2a)
/* upper half of the user-defined range (0x2b - 0x34) */
#define isjisudchigh(c) \
        (((c) & 0xff) >= 0x2b && ((c) & 0xff) <= 0x34)
97

    
98
/* U+0080 - U+07FF: lead byte 110xxxxx followed by one 10xxxxxx byte */
#define isutf8_2_1(c) \
        (((c) & 0xe0) == 0xc0)
#define isutf8_2_2(c) \
        (((c) & 0xc0) == 0x80)
/* U+0800 - U+FFFF: lead byte 1110xxxx followed by 10xxxxxx bytes */
#define isutf8_3_1(c) \
        (((c) & 0xf0) == 0xe0)
#define isutf8_3_2(c) \
        (((c) & 0xc0) == 0x80)

/*
 * True when s starts with the UTF-8 byte order mark EF BB BF.  The
 * argument is fully parenthesised so that any pointer expression
 * (including a conditional expression) may be passed.  s is evaluated
 * up to three times, so it must not have side effects.
 */
#define isutf8bom(s) \
        (((s)[0] & 0xff) == 0xef && ((s)[1] & 0xff) == 0xbb && \
         ((s)[2] & 0xff) == 0xbf)
112

    
113
/*
 * Escape-sequence emitters for the *tojis converters.
 *
 * Each macro expects two variables in the calling scope: `state' (the
 * current JISState) and `out' (a byte pointer into the output buffer).
 * If the requested character set is not already selected, the matching
 * escape sequence is appended through `out' and `state' is updated;
 * otherwise nothing is written.
 *
 * The bodies are wrapped in do { } while (0) so that an invocation
 * followed by a semicolon is a single statement and can be used safely
 * in an unbraced if/else.
 */

/* select two-byte kanji: ESC $ B */
#define K_IN()                                        \
        do {                                        \
                if (state != JIS_KANJI) {        \
                        *out++ = ESC;                \
                        *out++ = '$';                \
                        *out++ = 'B';                \
                        state = JIS_KANJI;        \
                }                                \
        } while (0)

/* return to ASCII: ESC ( B */
#define K_OUT()                                        \
        do {                                        \
                if (state != JIS_ASCII) {        \
                        *out++ = ESC;                \
                        *out++ = '(';                \
                        *out++ = 'B';                \
                        state = JIS_ASCII;        \
                }                                \
        } while (0)

/* select half-width kana: ESC ( I */
#define HW_IN()                                        \
        do {                                        \
                if (state != JIS_HWKANA) {        \
                        *out++ = ESC;                \
                        *out++ = '(';                \
                        *out++ = 'I';                \
                        state = JIS_HWKANA;        \
                }                                \
        } while (0)

/* select auxiliary kanji: ESC $ ( D */
#define AUX_IN()                                \
        do {                                        \
                if (state != JIS_AUXKANJI) {        \
                        *out++ = ESC;                \
                        *out++ = '$';                \
                        *out++ = '(';                \
                        *out++ = 'D';                \
                        state = JIS_AUXKANJI;        \
                }                                \
        } while (0)

/* select user-defined characters: ESC $ ( ? */
#define UDC_IN()                                \
        do {                                        \
                if (state != JIS_UDC) {                \
                        *out++ = ESC;                \
                        *out++ = '$';                \
                        *out++ = '(';                \
                        *out++ = '?';                \
                        state = JIS_UDC;        \
                }                                \
        } while (0)
154

    
155
static ConvADType conv_ad_type = C_AD_BY_LOCALE;
156
static gboolean allow_jisx0201_kana = FALSE;
157

    
158
static gchar *conv_jistoeuc(const gchar *inbuf, gint *error);
159
static gchar *conv_jistosjis(const gchar *inbuf, gint *error);
160
static gchar *conv_euctojis(const gchar *inbuf, gint *error);
161
static gchar *conv_sjistojis(const gchar *inbuf, gint *error);
162
static gchar *conv_sjistoeuc(const gchar *inbuf, gint *error);
163

    
164
static gchar *conv_jistoutf8(const gchar *inbuf, gint *error);
165
static gchar *conv_sjistoutf8(const gchar *inbuf, gint *error);
166
static gchar *conv_euctoutf8(const gchar *inbuf, gint *error);
167
static gchar *conv_anytoutf8(const gchar *inbuf, gint *error);
168

    
169
static gchar *conv_utf8toeuc(const gchar *inbuf, gint *error);
170
static gchar *conv_utf8tojis(const gchar *inbuf, gint *error);
171
static gchar *conv_utf8tosjis(const gchar *inbuf, gint *error);
172

    
173
/* static void conv_unreadable_eucjp(gchar *str); */
174
static void conv_unreadable_8bit(gchar *str);
175
/* static void conv_unreadable_latin(gchar *str); */
176

    
177
static gchar *conv_jistodisp(const gchar *inbuf, gint *error);
178
static gchar *conv_sjistodisp(const gchar *inbuf, gint *error);
179
static gchar *conv_euctodisp(const gchar *inbuf, gint *error);
180

    
181
static gchar *conv_anytodisp(const gchar *inbuf, gint *error);
182
static gchar *conv_ustodisp(const gchar *inbuf, gint *error);
183
static gchar *conv_noconv(const gchar *inbuf, gint *error);
184

    
185
/*
 * Convert a NUL-terminated JIS (ISO-2022-JP style) string to EUC-JP.
 *
 * inbuf: input string; must not be NULL.
 * error: if non-NULL, receives 0 on success or -1 when an unknown
 *        escape sequence was encountered.
 *
 * Returns a buffer allocated with g_malloc() holding the converted,
 * NUL-terminated string.
 *
 * Recognised escapes: ESC $ @ / ESC $ B (kanji), ESC $ ( D (auxiliary
 * kanji), ESC ( B / ESC ( J (ASCII), ESC ( I (half-width kana), plus
 * the SO / SI shift bytes.  After an unknown escape the ESC byte is
 * dropped, the state falls back to ASCII and the remaining bytes of
 * the sequence are copied as ordinary characters.
 */
static gchar *conv_jistoeuc(const gchar *inbuf, gint *error)
{
        gchar *outbuf;
        const guchar *in = (const guchar *)inbuf;
        guchar *out;
        JISState state = JIS_ASCII;
        gint error_ = 0;

        /* a half-width kana byte expands to two output bytes (SS2 + byte) */
        outbuf = g_malloc(strlen(inbuf) * 2 + 1);
        out = (guchar *)outbuf;

        while (*in != '\0') {
                if (*in == ESC) {
                        in++;
                        if (*in == '$') {
                                if (*(in + 1) == '@' || *(in + 1) == 'B') {
                                        state = JIS_KANJI;
                                        in += 2;
                                } else if (*(in + 1) == '(' &&
                                           *(in + 2) == 'D') {
                                        state = JIS_AUXKANJI;
                                        in += 3;
                                } else {
                                        /* unknown escape sequence */
                                        error_ = -1;
                                        state = JIS_ASCII;
                                }
                        } else if (*in == '(') {
                                if (*(in + 1) == 'B' || *(in + 1) == 'J') {
                                        state = JIS_ASCII;
                                        in += 2;
                                } else if (*(in + 1) == 'I') {
                                        state = JIS_HWKANA;
                                        in += 2;
                                } else {
                                        /* unknown escape sequence */
                                        error_ = -1;
                                        state = JIS_ASCII;
                                }
                        } else {
                                /* unknown escape sequence */
                                error_ = -1;
                                state = JIS_ASCII;
                        }
                } else if (*in == SO) {
                        state = JIS_HWKANA;
                        in++;
                } else if (*in == SI) {
                        state = JIS_ASCII;
                        in++;
                } else {
                        switch (state) {
                        case JIS_ASCII:
                                *out++ = *in++;
                                break;
                        case JIS_KANJI:
                                /* set the high bit on both bytes of the pair */
                                *out++ = *in++ | 0x80;
                                /* input ended in the middle of a pair */
                                if (*in == '\0') break;
                                *out++ = *in++ | 0x80;
                                break;
                        case JIS_HWKANA:
                                *out++ = SS2;
                                *out++ = *in++ | 0x80;
                                break;
                        case JIS_AUXKANJI:
                                *out++ = SS3;
                                *out++ = *in++ | 0x80;
                                /* input ended in the middle of a pair */
                                if (*in == '\0') break;
                                *out++ = *in++ | 0x80;
                                break;
                        default:
                                *out++ = *in++;
                                break;
                        }
                }
        }

        *out = '\0';

        if (error)
                *error = error_;

        return outbuf;
}
269

    
270
/*
 * Convert a NUL-terminated JIS (ISO-2022-JP style) string to Shift_JIS.
 *
 * inbuf: input string; must not be NULL.
 * error: if non-NULL, receives 0 on success or -1 when an unknown
 *        escape sequence or an invalid double-byte code was found.
 *
 * Returns a buffer allocated with g_malloc() holding the converted,
 * NUL-terminated string.
 *
 * Recognised escapes: ESC $ @ / ESC $ B (kanji), ESC $ ( ? (user-defined
 * characters), ESC ( B / ESC ( J (ASCII), ESC ( I (half-width kana),
 * plus the SO / SI shift bytes.  ESC $ ( D is not handled here and is
 * treated as an unknown escape.  Invalid double-byte codes are replaced
 * by SUBST_CHAR, one per input byte consumed (at most two).
 */
static gchar *conv_jistosjis(const gchar *inbuf, gint *error)
{
        gchar *outbuf;
        const guchar *in = (const guchar *)inbuf;
        guchar *out;
        JISState state = JIS_ASCII;
        gint error_ = 0;

        outbuf = g_malloc(strlen(inbuf) * 2 + 1);
        out = (guchar *)outbuf;

        while (*in != '\0') {
                if (*in == ESC) {
                        in++;
                        if (*in == '$') {
                                if (*(in + 1) == '@' || *(in + 1) == 'B') {
                                        state = JIS_KANJI;
                                        in += 2;
                                } else if (*(in + 1) == '(' &&
                                           *(in + 2) == '?') {
                                        /* ISO-2022-JP-MS extension */
                                        state = JIS_UDC;
                                        in += 3;
                                } else {
                                        /* unknown escape sequence */
                                        error_ = -1;
                                        state = JIS_ASCII;
                                }
                        } else if (*in == '(') {
                                if (*(in + 1) == 'B' || *(in + 1) == 'J') {
                                        state = JIS_ASCII;
                                        in += 2;
                                } else if (*(in + 1) == 'I') {
                                        state = JIS_HWKANA;
                                        in += 2;
                                } else {
                                        /* unknown escape sequence */
                                        error_ = -1;
                                        state = JIS_ASCII;
                                }
                        } else {
                                /* unknown escape sequence */
                                error_ = -1;
                                state = JIS_ASCII;
                        }
                } else if (*in == SO) {
                        state = JIS_HWKANA;
                        in++;
                } else if (*in == SI) {
                        state = JIS_ASCII;
                        in++;
                } else {
                        switch (state) {
                        case JIS_ASCII:
                                *out++ = *in++;
                                break;
                        case JIS_HWKANA:
                                *out++ = *in++ | 0x80;
                                break;
                        case JIS_KANJI:
                                /*
                                 * First bytes 0x7f - 0x97 are accepted in
                                 * addition to the regular 0x21 - 0x7e range.
                                 */
                                if ((isjiskanji(*in) ||
                                     (*in >= 0x7f && *in <= 0x97)) &&
                                    isjiskanji(*(in + 1))) {
                                        /* two JIS rows share one lead byte */
                                        *out++ = ((*in < 0x5f)
                                                 ? (((*in - 0x21) / 2) + 0x81)
                                                 : (((*in - 0x21) / 2) + 0xc1));
                                        /*
                                         * odd first byte: lower trail range,
                                         * even first byte: upper trail range
                                         */
                                        *out++ = ((*in % 2)
                                                 ? ((*(in + 1) + ((*(in + 1) < 0x60)
                                                   ? 0x1f : 0x20)))
                                                 : *(in + 1) + 0x7e);
                                        in += 2;
                                } else {
                                        error_ = -1;
                                        *out++ = SUBST_CHAR;
                                        in++;
                                        if (*in != '\0') {
                                                *out++ = SUBST_CHAR;
                                                in++;
                                        }
                                }
                                break;
                        case JIS_UDC:
                                if (isjisudc(*in) && isjiskanji(*(in + 1))) {
                                        /* user-defined rows map to lead bytes from 0xf0 */
                                        *out++ = (((*in - 0x21) / 2) + 0xf0);
                                        *out++ = ((*in % 2)
                                                 ? ((*(in + 1) + ((*(in + 1) < 0x60)
                                                   ? 0x1f : 0x20)))
                                                 : *(in + 1) + 0x7e);
                                        in += 2;
                                } else {
                                        error_ = -1;
                                        *out++ = SUBST_CHAR;
                                        in++;
                                        if (*in != '\0') {
                                                *out++ = SUBST_CHAR;
                                                in++;
                                        }
                                }
                                break;
                        default:
                                *out++ = *in++;
                                break;
                        }
                }
        }

        *out = '\0';

        if (error)
                *error = error_;

        return outbuf;
}
383

    
384
#define JIS_HWDAKUTEN                0x5e
385
#define JIS_HWHANDAKUTEN        0x5f
386

    
387
static gint conv_jis_hantozen(guchar *outbuf, guchar jis_code, guchar sound_sym)
388
{
389
        static guint16 h2z_tbl[] = {
390
                /* 0x20 - 0x2f */
391
                0x0000, 0x2123, 0x2156, 0x2157, 0x2122, 0x2126, 0x2572, 0x2521,
392
                0x2523, 0x2525, 0x2527, 0x2529, 0x2563, 0x2565, 0x2567, 0x2543,
393
                /* 0x30 - 0x3f */
394
                0x213c, 0x2522, 0x2524, 0x2526, 0x2528, 0x252a, 0x252b, 0x252d,
395
                0x252f, 0x2531, 0x2533, 0x2535, 0x2537, 0x2539, 0x253b, 0x253d,
396
                /* 0x40 - 0x4f */
397
                0x253f, 0x2541, 0x2544, 0x2546, 0x2548, 0x254a, 0x254b, 0x254c,
398
                0x254d, 0x254e, 0x254f, 0x2552, 0x2555, 0x2558, 0x255b, 0x255e,
399
                /* 0x50 - 0x5f */
400
                0x255f, 0x2560, 0x2561, 0x2562, 0x2564, 0x2566, 0x2568, 0x2569,
401
                0x256a, 0x256b, 0x256c, 0x256d, 0x256f, 0x2573, 0x212b, 0x212c
402
        };
403

    
404
        static guint16 dakuten_tbl[] = {
405
                /* 0x30 - 0x3f */
406
                0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x252c, 0x252e,
407
                0x2530, 0x2532, 0x2534, 0x2536, 0x2538, 0x253a, 0x253c, 0x253e,
408
                /* 0x40 - 0x4f */
409
                0x2540, 0x2542, 0x2545, 0x2547, 0x2549, 0x0000, 0x0000, 0x0000,
410
                0x0000, 0x0000, 0x2550, 0x2553, 0x2556, 0x2559, 0x255c, 0x0000
411
        };
412

    
413
        static guint16 handakuten_tbl[] = {
414
                /* 0x4a - 0x4e */
415
                0x2551, 0x2554, 0x2557, 0x255a, 0x255d
416
        };
417

    
418
        guint16 out_code;
419

    
420
        jis_code &= 0x7f;
421
        sound_sym &= 0x7f;
422

    
423
        if (jis_code < 0x21 || jis_code > 0x5f)
424
                return 0;
425

    
426
        if (sound_sym == JIS_HWDAKUTEN &&
427
            jis_code >= 0x36 && jis_code <= 0x4e) {
428
                out_code = dakuten_tbl[jis_code - 0x30];
429
                if (out_code != 0) {
430
                        *outbuf = out_code >> 8;
431
                        *(outbuf + 1) = out_code & 0xff;
432
                        return 2;
433
                }
434
        }
435

    
436
        if (sound_sym == JIS_HWHANDAKUTEN &&
437
            jis_code >= 0x4a && jis_code <= 0x4e) {
438
                out_code = handakuten_tbl[jis_code - 0x4a];
439
                *outbuf = out_code >> 8;
440
                *(outbuf + 1) = out_code & 0xff;
441
                return 2;
442
        }
443

    
444
        out_code = h2z_tbl[jis_code - 0x20];
445
        *outbuf = out_code >> 8;
446
        *(outbuf + 1) = out_code & 0xff;
447
        return 1;
448
}
449

    
450
/*
 * Convert a NUL-terminated EUC-JP string to JIS (ISO-2022-JP style).
 *
 * inbuf: input string; must not be NULL.
 * error: if non-NULL, receives 0 on success or -1 when an invalid byte
 *        sequence was found.
 *
 * Returns a buffer allocated with g_malloc() holding the converted,
 * NUL-terminated string.  The output always ends in the ASCII state.
 *
 * Half-width kana (SS2 + byte) are emitted with ESC ( I when
 * allow_jisx0201_kana is set; otherwise they are replaced by full-width
 * kana through conv_jis_hantozen().  Most invalid bytes are replaced by
 * SUBST_CHAR; a stray SS2 or SS3 prefix byte itself is dropped.
 */
static gchar *conv_euctojis(const gchar *inbuf, gint *error)
{
        gchar *outbuf;
        const guchar *in = (const guchar *)inbuf;
        guchar *out;
        JISState state = JIS_ASCII;
        gint error_ = 0;

        /* room for escape sequences around characters plus the final K_OUT */
        outbuf = g_malloc(strlen(inbuf) * 3 + 4);
        out = (guchar *)outbuf;

        while (*in != '\0') {
                if (isascii(*in)) {
                        K_OUT();
                        *out++ = *in++;
                } else if (iseuckanji(*in)) {
                        if (iseuckanji(*(in + 1))) {
                                K_IN();
                                *out++ = *in++ & 0x7f;
                                *out++ = *in++ & 0x7f;
                        } else {
                                error_ = -1;
                                K_OUT();
                                *out++ = SUBST_CHAR;
                                in++;
                                /* also swallow a non-ASCII second byte */
                                if (*in != '\0' && !isascii(*in)) {
                                        *out++ = SUBST_CHAR;
                                        in++;
                                }
                        }
                } else if (iseuchwkana1(*in)) {
                        if (iseuchwkana2(*(in + 1))) {
                                if (allow_jisx0201_kana) {
                                        HW_IN();
                                        in++;
                                        *out++ = *in++ & 0x7f;
                                } else {
                                        guchar jis_ch[2];
                                        gint len;

                                        /*
                                         * pass the next half-width kana too,
                                         * in case it is a sound mark
                                         */
                                        if (iseuchwkana1(*(in + 2)) &&
                                            iseuchwkana2(*(in + 3)))
                                                len = conv_jis_hantozen
                                                        (jis_ch,
                                                         *(in + 1), *(in + 3));
                                        else
                                                len = conv_jis_hantozen
                                                        (jis_ch,
                                                         *(in + 1), '\0');
                                        if (len == 0)
                                                in += 2;
                                        else {
                                                K_IN();
                                                /* each kana is SS2 + one byte */
                                                in += len * 2;
                                                *out++ = jis_ch[0];
                                                *out++ = jis_ch[1];
                                        }
                                }
                        } else {
                                error_ = -1;
                                K_OUT();
                                in++;
                                if (*in != '\0' && !isascii(*in)) {
                                        *out++ = SUBST_CHAR;
                                        in++;
                                }
                        }
                } else if (iseucaux(*in)) {
                        in++;
                        if (iseuckanji(*in) && iseuckanji(*(in + 1))) {
                                AUX_IN();
                                *out++ = *in++ & 0x7f;
                                *out++ = *in++ & 0x7f;
                        } else {
                                error_ = -1;
                                K_OUT();
                                if (*in != '\0' && !isascii(*in)) {
                                        *out++ = SUBST_CHAR;
                                        in++;
                                        if (*in != '\0' && !isascii(*in)) {
                                                *out++ = SUBST_CHAR;
                                                in++;
                                        }
                                }
                        }
                } else {
                        error_ = -1;
                        K_OUT();
                        *out++ = SUBST_CHAR;
                        in++;
                }
        }

        K_OUT();
        *out = '\0';

        if (error)
                *error = error_;

        return outbuf;
}
551

    
552
/*
 * Map a two-byte Shift_JIS code (lead c1, trail c2) to a linear index.
 * Each lead byte covers 188 trail positions; lead bytes above 0x9f
 * continue the numbering from 0xc1 so the index stays contiguous, and
 * trail bytes above 0x7e are shifted down by one to skip the unused 0x7f.
 * Arguments are evaluated more than once.
 */
#define sjistoidx(c1, c2) \
        ((((c1) > 0x9f) ? ((c1) - 0xc1) : ((c1) - 0x81)) * 188 \
         + (c2) - (((c2) > 0x7e) ? 0x41 : 0x40))
/* Split a linear index into the two bytes of a JIS code (94 per row). */
#define idxtojis1(c) (((c) / 94) + 0x21)
#define idxtojis2(c) (((c) % 94) + 0x21)
558

    
559
/*
 * Remap a linear Shift_JIS index (see sjistoidx()) that lies in the
 * 0xfa40-and-up area to the matching position in the 0xed40 - 0xeefc
 * area -- judging by the function name, from the IBM extension block to
 * the NEC one.  Three ranges are shifted:
 *
 *   from 0xfa5c onwards    -> starting at 0xed40
 *   0xfa55 up to 0xfa5b    -> starting at 0xeefa
 *   0xfa40 up to 0xfa54    -> starting at 0xeeef
 *
 * Indices below 0xfa40 are returned unchanged.  The individual mappings
 * kept in comments below are disabled, so those codes follow the range
 * rules instead.
 */
static guint conv_idx_ibmtonec(guint idx)
{
        if (idx >= sjistoidx(0xfa, 0x5c)) {
                idx -=  sjistoidx(0xfa, 0x5c)
                      - sjistoidx(0xed, 0x40);
        }
/*        else if (idx == sjistoidx(0xfa, 0x5b)) */
/*                idx =   sjistoidx(0x81, 0xe6); */
/*        else if (idx == sjistoidx(0xfa, 0x5a)) */
/*                idx =   sjistoidx(0x87, 0x84); */
/*        else if (idx == sjistoidx(0xfa, 0x59)) */
/*                idx =   sjistoidx(0x87, 0x82); */
/*        else if (idx == sjistoidx(0xfa, 0x58)) */
/*                idx =   sjistoidx(0x87, 0x8a); */
        else if (idx >= sjistoidx(0xfa, 0x55)) {
                idx -=  sjistoidx(0xfa, 0x55)
                      - sjistoidx(0xee, 0xfa);
        }
/*        else if (idx == sjistoidx(0xfa, 0x54)) */
/*                idx =   sjistoidx(0x81, 0xca); */
/*        else if (idx >= sjistoidx(0xfa, 0x4a)) */
/*                idx -=  sjistoidx(0xfa, 0x4a)  */
/*                      - sjistoidx(0x87, 0x54); */
        else if (idx >= sjistoidx(0xfa, 0x40)) {
                idx -=  sjistoidx(0xfa, 0x40)
                      - sjistoidx(0xee, 0xef);
        }
        return idx;
}
585

    
586
/*
 * Convert a NUL-terminated Shift_JIS string to JIS (ISO-2022-JP style).
 *
 * inbuf: input string; must not be NULL.
 * error: if non-NULL, receives 0 on success or -1 when an invalid or
 *        unsupported byte sequence was found.
 *
 * Returns a buffer allocated with g_malloc() holding the converted,
 * NUL-terminated string.  The output always ends in the ASCII state.
 *
 * Half-width kana are emitted with ESC ( I when allow_jisx0201_kana is
 * set; otherwise they are replaced by full-width kana through
 * conv_jis_hantozen().  Codes matched by issjisibmext() are remapped
 * with conv_idx_ibmtonec() before being written as kanji.  Remaining
 * extension-area lead bytes and other invalid bytes are replaced by
 * SUBST_CHAR.
 */
static gchar *conv_sjistojis(const gchar *inbuf, gint *error)
{
        gchar *outbuf;
        const guchar *in = (const guchar *)inbuf;
        guchar *out;
        JISState state = JIS_ASCII;
        gint error_ = 0;
        guint idx;

        outbuf = g_malloc(strlen(inbuf) * 5 + 4);
        out = (guchar *)outbuf;

        while (*in != '\0') {
                if (isascii(*in)) {
                        K_OUT();
                        *out++ = *in++;
                } else if (issjiskanji1(*in)) {
                        if (issjiskanji2(*(in + 1))) {
                                K_IN();
                                idx = sjistoidx(*in, *(in + 1));
                                *out++ = idxtojis1(idx);
                                *out++ = idxtojis2(idx);
                                in += 2;
                        } else {
                                error_ = -1;
                                K_OUT();
                                *out++ = SUBST_CHAR;
                                in++;
                                /* also swallow a non-ASCII second byte */
                                if (*in != '\0' && !isascii(*in)) {
                                        *out++ = SUBST_CHAR;
                                        in++;
                                }
                        }
                } else if (issjishwkana(*in)) {
                        if (allow_jisx0201_kana) {
                                HW_IN();
                                *out++ = *in++ & 0x7f;
                        } else {
                                guchar jis_ch[2];
                                gint len;

                                /*
                                 * pass the next half-width kana too,
                                 * in case it is a sound mark
                                 */
                                if (issjishwkana(*(in + 1)))
                                        len = conv_jis_hantozen
                                                (jis_ch,
                                                 *in, *(in + 1));
                                else
                                        len = conv_jis_hantozen
                                                (jis_ch,
                                                 *in, '\0');
                                if (len == 0)
                                        in++;
                                else {
                                        K_IN();
                                        in += len;
                                        *out++ = jis_ch[0];
                                        *out++ = jis_ch[1];
                                }
                        }
                } else if (issjisibmext(*in, *(in + 1))) {
                        K_IN();
                        idx = sjistoidx(*in, *(in + 1));
                        idx = conv_idx_ibmtonec(idx);
                        *out++ = idxtojis1(idx);
                        *out++ = idxtojis2(idx);
                        in += 2;
#if 0
                } else if (issjisudc(*in)) {
                        UDC_IN();
                        idx = sjistoidx(*in, *(in + 1))
                              - sjistoidx(0xf0, 0x40);
                        *out++ = idxtojis1(idx);
                        *out++ = idxtojis2(idx);
                        in += 2;
#endif
                } else if (issjisext(*in)) {
                        error_ = -1;
                        K_OUT();
                        *out++ = SUBST_CHAR;
                        in++;
                        if (*in != '\0' && !isascii(*in)) {
                                *out++ = SUBST_CHAR;
                                in++;
                        }
                } else {
                        error_ = -1;
                        K_OUT();
                        *out++ = SUBST_CHAR;
                        in++;
                }
        }

        K_OUT();
        *out = '\0';

        if (error)
                *error = error_;

        return outbuf;
}
685

    
686
/*
 * Convert a NUL-terminated Shift_JIS string to EUC-JP.
 *
 * inbuf: input string; must not be NULL.
 * error: if non-NULL, receives 0 on success or -1 when an invalid or
 *        unsupported byte sequence was found.
 *
 * Returns a buffer allocated with g_malloc() holding the converted,
 * NUL-terminated string.
 *
 * Half-width kana become SS2 + the same byte.  Extension-area lead
 * bytes (issjisext) and other invalid bytes are replaced by SUBST_CHAR.
 */
static gchar *conv_sjistoeuc(const gchar *inbuf, gint *error)
{
        gchar *outbuf;
        const guchar *in = (const guchar *)inbuf;
        guchar *out;
        gint error_ = 0;

        /* a half-width kana byte expands to two output bytes */
        outbuf = g_malloc(strlen(inbuf) * 2 + 1);
        out = (guchar *)outbuf;

        while (*in != '\0') {
                if (isascii(*in)) {
                        *out++ = *in++;
                } else if (issjiskanji1(*in)) {
                        if (issjiskanji2(*(in + 1))) {
                                guchar out1 = *in;
                                guchar out2 = *(in + 1);
                                guchar row;

                                /* offset depends on which lead-byte range is used */
                                row = out1 < 0xa0 ? 0x70 : 0xb0;
                                if (out2 < 0x9f) {
                                        /* lower trail range: odd row */
                                        out1 = (out1 - row) * 2 - 1;
                                        out2 -= out2 > 0x7f ? 0x20 : 0x1f;
                                } else {
                                        /* upper trail range: even row */
                                        out1 = (out1 - row) * 2;
                                        out2 -= 0x7e;
                                }

                                *out++ = out1 | 0x80;
                                *out++ = out2 | 0x80;
                                in += 2;
                        } else {
                                error_ = -1;
                                *out++ = SUBST_CHAR;
                                in++;
                                /* also swallow a non-ASCII second byte */
                                if (*in != '\0' && !isascii(*in)) {
                                        *out++ = SUBST_CHAR;
                                        in++;
                                }
                        }
                } else if (issjishwkana(*in)) {
                        *out++ = SS2;
                        *out++ = *in++;
                } else if (issjisext(*in)) {
                        error_ = -1;
                        *out++ = SUBST_CHAR;
                        in++;
                        if (*in != '\0' && !isascii(*in)) {
                                *out++ = SUBST_CHAR;
                                in++;
                        }
                } else {
                        error_ = -1;
                        *out++ = SUBST_CHAR;
                        in++;
                }
        }

        *out = '\0';

        if (error)
                *error = error_;

        return outbuf;
}
751

    
752
/*
 * Convert an ISO-2022-JP string to UTF-8 in two stages.
 *
 * Input that contains the escape sequence ESC $ ( D is converted with
 * conv_jistoeuc() followed by conv_euctoutf8(); any other input goes
 * through conv_jistosjis() followed by conv_sjistoutf8().
 *
 * inbuf: NUL-terminated source string (passed straight to strstr(),
 *        so it must not be NULL).
 * error: optional; receives the bitwise OR of the error codes reported
 *        by the two stages.
 *
 * Returns the string produced by the second stage.
 */
static gchar *conv_jistoutf8(const gchar *inbuf, gint *error)
{
        gint first_err = 0;
        gint second_err = 0;
        gchar *intermediate;
        gchar *result;

        if (strstr(inbuf, "\033$(D") != NULL) {
                intermediate = conv_jistoeuc(inbuf, &first_err);
                result = conv_euctoutf8(intermediate, &second_err);
        } else {
                intermediate = conv_jistosjis(inbuf, &first_err);
                result = conv_sjistoutf8(intermediate, &second_err);
        }

        /* the intermediate buffer is only needed between the two stages */
        g_free(intermediate);

        if (error != NULL)
                *error = first_err | second_err;

        return result;
}
771

    
772
/*
 * Locking helpers used by the converters below.  Without USE_THREADS
 * they expand to nothing; with USE_THREADS they map one-to-one onto
 * GLib's G_LOCK_DEFINE_STATIC / G_LOCK / G_UNLOCK macros.
 */
#if !USE_THREADS
#define S_LOCK_DEFINE_STATIC(name)
#define S_LOCK(name)
#define S_UNLOCK(name)
#else
#define S_LOCK_DEFINE_STATIC(name)        G_LOCK_DEFINE_STATIC(name)
#define S_LOCK(name)        G_LOCK(name)
#define S_UNLOCK(name)        G_UNLOCK(name)
#endif
781

    
782
/*
 * Convert a Shift_JIS string to UTF-8 with iconv.
 *
 * The conversion descriptor is opened on first use (CS_CP932 is tried
 * first, CS_SHIFT_JIS second) and kept in a function-local static that
 * is only touched between S_LOCK() and S_UNLOCK().  A failed open is
 * remembered in iconv_ok so that it is not retried on later calls.
 *
 * inbuf: source string.
 * error: optional; set to -1 when no descriptor is available, otherwise
 *        filled in by conv_iconv_strdup_with_cd().
 *
 * Returns the converted string, or a g_strdup() copy of inbuf when no
 * descriptor could be opened.
 */
static gchar *conv_sjistoutf8(const gchar *inbuf, gint *error)
{
        static iconv_t cd = (iconv_t)-1;
        static gboolean iconv_ok = TRUE;
        S_LOCK_DEFINE_STATIC(cd);
        gchar *converted;

        S_LOCK(cd);

        /* lazily open the descriptor unless an earlier attempt failed */
        if (cd == (iconv_t)-1 && iconv_ok) {
                cd = iconv_open(CS_UTF_8, CS_CP932);
                if (cd == (iconv_t)-1)
                        cd = iconv_open(CS_UTF_8, CS_SHIFT_JIS);
                if (cd == (iconv_t)-1) {
                        g_warning("conv_sjistoutf8(): %s\n",
                                  g_strerror(errno));
                        iconv_ok = FALSE;
                }
        }

        /* no usable descriptor: hand back an unconverted copy */
        if (cd == (iconv_t)-1) {
                S_UNLOCK(cd);
                if (error != NULL)
                        *error = -1;
                return g_strdup(inbuf);
        }

        converted = conv_iconv_strdup_with_cd(inbuf, cd, error);
        S_UNLOCK(cd);

        return converted;
}
818

    
819
/*
 * Convert an EUC-JP string to UTF-8 with iconv.
 *
 * The conversion descriptor is opened on first use (CS_EUC_JP_MS is
 * tried first, CS_EUC_JP second) and kept in a function-local static
 * that is only touched between S_LOCK() and S_UNLOCK().  A failed open
 * is remembered in iconv_ok so that it is not retried on later calls.
 *
 * inbuf: source string.
 * error: optional; set to -1 when no descriptor is available, otherwise
 *        filled in by conv_iconv_strdup_with_cd().
 *
 * Returns the converted string, or a g_strdup() copy of inbuf when no
 * descriptor could be opened.
 */
static gchar *conv_euctoutf8(const gchar *inbuf, gint *error)
{
        static iconv_t cd = (iconv_t)-1;
        static gboolean iconv_ok = TRUE;
        S_LOCK_DEFINE_STATIC(cd);
        gchar *converted;

        S_LOCK(cd);

        /* lazily open the descriptor unless an earlier attempt failed */
        if (cd == (iconv_t)-1 && iconv_ok) {
                cd = iconv_open(CS_UTF_8, CS_EUC_JP_MS);
                if (cd == (iconv_t)-1)
                        cd = iconv_open(CS_UTF_8, CS_EUC_JP);
                if (cd == (iconv_t)-1) {
                        g_warning("conv_euctoutf8(): %s\n",
                                  g_strerror(errno));
                        iconv_ok = FALSE;
                }
        }

        /* no usable descriptor: hand back an unconverted copy */
        if (cd == (iconv_t)-1) {
                S_UNLOCK(cd);
                if (error != NULL)
                        *error = -1;
                return g_strdup(inbuf);
        }

        converted = conv_iconv_strdup_with_cd(inbuf, cd, error);
        S_UNLOCK(cd);

        return converted;
}
855

    
856
/*
 * Convert a string of unknown Japanese encoding to UTF-8.
 *
 * The encoding is guessed with conv_guess_ja_encoding().  ISO-2022-JP,
 * Shift_JIS and EUC-JP guesses are dispatched to the matching
 * converter.  Any other guess yields a plain g_strdup() copy with
 * *error set to 0; for a UTF-8 guess a leading byte order mark is
 * skipped before copying.
 *
 * inbuf: source string.
 * error: optional error code output.
 */
static gchar *conv_anytoutf8(const gchar *inbuf, gint *error)
{
        const CharSet guessed = conv_guess_ja_encoding(inbuf);

        if (guessed == C_ISO_2022_JP)
                return conv_jistoutf8(inbuf, error);
        if (guessed == C_SHIFT_JIS)
                return conv_sjistoutf8(inbuf, error);
        if (guessed == C_EUC_JP)
                return conv_euctoutf8(inbuf, error);

        /* nothing to convert: copy as is */
        if (error != NULL)
                *error = 0;
        if (guessed == C_UTF_8 && isutf8bom(inbuf))
                inbuf += 3;

        return g_strdup(inbuf);
}
877

    
878
/*
 * Convert a UTF-8 string to Shift_JIS with iconv.
 *
 * The conversion descriptor is opened on first use (CS_CP932 is tried
 * first, CS_SHIFT_JIS second) and kept in a function-local static that
 * is only touched between S_LOCK() and S_UNLOCK().  A failed open is
 * remembered in iconv_ok so that it is not retried on later calls.
 * A leading UTF-8 byte order mark is skipped before converting.
 *
 * inbuf: source string.
 * error: optional; set to -1 when no descriptor is available, otherwise
 *        filled in by conv_iconv_strdup_with_cd().
 *
 * Returns the converted string, or a g_strdup() copy of inbuf when no
 * descriptor could be opened.
 */
static gchar *conv_utf8tosjis(const gchar *inbuf, gint *error)
{
        static iconv_t cd = (iconv_t)-1;
        static gboolean iconv_ok = TRUE;
        S_LOCK_DEFINE_STATIC(cd);
        gchar *converted;

        S_LOCK(cd);

        /* lazily open the descriptor unless an earlier attempt failed */
        if (cd == (iconv_t)-1 && iconv_ok) {
                cd = iconv_open(CS_CP932, CS_UTF_8);
                if (cd == (iconv_t)-1)
                        cd = iconv_open(CS_SHIFT_JIS, CS_UTF_8);
                if (cd == (iconv_t)-1) {
                        g_warning("conv_utf8tosjis(): %s\n",
                                  g_strerror(errno));
                        iconv_ok = FALSE;
                }
        }

        /* no usable descriptor: hand back an unconverted copy */
        if (cd == (iconv_t)-1) {
                S_UNLOCK(cd);
                if (error != NULL)
                        *error = -1;
                return g_strdup(inbuf);
        }

        if (isutf8bom(inbuf))
                inbuf += 3;
        converted = conv_iconv_strdup_with_cd(inbuf, cd, error);
        S_UNLOCK(cd);

        return converted;
}
916

    
917
/*
 * Convert a UTF-8 string to EUC-JP with iconv.
 *
 * The conversion descriptor is opened on first use (CS_EUC_JP_MS is
 * tried first, CS_EUC_JP second) and kept in a function-local static
 * that is only touched between S_LOCK() and S_UNLOCK().  A failed open
 * is remembered in iconv_ok so that it is not retried on later calls.
 * A leading UTF-8 byte order mark is skipped before converting.
 *
 * inbuf: source string.
 * error: optional; set to -1 when no descriptor is available, otherwise
 *        filled in by conv_iconv_strdup_with_cd().
 *
 * Returns the converted string, or a g_strdup() copy of inbuf when no
 * descriptor could be opened.
 */
static gchar *conv_utf8toeuc(const gchar *inbuf, gint *error)
{
        static iconv_t cd = (iconv_t)-1;
        static gboolean iconv_ok = TRUE;
        S_LOCK_DEFINE_STATIC(cd);
        gchar *converted;

        S_LOCK(cd);

        /* lazily open the descriptor unless an earlier attempt failed */
        if (cd == (iconv_t)-1 && iconv_ok) {
                cd = iconv_open(CS_EUC_JP_MS, CS_UTF_8);
                if (cd == (iconv_t)-1)
                        cd = iconv_open(CS_EUC_JP, CS_UTF_8);
                if (cd == (iconv_t)-1) {
                        g_warning("conv_utf8toeuc(): %s\n",
                                  g_strerror(errno));
                        iconv_ok = FALSE;
                }
        }

        /* no usable descriptor: hand back an unconverted copy */
        if (cd == (iconv_t)-1) {
                S_UNLOCK(cd);
                if (error != NULL)
                        *error = -1;
                return g_strdup(inbuf);
        }

        if (isutf8bom(inbuf))
                inbuf += 3;
        converted = conv_iconv_strdup_with_cd(inbuf, cd, error);
        S_UNLOCK(cd);

        return converted;
}
955

    
956
/*
 * Convert a UTF-8 string to ISO-2022-JP in two stages.
 *
 * The enabled code path goes UTF-8 -> Shift_JIS -> JIS using
 * conv_utf8tosjis() and conv_sjistojis(); the alternative route through
 * EUC-JP is kept in the disabled preprocessor branch.
 *
 * inbuf: source string.
 * error: optional; receives the bitwise OR of the error codes reported
 *        by the two stages.
 *
 * Returns the string produced by the second stage.
 */
static gchar *conv_utf8tojis(const gchar *inbuf, gint *error)
{
        gint first_err = 0;
        gint second_err = 0;
        gchar *intermediate;
        gchar *result;

#if 1
        intermediate = conv_utf8tosjis(inbuf, &first_err);
        result = conv_sjistojis(intermediate, &second_err);
#else
        intermediate = conv_utf8toeuc(inbuf, &first_err);
        result = conv_euctojis(intermediate, &second_err);
#endif

        /* the intermediate buffer is only needed between the two stages */
        g_free(intermediate);

        if (error != NULL)
                *error = first_err | second_err;

        return result;
}
975

    
976
#if 0
977
/*
 * Lookup table consulted by isprintableeuckanji() for first bytes
 * 0xa2..0xa8.  Row index is (first byte - 0xa2), column index is
 * (second byte - 0xa0); the stored value is returned as the result.
 * Each source line below holds 16 consecutive columns.
 */
static gchar valid_eucjp_tbl[][96] = {
        /* first byte 0xa2, second byte 0xa0 - 0xff */
        { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
          1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
          1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
          0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0 },

        /* first byte 0xa3, second byte 0xa0 - 0xff */
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
          0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
          0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 },

        /* first byte 0xa4, second byte 0xa0 - 0xff */
        { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },

        /* first byte 0xa5, second byte 0xa0 - 0xff */
        { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0 },

        /* first byte 0xa6, second byte 0xa0 - 0xff */
        { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
          0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },

        /* first byte 0xa7, second byte 0xa0 - 0xff */
        { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },

        /* first byte 0xa8, second byte 0xa0 - 0xff */
        { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
};
1034

1035
/*
 * Decide whether the two-byte sequence (c1, c2) is accepted as a
 * printable code by conv_unreadable_eucjp().
 *
 * Rejected outright: c1 outside 0xa1..0xf4, c2 outside 0xa1..0xfe, and
 * c1 in 0xa9..0xaf.  For c1 in 0xa2..0xa8 the answer comes from
 * valid_eucjp_tbl.  For c1 == 0xcf the second bytes 0xd4..0xfe are
 * rejected, for c1 == 0xf4 the second bytes 0xa7..0xfe.  Everything
 * else is accepted.
 */
static gboolean isprintableeuckanji(guchar c1, guchar c2)
{
        /* range checks on both bytes */
        if (c1 <= 0xa0 || c1 >= 0xf5 || c2 <= 0xa0 || c2 == 0xff)
                return FALSE;

        if (c1 >= 0xa9 && c1 <= 0xaf)
                return FALSE;

        /* table-driven rows */
        if (c1 >= 0xa2 && c1 <= 0xa8)
                return (gboolean)valid_eucjp_tbl[c1 - 0xa2][c2 - 0xa0];

        /* partially filled rows */
        if (c1 == 0xcf && c2 >= 0xd4 && c2 <= 0xfe)
                return FALSE;
        if (c1 == 0xf4 && c2 >= 0xa7 && c2 <= 0xfe)
                return FALSE;

        return TRUE;
}
1058

1059
/*
 * Sanitize a string in place, treating it as EUC-JP:
 *  - CR immediately followed by LF is reduced to LF;
 *  - two-byte codes rejected by isprintableeuckanji() have their first
 *    byte replaced by SUBST_CHAR, and their second byte as well unless
 *    it is ASCII or the terminator;
 *  - an SS2 byte not followed by a valid half-width kana byte, an SS3
 *    byte not followed by two bytes in the kanji range, and any other
 *    unrecognised byte are replaced by SUBST_CHAR.
 */
static void conv_unreadable_eucjp(gchar *str)
{
        register guchar *cur = (guchar *)str;

        while (*cur != '\0') {
                if (isascii(*cur)) {
                        /* convert CR+LF -> LF, then step over the byte */
                        if (*cur == '\r' && *(cur + 1) == '\n')
                                memmove(cur, cur + 1, strlen((gchar *)cur));
                        cur++;
                        continue;
                }

                if (iseuckanji(*cur)) {
                        if (isprintableeuckanji(*cur, *(cur + 1))) {
                                /* printable two-byte code */
                                cur += 2;
                                continue;
                        }

                        /* substitute the unprintable code */
                        *cur++ = SUBST_CHAR;
                        if (*cur == '\0')
                                continue;
                        if (!isascii(*cur))
                                *cur = SUBST_CHAR;
                        cur++;
                        continue;
                }

                if (iseuchwkana1(*cur) && iseuchwkana2(*(cur + 1))) {
                        /* half-width kana: SS2 + kana byte */
                        cur += 2;
                        continue;
                }

                if (!iseuchwkana1(*cur) && iseucaux(*cur) &&
                    iseuckanji(*(cur + 1)) && iseuckanji(*(cur + 2))) {
                        /* auxiliary kanji: SS3 + two bytes */
                        cur += 3;
                        continue;
                }

                /* anything else is an unprintable single byte */
                *cur++ = SUBST_CHAR;
        }
}
1101
#endif
1102

    
1103
/*
 * Sanitize a string in place so that it only contains 7-bit bytes:
 *  - a CR that is immediately followed by LF is dropped (CR+LF -> LF);
 *  - every byte for which isascii() is false is replaced by SUBST_CHAR.
 *
 * The string is compacted in a single pass with separate read and write
 * positions, so the cost stays linear even for text made up of CR+LF
 * line endings (shifting the whole tail for each CR+LF would be
 * quadratic).
 *
 * str: writable NUL-terminated string; NULL is accepted and ignored.
 */
static void conv_unreadable_8bit(gchar *str)
{
        const gchar *src = str;
        gchar *dst = str;

        if (str == NULL)
                return;

        while (*src != '\0') {
                /* convert CR+LF -> LF: skip the CR, the LF is copied below */
                if (*src == '\r' && *(src + 1) == '\n')
                        src++;

                *dst++ = isascii(*(const guchar *)src) ? *src : SUBST_CHAR;
                src++;
        }

        /* dst never runs ahead of src, so the terminator always fits */
        *dst = '\0';
}
1115

    
1116
#if 0
/*
 * Sanitize a string in place: CR immediately followed by LF is reduced
 * to LF, and bytes in the range 0x7f..0x9f are replaced by SUBST_CHAR.
 * All other bytes are left untouched.
 */
static void conv_unreadable_latin(gchar *str)
{
        register guchar *cur;

        for (cur = (guchar *)str; *cur != '\0'; cur++) {
                if (*cur == '\r' && *(cur + 1) == '\n') {
                        /* convert CR+LF -> LF */
                        memmove(cur, cur + 1, strlen((gchar *)cur));
                        continue;
                }

                if ((*cur & 0xff) >= 0x7f && (*cur & 0xff) <= 0x9f)
                        *cur = SUBST_CHAR;
        }
}
#endif
1131

    
1132
#define NCV        '\0'
1133

    
1134
void conv_mb_alnum(gchar *str)
1135
{
1136
        static guchar char_tbl[] = {
1137
                /* 0xa0 - 0xaf */
1138
                NCV, ' ', NCV, NCV, ',', '.', NCV, ':',
1139
                ';', '?', '!', NCV, NCV, NCV, NCV, NCV,
1140
                /* 0xb0 - 0xbf */
1141
                NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
1142
                NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
1143
                /* 0xc0 - 0xcf */
1144
                NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
1145
                NCV, NCV, '(', ')', NCV, NCV, '[', ']',
1146
                /* 0xd0 - 0xdf */
1147
                '{', '}', NCV, NCV, NCV, NCV, NCV, NCV,
1148
                NCV, NCV, NCV, NCV, '+', '-', NCV, NCV,
1149
                /* 0xe0 - 0xef */
1150
                NCV, '=', NCV, '<', '>', NCV, NCV, NCV,
1151
                NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV
1152
        };
1153

    
1154
        register guchar *p = (guchar *)str;
1155
        register gint len;
1156

    
1157
        len = strlen(str);
1158

    
1159
        while (len > 1) {
1160
                if (*p == 0xa3) {
1161
                        register guchar ch = *(p + 1);
1162

    
1163
                        if (ch >= 0xb0 && ch <= 0xfa) {
1164
                                /* [a-zA-Z] */
1165
                                *p = ch & 0x7f;
1166
                                p++;
1167
                                len--;
1168
                                memmove(p, p + 1, len);
1169
                                len--;
1170
                        } else  {
1171
                                p += 2;
1172
                                len -= 2;
1173
                        }
1174
                } else if (*p == 0xa1) {
1175
                        register guchar ch = *(p + 1);
1176

    
1177
                        if (ch >= 0xa0 && ch <= 0xef &&
1178
                            NCV != char_tbl[ch - 0xa0]) {
1179
                                *p = char_tbl[ch - 0xa0];
1180
                                p++;
1181
                                len--;
1182
                                memmove(p, p + 1, len);
1183
                                len--;
1184
                        } else {
1185
                                p += 2;
1186
                                len -= 2;
1187
                        }
1188
                } else if (iseuckanji(*p)) {
1189
                        p += 2;
1190
                        len -= 2;
1191
                } else {
1192
                        p++;
1193
                        len--;
1194
                }
1195
        }
1196
}
1197

    
1198
CharSet conv_guess_ja_encoding(const gchar *str)
1199
{
1200
        const guchar *p = (const guchar *)str;
1201
        CharSet guessed = C_US_ASCII;
1202

    
1203
        while (*p != '\0') {
1204
                if (*p == ESC && (*(p + 1) == '$' || *(p + 1) == '(')) {
1205
                        if (guessed == C_US_ASCII)
1206
                                return C_ISO_2022_JP;
1207
                        p += 2;
1208
                } else if (isascii(*p)) {
1209
                        p++;
1210
                } else if (iseuckanji(*p) && iseuckanji(*(p + 1))) {
1211
                        if (*p >= 0xfd && *p <= 0xfe)
1212
                                return C_EUC_JP;
1213
                        else if (guessed == C_SHIFT_JIS) {
1214
                                if ((issjiskanji1(*p) &&
1215
                                     issjiskanji2(*(p + 1))) ||
1216
                                    issjishwkana(*p))
1217
                                        guessed = C_SHIFT_JIS;
1218
                                else
1219
                                        guessed = C_EUC_JP;
1220
                        } else
1221
                                guessed = C_EUC_JP;
1222
                        p += 2;
1223
                } else if (issjiskanji1(*p) && issjiskanji2(*(p + 1))) {
1224
                        guessed = C_SHIFT_JIS;
1225
                        p += 2;
1226
                } else if (issjishwkana(*p)) {
1227
                        guessed = C_SHIFT_JIS;
1228
                        p++;
1229
                } else {
1230
                        if (guessed == C_US_ASCII)
1231
                                guessed = C_AUTO;
1232
                        p++;
1233
                }
1234
        }
1235

    
1236
        if (guessed != C_US_ASCII) {
1237
                p = (const guchar *)str;
1238

    
1239
                while (*p != '\0') {
1240
                        if (isascii(*p)) {
1241
                                p++;
1242
                        } else if (isutf8_3_1(*p) &&
1243
                                   isutf8_3_2(*(p + 1)) &&
1244
                                   isutf8_3_2(*(p + 2))) {
1245
                                p += 3;
1246
                        } else {
1247
                                return guessed;
1248
                        }
1249
                }
1250

    
1251
                return C_UTF_8;
1252
        }
1253

    
1254
        return guessed;
1255
}
1256

    
1257
/*
 * Display conversion for ISO-2022-JP input: forwards to
 * conv_jistoutf8() with the same arguments and returns its result.
 */
static gchar *conv_jistodisp(const gchar *inbuf, gint *error)
{
        gchar *disp = conv_jistoutf8(inbuf, error);

        return disp;
}
1261

    
1262
/*
 * Display conversion for Shift_JIS input: forwards to
 * conv_sjistoutf8() with the same arguments and returns its result.
 */
static gchar *conv_sjistodisp(const gchar *inbuf, gint *error)
{
        gchar *disp = conv_sjistoutf8(inbuf, error);

        return disp;
}
1266

    
1267
/*
 * Display conversion for EUC-JP input: forwards to conv_euctoutf8()
 * with the same arguments and returns its result.
 */
static gchar *conv_euctodisp(const gchar *inbuf, gint *error)
{
        gchar *disp = conv_euctoutf8(inbuf, error);

        return disp;
}
1271

    
1272
/*
 * Produce a displayable copy of a string that is expected to be UTF-8.
 *
 * Valid UTF-8 is copied unchanged, minus a leading byte order mark.
 * Input that fails g_utf8_validate() is handed to conv_ustodisp().
 *
 * inbuf: source string; NULL yields NULL with *error set to 0, the same
 *        convention conv_codeset_strdup_full() uses, instead of passing
 *        a NULL pointer on to the validation and sanitizing code.
 * error: optional; set to 0 for valid UTF-8, otherwise filled in by
 *        conv_ustodisp().
 *
 * Returns a g_strdup() copy or the result of conv_ustodisp().
 */
gchar *conv_utf8todisp(const gchar *inbuf, gint *error)
{
        if (!inbuf) {
                if (error)
                        *error = 0;
                return NULL;
        }

        if (!g_utf8_validate(inbuf, -1, NULL))
                return conv_ustodisp(inbuf, error);

        if (error)
                *error = 0;
        if (isutf8bom(inbuf))
                inbuf += 3;

        return g_strdup(inbuf);
}
1283

    
1284
/*
 * Convert a string of unknown encoding for display.
 *
 * The string is first converted with conv_anytoutf8().  If the result
 * does not pass g_utf8_validate(), *error is set to -1 and the result
 * is sanitized in place with conv_unreadable_8bit().
 *
 * inbuf: source string.
 * error: optional error code output.
 */
static gchar *conv_anytodisp(const gchar *inbuf, gint *error)
{
        gchar *disp = conv_anytoutf8(inbuf, error);

        if (g_utf8_validate(disp, -1, NULL) == TRUE)
                return disp;

        /* conversion left invalid UTF-8 behind: flag it and clean up */
        if (error != NULL)
                *error = -1;
        conv_unreadable_8bit(disp);

        return disp;
}
1297

    
1298
/*
 * Produce a 7-bit displayable copy of a string: the input is duplicated
 * with g_strdup() and the copy is sanitized by conv_unreadable_8bit().
 *
 * inbuf: source string.
 * error: optional; always set to 0.
 */
static gchar *conv_ustodisp(const gchar *inbuf, gint *error)
{
        gchar *copy;

        if (error != NULL)
                *error = 0;

        copy = g_strdup(inbuf);
        conv_unreadable_8bit(copy);

        return copy;
}
1309

    
1310
/*
 * Convert a string from the locale charset to the internal charset for
 * display.
 *
 * The conversion is attempted with conv_iconv_strdup() from
 * conv_get_locale_charset_str() to CS_INTERNAL.  If that returns NULL,
 * the input is passed through conv_utf8todisp() instead; in that case
 * *error keeps whatever conv_iconv_strdup() stored.
 *
 * inbuf: source string; NULL yields NULL with *error set to 0.  Without
 *        this guard a NULL input made conv_iconv_strdup() return NULL
 *        and the fallback was then invoked on a NULL pointer.
 * error: optional error code output.
 */
gchar *conv_localetodisp(const gchar *inbuf, gint *error)
{
        gchar *str;

        if (!inbuf) {
                if (error)
                        *error = 0;
                return NULL;
        }

        str = conv_iconv_strdup(inbuf, conv_get_locale_charset_str(),
                                CS_INTERNAL, error);
        if (!str)
                str = conv_utf8todisp(inbuf, NULL);

        return str;
}
1321

    
1322
/*
 * Identity "conversion": reports success and returns a g_strdup() copy
 * of the input.  Its address is also compared against by callers to
 * detect that no dedicated converter was selected.
 *
 * inbuf: source string.
 * error: optional; always set to 0.
 */
static gchar *conv_noconv(const gchar *inbuf, gint *error)
{
        if (error != NULL)
                *error = 0;

        return g_strdup(inbuf);
}
1328

    
1329
static const gchar *
1330
conv_get_fallback_for_private_encoding(const gchar *encoding)
1331
{
1332
        if (encoding) {
1333
                if ((encoding[0] == 'X' || encoding[0] == 'x') &&
1334
                    encoding[1] == '-') {
1335
                        if (!g_ascii_strcasecmp(encoding, CS_X_GBK))
1336
                                return CS_GBK;
1337
                        else if (!g_ascii_strcasecmp(encoding, CS_X_SJIS))
1338
                                return CS_SHIFT_JIS;
1339
                } else if ((encoding[0] == 'K' || encoding[0] == 'k') &&
1340
                           (encoding[1] == 'S' || encoding[1] == 's')) {
1341
                        if (!g_ascii_strcasecmp(encoding, CS_KS_C_5601_1987))
1342
                                return CS_EUC_KR;
1343
                }
1344
        }
1345

    
1346
        return encoding;
1347
}
1348

    
1349
CodeConverter *conv_code_converter_new(const gchar *src_encoding,
1350
                                       const gchar *dest_encoding)
1351
{
1352
        CodeConverter *conv;
1353

    
1354
        src_encoding = conv_get_fallback_for_private_encoding(src_encoding);
1355

    
1356
        conv = g_new0(CodeConverter, 1);
1357
        conv->code_conv_func =
1358
                conv_get_code_conv_func(src_encoding, dest_encoding);
1359
        conv->src_encoding = g_strdup(src_encoding);
1360
        conv->dest_encoding = g_strdup(dest_encoding);
1361

    
1362
        return conv;
1363
}
1364

    
1365
/*
 * Release a CodeConverter together with the encoding names it owns.
 *
 * conv: converter to free; NULL is accepted and ignored, so the
 *       function can be called unconditionally on cleanup paths
 *       (previously a NULL argument was dereferenced).
 */
void conv_code_converter_destroy(CodeConverter *conv)
{
        if (!conv)
                return;

        g_free(conv->src_encoding);
        g_free(conv->dest_encoding);
        g_free(conv);
}
1371

    
1372
/*
 * Convert a string with a previously created CodeConverter.
 *
 * When the converter holds a dedicated conversion function it is
 * called; when it holds conv_noconv, the work is delegated to
 * conv_iconv_strdup() using the stored encoding names.  No error code
 * is reported in either case.
 *
 * conv:  converter to use.
 * inbuf: source string; NULL yields NULL.
 */
gchar *conv_convert(CodeConverter *conv, const gchar *inbuf)
{
        if (inbuf == NULL)
                return NULL;

        if (conv->code_conv_func == conv_noconv)
                return conv_iconv_strdup(inbuf, conv->src_encoding,
                                         conv->dest_encoding, NULL);

        return conv->code_conv_func(inbuf, NULL);
}
1382

    
1383
gchar *conv_codeset_strdup_full(const gchar *inbuf,
1384
                                const gchar *src_encoding,
1385
                                const gchar *dest_encoding,
1386
                                gint *error)
1387
{
1388
        CodeConvFunc conv_func;
1389

    
1390
        if (!inbuf) {
1391
                if (error)
1392
                        *error = 0;
1393
                return NULL;
1394
        }
1395

    
1396
        src_encoding = conv_get_fallback_for_private_encoding(src_encoding);
1397

    
1398
        conv_func = conv_get_code_conv_func(src_encoding, dest_encoding);
1399
        if (conv_func != conv_noconv)
1400
                return conv_func(inbuf, error);
1401

    
1402
        return conv_iconv_strdup(inbuf, src_encoding, dest_encoding, error);
1403
}
1404

    
1405
CodeConvFunc conv_get_code_conv_func(const gchar *src_encoding,
1406
                                     const gchar *dest_encoding)
1407
{
1408
        CodeConvFunc code_conv = conv_noconv;
1409
        CharSet src_charset;
1410
        CharSet dest_charset;
1411

    
1412
        if (!src_encoding)
1413
                src_charset = conv_get_locale_charset();
1414
        else
1415
                src_charset = conv_get_charset_from_str(src_encoding);
1416

    
1417
        /* auto detection mode */
1418
        if (!src_encoding && !dest_encoding) {
1419
                if (conv_ad_type == C_AD_JAPANESE ||
1420
                    (conv_ad_type == C_AD_BY_LOCALE && conv_is_ja_locale()))
1421
                        return conv_anytodisp;
1422
                else
1423
                        return conv_noconv;
1424
        }
1425

    
1426
        dest_charset = conv_get_charset_from_str(dest_encoding);
1427

    
1428
        if (dest_charset == C_US_ASCII)
1429
                return conv_ustodisp;
1430

    
1431
        switch (src_charset) {
1432
        case C_US_ASCII:
1433
        case C_ISO_8859_1:
1434
        case C_ISO_8859_2:
1435
        case C_ISO_8859_3:
1436
        case C_ISO_8859_4:
1437
        case C_ISO_8859_5:
1438
        case C_ISO_8859_6:
1439
        case C_ISO_8859_7:
1440
        case C_ISO_8859_8:
1441
        case C_ISO_8859_9:
1442
        case C_ISO_8859_10:
1443
        case C_ISO_8859_11:
1444
        case C_ISO_8859_13:
1445
        case C_ISO_8859_14:
1446
        case C_ISO_8859_15:
1447
        case C_ISO_8859_16:
1448
                break;
1449
        case C_ISO_2022_JP:
1450
        case C_ISO_2022_JP_2:
1451
        case C_ISO_2022_JP_3:
1452
                if (dest_charset == C_AUTO)
1453
                        code_conv = conv_jistodisp;
1454
                else if (dest_charset == C_EUC_JP)
1455
                        code_conv = conv_jistoeuc;
1456
                else if (dest_charset == C_SHIFT_JIS ||
1457
                         dest_charset == C_CP932)
1458
                        code_conv = conv_jistosjis;
1459
                else if (dest_charset == C_UTF_8)
1460
                        code_conv = conv_jistoutf8;
1461
                break;
1462
        case C_SHIFT_JIS:
1463
        case C_CP932:
1464
                if (dest_charset == C_AUTO)
1465
                        code_conv = conv_sjistodisp;
1466
                else if (dest_charset == C_ISO_2022_JP   ||
1467
                         dest_charset == C_ISO_2022_JP_2 ||
1468
                         dest_charset == C_ISO_2022_JP_3)
1469
                        code_conv = conv_sjistojis;
1470
                else if (dest_charset == C_EUC_JP)
1471
                        code_conv = conv_sjistoeuc;
1472
                else if (dest_charset == C_UTF_8)
1473
                        code_conv = conv_sjistoutf8;
1474
                break;
1475
        case C_EUC_JP:
1476
                if (dest_charset == C_AUTO)
1477
                        code_conv = conv_euctodisp;
1478
                else if (dest_charset == C_ISO_2022_JP   ||
1479
                         dest_charset == C_ISO_2022_JP_2 ||
1480
                         dest_charset == C_ISO_2022_JP_3)
1481
                        code_conv = conv_euctojis;
1482
                else if (dest_charset == C_UTF_8)
1483
                        code_conv = conv_euctoutf8;
1484
                break;
1485
        case C_UTF_8:
1486
                if (dest_charset == C_EUC_JP)
1487
                        code_conv = conv_utf8toeuc;
1488
                else if (dest_charset == C_ISO_2022_JP   ||
1489
                         dest_charset == C_ISO_2022_JP_2 ||
1490
                         dest_charset == C_ISO_2022_JP_3)
1491
                        code_conv = conv_utf8tojis;
1492
                else if (dest_charset == C_SHIFT_JIS ||
1493
                         dest_charset == C_CP932)
1494
                        code_conv = conv_utf8tosjis;
1495
                break;
1496
        default:
1497
                break;
1498
        }
1499

    
1500
        return code_conv;
1501
}
1502

    
1503
gchar *conv_iconv_strdup(const gchar *inbuf,
1504
                         const gchar *src_code, const gchar *dest_code,
1505
                         gint *error)
1506
{
1507
        iconv_t cd;
1508
        gchar *outbuf;
1509

    
1510
        if (!src_code)
1511
                src_code = conv_get_locale_charset_str();
1512
        if (!dest_code)
1513
                dest_code = CS_INTERNAL;
1514

    
1515
        cd = iconv_open(dest_code, src_code);
1516
        if (cd == (iconv_t)-1) {
1517
                if (error)
1518
                        *error = -1;
1519
                return NULL;
1520
        }
1521

    
1522
        outbuf = conv_iconv_strdup_with_cd(inbuf, cd, error);
1523

    
1524
        iconv_close(cd);
1525

    
1526
        return outbuf;
1527
}
1528

    
1529
/*
 * Convert the NUL-terminated string inbuf with the already opened iconv
 * descriptor cd and return the result in a freshly allocated,
 * NUL-terminated buffer (allocated with g_malloc()/g_realloc()).
 *
 * - inbuf == NULL: returns NULL and reports 0 through error.
 * - EILSEQ: one input byte is skipped and SUBST_CHAR is written in its
 *   place; conversion continues and -1 is reported.
 * - EINVAL or any other unexpected errno: conversion stops, the output
 *   produced so far is returned and -1 is reported.
 * - E2BIG: the output buffer is doubled and conversion is retried.
 *
 * The status is stored in *error only when error is non-NULL.
 */
gchar *conv_iconv_strdup_with_cd(const gchar *inbuf, iconv_t cd, gint *error)
{
        const gchar *src;
        gchar *dest;
        gchar *dest_p;
        size_t src_left;
        size_t dest_size;
        size_t dest_left;
        size_t used;
        gint status = 0;

        if (inbuf == NULL) {
                if (error != NULL)
                        *error = 0;
                return NULL;
        }

        src = inbuf;
        src_left = strlen(inbuf);
        dest_size = (src_left + 1) * 2;
        dest = g_malloc(dest_size);
        dest_p = dest;
        dest_left = dest_size;

/* Double the output buffer while keeping the current write position. */
#define GROW_DEST()                                        \
{                                                        \
        used = dest_p - dest;                                \
        dest_size *= 2;                                        \
        dest = g_realloc(dest, dest_size);                \
        dest_p = dest + used;                                \
        dest_left = dest_size - used;                        \
}

        /* main conversion pass */
        for (;;) {
                int err;

                if (iconv(cd, (ICONV_CONST gchar **)&src, &src_left,
                          &dest_p, &dest_left) != (size_t)-1)
                        break;

                err = errno;

                if (err == E2BIG) {
                        GROW_DEST();
                        continue;
                }

                if (err == EILSEQ) {
                        /* g_print("iconv(): at %d: %s\n", in_size - in_left, g_strerror(errno)); */
                        status = -1;
                        src++;
                        src_left--;
                        if (dest_left == 0) {
                                GROW_DEST();
                        }
                        *dest_p++ = SUBST_CHAR;
                        dest_left--;
                        continue;
                }

                /* EINVAL is given up on silently, anything else is logged */
                if (err != EINVAL)
                        g_warning("conv_iconv_strdup(): %s\n",
                                  g_strerror(err));
                status = -1;
                break;
        }

        /* second pass with NULL input: lets iconv emit any pending output */
        while (iconv(cd, NULL, NULL, &dest_p, &dest_left) == (size_t)-1) {
                if (errno != E2BIG) {
                        g_warning("conv_iconv_strdup(): %s\n",
                                  g_strerror(errno));
                        status = -1;
                        break;
                }
                GROW_DEST();
        }

#undef GROW_DEST

        /* shrink (or grow by one) to the exact size and terminate */
        used = dest_p - dest;
        dest = g_realloc(dest, used + 1);
        dest[used] = '\0';

        if (error != NULL)
                *error = status;

        return dest;
}
1613

    
1614
static const struct {
1615
        CharSet charset;
1616
        gchar *const name;
1617
} charsets[] = {
1618
        {C_US_ASCII,                CS_US_ASCII},
1619
        {C_US_ASCII,                CS_ANSI_X3_4_1968},
1620
        {C_UTF_8,                CS_UTF_8},
1621
        {C_UTF_7,                CS_UTF_7},
1622
        {C_ISO_8859_1,                CS_ISO_8859_1},
1623
        {C_ISO_8859_2,                CS_ISO_8859_2},
1624
        {C_ISO_8859_3,                CS_ISO_8859_3},
1625
        {C_ISO_8859_4,                CS_ISO_8859_4},
1626
        {C_ISO_8859_5,                CS_ISO_8859_5},
1627
        {C_ISO_8859_6,                CS_ISO_8859_6},
1628
        {C_ISO_8859_7,                CS_ISO_8859_7},
1629
        {C_ISO_8859_8,                CS_ISO_8859_8},
1630
        {C_ISO_8859_9,                CS_ISO_8859_9},
1631
        {C_ISO_8859_10,                CS_ISO_8859_10},
1632
        {C_ISO_8859_11,                CS_ISO_8859_11},
1633
        {C_ISO_8859_13,                CS_ISO_8859_13},
1634
        {C_ISO_8859_14,                CS_ISO_8859_14},
1635
        {C_ISO_8859_15,                CS_ISO_8859_15},
1636
        {C_BALTIC,                CS_BALTIC},
1637
        {C_CP932,                CS_CP932},
1638
        {C_CP1250,                CS_CP1250},
1639
        {C_CP1251,                CS_CP1251},
1640
        {C_CP1252,                CS_CP1252},
1641
        {C_CP1253,                CS_CP1253},
1642
        {C_CP1254,                CS_CP1254},
1643
        {C_CP1255,                CS_CP1255},
1644
        {C_CP1256,                CS_CP1256},
1645
        {C_CP1257,                CS_CP1257},
1646
        {C_CP1258,                CS_CP1258},
1647
        {C_WINDOWS_932,                CS_WINDOWS_932},
1648
        {C_WINDOWS_1250,        CS_WINDOWS_1250},
1649
        {C_WINDOWS_1251,        CS_WINDOWS_1251},
1650
        {C_WINDOWS_1252,        CS_WINDOWS_1252},
1651
        {C_WINDOWS_1253,        CS_WINDOWS_1253},
1652
        {C_WINDOWS_1254,        CS_WINDOWS_1254},
1653
        {C_WINDOWS_1255,        CS_WINDOWS_1255},
1654
        {C_WINDOWS_1256,        CS_WINDOWS_1256},
1655
        {C_WINDOWS_1257,        CS_WINDOWS_1257},
1656
        {C_WINDOWS_1258,        CS_WINDOWS_1258},
1657
        {C_KOI8_R,                CS_KOI8_R},
1658
        {C_KOI8_T,                CS_KOI8_T},
1659
        {C_KOI8_U,                CS_KOI8_U},
1660
        {C_ISO_2022_JP,                CS_ISO_2022_JP},
1661
        {C_ISO_2022_JP_2,        CS_ISO_2022_JP_2},
1662
        {C_ISO_2022_JP_3,        CS_ISO_2022_JP_3},
1663
        {C_EUC_JP,                CS_EUC_JP},
1664
        {C_EUC_JP,                CS_EUCJP},
1665
        {C_EUC_JP_MS,                CS_EUC_JP_MS},
1666
        {C_SHIFT_JIS,                CS_SHIFT_JIS},
1667
        {C_SHIFT_JIS,                CS_SHIFT__JIS},
1668
        {C_SHIFT_JIS,                CS_SJIS},
1669
        {C_ISO_2022_KR,                CS_ISO_2022_KR},
1670
        {C_EUC_KR,                CS_EUC_KR},
1671
        {C_ISO_2022_CN,                CS_ISO_2022_CN},
1672
        {C_EUC_CN,                CS_EUC_CN},
1673
        {C_GB2312,                CS_GB2312},
1674
        {C_GBK,                        CS_GBK},
1675
        {C_EUC_TW,                CS_EUC_TW},
1676
        {C_BIG5,                CS_BIG5},
1677
        {C_BIG5_HKSCS,                CS_BIG5_HKSCS},
1678
        {C_TIS_620,                CS_TIS_620},
1679
        {C_WINDOWS_874,                CS_WINDOWS_874},
1680
        {C_GEORGIAN_PS,                CS_GEORGIAN_PS},
1681
        {C_TCVN5712_1,                CS_TCVN5712_1},
1682
        {C_ISO_8859_16,                CS_ISO_8859_16},
1683
};
1684

    
1685
static const struct {
1686
        gchar *const locale;
1687
        CharSet charset;
1688
        CharSet out_charset;
1689
} locale_table[] = {
1690
        {"ja_JP.eucJP"        , C_EUC_JP        , C_ISO_2022_JP},
1691
        {"ja_JP.EUC-JP"        , C_EUC_JP        , C_ISO_2022_JP},
1692
        {"ja_JP.EUC"        , C_EUC_JP        , C_ISO_2022_JP},
1693
        {"ja_JP.ujis"        , C_EUC_JP        , C_ISO_2022_JP},
1694
        {"ja_JP.SJIS"        , C_SHIFT_JIS        , C_ISO_2022_JP},
1695
        {"ja_JP.JIS"        , C_ISO_2022_JP        , C_ISO_2022_JP},
1696
#ifdef G_OS_WIN32
1697
        {"ja_JP"        , C_CP932        , C_ISO_2022_JP},
1698
#elif defined(__APPLE__)
1699
        {"ja_JP"        , C_UTF_8        , C_ISO_2022_JP},
1700
#else
1701
        {"ja_JP"        , C_EUC_JP        , C_ISO_2022_JP},
1702
#endif
1703
        {"ko_KR.EUC-KR"        , C_EUC_KR        , C_EUC_KR},
1704
        {"ko_KR"        , C_EUC_KR        , C_EUC_KR},
1705
        {"zh_CN.GB2312"        , C_GB2312        , C_GB2312},
1706
        {"zh_CN.GBK"        , C_GBK                , C_GBK},
1707
        {"zh_CN"        , C_GB2312        , C_GB2312},
1708
        {"zh_HK"        , C_BIG5_HKSCS        , C_BIG5_HKSCS},
1709
        {"zh_TW.eucTW"        , C_EUC_TW        , C_BIG5},
1710
        {"zh_TW.EUC-TW"        , C_EUC_TW        , C_BIG5},
1711
        {"zh_TW.Big5"        , C_BIG5        , C_BIG5},
1712
        {"zh_TW"        , C_BIG5        , C_BIG5},
1713

    
1714
        {"ru_RU.KOI8-R"        , C_KOI8_R        , C_KOI8_R},
1715
        {"ru_RU.KOI8R"        , C_KOI8_R        , C_KOI8_R},
1716
        {"ru_RU.CP1251"        , C_WINDOWS_1251, C_KOI8_R},
1717
        {"ru_RU"        , C_ISO_8859_5        , C_KOI8_R},
1718
        {"tg_TJ"        , C_KOI8_T        , C_KOI8_T},
1719
        {"ru_UA"        , C_KOI8_U        , C_KOI8_U},
1720
        {"uk_UA.CP1251"        , C_WINDOWS_1251, C_KOI8_U},
1721
        {"uk_UA"        , C_KOI8_U        , C_KOI8_U},
1722

    
1723
        {"be_BY"        , C_WINDOWS_1251, C_WINDOWS_1251},
1724
        {"bg_BG"        , C_WINDOWS_1251, C_WINDOWS_1251},
1725

    
1726
        {"yi_US"        , C_WINDOWS_1255, C_WINDOWS_1255},
1727

    
1728
        {"af_ZA"        , C_ISO_8859_1  , C_ISO_8859_1},
1729
        {"br_FR"        , C_ISO_8859_1        , C_ISO_8859_1},
1730
        {"ca_ES"        , C_ISO_8859_1        , C_ISO_8859_1},
1731
        {"da_DK"        , C_ISO_8859_1        , C_ISO_8859_1},
1732
        {"de_AT"        , C_ISO_8859_1        , C_ISO_8859_1},
1733
        {"de_BE"        , C_ISO_8859_1        , C_ISO_8859_1},
1734
        {"de_CH"        , C_ISO_8859_1        , C_ISO_8859_1},
1735
        {"de_DE"        , C_ISO_8859_1        , C_ISO_8859_1},
1736
        {"de_LU"        , C_ISO_8859_1        , C_ISO_8859_1},
1737
        {"en_AU"        , C_ISO_8859_1        , C_ISO_8859_1},
1738
        {"en_BW"        , C_ISO_8859_1        , C_ISO_8859_1},
1739
        {"en_CA"        , C_ISO_8859_1        , C_ISO_8859_1},
1740
        {"en_DK"        , C_ISO_8859_1        , C_ISO_8859_1},
1741
        {"en_GB"        , C_ISO_8859_1        , C_ISO_8859_1},
1742
        {"en_HK"        , C_ISO_8859_1        , C_ISO_8859_1},
1743
        {"en_IE"        , C_ISO_8859_1        , C_ISO_8859_1},
1744
        {"en_NZ"        , C_ISO_8859_1        , C_ISO_8859_1},
1745
        {"en_PH"        , C_ISO_8859_1        , C_ISO_8859_1},
1746
        {"en_SG"        , C_ISO_8859_1        , C_ISO_8859_1},
1747
        {"en_US"        , C_ISO_8859_1        , C_ISO_8859_1},
1748
        {"en_ZA"        , C_ISO_8859_1        , C_ISO_8859_1},
1749
        {"en_ZW"        , C_ISO_8859_1        , C_ISO_8859_1},
1750
        {"es_AR"        , C_ISO_8859_1        , C_ISO_8859_1},
1751
        {"es_BO"        , C_ISO_8859_1        , C_ISO_8859_1},
1752
        {"es_CL"        , C_ISO_8859_1        , C_ISO_8859_1},
1753
        {"es_CO"        , C_ISO_8859_1        , C_ISO_8859_1},
1754
        {"es_CR"        , C_ISO_8859_1        , C_ISO_8859_1},
1755
        {"es_DO"        , C_ISO_8859_1        , C_ISO_8859_1},
1756
        {"es_EC"        , C_ISO_8859_1        , C_ISO_8859_1},
1757
        {"es_ES"        , C_ISO_8859_1        , C_ISO_8859_1},
1758
        {"es_GT"        , C_ISO_8859_1        , C_ISO_8859_1},
1759
        {"es_HN"        , C_ISO_8859_1        , C_ISO_8859_1},
1760
        {"es_MX"        , C_ISO_8859_1        , C_ISO_8859_1},
1761
        {"es_NI"        , C_ISO_8859_1        , C_ISO_8859_1},
1762
        {"es_PA"        , C_ISO_8859_1        , C_ISO_8859_1},
1763
        {"es_PE"        , C_ISO_8859_1        , C_ISO_8859_1},
1764
        {"es_PR"        , C_ISO_8859_1        , C_ISO_8859_1},
1765
        {"es_PY"        , C_ISO_8859_1        , C_ISO_8859_1},
1766
        {"es_SV"        , C_ISO_8859_1        , C_ISO_8859_1},
1767
        {"es_US"        , C_ISO_8859_1        , C_ISO_8859_1},
1768
        {"es_UY"        , C_ISO_8859_1        , C_ISO_8859_1},
1769
        {"es_VE"        , C_ISO_8859_1        , C_ISO_8859_1},
1770
        {"et_EE"        , C_ISO_8859_1        , C_ISO_8859_1},
1771
        {"eu_ES"        , C_ISO_8859_1        , C_ISO_8859_1},
1772
        {"fi_FI"        , C_ISO_8859_1        , C_ISO_8859_1},
1773
        {"fo_FO"        , C_ISO_8859_1        , C_ISO_8859_1},
1774
        {"fr_BE"        , C_ISO_8859_1        , C_ISO_8859_1},
1775
        {"fr_CA"        , C_ISO_8859_1        , C_ISO_8859_1},
1776
        {"fr_CH"        , C_ISO_8859_1        , C_ISO_8859_1},
1777
        {"fr_FR"        , C_ISO_8859_1        , C_ISO_8859_1},
1778
        {"fr_LU"        , C_ISO_8859_1        , C_ISO_8859_1},
1779
        {"ga_IE"        , C_ISO_8859_1        , C_ISO_8859_1},
1780
        {"gl_ES"        , C_ISO_8859_1        , C_ISO_8859_1},
1781
        {"gv_GB"        , C_ISO_8859_1        , C_ISO_8859_1},
1782
        {"id_ID"        , C_ISO_8859_1        , C_ISO_8859_1},
1783
        {"is_IS"        , C_ISO_8859_1        , C_ISO_8859_1},
1784
        {"it_CH"        , C_ISO_8859_1        , C_ISO_8859_1},
1785
        {"it_IT"        , C_ISO_8859_1        , C_ISO_8859_1},
1786
        {"kl_GL"        , C_ISO_8859_1        , C_ISO_8859_1},
1787
        {"kw_GB"        , C_ISO_8859_1        , C_ISO_8859_1},
1788
        {"ms_MY"        , C_ISO_8859_1        , C_ISO_8859_1},
1789
        {"nl_BE"        , C_ISO_8859_1        , C_ISO_8859_1},
1790
        {"nl_NL"        , C_ISO_8859_1        , C_ISO_8859_1},
1791
        {"nn_NO"        , C_ISO_8859_1        , C_ISO_8859_1},
1792
        {"no_NO"        , C_ISO_8859_1        , C_ISO_8859_1},
1793
        {"oc_FR"        , C_ISO_8859_1        , C_ISO_8859_1},
1794
        {"pt_BR"        , C_ISO_8859_1        , C_ISO_8859_1},
1795
        {"pt_PT"        , C_ISO_8859_1        , C_ISO_8859_1},
1796
        {"sq_AL"        , C_ISO_8859_1        , C_ISO_8859_1},
1797
        {"sv_FI"        , C_ISO_8859_1        , C_ISO_8859_1},
1798
        {"sv_SE"        , C_ISO_8859_1        , C_ISO_8859_1},
1799
        {"tl_PH"        , C_ISO_8859_1        , C_ISO_8859_1},
1800
        {"uz_UZ"        , C_ISO_8859_1        , C_ISO_8859_1},
1801
        {"wa_BE"        , C_ISO_8859_1        , C_ISO_8859_1},
1802

    
1803
        {"bs_BA"        , C_ISO_8859_2        , C_ISO_8859_2},
1804
        {"cs_CZ"        , C_ISO_8859_2        , C_ISO_8859_2},
1805
        {"hr_HR"        , C_ISO_8859_2        , C_ISO_8859_2},
1806
        {"hu_HU"        , C_ISO_8859_2        , C_ISO_8859_2},
1807
        {"pl_PL"        , C_ISO_8859_2        , C_ISO_8859_2},
1808
        {"ro_RO"        , C_ISO_8859_2        , C_ISO_8859_2},
1809
        {"sk_SK"        , C_ISO_8859_2        , C_ISO_8859_2},
1810
        {"sl_SI"        , C_ISO_8859_2        , C_ISO_8859_2},
1811

    
1812
        {"sr_YU@cyrillic"        , C_ISO_8859_5        , C_ISO_8859_5},
1813
        {"sr_YU"                , C_ISO_8859_2        , C_ISO_8859_2},
1814

    
1815
        {"mt_MT"                , C_ISO_8859_3        , C_ISO_8859_3},
1816

    
1817
        {"lt_LT.iso88594"        , C_ISO_8859_4        , C_ISO_8859_4},
1818
        {"lt_LT.ISO8859-4"        , C_ISO_8859_4        , C_ISO_8859_4},
1819
        {"lt_LT.ISO_8859-4"        , C_ISO_8859_4        , C_ISO_8859_4},
1820
        {"lt_LT"                , C_ISO_8859_13        , C_ISO_8859_13},
1821

    
1822
        {"mk_MK"        , C_ISO_8859_5        , C_ISO_8859_5},
1823

    
1824
        {"ar_AE"        , C_ISO_8859_6        , C_ISO_8859_6},
1825
        {"ar_BH"        , C_ISO_8859_6        , C_ISO_8859_6},
1826
        {"ar_DZ"        , C_ISO_8859_6        , C_ISO_8859_6},
1827
        {"ar_EG"        , C_ISO_8859_6        , C_ISO_8859_6},
1828
        {"ar_IQ"        , C_ISO_8859_6        , C_ISO_8859_6},
1829
        {"ar_JO"        , C_ISO_8859_6        , C_ISO_8859_6},
1830
        {"ar_KW"        , C_ISO_8859_6        , C_ISO_8859_6},
1831
        {"ar_LB"        , C_ISO_8859_6        , C_ISO_8859_6},
1832
        {"ar_LY"        , C_ISO_8859_6        , C_ISO_8859_6},
1833
        {"ar_MA"        , C_ISO_8859_6        , C_ISO_8859_6},
1834
        {"ar_OM"        , C_ISO_8859_6        , C_ISO_8859_6},
1835
        {"ar_QA"        , C_ISO_8859_6        , C_ISO_8859_6},
1836
        {"ar_SA"        , C_ISO_8859_6        , C_ISO_8859_6},
1837
        {"ar_SD"        , C_ISO_8859_6        , C_ISO_8859_6},
1838
        {"ar_SY"        , C_ISO_8859_6        , C_ISO_8859_6},
1839
        {"ar_TN"        , C_ISO_8859_6        , C_ISO_8859_6},
1840
        {"ar_YE"        , C_ISO_8859_6        , C_ISO_8859_6},
1841

    
1842
        {"el_GR"        , C_ISO_8859_7        , C_ISO_8859_7},
1843
        {"he_IL"        , C_ISO_8859_8        , C_ISO_8859_8},
1844
        {"iw_IL"        , C_ISO_8859_8        , C_ISO_8859_8},
1845
        {"tr_TR"        , C_ISO_8859_9        , C_ISO_8859_9},
1846

    
1847
        {"lv_LV"        , C_ISO_8859_13        , C_ISO_8859_13},
1848
        {"mi_NZ"        , C_ISO_8859_13        , C_ISO_8859_13},
1849

    
1850
        {"cy_GB"        , C_ISO_8859_14        , C_ISO_8859_14},
1851

    
1852
        {"ar_IN"        , C_UTF_8        , C_UTF_8},
1853
        {"en_IN"        , C_UTF_8        , C_UTF_8},
1854
        {"se_NO"        , C_UTF_8        , C_UTF_8},
1855
        {"ta_IN"        , C_UTF_8        , C_UTF_8},
1856
        {"te_IN"        , C_UTF_8        , C_UTF_8},
1857
        {"ur_PK"        , C_UTF_8        , C_UTF_8},
1858

    
1859
        {"th_TH"        , C_TIS_620        , C_TIS_620},
1860
        /* {"th_TH"        , C_WINDOWS_874}, */
1861
        /* {"th_TH"        , C_ISO_8859_11}, */
1862

    
1863
        {"ka_GE"        , C_GEORGIAN_PS        , C_GEORGIAN_PS},
1864
        {"vi_VN.TCVN"        , C_TCVN5712_1        , C_TCVN5712_1},
1865

    
1866
        {"C"                        , C_US_ASCII        , C_US_ASCII},
1867
        {"POSIX"                , C_US_ASCII        , C_US_ASCII},
1868
        {"ANSI_X3.4-1968"        , C_US_ASCII        , C_US_ASCII},
1869
};
1870

    
1871
static GHashTable *conv_get_charset_to_str_table(void)
1872
{
1873
        static GHashTable *table;
1874
        gint i;
1875
        S_LOCK_DEFINE_STATIC(table);
1876

    
1877
        S_LOCK(table);
1878

    
1879
        if (table) {
1880
                S_UNLOCK(table);
1881
                return table;
1882
        }
1883

    
1884
        table = g_hash_table_new(NULL, g_direct_equal);
1885

    
1886
        for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
1887
                if (g_hash_table_lookup(table, GUINT_TO_POINTER(charsets[i].charset))
1888
                    == NULL) {
1889
                        g_hash_table_insert
1890
                                (table, GUINT_TO_POINTER(charsets[i].charset),
1891
                                 charsets[i].name);
1892
                }
1893
        }
1894

    
1895
        S_UNLOCK(table);
1896
        return table;
1897
}
1898

    
1899
static GHashTable *conv_get_charset_from_str_table(void)
1900
{
1901
        static GHashTable *table;
1902
        S_LOCK_DEFINE_STATIC(table);
1903

    
1904
        gint i;
1905

    
1906
        S_LOCK(table);
1907

    
1908
        if (table) {
1909
                S_UNLOCK(table);
1910
                return table;
1911
        }
1912

    
1913
        table = g_hash_table_new(str_case_hash, str_case_equal);
1914

    
1915
        for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
1916
                g_hash_table_insert(table, charsets[i].name,
1917
                                    GUINT_TO_POINTER(charsets[i].charset));
1918
        }
1919

    
1920
        S_UNLOCK(table);
1921
        return table;
1922
}
1923

    
1924
/*
 * Look up the name registered for charset in the CharSet -> name table.
 * Returns NULL when the table has no entry for it.
 */
const gchar *conv_get_charset_str(CharSet charset)
{
        GHashTable *to_str = conv_get_charset_to_str_table();

        return g_hash_table_lookup(to_str, GUINT_TO_POINTER(charset));
}
1931

    
1932
CharSet conv_get_charset_from_str(const gchar *charset)
1933
{
1934
        GHashTable *table;
1935

    
1936
        if (!charset) return C_AUTO;
1937

    
1938
        table = conv_get_charset_from_str_table();
1939
        return GPOINTER_TO_UINT(g_hash_table_lookup(table, charset));
1940
}
1941

    
1942
CharSet conv_get_locale_charset(void)
1943
{
1944
        static CharSet cur_charset = -1;
1945
        const gchar *cur_locale;
1946
        const gchar *p;
1947
#if !defined(G_OS_WIN32) && !defined(__APPLE__)
1948
        gint i;
1949
#endif
1950
        S_LOCK_DEFINE_STATIC(cur_charset);
1951

    
1952
        S_LOCK(cur_charset);
1953

    
1954
        if (cur_charset != -1) {
1955
                S_UNLOCK(cur_charset);
1956
                return cur_charset;
1957
        }
1958

    
1959
        cur_locale = conv_get_current_locale();
1960
        if (!cur_locale) {
1961
                cur_charset = C_US_ASCII;
1962
                S_UNLOCK(cur_charset);
1963
                return cur_charset;
1964
        }
1965

    
1966
        if (strcasestr(cur_locale, "UTF-8") || strcasestr(cur_locale, "utf8")) {
1967
                cur_charset = C_UTF_8;
1968
                S_UNLOCK(cur_charset);
1969
                return cur_charset;
1970
        }
1971

    
1972
        if ((p = strcasestr(cur_locale, "@euro")) && p[5] == '\0') {
1973
                cur_charset = C_ISO_8859_15;
1974
                S_UNLOCK(cur_charset);
1975
                return cur_charset;
1976
        }
1977

    
1978
#if defined(G_OS_WIN32) || defined(__APPLE__)
1979
        cur_charset = conv_get_charset_from_str(conv_get_locale_charset_str());
1980

    
1981
        S_UNLOCK(cur_charset);
1982
        return cur_charset;
1983
#else
1984
        for (i = 0; i < sizeof(locale_table) / sizeof(locale_table[0]); i++) {
1985
                const gchar *p;
1986

    
1987
                /* "ja_JP.EUC" matches with "ja_JP.eucJP", "ja_JP.EUC" and
1988
                   "ja_JP". "ja_JP" matches with "ja_JP.xxxx" and "ja" */
1989
                if (!g_ascii_strncasecmp(cur_locale, locale_table[i].locale,
1990
                                         strlen(locale_table[i].locale))) {
1991
                        cur_charset = locale_table[i].charset;
1992
                        S_UNLOCK(cur_charset);
1993
                        return cur_charset;
1994
                } else if ((p = strchr(locale_table[i].locale, '_')) &&
1995
                         !strchr(p + 1, '.')) {
1996
                        if (strlen(cur_locale) == 2 &&
1997
                            !g_ascii_strncasecmp(cur_locale,
1998
                                                 locale_table[i].locale, 2)) {
1999
                                cur_charset = locale_table[i].charset;
2000
                                S_UNLOCK(cur_charset);
2001
                                return cur_charset;
2002
                        }
2003
                }
2004
        }
2005

    
2006
        cur_charset = C_AUTO;
2007
        S_UNLOCK(cur_charset);
2008
        return cur_charset;
2009
#endif
2010
}
2011

    
2012
const gchar *conv_get_locale_charset_str(void)
2013
{
2014
        static const gchar *codeset = NULL;
2015
        S_LOCK_DEFINE_STATIC(codeset);
2016

    
2017
        S_LOCK(codeset);
2018

    
2019
        if (!codeset) {
2020
#if defined(G_OS_WIN32) || defined(__APPLE__)
2021
                g_get_charset(&codeset);
2022
                if (!strcmp(codeset, CS_US_ASCII) ||
2023
                    !strcmp(codeset, CS_ANSI_X3_4_1968))
2024
                        codeset = CS_INTERNAL;
2025
#else
2026
                codeset = conv_get_charset_str(conv_get_locale_charset());
2027
#endif
2028
        }
2029

    
2030
        if (codeset) {
2031
                S_UNLOCK(codeset);
2032
                return codeset;
2033
        }
2034

    
2035
        S_UNLOCK(codeset);
2036
        return CS_INTERNAL;
2037
}
2038

    
2039
CharSet conv_get_internal_charset(void)
2040
{
2041
        return C_INTERNAL;
2042
}
2043

    
2044
const gchar *conv_get_internal_charset_str(void)
2045
{
2046
        return CS_INTERNAL;
2047
}
2048

    
2049
CharSet conv_get_outgoing_charset(void)
2050
{
2051
        static CharSet out_charset = -1;
2052
        const gchar *cur_locale;
2053
        const gchar *p;
2054
        gint i;
2055
        S_LOCK_DEFINE_STATIC(out_charset);
2056

    
2057
        S_LOCK(out_charset);
2058

    
2059
        if (out_charset != -1) {
2060
                S_UNLOCK(out_charset);
2061
                return out_charset;
2062
        }
2063

    
2064
        cur_locale = conv_get_current_locale();
2065
        if (!cur_locale) {
2066
                out_charset = C_AUTO;
2067
                S_UNLOCK(out_charset);
2068
                return out_charset;
2069
        }
2070

    
2071
        if ((p = strcasestr(cur_locale, "@euro")) && p[5] == '\0') {
2072
                out_charset = C_ISO_8859_15;
2073
                S_UNLOCK(out_charset);
2074
                return out_charset;
2075
        }
2076

    
2077
        for (i = 0; i < sizeof(locale_table) / sizeof(locale_table[0]); i++) {
2078
                const gchar *p;
2079

    
2080
                if (!g_ascii_strncasecmp(cur_locale, locale_table[i].locale,
2081
                                         strlen(locale_table[i].locale))) {
2082
                        out_charset = locale_table[i].out_charset;
2083
                        break;
2084
                } else if ((p = strchr(locale_table[i].locale, '_')) &&
2085
                         !strchr(p + 1, '.')) {
2086
                        if (strlen(cur_locale) == 2 &&
2087
                            !g_ascii_strncasecmp(cur_locale,
2088
                                                 locale_table[i].locale, 2)) {
2089
                                out_charset = locale_table[i].out_charset;
2090
                                break;
2091
                        }
2092
                }
2093
        }
2094

    
2095
        S_UNLOCK(out_charset);
2096
        return out_charset;
2097
}
2098

    
2099
const gchar *conv_get_outgoing_charset_str(void)
2100
{
2101
        CharSet out_charset;
2102
        const gchar *str;
2103

    
2104
        out_charset = conv_get_outgoing_charset();
2105
        str = conv_get_charset_str(out_charset);
2106

    
2107
        return str ? str : CS_UTF_8;
2108
}
2109

    
2110
/*
 * Return TRUE when encoding is one of the charsets this file treats as a
 * multibyte encoding (listed below), FALSE for every other value.
 */
gboolean conv_is_multibyte_encoding(CharSet encoding)
{
        static const CharSet multibyte[] = {
                C_EUC_JP,
                C_EUC_JP_MS,
                C_EUC_KR,
                C_EUC_TW,
                C_EUC_CN,
                C_ISO_2022_JP,
                C_ISO_2022_JP_2,
                C_ISO_2022_JP_3,
                C_ISO_2022_KR,
                C_ISO_2022_CN,
                C_SHIFT_JIS,
                C_CP932,
                C_GB2312,
                C_GBK,
                C_BIG5,
                C_UTF_8,
                C_UTF_7,
        };
        guint idx;

        for (idx = 0; idx < sizeof(multibyte) / sizeof(multibyte[0]); idx++) {
                if (multibyte[idx] == encoding)
                        return TRUE;
        }

        return FALSE;
}
2135

    
2136
/*
 * Return the name of the current locale, or NULL when none can be found.
 *
 * On Win32 the name comes from g_win32_getlocale().  Elsewhere the first
 * non-empty value of LC_ALL, LC_CTYPE and LANG is used and, when
 * HAVE_LOCALE_H is defined, setlocale(LC_CTYPE, NULL) as the last resort.
 *
 * The result is cached in a static variable.  The strings handed out by
 * g_getenv() and setlocale() may be overwritten by later calls to those
 * functions, so a private copy is cached rather than the raw pointer.  The
 * copy lives for the rest of the process and is intentionally never freed.
 */
const gchar *conv_get_current_locale(void)
{
        static const gchar *cur_locale;
        S_LOCK_DEFINE_STATIC(cur_locale);

        S_LOCK(cur_locale);

        if (!cur_locale) {
#ifdef G_OS_WIN32
                cur_locale = g_win32_getlocale();
#else
                const gchar *name;

                name = g_getenv("LC_ALL");
                if (!name || *name == '\0')
                        name = g_getenv("LC_CTYPE");
                if (!name || *name == '\0')
                        name = g_getenv("LANG");
#ifdef HAVE_LOCALE_H
                if (!name || *name == '\0')
                        name = setlocale(LC_CTYPE, NULL);
#endif /* HAVE_LOCALE_H */

                /* g_strdup(NULL) is NULL, so "not found" stays NULL */
                cur_locale = g_strdup(name);
#endif /* G_OS_WIN32 */

                debug_print("current locale: %s\n",
                            cur_locale ? cur_locale : "(none)");
        }

        S_UNLOCK(cur_locale);
        return cur_locale;
}
2165

    
2166
gboolean conv_is_ja_locale(void)
2167
{
2168
        static gint is_ja_locale = -1;
2169
        const gchar *cur_locale;
2170
        S_LOCK_DEFINE_STATIC(is_ja_locale);
2171

    
2172
        S_LOCK(is_ja_locale);
2173

    
2174
        if (is_ja_locale != -1) {
2175
                S_UNLOCK(is_ja_locale);
2176
                return is_ja_locale != 0;
2177
        }
2178

    
2179
        is_ja_locale = 0;
2180
        cur_locale = conv_get_current_locale();
2181
        if (cur_locale) {
2182
                if (g_ascii_strncasecmp(cur_locale, "ja", 2) == 0)
2183
                        is_ja_locale = 1;
2184
        }
2185

    
2186
        S_UNLOCK(is_ja_locale);
2187
        return is_ja_locale != 0;
2188
}
2189

    
2190
/* Store type in the file-level conv_ad_type setting. */
void conv_set_autodetect_type(ConvADType type)
{
        conv_ad_type = type;
}
2194

    
2195
ConvADType conv_get_autodetect_type(void)
2196
{
2197
        return conv_ad_type;
2198
}
2199

    
2200
gchar *conv_unmime_header(const gchar *str, const gchar *default_encoding)
2201
{
2202
        gchar *buf;
2203
        gchar *decoded_str;
2204

    
2205
        if (is_ascii_str(str))
2206
                return unmime_header(str);
2207

    
2208
        if (default_encoding) {
2209
                buf = conv_codeset_strdup
2210
                        (str, default_encoding, CS_INTERNAL);
2211
                if (buf) {
2212
                        decoded_str = unmime_header(buf);
2213
                        g_free(buf);
2214
                        return decoded_str;
2215
                }
2216
        }
2217

    
2218
        if (conv_ad_type == C_AD_JAPANESE ||
2219
            (conv_ad_type == C_AD_BY_LOCALE && conv_is_ja_locale()))
2220
                buf = conv_anytodisp(str, NULL);
2221
        else
2222
                buf = conv_localetodisp(str, NULL);
2223

    
2224
        decoded_str = unmime_header(buf);
2225
        g_free(buf);
2226

    
2227
        return decoded_str;
2228
}
2229

    
2230
/* Line-length limits.  MAX_LINELEN drives the folding done below. */
#define MAX_LINELEN                76
#define MAX_HARD_LINELEN        996

/* Opening and closing delimiters of a MIME encoded-word. */
#define MIMESEP_BEGIN                "=?"
#define MIMESEP_END                "?="

/* Output length for len input bytes: 4 characters per started 3-byte group. */
#define B64LEN(len)        ((len) / 3 * 4 + ((len) % 3 ? 4 : 0))

/*
 * Folding helper for the header encoder.  It relies on the locals len, dest,
 * destp, srcp and left of the function it is expanded in.
 *
 * 1. When fewer than MAX_LINELEN + 2 bytes remain in dest, the output is
 *    NUL-terminated and the enclosing function RETURNS.
 * 2. Otherwise, when cond holds, input remains, something has been written
 *    and the current line is not fresh (left < MAX_LINELEN - 1): a trailing
 *    blank already written is dropped, or (plain text only) a leading blank
 *    of the input is skipped; then, if input still remains, "\n " is
 *    emitted and left is reset to MAX_LINELEN - 1.
 */
#define LBREAK_IF_REQUIRED(cond, is_plain_text)                                \
{                                                                        \
        if (len - (destp - dest) < MAX_LINELEN + 2) {                        \
                *destp = '\0';                                                \
                return;                                                        \
        }                                                                \
        if ((cond) && *srcp &&                                                \
            destp > dest && left < MAX_LINELEN - 1) {                        \
                if (g_ascii_isspace(*(destp - 1)))                        \
                        destp--;                                        \
                else if ((is_plain_text) && g_ascii_isspace(*srcp))        \
                        srcp++;                                                \
                if (*srcp) {                                                \
                        *destp++ = '\n';                                \
                        *destp++ = ' ';                                        \
                        left = MAX_LINELEN - 1;                                \
                }                                                        \
        }                                                                \
}
2259

    
2260
void conv_encode_header(gchar *dest, gint len, const gchar *src,
2261
                        gint header_len, gboolean addr_field,
2262
                        const gchar *out_encoding)
2263
{
2264
        const gchar *src_encoding;
2265
        gint mimestr_len;
2266
        gchar *mimesep_enc;
2267
        gint left;
2268
        const gchar *srcp = src;
2269
        gchar *destp = dest;
2270
        gboolean use_base64;
2271

    
2272
        g_return_if_fail(g_utf8_validate(src, -1, NULL) == TRUE);
2273

    
2274
        src_encoding = CS_INTERNAL;
2275
        if (!out_encoding)
2276
                out_encoding = conv_get_outgoing_charset_str();
2277
        if (!strcmp(out_encoding, CS_US_ASCII))
2278
                out_encoding = CS_ISO_8859_1;
2279

    
2280
        if (!g_ascii_strncasecmp(out_encoding, "ISO-8859-", 9) ||
2281
            !g_ascii_strncasecmp(out_encoding, "KOI8-", 5) ||
2282
            !g_ascii_strncasecmp(out_encoding, "Windows-", 8)) {
2283
                use_base64 = FALSE;
2284
                mimesep_enc = "?Q?";
2285
        } else {
2286
                use_base64 = TRUE;
2287
                mimesep_enc = "?B?";
2288
        }
2289

    
2290
        mimestr_len = strlen(MIMESEP_BEGIN) + strlen(mimesep_enc) +
2291
                strlen(MIMESEP_END);
2292

    
2293
        left = MAX_LINELEN - header_len;
2294

    
2295
        while (*srcp) {
2296
                gboolean in_quote = FALSE;
2297

    
2298
                LBREAK_IF_REQUIRED(left <= 0, TRUE);
2299

    
2300
                while (g_ascii_isspace(*srcp)) {
2301
                        *destp++ = *srcp++;
2302
                        left--;
2303
                        LBREAK_IF_REQUIRED(left <= 0, TRUE);
2304
                }
2305

    
2306
                /* output as it is if the next word is ASCII string */
2307
                if (!is_next_nonascii(srcp)) {
2308
                        gint word_len;
2309

    
2310
                        word_len = get_next_word_len(srcp);
2311
                        LBREAK_IF_REQUIRED(left < word_len, TRUE);
2312
                        while (word_len > 0) {
2313
                                LBREAK_IF_REQUIRED(left + (MAX_HARD_LINELEN - MAX_LINELEN) <= 0, TRUE)
2314
                                *destp++ = *srcp++;
2315
                                left--;
2316
                                word_len--;
2317
                        }
2318

    
2319
                        continue;
2320
                }
2321

    
2322
                /* don't include parentheses in encoded strings */
2323
                if (addr_field && (*srcp == '(' || *srcp == ')')) {
2324
                        LBREAK_IF_REQUIRED(left < 2, FALSE);
2325
                        *destp++ = *srcp++;
2326
                        left--;
2327
                }
2328

    
2329
                while (1) {
2330
                        gint mb_len = 0;
2331
                        gint cur_len = 0;
2332
                        gchar *part_str;
2333
                        gchar *out_str;
2334
                        gchar *enc_str;
2335
                        const gchar *p = srcp;
2336
                        const gchar *block_encoding = out_encoding;
2337
                        gint out_str_len;
2338
                        gint out_enc_str_len;
2339
                        gint mime_block_len;
2340
                        gint error = 0;
2341
                        gboolean cont = FALSE;
2342

    
2343
                        while (*p != '\0') {
2344
                                if (*p == '"')
2345
                                        in_quote ^= TRUE;
2346
                                else if (!in_quote) {
2347
                                        if (g_ascii_isspace(*p) &&
2348
                                            !is_next_nonascii(p + 1))
2349
                                                break;
2350
                                        /* don't include parentheses in encoded
2351
                                           strings */
2352
                                        if (addr_field &&
2353
                                            (*p == '(' || *p == ')'))
2354
                                                break;
2355
                                }
2356

    
2357
                                mb_len = g_utf8_skip[*(guchar *)p];
2358

    
2359
                                part_str = g_strndup(srcp, cur_len + mb_len);
2360
                                out_str = conv_codeset_strdup_full
2361
                                        (part_str, src_encoding, block_encoding,
2362
                                         &error);
2363
                                if (!out_str || error != 0) {
2364
                                        g_warning("conv_encode_header(): code conversion failed. Keeping UTF-8.\n");
2365
                                        out_str = g_strdup(part_str);
2366
                                        block_encoding = CS_UTF_8;
2367
                                }
2368
                                out_str_len = strlen(out_str);
2369

    
2370
                                if (use_base64)
2371
                                        out_enc_str_len = B64LEN(out_str_len);
2372
                                else
2373
                                        out_enc_str_len =
2374
                                                qp_get_q_encoding_len
2375
                                                        ((guchar *)out_str);
2376

    
2377
                                g_free(out_str);
2378
                                g_free(part_str);
2379

    
2380
                                if (mimestr_len + strlen(block_encoding) + out_enc_str_len <= left) {
2381
                                        cur_len += mb_len;
2382
                                        p += mb_len;
2383
                                } else if (cur_len == 0) {
2384
                                        LBREAK_IF_REQUIRED(1, FALSE);
2385
                                        if (*p == '"')
2386
                                                in_quote ^= TRUE;
2387
                                        continue;
2388
                                } else {
2389
                                        cont = TRUE;
2390
                                        if (*p == '"')
2391
                                                in_quote ^= TRUE;
2392
                                        break;
2393
                                }
2394
                        }
2395

    
2396
                        if (cur_len > 0) {
2397
                                error = 0;
2398
                                part_str = g_strndup(srcp, cur_len);
2399
                                out_str = conv_codeset_strdup_full
2400
                                        (part_str, src_encoding, block_encoding,
2401
                                         &error);
2402
                                if (!out_str || error != 0) {
2403
                                        g_warning("conv_encode_header(): code conversion failed\n");
2404
                                        out_str = g_strdup(part_str);
2405
                                        block_encoding = CS_UTF_8;
2406
                                }
2407
                                out_str_len = strlen(out_str);
2408

    
2409
                                if (use_base64)
2410
                                        out_enc_str_len = B64LEN(out_str_len);
2411
                                else
2412
                                        out_enc_str_len =
2413
                                                qp_get_q_encoding_len
2414
                                                        ((guchar *)out_str);
2415

    
2416
                                enc_str = g_malloc(out_enc_str_len + 1);
2417
                                if (use_base64)
2418
                                        base64_encode(enc_str,
2419
                                                      (guchar *)out_str,
2420
                                                      out_str_len);
2421
                                else
2422
                                        qp_q_encode(enc_str, (guchar *)out_str);
2423

    
2424
                                /* output MIME-encoded string block */
2425
                                mime_block_len = mimestr_len +
2426
                                        strlen(block_encoding) +
2427
                                        strlen(enc_str);
2428
                                g_snprintf(destp, mime_block_len + 1,
2429
                                           MIMESEP_BEGIN "%s%s%s" MIMESEP_END,
2430
                                           block_encoding, mimesep_enc,
2431
                                           enc_str);
2432
                                destp += mime_block_len;
2433
                                srcp += cur_len;
2434

    
2435
                                left -= mime_block_len;
2436

    
2437
                                g_free(enc_str);
2438
                                g_free(out_str);
2439
                                g_free(part_str);
2440
                        }
2441

    
2442
                        LBREAK_IF_REQUIRED(cont, FALSE);
2443

    
2444
                        if (cur_len == 0)
2445
                                break;
2446
                }
2447
        }
2448

    
2449
        *destp = '\0';
2450
}
2451

    
2452
#undef LBREAK_IF_REQUIRED
2453

    
2454
/* Store the uppercase hexadecimal digit for val (expected 0..15) at *outp. */
#define INT_TO_HEX_UPPER(outp, val)             \
do {                                            \
        if ((val) < 10)                         \
                *(outp) = '0' + (val);          \
        else                                    \
                *(outp) = 'A' + (val) - 10;     \
} while (0)

/*
 * Non-zero when byte value c has to be written as %XX in an RFC 2231
 * extended parameter value: control characters (including DEL, 0x7f),
 * bytes above 0x7f, and the special characters listed below.
 */
#define IS_ESCAPE_CHAR(c)                                       \
        ((c) < 0x20 || (c) >= 0x7f ||                           \
         strchr("\t \r\n*'%!#$&~`,{}|()<>@,;:\\\"/[]?=", (c)) != NULL)
2465

    
2466
static gchar *encode_rfc2231_filename(const gchar *str)
2467
{
2468
        const gchar *p;
2469
        gchar *out;
2470
        gchar *outp;
2471

    
2472
        outp = out = g_malloc(strlen(str) * 3 + 1);
2473

    
2474
        for (p = str; *p != '\0'; ++p) {
2475
                guchar ch = *(guchar *)p;
2476

    
2477
                if (IS_ESCAPE_CHAR(ch)) {
2478
                        *outp++ = '%';
2479
                        INT_TO_HEX_UPPER(outp, ch >> 4);
2480
                        ++outp;
2481
                        INT_TO_HEX_UPPER(outp, ch & 0x0f);
2482
                        ++outp;
2483
                } else
2484
                        *outp++ = ch;
2485
        }
2486

    
2487
        *outp = '\0';
2488
        return out;
2489
}
2490

    
2491
gchar *conv_encode_filename(const gchar *src, const gchar *param_name,
2492
                            const gchar *out_encoding)
2493
{
2494
        gint name_len, max_linelen;
2495
        gchar *out_str, *enc_str;
2496
        gchar cur_param[80];
2497
        GString *string;
2498
        gint count = 0;
2499
        gint cur_left_len;
2500
        gchar *p;
2501

    
2502
        g_return_val_if_fail(src != NULL, NULL);
2503
        g_return_val_if_fail(param_name != NULL, NULL);
2504

    
2505
        if (is_ascii_str(src))
2506
                return g_strdup_printf(" %s=\"%s\"", param_name, src);
2507

    
2508
        name_len = strlen(param_name);
2509
        max_linelen = MAX_LINELEN - name_len - 3;
2510

    
2511
        if (!out_encoding)
2512
                out_encoding = conv_get_outgoing_charset_str();
2513
        if (!strcmp(out_encoding, CS_US_ASCII))
2514
                out_encoding = CS_ISO_8859_1;
2515

    
2516
        out_str = conv_codeset_strdup(src, CS_INTERNAL, out_encoding);
2517
        if (!out_str)
2518
                return NULL;
2519
        enc_str = encode_rfc2231_filename(out_str);
2520
        g_free(out_str);
2521

    
2522
        if (strlen(enc_str) <= max_linelen) {
2523
                gchar *ret;
2524
                ret = g_strdup_printf(" %s*=%s''%s",
2525
                                      param_name, out_encoding, enc_str);
2526
                g_free(enc_str);
2527
                return ret;
2528
        }
2529

    
2530
        string = g_string_new(NULL);
2531
        g_string_printf(string, " %s*0*=%s''", param_name, out_encoding);
2532
        cur_left_len = MAX_LINELEN - string->len;
2533

    
2534
        p = enc_str;
2535

    
2536
        while (*p != '\0') {
2537
                if ((*p == '%' && cur_left_len < 4) ||
2538
                    (*p != '%' && cur_left_len < 2)) {
2539
                        gint len;
2540

    
2541
                        g_string_append(string, ";\n");
2542
                        ++count;
2543
                        len = g_snprintf(cur_param, sizeof(cur_param),
2544
                                         " %s*%d*=", param_name, count);
2545
                        g_string_append(string, cur_param);
2546
                        cur_left_len = MAX_LINELEN - len;
2547
                }
2548

    
2549
                if (*p == '%') {
2550
                        g_string_append_len(string, p, 3);
2551
                        p += 3;
2552
                        cur_left_len -= 3;
2553
                } else {
2554
                        g_string_append_c(string, *p);
2555
                        ++p;
2556
                        --cur_left_len;
2557
                }
2558
        }
2559

    
2560
        g_free(enc_str);
2561

    
2562
        return g_string_free(string, FALSE);
2563
}
2564

    
2565
gint conv_copy_file(const gchar *src, const gchar *dest, const gchar *encoding)
2566
{
2567
        FILE *src_fp, *dest_fp;
2568
        gchar buf[BUFFSIZE];
2569
        CodeConverter *conv;
2570
        gboolean err = FALSE;
2571

    
2572
        if ((src_fp = g_fopen(src, "rb")) == NULL) {
2573
                FILE_OP_ERROR(src, "fopen");
2574
                return -1;
2575
        }
2576
        if ((dest_fp = g_fopen(dest, "wb")) == NULL) {
2577
                FILE_OP_ERROR(dest, "fopen");
2578
                fclose(src_fp);
2579
                return -1;
2580
        }
2581

    
2582
        if (change_file_mode_rw(dest_fp, dest) < 0) {
2583
                FILE_OP_ERROR(dest, "chmod");
2584
                g_warning("can't change file mode\n");
2585
        }
2586

    
2587
        conv = conv_code_converter_new(encoding, NULL);
2588

    
2589
        while (fgets(buf, sizeof(buf), src_fp) != NULL) {
2590
                gchar *outbuf;
2591

    
2592
                outbuf = conv_convert(conv, buf);
2593
                if (outbuf) {
2594
                        fputs(outbuf, dest_fp);
2595
                        g_free(outbuf);
2596
                } else
2597
                        fputs(buf, dest_fp);
2598
        }
2599

    
2600
        conv_code_converter_destroy(conv);
2601

    
2602
        if (ferror(src_fp)) {
2603
                FILE_OP_ERROR(src, "fgets");
2604
                err = TRUE;
2605
        }
2606
        fclose(src_fp);
2607
        if (fclose(dest_fp) == EOF) {
2608
                FILE_OP_ERROR(dest, "fclose");
2609
                err = TRUE;
2610
        }
2611
        if (err) {
2612
                g_unlink(dest);
2613
                return -1;
2614
        }
2615

    
2616
        return 0;
2617
}
2618

    
2619
gint conv_copy_dir(const gchar *src, const gchar *dest, const gchar *encoding)
2620
{
2621
        GDir *dir;
2622
        const gchar *dir_name;
2623
        gchar *src_file;
2624
        gchar *dest_file;
2625

    
2626
        if ((dir = g_dir_open(src, 0, NULL)) == NULL) {
2627
                g_warning("failed to open directory: %s\n", src);
2628
                return -1;
2629
        }
2630

    
2631
        if (make_dir_hier(dest) < 0) {
2632
                g_dir_close(dir);
2633
                return -1;
2634
        }
2635

    
2636
        while ((dir_name = g_dir_read_name(dir)) != NULL) {
2637
                src_file = g_strconcat(src, G_DIR_SEPARATOR_S, dir_name, NULL);
2638
                dest_file = g_strconcat(dest, G_DIR_SEPARATOR_S, dir_name,
2639
                                        NULL);
2640
                if (is_file_exist(src_file))
2641
                        conv_copy_file(src_file, dest_file, encoding);
2642
                g_free(dest_file);
2643
                g_free(src_file);
2644
        }
2645

    
2646
        g_dir_close(dir);
2647

    
2648
        return 0;
2649
}
2650

    
2651
CharSet conv_check_file_encoding(const gchar *file)
2652
{
2653
        FILE *fp;
2654
        gchar buf[BUFFSIZE];
2655
        CharSet enc;
2656
        const gchar *enc_str;
2657
        gboolean is_locale = TRUE, is_utf8 = TRUE;
2658

    
2659
        g_return_val_if_fail(file != NULL, C_AUTO);
2660

    
2661
        enc = conv_get_locale_charset();
2662
        enc_str = conv_get_locale_charset_str();
2663
        if (enc == C_UTF_8)
2664
                is_locale = FALSE;
2665

    
2666
        if ((fp = g_fopen(file, "rb")) == NULL) {
2667
                FILE_OP_ERROR(file, "fopen");
2668
                return C_AUTO;
2669
        }
2670

    
2671
        while (fgets(buf, sizeof(buf), fp) != NULL) {
2672
                gchar *str;
2673
                gint error = 0;
2674

    
2675
                if (is_locale) {
2676
                        str = conv_codeset_strdup_full(buf, enc_str,
2677
                                                       CS_INTERNAL, &error);
2678
                        if (!str || error != 0)
2679
                                is_locale = FALSE;
2680
                        g_free(str);
2681
                }
2682

    
2683
                if (is_utf8 && g_utf8_validate(buf, -1, NULL) == FALSE) {
2684
                        is_utf8 = FALSE;
2685
                }
2686

    
2687
                if (!is_locale && !is_utf8)
2688
                        break;
2689
        }
2690

    
2691
        fclose(fp);
2692

    
2693
        if (is_locale)
2694
                return enc;
2695
        else if (is_utf8)
2696
                return C_UTF_8;
2697
        else
2698
                return C_AUTO;
2699
}
2700

    
2701
gchar *conv_filename_from_utf8(const gchar *utf8_file)
2702
{
2703
        gchar *fs_file;
2704
        GError *error = NULL;
2705

    
2706
        g_return_val_if_fail(utf8_file != NULL, NULL);
2707

    
2708
        fs_file = g_filename_from_utf8(utf8_file, -1, NULL, NULL, &error);
2709
        if (error) {
2710
                g_warning("failed to convert encoding of file name: %s\n",
2711
                          error->message);
2712
                g_error_free(error);
2713
        }
2714
        if (!fs_file)
2715
                fs_file = g_strdup(utf8_file);
2716

    
2717
        return fs_file;
2718
}
2719

    
2720
gchar *conv_filename_to_utf8(const gchar *fs_file)
2721
{
2722
        gchar *utf8_file;
2723
        GError *error = NULL;
2724

    
2725
        g_return_val_if_fail(fs_file != NULL, NULL);
2726

    
2727
        utf8_file = g_filename_to_utf8(fs_file, -1, NULL, NULL, &error);
2728
        if (error) {
2729
                g_warning("failed to convert encoding of file name: %s\n",
2730
                          error->message);
2731
                g_error_free(error);
2732
        }
2733
        if (!utf8_file)
2734
                utf8_file = g_strdup(fs_file);
2735

    
2736
        return utf8_file;
2737
}