Statistics
| Branch: | Tag: | Revision:

root / libsylph / codeconv.c @ 8d7dcace

History | View | Annotate | Download (62.1 kB)

1
/*
 * LibSylph -- E-Mail client library
 * Copyright (C) 1999-2011 Hiroyuki Yamamoto
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
19
20
#ifdef HAVE_CONFIG_H
21
#  include "config.h"
22
#endif
23
24
#include "defs.h"
25
26
#include <glib.h>
27
#include <string.h>
28
#include <ctype.h>
29
#include <stdlib.h>
30
#include <errno.h>
31
32
#if HAVE_LOCALE_H
33
#  include <locale.h>
34
#endif
35
36
#include <iconv.h>
37
38
#include "codeconv.h"
39
#include "unmime.h"
40
#include "base64.h"
41
#include "quoted-printable.h"
42
#include "utils.h"
43
44
/*
 * Character set currently selected in an ISO-2022-JP style byte stream.
 * The JIS converters in this file switch between these states when they
 * read or emit an escape sequence (or SO/SI).
 */
typedef enum {
	JIS_ASCII,	/* single-byte text: ESC ( B, ESC ( J, or SI */
	JIS_KANJI,	/* two-byte kanji: ESC $ @ or ESC $ B */
	JIS_HWKANA,	/* half-width kana: ESC ( I, or SO */
	JIS_AUXKANJI,	/* auxiliary kanji: ESC $ ( D */
	JIS_UDC		/* ISO-2022-JP-MS user-defined characters: ESC $ ( ? */
} JISState;
52
53
/* Byte written in place of input that cannot be converted. */
#define SUBST_CHAR	'_'

/* Control bytes used by the converters below. */
#define ESC		'\033'
#define SO		0x0e
#define SI		0x0f
#define SS2		0x8e
#define SS3		0x8f

/*
 * Byte classification helpers.  Every macro masks its argument with 0xff,
 * so both signed and unsigned char values may be passed.  Arguments are
 * evaluated more than once; do not pass expressions with side effects.
 */

/* EUC-JP: kanji byte, half-width kana prefix/payload, auxiliary prefix */
#define iseuckanji(c) \
	(((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xfe)
#define iseuchwkana1(c) \
	(((c) & 0xff) == SS2)
#define iseuchwkana2(c) \
	(((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xdf)
#define iseucaux(c) \
	(((c) & 0xff) == SS3)

/* Shift_JIS: lead byte, trail byte, half-width kana, extension areas */
#define issjiskanji1(c) \
	((((c) & 0xff) >= 0x81 && ((c) & 0xff) <= 0x9f) || \
	 (((c) & 0xff) >= 0xe0 && ((c) & 0xff) <= 0xef))
#define issjiskanji2(c) \
	((((c) & 0xff) >= 0x40 && ((c) & 0xff) <= 0x7e) || \
	 (((c) & 0xff) >= 0x80 && ((c) & 0xff) <= 0xfc))
#define issjishwkana(c) \
	(((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xdf)
#define issjisext(c) \
	(((c) & 0xff) >= 0xf0 && ((c) & 0xff) <= 0xfc)
#define issjisudc(c) \
	(((c) & 0xff) >= 0xf0 && ((c) & 0xff) <= 0xf9)
#define issjisibmext(c1, c2) \
	((((c1) & 0xff) >= 0xfa && ((c1) & 0xff) <= 0xfb && \
	  issjiskanji2(c2)) ||                              \
	 (((c1) & 0xff) == 0xfc &&                          \
	  ((c2) & 0xff) >= 0x40 && ((c2) & 0xff) <= 0x4b))

/* 7-bit JIS: kanji byte, half-width kana byte, user-defined rows */
#define isjiskanji(c) \
	(((c) & 0xff) >= 0x21 && ((c) & 0xff) <= 0x7e)
#define isjishwkana(c) \
	(((c) & 0xff) >= 0x21 && ((c) & 0xff) <= 0x5f)
#define isjisudc(c) \
	(((c) & 0xff) >= 0x21 && ((c) & 0xff) <= 0x34)
#define isjisudclow(c) \
	(((c) & 0xff) >= 0x21 && ((c) & 0xff) <= 0x2a)
#define isjisudchigh(c) \
	(((c) & 0xff) >= 0x2b && ((c) & 0xff) <= 0x34)

/* U+0080 - U+07FF */
#define isutf8_2_1(c) \
	(((c) & 0xe0) == 0xc0)
#define isutf8_2_2(c) \
	(((c) & 0xc0) == 0x80)
/* U+0800 - U+FFFF */
#define isutf8_3_1(c) \
	(((c) & 0xf0) == 0xe0)
#define isutf8_3_2(c) \
	(((c) & 0xc0) == 0x80)

/*
 * True when s points at the three-byte UTF-8 byte order mark EF BB BF.
 * The argument is fully parenthesised so that pointer expressions such as
 * `cond ? a : b` are indexed correctly.  Evaluation short-circuits, so a
 * NUL-terminated string shorter than three bytes is never over-read.
 */
#define isutf8bom(s) \
	(((*(s)) & 0xff) == 0xef && ((*((s) + 1)) & 0xff) == 0xbb && \
	 ((*((s) + 2)) & 0xff) == 0xbf)
112
113
/*
 * Escape-sequence emitters for the *tojis converters.
 *
 * Each macro expects two variables in the calling scope: `state`, holding
 * the current JISState, and `out`, a byte pointer into the output buffer.
 * If `state` differs from the target state, the designating escape
 * sequence is appended through `out` and `state` is updated; otherwise
 * nothing is written.
 *
 * The bodies are wrapped in do { } while (0) so that each macro behaves as
 * a single statement (safe in an unbraced if/else).  Invoke them with a
 * trailing semicolon.
 */

/* ESC $ B */
#define K_IN()					\
	do {					\
		if (state != JIS_KANJI) {	\
			*out++ = ESC;		\
			*out++ = '$';		\
			*out++ = 'B';		\
			state = JIS_KANJI;	\
		}				\
	} while (0)

/* ESC ( B */
#define K_OUT()					\
	do {					\
		if (state != JIS_ASCII) {	\
			*out++ = ESC;		\
			*out++ = '(';		\
			*out++ = 'B';		\
			state = JIS_ASCII;	\
		}				\
	} while (0)

/* ESC ( I */
#define HW_IN()					\
	do {					\
		if (state != JIS_HWKANA) {	\
			*out++ = ESC;		\
			*out++ = '(';		\
			*out++ = 'I';		\
			state = JIS_HWKANA;	\
		}				\
	} while (0)

/* ESC $ ( D */
#define AUX_IN()				\
	do {					\
		if (state != JIS_AUXKANJI) {	\
			*out++ = ESC;		\
			*out++ = '$';		\
			*out++ = '(';		\
			*out++ = 'D';		\
			state = JIS_AUXKANJI;	\
		}				\
	} while (0)

/* ESC $ ( ? */
#define UDC_IN()				\
	do {					\
		if (state != JIS_UDC) {		\
			*out++ = ESC;		\
			*out++ = '$';		\
			*out++ = '(';		\
			*out++ = '?';		\
			state = JIS_UDC;	\
		}				\
	} while (0)
154
155
static ConvADType conv_ad_type = C_AD_BY_LOCALE;
156
static gboolean allow_jisx0201_kana = FALSE;
157
158
static gchar *conv_jistoeuc(const gchar *inbuf, gint *error);
159
static gchar *conv_jistosjis(const gchar *inbuf, gint *error);
160
static gchar *conv_euctojis(const gchar *inbuf, gint *error);
161
static gchar *conv_sjistojis(const gchar *inbuf, gint *error);
162
static gchar *conv_sjistoeuc(const gchar *inbuf, gint *error);
163
164
static gchar *conv_jistoutf8(const gchar *inbuf, gint *error);
165
static gchar *conv_sjistoutf8(const gchar *inbuf, gint *error);
166
static gchar *conv_euctoutf8(const gchar *inbuf, gint *error);
167
static gchar *conv_anytoutf8(const gchar *inbuf, gint *error);
168
169
static gchar *conv_utf8toeuc(const gchar *inbuf, gint *error);
170
static gchar *conv_utf8tojis(const gchar *inbuf, gint *error);
171
static gchar *conv_utf8tosjis(const gchar *inbuf, gint *error);
172
173
/* static void conv_unreadable_eucjp(gchar *str); */
174
static void conv_unreadable_8bit(gchar *str);
175
/* static void conv_unreadable_latin(gchar *str); */
176
177
static gchar *conv_jistodisp(const gchar *inbuf, gint *error);
178
static gchar *conv_sjistodisp(const gchar *inbuf, gint *error);
179
static gchar *conv_euctodisp(const gchar *inbuf, gint *error);
180
181
static gchar *conv_anytodisp(const gchar *inbuf, gint *error);
182
static gchar *conv_ustodisp(const gchar *inbuf, gint *error);
183
static gchar *conv_noconv(const gchar *inbuf, gint *error);
184
185
/*
 * Convert an ISO-2022-JP style (7-bit JIS) string to EUC-JP.
 *
 * inbuf: NUL-terminated input.
 * error: if not NULL, receives 0 on success or -1 when an unknown escape
 *        sequence was encountered (conversion continues in ASCII state).
 *
 * Returns a buffer obtained from g_malloc(); the caller releases it with
 * g_free().
 */
static gchar *conv_jistoeuc(const gchar *inbuf, gint *error)
{
	const guchar *src = (guchar *)inbuf;
	/* worst case is half-width kana: one input byte -> two output bytes */
	gchar *result = g_malloc(strlen(inbuf) * 2 + 1);
	guchar *dst = (guchar *)result;
	JISState mode = JIS_ASCII;
	gint status = 0;

	while (*src != '\0') {
		if (*src == ESC) {
			/* designation sequence: look at the bytes after ESC */
			src++;
			if (src[0] == '$' &&
			    (src[1] == '@' || src[1] == 'B')) {
				mode = JIS_KANJI;
				src += 2;
			} else if (src[0] == '$' &&
				   src[1] == '(' && src[2] == 'D') {
				mode = JIS_AUXKANJI;
				src += 3;
			} else if (src[0] == '(' &&
				   (src[1] == 'B' || src[1] == 'J')) {
				mode = JIS_ASCII;
				src += 2;
			} else if (src[0] == '(' && src[1] == 'I') {
				mode = JIS_HWKANA;
				src += 2;
			} else {
				/* unknown escape sequence: only ESC is skipped */
				status = -1;
				mode = JIS_ASCII;
			}
			continue;
		}

		if (*src == 0x0e) {		/* shift out */
			mode = JIS_HWKANA;
			src++;
			continue;
		}
		if (*src == 0x0f) {		/* shift in */
			mode = JIS_ASCII;
			src++;
			continue;
		}

		switch (mode) {
		case JIS_KANJI:
		case JIS_AUXKANJI:
			if (mode == JIS_AUXKANJI)
				*dst++ = 0x8f;	/* SS3 prefix */
			*dst++ = *src++ | 0x80;
			/* a truncated pair at end of input yields one byte */
			if (*src != '\0')
				*dst++ = *src++ | 0x80;
			break;
		case JIS_HWKANA:
			*dst++ = 0x8e;		/* SS2 prefix */
			*dst++ = *src++ | 0x80;
			break;
		case JIS_ASCII:
		default:
			*dst++ = *src++;
			break;
		}
	}

	*dst = '\0';

	if (error != NULL)
		*error = status;

	return result;
}
269
270
/*
 * Convert an ISO-2022-JP style (7-bit JIS) string to Shift_JIS.
 *
 * inbuf: NUL-terminated input.
 * error: if not NULL, receives 0 on success or -1 when an unknown escape
 *        sequence or an invalid two-byte code was encountered.  Invalid
 *        codes are replaced by SUBST_CHAR.
 *
 * Returns a buffer obtained from g_malloc(); the caller releases it with
 * g_free().
 */
static gchar *conv_jistosjis(const gchar *inbuf, gint *error)
{
	const guchar *src = (guchar *)inbuf;
	gchar *result = g_malloc(strlen(inbuf) * 2 + 1);
	guchar *dst = (guchar *)result;
	JISState mode = JIS_ASCII;
	gint status = 0;

	while (*src != '\0') {
		if (*src == ESC) {
			/* designation sequence: look at the bytes after ESC */
			src++;
			if (src[0] == '$' &&
			    (src[1] == '@' || src[1] == 'B')) {
				mode = JIS_KANJI;
				src += 2;
			} else if (src[0] == '$' &&
				   src[1] == '(' && src[2] == '?') {
				/* ISO-2022-JP-MS extension */
				mode = JIS_UDC;
				src += 3;
			} else if (src[0] == '(' &&
				   (src[1] == 'B' || src[1] == 'J')) {
				mode = JIS_ASCII;
				src += 2;
			} else if (src[0] == '(' && src[1] == 'I') {
				mode = JIS_HWKANA;
				src += 2;
			} else {
				/* unknown escape sequence: only ESC is skipped */
				status = -1;
				mode = JIS_ASCII;
			}
			continue;
		}

		if (*src == SO) {
			mode = JIS_HWKANA;
			src++;
			continue;
		}
		if (*src == SI) {
			mode = JIS_ASCII;
			src++;
			continue;
		}

		switch (mode) {
		case JIS_HWKANA:
			*dst++ = *src++ | 0x80;
			break;
		case JIS_KANJI:
		case JIS_UDC: {
			const guchar c1 = src[0];
			gint lead_ok;
			gint lead_base;

			/* the two states differ only in the accepted lead
			 * byte range and in the Shift_JIS row offset */
			if (mode == JIS_KANJI) {
				lead_ok = isjiskanji(c1) ||
					  (c1 >= 0x7f && c1 <= 0x97);
				lead_base = (c1 < 0x5f) ? 0x81 : 0xc1;
			} else {
				lead_ok = isjisudc(c1);
				lead_base = 0xf0;
			}

			if (lead_ok && isjiskanji(src[1])) {
				const guchar c2 = src[1];

				*dst++ = (c1 - 0x21) / 2 + lead_base;
				if (c1 % 2)
					*dst++ = c2 + ((c2 < 0x60) ? 0x1f : 0x20);
				else
					*dst++ = c2 + 0x7e;
				src += 2;
			} else {
				/* invalid pair: substitute up to two bytes */
				status = -1;
				*dst++ = SUBST_CHAR;
				src++;
				if (*src != '\0') {
					*dst++ = SUBST_CHAR;
					src++;
				}
			}
			break;
		}
		case JIS_ASCII:
		default:
			*dst++ = *src++;
			break;
		}
	}

	*dst = '\0';

	if (error != NULL)
		*error = status;

	return result;
}
383
384
#define JIS_HWDAKUTEN                0x5e
385
#define JIS_HWHANDAKUTEN        0x5f
386
387
static gint conv_jis_hantozen(guchar *outbuf, guchar jis_code, guchar sound_sym)
388
{
389
        static guint16 h2z_tbl[] = {
390
                /* 0x20 - 0x2f */
391
                0x0000, 0x2123, 0x2156, 0x2157, 0x2122, 0x2126, 0x2572, 0x2521,
392
                0x2523, 0x2525, 0x2527, 0x2529, 0x2563, 0x2565, 0x2567, 0x2543,
393
                /* 0x30 - 0x3f */
394
                0x213c, 0x2522, 0x2524, 0x2526, 0x2528, 0x252a, 0x252b, 0x252d,
395
                0x252f, 0x2531, 0x2533, 0x2535, 0x2537, 0x2539, 0x253b, 0x253d,
396
                /* 0x40 - 0x4f */
397
                0x253f, 0x2541, 0x2544, 0x2546, 0x2548, 0x254a, 0x254b, 0x254c,
398
                0x254d, 0x254e, 0x254f, 0x2552, 0x2555, 0x2558, 0x255b, 0x255e,
399
                /* 0x50 - 0x5f */
400
                0x255f, 0x2560, 0x2561, 0x2562, 0x2564, 0x2566, 0x2568, 0x2569,
401
                0x256a, 0x256b, 0x256c, 0x256d, 0x256f, 0x2573, 0x212b, 0x212c
402
        };
403
404
        static guint16 dakuten_tbl[] = {
405
                /* 0x30 - 0x3f */
406
                0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x252c, 0x252e,
407
                0x2530, 0x2532, 0x2534, 0x2536, 0x2538, 0x253a, 0x253c, 0x253e,
408
                /* 0x40 - 0x4f */
409
                0x2540, 0x2542, 0x2545, 0x2547, 0x2549, 0x0000, 0x0000, 0x0000,
410
                0x0000, 0x0000, 0x2550, 0x2553, 0x2556, 0x2559, 0x255c, 0x0000
411
        };
412
413
        static guint16 handakuten_tbl[] = {
414
                /* 0x4a - 0x4e */
415
                0x2551, 0x2554, 0x2557, 0x255a, 0x255d
416
        };
417
418
        guint16 out_code;
419
420
        jis_code &= 0x7f;
421
        sound_sym &= 0x7f;
422
423
        if (jis_code < 0x21 || jis_code > 0x5f)
424
                return 0;
425
426
        if (sound_sym == JIS_HWDAKUTEN &&
427
            jis_code >= 0x36 && jis_code <= 0x4e) {
428
                out_code = dakuten_tbl[jis_code - 0x30];
429
                if (out_code != 0) {
430
                        *outbuf = out_code >> 8;
431
                        *(outbuf + 1) = out_code & 0xff;
432
                        return 2;
433
                }
434
        }
435
436
        if (sound_sym == JIS_HWHANDAKUTEN &&
437
            jis_code >= 0x4a && jis_code <= 0x4e) {
438
                out_code = handakuten_tbl[jis_code - 0x4a];
439
                *outbuf = out_code >> 8;
440
                *(outbuf + 1) = out_code & 0xff;
441
                return 2;
442
        }
443
444
        out_code = h2z_tbl[jis_code - 0x20];
445
        *outbuf = out_code >> 8;
446
        *(outbuf + 1) = out_code & 0xff;
447
        return 1;
448
}
449
450
/*
 * Convert an EUC-JP string to ISO-2022-JP style (7-bit JIS).
 *
 * Half-width kana (SS2 + byte) is emitted as-is under ESC ( I when
 * allow_jisx0201_kana is set; otherwise it is converted to full-width
 * kana via conv_jis_hantozen().  Bytes that do not form a valid sequence
 * are replaced by SUBST_CHAR.  The output always ends in ASCII state.
 *
 * inbuf: NUL-terminated input.
 * error: if not NULL, receives 0 on success or -1 if any invalid
 *        sequence was found.
 *
 * Returns a buffer obtained from g_malloc(); the caller releases it with
 * g_free().
 */
static gchar *conv_euctojis(const gchar *inbuf, gint *error)
{
	const guchar *p = (guchar *)inbuf;
	gchar *jisbuf;
	/* `out` and `state` are the names the K_IN() macro family uses */
	guchar *out;
	JISState state = JIS_ASCII;
	gint failed = 0;

	jisbuf = g_malloc(strlen(inbuf) * 3 + 4);
	out = (guchar *)jisbuf;

	while (*p != '\0') {
		if (isascii(*p)) {
			K_OUT();
			*out++ = *p++;
			continue;
		}

		if (iseuckanji(*p)) {
			if (iseuckanji(p[1])) {
				K_IN();
				*out++ = *p++ & 0x7f;
				*out++ = *p++ & 0x7f;
			} else {
				/* lone lead byte */
				failed = -1;
				K_OUT();
				*out++ = SUBST_CHAR;
				p++;
				if (*p != '\0' && !isascii(*p)) {
					*out++ = SUBST_CHAR;
					p++;
				}
			}
			continue;
		}

		if (iseuchwkana1(*p)) {
			if (!iseuchwkana2(p[1])) {
				/* SS2 without a kana byte: SS2 is dropped */
				failed = -1;
				K_OUT();
				p++;
				if (*p != '\0' && !isascii(*p)) {
					*out++ = SUBST_CHAR;
					p++;
				}
			} else if (allow_jisx0201_kana) {
				HW_IN();
				p++;
				*out++ = *p++ & 0x7f;
			} else {
				guchar zen[2];
				/* a following half-width kana may be a sound
				 * mark that merges with this one */
				const guchar mark =
					(iseuchwkana1(p[2]) &&
					 iseuchwkana2(p[3])) ? p[3] : '\0';
				const gint used =
					conv_jis_hantozen(zen, p[1], mark);

				if (used == 0) {
					p += 2;
				} else {
					K_IN();
					/* each kana is SS2 + one byte */
					p += used * 2;
					*out++ = zen[0];
					*out++ = zen[1];
				}
			}
			continue;
		}

		if (iseucaux(*p)) {
			p++;	/* skip SS3 */
			if (iseuckanji(*p) && iseuckanji(p[1])) {
				AUX_IN();
				*out++ = *p++ & 0x7f;
				*out++ = *p++ & 0x7f;
			} else {
				failed = -1;
				K_OUT();
				if (*p != '\0' && !isascii(*p)) {
					*out++ = SUBST_CHAR;
					p++;
					if (*p != '\0' && !isascii(*p)) {
						*out++ = SUBST_CHAR;
						p++;
					}
				}
			}
			continue;
		}

		/* any other 8-bit byte */
		failed = -1;
		K_OUT();
		*out++ = SUBST_CHAR;
		p++;
	}

	K_OUT();
	*out = '\0';

	if (error != NULL)
		*error = failed;

	return jisbuf;
}
551
552
/*
 * sjistoidx(c1, c2): linear index of the Shift_JIS double-byte character
 * with lead byte c1 and trail byte c2.  Each lead byte covers 188 trail
 * positions; lead bytes above 0x9f continue the numbering of the
 * 0x81..0x9f block, and trail bytes above 0x7e skip the unused 0x7f.
 * Both arguments are evaluated twice.
 */
#define sjistoidx(c1, c2) \
	(((c1) - (((c1) > 0x9f) ? 0xc1 : 0x81)) * 188 + \
	 (c2) - (((c2) > 0x7e) ? 0x41 : 0x40))
/* idxtojis1/idxtojis2: first and second JIS byte (94 x 94 grid) of an index */
#define idxtojis1(c) (((c) / 94) + 0x21)
#define idxtojis2(c) (((c) % 94) + 0x21)
558
559
/*
 * Remap a Shift_JIS linear index (see sjistoidx()) from the 0xfa.. rows
 * to the 0xed/0xee rows.  Three contiguous ranges are shifted:
 *
 *   from 0xfa5c        -> starting at 0xed40
 *   0xfa55 .. 0xfa5b   -> starting at 0xeefa
 *   0xfa40 .. 0xfa54   -> starting at 0xeeef
 *
 * Indexes below 0xfa40 are returned unchanged.
 *
 * The upstream source also carries finer-grained remappings that are
 * disabled (commented out) in this version, so those code points follow
 * the range rules above:
 *   0xfa5b -> 0x81e6, 0xfa5a -> 0x8784, 0xfa59 -> 0x8782,
 *   0xfa58 -> 0x878a, 0xfa54 -> 0x81ca, 0xfa4a.. -> 0x8754..
 */
static guint conv_idx_ibmtonec(guint idx)
{
	guint shift = 0;

	if (idx >= sjistoidx(0xfa, 0x5c))
		shift = sjistoidx(0xfa, 0x5c) - sjistoidx(0xed, 0x40);
	else if (idx >= sjistoidx(0xfa, 0x55))
		shift = sjistoidx(0xfa, 0x55) - sjistoidx(0xee, 0xfa);
	else if (idx >= sjistoidx(0xfa, 0x40))
		shift = sjistoidx(0xfa, 0x40) - sjistoidx(0xee, 0xef);

	return idx - shift;
}
585
586
/*
 * Convert a Shift_JIS string to ISO-2022-JP style (7-bit JIS).
 *
 * Half-width kana is emitted under ESC ( I when allow_jisx0201_kana is
 * set; otherwise it is converted to full-width kana via
 * conv_jis_hantozen().  Codes in the 0xfa..0xfc extension rows are
 * remapped with conv_idx_ibmtonec() before conversion.  Anything else
 * that is not valid is replaced by SUBST_CHAR.  The output always ends
 * in ASCII state.
 *
 * inbuf: NUL-terminated input.
 * error: if not NULL, receives 0 on success or -1 if any invalid or
 *        unsupported sequence was found.
 *
 * Returns a buffer obtained from g_malloc(); the caller releases it with
 * g_free().
 */
static gchar *conv_sjistojis(const gchar *inbuf, gint *error)
{
	const guchar *p = (guchar *)inbuf;
	gchar *jisbuf;
	/* `out` and `state` are the names the K_IN() macro family uses */
	guchar *out;
	JISState state = JIS_ASCII;
	gint failed = 0;
	guint idx;

	jisbuf = g_malloc(strlen(inbuf) * 5 + 4);
	out = (guchar *)jisbuf;

	while (*p != '\0') {
		if (isascii(*p)) {
			K_OUT();
			*out++ = *p++;
			continue;
		}

		if (issjiskanji1(*p)) {
			if (issjiskanji2(p[1])) {
				K_IN();
				idx = sjistoidx(p[0], p[1]);
				*out++ = idxtojis1(idx);
				*out++ = idxtojis2(idx);
				p += 2;
			} else {
				/* lead byte without a valid trail byte */
				failed = -1;
				K_OUT();
				*out++ = SUBST_CHAR;
				p++;
				if (*p != '\0' && !isascii(*p)) {
					*out++ = SUBST_CHAR;
					p++;
				}
			}
			continue;
		}

		if (issjishwkana(*p)) {
			if (allow_jisx0201_kana) {
				HW_IN();
				*out++ = *p++ & 0x7f;
			} else {
				guchar zen[2];
				/* a following half-width kana may be a sound
				 * mark that merges with this one */
				const guchar mark =
					issjishwkana(p[1]) ? p[1] : '\0';
				const gint used =
					conv_jis_hantozen(zen, *p, mark);

				if (used == 0) {
					p++;
				} else {
					K_IN();
					p += used;
					*out++ = zen[0];
					*out++ = zen[1];
				}
			}
			continue;
		}

		if (issjisibmext(p[0], p[1])) {
			K_IN();
			idx = sjistoidx(p[0], p[1]);
			idx = conv_idx_ibmtonec(idx);
			*out++ = idxtojis1(idx);
			*out++ = idxtojis2(idx);
			p += 2;
			continue;
		}

#if 0
		if (issjisudc(*p)) {
			UDC_IN();
			idx = sjistoidx(p[0], p[1])
			      - sjistoidx(0xf0, 0x40);
			*out++ = idxtojis1(idx);
			*out++ = idxtojis2(idx);
			p += 2;
			continue;
		}
#endif

		if (issjisext(*p)) {
			/* extension lead byte that is not handled above */
			failed = -1;
			K_OUT();
			*out++ = SUBST_CHAR;
			p++;
			if (*p != '\0' && !isascii(*p)) {
				*out++ = SUBST_CHAR;
				p++;
			}
			continue;
		}

		/* any other 8-bit byte */
		failed = -1;
		K_OUT();
		*out++ = SUBST_CHAR;
		p++;
	}

	K_OUT();
	*out = '\0';

	if (error != NULL)
		*error = failed;

	return jisbuf;
}
685
686
/*
 * Convert a Shift_JIS string to EUC-JP.
 *
 * Half-width kana bytes are prefixed with SS2.  Invalid or unsupported
 * bytes (including the 0xf0..0xfc extension rows) are replaced by
 * SUBST_CHAR.
 *
 * inbuf: NUL-terminated input.
 * error: if not NULL, receives 0 on success or -1 if any byte had to be
 *        substituted.
 *
 * Returns a buffer obtained from g_malloc(); the caller releases it with
 * g_free().
 */
static gchar *conv_sjistoeuc(const gchar *inbuf, gint *error)
{
	const guchar *src = (guchar *)inbuf;
	/* worst case is half-width kana: one input byte -> two output bytes */
	gchar *result = g_malloc(strlen(inbuf) * 2 + 1);
	guchar *dst = (guchar *)result;
	gint status = 0;

	while (*src != '\0') {
		if (isascii(*src)) {
			*dst++ = *src++;
		} else if (issjiskanji1(*src) && issjiskanji2(src[1])) {
			guchar hi = src[0];
			guchar lo = src[1];
			const guchar base = (hi < 0xa0) ? 0x70 : 0xb0;

			/* each Shift_JIS lead byte covers two JIS rows; the
			 * trail byte decides which one */
			if (lo >= 0x9f) {
				hi = (hi - base) * 2;
				lo -= 0x7e;
			} else {
				hi = (hi - base) * 2 - 1;
				lo -= (lo > 0x7f) ? 0x20 : 0x1f;
			}

			*dst++ = hi | 0x80;
			*dst++ = lo | 0x80;
			src += 2;
		} else if (issjishwkana(*src)) {
			*dst++ = SS2;
			*dst++ = *src++;
		} else {
			/* A broken kanji pair or an extension lead byte may
			 * swallow one following 8-bit byte; any other stray
			 * byte is substituted on its own. */
			const gint maybe_pair =
				issjiskanji1(*src) || issjisext(*src);

			status = -1;
			*dst++ = SUBST_CHAR;
			src++;
			if (maybe_pair && *src != '\0' && !isascii(*src)) {
				*dst++ = SUBST_CHAR;
				src++;
			}
		}
	}

	*dst = '\0';

	if (error != NULL)
		*error = status;

	return result;
}
751
752
/*
 * Convert an ISO-2022-JP string to UTF-8 through an intermediate encoding.
 *
 * When the input contains the escape sequence ESC $ ( D the text is routed
 * through EUC-JP; otherwise it is routed through Shift_JIS.
 * NOTE(review): ESC $ ( D is presumably the JIS X 0212 (auxiliary kanji)
 * designation, which the Shift_JIS route could not carry -- confirm.
 *
 * inbuf: NUL-terminated input (must not be NULL; it is passed to strstr()).
 * error: optional; receives the bitwise OR of both stages' error codes.
 * Returns a newly allocated string owned by the caller.
 */
static gchar *conv_jistoutf8(const gchar *inbuf, gint *error)
{
        const gboolean via_euc = (strstr(inbuf, "\033$(D") != NULL);
        gint stage1_err = 0;
        gint stage2_err = 0;
        gchar *intermediate;
        gchar *result;

        if (via_euc) {
                intermediate = conv_jistoeuc(inbuf, &stage1_err);
                result = conv_euctoutf8(intermediate, &stage2_err);
        } else {
                intermediate = conv_jistosjis(inbuf, &stage1_err);
                result = conv_sjistoutf8(intermediate, &stage2_err);
        }

        /* The intermediate buffer is only needed between the two stages. */
        g_free(intermediate);

        if (error != NULL)
                *error = stage1_err | stage2_err;

        return result;
}
771
772
/*
 * Locking helpers used around the function-local static iconv descriptors
 * in this file.  With USE_THREADS they forward to GLib's G_LOCK macros;
 * otherwise they expand to nothing.
 */
#if USE_THREADS
#define S_LOCK_DEFINE_STATIC(name)        G_LOCK_DEFINE_STATIC(name)
#define S_LOCK(name)        G_LOCK(name)
#define S_UNLOCK(name)        G_UNLOCK(name)
#else
#define S_LOCK_DEFINE_STATIC(name)
#define S_LOCK(name)
#define S_UNLOCK(name)
#endif
781
782
/*
 * Convert Shift_JIS text to UTF-8 using a lazily opened, cached iconv
 * descriptor.
 *
 * The descriptor is opened on first use, trying CS_CP932 first and falling
 * back to CS_SHIFT_JIS.  If neither can be opened, a warning is logged once,
 * the failure is remembered, and this and every later call returns a plain
 * copy of the input with *error set to -1.
 *
 * inbuf: NUL-terminated input.
 * error: optional; receives 0 on success or -1 on failure.
 * Returns a newly allocated string owned by the caller.
 */
static gchar *conv_sjistoutf8(const gchar *inbuf, gint *error)
{
        static iconv_t cd = (iconv_t)-1;
        static gboolean iconv_ok = TRUE;
        S_LOCK_DEFINE_STATIC(cd);
        gchar *converted;

        S_LOCK(cd);

        /* Open the descriptor unless an earlier attempt already failed. */
        if (cd == (iconv_t)-1 && iconv_ok) {
                cd = iconv_open(CS_UTF_8, CS_CP932);
                if (cd == (iconv_t)-1)
                        cd = iconv_open(CS_UTF_8, CS_SHIFT_JIS);
                if (cd == (iconv_t)-1) {
                        g_warning("conv_sjistoutf8(): %s\n",
                                  g_strerror(errno));
                        iconv_ok = FALSE;
                }
        }

        /* No usable descriptor: hand back an unconverted copy. */
        if (cd == (iconv_t)-1) {
                S_UNLOCK(cd);
                if (error != NULL)
                        *error = -1;
                return g_strdup(inbuf);
        }

        converted = conv_iconv_strdup_with_cd(inbuf, cd, error);
        S_UNLOCK(cd);

        return converted;
}
818
819
/*
 * Convert EUC-JP text to UTF-8 using a lazily opened, cached iconv
 * descriptor.
 *
 * The descriptor is opened on first use, trying CS_EUC_JP_MS first and
 * falling back to CS_EUC_JP.  If neither can be opened, a warning is logged
 * once, the failure is remembered, and this and every later call returns a
 * plain copy of the input with *error set to -1.
 *
 * inbuf: NUL-terminated input.
 * error: optional; receives 0 on success or -1 on failure.
 * Returns a newly allocated string owned by the caller.
 */
static gchar *conv_euctoutf8(const gchar *inbuf, gint *error)
{
        static iconv_t cd = (iconv_t)-1;
        static gboolean iconv_ok = TRUE;
        S_LOCK_DEFINE_STATIC(cd);
        gchar *converted;

        S_LOCK(cd);

        /* Open the descriptor unless an earlier attempt already failed. */
        if (cd == (iconv_t)-1 && iconv_ok) {
                cd = iconv_open(CS_UTF_8, CS_EUC_JP_MS);
                if (cd == (iconv_t)-1)
                        cd = iconv_open(CS_UTF_8, CS_EUC_JP);
                if (cd == (iconv_t)-1) {
                        g_warning("conv_euctoutf8(): %s\n",
                                  g_strerror(errno));
                        iconv_ok = FALSE;
                }
        }

        /* No usable descriptor: hand back an unconverted copy. */
        if (cd == (iconv_t)-1) {
                S_UNLOCK(cd);
                if (error != NULL)
                        *error = -1;
                return g_strdup(inbuf);
        }

        converted = conv_iconv_strdup_with_cd(inbuf, cd, error);
        S_UNLOCK(cd);

        return converted;
}
855
856
/*
 * Convert text of unknown Japanese encoding to UTF-8.
 *
 * The encoding is guessed with conv_guess_ja_encoding().  ISO-2022-JP,
 * Shift_JIS and EUC-JP are dispatched to their converters.  Anything else
 * is returned as a plain copy with *error set to 0; when the guess is
 * UTF-8, a leading byte-order mark is skipped first.
 *
 * Returns a newly allocated string owned by the caller.
 */
static gchar *conv_anytoutf8(const gchar *inbuf, gint *error)
{
        const CharSet detected = conv_guess_ja_encoding(inbuf);

        if (detected == C_ISO_2022_JP)
                return conv_jistoutf8(inbuf, error);
        if (detected == C_SHIFT_JIS)
                return conv_sjistoutf8(inbuf, error);
        if (detected == C_EUC_JP)
                return conv_euctoutf8(inbuf, error);

        /* Nothing to convert: the copy below always counts as success. */
        if (error != NULL)
                *error = 0;

        if (detected == C_UTF_8 && isutf8bom(inbuf))
                inbuf += 3;

        return g_strdup(inbuf);
}
877
878
/*
 * Convert UTF-8 text to Shift_JIS using a lazily opened, cached iconv
 * descriptor.
 *
 * The descriptor is opened on first use, trying CS_CP932 first and falling
 * back to CS_SHIFT_JIS.  If neither can be opened, a warning is logged once,
 * the failure is remembered, and this and every later call returns a plain
 * copy of the input with *error set to -1.  On the normal path a leading
 * UTF-8 byte-order mark is skipped before conversion.
 *
 * inbuf: NUL-terminated input.
 * error: optional; receives 0 on success or -1 on failure.
 * Returns a newly allocated string owned by the caller.
 */
static gchar *conv_utf8tosjis(const gchar *inbuf, gint *error)
{
        static iconv_t cd = (iconv_t)-1;
        static gboolean iconv_ok = TRUE;
        S_LOCK_DEFINE_STATIC(cd);
        gchar *converted;

        S_LOCK(cd);

        /* Open the descriptor unless an earlier attempt already failed. */
        if (cd == (iconv_t)-1 && iconv_ok) {
                cd = iconv_open(CS_CP932, CS_UTF_8);
                if (cd == (iconv_t)-1)
                        cd = iconv_open(CS_SHIFT_JIS, CS_UTF_8);
                if (cd == (iconv_t)-1) {
                        g_warning("conv_utf8tosjis(): %s\n",
                                  g_strerror(errno));
                        iconv_ok = FALSE;
                }
        }

        /* No usable descriptor: hand back an unconverted copy (BOM kept). */
        if (cd == (iconv_t)-1) {
                S_UNLOCK(cd);
                if (error != NULL)
                        *error = -1;
                return g_strdup(inbuf);
        }

        if (isutf8bom(inbuf))
                inbuf += 3;

        converted = conv_iconv_strdup_with_cd(inbuf, cd, error);
        S_UNLOCK(cd);

        return converted;
}
916
917
/*
 * Convert UTF-8 text to EUC-JP using a lazily opened, cached iconv
 * descriptor.
 *
 * The descriptor is opened on first use, trying CS_EUC_JP_MS first and
 * falling back to CS_EUC_JP.  If neither can be opened, a warning is logged
 * once, the failure is remembered, and this and every later call returns a
 * plain copy of the input with *error set to -1.  On the normal path a
 * leading UTF-8 byte-order mark is skipped before conversion.
 *
 * inbuf: NUL-terminated input.
 * error: optional; receives 0 on success or -1 on failure.
 * Returns a newly allocated string owned by the caller.
 */
static gchar *conv_utf8toeuc(const gchar *inbuf, gint *error)
{
        static iconv_t cd = (iconv_t)-1;
        static gboolean iconv_ok = TRUE;
        S_LOCK_DEFINE_STATIC(cd);
        gchar *converted;

        S_LOCK(cd);

        /* Open the descriptor unless an earlier attempt already failed. */
        if (cd == (iconv_t)-1 && iconv_ok) {
                cd = iconv_open(CS_EUC_JP_MS, CS_UTF_8);
                if (cd == (iconv_t)-1)
                        cd = iconv_open(CS_EUC_JP, CS_UTF_8);
                if (cd == (iconv_t)-1) {
                        g_warning("conv_utf8toeuc(): %s\n",
                                  g_strerror(errno));
                        iconv_ok = FALSE;
                }
        }

        /* No usable descriptor: hand back an unconverted copy (BOM kept). */
        if (cd == (iconv_t)-1) {
                S_UNLOCK(cd);
                if (error != NULL)
                        *error = -1;
                return g_strdup(inbuf);
        }

        if (isutf8bom(inbuf))
                inbuf += 3;

        converted = conv_iconv_strdup_with_cd(inbuf, cd, error);
        S_UNLOCK(cd);

        return converted;
}
955
956
/*
 * Convert UTF-8 text to ISO-2022-JP in two stages.
 *
 * The enabled path goes UTF-8 -> Shift_JIS -> JIS; an alternative path
 * through EUC-JP is kept behind the preprocessor switch below.
 *
 * error: optional; receives the bitwise OR of both stages' error codes.
 * Returns a newly allocated string owned by the caller.
 */
static gchar *conv_utf8tojis(const gchar *inbuf, gint *error)
{
        gint stage1_err = 0;
        gint stage2_err = 0;
        gchar *intermediate;
        gchar *result;

#if 1
        intermediate = conv_utf8tosjis(inbuf, &stage1_err);
        result = conv_sjistojis(intermediate, &stage2_err);
#else
        intermediate = conv_utf8toeuc(inbuf, &stage1_err);
        result = conv_euctojis(intermediate, &stage2_err);
#endif

        /* The intermediate buffer is only needed between the two stages. */
        g_free(intermediate);

        if (error != NULL)
                *error = stage1_err | stage2_err;

        return result;
}
975
976
#if 0
977
/*
 * Validity table consulted by isprintableeuckanji() for lead bytes
 * 0xa2..0xa8.  Rows are indexed by (lead byte - 0xa2) and columns by
 * (trail byte - 0xa0); a non-zero entry makes isprintableeuckanji()
 * return TRUE for that byte pair.
 */
static gchar valid_eucjp_tbl[][96] = {
        /* 0xa2a0 - 0xa2ff */
        { 0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 0,
          0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 1, 1, 1, 1, 1, 1,
          1, 1, 0, 0, 0, 0, 0, 0,  0, 0, 1, 1, 1, 1, 1, 1,
          1, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 0, 0, 0, 0, 0,
          0, 0, 1, 1, 1, 1, 1, 1,  1, 1, 0, 0, 0, 0, 1, 0 },

        /* 0xa3a0 - 0xa3ff */
        { 0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 0, 0, 0, 0, 0, 0,
          0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 0, 0, 0, 0, 0,
          0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 0, 0, 0, 0, 0 },

        /* 0xa4a0 - 0xa4ff */
        { 0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0 },

        /* 0xa5a0 - 0xa5ff */
        { 0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 0,  0, 0, 0, 0, 0, 0, 0, 0 },

        /* 0xa6a0 - 0xa6ff */
        { 0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 0, 0, 0, 0, 0, 0, 0,
          0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0 },

        /* 0xa7a0 - 0xa7ff */
        { 0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
          0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0 },

        /* 0xa8a0 - 0xa8ff */
        { 0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0 }
};
1034
1035
/*
 * Decide whether the byte pair (c1, c2) is accepted as a printable
 * two-byte EUC-JP character.
 *
 * The lead byte must lie in 0xa1..0xf4 and the trail byte in 0xa1..0xfe.
 * Lead bytes 0xa9..0xaf are always rejected, 0xa2..0xa8 are looked up in
 * valid_eucjp_tbl, and the tails of rows 0xcf and 0xf4 are rejected.
 */
static gboolean isprintableeuckanji(guchar c1, guchar c2)
{
        const gboolean lead_in_range = (c1 > 0xa0 && c1 < 0xf5);
        const gboolean trail_in_range = (c2 > 0xa0 && c2 != 0xff);

        if (!lead_in_range || !trail_in_range)
                return FALSE;

        if (c1 >= 0xa9 && c1 <= 0xaf)
                return FALSE;

        if (c1 >= 0xa2 && c1 <= 0xa8)
                return (gboolean)valid_eucjp_tbl[c1 - 0xa2][c2 - 0xa0];

        switch (c1) {
        case 0xcf:
                if (c2 >= 0xd4 && c2 <= 0xfe)
                        return FALSE;
                break;
        case 0xf4:
                if (c2 >= 0xa7 && c2 <= 0xfe)
                        return FALSE;
                break;
        default:
                break;
        }

        return TRUE;
}
1058
1059
/*
 * Sanitize an EUC-JP string in place.
 *
 * CR+LF pairs are collapsed to LF.  Two-byte characters rejected by
 * isprintableeuckanji(), malformed half-width kana or auxiliary kanji
 * sequences, and any other stray 8-bit byte are overwritten with
 * SUBST_CHAR.
 */
static void conv_unreadable_eucjp(gchar *str)
{
        register guchar *p = (guchar *)str;

        while (*p != '\0') {
                const guchar lead = *p;

                if (isascii(lead)) {
                        /* convert CR+LF -> LF by shifting the tail left */
                        if (lead == '\r' && p[1] == '\n')
                                memmove(p, p + 1, strlen((const gchar *)p));
                        p++;
                        continue;
                }

                if (iseuckanji(lead)) {
                        if (isprintableeuckanji(lead, p[1])) {
                                p += 2;
                                continue;
                        }
                        /* unprintable pair: replace the lead byte ... */
                        *p++ = SUBST_CHAR;
                        if (*p == '\0')
                                continue;
                        /* ... and the trail byte too unless it is ASCII */
                        if (!isascii(*p))
                                *p = SUBST_CHAR;
                        p++;
                        continue;
                }

                if (iseuchwkana1(lead) && iseuchwkana2(p[1])) {
                        /* euc-jp hankaku kana */
                        p += 2;
                        continue;
                }

                if (!iseuchwkana1(lead) && iseucaux(lead) &&
                    iseuckanji(p[1]) && iseuckanji(p[2])) {
                        /* auxiliary kanji */
                        p += 3;
                        continue;
                }

                /* malformed sequence or unprintable single byte */
                *p++ = SUBST_CHAR;
        }
}
1101
#endif
1102
1103
/*
 * Make a string 7-bit clean in place.
 *
 * Every CR that is immediately followed by LF is dropped (CR+LF -> LF) and
 * every non-ASCII byte is replaced with SUBST_CHAR.  The result is never
 * longer than the input, so it is compacted within the same buffer in a
 * single pass instead of shifting the whole tail for each CR+LF pair.
 *
 * str: NUL-terminated, writable buffer (must not be NULL).
 */
static void conv_unreadable_8bit(gchar *str)
{
        const gchar *src = str;
        gchar *dst = str;

        while (*src != '\0') {
                if (*src == '\r' && *(src + 1) == '\n') {
                        /* drop the CR; the LF is copied on the next pass */
                        src++;
                        continue;
                }

                *dst++ = isascii(*(const guchar *)src) ? *src : SUBST_CHAR;
                src++;
        }

        *dst = '\0';
}
1115
1116
#if 0
1117
/*
 * Sanitize a Latin-encoded string in place: collapse CR+LF to LF and
 * replace bytes in the range 0x7f..0x9f with SUBST_CHAR.
 */
static void conv_unreadable_latin(gchar *str)
{
        register guchar *p;

        for (p = (guchar *)str; *p != '\0'; p++) {
                const guchar c = *p & 0xff;

                if (*p == '\r' && p[1] == '\n') {
                        /* convert CR+LF -> LF by shifting the tail left */
                        memmove(p, p + 1, strlen((const gchar *)p));
                } else if (c >= 0x7f && c <= 0x9f) {
                        *p = SUBST_CHAR;
                }
        }
}
1130
#endif
1131
1132
#define NCV        '\0'
1133
1134
void conv_mb_alnum(gchar *str)
1135
{
1136
        static guchar char_tbl[] = {
1137
                /* 0xa0 - 0xaf */
1138
                NCV, ' ', NCV, NCV, ',', '.', NCV, ':',
1139
                ';', '?', '!', NCV, NCV, NCV, NCV, NCV,
1140
                /* 0xb0 - 0xbf */
1141
                NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
1142
                NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
1143
                /* 0xc0 - 0xcf */
1144
                NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
1145
                NCV, NCV, '(', ')', NCV, NCV, '[', ']',
1146
                /* 0xd0 - 0xdf */
1147
                '{', '}', NCV, NCV, NCV, NCV, NCV, NCV,
1148
                NCV, NCV, NCV, NCV, '+', '-', NCV, NCV,
1149
                /* 0xe0 - 0xef */
1150
                NCV, '=', NCV, '<', '>', NCV, NCV, NCV,
1151
                NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV
1152
        };
1153
1154
        register guchar *p = (guchar *)str;
1155
        register gint len;
1156
1157
        len = strlen(str);
1158
1159
        while (len > 1) {
1160
                if (*p == 0xa3) {
1161
                        register guchar ch = *(p + 1);
1162
1163
                        if (ch >= 0xb0 && ch <= 0xfa) {
1164
                                /* [a-zA-Z] */
1165
                                *p = ch & 0x7f;
1166
                                p++;
1167
                                len--;
1168
                                memmove(p, p + 1, len);
1169
                                len--;
1170
                        } else  {
1171
                                p += 2;
1172
                                len -= 2;
1173
                        }
1174
                } else if (*p == 0xa1) {
1175
                        register guchar ch = *(p + 1);
1176
1177
                        if (ch >= 0xa0 && ch <= 0xef &&
1178
                            NCV != char_tbl[ch - 0xa0]) {
1179
                                *p = char_tbl[ch - 0xa0];
1180
                                p++;
1181
                                len--;
1182
                                memmove(p, p + 1, len);
1183
                                len--;
1184
                        } else {
1185
                                p += 2;
1186
                                len -= 2;
1187
                        }
1188
                } else if (iseuckanji(*p)) {
1189
                        p += 2;
1190
                        len -= 2;
1191
                } else {
1192
                        p++;
1193
                        len--;
1194
                }
1195
        }
1196
}
1197
1198
/*
 * Guess the encoding of a NUL-terminated byte string, assuming it holds
 * Japanese text.
 *
 * Pass 1 scans for ISO-2022-JP escapes and for byte pairs matching the
 * EUC-JP / Shift_JIS classification macros.  Pass 2 runs only when pass 1
 * saw something other than plain ASCII, and checks whether the whole
 * string is made of ASCII bytes and three-byte sequences accepted by
 * isutf8_3_1()/isutf8_3_2().
 *
 * Returns:
 *   C_US_ASCII    - only ASCII bytes were seen;
 *   C_ISO_2022_JP - ESC '$' or ESC '(' was found before any non-ASCII byte;
 *   C_UTF_8       - pass 2 accepted the whole string;
 *   otherwise the pass-1 guess: C_EUC_JP, C_SHIFT_JIS, or C_AUTO when only
 *   unclassifiable 8-bit bytes were seen.
 */
CharSet conv_guess_ja_encoding(const gchar *str)
{
        const guchar *p = (const guchar *)str;
        CharSet guessed = C_US_ASCII;

        while (*p != '\0') {
                /* ESC is tested before isascii() because ESC is ASCII too */
                if (*p == ESC && (*(p + 1) == '$' || *(p + 1) == '(')) {
                        /* an escape decides only while nothing else was seen */
                        if (guessed == C_US_ASCII)
                                return C_ISO_2022_JP;
                        p += 2;
                } else if (isascii(*p)) {
                        p++;
                } else if (iseuckanji(*p) && iseuckanji(*(p + 1))) {
                        /* lead byte 0xfd/0xfe settles on EUC-JP at once */
                        if (*p >= 0xfd && *p <= 0xfe)
                                return C_EUC_JP;
                        else if (guessed == C_SHIFT_JIS) {
                                /*
                                 * Already leaning towards Shift_JIS: keep
                                 * that guess while the bytes still fit it.
                                 */
                                if ((issjiskanji1(*p) &&
                                     issjiskanji2(*(p + 1))) ||
                                    issjishwkana(*p))
                                        guessed = C_SHIFT_JIS;
                                else
                                        guessed = C_EUC_JP;
                        } else
                                guessed = C_EUC_JP;
                        p += 2;
                } else if (issjiskanji1(*p) && issjiskanji2(*(p + 1))) {
                        guessed = C_SHIFT_JIS;
                        p += 2;
                } else if (issjishwkana(*p)) {
                        guessed = C_SHIFT_JIS;
                        p++;
                } else {
                        /* unclassifiable 8-bit byte: no longer plain ASCII */
                        if (guessed == C_US_ASCII)
                                guessed = C_AUTO;
                        p++;
                }
        }

        if (guessed != C_US_ASCII) {
                /* second pass: prefer UTF-8 if the whole string fits it */
                p = (const guchar *)str;

                while (*p != '\0') {
                        if (isascii(*p)) {
                                p++;
                        } else if (isutf8_3_1(*p) &&
                                   isutf8_3_2(*(p + 1)) &&
                                   isutf8_3_2(*(p + 2))) {
                                p += 3;
                        } else {
                                /* not UTF-8 by this test: keep pass-1 guess */
                                return guessed;
                        }
                }

                return C_UTF_8;
        }

        return guessed;
}
1256
1257
/*
 * Convert ISO-2022-JP text for display; this is the same conversion as
 * conv_jistoutf8().  Returns a newly allocated string.
 */
static gchar *conv_jistodisp(const gchar *inbuf, gint *error)
{
        gchar *shown = conv_jistoutf8(inbuf, error);

        return shown;
}
1261
1262
/*
 * Convert Shift_JIS text for display; this is the same conversion as
 * conv_sjistoutf8().  Returns a newly allocated string.
 */
static gchar *conv_sjistodisp(const gchar *inbuf, gint *error)
{
        gchar *shown = conv_sjistoutf8(inbuf, error);

        return shown;
}
1266
1267
/*
 * Convert EUC-JP text for display; this is the same conversion as
 * conv_euctoutf8().  Returns a newly allocated string.
 */
static gchar *conv_euctodisp(const gchar *inbuf, gint *error)
{
        gchar *shown = conv_euctoutf8(inbuf, error);

        return shown;
}
1271
1272
/*
 * Prepare text that is supposed to be UTF-8 for display.
 *
 * Valid UTF-8 is copied unchanged, minus a leading byte-order mark, with
 * *error set to 0.  Input that fails validation is handed to
 * conv_ustodisp(), which produces a 7-bit-clean copy.
 *
 * Returns a newly allocated string owned by the caller.
 */
gchar *conv_utf8todisp(const gchar *inbuf, gint *error)
{
        if (g_utf8_validate(inbuf, -1, NULL) != TRUE)
                return conv_ustodisp(inbuf, error);

        if (error != NULL)
                *error = 0;

        if (isutf8bom(inbuf))
                inbuf += 3;

        return g_strdup(inbuf);
}
1283
1284
/*
 * Convert text of unknown Japanese encoding for display.
 *
 * The input goes through conv_anytoutf8().  If the result is still not
 * valid UTF-8, *error is forced to -1 and the result is made 7-bit clean
 * with conv_unreadable_8bit().
 *
 * Returns a newly allocated string owned by the caller.
 */
static gchar *conv_anytodisp(const gchar *inbuf, gint *error)
{
        gchar *converted = conv_anytoutf8(inbuf, error);

        if (g_utf8_validate(converted, -1, NULL) == TRUE)
                return converted;

        /* Conversion did not yield valid UTF-8: report and sanitize. */
        if (error != NULL)
                *error = -1;
        conv_unreadable_8bit(converted);

        return converted;
}
1297
1298
/*
 * Produce a 7-bit-clean copy of the input for display: the copy is passed
 * through conv_unreadable_8bit().  *error is always set to 0.
 *
 * Returns a newly allocated string owned by the caller.
 */
static gchar *conv_ustodisp(const gchar *inbuf, gint *error)
{
        gchar *copy = g_strdup(inbuf);

        conv_unreadable_8bit(copy);

        if (error != NULL)
                *error = 0;

        return copy;
}
1309
1310
/*
 * Convert text in the locale's charset to the internal display encoding.
 *
 * The conversion is attempted with iconv.  If that yields no result (for
 * example because no converter could be opened), the input is treated as
 * UTF-8 and passed through conv_utf8todisp(); in that case *error keeps
 * whatever the failed iconv attempt stored.
 *
 * inbuf: NUL-terminated input, or NULL.
 * error: optional; receives the conversion status.
 * Returns a newly allocated string owned by the caller, or NULL when
 * inbuf is NULL (with *error set to 0).
 */
gchar *conv_localetodisp(const gchar *inbuf, gint *error)
{
        gchar *str;

        /*
         * Without this guard a NULL input would reach the UTF-8 fallback
         * below, which validates (and therefore dereferences) the string.
         */
        if (!inbuf) {
                if (error)
                        *error = 0;
                return NULL;
        }

        str = conv_iconv_strdup(inbuf, conv_get_locale_charset_str(),
                                CS_INTERNAL, error);
        if (!str)
                str = conv_utf8todisp(inbuf, NULL);

        return str;
}
1321
1322
/*
 * Identity "conversion": returns a newly allocated copy of the input and
 * sets *error to 0.  conv_convert() and conv_codeset_strdup_full() compare
 * a selected converter against this function to decide whether to fall
 * back to iconv.
 */
static gchar *conv_noconv(const gchar *inbuf, gint *error)
{
        gchar *copy = g_strdup(inbuf);

        if (error != NULL)
                *error = 0;

        return copy;
}
1328
1329
static const gchar *
1330
conv_get_fallback_for_private_encoding(const gchar *encoding)
1331
{
1332
        if (encoding) {
1333
                if ((encoding[0] == 'X' || encoding[0] == 'x') &&
1334
                    encoding[1] == '-') {
1335
                        if (!g_ascii_strcasecmp(encoding, CS_X_GBK))
1336
                                return CS_GBK;
1337
                        else if (!g_ascii_strcasecmp(encoding, CS_X_SJIS))
1338
                                return CS_SHIFT_JIS;
1339
                } else if ((encoding[0] == 'K' || encoding[0] == 'k') &&
1340
                           (encoding[1] == 'S' || encoding[1] == 's')) {
1341
                        if (!g_ascii_strcasecmp(encoding, CS_KS_C_5601_1987))
1342
                                return CS_EUC_KR;
1343
                }
1344
        }
1345
1346
        return encoding;
1347
}
1348
1349
CodeConverter *conv_code_converter_new(const gchar *src_encoding,
1350
                                       const gchar *dest_encoding)
1351
{
1352
        CodeConverter *conv;
1353
1354
        src_encoding = conv_get_fallback_for_private_encoding(src_encoding);
1355
1356
        conv = g_new0(CodeConverter, 1);
1357
        conv->code_conv_func =
1358
                conv_get_code_conv_func(src_encoding, dest_encoding);
1359
        conv->src_encoding = g_strdup(src_encoding);
1360
        conv->dest_encoding = g_strdup(dest_encoding);
1361
1362
        return conv;
1363
}
1364
1365
/*
 * Free a converter created by conv_code_converter_new(), including the
 * encoding names it owns.  Passing NULL is a no-op, so callers can release
 * a converter unconditionally, as with g_free().
 */
void conv_code_converter_destroy(CodeConverter *conv)
{
        if (conv == NULL)
                return;

        g_free(conv->src_encoding);
        g_free(conv->dest_encoding);
        g_free(conv);
}
1371
1372
/*
 * Convert a string with a prepared converter.
 *
 * If the converter holds a dedicated conversion function, that function is
 * used; if it holds conv_noconv, the generic iconv path is taken with the
 * stored encoding names.  Errors are not reported to the caller.
 *
 * Returns a newly allocated string, or NULL when inbuf is NULL.
 */
gchar *conv_convert(CodeConverter *conv, const gchar *inbuf)
{
        if (inbuf == NULL)
                return NULL;

        if (conv->code_conv_func == conv_noconv)
                return conv_iconv_strdup(inbuf, conv->src_encoding,
                                         conv->dest_encoding, NULL);

        return conv->code_conv_func(inbuf, NULL);
}
1382
1383
gchar *conv_codeset_strdup_full(const gchar *inbuf,
1384
                                const gchar *src_encoding,
1385
                                const gchar *dest_encoding,
1386
                                gint *error)
1387
{
1388
        CodeConvFunc conv_func;
1389
1390
        if (!inbuf) {
1391
                if (error)
1392
                        *error = 0;
1393
                return NULL;
1394
        }
1395
1396
        src_encoding = conv_get_fallback_for_private_encoding(src_encoding);
1397
1398
        conv_func = conv_get_code_conv_func(src_encoding, dest_encoding);
1399
        if (conv_func != conv_noconv)
1400
                return conv_func(inbuf, error);
1401
1402
        return conv_iconv_strdup(inbuf, src_encoding, dest_encoding, error);
1403
}
1404
1405
CodeConvFunc conv_get_code_conv_func(const gchar *src_encoding,
1406
                                     const gchar *dest_encoding)
1407
{
1408
        CodeConvFunc code_conv = conv_noconv;
1409
        CharSet src_charset;
1410
        CharSet dest_charset;
1411
1412
        if (!src_encoding)
1413
                src_charset = conv_get_locale_charset();
1414
        else
1415
                src_charset = conv_get_charset_from_str(src_encoding);
1416
1417
        /* auto detection mode */
1418
        if (!src_encoding && !dest_encoding) {
1419
                if (conv_ad_type == C_AD_JAPANESE ||
1420
                    (conv_ad_type == C_AD_BY_LOCALE && conv_is_ja_locale()))
1421
                        return conv_anytodisp;
1422
                else
1423
                        return conv_noconv;
1424
        }
1425
1426
        dest_charset = conv_get_charset_from_str(dest_encoding);
1427
1428
        if (dest_charset == C_US_ASCII)
1429
                return conv_ustodisp;
1430
1431
        switch (src_charset) {
1432
        case C_US_ASCII:
1433
        case C_ISO_8859_1:
1434
        case C_ISO_8859_2:
1435
        case C_ISO_8859_3:
1436
        case C_ISO_8859_4:
1437
        case C_ISO_8859_5:
1438
        case C_ISO_8859_6:
1439
        case C_ISO_8859_7:
1440
        case C_ISO_8859_8:
1441
        case C_ISO_8859_9:
1442
        case C_ISO_8859_10:
1443
        case C_ISO_8859_11:
1444
        case C_ISO_8859_13:
1445
        case C_ISO_8859_14:
1446
        case C_ISO_8859_15:
1447
        case C_ISO_8859_16:
1448
                break;
1449
        case C_ISO_2022_JP:
1450
        case C_ISO_2022_JP_2:
1451
        case C_ISO_2022_JP_3:
1452
                if (dest_charset == C_AUTO)
1453
                        code_conv = conv_jistodisp;
1454
                else if (dest_charset == C_EUC_JP)
1455
                        code_conv = conv_jistoeuc;
1456
                else if (dest_charset == C_SHIFT_JIS ||
1457
                         dest_charset == C_CP932)
1458
                        code_conv = conv_jistosjis;
1459
                else if (dest_charset == C_UTF_8)
1460
                        code_conv = conv_jistoutf8;
1461
                break;
1462
        case C_SHIFT_JIS:
1463
        case C_CP932:
1464
                if (dest_charset == C_AUTO)
1465
                        code_conv = conv_sjistodisp;
1466
                else if (dest_charset == C_ISO_2022_JP   ||
1467
                         dest_charset == C_ISO_2022_JP_2 ||
1468
                         dest_charset == C_ISO_2022_JP_3)
1469
                        code_conv = conv_sjistojis;
1470
                else if (dest_charset == C_EUC_JP)
1471
                        code_conv = conv_sjistoeuc;
1472
                else if (dest_charset == C_UTF_8)
1473
                        code_conv = conv_sjistoutf8;
1474
                break;
1475
        case C_EUC_JP:
1476
                if (dest_charset == C_AUTO)
1477
                        code_conv = conv_euctodisp;
1478
                else if (dest_charset == C_ISO_2022_JP   ||
1479
                         dest_charset == C_ISO_2022_JP_2 ||
1480
                         dest_charset == C_ISO_2022_JP_3)
1481
                        code_conv = conv_euctojis;
1482
                else if (dest_charset == C_UTF_8)
1483
                        code_conv = conv_euctoutf8;
1484
                break;
1485
        case C_UTF_8:
1486
                if (dest_charset == C_EUC_JP)
1487
                        code_conv = conv_utf8toeuc;
1488
                else if (dest_charset == C_ISO_2022_JP   ||
1489
                         dest_charset == C_ISO_2022_JP_2 ||
1490
                         dest_charset == C_ISO_2022_JP_3)
1491
                        code_conv = conv_utf8tojis;
1492
                else if (dest_charset == C_SHIFT_JIS ||
1493
                         dest_charset == C_CP932)
1494
                        code_conv = conv_utf8tosjis;
1495
                break;
1496
        default:
1497
                break;
1498
        }
1499
1500
        return code_conv;
1501
}
1502
1503
gchar *conv_iconv_strdup(const gchar *inbuf,
1504
                         const gchar *src_code, const gchar *dest_code,
1505
                         gint *error)
1506
{
1507
        iconv_t cd;
1508
        gchar *outbuf;
1509
1510
        if (!src_code)
1511
                src_code = conv_get_locale_charset_str();
1512
        if (!dest_code)
1513
                dest_code = CS_INTERNAL;
1514
1515
        cd = iconv_open(dest_code, src_code);
1516
        if (cd == (iconv_t)-1) {
1517
                if (error)
1518
                        *error = -1;
1519
                return NULL;
1520
        }
1521
1522
        outbuf = conv_iconv_strdup_with_cd(inbuf, cd, error);
1523
1524
        iconv_close(cd);
1525
1526
        return outbuf;
1527
}
1528
1529
/*
 * Convert a string with an already opened iconv descriptor.
 *
 * The output buffer starts at twice the input size (plus room for the
 * terminator) and is doubled whenever iconv reports E2BIG.  On EILSEQ one
 * input byte is skipped and SUBST_CHAR is written in its place.  EINVAL or
 * any other error stops the conversion; the latter is also logged.  A final
 * iconv call with a NULL input flushes the descriptor, and the result is
 * shrunk to fit and NUL-terminated.
 *
 * error: optional; receives 0 on a clean conversion and -1 if any byte was
 *        substituted or the conversion stopped early.
 * Returns a newly allocated string owned by the caller, or NULL when inbuf
 * is NULL (with *error set to 0).
 */
gchar *conv_iconv_strdup_with_cd(const gchar *inbuf, iconv_t cd, gint *error)
{
        const gchar *src;
        gchar *dest;
        gchar *cursor;
        size_t src_left;
        size_t capacity;
        size_t dest_left;
        size_t used;
        gint status = 0;

        if (inbuf == NULL) {
                if (error != NULL)
                        *error = 0;
                return NULL;
        }

        src = inbuf;
        src_left = strlen(inbuf);
        capacity = (src_left + 1) * 2;
        dest = g_malloc(capacity);
        cursor = dest;
        dest_left = capacity;

/* Double the output buffer, preserving the current write position. */
#define GROW_OUTPUT()                                   \
        do {                                            \
                used = cursor - dest;                   \
                capacity *= 2;                          \
                dest = g_realloc(dest, capacity);       \
                cursor = dest + used;                   \
                dest_left = capacity - used;            \
        } while (0)

        for (;;) {
                if (iconv(cd, (ICONV_CONST gchar **)&src, &src_left,
                          &cursor, &dest_left) != (size_t)-1)
                        break;

                if (errno == E2BIG) {
                        GROW_OUTPUT();
                        continue;
                }

                if (errno == EILSEQ) {
                        /* skip the offending byte and emit a placeholder */
                        status = -1;
                        src++;
                        src_left--;
                        if (dest_left == 0)
                                GROW_OUTPUT();
                        *cursor++ = SUBST_CHAR;
                        dest_left--;
                        continue;
                }

                /* EINVAL ends the conversion silently; others are logged */
                if (errno != EINVAL) {
                        g_warning("conv_iconv_strdup(): %s\n",
                                  g_strerror(errno));
                }
                status = -1;
                break;
        }

        /* Flush the descriptor with a NULL input. */
        while (iconv(cd, NULL, NULL, &cursor, &dest_left) == (size_t)-1) {
                if (errno != E2BIG) {
                        g_warning("conv_iconv_strdup(): %s\n",
                                  g_strerror(errno));
                        status = -1;
                        break;
                }
                GROW_OUTPUT();
        }

#undef GROW_OUTPUT

        /* Shrink to fit and terminate. */
        used = cursor - dest;
        dest = g_realloc(dest, used + 1);
        dest[used] = '\0';

        if (error != NULL)
                *error = status;

        return dest;
}
1613
1614
static const struct {
1615
        CharSet charset;
1616
        gchar *const name;
1617
} charsets[] = {
1618
        {C_US_ASCII,                CS_US_ASCII},
1619
        {C_US_ASCII,                CS_ANSI_X3_4_1968},
1620
        {C_UTF_8,                CS_UTF_8},
1621
        {C_UTF_7,                CS_UTF_7},
1622
        {C_ISO_8859_1,                CS_ISO_8859_1},
1623
        {C_ISO_8859_2,                CS_ISO_8859_2},
1624
        {C_ISO_8859_3,                CS_ISO_8859_3},
1625
        {C_ISO_8859_4,                CS_ISO_8859_4},
1626
        {C_ISO_8859_5,                CS_ISO_8859_5},
1627
        {C_ISO_8859_6,                CS_ISO_8859_6},
1628
        {C_ISO_8859_7,                CS_ISO_8859_7},
1629
        {C_ISO_8859_8,                CS_ISO_8859_8},
1630
        {C_ISO_8859_9,                CS_ISO_8859_9},
1631
        {C_ISO_8859_10,                CS_ISO_8859_10},
1632
        {C_ISO_8859_11,                CS_ISO_8859_11},
1633
        {C_ISO_8859_13,                CS_ISO_8859_13},
1634
        {C_ISO_8859_14,                CS_ISO_8859_14},
1635
        {C_ISO_8859_15,                CS_ISO_8859_15},
1636
        {C_BALTIC,                CS_BALTIC},
1637
        {C_CP932,                CS_CP932},
1638
        {C_CP1250,                CS_CP1250},
1639
        {C_CP1251,                CS_CP1251},
1640
        {C_CP1252,                CS_CP1252},
1641
        {C_CP1253,                CS_CP1253},
1642
        {C_CP1254,                CS_CP1254},
1643
        {C_CP1255,                CS_CP1255},
1644
        {C_CP1256,                CS_CP1256},
1645
        {C_CP1257,                CS_CP1257},
1646
        {C_CP1258,                CS_CP1258},
1647
        {C_WINDOWS_932,                CS_WINDOWS_932},
1648
        {C_WINDOWS_1250,        CS_WINDOWS_1250},
1649
        {C_WINDOWS_1251,        CS_WINDOWS_1251},
1650
        {C_WINDOWS_1252,        CS_WINDOWS_1252},
1651
        {C_WINDOWS_1253,        CS_WINDOWS_1253},
1652
        {C_WINDOWS_1254,        CS_WINDOWS_1254},
1653
        {C_WINDOWS_1255,        CS_WINDOWS_1255},
1654
        {C_WINDOWS_1256,        CS_WINDOWS_1256},
1655
        {C_WINDOWS_1257,        CS_WINDOWS_1257},
1656
        {C_WINDOWS_1258,        CS_WINDOWS_1258},
1657
        {C_KOI8_R,                CS_KOI8_R},
1658
        {C_KOI8_T,                CS_KOI8_T},
1659
        {C_KOI8_U,                CS_KOI8_U},
1660
        {C_ISO_2022_JP,                CS_ISO_2022_JP},
1661
        {C_ISO_2022_JP_2,        CS_ISO_2022_JP_2},
1662
        {C_ISO_2022_JP_3,        CS_ISO_2022_JP_3},
1663
        {C_EUC_JP,                CS_EUC_JP},
1664
        {C_EUC_JP,                CS_EUCJP},
1665
        {C_EUC_JP_MS,                CS_EUC_JP_MS},
1666
        {C_SHIFT_JIS,                CS_SHIFT_JIS},
1667
        {C_SHIFT_JIS,                CS_SHIFT__JIS},
1668
        {C_SHIFT_JIS,                CS_SJIS},
1669
        {C_ISO_2022_KR,                CS_ISO_2022_KR},
1670
        {C_EUC_KR,                CS_EUC_KR},
1671
        {C_ISO_2022_CN,                CS_ISO_2022_CN},
1672
        {C_EUC_CN,                CS_EUC_CN},
1673
        {C_GB2312,                CS_GB2312},
1674
        {C_GBK,                        CS_GBK},
1675
        {C_EUC_TW,                CS_EUC_TW},
1676
        {C_BIG5,                CS_BIG5},
1677
        {C_BIG5_HKSCS,                CS_BIG5_HKSCS},
1678
        {C_TIS_620,                CS_TIS_620},
1679
        {C_WINDOWS_874,                CS_WINDOWS_874},
1680
        {C_GEORGIAN_PS,                CS_GEORGIAN_PS},
1681
        {C_TCVN5712_1,                CS_TCVN5712_1},
1682
        {C_ISO_8859_16,                CS_ISO_8859_16},
1683
};
1684
1685
static const struct {
1686
        gchar *const locale;
1687
        CharSet charset;
1688
        CharSet out_charset;
1689
} locale_table[] = {
1690
        {"ja_JP.eucJP"        , C_EUC_JP        , C_ISO_2022_JP},
1691
        {"ja_JP.EUC-JP"        , C_EUC_JP        , C_ISO_2022_JP},
1692
        {"ja_JP.EUC"        , C_EUC_JP        , C_ISO_2022_JP},
1693
        {"ja_JP.ujis"        , C_EUC_JP        , C_ISO_2022_JP},
1694
        {"ja_JP.SJIS"        , C_SHIFT_JIS        , C_ISO_2022_JP},
1695
        {"ja_JP.JIS"        , C_ISO_2022_JP        , C_ISO_2022_JP},
1696
#ifdef G_OS_WIN32
1697
        {"ja_JP"        , C_CP932        , C_ISO_2022_JP},
1698
#elif defined(__APPLE__)
1699
        {"ja_JP"        , C_UTF_8        , C_ISO_2022_JP},
1700
#else
1701
        {"ja_JP"        , C_EUC_JP        , C_ISO_2022_JP},
1702
#endif
1703
        {"ko_KR.EUC-KR"        , C_EUC_KR        , C_EUC_KR},
1704
        {"ko_KR"        , C_EUC_KR        , C_EUC_KR},
1705
        {"zh_CN.GB2312"        , C_GB2312        , C_GB2312},
1706
        {"zh_CN.GBK"        , C_GBK                , C_GBK},
1707
        {"zh_CN"        , C_GB2312        , C_GB2312},
1708
        {"zh_HK"        , C_BIG5_HKSCS        , C_BIG5_HKSCS},
1709
        {"zh_TW.eucTW"        , C_EUC_TW        , C_BIG5},
1710
        {"zh_TW.EUC-TW"        , C_EUC_TW        , C_BIG5},
1711
        {"zh_TW.Big5"        , C_BIG5        , C_BIG5},
1712
        {"zh_TW"        , C_BIG5        , C_BIG5},
1713
1714
        {"ru_RU.KOI8-R"        , C_KOI8_R        , C_KOI8_R},
1715
        {"ru_RU.KOI8R"        , C_KOI8_R        , C_KOI8_R},
1716
        {"ru_RU.CP1251"        , C_WINDOWS_1251, C_KOI8_R},
1717
        {"ru_RU"        , C_ISO_8859_5        , C_KOI8_R},
1718
        {"tg_TJ"        , C_KOI8_T        , C_KOI8_T},
1719
        {"ru_UA"        , C_KOI8_U        , C_KOI8_U},
1720
        {"uk_UA.CP1251"        , C_WINDOWS_1251, C_KOI8_U},
1721
        {"uk_UA"        , C_KOI8_U        , C_KOI8_U},
1722
1723
        {"be_BY"        , C_WINDOWS_1251, C_WINDOWS_1251},
1724
        {"bg_BG"        , C_WINDOWS_1251, C_WINDOWS_1251},
1725
1726
        {"yi_US"        , C_WINDOWS_1255, C_WINDOWS_1255},
1727
1728
        {"af_ZA"        , C_ISO_8859_1  , C_ISO_8859_1},
1729
        {"br_FR"        , C_ISO_8859_1        , C_ISO_8859_1},
1730
        {"ca_ES"        , C_ISO_8859_1        , C_ISO_8859_1},
1731
        {"da_DK"        , C_ISO_8859_1        , C_ISO_8859_1},
1732
        {"de_AT"        , C_ISO_8859_1        , C_ISO_8859_1},
1733
        {"de_BE"        , C_ISO_8859_1        , C_ISO_8859_1},
1734
        {"de_CH"        , C_ISO_8859_1        , C_ISO_8859_1},
1735
        {"de_DE"        , C_ISO_8859_1        , C_ISO_8859_1},
1736
        {"de_LU"        , C_ISO_8859_1        , C_ISO_8859_1},
1737
        {"en_AU"        , C_ISO_8859_1        , C_ISO_8859_1},
1738
        {"en_BW"        , C_ISO_8859_1        , C_ISO_8859_1},
1739
        {"en_CA"        , C_ISO_8859_1        , C_ISO_8859_1},
1740
        {"en_DK"        , C_ISO_8859_1        , C_ISO_8859_1},
1741
        {"en_GB"        , C_ISO_8859_1        , C_ISO_8859_1},
1742
        {"en_HK"        , C_ISO_8859_1        , C_ISO_8859_1},
1743
        {"en_IE"        , C_ISO_8859_1        , C_ISO_8859_1},
1744
        {"en_NZ"        , C_ISO_8859_1        , C_ISO_8859_1},
1745
        {"en_PH"        , C_ISO_8859_1        , C_ISO_8859_1},
1746
        {"en_SG"        , C_ISO_8859_1        , C_ISO_8859_1},
1747
        {"en_US"        , C_ISO_8859_1        , C_ISO_8859_1},
1748
        {"en_ZA"        , C_ISO_8859_1        , C_ISO_8859_1},
1749
        {"en_ZW"        , C_ISO_8859_1        , C_ISO_8859_1},
1750
        {"es_AR"        , C_ISO_8859_1        , C_ISO_8859_1},
1751
        {"es_BO"        , C_ISO_8859_1        , C_ISO_8859_1},
1752
        {"es_CL"        , C_ISO_8859_1        , C_ISO_8859_1},
1753
        {"es_CO"        , C_ISO_8859_1        , C_ISO_8859_1},
1754
        {"es_CR"        , C_ISO_8859_1        , C_ISO_8859_1},
1755
        {"es_DO"        , C_ISO_8859_1        , C_ISO_8859_1},
1756
        {"es_EC"        , C_ISO_8859_1        , C_ISO_8859_1},
1757
        {"es_ES"        , C_ISO_8859_1        , C_ISO_8859_1},
1758
        {"es_GT"        , C_ISO_8859_1        , C_ISO_8859_1},
1759
        {"es_HN"        , C_ISO_8859_1        , C_ISO_8859_1},
1760
        {"es_MX"        , C_ISO_8859_1        , C_ISO_8859_1},
1761
        {"es_NI"        , C_ISO_8859_1        , C_ISO_8859_1},
1762
        {"es_PA"        , C_ISO_8859_1        , C_ISO_8859_1},
1763
        {"es_PE"        , C_ISO_8859_1        , C_ISO_8859_1},
1764
        {"es_PR"        , C_ISO_8859_1        , C_ISO_8859_1},
1765
        {"es_PY"        , C_ISO_8859_1        , C_ISO_8859_1},
1766
        {"es_SV"        , C_ISO_8859_1        , C_ISO_8859_1},
1767
        {"es_US"        , C_ISO_8859_1        , C_ISO_8859_1},
1768
        {"es_UY"        , C_ISO_8859_1        , C_ISO_8859_1},
1769
        {"es_VE"        , C_ISO_8859_1        , C_ISO_8859_1},
1770
        {"et_EE"        , C_ISO_8859_1        , C_ISO_8859_1},
1771
        {"eu_ES"        , C_ISO_8859_1        , C_ISO_8859_1},
1772
        {"fi_FI"        , C_ISO_8859_1        , C_ISO_8859_1},
1773
        {"fo_FO"        , C_ISO_8859_1        , C_ISO_8859_1},
1774
        {"fr_BE"        , C_ISO_8859_1        , C_ISO_8859_1},
1775
        {"fr_CA"        , C_ISO_8859_1        , C_ISO_8859_1},
1776
        {"fr_CH"        , C_ISO_8859_1        , C_ISO_8859_1},
1777
        {"fr_FR"        , C_ISO_8859_1        , C_ISO_8859_1},
1778
        {"fr_LU"        , C_ISO_8859_1        , C_ISO_8859_1},
1779
        {"ga_IE"        , C_ISO_8859_1        , C_ISO_8859_1},
1780
        {"gl_ES"        , C_ISO_8859_1        , C_ISO_8859_1},
1781
        {"gv_GB"        , C_ISO_8859_1        , C_ISO_8859_1},
1782
        {"id_ID"        , C_ISO_8859_1        , C_ISO_8859_1},
1783
        {"is_IS"        , C_ISO_8859_1        , C_ISO_8859_1},
1784
        {"it_CH"        , C_ISO_8859_1        , C_ISO_8859_1},
1785
        {"it_IT"        , C_ISO_8859_1        , C_ISO_8859_1},
1786
        {"kl_GL"        , C_ISO_8859_1        , C_ISO_8859_1},
1787
        {"kw_GB"        , C_ISO_8859_1        , C_ISO_8859_1},
1788
        {"ms_MY"        , C_ISO_8859_1        , C_ISO_8859_1},
1789
        {"nl_BE"        , C_ISO_8859_1        , C_ISO_8859_1},
1790
        {"nl_NL"        , C_ISO_8859_1        , C_ISO_8859_1},
1791
        {"nn_NO"        , C_ISO_8859_1        , C_ISO_8859_1},
1792
        {"no_NO"        , C_ISO_8859_1        , C_ISO_8859_1},
1793
        {"oc_FR"        , C_ISO_8859_1        , C_ISO_8859_1},
1794
        {"pt_BR"        , C_ISO_8859_1        , C_ISO_8859_1},
1795
        {"pt_PT"        , C_ISO_8859_1        , C_ISO_8859_1},
1796
        {"sq_AL"        , C_ISO_8859_1        , C_ISO_8859_1},
1797
        {"sv_FI"        , C_ISO_8859_1        , C_ISO_8859_1},
1798
        {"sv_SE"        , C_ISO_8859_1        , C_ISO_8859_1},
1799
        {"tl_PH"        , C_ISO_8859_1        , C_ISO_8859_1},
1800
        {"uz_UZ"        , C_ISO_8859_1        , C_ISO_8859_1},
1801
        {"wa_BE"        , C_ISO_8859_1        , C_ISO_8859_1},
1802
1803
        {"bs_BA"        , C_ISO_8859_2        , C_ISO_8859_2},
1804
        {"cs_CZ"        , C_ISO_8859_2        , C_ISO_8859_2},
1805
        {"hr_HR"        , C_ISO_8859_2        , C_ISO_8859_2},
1806
        {"hu_HU"        , C_ISO_8859_2        , C_ISO_8859_2},
1807
        {"pl_PL"        , C_ISO_8859_2        , C_ISO_8859_2},
1808
        {"ro_RO"        , C_ISO_8859_2        , C_ISO_8859_2},
1809
        {"sk_SK"        , C_ISO_8859_2        , C_ISO_8859_2},
1810
        {"sl_SI"        , C_ISO_8859_2        , C_ISO_8859_2},
1811
1812
        {"sr_YU@cyrillic"        , C_ISO_8859_5        , C_ISO_8859_5},
1813
        {"sr_YU"                , C_ISO_8859_2        , C_ISO_8859_2},
1814
1815
        {"mt_MT"                , C_ISO_8859_3        , C_ISO_8859_3},
1816
1817
        {"lt_LT.iso88594"        , C_ISO_8859_4        , C_ISO_8859_4},
1818
        {"lt_LT.ISO8859-4"        , C_ISO_8859_4        , C_ISO_8859_4},
1819
        {"lt_LT.ISO_8859-4"        , C_ISO_8859_4        , C_ISO_8859_4},
1820
        {"lt_LT"                , C_ISO_8859_13        , C_ISO_8859_13},
1821
1822
        {"mk_MK"        , C_ISO_8859_5        , C_ISO_8859_5},
1823
1824
        {"ar_AE"        , C_ISO_8859_6        , C_ISO_8859_6},
1825
        {"ar_BH"        , C_ISO_8859_6        , C_ISO_8859_6},
1826
        {"ar_DZ"        , C_ISO_8859_6        , C_ISO_8859_6},
1827
        {"ar_EG"        , C_ISO_8859_6        , C_ISO_8859_6},
1828
        {"ar_IQ"        , C_ISO_8859_6        , C_ISO_8859_6},
1829
        {"ar_JO"        , C_ISO_8859_6        , C_ISO_8859_6},
1830
        {"ar_KW"        , C_ISO_8859_6        , C_ISO_8859_6},
1831
        {"ar_LB"        , C_ISO_8859_6        , C_ISO_8859_6},
1832
        {"ar_LY"        , C_ISO_8859_6        , C_ISO_8859_6},
1833
        {"ar_MA"        , C_ISO_8859_6        , C_ISO_8859_6},
1834
        {"ar_OM"        , C_ISO_8859_6        , C_ISO_8859_6},
1835
        {"ar_QA"        , C_ISO_8859_6        , C_ISO_8859_6},
1836
        {"ar_SA"        , C_ISO_8859_6        , C_ISO_8859_6},
1837
        {"ar_SD"        , C_ISO_8859_6        , C_ISO_8859_6},
1838
        {"ar_SY"        , C_ISO_8859_6        , C_ISO_8859_6},
1839
        {"ar_TN"        , C_ISO_8859_6        , C_ISO_8859_6},
1840
        {"ar_YE"        , C_ISO_8859_6        , C_ISO_8859_6},
1841
1842
        {"el_GR"        , C_ISO_8859_7        , C_ISO_8859_7},
1843
        {"he_IL"        , C_ISO_8859_8        , C_ISO_8859_8},
1844
        {"iw_IL"        , C_ISO_8859_8        , C_ISO_8859_8},
1845
        {"tr_TR"        , C_ISO_8859_9        , C_ISO_8859_9},
1846
1847
        {"lv_LV"        , C_ISO_8859_13        , C_ISO_8859_13},
1848
        {"mi_NZ"        , C_ISO_8859_13        , C_ISO_8859_13},
1849
1850
        {"cy_GB"        , C_ISO_8859_14        , C_ISO_8859_14},
1851
1852
        {"ar_IN"        , C_UTF_8        , C_UTF_8},
1853
        {"en_IN"        , C_UTF_8        , C_UTF_8},
1854
        {"se_NO"        , C_UTF_8        , C_UTF_8},
1855
        {"ta_IN"        , C_UTF_8        , C_UTF_8},
1856
        {"te_IN"        , C_UTF_8        , C_UTF_8},
1857
        {"ur_PK"        , C_UTF_8        , C_UTF_8},
1858
1859
        {"th_TH"        , C_TIS_620        , C_TIS_620},
1860
        /* {"th_TH"        , C_WINDOWS_874}, */
1861
        /* {"th_TH"        , C_ISO_8859_11}, */
1862
1863
        {"ka_GE"        , C_GEORGIAN_PS        , C_GEORGIAN_PS},
1864
        {"vi_VN.TCVN"        , C_TCVN5712_1        , C_TCVN5712_1},
1865
1866
        {"C"                        , C_US_ASCII        , C_US_ASCII},
1867
        {"POSIX"                , C_US_ASCII        , C_US_ASCII},
1868
        {"ANSI_X3.4-1968"        , C_US_ASCII        , C_US_ASCII},
1869
};
1870
1871
static GHashTable *conv_get_charset_to_str_table(void)
1872
{
1873
        static GHashTable *table;
1874
        gint i;
1875
        S_LOCK_DEFINE_STATIC(table);
1876
1877
        S_LOCK(table);
1878
1879
        if (table) {
1880
                S_UNLOCK(table);
1881
                return table;
1882
        }
1883
1884
        table = g_hash_table_new(NULL, g_direct_equal);
1885
1886
        for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
1887
                if (g_hash_table_lookup(table, GUINT_TO_POINTER(charsets[i].charset))
1888
                    == NULL) {
1889
                        g_hash_table_insert
1890
                                (table, GUINT_TO_POINTER(charsets[i].charset),
1891
                                 charsets[i].name);
1892
                }
1893
        }
1894
1895
        S_UNLOCK(table);
1896
        return table;
1897
}
1898
1899
static GHashTable *conv_get_charset_from_str_table(void)
1900
{
1901
        static GHashTable *table;
1902
        S_LOCK_DEFINE_STATIC(table);
1903
1904
        gint i;
1905
1906
        S_LOCK(table);
1907
1908
        if (table) {
1909
                S_UNLOCK(table);
1910
                return table;
1911
        }
1912
1913
        table = g_hash_table_new(str_case_hash, str_case_equal);
1914
1915
        for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
1916
                g_hash_table_insert(table, charsets[i].name,
1917
                                    GUINT_TO_POINTER(charsets[i].charset));
1918
        }
1919
1920
        S_UNLOCK(table);
1921
        return table;
1922
}
1923
1924
/*
 * Look up the name registered for `charset`.
 *
 * Returns the first name listed for it in charsets[], or NULL when the
 * value has no entry there.
 */
const gchar *conv_get_charset_str(CharSet charset)
{
        return g_hash_table_lookup(conv_get_charset_to_str_table(),
                                   GUINT_TO_POINTER(charset));
}
1931
1932
CharSet conv_get_charset_from_str(const gchar *charset)
1933
{
1934
        GHashTable *table;
1935
1936
        if (!charset) return C_AUTO;
1937
1938
        table = conv_get_charset_from_str_table();
1939
        return GPOINTER_TO_UINT(g_hash_table_lookup(table, charset));
1940
}
1941
1942
CharSet conv_get_locale_charset(void)
1943
{
1944
        static CharSet cur_charset = -1;
1945
        const gchar *cur_locale;
1946
        const gchar *p;
1947
#if !defined(G_OS_WIN32) && !defined(__APPLE__)
1948
        gint i;
1949
#endif
1950
        S_LOCK_DEFINE_STATIC(cur_charset);
1951
1952
        S_LOCK(cur_charset);
1953
1954
        if (cur_charset != -1) {
1955
                S_UNLOCK(cur_charset);
1956
                return cur_charset;
1957
        }
1958
1959
        cur_locale = conv_get_current_locale();
1960
        if (!cur_locale) {
1961
                cur_charset = C_US_ASCII;
1962
                S_UNLOCK(cur_charset);
1963
                return cur_charset;
1964
        }
1965
1966
        if (strcasestr(cur_locale, "UTF-8") || strcasestr(cur_locale, "utf8")) {
1967
                cur_charset = C_UTF_8;
1968
                S_UNLOCK(cur_charset);
1969
                return cur_charset;
1970
        }
1971
1972
        if ((p = strcasestr(cur_locale, "@euro")) && p[5] == '\0') {
1973
                cur_charset = C_ISO_8859_15;
1974
                S_UNLOCK(cur_charset);
1975
                return cur_charset;
1976
        }
1977
1978
#if defined(G_OS_WIN32) || defined(__APPLE__)
1979
        cur_charset = conv_get_charset_from_str(conv_get_locale_charset_str());
1980
1981
        S_UNLOCK(cur_charset);
1982
        return cur_charset;
1983
#else
1984
        for (i = 0; i < sizeof(locale_table) / sizeof(locale_table[0]); i++) {
1985
                const gchar *p;
1986
1987
                /* "ja_JP.EUC" matches with "ja_JP.eucJP", "ja_JP.EUC" and
1988
                   "ja_JP". "ja_JP" matches with "ja_JP.xxxx" and "ja" */
1989
                if (!g_ascii_strncasecmp(cur_locale, locale_table[i].locale,
1990
                                         strlen(locale_table[i].locale))) {
1991
                        cur_charset = locale_table[i].charset;
1992
                        S_UNLOCK(cur_charset);
1993
                        return cur_charset;
1994
                } else if ((p = strchr(locale_table[i].locale, '_')) &&
1995
                         !strchr(p + 1, '.')) {
1996
                        if (strlen(cur_locale) == 2 &&
1997
                            !g_ascii_strncasecmp(cur_locale,
1998
                                                 locale_table[i].locale, 2)) {
1999
                                cur_charset = locale_table[i].charset;
2000
                                S_UNLOCK(cur_charset);
2001
                                return cur_charset;
2002
                        }
2003
                }
2004
        }
2005
2006
        cur_charset = C_AUTO;
2007
        S_UNLOCK(cur_charset);
2008
        return cur_charset;
2009
#endif
2010
}
2011
2012
const gchar *conv_get_locale_charset_str(void)
2013
{
2014
        static const gchar *codeset = NULL;
2015
        S_LOCK_DEFINE_STATIC(codeset);
2016
2017
        S_LOCK(codeset);
2018
2019
        if (!codeset) {
2020
#if defined(G_OS_WIN32) || defined(__APPLE__)
2021
                g_get_charset(&codeset);
2022
                if (!strcmp(codeset, CS_US_ASCII) ||
2023
                    !strcmp(codeset, CS_ANSI_X3_4_1968))
2024
                        codeset = CS_INTERNAL;
2025
#else
2026
                codeset = conv_get_charset_str(conv_get_locale_charset());
2027
#endif
2028
        }
2029
2030
        if (codeset) {
2031
                S_UNLOCK(codeset);
2032
                return codeset;
2033
        }
2034
2035
        S_UNLOCK(codeset);
2036
        return CS_INTERNAL;
2037
}
2038
2039
CharSet conv_get_internal_charset(void)
2040
{
2041
        return C_INTERNAL;
2042
}
2043
2044
const gchar *conv_get_internal_charset_str(void)
2045
{
2046
        return CS_INTERNAL;
2047
}
2048
2049
CharSet conv_get_outgoing_charset(void)
2050
{
2051
        static CharSet out_charset = -1;
2052
        const gchar *cur_locale;
2053
        const gchar *p;
2054
        gint i;
2055
        S_LOCK_DEFINE_STATIC(out_charset);
2056
2057
        S_LOCK(out_charset);
2058
2059
        if (out_charset != -1) {
2060
                S_UNLOCK(out_charset);
2061
                return out_charset;
2062
        }
2063
2064
        cur_locale = conv_get_current_locale();
2065
        if (!cur_locale) {
2066
                out_charset = C_AUTO;
2067
                S_UNLOCK(out_charset);
2068
                return out_charset;
2069
        }
2070
2071
        if ((p = strcasestr(cur_locale, "@euro")) && p[5] == '\0') {
2072
                out_charset = C_ISO_8859_15;
2073
                S_UNLOCK(out_charset);
2074
                return out_charset;
2075
        }
2076
2077
        for (i = 0; i < sizeof(locale_table) / sizeof(locale_table[0]); i++) {
2078
                const gchar *p;
2079
2080
                if (!g_ascii_strncasecmp(cur_locale, locale_table[i].locale,
2081
                                         strlen(locale_table[i].locale))) {
2082
                        out_charset = locale_table[i].out_charset;
2083
                        break;
2084
                } else if ((p = strchr(locale_table[i].locale, '_')) &&
2085
                         !strchr(p + 1, '.')) {
2086
                        if (strlen(cur_locale) == 2 &&
2087
                            !g_ascii_strncasecmp(cur_locale,
2088
                                                 locale_table[i].locale, 2)) {
2089
                                out_charset = locale_table[i].out_charset;
2090
                                break;
2091
                        }
2092
                }
2093
        }
2094
2095
        S_UNLOCK(out_charset);
2096
        return out_charset;
2097
}
2098
2099
const gchar *conv_get_outgoing_charset_str(void)
2100
{
2101
        CharSet out_charset;
2102
        const gchar *str;
2103
2104
        out_charset = conv_get_outgoing_charset();
2105
        str = conv_get_charset_str(out_charset);
2106
2107
        return str ? str : CS_UTF_8;
2108
}
2109
2110
/*
 * Tell whether `encoding` is one of the charsets this library treats as
 * multibyte.
 *
 * Returns TRUE for the listed East Asian charsets and for UTF-8 / UTF-7,
 * FALSE for every other value.
 *
 * C_WINDOWS_932 and C_BIG5_HKSCS are listed alongside C_CP932 and C_BIG5:
 * the former is the same Japanese code page under its "Windows" name, and
 * the latter is the Big5 variant that locale_table[] assigns to zh_HK.
 */
gboolean conv_is_multibyte_encoding(CharSet encoding)
{
        switch (encoding) {
        /* Japanese */
        case C_EUC_JP:
        case C_EUC_JP_MS:
        case C_ISO_2022_JP:
        case C_ISO_2022_JP_2:
        case C_ISO_2022_JP_3:
        case C_SHIFT_JIS:
        case C_CP932:
        case C_WINDOWS_932:
        /* Korean */
        case C_EUC_KR:
        case C_ISO_2022_KR:
        /* Chinese */
        case C_EUC_TW:
        case C_EUC_CN:
        case C_ISO_2022_CN:
        case C_GB2312:
        case C_GBK:
        case C_BIG5:
        case C_BIG5_HKSCS:
        /* Unicode */
        case C_UTF_8:
        case C_UTF_7:
                return TRUE;
        default:
                return FALSE;
        }
}
2135
2136
/*
 * Return the name of the current locale, or NULL when none can be found.
 *
 * On Win32 the name comes from g_win32_getlocale().  Elsewhere the first
 * non-empty value among the LC_ALL, LC_CTYPE and LANG environment
 * variables is used, then (when <locale.h> is available)
 * setlocale(LC_CTYPE, NULL).
 *
 * A non-NULL result is cached for the lifetime of the process.  The
 * strings handed out by g_getenv() and setlocale() live in storage that
 * later g_setenv() / setlocale() calls may overwrite, so a private copy is
 * cached rather than the raw pointer.  The returned string is owned by
 * this function and must not be freed by the caller.
 */
const gchar *conv_get_current_locale(void)
{
        static const gchar *cur_locale;
        S_LOCK_DEFINE_STATIC(cur_locale);

        S_LOCK(cur_locale);

        if (!cur_locale) {
#ifdef G_OS_WIN32
                cur_locale = g_win32_getlocale();
#else
                const gchar *name;

                name = g_getenv("LC_ALL");
                if (!name || *name == '\0')
                        name = g_getenv("LC_CTYPE");
                if (!name || *name == '\0')
                        name = g_getenv("LANG");
#ifdef HAVE_LOCALE_H
                if (!name || *name == '\0')
                        name = setlocale(LC_CTYPE, NULL);
#endif /* HAVE_LOCALE_H */

                /* g_strdup(NULL) is NULL, so "not found" stays uncached */
                cur_locale = g_strdup(name);
#endif /* G_OS_WIN32 */

                debug_print("current locale: %s\n",
                            cur_locale ? cur_locale : "(none)");
        }

        S_UNLOCK(cur_locale);
        return cur_locale;
}
2165
2166
gboolean conv_is_ja_locale(void)
2167
{
2168
        static gint is_ja_locale = -1;
2169
        const gchar *cur_locale;
2170
        S_LOCK_DEFINE_STATIC(is_ja_locale);
2171
2172
        S_LOCK(is_ja_locale);
2173
2174
        if (is_ja_locale != -1) {
2175
                S_UNLOCK(is_ja_locale);
2176
                return is_ja_locale != 0;
2177
        }
2178
2179
        is_ja_locale = 0;
2180
        cur_locale = conv_get_current_locale();
2181
        if (cur_locale) {
2182
                if (g_ascii_strncasecmp(cur_locale, "ja", 2) == 0)
2183
                        is_ja_locale = 1;
2184
        }
2185
2186
        S_UNLOCK(is_ja_locale);
2187
        return is_ja_locale != 0;
2188
}
2189
2190
/* Store `type` as the autodetection mode (conv_ad_type). */
void conv_set_autodetect_type(ConvADType type)
{
        conv_ad_type = type;
}
2194
2195
ConvADType conv_get_autodetect_type(void)
2196
{
2197
        return conv_ad_type;
2198
}
2199
2200
gchar *conv_unmime_header(const gchar *str, const gchar *default_encoding)
2201
{
2202
        gchar *buf;
2203
        gchar *decoded_str;
2204
2205
        if (is_ascii_str(str))
2206
                return unmime_header(str);
2207
2208
        if (default_encoding) {
2209
                buf = conv_codeset_strdup
2210
                        (str, default_encoding, CS_INTERNAL);
2211
                if (buf) {
2212
                        decoded_str = unmime_header(buf);
2213
                        g_free(buf);
2214
                        return decoded_str;
2215
                }
2216
        }
2217
2218
        if (conv_ad_type == C_AD_JAPANESE ||
2219
            (conv_ad_type == C_AD_BY_LOCALE && conv_is_ja_locale()))
2220
                buf = conv_anytodisp(str, NULL);
2221
        else
2222
                buf = conv_localetodisp(str, NULL);
2223
2224
        decoded_str = unmime_header(buf);
2225
        g_free(buf);
2226
2227
        return decoded_str;
2228
}
2229
2230
/* Line length that LBREAK_IF_REQUIRED() and conv_encode_header() fold at. */
#define MAX_LINELEN             76
/* NOTE(review): presumably the hard upper limit for one header line;
   confirm against its users. */
#define MAX_HARD_LINELEN        996

/* Strings opening and closing a MIME encoded word in a header. */
#define MIMESEP_BEGIN           "=?"
#define MIMESEP_END             "?="

/* 4 characters per 3 input bytes; a partial trailing group also takes 4. */
#define B64LEN(len)     ((len) / 3 * 4 + ((len) % 3 != 0 ? 4 : 0))
2236
2237
/*
 * Insert a folding line break ("\n ") into the output when `cond` holds.
 *
 * Expands inside a void function that has these locals in scope:
 *   dest, len   start and size of the output buffer
 *   destp       current write position in dest
 *   srcp        current read position in the source string
 *   left        columns remaining on the current output line
 *
 * If fewer than MAX_LINELEN + 2 bytes remain in the output buffer, the
 * output is NUL-terminated and the enclosing function returns.
 * Otherwise, when `cond` is true, source text remains, something has been
 * written and the current line is not fresh (left < MAX_LINELEN - 1):
 *   - a trailing whitespace character already written is dropped, or,
 *     for plain text, a whitespace character at srcp is skipped;
 *   - if source text still remains, "\n " is written and `left` is reset
 *     to MAX_LINELEN - 1.
 */
#define LBREAK_IF_REQUIRED(cond, is_plain_text)                         \
{                                                                       \
        if (len - (destp - dest) < MAX_LINELEN + 2) {                   \
                *destp = '\0';                                          \
                return;                                                 \
        }                                                               \
                                                                        \
        if ((cond) && *srcp &&                                          \
            destp > dest && left < MAX_LINELEN - 1) {                   \
                if (g_ascii_isspace(destp[-1]))                         \
                        destp--;                                        \
                else if (is_plain_text && g_ascii_isspace(*srcp))       \
                        srcp++;                                         \
                                                                        \
                if (*srcp) {                                            \
                        *destp++ = '\n';                                \
                        *destp++ = ' ';                                 \
                        left = MAX_LINELEN - 1;                         \
                }                                                       \
        }                                                               \
}
2259
2260
/*
 * Encode the UTF-8 header body `src' into `dest' (capacity `len' bytes),
 * folding lines as it goes.
 *
 * Words for which is_next_nonascii() reports nothing to encode are copied
 * verbatim.  Other runs are converted to `out_encoding' and written as
 * MIME encoded-words "=?charset?X?...?=":
 *   - X is Q (quoted-printable) for ISO-8859-*, KOI8-* and Windows-*
 *     charsets, B (Base64) for everything else;
 *   - if a conversion fails the block is kept in UTF-8 and labelled so.
 *
 * dest         : output buffer, always NUL-terminated on normal return.
 *                Output is cut short once fewer than MAX_LINELEN + 2
 *                bytes remain (see LBREAK_IF_REQUIRED).
 * len          : size of `dest' in bytes.
 * src          : input; must be valid UTF-8, otherwise the function
 *                returns through g_return_if_fail() without touching
 *                `dest'.
 * header_len   : number of columns already used on the first line
 *                (subtracted from MAX_LINELEN).
 * addr_field   : when TRUE, '(' and ')' are never placed inside an
 *                encoded-word.
 * out_encoding : target charset name; NULL selects
 *                conv_get_outgoing_charset_str(), and US-ASCII is
 *                replaced by ISO-8859-1.
 */
void conv_encode_header(gchar *dest, gint len, const gchar *src,
                        gint header_len, gboolean addr_field,
                        const gchar *out_encoding)
{
        const gchar *src_encoding;
        gint mimestr_len;
        gchar *mimesep_enc;
        gint left;
        const gchar *srcp = src;
        gchar *destp = dest;
        gboolean use_base64;

        g_return_if_fail(g_utf8_validate(src, -1, NULL) == TRUE);

        src_encoding = CS_INTERNAL;
        if (!out_encoding)
                out_encoding = conv_get_outgoing_charset_str();
        if (!strcmp(out_encoding, CS_US_ASCII))
                out_encoding = CS_ISO_8859_1;

        if (!g_ascii_strncasecmp(out_encoding, "ISO-8859-", 9) ||
            !g_ascii_strncasecmp(out_encoding, "KOI8-", 5) ||
            !g_ascii_strncasecmp(out_encoding, "Windows-", 8)) {
                use_base64 = FALSE;
                mimesep_enc = "?Q?";
        } else {
                use_base64 = TRUE;
                mimesep_enc = "?B?";
        }

        /* fixed overhead of an encoded-word: "=?" + "?X?" + "?=" */
        mimestr_len = strlen(MIMESEP_BEGIN) + strlen(mimesep_enc) +
                strlen(MIMESEP_END);

        /* columns still available on the current output line */
        left = MAX_LINELEN - header_len;

        while (*srcp) {
                gboolean in_quote = FALSE;

                LBREAK_IF_REQUIRED(left <= 0, TRUE);

                while (g_ascii_isspace(*srcp)) {
                        *destp++ = *srcp++;
                        left--;
                        LBREAK_IF_REQUIRED(left <= 0, TRUE);
                }

                /* output as it is if the next word is ASCII string */
                if (!is_next_nonascii(srcp)) {
                        gint word_len;

                        word_len = get_next_word_len(srcp);
                        LBREAK_IF_REQUIRED(left < word_len, TRUE);
                        while (word_len > 0) {
                                /* a single word is only folded once it
                                   would exceed MAX_HARD_LINELEN */
                                LBREAK_IF_REQUIRED(left + (MAX_HARD_LINELEN - MAX_LINELEN) <= 0, TRUE);
                                *destp++ = *srcp++;
                                left--;
                                word_len--;
                        }

                        continue;
                }

                /* don't include parentheses in encoded strings */
                if (addr_field && (*srcp == '(' || *srcp == ')')) {
                        LBREAK_IF_REQUIRED(left < 2, FALSE);
                        *destp++ = *srcp++;
                        left--;
                }

                /* emit one encoded-word per iteration until the current
                   run of text to encode is exhausted */
                while (1) {
                        gint mb_len = 0;
                        gint cur_len = 0;
                        gchar *part_str;
                        gchar *out_str;
                        gchar *enc_str;
                        const gchar *p = srcp;
                        const gchar *block_encoding = out_encoding;
                        gint out_str_len;
                        gint out_enc_str_len;
                        gint mime_block_len;
                        gint enc_block_len;
                        gint error = 0;
                        gboolean cont = FALSE;
                        gboolean can_fold;

                        /* grow the candidate [srcp, srcp + cur_len) one
                           UTF-8 character at a time while its encoded
                           form still fits into `left' columns */
                        while (*p != '\0') {
                                if (*p == '"')
                                        in_quote ^= TRUE;
                                else if (!in_quote) {
                                        if (g_ascii_isspace(*p) &&
                                            !is_next_nonascii(p + 1))
                                                break;
                                        /* don't include parentheses in encoded
                                           strings */
                                        if (addr_field &&
                                            (*p == '(' || *p == ')'))
                                                break;
                                }

                                mb_len = g_utf8_skip[*(guchar *)p];

                                /* trial conversion, used only to measure
                                   the encoded length */
                                part_str = g_strndup(srcp, cur_len + mb_len);
                                out_str = conv_codeset_strdup_full
                                        (part_str, src_encoding, block_encoding,
                                         &error);
                                if (!out_str || error != 0) {
                                        g_warning("conv_encode_header(): code conversion failed. Keeping UTF-8.\n");
                                        out_str = g_strdup(part_str);
                                        block_encoding = CS_UTF_8;
                                }
                                out_str_len = strlen(out_str);

                                if (use_base64)
                                        out_enc_str_len = B64LEN(out_str_len);
                                else
                                        out_enc_str_len =
                                                qp_get_q_encoding_len
                                                        ((guchar *)out_str);

                                g_free(out_str);
                                g_free(part_str);

                                /* Compare as signed values: `left' may be
                                   negative, and an unsigned comparison
                                   would then accept any length. */
                                enc_block_len = mimestr_len +
                                        (gint)strlen(block_encoding) +
                                        out_enc_str_len;
                                /* same precondition LBREAK_IF_REQUIRED
                                   uses before it emits a fold */
                                can_fold = (destp > dest &&
                                            left < MAX_LINELEN - 1);

                                if (enc_block_len <= left ||
                                    (cur_len == 0 && !can_fold)) {
                                        /* Either the character fits, or it
                                           is the first one of the block and
                                           folding cannot create more room:
                                           accept it so that the loop always
                                           makes progress. */
                                        cur_len += mb_len;
                                        p += mb_len;
                                } else if (cur_len == 0) {
                                        LBREAK_IF_REQUIRED(1, FALSE);
                                        /* undo the toggle done above; the
                                           character is examined again */
                                        if (*p == '"')
                                                in_quote ^= TRUE;
                                        continue;
                                } else {
                                        cont = TRUE;
                                        /* undo the toggle done above; the
                                           character starts the next block */
                                        if (*p == '"')
                                                in_quote ^= TRUE;
                                        break;
                                }
                        }

                        if (cur_len > 0) {
                                error = 0;
                                part_str = g_strndup(srcp, cur_len);
                                out_str = conv_codeset_strdup_full
                                        (part_str, src_encoding, block_encoding,
                                         &error);
                                if (!out_str || error != 0) {
                                        g_warning("conv_encode_header(): code conversion failed\n");
                                        out_str = g_strdup(part_str);
                                        block_encoding = CS_UTF_8;
                                }
                                out_str_len = strlen(out_str);

                                if (use_base64)
                                        out_enc_str_len = B64LEN(out_str_len);
                                else
                                        out_enc_str_len =
                                                qp_get_q_encoding_len
                                                        ((guchar *)out_str);

                                enc_str = g_malloc(out_enc_str_len + 1);
                                if (use_base64)
                                        base64_encode(enc_str,
                                                      (guchar *)out_str,
                                                      out_str_len);
                                else
                                        qp_q_encode(enc_str, (guchar *)out_str);

                                /* output MIME-encoded string block */
                                mime_block_len = mimestr_len +
                                        strlen(block_encoding) +
                                        strlen(enc_str);

                                /* never write past the end of `dest' */
                                if (len - (destp - dest) <
                                    mime_block_len + 1) {
                                        g_free(enc_str);
                                        g_free(out_str);
                                        g_free(part_str);
                                        *destp = '\0';
                                        return;
                                }

                                g_snprintf(destp, mime_block_len + 1,
                                           MIMESEP_BEGIN "%s%s%s" MIMESEP_END,
                                           block_encoding, mimesep_enc,
                                           enc_str);
                                destp += mime_block_len;
                                srcp += cur_len;

                                left -= mime_block_len;

                                g_free(enc_str);
                                g_free(out_str);
                                g_free(part_str);
                        }

                        /* fold only if more of this run is still pending */
                        LBREAK_IF_REQUIRED(cont, FALSE);

                        if (cur_len == 0)
                                break;
                }
        }

        *destp = '\0';
}
2451
2452
#undef LBREAK_IF_REQUIRED
2453
2454
/*
 * Store the upper-case hexadecimal digit for `val' (0..15) at *outp.
 * `outp' itself is not advanced.  The expansion stays a plain brace
 * block so existing uses keep working unchanged.
 */
#define INT_TO_HEX_UPPER(outp, val)                \
{                                                \
        if ((val) < 10)                                \
                *(outp) = '0' + (val);                \
        else                                        \
                *(outp) = 'A' + (val) - 10;        \
}

/*
 * Non-zero if byte `c' must be percent-encoded in an RFC 2231 extended
 * parameter value: control characters (including DEL, 0x7f), bytes
 * above 0x7f, space, and the special characters listed below.
 * `c' is evaluated more than once and should be an unsigned byte value.
 */
#define IS_ESCAPE_CHAR(c)                                        \
        ((c) < 0x20 || (c) >= 0x7f ||                                \
         strchr("\t \r\n*'%!#$&~`,{}|()<>@,;:\\\"/[]?=", (c)) != NULL)
2465
2466
static gchar *encode_rfc2231_filename(const gchar *str)
2467
{
2468
        const gchar *p;
2469
        gchar *out;
2470
        gchar *outp;
2471
2472
        outp = out = g_malloc(strlen(str) * 3 + 1);
2473
2474
        for (p = str; *p != '\0'; ++p) {
2475
                guchar ch = *(guchar *)p;
2476
2477
                if (IS_ESCAPE_CHAR(ch)) {
2478
                        *outp++ = '%';
2479
                        INT_TO_HEX_UPPER(outp, ch >> 4);
2480
                        ++outp;
2481
                        INT_TO_HEX_UPPER(outp, ch & 0x0f);
2482
                        ++outp;
2483
                } else
2484
                        *outp++ = ch;
2485
        }
2486
2487
        *outp = '\0';
2488
        return out;
2489
}
2490
2491
gchar *conv_encode_filename(const gchar *src, const gchar *param_name,
2492
                            const gchar *out_encoding)
2493
{
2494
        gint name_len, max_linelen;
2495
        gchar *out_str, *enc_str;
2496
        gchar cur_param[80];
2497
        GString *string;
2498
        gint count = 0;
2499
        gint cur_left_len;
2500
        gchar *p;
2501
2502
        g_return_val_if_fail(src != NULL, NULL);
2503
        g_return_val_if_fail(param_name != NULL, NULL);
2504
2505
        if (is_ascii_str(src))
2506
                return g_strdup_printf(" %s=\"%s\"", param_name, src);
2507
2508
        name_len = strlen(param_name);
2509
        max_linelen = MAX_LINELEN - name_len - 3;
2510
2511
        if (!out_encoding)
2512
                out_encoding = conv_get_outgoing_charset_str();
2513
        if (!strcmp(out_encoding, CS_US_ASCII))
2514
                out_encoding = CS_ISO_8859_1;
2515
2516
        out_str = conv_codeset_strdup(src, CS_INTERNAL, out_encoding);
2517
        if (!out_str)
2518
                return NULL;
2519
        enc_str = encode_rfc2231_filename(out_str);
2520
        g_free(out_str);
2521
2522
        if (strlen(enc_str) <= max_linelen) {
2523
                gchar *ret;
2524
                ret = g_strdup_printf(" %s*=%s''%s",
2525
                                      param_name, out_encoding, enc_str);
2526
                g_free(enc_str);
2527
                return ret;
2528
        }
2529
2530
        string = g_string_new(NULL);
2531
        g_string_printf(string, " %s*0*=%s''", param_name, out_encoding);
2532
        cur_left_len = MAX_LINELEN - string->len;
2533
2534
        p = enc_str;
2535
2536
        while (*p != '\0') {
2537
                if ((*p == '%' && cur_left_len < 4) ||
2538
                    (*p != '%' && cur_left_len < 2)) {
2539
                        gint len;
2540
2541
                        g_string_append(string, ";\n");
2542
                        ++count;
2543
                        len = g_snprintf(cur_param, sizeof(cur_param),
2544
                                         " %s*%d*=", param_name, count);
2545
                        g_string_append(string, cur_param);
2546
                        cur_left_len = MAX_LINELEN - len;
2547
                }
2548
2549
                if (*p == '%') {
2550
                        g_string_append_len(string, p, 3);
2551
                        p += 3;
2552
                        cur_left_len -= 3;
2553
                } else {
2554
                        g_string_append_c(string, *p);
2555
                        ++p;
2556
                        --cur_left_len;
2557
                }
2558
        }
2559
2560
        g_free(enc_str);
2561
2562
        return g_string_free(string, FALSE);
2563
}
2564
2565
gint conv_copy_file(const gchar *src, const gchar *dest, const gchar *encoding)
2566
{
2567
        FILE *src_fp, *dest_fp;
2568
        gchar buf[BUFFSIZE];
2569
        CodeConverter *conv;
2570
        gboolean err = FALSE;
2571
2572
        if ((src_fp = g_fopen(src, "rb")) == NULL) {
2573
                FILE_OP_ERROR(src, "fopen");
2574
                return -1;
2575
        }
2576
        if ((dest_fp = g_fopen(dest, "wb")) == NULL) {
2577
                FILE_OP_ERROR(dest, "fopen");
2578
                fclose(src_fp);
2579
                return -1;
2580
        }
2581
2582
        if (change_file_mode_rw(dest_fp, dest) < 0) {
2583
                FILE_OP_ERROR(dest, "chmod");
2584
                g_warning("can't change file mode\n");
2585
        }
2586
2587
        conv = conv_code_converter_new(encoding, NULL);
2588
2589
        while (fgets(buf, sizeof(buf), src_fp) != NULL) {
2590
                gchar *outbuf;
2591
2592
                outbuf = conv_convert(conv, buf);
2593
                if (outbuf) {
2594
                        fputs(outbuf, dest_fp);
2595
                        g_free(outbuf);
2596
                } else
2597
                        fputs(buf, dest_fp);
2598
        }
2599
2600
        conv_code_converter_destroy(conv);
2601
2602
        if (ferror(src_fp)) {
2603
                FILE_OP_ERROR(src, "fgets");
2604
                err = TRUE;
2605
        }
2606
        fclose(src_fp);
2607
        if (fclose(dest_fp) == EOF) {
2608
                FILE_OP_ERROR(dest, "fclose");
2609
                err = TRUE;
2610
        }
2611
        if (err) {
2612
                g_unlink(dest);
2613
                return -1;
2614
        }
2615
2616
        return 0;
2617
}
2618
2619
gint conv_copy_dir(const gchar *src, const gchar *dest, const gchar *encoding)
2620
{
2621
        GDir *dir;
2622
        const gchar *dir_name;
2623
        gchar *src_file;
2624
        gchar *dest_file;
2625
2626
        if ((dir = g_dir_open(src, 0, NULL)) == NULL) {
2627
                g_warning("failed to open directory: %s\n", src);
2628
                return -1;
2629
        }
2630
2631
        if (make_dir_hier(dest) < 0) {
2632
                g_dir_close(dir);
2633
                return -1;
2634
        }
2635
2636
        while ((dir_name = g_dir_read_name(dir)) != NULL) {
2637
                src_file = g_strconcat(src, G_DIR_SEPARATOR_S, dir_name, NULL);
2638
                dest_file = g_strconcat(dest, G_DIR_SEPARATOR_S, dir_name,
2639
                                        NULL);
2640
                if (is_file_exist(src_file))
2641
                        conv_copy_file(src_file, dest_file, encoding);
2642
                g_free(dest_file);
2643
                g_free(src_file);
2644
        }
2645
2646
        g_dir_close(dir);
2647
2648
        return 0;
2649
}
2650
2651
CharSet conv_check_file_encoding(const gchar *file)
2652
{
2653
        FILE *fp;
2654
        gchar buf[BUFFSIZE];
2655
        CharSet enc;
2656
        const gchar *enc_str;
2657
        gboolean is_locale = TRUE, is_utf8 = TRUE;
2658
2659
        g_return_val_if_fail(file != NULL, C_AUTO);
2660
2661
        enc = conv_get_locale_charset();
2662
        enc_str = conv_get_locale_charset_str();
2663
        if (enc == C_UTF_8)
2664
                is_locale = FALSE;
2665
2666
        if ((fp = g_fopen(file, "rb")) == NULL) {
2667
                FILE_OP_ERROR(file, "fopen");
2668
                return C_AUTO;
2669
        }
2670
2671
        while (fgets(buf, sizeof(buf), fp) != NULL) {
2672
                gchar *str;
2673
                gint error = 0;
2674
2675
                if (is_locale) {
2676
                        str = conv_codeset_strdup_full(buf, enc_str,
2677
                                                       CS_INTERNAL, &error);
2678
                        if (!str || error != 0)
2679
                                is_locale = FALSE;
2680
                        g_free(str);
2681
                }
2682
2683
                if (is_utf8 && g_utf8_validate(buf, -1, NULL) == FALSE) {
2684
                        is_utf8 = FALSE;
2685
                }
2686
2687
                if (!is_locale && !is_utf8)
2688
                        break;
2689
        }
2690
2691
        fclose(fp);
2692
2693
        if (is_locale)
2694
                return enc;
2695
        else if (is_utf8)
2696
                return C_UTF_8;
2697
        else
2698
                return C_AUTO;
2699
}
2700
2701
gchar *conv_filename_from_utf8(const gchar *utf8_file)
2702
{
2703
        gchar *fs_file;
2704
        GError *error = NULL;
2705
2706
        g_return_val_if_fail(utf8_file != NULL, NULL);
2707
2708
        fs_file = g_filename_from_utf8(utf8_file, -1, NULL, NULL, &error);
2709
        if (error) {
2710
                g_warning("failed to convert encoding of file name: %s\n",
2711
                          error->message);
2712
                g_error_free(error);
2713
        }
2714
        if (!fs_file)
2715
                fs_file = g_strdup(utf8_file);
2716
2717
        return fs_file;
2718
}
2719
2720
gchar *conv_filename_to_utf8(const gchar *fs_file)
2721
{
2722
        gchar *utf8_file;
2723
        GError *error = NULL;
2724
2725
        g_return_val_if_fail(fs_file != NULL, NULL);
2726
2727
        utf8_file = g_filename_to_utf8(fs_file, -1, NULL, NULL, &error);
2728
        if (error) {
2729
                g_warning("failed to convert encoding of file name: %s\n",
2730
                          error->message);
2731
                g_error_free(error);
2732
        }
2733
        if (!utf8_file)
2734
                utf8_file = g_strdup(fs_file);
2735
2736
        return utf8_file;
2737
}