Statistics
| Revision:

root / src / codeconv.c @ 92

History | View | Annotate | Download (44.8 kB)

1
/*
2
 * Sylpheed -- a GTK+ based, lightweight, and fast e-mail client
3
 * Copyright (C) 1999-2005 Hiroyuki Yamamoto
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program; if not, write to the Free Software
17
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18
 */
19
20
#ifdef HAVE_CONFIG_H
21
#  include "config.h"
22
#endif
23
24
#include "defs.h"
25
26
#include <glib.h>
27
#include <glib/gi18n.h>
28
#include <string.h>
29
#include <ctype.h>
30
#include <stdlib.h>
31
#include <errno.h>
32
33
#if HAVE_LOCALE_H
34
#  include <locale.h>
35
#endif
36
37
#include <iconv.h>
38
39
#include "codeconv.h"
40
#include "unmime.h"
41
#include "base64.h"
42
#include "quoted-printable.h"
43
#include "utils.h"
44
#include "prefs_common.h"
45
46
/*
 * Character set currently selected in an ISO-2022-JP (JIS) byte stream.
 * The converters in this file start in JIS_ASCII and change state when
 * they read or emit an escape sequence.
 */
typedef enum
{
	JIS_ASCII,	/* single-byte text (entered with ESC ( B) */
	JIS_KANJI,	/* double-byte kanji (entered with ESC $ B) */
	JIS_HWKANA,	/* half-width kana (entered with ESC ( I) */
	JIS_AUXKANJI	/* auxiliary kanji (entered with ESC $ ( D) */
} JISState;
53
54
#define SUBST_CHAR        '_'
55
#define ESC                '\033'
56
57
/*
 * Byte classification helpers for EUC-JP and Shift_JIS.
 * Every test masks its argument to the low 8 bits first, so both signed
 * and unsigned character values can be passed.
 */
#define CONV_BYTE_IN_RANGE(c, lo, hi) \
	(((c) & 0xff) >= (lo) && ((c) & 0xff) <= (hi))

/* byte of a double-byte EUC-JP character (0xa1 - 0xfe) */
#define iseuckanji(c)	CONV_BYTE_IN_RANGE(c, 0xa1, 0xfe)
/* EUC-JP prefix byte introducing a half-width kana */
#define iseuchwkana1(c)	(((c) & 0xff) == 0x8e)
/* EUC-JP half-width kana byte following the 0x8e prefix */
#define iseuchwkana2(c)	CONV_BYTE_IN_RANGE(c, 0xa1, 0xdf)
/* EUC-JP prefix byte introducing an auxiliary kanji */
#define iseucaux(c)	(((c) & 0xff) == 0x8f)
/* first byte of a double-byte Shift_JIS character */
#define issjiskanji1(c) \
	(CONV_BYTE_IN_RANGE(c, 0x81, 0x9f) || \
	 CONV_BYTE_IN_RANGE(c, 0xe0, 0xfc))
/* second byte of a double-byte Shift_JIS character */
#define issjiskanji2(c) \
	(CONV_BYTE_IN_RANGE(c, 0x40, 0x7e) || \
	 CONV_BYTE_IN_RANGE(c, 0x80, 0xfc))
/* single-byte Shift_JIS half-width kana */
#define issjishwkana(c)	CONV_BYTE_IN_RANGE(c, 0xa1, 0xdf)
73
74
/*
 * ISO-2022-JP shift helpers.
 *
 * Each macro expects two variables in the expanding scope: `out`, a byte
 * pointer to the current write position, and `state`, the JISState of the
 * output stream.  When the stream is not already in the requested state the
 * macro appends the matching escape sequence through `out` and updates
 * `state`; otherwise it does nothing.
 *
 * The bodies are wrapped in do { } while (0) so that each invocation is a
 * single statement and stays correct inside an unbraced if/else.
 */

/* switch to double-byte kanji: ESC $ B */
#define K_IN()					\
	do {					\
		if (state != JIS_KANJI) {	\
			*out++ = ESC;		\
			*out++ = '$';		\
			*out++ = 'B';		\
			state = JIS_KANJI;	\
		}				\
	} while (0)

/* switch back to ASCII: ESC ( B */
#define K_OUT()					\
	do {					\
		if (state != JIS_ASCII) {	\
			*out++ = ESC;		\
			*out++ = '(';		\
			*out++ = 'B';		\
			state = JIS_ASCII;	\
		}				\
	} while (0)

/* switch to half-width kana: ESC ( I */
#define HW_IN()					\
	do {					\
		if (state != JIS_HWKANA) {	\
			*out++ = ESC;		\
			*out++ = '(';		\
			*out++ = 'I';		\
			state = JIS_HWKANA;	\
		}				\
	} while (0)

/* switch to auxiliary kanji: ESC $ ( D */
#define AUX_IN()				\
	do {					\
		if (state != JIS_AUXKANJI) {	\
			*out++ = ESC;		\
			*out++ = '$';		\
			*out++ = '(';		\
			*out++ = 'D';		\
			state = JIS_AUXKANJI;	\
		}				\
	} while (0)
106
107
static void conv_jistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf);
108
static void conv_euctojis(gchar *outbuf, gint outlen, const gchar *inbuf);
109
static void conv_sjistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf);
110
111
static void conv_jistoutf8(gchar *outbuf, gint outlen, const gchar *inbuf);
112
static void conv_sjistoutf8(gchar *outbuf, gint outlen, const gchar *inbuf);
113
static void conv_euctoutf8(gchar *outbuf, gint outlen, const gchar *inbuf);
114
static void conv_anytoutf8(gchar *outbuf, gint outlen, const gchar *inbuf);
115
116
static void conv_utf8toeuc(gchar *outbuf, gint outlen, const gchar *inbuf);
117
static void conv_utf8tojis(gchar *outbuf, gint outlen, const gchar *inbuf);
118
119
static void conv_unreadable_eucjp(gchar *str);
120
static void conv_unreadable_8bit(gchar *str);
121
static void conv_unreadable_latin(gchar *str);
122
123
static void conv_jistodisp(gchar *outbuf, gint outlen, const gchar *inbuf);
124
static void conv_sjistodisp(gchar *outbuf, gint outlen, const gchar *inbuf);
125
static void conv_euctodisp(gchar *outbuf, gint outlen, const gchar *inbuf);
126
127
static void conv_anytodisp(gchar *outbuf, gint outlen, const gchar *inbuf);
128
static void conv_ustodisp(gchar *outbuf, gint outlen, const gchar *inbuf);
129
static void conv_noconv(gchar *outbuf, gint outlen, const gchar *inbuf);
130
131
/*
 * Convert ISO-2022-JP (JIS) text to EUC-JP.
 *
 * outbuf: destination buffer
 * outlen: size of outbuf in bytes, including room for the terminating NUL
 * inbuf:  NUL-terminated source text
 *
 * Recognized escape sequences only change the decoder state and are not
 * copied.  An unrecognized sequence drops the ESC byte and resets the state
 * to ASCII.  SO (0x0e) and SI (0x0f) switch to half-width kana and back.
 *
 * The result is always NUL-terminated and never exceeds outlen bytes.  When
 * the buffer is too small the output is cut on a character boundary.
 * Nothing is written when outlen is not positive.
 */
static void conv_jistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
{
	const guchar *in = (const guchar *)inbuf;
	guchar *out = (guchar *)outbuf;
	const guchar *end;
	JISState state = JIS_ASCII;

	if (outlen <= 0)
		return;
	/* the last byte of the buffer is reserved for the NUL */
	end = out + outlen - 1;

	while (*in != '\0') {
		if (*in == ESC) {
			in++;
			if (*in == '$') {
				if (*(in + 1) == '@' || *(in + 1) == 'B') {
					state = JIS_KANJI;
					in += 2;
				} else if (*(in + 1) == '(' &&
					   *(in + 2) == 'D') {
					state = JIS_AUXKANJI;
					in += 3;
				} else {
					/* unknown escape sequence */
					state = JIS_ASCII;
				}
			} else if (*in == '(') {
				if (*(in + 1) == 'B' || *(in + 1) == 'J') {
					state = JIS_ASCII;
					in += 2;
				} else if (*(in + 1) == 'I') {
					state = JIS_HWKANA;
					in += 2;
				} else {
					/* unknown escape sequence */
					state = JIS_ASCII;
				}
			} else {
				/* unknown escape sequence */
				state = JIS_ASCII;
			}
		} else if (*in == 0x0e) {
			state = JIS_HWKANA;
			in++;
		} else if (*in == 0x0f) {
			state = JIS_ASCII;
			in++;
		} else {
			/* a double-byte character may be cut short by the NUL */
			const gboolean has_trail = (*(in + 1) != '\0');
			gint need;

			/* how many bytes this character occupies in EUC-JP */
			switch (state) {
			case JIS_KANJI:
				need = has_trail ? 2 : 1;
				break;
			case JIS_HWKANA:
				need = 2;
				break;
			case JIS_AUXKANJI:
				need = has_trail ? 3 : 2;
				break;
			case JIS_ASCII:
			default:
				need = 1;
				break;
			}

			/* stop before writing a partial character */
			if (end - out < need)
				break;

			switch (state) {
			case JIS_ASCII:
				*out++ = *in++;
				break;
			case JIS_KANJI:
				*out++ = *in++ | 0x80;
				if (*in == '\0') break;
				*out++ = *in++ | 0x80;
				break;
			case JIS_HWKANA:
				*out++ = 0x8e;
				*out++ = *in++ | 0x80;
				break;
			case JIS_AUXKANJI:
				*out++ = 0x8f;
				*out++ = *in++ | 0x80;
				if (*in == '\0') break;
				*out++ = *in++ | 0x80;
				break;
			}
		}
	}

	*out = '\0';
}
199
200
#define JIS_HWDAKUTEN                0x5e
201
#define JIS_HWHANDAKUTEN        0x5f
202
203
/*
 * Map a half-width (JIS X 0201) kana code to its full-width JIS code.
 *
 * outbuf:    receives the two bytes of the full-width code
 * jis_code:  half-width kana code (only the low 7 bits are used)
 * sound_sym: code of the following character (only the low 7 bits are
 *            used); when it is a dakuten or handakuten mark that can be
 *            combined with jis_code, the combined character is produced
 *
 * Returns the number of half-width characters consumed: 2 when the sound
 * mark was merged, 1 for a plain conversion, or 0 when jis_code is outside
 * 0x21 - 0x5f (outbuf is left untouched in that case).
 */
static gint conv_jis_hantozen(guchar *outbuf, guchar jis_code, guchar sound_sym)
{
	/* plain half-width -> full-width, indexed by (code - 0x20) */
	static guint16 h2z_tbl[] = {
		/* 0x20 - 0x2f */
		0x0000, 0x2123, 0x2156, 0x2157, 0x2122, 0x2126, 0x2572, 0x2521,
		0x2523, 0x2525, 0x2527, 0x2529, 0x2563, 0x2565, 0x2567, 0x2543,
		/* 0x30 - 0x3f */
		0x213c, 0x2522, 0x2524, 0x2526, 0x2528, 0x252a, 0x252b, 0x252d,
		0x252f, 0x2531, 0x2533, 0x2535, 0x2537, 0x2539, 0x253b, 0x253d,
		/* 0x40 - 0x4f */
		0x253f, 0x2541, 0x2544, 0x2546, 0x2548, 0x254a, 0x254b, 0x254c,
		0x254d, 0x254e, 0x254f, 0x2552, 0x2555, 0x2558, 0x255b, 0x255e,
		/* 0x50 - 0x5f */
		0x255f, 0x2560, 0x2561, 0x2562, 0x2564, 0x2566, 0x2568, 0x2569,
		0x256a, 0x256b, 0x256c, 0x256d, 0x256f, 0x2573, 0x212b, 0x212c
	};

	/* kana + dakuten, indexed by (code - 0x30); 0 = no combined form */
	static guint16 dakuten_tbl[] = {
		/* 0x30 - 0x3f */
		0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x252c, 0x252e,
		0x2530, 0x2532, 0x2534, 0x2536, 0x2538, 0x253a, 0x253c, 0x253e,
		/* 0x40 - 0x4f */
		0x2540, 0x2542, 0x2545, 0x2547, 0x2549, 0x0000, 0x0000, 0x0000,
		0x0000, 0x0000, 0x2550, 0x2553, 0x2556, 0x2559, 0x255c, 0x0000
	};

	/* kana + handakuten, indexed by (code - 0x4a) */
	static guint16 handakuten_tbl[] = {
		/* 0x4a - 0x4e */
		0x2551, 0x2554, 0x2557, 0x255a, 0x255d
	};

	guint16 combined = 0;
	guint16 result;
	gint consumed;

	jis_code &= 0x7f;
	sound_sym &= 0x7f;

	if (jis_code < 0x21 || jis_code > 0x5f)
		return 0;

	/* try to merge the kana with the sound mark that follows it */
	if (sound_sym == JIS_HWDAKUTEN) {
		if (jis_code >= 0x36 && jis_code <= 0x4e)
			combined = dakuten_tbl[jis_code - 0x30];
	} else if (sound_sym == JIS_HWHANDAKUTEN) {
		if (jis_code >= 0x4a && jis_code <= 0x4e)
			combined = handakuten_tbl[jis_code - 0x4a];
	}

	if (combined != 0) {
		result = combined;
		consumed = 2;
	} else {
		/* no combined form: convert the kana on its own */
		result = h2z_tbl[jis_code - 0x20];
		consumed = 1;
	}

	outbuf[0] = result >> 8;
	outbuf[1] = result & 0xff;

	return consumed;
}
265
266
static void conv_euctojis(gchar *outbuf, gint outlen, const gchar *inbuf)
267
{
268
        const guchar *in = inbuf;
269
        guchar *out = outbuf;
270
        JISState state = JIS_ASCII;
271
272
        while (*in != '\0') {
273
                if (isascii(*in)) {
274
                        K_OUT();
275
                        *out++ = *in++;
276
                } else if (iseuckanji(*in)) {
277
                        if (iseuckanji(*(in + 1))) {
278
                                K_IN();
279
                                *out++ = *in++ & 0x7f;
280
                                *out++ = *in++ & 0x7f;
281
                        } else {
282
                                K_OUT();
283
                                *out++ = SUBST_CHAR;
284
                                in++;
285
                                if (*in != '\0' && !isascii(*in)) {
286
                                        *out++ = SUBST_CHAR;
287
                                        in++;
288
                                }
289
                        }
290
                } else if (iseuchwkana1(*in)) {
291
                        if (iseuchwkana2(*(in + 1))) {
292
                                if (prefs_common.allow_jisx0201_kana) {
293
                                        HW_IN();
294
                                        in++;
295
                                        *out++ = *in++ & 0x7f;
296
                                } else {
297
                                        guchar jis_ch[2];
298
                                        gint len;
299
300
                                        if (iseuchwkana1(*(in + 2)) &&
301
                                            iseuchwkana2(*(in + 3)))
302
                                                len = conv_jis_hantozen
303
                                                        (jis_ch,
304
                                                         *(in + 1), *(in + 3));
305
                                        else
306
                                                len = conv_jis_hantozen
307
                                                        (jis_ch,
308
                                                         *(in + 1), '\0');
309
                                        if (len == 0)
310
                                                in += 2;
311
                                        else {
312
                                                K_IN();
313
                                                in += len * 2;
314
                                                *out++ = jis_ch[0];
315
                                                *out++ = jis_ch[1];
316
                                        }
317
                                }
318
                        } else {
319
                                K_OUT();
320
                                in++;
321
                                if (*in != '\0' && !isascii(*in)) {
322
                                        *out++ = SUBST_CHAR;
323
                                        in++;
324
                                }
325
                        }
326
                } else if (iseucaux(*in)) {
327
                        in++;
328
                        if (iseuckanji(*in) && iseuckanji(*(in + 1))) {
329
                                AUX_IN();
330
                                *out++ = *in++ & 0x7f;
331
                                *out++ = *in++ & 0x7f;
332
                        } else {
333
                                K_OUT();
334
                                if (*in != '\0' && !isascii(*in)) {
335
                                        *out++ = SUBST_CHAR;
336
                                        in++;
337
                                        if (*in != '\0' && !isascii(*in)) {
338
                                                *out++ = SUBST_CHAR;
339
                                                in++;
340
                                        }
341
                                }
342
                        }
343
                } else {
344
                        K_OUT();
345
                        *out++ = SUBST_CHAR;
346
                        in++;
347
                }
348
        }
349
350
        K_OUT();
351
        *out = '\0';
352
}
353
354
/*
 * Convert Shift_JIS text to EUC-JP.
 *
 * outbuf: destination buffer
 * outlen: size of outbuf in bytes, including room for the terminating NUL
 * inbuf:  NUL-terminated source text
 *
 * Bytes that do not form a valid Shift_JIS character are replaced with
 * SUBST_CHAR.  Single-byte half-width kana become the two-byte EUC-JP form
 * (0x8e prefix).
 *
 * The result is always NUL-terminated and never exceeds outlen bytes.  When
 * the buffer is too small the output is cut on a character boundary.
 * Nothing is written when outlen is not positive.
 */
static void conv_sjistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
{
	const guchar *in = (const guchar *)inbuf;
	guchar *out = (guchar *)outbuf;
	const guchar *end;

	if (outlen <= 0)
		return;
	/* the last byte of the buffer is reserved for the NUL */
	end = out + outlen - 1;

	while (*in != '\0') {
		if (isascii(*in)) {
			if (end - out < 1)
				break;
			*out++ = *in++;
		} else if (issjiskanji1(*in)) {
			if (issjiskanji2(*(in + 1))) {
				guchar out1 = *in;
				guchar out2 = *(in + 1);
				guchar row;

				if (end - out < 2)
					break;

				/* fold the two Shift_JIS lead-byte ranges
				   onto consecutive JIS rows */
				row = out1 < 0xa0 ? 0x70 : 0xb0;
				if (out2 < 0x9f) {
					out1 = (out1 - row) * 2 - 1;
					out2 -= out2 > 0x7f ? 0x20 : 0x1f;
				} else {
					out1 = (out1 - row) * 2;
					out2 -= 0x7e;
				}

				*out++ = out1 | 0x80;
				*out++ = out2 | 0x80;
				in += 2;
			} else {
				/* broken pair: substitute the lead byte and
				   a following 8-bit byte, if any */
				gint n = (*(in + 1) != '\0' &&
					  !isascii(*(in + 1))) ? 2 : 1;

				if (end - out < n)
					break;
				*out++ = SUBST_CHAR;
				in++;
				if (n == 2) {
					*out++ = SUBST_CHAR;
					in++;
				}
			}
		} else if (issjishwkana(*in)) {
			if (end - out < 2)
				break;
			*out++ = 0x8e;
			*out++ = *in++;
		} else {
			if (end - out < 1)
				break;
			*out++ = SUBST_CHAR;
			in++;
		}
	}

	*out = '\0';
}
399
400
/*
 * Convert ISO-2022-JP (JIS) text to UTF-8 in two steps: JIS -> EUC-JP into
 * a temporary buffer of outlen bytes, then EUC-JP -> UTF-8 into outbuf.
 * NOTE(review): Xalloca() is defined outside this file section; its third
 * argument presumably runs when the allocation fails - confirm in utils.h.
 */
static void conv_jistoutf8(gchar *outbuf, gint outlen, const gchar *inbuf)
{
	gchar *euc_tmp;

	Xalloca(euc_tmp, outlen, return);

	/* intermediate EUC-JP form */
	conv_jistoeuc(euc_tmp, outlen, inbuf);

	conv_euctoutf8(outbuf, outlen, euc_tmp);
}
409
410
/*
 * Convert Shift_JIS text to UTF-8 through conv_iconv_strdup().
 * When the conversion yields no string, inbuf is copied to outbuf
 * unchanged.  Either way at most outlen bytes are passed to strncpy2().
 */
static void conv_sjistoutf8(gchar *outbuf, gint outlen, const gchar *inbuf)
{
	gchar *converted = conv_iconv_strdup(inbuf, CS_SHIFT_JIS, CS_UTF_8);

	if (converted == NULL) {
		/* conversion failed: pass the input through */
		strncpy2(outbuf, inbuf, outlen);
		return;
	}

	strncpy2(outbuf, converted, outlen);
	g_free(converted);
}
421
422
/*
 * Convert EUC-JP text to UTF-8.
 *
 * The iconv descriptor is opened on first use and kept in a function-local
 * static: CS_EUC_JP_MS is tried first, then CS_EUC_JP.  If neither can be
 * opened a warning is logged once and later calls skip the attempt.
 * Whenever no converted string is available, inbuf is copied to outbuf
 * unchanged.
 */
static void conv_euctoutf8(gchar *outbuf, gint outlen, const gchar *inbuf)
{
	static iconv_t cd = (iconv_t)-1;
	static gboolean iconv_ok = TRUE;
	gchar *converted = NULL;

	/* lazily open the descriptor unless a previous attempt failed */
	if (cd == (iconv_t)-1 && iconv_ok) {
		cd = iconv_open(CS_UTF_8, CS_EUC_JP_MS);
		if (cd == (iconv_t)-1)
			cd = iconv_open(CS_UTF_8, CS_EUC_JP);
		if (cd == (iconv_t)-1) {
			g_warning("conv_euctoutf8(): %s\n",
				  g_strerror(errno));
			iconv_ok = FALSE;
		}
	}

	if (cd != (iconv_t)-1)
		converted = conv_iconv_strdup_with_cd(inbuf, cd);

	if (converted != NULL) {
		strncpy2(outbuf, converted, outlen);
		g_free(converted);
	} else {
		/* no descriptor or conversion failed: pass through */
		strncpy2(outbuf, inbuf, outlen);
	}
}
453
454
/*
 * Convert Japanese text of unknown encoding to UTF-8.
 * The encoding is guessed with conv_guess_ja_encoding() and the matching
 * converter is called.  Any other guess copies inbuf to outbuf unchanged.
 */
static void conv_anytoutf8(gchar *outbuf, gint outlen, const gchar *inbuf)
{
	const CharSet guessed = conv_guess_ja_encoding(inbuf);

	if (guessed == C_ISO_2022_JP)
		conv_jistoutf8(outbuf, outlen, inbuf);
	else if (guessed == C_SHIFT_JIS)
		conv_sjistoutf8(outbuf, outlen, inbuf);
	else if (guessed == C_EUC_JP)
		conv_euctoutf8(outbuf, outlen, inbuf);
	else
		strncpy2(outbuf, inbuf, outlen);
}
471
472
/*
 * Convert UTF-8 text to EUC-JP.
 *
 * The iconv descriptor is opened on first use and kept in a function-local
 * static: CS_EUC_JP_MS is tried first, then CS_EUC_JP.  If neither can be
 * opened a warning is logged once and later calls skip the attempt.
 * Whenever no converted string is available, inbuf is copied to outbuf
 * unchanged.
 */
static void conv_utf8toeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
{
	static iconv_t cd = (iconv_t)-1;
	static gboolean iconv_ok = TRUE;
	gchar *converted = NULL;

	/* lazily open the descriptor unless a previous attempt failed */
	if (cd == (iconv_t)-1 && iconv_ok) {
		cd = iconv_open(CS_EUC_JP_MS, CS_UTF_8);
		if (cd == (iconv_t)-1)
			cd = iconv_open(CS_EUC_JP, CS_UTF_8);
		if (cd == (iconv_t)-1) {
			g_warning("conv_utf8toeuc(): %s\n",
				  g_strerror(errno));
			iconv_ok = FALSE;
		}
	}

	if (cd != (iconv_t)-1)
		converted = conv_iconv_strdup_with_cd(inbuf, cd);

	if (converted != NULL) {
		strncpy2(outbuf, converted, outlen);
		g_free(converted);
	} else {
		/* no descriptor or conversion failed: pass through */
		strncpy2(outbuf, inbuf, outlen);
	}
}
503
504
/*
 * Convert UTF-8 text to ISO-2022-JP (JIS) in two steps: UTF-8 -> EUC-JP
 * into a temporary buffer of outlen bytes, then EUC-JP -> JIS into outbuf.
 * NOTE(review): Xalloca() is defined outside this file section; its third
 * argument presumably runs when the allocation fails - confirm in utils.h.
 */
static void conv_utf8tojis(gchar *outbuf, gint outlen, const gchar *inbuf)
{
	gchar *euc_tmp;

	Xalloca(euc_tmp, outlen, return);

	/* intermediate EUC-JP form */
	conv_utf8toeuc(euc_tmp, outlen, inbuf);

	conv_euctojis(outbuf, outlen, euc_tmp);
}
513
514
static gchar valid_eucjp_tbl[][96] = {
515
        /* 0xa2a0 - 0xa2ff */
516
        { 0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 0,
517
          0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 1, 1, 1, 1, 1, 1,
518
          1, 1, 0, 0, 0, 0, 0, 0,  0, 0, 1, 1, 1, 1, 1, 1,
519
          1, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 1, 1, 1, 1,
520
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 0, 0, 0, 0, 0,
521
          0, 0, 1, 1, 1, 1, 1, 1,  1, 1, 0, 0, 0, 0, 1, 0 },
522
523
        /* 0xa3a0 - 0xa3ff */
524
        { 0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
525
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 0, 0, 0, 0, 0, 0,
526
          0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
527
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 0, 0, 0, 0, 0,
528
          0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
529
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 0, 0, 0, 0, 0 },
530
531
        /* 0xa4a0 - 0xa4ff */
532
        { 0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
533
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
534
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
535
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
536
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
537
          1, 1, 1, 1, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0 },
538
539
        /* 0xa5a0 - 0xa5ff */
540
        { 0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
541
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
542
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
543
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
544
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
545
          1, 1, 1, 1, 1, 1, 1, 0,  0, 0, 0, 0, 0, 0, 0, 0 },
546
547
        /* 0xa6a0 - 0xa6ff */
548
        { 0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
549
          1, 1, 1, 1, 1, 1, 1, 1,  1, 0, 0, 0, 0, 0, 0, 0,
550
          0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
551
          1, 1, 1, 1, 1, 1, 1, 1,  1, 0, 0, 0, 0, 0, 0, 0,
552
          0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
553
          0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0 },
554
555
        /* 0xa7a0 - 0xa7ff */
556
        { 0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
557
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
558
          1, 1, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
559
          0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
560
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
561
          1, 1, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0 },
562
563
        /* 0xa8a0 - 0xa8ff */
564
        { 0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
565
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
566
          1, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
567
          0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
568
          0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
569
          0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0 }
570
};
571
572
/*
 * Tell whether the byte pair (c1, c2) is accepted as a printable
 * double-byte EUC-JP character.
 *
 * Rows 0xa2 - 0xa8 are looked up cell by cell in valid_eucjp_tbl, rows
 * 0xa9 - 0xaf are rejected entirely, and the tails of rows 0xcf and 0xf4
 * are rejected.  Everything else within 0xa1 - 0xf4 / 0xa1 - 0xfe is
 * accepted.
 */
static gboolean isprintableeuckanji(guchar c1, guchar c2)
{
	/* lead byte must be 0xa1 - 0xf4, trail byte 0xa1 - 0xfe */
	if (c1 < 0xa1 || c1 > 0xf4)
		return FALSE;
	if (c2 < 0xa1 || c2 > 0xfe)
		return FALSE;

	/* rows with gaps: consult the per-cell table */
	if (c1 >= 0xa2 && c1 <= 0xa8)
		return (gboolean)valid_eucjp_tbl[c1 - 0xa2][c2 - 0xa0];

	/* rows rejected as a whole */
	if (c1 >= 0xa9 && c1 <= 0xaf)
		return FALSE;

	/* rows whose tail is rejected (c2 <= 0xfe is known here) */
	if (c1 == 0xcf && c2 >= 0xd4)
		return FALSE;
	if (c1 == 0xf4 && c2 >= 0xa7)
		return FALSE;

	return TRUE;
}
595
596
/*
 * Sanitize an EUC-JP string in place:
 *  - CR+LF is collapsed to LF;
 *  - bytes that do not form a printable EUC-JP character, a half-width
 *    kana or an auxiliary kanji are overwritten with SUBST_CHAR.
 */
static void conv_unreadable_eucjp(gchar *str)
{
	guchar *cur = (guchar *)str;

	while (*cur != '\0') {
		if (isascii(*cur)) {
			/* convert CR+LF -> LF by shifting the tail left */
			if (cur[0] == '\r' && cur[1] == '\n')
				memmove(cur, cur + 1,
					strlen((const char *)cur));
			cur++;
			continue;
		}

		if (iseuckanji(*cur)) {
			if (isprintableeuckanji(cur[0], cur[1])) {
				cur += 2;
				continue;
			}
			/* unprintable: replace the lead byte, then step over
			   the next byte, replacing it too if it is 8-bit */
			*cur++ = SUBST_CHAR;
			if (*cur != '\0') {
				if (!isascii(*cur))
					*cur = SUBST_CHAR;
				cur++;
			}
			continue;
		}

		if (iseuchwkana1(*cur)) {
			if (iseuchwkana2(cur[1]))
				cur += 2;	/* half-width kana */
			else
				*cur++ = SUBST_CHAR;
			continue;
		}

		if (iseucaux(*cur) &&
		    iseuckanji(cur[1]) && iseuckanji(cur[2])) {
			cur += 3;		/* auxiliary kanji */
			continue;
		}

		/* broken auxiliary prefix or any other 8-bit byte */
		*cur++ = SUBST_CHAR;
	}
}
638
639
/*
 * Sanitize a string in place for 7-bit display:
 *  - CR+LF is collapsed to LF;
 *  - every non-ASCII byte is overwritten with SUBST_CHAR.
 *
 * The string is compacted in a single pass with separate read and write
 * positions, so the cost stays linear however many CR+LF pairs it holds.
 */
static void conv_unreadable_8bit(gchar *str)
{
	const guchar *src = (const guchar *)str;
	guchar *dst = (guchar *)str;

	while (*src != '\0') {
		if (src[0] == '\r' && src[1] == '\n') {
			/* drop the CR, keep the LF */
			*dst++ = '\n';
			src += 2;
		} else {
			*dst++ = isascii(*src) ? *src : SUBST_CHAR;
			src++;
		}
	}

	/* dst never runs ahead of src, so this stays inside the string */
	*dst = '\0';
}
651
652
/*
 * Sanitize a Latin (8-bit, single-byte) string in place:
 *  - CR+LF is collapsed to LF;
 *  - bytes in the range 0x7f - 0x9f are overwritten with SUBST_CHAR.
 *
 * The string is compacted in a single pass with separate read and write
 * positions, so the cost stays linear however many CR+LF pairs it holds.
 */
static void conv_unreadable_latin(gchar *str)
{
	const guchar *src = (const guchar *)str;
	guchar *dst = (guchar *)str;

	while (*src != '\0') {
		if (src[0] == '\r' && src[1] == '\n') {
			/* drop the CR, keep the LF */
			*dst++ = '\n';
			src += 2;
		} else {
			const guchar ch = *src++;

			*dst++ = (ch >= 0x7f && ch <= 0x9f) ? SUBST_CHAR : ch;
		}
	}

	/* dst never runs ahead of src, so this stays inside the string */
	*dst = '\0';
}
665
666
#define NCV        '\0'
667
668
/*
 * Replace double-byte EUC-JP alphanumerics and punctuation in str with
 * their single-byte ASCII equivalents, shrinking the string in place.
 *
 * Row 0xa3 (second byte 0xb0 - 0xfa) maps to the second byte with its high
 * bit cleared.  Row 0xa1 is mapped through char_tbl, where NCV marks a
 * character that has no ASCII equivalent.  All other double-byte
 * characters are skipped untouched.
 */
void conv_mb_alnum(gchar *str)
{
	/* ASCII equivalents for row 0xa1, indexed by (second byte - 0xa0) */
	static guchar char_tbl[] = {
		/* 0xa0 - 0xaf */
		NCV, ' ', NCV, NCV, ',', '.', NCV, ':',
		';', '?', '!', NCV, NCV, NCV, NCV, NCV,
		/* 0xb0 - 0xbf */
		NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
		NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
		/* 0xc0 - 0xcf */
		NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
		NCV, NCV, '(', ')', NCV, NCV, '[', ']',
		/* 0xd0 - 0xdf */
		'{', '}', NCV, NCV, NCV, NCV, NCV, NCV,
		NCV, NCV, NCV, NCV, '+', '-', NCV, NCV,
		/* 0xe0 - 0xef */
		NCV, '=', NCV, '<', '>', NCV, NCV, NCV,
		NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV
	};

	guchar *cur = (guchar *)str;
	gint remaining = strlen(str);

	/* a double-byte character needs at least two bytes */
	while (remaining > 1) {
		const guchar lead = cur[0];
		const guchar trail = cur[1];
		guchar ascii = NCV;

		if (lead == 0xa3) {
			/* [a-zA-Z] */
			if (trail >= 0xb0 && trail <= 0xfa)
				ascii = trail & 0x7f;
		} else if (lead == 0xa1) {
			if (trail >= 0xa0 && trail <= 0xef)
				ascii = char_tbl[trail - 0xa0];
		} else if (!iseuckanji(lead)) {
			/* single-byte character */
			cur++;
			remaining--;
			continue;
		}

		if (ascii == NCV) {
			/* double-byte character without an ASCII form */
			cur += 2;
			remaining -= 2;
			continue;
		}

		/* store the ASCII byte, then close the one-byte gap; the
		   move also carries the terminating NUL */
		*cur++ = ascii;
		remaining--;
		memmove(cur, cur + 1, remaining);
		remaining--;
	}
}
731
732
CharSet conv_guess_ja_encoding(const gchar *str)
733
{
734
        const guchar *p = str;
735
        CharSet guessed = C_US_ASCII;
736
737
        while (*p != '\0') {
738
                if (*p == ESC && (*(p + 1) == '$' || *(p + 1) == '(')) {
739
                        if (guessed == C_US_ASCII)
740
                                return C_ISO_2022_JP;
741
                        p += 2;
742
                } else if (isascii(*p)) {
743
                        p++;
744
                } else if (iseuckanji(*p) && iseuckanji(*(p + 1))) {
745
                        if (*p >= 0xfd && *p <= 0xfe)
746
                                return C_EUC_JP;
747
                        else if (guessed == C_SHIFT_JIS) {
748
                                if ((issjiskanji1(*p) &&
749
                                     issjiskanji2(*(p + 1))) ||
750
                                    issjishwkana(*p))
751
                                        guessed = C_SHIFT_JIS;
752
                                else
753
                                        guessed = C_EUC_JP;
754
                        } else
755
                                guessed = C_EUC_JP;
756
                        p += 2;
757
                } else if (issjiskanji1(*p) && issjiskanji2(*(p + 1))) {
758
                        if (iseuchwkana1(*p) && iseuchwkana2(*(p + 1)))
759
                                guessed = C_SHIFT_JIS;
760
                        else
761
                                return C_SHIFT_JIS;
762
                        p += 2;
763
                } else if (issjishwkana(*p)) {
764
                        guessed = C_SHIFT_JIS;
765
                        p++;
766
                } else {
767
                        p++;
768
                }
769
        }
770
771
        return guessed;
772
}
773
774
/*
 * Display-conversion entry for JIS input: forwards all arguments
 * unchanged to conv_jistoutf8().
 */
static void conv_jistodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
{
        conv_jistoutf8(outbuf, outlen, inbuf);
}
778
779
/*
 * Display-conversion entry for Shift_JIS input: forwards all arguments
 * unchanged to conv_sjistoutf8().
 */
static void conv_sjistodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
{
        conv_sjistoutf8(outbuf, outlen, inbuf);
}
783
784
/*
 * Display-conversion entry for EUC input: forwards all arguments
 * unchanged to conv_euctoutf8().
 */
static void conv_euctodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
{
        conv_euctoutf8(outbuf, outlen, inbuf);
}
788
789
/*
 * Produce a displayable copy of inbuf, which is expected to be UTF-8.
 *
 * If inbuf does not validate as UTF-8 it is handed to conv_ustodisp();
 * otherwise it is copied to outbuf as is with strncpy2().
 */
void conv_utf8todisp(gchar *outbuf, gint outlen, const gchar *inbuf)
{
        if (g_utf8_validate(inbuf, -1, NULL) != TRUE) {
                conv_ustodisp(outbuf, outlen, inbuf);
                return;
        }

        strncpy2(outbuf, inbuf, outlen);
}
796
797
/*
 * Display-conversion entry for input of unknown encoding: forwards all
 * arguments unchanged to conv_anytoutf8().
 */
static void conv_anytodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
{
        conv_anytoutf8(outbuf, outlen, inbuf);
}
801
802
/*
 * Display-conversion entry for US-ASCII: copies inbuf into outbuf with
 * strncpy2() and then runs conv_unreadable_8bit() on the copy.
 * NOTE(review): presumably the latter substitutes any 8-bit bytes;
 * confirm against its definition.
 */
static void conv_ustodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
{
        /* the copy must happen first: the clean-up works in place on outbuf */
        strncpy2(outbuf, inbuf, outlen);
        conv_unreadable_8bit(outbuf);
}
807
808
/*
 * Convert inbuf from the locale charset to the internal charset
 * (CS_INTERNAL) and store the result in outbuf, limited by outlen.
 *
 * If the iconv conversion yields no string, inbuf is copied unconverted.
 */
void conv_localetodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
{
        gchar *converted = conv_iconv_strdup(inbuf,
                                             conv_get_locale_charset_str(),
                                             CS_INTERNAL);

        /* fall back to the raw input when conversion failed */
        strncpy2(outbuf, converted ? converted : inbuf, outlen);

        /* g_free() ignores NULL, so no separate failure branch is needed */
        g_free(converted);
}
820
821
/*
 * Identity converter: copies inbuf to outbuf with strncpy2().
 *
 * Its address also serves as the "no dedicated converter" marker that
 * conv_get_code_conv_func() returns and its callers compare against.
 */
static void conv_noconv(gchar *outbuf, gint outlen, const gchar *inbuf)
{
        strncpy2(outbuf, inbuf, outlen);
}
825
826
CodeConverter *conv_code_converter_new(const gchar *src_charset)
827
{
828
        CodeConverter *conv;
829
830
        conv = g_new0(CodeConverter, 1);
831
        conv->code_conv_func = conv_get_code_conv_func(src_charset, NULL);
832
        conv->charset_str = g_strdup(src_charset);
833
        conv->charset = conv_get_charset_from_str(src_charset);
834
835
        return conv;
836
}
837
838
/*
 * Release a CodeConverter created by conv_code_converter_new(),
 * including its copy of the charset name.
 *
 * Passing NULL is allowed and does nothing, mirroring g_free().
 */
void conv_code_converter_destroy(CodeConverter *conv)
{
        if (!conv)
                return;

        g_free(conv->charset_str);
        g_free(conv);
}
843
844
gint conv_convert(CodeConverter *conv, gchar *outbuf, gint outlen,
845
                  const gchar *inbuf)
846
{
847
        if (conv->code_conv_func != conv_noconv)
848
                conv->code_conv_func(outbuf, outlen, inbuf);
849
        else {
850
                gchar *str;
851
852
                str = conv_iconv_strdup(inbuf, conv->charset_str, NULL);
853
                if (!str)
854
                        return -1;
855
                else {
856
                        strncpy2(outbuf, str, outlen);
857
                        g_free(str);
858
                }
859
        }
860
861
        return 0;
862
}
863
864
gchar *conv_codeset_strdup(const gchar *inbuf,
865
                           const gchar *src_code, const gchar *dest_code)
866
{
867
        gchar *buf;
868
        size_t len;
869
        CodeConvFunc conv_func;
870
871
        conv_func = conv_get_code_conv_func(src_code, dest_code);
872
        if (conv_func != conv_noconv) {
873
                len = (strlen(inbuf) + 1) * 3;
874
                buf = g_malloc(len);
875
                if (!buf) return NULL;
876
877
                conv_func(buf, len, inbuf);
878
                return g_realloc(buf, strlen(buf) + 1);
879
        }
880
881
        return conv_iconv_strdup(inbuf, src_code, dest_code);
882
}
883
884
CodeConvFunc conv_get_code_conv_func(const gchar *src_charset_str,
885
                                     const gchar *dest_charset_str)
886
{
887
        CodeConvFunc code_conv = conv_noconv;
888
        CharSet src_charset;
889
        CharSet dest_charset;
890
891
        if (!src_charset_str)
892
                src_charset = conv_get_locale_charset();
893
        else
894
                src_charset = conv_get_charset_from_str(src_charset_str);
895
896
        /* auto detection mode */
897
        if (!src_charset_str && !dest_charset_str) {
898
                if (src_charset == C_EUC_JP || src_charset == C_SHIFT_JIS)
899
                        return conv_anytodisp;
900
                else
901
                        return conv_noconv;
902
        }
903
904
        dest_charset = conv_get_charset_from_str(dest_charset_str);
905
906
        if (dest_charset == C_US_ASCII)
907
                return conv_ustodisp;
908
909
        switch (src_charset) {
910
        case C_US_ASCII:
911
        case C_ISO_8859_1:
912
        case C_ISO_8859_2:
913
        case C_ISO_8859_3:
914
        case C_ISO_8859_4:
915
        case C_ISO_8859_5:
916
        case C_ISO_8859_6:
917
        case C_ISO_8859_7:
918
        case C_ISO_8859_8:
919
        case C_ISO_8859_9:
920
        case C_ISO_8859_10:
921
        case C_ISO_8859_11:
922
        case C_ISO_8859_13:
923
        case C_ISO_8859_14:
924
        case C_ISO_8859_15:
925
                break;
926
        case C_ISO_2022_JP:
927
        case C_ISO_2022_JP_2:
928
        case C_ISO_2022_JP_3:
929
                if (dest_charset == C_AUTO)
930
                        code_conv = conv_jistodisp;
931
                else if (dest_charset == C_EUC_JP)
932
                        code_conv = conv_jistoeuc;
933
                else if (dest_charset == C_UTF_8)
934
                        code_conv = conv_jistoutf8;
935
                break;
936
        case C_SHIFT_JIS:
937
                if (dest_charset == C_AUTO)
938
                        code_conv = conv_sjistodisp;
939
                else if (dest_charset == C_EUC_JP)
940
                        code_conv = conv_sjistoeuc;
941
                else if (dest_charset == C_UTF_8)
942
                        code_conv = conv_sjistoutf8;
943
                break;
944
        case C_EUC_JP:
945
                if (dest_charset == C_AUTO)
946
                        code_conv = conv_euctodisp;
947
                else if (dest_charset == C_ISO_2022_JP   ||
948
                         dest_charset == C_ISO_2022_JP_2 ||
949
                         dest_charset == C_ISO_2022_JP_3)
950
                        code_conv = conv_euctojis;
951
                else if (dest_charset == C_UTF_8)
952
                        code_conv = conv_euctoutf8;
953
                break;
954
        case C_UTF_8:
955
                if (dest_charset == C_EUC_JP)
956
                        code_conv = conv_utf8toeuc;
957
                else if (dest_charset == C_ISO_2022_JP   ||
958
                         dest_charset == C_ISO_2022_JP_2 ||
959
                         dest_charset == C_ISO_2022_JP_3)
960
                        code_conv = conv_utf8tojis;
961
                break;
962
        default:
963
                break;
964
        }
965
966
        return code_conv;
967
}
968
969
gchar *conv_iconv_strdup(const gchar *inbuf,
970
                         const gchar *src_code, const gchar *dest_code)
971
{
972
        iconv_t cd;
973
        gchar *outbuf;
974
975
        if (!src_code)
976
                src_code = conv_get_outgoing_charset_str();
977
        if (!dest_code)
978
                dest_code = CS_INTERNAL;
979
980
        /* don't convert if src and dest codeset are identical */
981
        if (!strcasecmp(src_code, dest_code))
982
                return g_strdup(inbuf);
983
984
        /* don't convert if current codeset is US-ASCII */
985
        if (!strcasecmp(dest_code, CS_US_ASCII))
986
                return g_strdup(inbuf);
987
988
        cd = iconv_open(dest_code, src_code);
989
        if (cd == (iconv_t)-1)
990
                return NULL;
991
992
        outbuf = conv_iconv_strdup_with_cd(inbuf, cd);
993
994
        iconv_close(cd);
995
996
        return outbuf;
997
}
998
999
gchar *conv_iconv_strdup_with_cd(const gchar *inbuf, iconv_t cd)
1000
{
1001
        const gchar *inbuf_p;
1002
        gchar *outbuf;
1003
        gchar *outbuf_p;
1004
        size_t in_size;
1005
        size_t in_left;
1006
        size_t out_size;
1007
        size_t out_left;
1008
        size_t n_conv;
1009
        size_t len;
1010
1011
        inbuf_p = inbuf;
1012
        in_size = strlen(inbuf);
1013
        in_left = in_size;
1014
        out_size = (in_size + 1) * 2;
1015
        outbuf = g_malloc(out_size);
1016
        outbuf_p = outbuf;
1017
        out_left = out_size;
1018
1019
#define EXPAND_BUF()                                \
1020
{                                                \
1021
        len = outbuf_p - outbuf;                \
1022
        out_size *= 2;                                \
1023
        outbuf = g_realloc(outbuf, out_size);        \
1024
        outbuf_p = outbuf + len;                \
1025
        out_left = out_size - len;                \
1026
}
1027
1028
        while ((n_conv = iconv(cd, (ICONV_CONST gchar **)&inbuf_p, &in_left,
1029
                               &outbuf_p, &out_left)) == (size_t)-1) {
1030
                if (EILSEQ == errno) {
1031
                        //g_print("iconv(): at %d: %s\n", in_size - in_left, g_strerror(errno));
1032
                        inbuf_p++;
1033
                        in_left--;
1034
                        if (out_left == 0) {
1035
                                EXPAND_BUF();
1036
                        }
1037
                        *outbuf_p++ = SUBST_CHAR;
1038
                        out_left--;
1039
                } else if (EINVAL == errno) {
1040
                        break;
1041
                } else if (E2BIG == errno) {
1042
                        EXPAND_BUF();
1043
                } else {
1044
                        g_warning("conv_iconv_strdup(): %s\n",
1045
                                  g_strerror(errno));
1046
                        break;
1047
                }
1048
        }
1049
1050
        while ((n_conv = iconv(cd, NULL, NULL, &outbuf_p, &out_left)) ==
1051
               (size_t)-1) {
1052
                if (E2BIG == errno) {
1053
                        EXPAND_BUF();
1054
                } else {
1055
                        g_warning("conv_iconv_strdup(): %s\n",
1056
                                  g_strerror(errno));
1057
                        break;
1058
                }
1059
        }
1060
1061
#undef EXPAND_BUF
1062
1063
        len = outbuf_p - outbuf;
1064
        outbuf = g_realloc(outbuf, len + 1);
1065
        outbuf[len] = '\0';
1066
1067
        return outbuf;
1068
}
1069
1070
static const struct {
1071
        CharSet charset;
1072
        gchar *const name;
1073
} charsets[] = {
1074
        {C_US_ASCII,                CS_US_ASCII},
1075
        {C_US_ASCII,                CS_ANSI_X3_4_1968},
1076
        {C_UTF_8,                CS_UTF_8},
1077
        {C_UTF_7,                CS_UTF_7},
1078
        {C_ISO_8859_1,                CS_ISO_8859_1},
1079
        {C_ISO_8859_2,                CS_ISO_8859_2},
1080
        {C_ISO_8859_3,                CS_ISO_8859_3},
1081
        {C_ISO_8859_4,                CS_ISO_8859_4},
1082
        {C_ISO_8859_5,                CS_ISO_8859_5},
1083
        {C_ISO_8859_6,                CS_ISO_8859_6},
1084
        {C_ISO_8859_7,                CS_ISO_8859_7},
1085
        {C_ISO_8859_8,                CS_ISO_8859_8},
1086
        {C_ISO_8859_9,                CS_ISO_8859_9},
1087
        {C_ISO_8859_10,                CS_ISO_8859_10},
1088
        {C_ISO_8859_11,                CS_ISO_8859_11},
1089
        {C_ISO_8859_13,                CS_ISO_8859_13},
1090
        {C_ISO_8859_14,                CS_ISO_8859_14},
1091
        {C_ISO_8859_15,                CS_ISO_8859_15},
1092
        {C_BALTIC,                CS_BALTIC},
1093
        {C_CP1250,                CS_CP1250},
1094
        {C_CP1251,                CS_CP1251},
1095
        {C_CP1252,                CS_CP1252},
1096
        {C_CP1253,                CS_CP1253},
1097
        {C_CP1254,                CS_CP1254},
1098
        {C_CP1255,                CS_CP1255},
1099
        {C_CP1256,                CS_CP1256},
1100
        {C_CP1257,                CS_CP1257},
1101
        {C_CP1258,                CS_CP1258},
1102
        {C_WINDOWS_1250,        CS_WINDOWS_1250},
1103
        {C_WINDOWS_1251,        CS_WINDOWS_1251},
1104
        {C_WINDOWS_1252,        CS_WINDOWS_1252},
1105
        {C_WINDOWS_1253,        CS_WINDOWS_1253},
1106
        {C_WINDOWS_1254,        CS_WINDOWS_1254},
1107
        {C_WINDOWS_1255,        CS_WINDOWS_1255},
1108
        {C_WINDOWS_1256,        CS_WINDOWS_1256},
1109
        {C_WINDOWS_1257,        CS_WINDOWS_1257},
1110
        {C_WINDOWS_1258,        CS_WINDOWS_1258},
1111
        {C_KOI8_R,                CS_KOI8_R},
1112
        {C_KOI8_T,                CS_KOI8_T},
1113
        {C_KOI8_U,                CS_KOI8_U},
1114
        {C_ISO_2022_JP,                CS_ISO_2022_JP},
1115
        {C_ISO_2022_JP_2,        CS_ISO_2022_JP_2},
1116
        {C_ISO_2022_JP_3,        CS_ISO_2022_JP_3},
1117
        {C_EUC_JP,                CS_EUC_JP},
1118
        {C_EUC_JP,                CS_EUCJP},
1119
        {C_EUC_JP_MS,                CS_EUC_JP_MS},
1120
        {C_SHIFT_JIS,                CS_SHIFT_JIS},
1121
        {C_SHIFT_JIS,                CS_SHIFT__JIS},
1122
        {C_SHIFT_JIS,                CS_SJIS},
1123
        {C_ISO_2022_KR,                CS_ISO_2022_KR},
1124
        {C_EUC_KR,                CS_EUC_KR},
1125
        {C_ISO_2022_CN,                CS_ISO_2022_CN},
1126
        {C_EUC_CN,                CS_EUC_CN},
1127
        {C_GB2312,                CS_GB2312},
1128
        {C_GBK,                        CS_GBK},
1129
        {C_EUC_TW,                CS_EUC_TW},
1130
        {C_BIG5,                CS_BIG5},
1131
        {C_BIG5_HKSCS,                CS_BIG5_HKSCS},
1132
        {C_TIS_620,                CS_TIS_620},
1133
        {C_WINDOWS_874,                CS_WINDOWS_874},
1134
        {C_GEORGIAN_PS,                CS_GEORGIAN_PS},
1135
        {C_TCVN5712_1,                CS_TCVN5712_1},
1136
};
1137
1138
static const struct {
1139
        gchar *const locale;
1140
        CharSet charset;
1141
        CharSet out_charset;
1142
} locale_table[] = {
1143
        {"ja_JP.eucJP"        , C_EUC_JP        , C_ISO_2022_JP},
1144
        {"ja_JP.EUC-JP"        , C_EUC_JP        , C_ISO_2022_JP},
1145
        {"ja_JP.EUC"        , C_EUC_JP        , C_ISO_2022_JP},
1146
        {"ja_JP.ujis"        , C_EUC_JP        , C_ISO_2022_JP},
1147
        {"ja_JP.SJIS"        , C_SHIFT_JIS        , C_ISO_2022_JP},
1148
        {"ja_JP.JIS"        , C_ISO_2022_JP        , C_ISO_2022_JP},
1149
        {"ja_JP"        , C_EUC_JP        , C_ISO_2022_JP},
1150
        {"ko_KR.EUC-KR"        , C_EUC_KR        , C_EUC_KR},
1151
        {"ko_KR"        , C_EUC_KR        , C_EUC_KR},
1152
        {"zh_CN.GB2312"        , C_GB2312        , C_GB2312},
1153
        {"zh_CN.GBK"        , C_GBK                , C_GB2312},
1154
        {"zh_CN"        , C_GB2312        , C_GB2312},
1155
        {"zh_HK"        , C_BIG5_HKSCS        , C_BIG5_HKSCS},
1156
        {"zh_TW.eucTW"        , C_EUC_TW        , C_BIG5},
1157
        {"zh_TW.EUC-TW"        , C_EUC_TW        , C_BIG5},
1158
        {"zh_TW.Big5"        , C_BIG5        , C_BIG5},
1159
        {"zh_TW"        , C_BIG5        , C_BIG5},
1160
1161
        {"ru_RU.KOI8-R"        , C_KOI8_R        , C_KOI8_R},
1162
        {"ru_RU.KOI8R"        , C_KOI8_R        , C_KOI8_R},
1163
        {"ru_RU.CP1251"        , C_WINDOWS_1251, C_KOI8_R},
1164
        {"ru_RU"        , C_ISO_8859_5        , C_KOI8_R},
1165
        {"tg_TJ"        , C_KOI8_T        , C_KOI8_T},
1166
        {"ru_UA"        , C_KOI8_U        , C_KOI8_U},
1167
        {"uk_UA.CP1251"        , C_WINDOWS_1251, C_KOI8_U},
1168
        {"uk_UA"        , C_KOI8_U        , C_KOI8_U},
1169
1170
        {"be_BY"        , C_WINDOWS_1251, C_WINDOWS_1251},
1171
        {"bg_BG"        , C_WINDOWS_1251, C_WINDOWS_1251},
1172
1173
        {"yi_US"        , C_WINDOWS_1255, C_WINDOWS_1255},
1174
1175
        {"af_ZA"        , C_ISO_8859_1  , C_ISO_8859_1},
1176
        {"br_FR"        , C_ISO_8859_1        , C_ISO_8859_1},
1177
        {"ca_ES"        , C_ISO_8859_1        , C_ISO_8859_1},
1178
        {"da_DK"        , C_ISO_8859_1        , C_ISO_8859_1},
1179
        {"de_AT"        , C_ISO_8859_1        , C_ISO_8859_1},
1180
        {"de_BE"        , C_ISO_8859_1        , C_ISO_8859_1},
1181
        {"de_CH"        , C_ISO_8859_1        , C_ISO_8859_1},
1182
        {"de_DE"        , C_ISO_8859_1        , C_ISO_8859_1},
1183
        {"de_LU"        , C_ISO_8859_1        , C_ISO_8859_1},
1184
        {"en_AU"        , C_ISO_8859_1        , C_ISO_8859_1},
1185
        {"en_BW"        , C_ISO_8859_1        , C_ISO_8859_1},
1186
        {"en_CA"        , C_ISO_8859_1        , C_ISO_8859_1},
1187
        {"en_DK"        , C_ISO_8859_1        , C_ISO_8859_1},
1188
        {"en_GB"        , C_ISO_8859_1        , C_ISO_8859_1},
1189
        {"en_HK"        , C_ISO_8859_1        , C_ISO_8859_1},
1190
        {"en_IE"        , C_ISO_8859_1        , C_ISO_8859_1},
1191
        {"en_NZ"        , C_ISO_8859_1        , C_ISO_8859_1},
1192
        {"en_PH"        , C_ISO_8859_1        , C_ISO_8859_1},
1193
        {"en_SG"        , C_ISO_8859_1        , C_ISO_8859_1},
1194
        {"en_US"        , C_ISO_8859_1        , C_ISO_8859_1},
1195
        {"en_ZA"        , C_ISO_8859_1        , C_ISO_8859_1},
1196
        {"en_ZW"        , C_ISO_8859_1        , C_ISO_8859_1},
1197
        {"es_AR"        , C_ISO_8859_1        , C_ISO_8859_1},
1198
        {"es_BO"        , C_ISO_8859_1        , C_ISO_8859_1},
1199
        {"es_CL"        , C_ISO_8859_1        , C_ISO_8859_1},
1200
        {"es_CO"        , C_ISO_8859_1        , C_ISO_8859_1},
1201
        {"es_CR"        , C_ISO_8859_1        , C_ISO_8859_1},
1202
        {"es_DO"        , C_ISO_8859_1        , C_ISO_8859_1},
1203
        {"es_EC"        , C_ISO_8859_1        , C_ISO_8859_1},
1204
        {"es_ES"        , C_ISO_8859_1        , C_ISO_8859_1},
1205
        {"es_GT"        , C_ISO_8859_1        , C_ISO_8859_1},
1206
        {"es_HN"        , C_ISO_8859_1        , C_ISO_8859_1},
1207
        {"es_MX"        , C_ISO_8859_1        , C_ISO_8859_1},
1208
        {"es_NI"        , C_ISO_8859_1        , C_ISO_8859_1},
1209
        {"es_PA"        , C_ISO_8859_1        , C_ISO_8859_1},
1210
        {"es_PE"        , C_ISO_8859_1        , C_ISO_8859_1},
1211
        {"es_PR"        , C_ISO_8859_1        , C_ISO_8859_1},
1212
        {"es_PY"        , C_ISO_8859_1        , C_ISO_8859_1},
1213
        {"es_SV"        , C_ISO_8859_1        , C_ISO_8859_1},
1214
        {"es_US"        , C_ISO_8859_1        , C_ISO_8859_1},
1215
        {"es_UY"        , C_ISO_8859_1        , C_ISO_8859_1},
1216
        {"es_VE"        , C_ISO_8859_1        , C_ISO_8859_1},
1217
        {"et_EE"        , C_ISO_8859_1        , C_ISO_8859_1},
1218
        {"eu_ES"        , C_ISO_8859_1        , C_ISO_8859_1},
1219
        {"fi_FI"        , C_ISO_8859_1        , C_ISO_8859_1},
1220
        {"fo_FO"        , C_ISO_8859_1        , C_ISO_8859_1},
1221
        {"fr_BE"        , C_ISO_8859_1        , C_ISO_8859_1},
1222
        {"fr_CA"        , C_ISO_8859_1        , C_ISO_8859_1},
1223
        {"fr_CH"        , C_ISO_8859_1        , C_ISO_8859_1},
1224
        {"fr_FR"        , C_ISO_8859_1        , C_ISO_8859_1},
1225
        {"fr_LU"        , C_ISO_8859_1        , C_ISO_8859_1},
1226
        {"ga_IE"        , C_ISO_8859_1        , C_ISO_8859_1},
1227
        {"gl_ES"        , C_ISO_8859_1        , C_ISO_8859_1},
1228
        {"gv_GB"        , C_ISO_8859_1        , C_ISO_8859_1},
1229
        {"id_ID"        , C_ISO_8859_1        , C_ISO_8859_1},
1230
        {"is_IS"        , C_ISO_8859_1        , C_ISO_8859_1},
1231
        {"it_CH"        , C_ISO_8859_1        , C_ISO_8859_1},
1232
        {"it_IT"        , C_ISO_8859_1        , C_ISO_8859_1},
1233
        {"kl_GL"        , C_ISO_8859_1        , C_ISO_8859_1},
1234
        {"kw_GB"        , C_ISO_8859_1        , C_ISO_8859_1},
1235
        {"ms_MY"        , C_ISO_8859_1        , C_ISO_8859_1},
1236
        {"nl_BE"        , C_ISO_8859_1        , C_ISO_8859_1},
1237
        {"nl_NL"        , C_ISO_8859_1        , C_ISO_8859_1},
1238
        {"nn_NO"        , C_ISO_8859_1        , C_ISO_8859_1},
1239
        {"no_NO"        , C_ISO_8859_1        , C_ISO_8859_1},
1240
        {"oc_FR"        , C_ISO_8859_1        , C_ISO_8859_1},
1241
        {"pt_BR"        , C_ISO_8859_1        , C_ISO_8859_1},
1242
        {"pt_PT"        , C_ISO_8859_1        , C_ISO_8859_1},
1243
        {"sq_AL"        , C_ISO_8859_1        , C_ISO_8859_1},
1244
        {"sv_FI"        , C_ISO_8859_1        , C_ISO_8859_1},
1245
        {"sv_SE"        , C_ISO_8859_1        , C_ISO_8859_1},
1246
        {"tl_PH"        , C_ISO_8859_1        , C_ISO_8859_1},
1247
        {"uz_UZ"        , C_ISO_8859_1        , C_ISO_8859_1},
1248
        {"wa_BE"        , C_ISO_8859_1        , C_ISO_8859_1},
1249
1250
        {"bs_BA"        , C_ISO_8859_2        , C_ISO_8859_2},
1251
        {"cs_CZ"        , C_ISO_8859_2        , C_ISO_8859_2},
1252
        {"hr_HR"        , C_ISO_8859_2        , C_ISO_8859_2},
1253
        {"hu_HU"        , C_ISO_8859_2        , C_ISO_8859_2},
1254
        {"pl_PL"        , C_ISO_8859_2        , C_ISO_8859_2},
1255
        {"ro_RO"        , C_ISO_8859_2        , C_ISO_8859_2},
1256
        {"sk_SK"        , C_ISO_8859_2        , C_ISO_8859_2},
1257
        {"sl_SI"        , C_ISO_8859_2        , C_ISO_8859_2},
1258
1259
        {"sr_YU@cyrillic"        , C_ISO_8859_5        , C_ISO_8859_5},
1260
        {"sr_YU"                , C_ISO_8859_2        , C_ISO_8859_2},
1261
1262
        {"mt_MT"                , C_ISO_8859_3        , C_ISO_8859_3},
1263
1264
        {"lt_LT.iso88594"        , C_ISO_8859_4        , C_ISO_8859_4},
1265
        {"lt_LT.ISO8859-4"        , C_ISO_8859_4        , C_ISO_8859_4},
1266
        {"lt_LT.ISO_8859-4"        , C_ISO_8859_4        , C_ISO_8859_4},
1267
        {"lt_LT"                , C_ISO_8859_13        , C_ISO_8859_13},
1268
1269
        {"mk_MK"        , C_ISO_8859_5        , C_ISO_8859_5},
1270
1271
        {"ar_AE"        , C_ISO_8859_6        , C_ISO_8859_6},
1272
        {"ar_BH"        , C_ISO_8859_6        , C_ISO_8859_6},
1273
        {"ar_DZ"        , C_ISO_8859_6        , C_ISO_8859_6},
1274
        {"ar_EG"        , C_ISO_8859_6        , C_ISO_8859_6},
1275
        {"ar_IQ"        , C_ISO_8859_6        , C_ISO_8859_6},
1276
        {"ar_JO"        , C_ISO_8859_6        , C_ISO_8859_6},
1277
        {"ar_KW"        , C_ISO_8859_6        , C_ISO_8859_6},
1278
        {"ar_LB"        , C_ISO_8859_6        , C_ISO_8859_6},
1279
        {"ar_LY"        , C_ISO_8859_6        , C_ISO_8859_6},
1280
        {"ar_MA"        , C_ISO_8859_6        , C_ISO_8859_6},
1281
        {"ar_OM"        , C_ISO_8859_6        , C_ISO_8859_6},
1282
        {"ar_QA"        , C_ISO_8859_6        , C_ISO_8859_6},
1283
        {"ar_SA"        , C_ISO_8859_6        , C_ISO_8859_6},
1284
        {"ar_SD"        , C_ISO_8859_6        , C_ISO_8859_6},
1285
        {"ar_SY"        , C_ISO_8859_6        , C_ISO_8859_6},
1286
        {"ar_TN"        , C_ISO_8859_6        , C_ISO_8859_6},
1287
        {"ar_YE"        , C_ISO_8859_6        , C_ISO_8859_6},
1288
1289
        {"el_GR"        , C_ISO_8859_7        , C_ISO_8859_7},
1290
        {"he_IL"        , C_ISO_8859_8        , C_ISO_8859_8},
1291
        {"iw_IL"        , C_ISO_8859_8        , C_ISO_8859_8},
1292
        {"tr_TR"        , C_ISO_8859_9        , C_ISO_8859_9},
1293
1294
        {"lv_LV"        , C_ISO_8859_13        , C_ISO_8859_13},
1295
        {"mi_NZ"        , C_ISO_8859_13        , C_ISO_8859_13},
1296
1297
        {"cy_GB"        , C_ISO_8859_14        , C_ISO_8859_14},
1298
1299
        {"ar_IN"        , C_UTF_8        , C_UTF_8},
1300
        {"en_IN"        , C_UTF_8        , C_UTF_8},
1301
        {"se_NO"        , C_UTF_8        , C_UTF_8},
1302
        {"ta_IN"        , C_UTF_8        , C_UTF_8},
1303
        {"te_IN"        , C_UTF_8        , C_UTF_8},
1304
        {"ur_PK"        , C_UTF_8        , C_UTF_8},
1305
1306
        {"th_TH"        , C_TIS_620        , C_TIS_620},
1307
        /* {"th_TH"        , C_WINDOWS_874}, */
1308
        /* {"th_TH"        , C_ISO_8859_11}, */
1309
1310
        {"ka_GE"        , C_GEORGIAN_PS        , C_GEORGIAN_PS},
1311
        {"vi_VN.TCVN"        , C_TCVN5712_1        , C_TCVN5712_1},
1312
1313
        {"C"                        , C_US_ASCII        , C_US_ASCII},
1314
        {"POSIX"                , C_US_ASCII        , C_US_ASCII},
1315
        {"ANSI_X3.4-1968"        , C_US_ASCII        , C_US_ASCII},
1316
};
1317
1318
static GHashTable *conv_get_charset_to_str_table(void)
1319
{
1320
        static GHashTable *table;
1321
        gint i;
1322
1323
        if (table)
1324
                return table;
1325
1326
        table = g_hash_table_new(NULL, g_direct_equal);
1327
1328
        for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
1329
                if (g_hash_table_lookup(table, GUINT_TO_POINTER(charsets[i].charset))
1330
                    == NULL) {
1331
                        g_hash_table_insert
1332
                                (table, GUINT_TO_POINTER(charsets[i].charset),
1333
                                 charsets[i].name);
1334
                }
1335
        }
1336
1337
        return table;
1338
}
1339
1340
static GHashTable *conv_get_charset_from_str_table(void)
1341
{
1342
        static GHashTable *table;
1343
        gint i;
1344
1345
        if (table)
1346
                return table;
1347
1348
        table = g_hash_table_new(str_case_hash, str_case_equal);
1349
1350
        for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
1351
                g_hash_table_insert(table, charsets[i].name,
1352
                                    GUINT_TO_POINTER(charsets[i].charset));
1353
        }
1354
1355
        return table;
1356
}
1357
1358
/*
 * Return the name registered for charset in the CharSet -> name table,
 * or NULL if the table has no entry for it.
 */
const gchar *conv_get_charset_str(CharSet charset)
{
        return g_hash_table_lookup(conv_get_charset_to_str_table(),
                                   GUINT_TO_POINTER(charset));
}
1365
1366
CharSet conv_get_charset_from_str(const gchar *charset)
1367
{
1368
        GHashTable *table;
1369
1370
        if (!charset) return C_AUTO;
1371
1372
        table = conv_get_charset_from_str_table();
1373
        return GPOINTER_TO_UINT(g_hash_table_lookup(table, charset));
1374
}
1375
1376
CharSet conv_get_locale_charset(void)
1377
{
1378
        static CharSet cur_charset = -1;
1379
        const gchar *cur_locale;
1380
        const gchar *p;
1381
        gint i;
1382
1383
        if (cur_charset != -1)
1384
                return cur_charset;
1385
1386
        cur_locale = conv_get_current_locale();
1387
        if (!cur_locale) {
1388
                cur_charset = C_US_ASCII;
1389
                return cur_charset;
1390
        }
1391
1392
        if (strcasestr(cur_locale, "UTF-8")) {
1393
                cur_charset = C_UTF_8;
1394
                return cur_charset;
1395
        }
1396
1397
        if ((p = strcasestr(cur_locale, "@euro")) && p[5] == '\0') {
1398
                cur_charset = C_ISO_8859_15;
1399
                return cur_charset;
1400
        }
1401
1402
        for (i = 0; i < sizeof(locale_table) / sizeof(locale_table[0]); i++) {
1403
                const gchar *p;
1404
1405
                /* "ja_JP.EUC" matches with "ja_JP.eucJP", "ja_JP.EUC" and
1406
                   "ja_JP". "ja_JP" matches with "ja_JP.xxxx" and "ja" */
1407
                if (!strncasecmp(cur_locale, locale_table[i].locale,
1408
                                 strlen(locale_table[i].locale))) {
1409
                        cur_charset = locale_table[i].charset;
1410
                        return cur_charset;
1411
                } else if ((p = strchr(locale_table[i].locale, '_')) &&
1412
                         !strchr(p + 1, '.')) {
1413
                        if (strlen(cur_locale) == 2 &&
1414
                            !strncasecmp(cur_locale, locale_table[i].locale, 2)) {
1415
                                cur_charset = locale_table[i].charset;
1416
                                return cur_charset;
1417
                        }
1418
                }
1419
        }
1420
1421
        cur_charset = C_AUTO;
1422
        return cur_charset;
1423
}
1424
1425
const gchar *conv_get_locale_charset_str(void)
1426
{
1427
        static const gchar *codeset = NULL;
1428
1429
        if (!codeset)
1430
                codeset = conv_get_charset_str(conv_get_locale_charset());
1431
1432
        return codeset ? codeset : CS_INTERNAL;
1433
}
1434
1435
CharSet conv_get_internal_charset(void)
1436
{
1437
        return C_INTERNAL;
1438
}
1439
1440
const gchar *conv_get_internal_charset_str(void)
1441
{
1442
        return CS_INTERNAL;
1443
}
1444
1445
CharSet conv_get_outgoing_charset(void)
1446
{
1447
        static CharSet out_charset = -1;
1448
        const gchar *cur_locale;
1449
        const gchar *p;
1450
        gint i;
1451
1452
        if (out_charset != -1)
1453
                return out_charset;
1454
1455
        cur_locale = conv_get_current_locale();
1456
        if (!cur_locale) {
1457
                out_charset = C_AUTO;
1458
                return out_charset;
1459
        }
1460
1461
        if ((p = strcasestr(cur_locale, "@euro")) && p[5] == '\0') {
1462
                out_charset = C_ISO_8859_15;
1463
                return out_charset;
1464
        }
1465
1466
        for (i = 0; i < sizeof(locale_table) / sizeof(locale_table[0]); i++) {
1467
                const gchar *p;
1468
1469
                if (!strncasecmp(cur_locale, locale_table[i].locale,
1470
                                 strlen(locale_table[i].locale))) {
1471
                        out_charset = locale_table[i].out_charset;
1472
                        break;
1473
                } else if ((p = strchr(locale_table[i].locale, '_')) &&
1474
                         !strchr(p + 1, '.')) {
1475
                        if (strlen(cur_locale) == 2 &&
1476
                            !strncasecmp(cur_locale, locale_table[i].locale, 2)) {
1477
                                out_charset = locale_table[i].out_charset;
1478
                                break;
1479
                        }
1480
                }
1481
        }
1482
1483
        return out_charset;
1484
}
1485
1486
const gchar *conv_get_outgoing_charset_str(void)
1487
{
1488
        CharSet out_charset;
1489
        const gchar *str;
1490
1491
        if (prefs_common.outgoing_charset) {
1492
                if (!isalpha((guchar)prefs_common.outgoing_charset[0])) {
1493
                        g_free(prefs_common.outgoing_charset);
1494
                        prefs_common.outgoing_charset = g_strdup(CS_AUTO);
1495
                } else if (strcmp(prefs_common.outgoing_charset, CS_AUTO) != 0)
1496
                        return prefs_common.outgoing_charset;
1497
        }
1498
1499
        out_charset = conv_get_outgoing_charset();
1500
        str = conv_get_charset_str(out_charset);
1501
1502
        return str ? str : CS_UTF_8;
1503
}
1504
1505
/*
 * Tell whether encoding may represent a character with more than one
 * byte.
 *
 * Returns TRUE for the EUC, ISO-2022, Shift_JIS, GB2312/GBK,
 * Big5/Big5-HKSCS and UTF-7/UTF-8 charsets, FALSE for everything else.
 */
gboolean conv_is_multibyte_encoding(CharSet encoding)
{
        switch (encoding) {
        case C_EUC_JP:
        case C_EUC_JP_MS:
        case C_EUC_KR:
        case C_EUC_TW:
        case C_EUC_CN:
        case C_ISO_2022_JP:
        case C_ISO_2022_JP_2:
        case C_ISO_2022_JP_3:
        case C_ISO_2022_KR:
        case C_ISO_2022_CN:
        case C_SHIFT_JIS:
        case C_GB2312:
        case C_GBK:                /* extension of GB2312, also 1-2 bytes */
        case C_BIG5:
        case C_BIG5_HKSCS:        /* extension of Big5, also 1-2 bytes */
        case C_UTF_8:
        case C_UTF_7:
                return TRUE;
        default:
                return FALSE;
        }
}
1528
1529
const gchar *conv_get_current_locale(void)
1530
{
1531
        const gchar *cur_locale;
1532
1533
        cur_locale = g_getenv("LC_ALL");
1534
        if (!cur_locale) cur_locale = g_getenv("LC_CTYPE");
1535
        if (!cur_locale) cur_locale = g_getenv("LANG");
1536
        if (!cur_locale) cur_locale = setlocale(LC_CTYPE, NULL);
1537
1538
        debug_print("current locale: %s\n",
1539
                    cur_locale ? cur_locale : "(none)");
1540
1541
        return cur_locale;
1542
}
1543
1544
/*
 * Decode a MIME-encoded header in place: the decoded text is written
 * back into str.
 *
 * When the locale charset is EUC-JP, str is first passed through
 * conv_anytodisp() into a temporary buffer of twice its length, and that
 * buffer is then decoded into str.  Otherwise str is decoded into a
 * temporary buffer of the same size and copied back.
 *
 * The temporary buffers are taken from the stack (Xalloca); the function
 * returns without touching str if that allocation fails.
 */
void conv_unmime_header_overwrite(gchar *str)
{
        gchar *tmp;
        gint tmp_size;

        if (conv_get_locale_charset() == C_EUC_JP) {
                tmp_size = strlen(str) * 2 + 1;
                Xalloca(tmp, tmp_size, return);
                conv_anytodisp(tmp, tmp_size, str);
                unmime_header(str, tmp);
                return;
        }

        tmp_size = strlen(str) + 1;
        Xalloca(tmp, tmp_size, return);
        unmime_header(tmp, str);
        strncpy2(str, tmp, tmp_size);
}
1564
1565
/*
 * Decode the MIME-encoded header str into outbuf.
 *
 * When the locale charset is EUC-JP, str is first passed through
 * conv_anytodisp() into a stack buffer of twice its length and the
 * result is decoded; otherwise str is decoded directly.
 *
 * NOTE: outlen and charset are accepted for the caller's benefit but are
 * not used by this implementation; unmime_header() is called without a
 * size limit, so outbuf must be large enough for the decoded text.
 */
void conv_unmime_header(gchar *outbuf, gint outlen, const gchar *str,
                        const gchar *charset)
{
        gchar *disp_str;
        gint disp_size;

        if (conv_get_locale_charset() != C_EUC_JP) {
                unmime_header(outbuf, str);
                return;
        }

        disp_size = strlen(str) * 2 + 1;
        Xalloca(disp_str, disp_size, return);
        conv_anytodisp(disp_str, disp_size, str);
        unmime_header(outbuf, disp_str);
}
1583
1584
#define MAX_LINELEN                76
1585
#define MAX_HARD_LINELEN        996
1586
#define MIMESEP_BEGIN                "=?"
1587
#define MIMESEP_END                "?="
1588
1589
#define B64LEN(len)        ((len) / 3 * 4 + ((len) % 3 ? 4 : 0))
1590
1591
#define LBREAK_IF_REQUIRED(cond, is_plain_text)                                \
1592
{                                                                        \
1593
        if (len - (destp - (guchar *)dest) < MAX_LINELEN + 2) {                \
1594
                *destp = '\0';                                                \
1595
                return;                                                        \
1596
        }                                                                \
1597
                                                                        \
1598
        if ((cond) && *srcp) {                                                \
1599
                if (destp > (guchar *)dest && left < MAX_LINELEN - 1) {        \
1600
                        if (isspace(*(destp - 1)))                        \
1601
                                destp--;                                \
1602
                        else if (is_plain_text && isspace(*srcp))        \
1603
                                srcp++;                                        \
1604
                        if (*srcp) {                                        \
1605
                                *destp++ = '\n';                        \
1606
                                *destp++ = ' ';                                \
1607
                                left = MAX_LINELEN - 1;                        \
1608
                        }                                                \
1609
                }                                                        \
1610
        }                                                                \
1611
}
1612
1613
void conv_encode_header(gchar *dest, gint len, const gchar *src,
1614
                        gint header_len, gboolean addr_field)
1615
{
1616
        const gchar *cur_encoding;
1617
        const gchar *out_encoding;
1618
        gint mimestr_len;
1619
        gchar *mimesep_enc;
1620
        gint left;
1621
        const guchar *srcp = src;
1622
        guchar *destp = dest;
1623
        gboolean use_base64;
1624
1625
        g_return_if_fail(g_utf8_validate(src, -1, NULL) == TRUE);
1626
1627
        if (MB_CUR_MAX > 1) {
1628
                use_base64 = TRUE;
1629
                mimesep_enc = "?B?";
1630
        } else {
1631
                use_base64 = FALSE;
1632
                mimesep_enc = "?Q?";
1633
        }
1634
1635
        cur_encoding = CS_INTERNAL;
1636
        out_encoding = conv_get_outgoing_charset_str();
1637
        if (!strcmp(out_encoding, CS_US_ASCII))
1638
                out_encoding = CS_ISO_8859_1;
1639
1640
        mimestr_len = strlen(MIMESEP_BEGIN) + strlen(out_encoding) +
1641
                strlen(mimesep_enc) + strlen(MIMESEP_END);
1642
1643
        left = MAX_LINELEN - header_len;
1644
1645
        while (*srcp) {
1646
                LBREAK_IF_REQUIRED(left <= 0, TRUE);
1647
1648
                while (isspace(*srcp)) {
1649
                        *destp++ = *srcp++;
1650
                        left--;
1651
                        LBREAK_IF_REQUIRED(left <= 0, TRUE);
1652
                }
1653
1654
                /* output as it is if the next word is ASCII string */
1655
                if (!is_next_nonascii(srcp)) {
1656
                        gint word_len;
1657
1658
                        word_len = get_next_word_len(srcp);
1659
                        LBREAK_IF_REQUIRED(left < word_len, TRUE);
1660
                        while (word_len > 0) {
1661
                                LBREAK_IF_REQUIRED(left + (MAX_HARD_LINELEN - MAX_LINELEN) <= 0, TRUE)
1662
                                *destp++ = *srcp++;
1663
                                left--;
1664
                                word_len--;
1665
                        }
1666
1667
                        continue;
1668
                }
1669
1670
                /* don't include parentheses in encoded strings */
1671
                if (addr_field && (*srcp == '(' || *srcp == ')')) {
1672
                        LBREAK_IF_REQUIRED(left < 2, FALSE);
1673
                        *destp++ = *srcp++;
1674
                        left--;
1675
                }
1676
1677
                while (1) {
1678
                        gint mb_len = 0;
1679
                        gint cur_len = 0;
1680
                        gchar *part_str;
1681
                        gchar *out_str;
1682
                        gchar *enc_str;
1683
                        const guchar *p = srcp;
1684
                        gint out_str_len;
1685
                        gint out_enc_str_len;
1686
                        gint mime_block_len;
1687
                        gboolean cont = FALSE;
1688
1689
                        while (*p != '\0') {
1690
                                if (isspace(*p) && !is_next_nonascii(p + 1))
1691
                                        break;
1692
                                /* don't include parentheses in encoded
1693
                                   strings */
1694
                                if (addr_field && (*p == '(' || *p == ')'))
1695
                                        break;
1696
1697
                                mb_len = g_utf8_skip[*p];
1698
1699
                                Xstrndup_a(part_str, srcp, cur_len + mb_len, );
1700
                                out_str = conv_codeset_strdup
1701
                                        (part_str, cur_encoding, out_encoding);
1702
                                if (!out_str) {
1703
                                        g_warning("conv_encode_header(): code conversion failed\n");
1704
                                        conv_unreadable_8bit(part_str);
1705
                                        out_str = g_strdup(part_str);
1706
                                }
1707
                                out_str_len = strlen(out_str);
1708
1709
                                if (use_base64)
1710
                                        out_enc_str_len = B64LEN(out_str_len);
1711
                                else
1712
                                        out_enc_str_len =
1713
                                                qp_get_q_encoding_len(out_str);
1714
1715
                                g_free(out_str);
1716
1717
                                if (mimestr_len + out_enc_str_len <= left) {
1718
                                        cur_len += mb_len;
1719
                                        p += mb_len;
1720
                                } else if (cur_len == 0) {
1721
                                        LBREAK_IF_REQUIRED(1, FALSE);
1722
                                        continue;
1723
                                } else {
1724
                                        cont = TRUE;
1725
                                        break;
1726
                                }
1727
                        }
1728
1729
                        if (cur_len > 0) {
1730
                                Xstrndup_a(part_str, srcp, cur_len, );
1731
                                out_str = conv_codeset_strdup
1732
                                        (part_str, cur_encoding, out_encoding);
1733
                                if (!out_str) {
1734
                                        g_warning("conv_encode_header(): code conversion failed\n");
1735
                                        conv_unreadable_8bit(part_str);
1736
                                        out_str = g_strdup(part_str);
1737
                                }
1738
                                out_str_len = strlen(out_str);
1739
1740
                                if (use_base64)
1741
                                        out_enc_str_len = B64LEN(out_str_len);
1742
                                else
1743
                                        out_enc_str_len =
1744
                                                qp_get_q_encoding_len(out_str);
1745
1746
                                Xalloca(enc_str, out_enc_str_len + 1, );
1747
                                if (use_base64)
1748
                                        base64_encode(enc_str, out_str, out_str_len);
1749
                                else
1750
                                        qp_q_encode(enc_str, out_str);
1751
1752
                                g_free(out_str);
1753
1754
                                /* output MIME-encoded string block */
1755
                                mime_block_len = mimestr_len + strlen(enc_str);
1756
                                g_snprintf(destp, mime_block_len + 1,
1757
                                           MIMESEP_BEGIN "%s%s%s" MIMESEP_END,
1758
                                           out_encoding, mimesep_enc, enc_str);
1759
                                destp += mime_block_len;
1760
                                srcp += cur_len;
1761
1762
                                left -= mime_block_len;
1763
                        }
1764
1765
                        LBREAK_IF_REQUIRED(cont, FALSE);
1766
1767
                        if (cur_len == 0)
1768
                                break;
1769
                }
1770
        }
1771
1772
        *destp = '\0';
1773
}
1774
1775
#undef LBREAK_IF_REQUIRED
1776
1777
gint conv_copy_file(const gchar *src, const gchar *dest, const gchar *encoding)
1778
{
1779
        FILE *src_fp, *dest_fp;
1780
        gchar buf[BUFFSIZE], outbuf[BUFFSIZE];
1781
        CodeConverter *conv;
1782
        gboolean err = FALSE;
1783
1784
        if ((src_fp = fopen(src, "rb")) == NULL) {
1785
                FILE_OP_ERROR(src, "fopen");
1786
                return -1;
1787
        }
1788
        if ((dest_fp = fopen(dest, "wb")) == NULL) {
1789
                FILE_OP_ERROR(dest, "fopen");
1790
                fclose(src_fp);
1791
                return -1;
1792
        }
1793
1794
        if (change_file_mode_rw(dest_fp, dest) < 0) {
1795
                FILE_OP_ERROR(dest, "chmod");
1796
                g_warning("can't change file mode\n");
1797
        }
1798
1799
        conv = conv_code_converter_new(encoding);
1800
1801
        while (fgets(buf, sizeof(buf), src_fp) != NULL) {
1802
                if (conv_convert(conv, outbuf, sizeof(outbuf), buf) == 0)
1803
                        fputs(outbuf, dest_fp);
1804
                else
1805
                        fputs(buf, dest_fp);
1806
        }
1807
1808
        conv_code_converter_destroy(conv);
1809
1810
        if (ferror(src_fp)) {
1811
                FILE_OP_ERROR(src, "fgets");
1812
                err = TRUE;
1813
        }
1814
        fclose(src_fp);
1815
        if (fclose(dest_fp) == EOF) {
1816
                FILE_OP_ERROR(dest, "fclose");
1817
                err = TRUE;
1818
        }
1819
        if (err) {
1820
                unlink(dest);
1821
                return -1;
1822
        }
1823
1824
        return 0;
1825
}
1826
1827
gint conv_copy_dir(const gchar *src, const gchar *dest, const gchar *encoding)
1828
{
1829
        DIR *dp;
1830
        struct dirent *d;
1831
        gchar *src_file;
1832
        gchar *dest_file;
1833
1834
        if ((dp = opendir(src)) == NULL) {
1835
                FILE_OP_ERROR(src, "opendir");
1836
                return -1;
1837
        }
1838
1839
        if (make_dir_hier(dest) < 0) {
1840
                closedir(dp);
1841
                return -1;
1842
        }
1843
1844
        while ((d = readdir(dp)) != NULL) {
1845
                if (!strcmp(d->d_name, ".") || !strcmp(d->d_name, ".."))
1846
                        continue;
1847
1848
                src_file = g_strconcat(src, G_DIR_SEPARATOR_S, d->d_name, NULL);
1849
                dest_file = g_strconcat(dest, G_DIR_SEPARATOR_S, d->d_name,
1850
                                        NULL);
1851
                if (is_file_exist(src_file))
1852
                        conv_copy_file(src_file, dest_file, encoding);
1853
                g_free(dest_file);
1854
                g_free(src_file);
1855
        }
1856
1857
        closedir(dp);
1858
1859
        return 0;
1860
}