Statistics
| Revision:

root / src / codeconv.c @ 1

History | View | Annotate | Download (41.9 KB)

1
/*
2
 * Sylpheed -- a GTK+ based, lightweight, and fast e-mail client
3
 * Copyright (C) 1999-2004 Hiroyuki Yamamoto
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program; if not, write to the Free Software
17
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18
 */
19

    
20
#ifdef HAVE_CONFIG_H
21
#  include "config.h"
22
#endif
23

    
24
#include <glib.h>
25
#include <string.h>
26
#include <ctype.h>
27
#include <stdlib.h>
28
#include <errno.h>
29

    
30
#if HAVE_LOCALE_H
31
#  include <locale.h>
32
#endif
33

    
34
#if HAVE_ICONV
35
#  include <iconv.h>
36
#endif
37

    
38
#include "intl.h"
39
#include "codeconv.h"
40
#include "unmime.h"
41
#include "base64.h"
42
#include "quoted-printable.h"
43
#include "utils.h"
44
#include "prefs_common.h"
45

    
46
/*
 * Shift state of an ISO-2022-JP stream, i.e. the character set that the
 * most recently seen (or emitted) escape sequence selected.
 */
typedef enum
{
        JIS_ASCII    = 0,        /* single-byte text, the initial state */
        JIS_KANJI    = 1,        /* two-byte kanji (ESC $ @ / ESC $ B) */
        JIS_HWKANA   = 2,        /* half-width kana (ESC ( I) */
        JIS_AUXKANJI = 3         /* auxiliary kanji (ESC $ ( D) */
} JISState;
53

    
54
/* Written in place of a byte that cannot be converted or displayed. */
#define SUBST_CHAR        '_'
/* Escape character (octal 033) that starts an ISO-2022-JP shift sequence. */
#define ESC                '\033'
56

    
57
/*
 * Byte classifiers for EUC-JP and Shift_JIS text.
 *
 * Every macro masks its argument to eight bits before comparing, so a
 * plain (possibly signed) gchar can be passed as well as a guchar.
 * The argument is evaluated more than once: do not pass an expression
 * with side effects.
 */
#define CONV_BYTE_IN_RANGE(c, lo, hi) \
        (((c) & 0xff) >= (lo) && ((c) & 0xff) <= (hi))

/* EUC-JP: a byte in 0xa1..0xfe, as used by two-byte kanji */
#define iseuckanji(c) \
        CONV_BYTE_IN_RANGE(c, 0xa1, 0xfe)
/* EUC-JP: 0x8e, the byte that precedes a half-width kana */
#define iseuchwkana1(c) \
        (((c) & 0xff) == 0x8e)
/* EUC-JP: a byte in 0xa1..0xdf, the half-width kana itself */
#define iseuchwkana2(c) \
        CONV_BYTE_IN_RANGE(c, 0xa1, 0xdf)
/* EUC-JP: 0x8f, the byte that precedes an auxiliary kanji */
#define iseucaux(c) \
        (((c) & 0xff) == 0x8f)
/* Shift_JIS: first byte of a two-byte character */
#define issjiskanji1(c) \
        (CONV_BYTE_IN_RANGE(c, 0x81, 0x9f) || \
         CONV_BYTE_IN_RANGE(c, 0xe0, 0xfc))
/* Shift_JIS: second byte of a two-byte character (0x7f is excluded) */
#define issjiskanji2(c) \
        (CONV_BYTE_IN_RANGE(c, 0x40, 0x7e) || \
         CONV_BYTE_IN_RANGE(c, 0x80, 0xfc))
/* Shift_JIS: single-byte half-width kana */
#define issjishwkana(c) \
        CONV_BYTE_IN_RANGE(c, 0xa1, 0xdf)
73

    
74
/*
 * ISO-2022-JP shift helpers used while encoding.
 *
 * Each macro expects two variables in the calling scope:
 *   out   - guchar * write cursor, advanced past the escape sequence
 *   state - JISState holding the currently selected character set
 * The escape sequence is emitted only when the state actually changes.
 *
 * The bodies are wrapped in do { } while (0) so that `K_IN();` is a
 * single statement and stays safe inside an unbraced if/else.
 */

/* Select two-byte kanji: ESC $ B (3 bytes) */
#define K_IN()                                        \
        do {                                        \
                if (state != JIS_KANJI) {        \
                        *out++ = ESC;                \
                        *out++ = '$';                \
                        *out++ = 'B';                \
                        state = JIS_KANJI;        \
                }                                \
        } while (0)

/* Return to ASCII: ESC ( B (3 bytes) */
#define K_OUT()                                        \
        do {                                        \
                if (state != JIS_ASCII) {        \
                        *out++ = ESC;                \
                        *out++ = '(';                \
                        *out++ = 'B';                \
                        state = JIS_ASCII;        \
                }                                \
        } while (0)

/* Select half-width kana: ESC ( I (3 bytes) */
#define HW_IN()                                        \
        do {                                        \
                if (state != JIS_HWKANA) {        \
                        *out++ = ESC;                \
                        *out++ = '(';                \
                        *out++ = 'I';                \
                        state = JIS_HWKANA;        \
                }                                \
        } while (0)

/* Select auxiliary kanji: ESC $ ( D (4 bytes) */
#define AUX_IN()                                \
        do {                                        \
                if (state != JIS_AUXKANJI) {        \
                        *out++ = ESC;                \
                        *out++ = '$';                \
                        *out++ = '(';                \
                        *out++ = 'D';                \
                        state = JIS_AUXKANJI;        \
                }                                \
        } while (0)
106

    
107
/*
 * Convert an ISO-2022-JP (JIS) string to EUC-JP.
 *
 * outbuf: destination buffer
 * outlen: size of outbuf in bytes, including room for the terminator
 * inbuf:  NUL-terminated source string
 *
 * At most outlen - 1 bytes are written, followed by a terminating NUL.
 * Conversion stops in front of the first character that would not fit
 * completely.  Nothing is written when outlen is not positive.
 *
 * Unknown escape sequences drop the ESC byte, reset the state to ASCII
 * and let the bytes that follow be copied through.
 */
void conv_jistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
{
        const guchar *in = (const guchar *)inbuf;
        guchar *out = (guchar *)outbuf;
        JISState state = JIS_ASCII;
        gint room;

        if (outlen <= 0)
                return;

        /* bytes still available, keeping one for the terminator */
        room = outlen - 1;

        while (*in != '\0') {
                if (*in == ESC) {
                        in++;
                        if (*in == '$') {
                                if (*(in + 1) == '@' || *(in + 1) == 'B') {
                                        state = JIS_KANJI;
                                        in += 2;
                                } else if (*(in + 1) == '(' &&
                                           *(in + 2) == 'D') {
                                        state = JIS_AUXKANJI;
                                        in += 3;
                                } else {
                                        /* unknown escape sequence */
                                        state = JIS_ASCII;
                                }
                        } else if (*in == '(') {
                                if (*(in + 1) == 'B' || *(in + 1) == 'J') {
                                        state = JIS_ASCII;
                                        in += 2;
                                } else if (*(in + 1) == 'I') {
                                        state = JIS_HWKANA;
                                        in += 2;
                                } else {
                                        /* unknown escape sequence */
                                        state = JIS_ASCII;
                                }
                        } else {
                                /* unknown escape sequence */
                                state = JIS_ASCII;
                        }
                } else if (*in == 0x0e) {
                        /* 0x0e switches to half-width kana */
                        state = JIS_HWKANA;
                        in++;
                } else if (*in == 0x0f) {
                        /* 0x0f switches back to ASCII */
                        state = JIS_ASCII;
                        in++;
                } else {
                        gint need;

                        /* output bytes produced by one character */
                        switch (state) {
                        case JIS_KANJI:
                        case JIS_HWKANA:
                                need = 2;
                                break;
                        case JIS_AUXKANJI:
                                need = 3;
                                break;
                        case JIS_ASCII:
                        default:
                                need = 1;
                                break;
                        }

                        /* never split a character: stop when it won't fit */
                        if (room < need)
                                break;
                        room -= need;

                        switch (state) {
                        case JIS_ASCII:
                                *out++ = *in++;
                                break;
                        case JIS_KANJI:
                                *out++ = *in++ | 0x80;
                                /* input ended in the middle of a character */
                                if (*in == '\0') break;
                                *out++ = *in++ | 0x80;
                                break;
                        case JIS_HWKANA:
                                *out++ = 0x8e;
                                *out++ = *in++ | 0x80;
                                break;
                        case JIS_AUXKANJI:
                                *out++ = 0x8f;
                                *out++ = *in++ | 0x80;
                                /* input ended in the middle of a character */
                                if (*in == '\0') break;
                                *out++ = *in++ | 0x80;
                                break;
                        }
                }
        }

        *out = '\0';
}
175

    
176
#define JIS_HWDAKUTEN                0x5e
177
#define JIS_HWHANDAKUTEN        0x5f
178

    
179
static gint conv_jis_hantozen(guchar *outbuf, guchar jis_code, guchar sound_sym)
180
{
181
        static guint16 h2z_tbl[] = {
182
                /* 0x20 - 0x2f */
183
                0x0000, 0x2123, 0x2156, 0x2157, 0x2122, 0x2126, 0x2572, 0x2521,
184
                0x2523, 0x2525, 0x2527, 0x2529, 0x2563, 0x2565, 0x2567, 0x2543,
185
                /* 0x30 - 0x3f */
186
                0x213c, 0x2522, 0x2524, 0x2526, 0x2528, 0x252a, 0x252b, 0x252d,
187
                0x252f, 0x2531, 0x2533, 0x2535, 0x2537, 0x2539, 0x253b, 0x253d,
188
                /* 0x40 - 0x4f */
189
                0x253f, 0x2541, 0x2544, 0x2546, 0x2548, 0x254a, 0x254b, 0x254c,
190
                0x254d, 0x254e, 0x254f, 0x2552, 0x2555, 0x2558, 0x255b, 0x255e,
191
                /* 0x50 - 0x5f */
192
                0x255f, 0x2560, 0x2561, 0x2562, 0x2564, 0x2566, 0x2568, 0x2569,
193
                0x256a, 0x256b, 0x256c, 0x256d, 0x256f, 0x2573, 0x212b, 0x212c
194
        };
195

    
196
        static guint16 dakuten_tbl[] = {
197
                /* 0x30 - 0x3f */
198
                0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x252c, 0x252e,
199
                0x2530, 0x2532, 0x2534, 0x2536, 0x2538, 0x253a, 0x253c, 0x253e,
200
                /* 0x40 - 0x4f */
201
                0x2540, 0x2542, 0x2545, 0x2547, 0x2549, 0x0000, 0x0000, 0x0000,
202
                0x0000, 0x0000, 0x2550, 0x2553, 0x2556, 0x2559, 0x255c, 0x0000
203
        };
204

    
205
        static guint16 handakuten_tbl[] = {
206
                /* 0x4a - 0x4e */
207
                0x2551, 0x2554, 0x2557, 0x255a, 0x255d
208
        };
209

    
210
        guint16 out_code;
211

    
212
        jis_code &= 0x7f;
213
        sound_sym &= 0x7f;
214

    
215
        if (jis_code < 0x21 || jis_code > 0x5f)
216
                return 0;
217

    
218
        if (sound_sym == JIS_HWDAKUTEN &&
219
            jis_code >= 0x36 && jis_code <= 0x4e) {
220
                out_code = dakuten_tbl[jis_code - 0x30];
221
                if (out_code != 0) {
222
                        *outbuf = out_code >> 8;
223
                        *(outbuf + 1) = out_code & 0xff;
224
                        return 2;
225
                }
226
        }
227

    
228
        if (sound_sym == JIS_HWHANDAKUTEN &&
229
            jis_code >= 0x4a && jis_code <= 0x4e) {
230
                out_code = handakuten_tbl[jis_code - 0x4a];
231
                *outbuf = out_code >> 8;
232
                *(outbuf + 1) = out_code & 0xff;
233
                return 2;
234
        }
235

    
236
        out_code = h2z_tbl[jis_code - 0x20];
237
        *outbuf = out_code >> 8;
238
        *(outbuf + 1) = out_code & 0xff;
239
        return 1;
240
}
241

    
242
/*
 * Convert an EUC-JP string to ISO-2022-JP (JIS).
 *
 * outbuf: destination buffer
 * outlen: size of outbuf in bytes, including room for the terminator
 * inbuf:  NUL-terminated source string
 *
 * The result always ends in the ASCII state and is NUL-terminated.
 * Writes never go past outlen bytes; conversion stops early when the
 * next character might not fit.  Nothing is written when outlen is not
 * positive.
 *
 * Invalid byte sequences are replaced by SUBST_CHAR.  Half-width kana
 * are emitted as such only when prefs_common.allow_jisx0201_kana is
 * set; otherwise they go through conv_jis_hantozen().
 */
void conv_euctojis(gchar *outbuf, gint outlen, const gchar *inbuf)
{
        const guchar *in = (const guchar *)inbuf;
        guchar *out = (guchar *)outbuf;
        JISState state = JIS_ASCII;
        gint limit;

        if (outlen <= 0)
                return;

        /*
         * One loop iteration writes at most 6 bytes (4-byte escape plus
         * a 2-byte character).  After the loop up to 3 more bytes are
         * needed for the closing escape and 1 for the terminator, so an
         * iteration may only start at an offset <= outlen - 10.
         */
        limit = outlen - 10;

        while (*in != '\0' && (out - (guchar *)outbuf) <= limit) {
                if (isascii(*in)) {
                        K_OUT();
                        *out++ = *in++;
                } else if (iseuckanji(*in)) {
                        if (iseuckanji(*(in + 1))) {
                                K_IN();
                                *out++ = *in++ & 0x7f;
                                *out++ = *in++ & 0x7f;
                        } else {
                                /* broken kanji: substitute byte by byte */
                                K_OUT();
                                *out++ = SUBST_CHAR;
                                in++;
                                if (*in != '\0' && !isascii(*in)) {
                                        *out++ = SUBST_CHAR;
                                        in++;
                                }
                        }
                } else if (iseuchwkana1(*in)) {
                        if (iseuchwkana2(*(in + 1))) {
                                if (prefs_common.allow_jisx0201_kana) {
                                        HW_IN();
                                        in++;
                                        *out++ = *in++ & 0x7f;
                                } else {
                                        guchar jis_ch[2];
                                        gint len;

                                        /* pass the following kana too so a
                                         * sound mark can be merged */
                                        if (iseuchwkana1(*(in + 2)) &&
                                            iseuchwkana2(*(in + 3)))
                                                len = conv_jis_hantozen
                                                        (jis_ch,
                                                         *(in + 1), *(in + 3));
                                        else
                                                len = conv_jis_hantozen
                                                        (jis_ch,
                                                         *(in + 1), '\0');
                                        if (len == 0)
                                                /* not convertible: drop it */
                                                in += 2;
                                        else {
                                                K_IN();
                                                /* len kana, 2 bytes each */
                                                in += len * 2;
                                                *out++ = jis_ch[0];
                                                *out++ = jis_ch[1];
                                        }
                                }
                        } else {
                                K_OUT();
                                in++;
                                if (*in != '\0' && !isascii(*in)) {
                                        *out++ = SUBST_CHAR;
                                        in++;
                                }
                        }
                } else if (iseucaux(*in)) {
                        in++;
                        if (iseuckanji(*in) && iseuckanji(*(in + 1))) {
                                AUX_IN();
                                *out++ = *in++ & 0x7f;
                                *out++ = *in++ & 0x7f;
                        } else {
                                K_OUT();
                                if (*in != '\0' && !isascii(*in)) {
                                        *out++ = SUBST_CHAR;
                                        in++;
                                        if (*in != '\0' && !isascii(*in)) {
                                                *out++ = SUBST_CHAR;
                                                in++;
                                        }
                                }
                        }
                } else {
                        K_OUT();
                        *out++ = SUBST_CHAR;
                        in++;
                }
        }

        /* always leave the stream in the ASCII state */
        K_OUT();
        *out = '\0';
}
329

    
330
/*
 * Convert a Shift_JIS string to EUC-JP.
 *
 * outbuf: destination buffer
 * outlen: size of outbuf in bytes, including room for the terminator
 * inbuf:  NUL-terminated source string
 *
 * The result is always NUL-terminated and never exceeds outlen bytes;
 * conversion stops early when the next character might not fit (a
 * half-width kana grows from one byte to two).  Nothing is written when
 * outlen is not positive.  Invalid bytes are replaced by SUBST_CHAR.
 */
void conv_sjistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
{
        const guchar *in = (const guchar *)inbuf;
        guchar *out = (guchar *)outbuf;

        if (outlen <= 0)
                return;

        /* each iteration writes at most 2 bytes; keep 1 for the NUL */
        while (*in != '\0' && (out - (guchar *)outbuf) <= outlen - 3) {
                if (isascii(*in)) {
                        *out++ = *in++;
                } else if (issjiskanji1(*in)) {
                        if (issjiskanji2(*(in + 1))) {
                                guchar out1 = *in;
                                guchar out2 = *(in + 1);
                                guchar row;

                                /* arithmetic mapping of the Shift_JIS
                                 * byte pair to a 7-bit pair */
                                row = out1 < 0xa0 ? 0x70 : 0xb0;
                                if (out2 < 0x9f) {
                                        out1 = (out1 - row) * 2 - 1;
                                        out2 -= out2 > 0x7f ? 0x20 : 0x1f;
                                } else {
                                        out1 = (out1 - row) * 2;
                                        out2 -= 0x7e;
                                }

                                /* set the high bit to get EUC-JP */
                                *out++ = out1 | 0x80;
                                *out++ = out2 | 0x80;
                                in += 2;
                        } else {
                                *out++ = SUBST_CHAR;
                                in++;
                                if (*in != '\0' && !isascii(*in)) {
                                        *out++ = SUBST_CHAR;
                                        in++;
                                }
                        }
                } else if (issjishwkana(*in)) {
                        /* half-width kana: same byte, prefixed by 0x8e */
                        *out++ = 0x8e;
                        *out++ = *in++;
                } else {
                        *out++ = SUBST_CHAR;
                        in++;
                }
        }

        *out = '\0';
}
375

    
376
/*
 * Convert an ISO-2022-JP string to UTF-8 into outbuf (at most outlen
 * bytes, via strncpy2).  If iconv support is not compiled in, or the
 * conversion fails, the input is copied unchanged.
 */
void conv_jistoutf8(gchar *outbuf, gint outlen, const gchar *inbuf)
{
        const gchar *src = inbuf;
#if HAVE_ICONV
        gchar *converted = conv_iconv_strdup(inbuf, CS_ISO_2022_JP, CS_UTF_8);

        if (converted != NULL)
                src = converted;
        strncpy2(outbuf, src, outlen);
        /* g_free() accepts NULL, so no guard is needed */
        g_free(converted);
#else
        strncpy2(outbuf, src, outlen);
#endif /* HAVE_ICONV */
}
391

    
392
/*
 * Convert a Shift_JIS string to UTF-8 into outbuf (at most outlen
 * bytes, via strncpy2).  If iconv support is not compiled in, or the
 * conversion fails, the input is copied unchanged.
 */
void conv_sjistoutf8(gchar *outbuf, gint outlen, const gchar *inbuf)
{
        const gchar *src = inbuf;
#if HAVE_ICONV
        gchar *converted = conv_iconv_strdup(inbuf, CS_SHIFT_JIS, CS_UTF_8);

        if (converted != NULL)
                src = converted;
        strncpy2(outbuf, src, outlen);
        /* g_free() accepts NULL, so no guard is needed */
        g_free(converted);
#else
        strncpy2(outbuf, src, outlen);
#endif /* HAVE_ICONV */
}
407

    
408
/*
 * Convert an EUC-JP string to UTF-8 into outbuf (at most outlen bytes,
 * via strncpy2).  If iconv support is not compiled in, or the
 * conversion fails, the input is copied unchanged.
 */
void conv_euctoutf8(gchar *outbuf, gint outlen, const gchar *inbuf)
{
        const gchar *src = inbuf;
#if HAVE_ICONV
        gchar *converted = conv_iconv_strdup(inbuf, CS_EUC_JP, CS_UTF_8);

        if (converted != NULL)
                src = converted;
        strncpy2(outbuf, src, outlen);
        /* g_free() accepts NULL, so no guard is needed */
        g_free(converted);
#else
        strncpy2(outbuf, src, outlen);
#endif /* HAVE_ICONV */
}
423

    
424
/*
 * Guess the Japanese encoding of inbuf and convert it to EUC-JP.
 * ISO-2022-JP and Shift_JIS input is converted; anything else is
 * copied unchanged with strncpy2().
 */
void conv_anytoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
{
        const CharSet guessed = conv_guess_ja_encoding(inbuf);

        if (guessed == C_ISO_2022_JP)
                conv_jistoeuc(outbuf, outlen, inbuf);
        else if (guessed == C_SHIFT_JIS)
                conv_sjistoeuc(outbuf, outlen, inbuf);
        else
                strncpy2(outbuf, inbuf, outlen);
}
438

    
439
/*
 * Guess the Japanese encoding of inbuf and convert it to UTF-8.
 * ISO-2022-JP, Shift_JIS and EUC-JP input is converted; anything else
 * is copied unchanged with strncpy2().
 */
void conv_anytoutf8(gchar *outbuf, gint outlen, const gchar *inbuf)
{
        const CharSet guessed = conv_guess_ja_encoding(inbuf);

        if (guessed == C_ISO_2022_JP)
                conv_jistoutf8(outbuf, outlen, inbuf);
        else if (guessed == C_SHIFT_JIS)
                conv_sjistoutf8(outbuf, outlen, inbuf);
        else if (guessed == C_EUC_JP)
                conv_euctoutf8(outbuf, outlen, inbuf);
        else
                strncpy2(outbuf, inbuf, outlen);
}
456

    
457
/*
 * Guess the Japanese encoding of inbuf and convert it to ISO-2022-JP.
 * Only input guessed as EUC-JP is converted; every other result
 * (including Shift_JIS) is copied unchanged with strncpy2().
 */
void conv_anytojis(gchar *outbuf, gint outlen, const gchar *inbuf)
{
        if (conv_guess_ja_encoding(inbuf) == C_EUC_JP)
                conv_euctojis(outbuf, outlen, inbuf);
        else
                strncpy2(outbuf, inbuf, outlen);
}
468

    
469
/*
 * Printability map for EUC-JP characters whose first byte is
 * 0xa2..0xa8.  Indexed as [first byte - 0xa2][second byte - 0xa0];
 * 1 marks a printable character, 0 one that is not.
 * Consulted by isprintableeuckanji().
 */
static gchar valid_eucjp_tbl[][96] = {
        /* 0xa2a0 - 0xa2ff */
        { 0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 0,
          0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 1, 1, 1, 1, 1, 1,
          1, 1, 0, 0, 0, 0, 0, 0,  0, 0, 1, 1, 1, 1, 1, 1,
          1, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 0, 0, 0, 0, 0,
          0, 0, 1, 1, 1, 1, 1, 1,  1, 1, 0, 0, 0, 0, 1, 0 },

        /* 0xa3a0 - 0xa3ff */
        { 0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 0, 0, 0, 0, 0, 0,
          0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 0, 0, 0, 0, 0,
          0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 0, 0, 0, 0, 0 },

        /* 0xa4a0 - 0xa4ff */
        { 0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0 },

        /* 0xa5a0 - 0xa5ff */
        { 0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 0,  0, 0, 0, 0, 0, 0, 0, 0 },

        /* 0xa6a0 - 0xa6ff */
        { 0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 0, 0, 0, 0, 0, 0, 0,
          0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0 },

        /* 0xa7a0 - 0xa7ff */
        { 0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
          0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0 },

        /* 0xa8a0 - 0xa8ff */
        { 0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0 }
};
526

    
527
/*
 * Tell whether the EUC-JP byte pair (c1, c2) is treated as printable.
 *
 * Returns FALSE when c1 is outside 0xa1..0xf4, when c2 is outside
 * 0xa1..0xfe, and for the ranges rejected below; first bytes
 * 0xa2..0xa8 are looked up in valid_eucjp_tbl.
 */
static gboolean isprintableeuckanji(guchar c1, guchar c2)
{
        /* both bytes must lie inside the accepted range */
        if (c1 < 0xa1 || c1 > 0xf4)
                return FALSE;
        if (c2 < 0xa1 || c2 > 0xfe)
                return FALSE;

        /* first bytes 0xa2..0xa8 are decided per character by the table */
        if (c1 >= 0xa2 && c1 <= 0xa8)
                return (gboolean)valid_eucjp_tbl[c1 - 0xa2][c2 - 0xa0];

        /* first bytes 0xa9..0xaf are rejected as a whole */
        if (c1 >= 0xa9 && c1 <= 0xaf)
                return FALSE;

        /* the tails of 0xcf and 0xf4 are rejected */
        if (c1 == 0xcf)
                return (c2 >= 0xd4 && c2 <= 0xfe) ? FALSE : TRUE;
        if (c1 == 0xf4)
                return (c2 >= 0xa7 && c2 <= 0xfe) ? FALSE : TRUE;

        return TRUE;
}
550

    
551
/*
 * Sanitize an EUC-JP string in place: CR+LF becomes LF and every byte
 * that does not belong to a well-formed, printable character is
 * overwritten with SUBST_CHAR.
 */
void conv_unreadable_eucjp(gchar *str)
{
        guchar *cur = (guchar *)str;

        while (*cur != '\0') {
                const guchar lead = *cur;

                if (isascii(lead)) {
                        /* drop the CR of a CR+LF pair; the move also
                         * carries the terminating NUL along */
                        if (lead == '\r' && cur[1] == '\n')
                                memmove(cur, cur + 1,
                                        strlen((const gchar *)cur));
                        /* 7 bit code: keep as is */
                        cur++;
                        continue;
                }

                if (iseuckanji(lead)) {
                        if (isprintableeuckanji(lead, cur[1])) {
                                /* printable two-byte character */
                                cur += 2;
                                continue;
                        }
                        /* unprintable: substitute the first byte ... */
                        *cur++ = SUBST_CHAR;
                        if (*cur == '\0')
                                continue;
                        /* ... and the second one unless it is ASCII */
                        if (!isascii(*cur))
                                *cur = SUBST_CHAR;
                        cur++;
                        continue;
                }

                /* half-width kana: 0x8e + kana byte */
                if (iseuchwkana1(lead) && iseuchwkana2(cur[1])) {
                        cur += 2;
                        continue;
                }

                /* auxiliary kanji: 0x8f + two bytes */
                if (iseucaux(lead) &&
                    iseuckanji(cur[1]) && iseuckanji(cur[2])) {
                        cur += 3;
                        continue;
                }

                /* anything else is an unprintable single byte */
                *cur++ = SUBST_CHAR;
        }
}
593

    
594
/*
 * Sanitize a string in place for 7-bit display: CR+LF becomes LF and
 * every non-ASCII byte is replaced by SUBST_CHAR.
 *
 * The string is compacted in a single pass with separate read and
 * write cursors, so text with many CR+LF pairs is handled in linear
 * time.
 */
void conv_unreadable_8bit(gchar *str)
{
        const guchar *src = (const guchar *)str;
        guchar *dst = (guchar *)str;

        while (*src != '\0') {
                if (src[0] == '\r' && src[1] == '\n') {
                        /* skip the CR, keep the LF */
                        src++;
                        *dst++ = *src++;
                } else {
                        const guchar ch = *src++;

                        *dst++ = isascii(ch) ? ch : SUBST_CHAR;
                }
        }

        *dst = '\0';
}
606

    
607
/*
 * Sanitize a string in place: CR+LF becomes LF and every byte in
 * 0x7f..0x9f is replaced by SUBST_CHAR.
 *
 * The string is compacted in a single pass with separate read and
 * write cursors, so text with many CR+LF pairs is handled in linear
 * time.
 */
void conv_unreadable_latin(gchar *str)
{
        const guchar *src = (const guchar *)str;
        guchar *dst = (guchar *)str;

        while (*src != '\0') {
                if (src[0] == '\r' && src[1] == '\n') {
                        /* skip the CR, keep the LF */
                        src++;
                        *dst++ = *src++;
                } else {
                        const guchar ch = *src++;

                        *dst++ = (ch >= 0x7f && ch <= 0x9f) ? SUBST_CHAR : ch;
                }
        }

        *dst = '\0';
}
620

    
621
/*
 * Sanitize str in place according to the locale charset reported by
 * conv_get_locale_charset(): EUC-JP uses conv_unreadable_eucjp(),
 * US-ASCII and the listed ISO-8859 charsets use
 * conv_unreadable_latin(), and any other charset leaves str untouched.
 */
void conv_unreadable_locale(gchar *str)
{
        const CharSet locale_cs = conv_get_locale_charset();

        if (locale_cs == C_EUC_JP) {
                conv_unreadable_eucjp(str);
                return;
        }

        switch (locale_cs) {
        case C_US_ASCII:
        case C_ISO_8859_1:
        case C_ISO_8859_2:
        case C_ISO_8859_3:
        case C_ISO_8859_4:
        case C_ISO_8859_5:
        case C_ISO_8859_6:
        case C_ISO_8859_7:
        case C_ISO_8859_8:
        case C_ISO_8859_9:
        case C_ISO_8859_10:
        case C_ISO_8859_11:
        case C_ISO_8859_13:
        case C_ISO_8859_14:
        case C_ISO_8859_15:
                conv_unreadable_latin(str);
                break;
        default:
                /* other charsets are left as they are */
                break;
        }
}
648

    
649
#define NCV        '\0'
650

    
651
void conv_mb_alnum(gchar *str)
652
{
653
        static guchar char_tbl[] = {
654
                /* 0xa0 - 0xaf */
655
                NCV, ' ', NCV, NCV, ',', '.', NCV, ':',
656
                ';', '?', '!', NCV, NCV, NCV, NCV, NCV,
657
                /* 0xb0 - 0xbf */
658
                NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
659
                NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
660
                /* 0xc0 - 0xcf */
661
                NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
662
                NCV, NCV, '(', ')', NCV, NCV, '[', ']',
663
                /* 0xd0 - 0xdf */
664
                '{', '}', NCV, NCV, NCV, NCV, NCV, NCV,
665
                NCV, NCV, NCV, NCV, '+', '-', NCV, NCV,
666
                /* 0xe0 - 0xef */
667
                NCV, '=', NCV, '<', '>', NCV, NCV, NCV,
668
                NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV
669
        };
670

    
671
        register guchar *p = str;
672
        register gint len;
673

    
674
        len = strlen(str);
675

    
676
        while (len > 1) {
677
                if (*p == 0xa3) {
678
                        register guchar ch = *(p + 1);
679

    
680
                        if (ch >= 0xb0 && ch <= 0xfa) {
681
                                /* [a-zA-Z] */
682
                                *p = ch & 0x7f;
683
                                p++;
684
                                len--;
685
                                memmove(p, p + 1, len);
686
                                len--;
687
                        } else  {
688
                                p += 2;
689
                                len -= 2;
690
                        }
691
                } else if (*p == 0xa1) {
692
                        register guchar ch = *(p + 1);
693

    
694
                        if (ch >= 0xa0 && ch <= 0xef &&
695
                            NCV != char_tbl[ch - 0xa0]) {
696
                                *p = char_tbl[ch - 0xa0];
697
                                p++;
698
                                len--;
699
                                memmove(p, p + 1, len);
700
                                len--;
701
                        } else {
702
                                p += 2;
703
                                len -= 2;
704
                        }
705
                } else if (iseuckanji(*p)) {
706
                        p += 2;
707
                        len -= 2;
708
                } else {
709
                        p++;
710
                        len--;
711
                }
712
        }
713
}
714

    
715
CharSet conv_guess_ja_encoding(const gchar *str)
716
{
717
        const guchar *p = str;
718
        CharSet guessed = C_US_ASCII;
719

    
720
        while (*p != '\0') {
721
                if (*p == ESC && (*(p + 1) == '$' || *(p + 1) == '(')) {
722
                        if (guessed == C_US_ASCII)
723
                                return C_ISO_2022_JP;
724
                        p += 2;
725
                } else if (isascii(*p)) {
726
                        p++;
727
                } else if (iseuckanji(*p) && iseuckanji(*(p + 1))) {
728
                        if (*p >= 0xfd && *p <= 0xfe)
729
                                return C_EUC_JP;
730
                        else if (guessed == C_SHIFT_JIS) {
731
                                if ((issjiskanji1(*p) &&
732
                                     issjiskanji2(*(p + 1))) ||
733
                                    issjishwkana(*p))
734
                                        guessed = C_SHIFT_JIS;
735
                                else
736
                                        guessed = C_EUC_JP;
737
                        } else
738
                                guessed = C_EUC_JP;
739
                        p += 2;
740
                } else if (issjiskanji1(*p) && issjiskanji2(*(p + 1))) {
741
                        if (iseuchwkana1(*p) && iseuchwkana2(*(p + 1)))
742
                                guessed = C_SHIFT_JIS;
743
                        else
744
                                return C_SHIFT_JIS;
745
                        p += 2;
746
                } else if (issjishwkana(*p)) {
747
                        guessed = C_SHIFT_JIS;
748
                        p++;
749
                } else {
750
                        p++;
751
                }
752
        }
753

    
754
        return guessed;
755
}
756

    
757
/*
 * Convert ISO-2022-JP text for display.
 * Forwards to conv_jistoutf8() with the same arguments.
 */
void conv_jistodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
{
        conv_jistoutf8(outbuf, outlen, inbuf);
}
761

    
762
/*
 * Convert Shift_JIS text for display.
 * Forwards to conv_sjistoutf8() with the same arguments.
 */
void conv_sjistodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
{
        conv_sjistoutf8(outbuf, outlen, inbuf);
}
766

    
767
/*
 * Convert EUC-JP text for display.
 * Forwards to conv_euctoutf8() with the same arguments.
 */
void conv_euctodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
{
        conv_euctoutf8(outbuf, outlen, inbuf);
}
771

    
772
/*
 * Convert Japanese text of unknown encoding for display.
 * Forwards to conv_anytoutf8(), which guesses the encoding.
 */
void conv_anytodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
{
        conv_anytoutf8(outbuf, outlen, inbuf);
}
776

    
777
/*
 * Prepare US-ASCII text for display: copy inbuf into outbuf with
 * strncpy2(), then run conv_unreadable_8bit() on the copy.
 */
void conv_ustodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
{
        /* copy first; the clean-up pass works in place on the copy */
        strncpy2(outbuf, inbuf, outlen);
        conv_unreadable_8bit(outbuf);
}
782

    
783
/*
 * Prepare Latin (ISO-8859-x) text for display.
 *
 * The text is copied with strncpy2() and nothing else is done; in
 * particular conv_unreadable_latin() is not applied to the copy.
 */
void conv_latintodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
{
        strncpy2(outbuf, inbuf, outlen);
}
788

    
789
/*
 * Convert text from the locale charset to the internal charset for
 * display.  If iconv support is not compiled in, or the conversion
 * fails, the input is copied unchanged with strncpy2().
 */
void conv_localetodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
{
        const gchar *src = inbuf;
#if HAVE_ICONV
        gchar *converted = conv_iconv_strdup(inbuf,
                                             conv_get_locale_charset_str(),
                                             conv_get_internal_charset_str());

        if (converted != NULL)
                src = converted;
        strncpy2(outbuf, src, outlen);
        /* g_free() accepts NULL, so no guard is needed */
        g_free(converted);
#else
        strncpy2(outbuf, src, outlen);
#endif /* HAVE_ICONV */
}
805

    
806
/*
 * Identity converter: bounded copy of inbuf into outbuf (capacity outlen).
 *
 * Its address also serves as the "no built-in converter" marker returned
 * by conv_get_code_conv_func() and tested by its callers.
 */
void conv_noconv(gchar *outbuf, gint outlen, const gchar *inbuf)
{
        strncpy2(outbuf, inbuf, outlen);
}
810

    
811
CodeConverter *conv_code_converter_new(const gchar *src_charset)
812
{
813
        CodeConverter *conv;
814

    
815
        conv = g_new0(CodeConverter, 1);
816
        conv->code_conv_func = conv_get_code_conv_func(src_charset, NULL);
817
        conv->charset_str = g_strdup(src_charset);
818
        conv->charset = conv_get_charset_from_str(src_charset);
819

    
820
        return conv;
821
}
822

    
823
/*
 * Release a CodeConverter created by conv_code_converter_new().
 *
 * Frees the owned charset name and the structure itself.  Passing NULL is
 * a harmless no-op, mirroring g_free().
 */
void conv_code_converter_destroy(CodeConverter *conv)
{
        if (!conv)
                return;

        g_free(conv->charset_str);
        g_free(conv);
}
828

    
829
gint conv_convert(CodeConverter *conv, gchar *outbuf, gint outlen,
830
                  const gchar *inbuf)
831
{
832
#if HAVE_ICONV
833
        if (conv->code_conv_func != conv_noconv)
834
                conv->code_conv_func(outbuf, outlen, inbuf);
835
        else {
836
                gchar *str;
837

    
838
                str = conv_iconv_strdup(inbuf, conv->charset_str, NULL);
839
                if (!str)
840
                        return -1;
841
                else {
842
                        strncpy2(outbuf, str, outlen);
843
                        g_free(str);
844
                }
845
        }
846
#else /* !HAVE_ICONV */
847
        conv->code_conv_func(outbuf, outlen, inbuf);
848
#endif
849

    
850
        return 0;
851
}
852

    
853
gchar *conv_codeset_strdup(const gchar *inbuf,
854
                           const gchar *src_code, const gchar *dest_code)
855
{
856
        gchar *buf;
857
        size_t len;
858
        CodeConvFunc conv_func;
859

    
860
        conv_func = conv_get_code_conv_func(src_code, dest_code);
861
        if (conv_func != conv_noconv) {
862
                len = (strlen(inbuf) + 1) * 3;
863
                buf = g_malloc(len);
864
                if (!buf) return NULL;
865

    
866
                conv_func(buf, len, inbuf);
867
                return g_realloc(buf, strlen(buf) + 1);
868
        }
869

    
870
#if HAVE_ICONV
871
        return conv_iconv_strdup(inbuf, src_code, dest_code);
872
#else
873
        return g_strdup(inbuf);
874
#endif /* HAVE_ICONV */
875
}
876

    
877
CodeConvFunc conv_get_code_conv_func(const gchar *src_charset_str,
878
                                     const gchar *dest_charset_str)
879
{
880
        CodeConvFunc code_conv = conv_noconv;
881
        CharSet src_charset;
882
        CharSet dest_charset;
883

    
884
        if (!src_charset_str)
885
                src_charset = conv_get_locale_charset();
886
        else
887
                src_charset = conv_get_charset_from_str(src_charset_str);
888

    
889
        /* auto detection mode */
890
        if (!src_charset_str && !dest_charset_str) {
891
                //if (src_charset == C_EUC_JP || src_charset == C_SHIFT_JIS)
892
                //        return conv_anytodisp;
893
                //else
894
                        return conv_noconv;
895
        }
896

    
897
        dest_charset = conv_get_charset_from_str(dest_charset_str);
898

    
899
        if (dest_charset == C_US_ASCII)
900
                return conv_ustodisp;
901

    
902
        switch (src_charset) {
903
        case C_ISO_2022_JP:
904
        case C_ISO_2022_JP_2:
905
        case C_ISO_2022_JP_3:
906
                if (dest_charset == C_AUTO)
907
                        code_conv = conv_jistodisp;
908
                else if (dest_charset == C_EUC_JP)
909
                        code_conv = conv_jistoeuc;
910
                else if (dest_charset == C_UTF_8)
911
                        code_conv = conv_jistoutf8;
912
                break;
913
        case C_US_ASCII:
914
                if (dest_charset == C_AUTO)
915
                        code_conv = conv_ustodisp;
916
                break;
917
        case C_ISO_8859_1:
918
        case C_ISO_8859_2:
919
        case C_ISO_8859_3:
920
        case C_ISO_8859_4:
921
        case C_ISO_8859_5:
922
        case C_ISO_8859_6:
923
        case C_ISO_8859_7:
924
        case C_ISO_8859_8:
925
        case C_ISO_8859_9:
926
        case C_ISO_8859_10:
927
        case C_ISO_8859_11:
928
        case C_ISO_8859_13:
929
        case C_ISO_8859_14:
930
        case C_ISO_8859_15:
931
                break;
932
        case C_SHIFT_JIS:
933
                if (dest_charset == C_AUTO)
934
                        code_conv = conv_sjistodisp;
935
                else if (dest_charset == C_EUC_JP)
936
                        code_conv = conv_sjistoeuc;
937
                else if (dest_charset == C_UTF_8)
938
                        code_conv = conv_sjistoutf8;
939
                break;
940
        case C_EUC_JP:
941
                if (dest_charset == C_AUTO)
942
                        code_conv = conv_euctodisp;
943
                else if (dest_charset == C_ISO_2022_JP   ||
944
                         dest_charset == C_ISO_2022_JP_2 ||
945
                         dest_charset == C_ISO_2022_JP_3)
946
                        code_conv = conv_euctojis;
947
                else if (dest_charset == C_UTF_8)
948
                        code_conv = conv_euctoutf8;
949
                break;
950
        default:
951
                break;
952
        }
953

    
954
        return code_conv;
955
}
956

    
957
#if HAVE_ICONV
/*
 * Convert inbuf from src_code to dest_code with iconv and return the
 * result as a newly allocated, NUL-terminated string (free with g_free()).
 *
 * src_code   source charset name; NULL selects
 *            conv_get_outgoing_charset_str().
 * dest_code  destination charset name; NULL selects
 *            conv_get_locale_charset_str().
 *
 * The input is duplicated unchanged when the destination is US-ASCII or
 * when both names compare equal (case-insensitively).  NULL is returned
 * only when iconv_open() rejects the charset pair.  Input bytes that
 * iconv reports as an illegal sequence are skipped one at a time and
 * replaced by SUBST_CHAR; any other iconv error stops the conversion and
 * whatever was converted so far is returned.
 */
gchar *conv_iconv_strdup(const gchar *inbuf,
                         const gchar *src_code, const gchar *dest_code)
{
        iconv_t cd;
        const gchar *src_p;
        gchar *dest;
        gchar *dest_p;
        size_t src_left;
        size_t dest_size;
        size_t dest_left;
        size_t used;

        if (!src_code)
                src_code = conv_get_outgoing_charset_str();
        if (!dest_code)
                dest_code = conv_get_locale_charset_str();

        /* don't convert if the target codeset is US-ASCII, or if source
           and target codesets are identical */
        if (!strcasecmp(dest_code, CS_US_ASCII) ||
            !strcasecmp(src_code, dest_code))
                return g_strdup(inbuf);

        cd = iconv_open(dest_code, src_code);
        if (cd == (iconv_t)-1)
                return NULL;

        src_p = inbuf;
        src_left = strlen(inbuf);
        dest_size = (src_left + 1) * 2;
        dest = g_malloc(dest_size);
        dest_p = dest;
        dest_left = dest_size;

/* Double the output buffer, keeping the write position intact. */
#define GROW_OUTPUT()                                \
do {                                                \
        used = dest_p - dest;                        \
        dest_size *= 2;                                \
        dest = g_realloc(dest, dest_size);        \
        dest_p = dest + used;                        \
        dest_left = dest_size - used;                \
} while (0)

        /* main conversion: retry until iconv succeeds or gives up */
        for (;;) {
                if (iconv(cd, (ICONV_CONST gchar **)&src_p, &src_left,
                          &dest_p, &dest_left) != (size_t)-1)
                        break;

                if (errno == EILSEQ) {
                        /* drop the offending byte, emit a placeholder */
                        src_p++;
                        src_left--;
                        if (dest_left == 0)
                                GROW_OUTPUT();
                        *dest_p++ = SUBST_CHAR;
                        dest_left--;
                } else if (errno == EINVAL) {
                        break;
                } else if (errno == E2BIG) {
                        GROW_OUTPUT();
                } else {
                        g_warning("conv_iconv_strdup(): %s\n",
                                  g_strerror(errno));
                        break;
                }
        }

        /* NULL input: let iconv write any pending closing sequence */
        for (;;) {
                if (iconv(cd, NULL, NULL, &dest_p, &dest_left) !=
                    (size_t)-1)
                        break;

                if (errno == E2BIG) {
                        GROW_OUTPUT();
                } else {
                        g_warning("conv_iconv_strdup(): %s\n",
                                  g_strerror(errno));
                        break;
                }
        }

#undef GROW_OUTPUT

        /* shrink to fit and terminate */
        used = dest_p - dest;
        dest = g_realloc(dest, used + 1);
        dest[used] = '\0';

        iconv_close(cd);

        return dest;
}
#endif /* HAVE_ICONV */
1049

    
1050
static const struct {
1051
        CharSet charset;
1052
        gchar *const name;
1053
} charsets[] = {
1054
        {C_US_ASCII,                CS_US_ASCII},
1055
        {C_US_ASCII,                CS_ANSI_X3_4_1968},
1056
        {C_UTF_8,                CS_UTF_8},
1057
        {C_UTF_7,                CS_UTF_7},
1058
        {C_ISO_8859_1,                CS_ISO_8859_1},
1059
        {C_ISO_8859_2,                CS_ISO_8859_2},
1060
        {C_ISO_8859_3,                CS_ISO_8859_3},
1061
        {C_ISO_8859_4,                CS_ISO_8859_4},
1062
        {C_ISO_8859_5,                CS_ISO_8859_5},
1063
        {C_ISO_8859_6,                CS_ISO_8859_6},
1064
        {C_ISO_8859_7,                CS_ISO_8859_7},
1065
        {C_ISO_8859_8,                CS_ISO_8859_8},
1066
        {C_ISO_8859_9,                CS_ISO_8859_9},
1067
        {C_ISO_8859_10,                CS_ISO_8859_10},
1068
        {C_ISO_8859_11,                CS_ISO_8859_11},
1069
        {C_ISO_8859_13,                CS_ISO_8859_13},
1070
        {C_ISO_8859_14,                CS_ISO_8859_14},
1071
        {C_ISO_8859_15,                CS_ISO_8859_15},
1072
        {C_BALTIC,                CS_BALTIC},
1073
        {C_CP1250,                CS_CP1250},
1074
        {C_CP1251,                CS_CP1251},
1075
        {C_CP1252,                CS_CP1252},
1076
        {C_CP1253,                CS_CP1253},
1077
        {C_CP1254,                CS_CP1254},
1078
        {C_CP1255,                CS_CP1255},
1079
        {C_CP1256,                CS_CP1256},
1080
        {C_CP1257,                CS_CP1257},
1081
        {C_CP1258,                CS_CP1258},
1082
        {C_WINDOWS_1250,        CS_WINDOWS_1250},
1083
        {C_WINDOWS_1251,        CS_WINDOWS_1251},
1084
        {C_WINDOWS_1252,        CS_WINDOWS_1252},
1085
        {C_WINDOWS_1253,        CS_WINDOWS_1253},
1086
        {C_WINDOWS_1254,        CS_WINDOWS_1254},
1087
        {C_WINDOWS_1255,        CS_WINDOWS_1255},
1088
        {C_WINDOWS_1256,        CS_WINDOWS_1256},
1089
        {C_WINDOWS_1257,        CS_WINDOWS_1257},
1090
        {C_WINDOWS_1258,        CS_WINDOWS_1258},
1091
        {C_KOI8_R,                CS_KOI8_R},
1092
        {C_KOI8_T,                CS_KOI8_T},
1093
        {C_KOI8_U,                CS_KOI8_U},
1094
        {C_ISO_2022_JP,                CS_ISO_2022_JP},
1095
        {C_ISO_2022_JP_2,        CS_ISO_2022_JP_2},
1096
        {C_ISO_2022_JP_3,        CS_ISO_2022_JP_3},
1097
        {C_EUC_JP,                CS_EUC_JP},
1098
        {C_EUC_JP,                CS_EUCJP},
1099
        {C_SHIFT_JIS,                CS_SHIFT_JIS},
1100
        {C_SHIFT_JIS,                CS_SHIFT__JIS},
1101
        {C_SHIFT_JIS,                CS_SJIS},
1102
        {C_ISO_2022_KR,                CS_ISO_2022_KR},
1103
        {C_EUC_KR,                CS_EUC_KR},
1104
        {C_ISO_2022_CN,                CS_ISO_2022_CN},
1105
        {C_EUC_CN,                CS_EUC_CN},
1106
        {C_GB2312,                CS_GB2312},
1107
        {C_GBK,                        CS_GBK},
1108
        {C_EUC_TW,                CS_EUC_TW},
1109
        {C_BIG5,                CS_BIG5},
1110
        {C_BIG5_HKSCS,                CS_BIG5_HKSCS},
1111
        {C_TIS_620,                CS_TIS_620},
1112
        {C_WINDOWS_874,                CS_WINDOWS_874},
1113
        {C_GEORGIAN_PS,                CS_GEORGIAN_PS},
1114
        {C_TCVN5712_1,                CS_TCVN5712_1},
1115
};
1116

    
1117
static const struct {
1118
        gchar *const locale;
1119
        CharSet charset;
1120
        CharSet out_charset;
1121
} locale_table[] = {
1122
        {"ja_JP.eucJP"        , C_EUC_JP        , C_ISO_2022_JP},
1123
        {"ja_JP.EUC-JP"        , C_EUC_JP        , C_ISO_2022_JP},
1124
        {"ja_JP.EUC"        , C_EUC_JP        , C_ISO_2022_JP},
1125
        {"ja_JP.ujis"        , C_EUC_JP        , C_ISO_2022_JP},
1126
        {"ja_JP.SJIS"        , C_SHIFT_JIS        , C_ISO_2022_JP},
1127
        {"ja_JP.JIS"        , C_ISO_2022_JP        , C_ISO_2022_JP},
1128
        {"ja_JP"        , C_EUC_JP        , C_ISO_2022_JP},
1129
        {"ko_KR.EUC-KR"        , C_EUC_KR        , C_EUC_KR},
1130
        {"ko_KR"        , C_EUC_KR        , C_EUC_KR},
1131
        {"zh_CN.GB2312"        , C_GB2312        , C_GB2312},
1132
        {"zh_CN.GBK"        , C_GBK                , C_GB2312},
1133
        {"zh_CN"        , C_GB2312        , C_GB2312},
1134
        {"zh_HK"        , C_BIG5_HKSCS        , C_BIG5_HKSCS},
1135
        {"zh_TW.eucTW"        , C_EUC_TW        , C_BIG5},
1136
        {"zh_TW.EUC-TW"        , C_EUC_TW        , C_BIG5},
1137
        {"zh_TW.Big5"        , C_BIG5        , C_BIG5},
1138
        {"zh_TW"        , C_BIG5        , C_BIG5},
1139

    
1140
        {"ru_RU.KOI8-R"        , C_KOI8_R        , C_KOI8_R},
1141
        {"ru_RU.KOI8R"        , C_KOI8_R        , C_KOI8_R},
1142
        {"ru_RU.CP1251"        , C_WINDOWS_1251, C_KOI8_R},
1143
        {"ru_RU"        , C_ISO_8859_5        , C_KOI8_R},
1144
        {"tg_TJ"        , C_KOI8_T        , C_KOI8_T},
1145
        {"ru_UA"        , C_KOI8_U        , C_KOI8_U},
1146
        {"uk_UA.CP1251"        , C_WINDOWS_1251, C_KOI8_U},
1147
        {"uk_UA"        , C_KOI8_U        , C_KOI8_U},
1148

    
1149
        {"be_BY"        , C_WINDOWS_1251, C_WINDOWS_1251},
1150
        {"bg_BG"        , C_WINDOWS_1251, C_WINDOWS_1251},
1151

    
1152
        {"yi_US"        , C_WINDOWS_1255, C_WINDOWS_1255},
1153

    
1154
        {"af_ZA"        , C_ISO_8859_1  , C_ISO_8859_1},
1155
        {"br_FR"        , C_ISO_8859_1        , C_ISO_8859_1},
1156
        {"ca_ES"        , C_ISO_8859_1        , C_ISO_8859_1},
1157
        {"da_DK"        , C_ISO_8859_1        , C_ISO_8859_1},
1158
        {"de_AT"        , C_ISO_8859_1        , C_ISO_8859_1},
1159
        {"de_BE"        , C_ISO_8859_1        , C_ISO_8859_1},
1160
        {"de_CH"        , C_ISO_8859_1        , C_ISO_8859_1},
1161
        {"de_DE"        , C_ISO_8859_1        , C_ISO_8859_1},
1162
        {"de_LU"        , C_ISO_8859_1        , C_ISO_8859_1},
1163
        {"en_AU"        , C_ISO_8859_1        , C_ISO_8859_1},
1164
        {"en_BW"        , C_ISO_8859_1        , C_ISO_8859_1},
1165
        {"en_CA"        , C_ISO_8859_1        , C_ISO_8859_1},
1166
        {"en_DK"        , C_ISO_8859_1        , C_ISO_8859_1},
1167
        {"en_GB"        , C_ISO_8859_1        , C_ISO_8859_1},
1168
        {"en_HK"        , C_ISO_8859_1        , C_ISO_8859_1},
1169
        {"en_IE"        , C_ISO_8859_1        , C_ISO_8859_1},
1170
        {"en_NZ"        , C_ISO_8859_1        , C_ISO_8859_1},
1171
        {"en_PH"        , C_ISO_8859_1        , C_ISO_8859_1},
1172
        {"en_SG"        , C_ISO_8859_1        , C_ISO_8859_1},
1173
        {"en_US"        , C_ISO_8859_1        , C_ISO_8859_1},
1174
        {"en_ZA"        , C_ISO_8859_1        , C_ISO_8859_1},
1175
        {"en_ZW"        , C_ISO_8859_1        , C_ISO_8859_1},
1176
        {"es_AR"        , C_ISO_8859_1        , C_ISO_8859_1},
1177
        {"es_BO"        , C_ISO_8859_1        , C_ISO_8859_1},
1178
        {"es_CL"        , C_ISO_8859_1        , C_ISO_8859_1},
1179
        {"es_CO"        , C_ISO_8859_1        , C_ISO_8859_1},
1180
        {"es_CR"        , C_ISO_8859_1        , C_ISO_8859_1},
1181
        {"es_DO"        , C_ISO_8859_1        , C_ISO_8859_1},
1182
        {"es_EC"        , C_ISO_8859_1        , C_ISO_8859_1},
1183
        {"es_ES"        , C_ISO_8859_1        , C_ISO_8859_1},
1184
        {"es_GT"        , C_ISO_8859_1        , C_ISO_8859_1},
1185
        {"es_HN"        , C_ISO_8859_1        , C_ISO_8859_1},
1186
        {"es_MX"        , C_ISO_8859_1        , C_ISO_8859_1},
1187
        {"es_NI"        , C_ISO_8859_1        , C_ISO_8859_1},
1188
        {"es_PA"        , C_ISO_8859_1        , C_ISO_8859_1},
1189
        {"es_PE"        , C_ISO_8859_1        , C_ISO_8859_1},
1190
        {"es_PR"        , C_ISO_8859_1        , C_ISO_8859_1},
1191
        {"es_PY"        , C_ISO_8859_1        , C_ISO_8859_1},
1192
        {"es_SV"        , C_ISO_8859_1        , C_ISO_8859_1},
1193
        {"es_US"        , C_ISO_8859_1        , C_ISO_8859_1},
1194
        {"es_UY"        , C_ISO_8859_1        , C_ISO_8859_1},
1195
        {"es_VE"        , C_ISO_8859_1        , C_ISO_8859_1},
1196
        {"et_EE"        , C_ISO_8859_1        , C_ISO_8859_1},
1197
        {"eu_ES"        , C_ISO_8859_1        , C_ISO_8859_1},
1198
        {"fi_FI"        , C_ISO_8859_1        , C_ISO_8859_1},
1199
        {"fo_FO"        , C_ISO_8859_1        , C_ISO_8859_1},
1200
        {"fr_BE"        , C_ISO_8859_1        , C_ISO_8859_1},
1201
        {"fr_CA"        , C_ISO_8859_1        , C_ISO_8859_1},
1202
        {"fr_CH"        , C_ISO_8859_1        , C_ISO_8859_1},
1203
        {"fr_FR"        , C_ISO_8859_1        , C_ISO_8859_1},
1204
        {"fr_LU"        , C_ISO_8859_1        , C_ISO_8859_1},
1205
        {"ga_IE"        , C_ISO_8859_1        , C_ISO_8859_1},
1206
        {"gl_ES"        , C_ISO_8859_1        , C_ISO_8859_1},
1207
        {"gv_GB"        , C_ISO_8859_1        , C_ISO_8859_1},
1208
        {"id_ID"        , C_ISO_8859_1        , C_ISO_8859_1},
1209
        {"is_IS"        , C_ISO_8859_1        , C_ISO_8859_1},
1210
        {"it_CH"        , C_ISO_8859_1        , C_ISO_8859_1},
1211
        {"it_IT"        , C_ISO_8859_1        , C_ISO_8859_1},
1212
        {"kl_GL"        , C_ISO_8859_1        , C_ISO_8859_1},
1213
        {"kw_GB"        , C_ISO_8859_1        , C_ISO_8859_1},
1214
        {"ms_MY"        , C_ISO_8859_1        , C_ISO_8859_1},
1215
        {"nl_BE"        , C_ISO_8859_1        , C_ISO_8859_1},
1216
        {"nl_NL"        , C_ISO_8859_1        , C_ISO_8859_1},
1217
        {"nn_NO"        , C_ISO_8859_1        , C_ISO_8859_1},
1218
        {"no_NO"        , C_ISO_8859_1        , C_ISO_8859_1},
1219
        {"oc_FR"        , C_ISO_8859_1        , C_ISO_8859_1},
1220
        {"pt_BR"        , C_ISO_8859_1        , C_ISO_8859_1},
1221
        {"pt_PT"        , C_ISO_8859_1        , C_ISO_8859_1},
1222
        {"sq_AL"        , C_ISO_8859_1        , C_ISO_8859_1},
1223
        {"sv_FI"        , C_ISO_8859_1        , C_ISO_8859_1},
1224
        {"sv_SE"        , C_ISO_8859_1        , C_ISO_8859_1},
1225
        {"tl_PH"        , C_ISO_8859_1        , C_ISO_8859_1},
1226
        {"uz_UZ"        , C_ISO_8859_1        , C_ISO_8859_1},
1227
        {"wa_BE"        , C_ISO_8859_1        , C_ISO_8859_1},
1228

    
1229
        {"bs_BA"        , C_ISO_8859_2        , C_ISO_8859_2},
1230
        {"cs_CZ"        , C_ISO_8859_2        , C_ISO_8859_2},
1231
        {"hr_HR"        , C_ISO_8859_2        , C_ISO_8859_2},
1232
        {"hu_HU"        , C_ISO_8859_2        , C_ISO_8859_2},
1233
        {"pl_PL"        , C_ISO_8859_2        , C_ISO_8859_2},
1234
        {"ro_RO"        , C_ISO_8859_2        , C_ISO_8859_2},
1235
        {"sk_SK"        , C_ISO_8859_2        , C_ISO_8859_2},
1236
        {"sl_SI"        , C_ISO_8859_2        , C_ISO_8859_2},
1237

    
1238
        {"sr_YU@cyrillic"        , C_ISO_8859_5        , C_ISO_8859_5},
1239
        {"sr_YU"                , C_ISO_8859_2        , C_ISO_8859_2},
1240

    
1241
        {"mt_MT"                , C_ISO_8859_3        , C_ISO_8859_3},
1242

    
1243
        {"lt_LT.iso88594"        , C_ISO_8859_4        , C_ISO_8859_4},
1244
        {"lt_LT.ISO8859-4"        , C_ISO_8859_4        , C_ISO_8859_4},
1245
        {"lt_LT.ISO_8859-4"        , C_ISO_8859_4        , C_ISO_8859_4},
1246
        {"lt_LT"                , C_ISO_8859_13        , C_ISO_8859_13},
1247

    
1248
        {"mk_MK"        , C_ISO_8859_5        , C_ISO_8859_5},
1249

    
1250
        {"ar_AE"        , C_ISO_8859_6        , C_ISO_8859_6},
1251
        {"ar_BH"        , C_ISO_8859_6        , C_ISO_8859_6},
1252
        {"ar_DZ"        , C_ISO_8859_6        , C_ISO_8859_6},
1253
        {"ar_EG"        , C_ISO_8859_6        , C_ISO_8859_6},
1254
        {"ar_IQ"        , C_ISO_8859_6        , C_ISO_8859_6},
1255
        {"ar_JO"        , C_ISO_8859_6        , C_ISO_8859_6},
1256
        {"ar_KW"        , C_ISO_8859_6        , C_ISO_8859_6},
1257
        {"ar_LB"        , C_ISO_8859_6        , C_ISO_8859_6},
1258
        {"ar_LY"        , C_ISO_8859_6        , C_ISO_8859_6},
1259
        {"ar_MA"        , C_ISO_8859_6        , C_ISO_8859_6},
1260
        {"ar_OM"        , C_ISO_8859_6        , C_ISO_8859_6},
1261
        {"ar_QA"        , C_ISO_8859_6        , C_ISO_8859_6},
1262
        {"ar_SA"        , C_ISO_8859_6        , C_ISO_8859_6},
1263
        {"ar_SD"        , C_ISO_8859_6        , C_ISO_8859_6},
1264
        {"ar_SY"        , C_ISO_8859_6        , C_ISO_8859_6},
1265
        {"ar_TN"        , C_ISO_8859_6        , C_ISO_8859_6},
1266
        {"ar_YE"        , C_ISO_8859_6        , C_ISO_8859_6},
1267

    
1268
        {"el_GR"        , C_ISO_8859_7        , C_ISO_8859_7},
1269
        {"he_IL"        , C_ISO_8859_8        , C_ISO_8859_8},
1270
        {"iw_IL"        , C_ISO_8859_8        , C_ISO_8859_8},
1271
        {"tr_TR"        , C_ISO_8859_9        , C_ISO_8859_9},
1272

    
1273
        {"lv_LV"        , C_ISO_8859_13        , C_ISO_8859_13},
1274
        {"mi_NZ"        , C_ISO_8859_13        , C_ISO_8859_13},
1275

    
1276
        {"cy_GB"        , C_ISO_8859_14        , C_ISO_8859_14},
1277

    
1278
        {"ar_IN"        , C_UTF_8        , C_UTF_8},
1279
        {"en_IN"        , C_UTF_8        , C_UTF_8},
1280
        {"se_NO"        , C_UTF_8        , C_UTF_8},
1281
        {"ta_IN"        , C_UTF_8        , C_UTF_8},
1282
        {"te_IN"        , C_UTF_8        , C_UTF_8},
1283
        {"ur_PK"        , C_UTF_8        , C_UTF_8},
1284

    
1285
        {"th_TH"        , C_TIS_620        , C_TIS_620},
1286
        /* {"th_TH"        , C_WINDOWS_874}, */
1287
        /* {"th_TH"        , C_ISO_8859_11}, */
1288

    
1289
        {"ka_GE"        , C_GEORGIAN_PS        , C_GEORGIAN_PS},
1290
        {"vi_VN.TCVN"        , C_TCVN5712_1        , C_TCVN5712_1},
1291

    
1292
        {"C"                        , C_US_ASCII        , C_US_ASCII},
1293
        {"POSIX"                , C_US_ASCII        , C_US_ASCII},
1294
        {"ANSI_X3.4-1968"        , C_US_ASCII        , C_US_ASCII},
1295
};
1296

    
1297
static GHashTable *conv_get_charset_to_str_table(void)
1298
{
1299
        static GHashTable *table;
1300
        gint i;
1301

    
1302
        if (table)
1303
                return table;
1304

    
1305
        table = g_hash_table_new(NULL, g_direct_equal);
1306

    
1307
        for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
1308
                if (g_hash_table_lookup(table, GUINT_TO_POINTER(charsets[i].charset))
1309
                    == NULL) {
1310
                        g_hash_table_insert
1311
                                (table, GUINT_TO_POINTER(charsets[i].charset),
1312
                                 charsets[i].name);
1313
                }
1314
        }
1315

    
1316
        return table;
1317
}
1318

    
1319
static GHashTable *conv_get_charset_from_str_table(void)
1320
{
1321
        static GHashTable *table;
1322
        gint i;
1323

    
1324
        if (table)
1325
                return table;
1326

    
1327
        table = g_hash_table_new(str_case_hash, str_case_equal);
1328

    
1329
        for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
1330
                g_hash_table_insert(table, charsets[i].name,
1331
                                    GUINT_TO_POINTER(charsets[i].charset));
1332
        }
1333

    
1334
        return table;
1335
}
1336

    
1337
/*
 * Return the name registered for charset, or NULL when the value has no
 * entry in the charsets[] table.  The returned string is not owned by the
 * caller.
 */
const gchar *conv_get_charset_str(CharSet charset)
{
        GHashTable *names = conv_get_charset_to_str_table();

        return g_hash_table_lookup(names, GUINT_TO_POINTER(charset));
}
1344

    
1345
CharSet conv_get_charset_from_str(const gchar *charset)
1346
{
1347
        GHashTable *table;
1348

    
1349
        if (!charset) return C_AUTO;
1350

    
1351
        table = conv_get_charset_from_str_table();
1352
        return GPOINTER_TO_UINT(g_hash_table_lookup(table, charset));
1353
}
1354

    
1355
CharSet conv_get_locale_charset(void)
1356
{
1357
        static CharSet cur_charset = -1;
1358
        const gchar *cur_locale;
1359
        const gchar *p;
1360
        gint i;
1361

    
1362
        if (cur_charset != -1)
1363
                return cur_charset;
1364

    
1365
        cur_locale = conv_get_current_locale();
1366
        if (!cur_locale) {
1367
                cur_charset = C_US_ASCII;
1368
                return cur_charset;
1369
        }
1370

    
1371
        if (strcasestr(cur_locale, "UTF-8")) {
1372
                cur_charset = C_UTF_8;
1373
                return cur_charset;
1374
        }
1375

    
1376
        if ((p = strcasestr(cur_locale, "@euro")) && p[5] == '\0') {
1377
                cur_charset = C_ISO_8859_15;
1378
                return cur_charset;
1379
        }
1380

    
1381
        for (i = 0; i < sizeof(locale_table) / sizeof(locale_table[0]); i++) {
1382
                const gchar *p;
1383

    
1384
                /* "ja_JP.EUC" matches with "ja_JP.eucJP", "ja_JP.EUC" and
1385
                   "ja_JP". "ja_JP" matches with "ja_JP.xxxx" and "ja" */
1386
                if (!strncasecmp(cur_locale, locale_table[i].locale,
1387
                                 strlen(locale_table[i].locale))) {
1388
                        cur_charset = locale_table[i].charset;
1389
                        return cur_charset;
1390
                } else if ((p = strchr(locale_table[i].locale, '_')) &&
1391
                         !strchr(p + 1, '.')) {
1392
                        if (strlen(cur_locale) == 2 &&
1393
                            !strncasecmp(cur_locale, locale_table[i].locale, 2)) {
1394
                                cur_charset = locale_table[i].charset;
1395
                                return cur_charset;
1396
                        }
1397
                }
1398
        }
1399

    
1400
        cur_charset = C_AUTO;
1401
        return cur_charset;
1402
}
1403

    
1404
const gchar *conv_get_locale_charset_str(void)
1405
{
1406
        static const gchar *codeset = NULL;
1407

    
1408
        if (!codeset)
1409
                codeset = conv_get_charset_str(conv_get_locale_charset());
1410

    
1411
        return codeset ? codeset : CS_UTF_8;
1412
}
1413

    
1414
CharSet conv_get_internal_charset(void)
1415
{
1416
        return C_UTF_8;
1417
}
1418

    
1419
const gchar *conv_get_internal_charset_str(void)
1420
{
1421
        return CS_UTF_8;
1422
}
1423

    
1424
CharSet conv_get_outgoing_charset(void)
1425
{
1426
        static CharSet out_charset = -1;
1427
        const gchar *cur_locale;
1428
        const gchar *p;
1429
        gint i;
1430

    
1431
        if (out_charset != -1)
1432
                return out_charset;
1433

    
1434
        cur_locale = conv_get_current_locale();
1435
        if (!cur_locale) {
1436
                out_charset = C_AUTO;
1437
                return out_charset;
1438
        }
1439

    
1440
        if ((p = strcasestr(cur_locale, "@euro")) && p[5] == '\0') {
1441
                out_charset = C_ISO_8859_15;
1442
                return out_charset;
1443
        }
1444

    
1445
        for (i = 0; i < sizeof(locale_table) / sizeof(locale_table[0]); i++) {
1446
                const gchar *p;
1447

    
1448
                if (!strncasecmp(cur_locale, locale_table[i].locale,
1449
                                 strlen(locale_table[i].locale))) {
1450
                        out_charset = locale_table[i].out_charset;
1451
                        break;
1452
                } else if ((p = strchr(locale_table[i].locale, '_')) &&
1453
                         !strchr(p + 1, '.')) {
1454
                        if (strlen(cur_locale) == 2 &&
1455
                            !strncasecmp(cur_locale, locale_table[i].locale, 2)) {
1456
                                out_charset = locale_table[i].out_charset;
1457
                                break;
1458
                        }
1459
                }
1460
        }
1461

    
1462
#if !HAVE_ICONV
1463
        /* encoding conversion without iconv() is only supported
1464
           on Japanese locale for now */
1465
        if (out_charset == C_ISO_2022_JP)
1466
                return out_charset;
1467
        else
1468
                return conv_get_locale_charset();
1469
#endif
1470

    
1471
        return out_charset;
1472
}
1473

    
1474
const gchar *conv_get_outgoing_charset_str(void)
1475
{
1476
        CharSet out_charset;
1477
        const gchar *str;
1478

    
1479
        if (prefs_common.outgoing_charset) {
1480
                if (!isalpha((guchar)prefs_common.outgoing_charset[0])) {
1481
                        g_free(prefs_common.outgoing_charset);
1482
                        prefs_common.outgoing_charset = g_strdup(CS_AUTO);
1483
                } else if (strcmp(prefs_common.outgoing_charset, CS_AUTO) != 0)
1484
                        return prefs_common.outgoing_charset;
1485
        }
1486

    
1487
        out_charset = conv_get_outgoing_charset();
1488
        str = conv_get_charset_str(out_charset);
1489

    
1490
        return str ? str : CS_UTF_8;
1491
}
1492

    
1493
/*
 * Tell whether encoding represents characters with more than one byte
 * (or with shift/escape sequences).
 *
 * Returns TRUE for the CJK encodings and the UTF encodings listed below,
 * FALSE for everything else.  C_GBK and C_BIG5_HKSCS are included because
 * they are double-byte extensions of GB2312 and Big5 respectively.
 */
gboolean conv_is_multibyte_encoding(CharSet encoding)
{
        switch (encoding) {
        case C_EUC_JP:
        case C_EUC_KR:
        case C_EUC_TW:
        case C_EUC_CN:
        case C_ISO_2022_JP:
        case C_ISO_2022_JP_2:
        case C_ISO_2022_JP_3:
        case C_ISO_2022_KR:
        case C_ISO_2022_CN:
        case C_SHIFT_JIS:
        case C_GB2312:
        case C_GBK:
        case C_BIG5:
        case C_BIG5_HKSCS:
        case C_UTF_8:
        case C_UTF_7:
                return TRUE;
        default:
                return FALSE;
        }
}
1515

    
1516
/*
 * Return the name of the current character-type locale.
 *
 * The environment variables LC_ALL, LC_CTYPE and LANG are consulted in
 * that order; a variable that is unset or set to the empty string is
 * skipped, since an empty value means "not specified".  When none of
 * them provides a name, setlocale(LC_CTYPE, NULL) is queried.
 *
 * The result may be NULL.  The returned string is not owned by the
 * caller.
 */
const gchar *conv_get_current_locale(void)
{
        const gchar *cur_locale;

        cur_locale = g_getenv("LC_ALL");
        if (!cur_locale || *cur_locale == '\0')
                cur_locale = g_getenv("LC_CTYPE");
        if (!cur_locale || *cur_locale == '\0')
                cur_locale = g_getenv("LANG");
        if (!cur_locale || *cur_locale == '\0')
                cur_locale = setlocale(LC_CTYPE, NULL);

        debug_print("current locale: %s\n",
                    cur_locale ? cur_locale : "(none)");

        return cur_locale;
}
1530

    
1531
/*
 * Decode the MIME-encoded header in `str' and store the result back
 * into `str'.
 *
 * When the locale charset is C_EUC_JP the string is first passed
 * through conv_anytodisp() into a temporary buffer of twice the input
 * length, and the decoded result is written directly to `str'.
 * Otherwise the header is decoded into a temporary buffer and copied
 * back with strncpy2(), limited to the original length.
 *
 * If the temporary buffer cannot be obtained the function returns
 * without touching `str'.
 */
void conv_unmime_header_overwrite(gchar *str)
{
	gchar *tmp;
	gint tmp_size;

	if (conv_get_locale_charset() != C_EUC_JP) {
		tmp_size = strlen(str) + 1;
		Xalloca(tmp, tmp_size, return);
		unmime_header(tmp, str);
		strncpy2(str, tmp, tmp_size);
		return;
	}

	tmp_size = strlen(str) * 2 + 1;
	Xalloca(tmp, tmp_size, return);
	conv_anytodisp(tmp, tmp_size, str);
	unmime_header(str, tmp);
}
1551

    
1552
/*
 * Decode the MIME-encoded header `str' into `outbuf'.
 *
 * When the locale charset is C_EUC_JP, `str' is first passed through
 * conv_anytodisp() into a temporary buffer of twice its length and the
 * converted text is decoded; otherwise `str' is decoded directly.
 * If the temporary buffer cannot be obtained, nothing is written.
 *
 * Note: `outlen' and `charset' are accepted but not used here.
 */
void conv_unmime_header(gchar *outbuf, gint outlen, const gchar *str,
			const gchar *charset)
{
	gchar *disp;
	gint disp_size;

	if (conv_get_locale_charset() != C_EUC_JP) {
		unmime_header(outbuf, str);
		return;
	}

	disp_size = strlen(str) * 2 + 1;
	Xalloca(disp, disp_size, return);
	conv_anytodisp(disp, disp_size, str);
	unmime_header(outbuf, disp);
}
1570

    
1571
/* Line length budget used when folding an encoded header. */
#define MAX_LINELEN		76
/* Upper bound applied while copying a single unbroken ASCII word. */
#define MAX_HARD_LINELEN	996
/* Delimiters that open and close a MIME encoded-word. */
#define MIMESEP_BEGIN		"=?"
#define MIMESEP_END		"?="

/* Number of base64 characters produced for `len' input bytes
   (four characters per started group of three bytes). */
#define B64LEN(len)	(((len) / 3 + ((len) % 3 ? 1 : 0)) * 4)

/*
 * Helper for conv_encode_header(); it works on that function's
 * `dest', `len', `destp', `srcp' and `left' variables.
 *
 * 1. If fewer than MAX_LINELEN + 2 bytes remain in the destination
 *    buffer, the output is terminated and the enclosing function
 *    returns.
 * 2. Otherwise, if `cond' holds, input remains, something has already
 *    been written and the current line is not a fresh one, a line
 *    break followed by a single space is emitted and `left' is reset.
 *    A whitespace character just written is dropped first; failing
 *    that, for plain text, a whitespace character about to be read is
 *    skipped.
 *
 * The macro deliberately expands to a plain brace block because it is
 * also invoked without a trailing semicolon.
 */
#define LBREAK_IF_REQUIRED(cond, is_plain_text)				\
{									\
	if (len - (destp - (guchar *)dest) < MAX_LINELEN + 2) {		\
		*destp = '\0';						\
		return;							\
	}								\
									\
	if ((cond) && *srcp &&						\
	    destp > (guchar *)dest && left < MAX_LINELEN - 1) {		\
		if (isspace(*(destp - 1)))				\
			destp--;					\
		else if (is_plain_text && isspace(*srcp))		\
			srcp++;						\
		if (*srcp) {						\
			*destp++ = '\n';				\
			*destp++ = ' ';					\
			left = MAX_LINELEN - 1;				\
		}							\
	}								\
}
1599

    
1600
void conv_encode_header(gchar *dest, gint len, const gchar *src,
1601
                        gint header_len, gboolean addr_field)
1602
{
1603
        const gchar *cur_encoding;
1604
        const gchar *out_encoding;
1605
        gint mimestr_len;
1606
        gchar *mimesep_enc;
1607
        gint left;
1608
        const guchar *srcp = src;
1609
        guchar *destp = dest;
1610
        gboolean use_base64;
1611

    
1612
        if (MB_CUR_MAX > 1) {
1613
                use_base64 = TRUE;
1614
                mimesep_enc = "?B?";
1615
        } else {
1616
                use_base64 = FALSE;
1617
                mimesep_enc = "?Q?";
1618
        }
1619

    
1620
        cur_encoding = conv_get_locale_charset_str();
1621
        if (!strcmp(cur_encoding, CS_US_ASCII))
1622
                cur_encoding = CS_ISO_8859_1;
1623
        out_encoding = conv_get_outgoing_charset_str();
1624
        if (!strcmp(out_encoding, CS_US_ASCII))
1625
                out_encoding = CS_ISO_8859_1;
1626

    
1627
        mimestr_len = strlen(MIMESEP_BEGIN) + strlen(out_encoding) +
1628
                strlen(mimesep_enc) + strlen(MIMESEP_END);
1629

    
1630
        left = MAX_LINELEN - header_len;
1631

    
1632
        while (*srcp) {
1633
                LBREAK_IF_REQUIRED(left <= 0, TRUE);
1634

    
1635
                while (isspace(*srcp)) {
1636
                        *destp++ = *srcp++;
1637
                        left--;
1638
                        LBREAK_IF_REQUIRED(left <= 0, TRUE);
1639
                }
1640

    
1641
                /* output as it is if the next word is ASCII string */
1642
                if (!is_next_nonascii(srcp)) {
1643
                        gint word_len;
1644

    
1645
                        word_len = get_next_word_len(srcp);
1646
                        LBREAK_IF_REQUIRED(left < word_len, TRUE);
1647
                        while (word_len > 0) {
1648
                                LBREAK_IF_REQUIRED(left + (MAX_HARD_LINELEN - MAX_LINELEN) <= 0, TRUE)
1649
                                *destp++ = *srcp++;
1650
                                left--;
1651
                                word_len--;
1652
                        }
1653

    
1654
                        continue;
1655
                }
1656

    
1657
                /* don't include parentheses in encoded strings */
1658
                if (addr_field && (*srcp == '(' || *srcp == ')')) {
1659
                        LBREAK_IF_REQUIRED(left < 2, FALSE);
1660
                        *destp++ = *srcp++;
1661
                        left--;
1662
                }
1663

    
1664
                while (1) {
1665
                        gint mb_len = 0;
1666
                        gint cur_len = 0;
1667
                        gchar *part_str;
1668
                        gchar *out_str;
1669
                        gchar *enc_str;
1670
                        const guchar *p = srcp;
1671
                        gint out_str_len;
1672
                        gint out_enc_str_len;
1673
                        gint mime_block_len;
1674
                        gboolean cont = FALSE;
1675

    
1676
                        while (*p != '\0') {
1677
                                if (isspace(*p) && !is_next_nonascii(p + 1))
1678
                                        break;
1679
                                /* don't include parentheses in encoded
1680
                                   strings */
1681
                                if (addr_field && (*p == '(' || *p == ')'))
1682
                                        break;
1683

    
1684
                                if (MB_CUR_MAX > 1) {
1685
                                        mb_len = mblen(p, MB_CUR_MAX);
1686
                                        if (mb_len < 0) {
1687
                                                g_warning("conv_encode_header(): invalid multibyte character encountered\n");
1688
                                                mb_len = 1;
1689
                                        }
1690
                                } else
1691
                                        mb_len = 1;
1692

    
1693
                                Xstrndup_a(part_str, srcp, cur_len + mb_len, );
1694
                                out_str = conv_codeset_strdup
1695
                                        (part_str, cur_encoding, out_encoding);
1696
                                if (!out_str) {
1697
                                        g_warning("conv_encode_header(): code conversion failed\n");
1698
                                        conv_unreadable_8bit(part_str);
1699
                                        out_str = g_strdup(part_str);
1700
                                }
1701
                                out_str_len = strlen(out_str);
1702

    
1703
                                if (use_base64)
1704
                                        out_enc_str_len = B64LEN(out_str_len);
1705
                                else
1706
                                        out_enc_str_len =
1707
                                                qp_get_q_encoding_len(out_str);
1708

    
1709
                                g_free(out_str);
1710

    
1711
                                if (mimestr_len + out_enc_str_len <= left) {
1712
                                        cur_len += mb_len;
1713
                                        p += mb_len;
1714
                                } else if (cur_len == 0) {
1715
                                        LBREAK_IF_REQUIRED(1, FALSE);
1716
                                        continue;
1717
                                } else {
1718
                                        cont = TRUE;
1719
                                        break;
1720
                                }
1721
                        }
1722

    
1723
                        if (cur_len > 0) {
1724
                                Xstrndup_a(part_str, srcp, cur_len, );
1725
                                out_str = conv_codeset_strdup
1726
                                        (part_str, cur_encoding, out_encoding);
1727
                                if (!out_str) {
1728
                                        g_warning("conv_encode_header(): code conversion failed\n");
1729
                                        conv_unreadable_8bit(part_str);
1730
                                        out_str = g_strdup(part_str);
1731
                                }
1732
                                out_str_len = strlen(out_str);
1733

    
1734
                                if (use_base64)
1735
                                        out_enc_str_len = B64LEN(out_str_len);
1736
                                else
1737
                                        out_enc_str_len =
1738
                                                qp_get_q_encoding_len(out_str);
1739

    
1740
                                Xalloca(enc_str, out_enc_str_len + 1, );
1741
                                if (use_base64)
1742
                                        base64_encode(enc_str, out_str, out_str_len);
1743
                                else
1744
                                        qp_q_encode(enc_str, out_str);
1745

    
1746
                                g_free(out_str);
1747

    
1748
                                /* output MIME-encoded string block */
1749
                                mime_block_len = mimestr_len + strlen(enc_str);
1750
                                g_snprintf(destp, mime_block_len + 1,
1751
                                           MIMESEP_BEGIN "%s%s%s" MIMESEP_END,
1752
                                           out_encoding, mimesep_enc, enc_str);
1753
                                destp += mime_block_len;
1754
                                srcp += cur_len;
1755

    
1756
                                left -= mime_block_len;
1757
                        }
1758

    
1759
                        LBREAK_IF_REQUIRED(cont, FALSE);
1760

    
1761
                        if (cur_len == 0)
1762
                                break;
1763
                }
1764
        }
1765

    
1766
        *destp = '\0';
1767
}
1768

    
1769
#undef LBREAK_IF_REQUIRED