Statistics
| Revision:

root / src / codeconv.c @ 7

History | View | Annotate | Download (41.8 kB)

1
/*
 * Sylpheed -- a GTK+ based, lightweight, and fast e-mail client
 * Copyright (C) 1999-2004 Hiroyuki Yamamoto
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 */
19
20
#ifdef HAVE_CONFIG_H
21
#  include "config.h"
22
#endif
23
24
#include <glib.h>
25
#include <string.h>
26
#include <ctype.h>
27
#include <stdlib.h>
28
#include <errno.h>
29
30
#if HAVE_LOCALE_H
31
#  include <locale.h>
32
#endif
33
34
#include <iconv.h>
35
36
#include "intl.h"
37
#include "codeconv.h"
38
#include "unmime.h"
39
#include "base64.h"
40
#include "quoted-printable.h"
41
#include "utils.h"
42
#include "prefs_common.h"
43
44
/*
 * Shift state of an ISO-2022-JP (JIS) stream: which character set the
 * following bytes belong to.
 */
typedef enum
{
        JIS_ASCII,        /* single-byte ASCII / JIS Roman (initial state) */
        JIS_KANJI,        /* double-byte kanji */
        JIS_HWKANA,       /* single-byte half-width kana */
        JIS_AUXKANJI      /* double-byte auxiliary kanji */
} JISState;
51
52
/* Character written in place of bytes that cannot be converted or shown. */
#define SUBST_CHAR        '_'
/* ASCII escape: first byte of every ISO-2022-JP designation sequence. */
#define ESC                '\033'

/*
 * Byte classification helpers.  Only the low 8 bits of the argument are
 * examined.  The argument is evaluated more than once, so it must not
 * have side effects.
 */

/* byte of a double-byte EUC-JP character (0xa1 - 0xfe) */
#define iseuckanji(c) \
        (((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xfe)
/* EUC-JP prefix byte (0x8e) introducing a half-width kana */
#define iseuchwkana1(c) \
        (((c) & 0xff) == 0x8e)
/* EUC-JP half-width kana byte following 0x8e (0xa1 - 0xdf) */
#define iseuchwkana2(c) \
        (((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xdf)
/* EUC-JP prefix byte (0x8f) introducing an auxiliary kanji */
#define iseucaux(c) \
        (((c) & 0xff) == 0x8f)
/* first byte of a double-byte Shift_JIS character */
#define issjiskanji1(c) \
        ((((c) & 0xff) >= 0x81 && ((c) & 0xff) <= 0x9f) || \
         (((c) & 0xff) >= 0xe0 && ((c) & 0xff) <= 0xfc))
/* second byte of a double-byte Shift_JIS character */
#define issjiskanji2(c) \
        ((((c) & 0xff) >= 0x40 && ((c) & 0xff) <= 0x7e) || \
         (((c) & 0xff) >= 0x80 && ((c) & 0xff) <= 0xfc))
/* single-byte Shift_JIS half-width kana (0xa1 - 0xdf) */
#define issjishwkana(c) \
        (((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xdf)

/*
 * Escape sequence emitters.  Each one expects two variables in the
 * calling scope: `out' (byte pointer to the write position) and `state'
 * (the current JISState).  When the stream is not already in the wanted
 * state, the designation sequence is appended at `out' and `state' is
 * updated; otherwise nothing happens.  No bounds checking is done here:
 * the caller must guarantee room for up to 4 bytes.
 *
 * The bodies are wrapped in do { } while (0) so that an invocation
 * followed by ';' is exactly one statement, even in an if/else.
 */

/* switch to double-byte kanji: ESC $ B */
#define K_IN()                                        \
        do {                                        \
                if (state != JIS_KANJI) {        \
                        *out++ = ESC;                \
                        *out++ = '$';                \
                        *out++ = 'B';                \
                        state = JIS_KANJI;        \
                }                                \
        } while (0)

/* switch back to ASCII: ESC ( B */
#define K_OUT()                                        \
        do {                                        \
                if (state != JIS_ASCII) {        \
                        *out++ = ESC;                \
                        *out++ = '(';                \
                        *out++ = 'B';                \
                        state = JIS_ASCII;        \
                }                                \
        } while (0)

/* switch to half-width kana: ESC ( I */
#define HW_IN()                                        \
        do {                                        \
                if (state != JIS_HWKANA) {        \
                        *out++ = ESC;                \
                        *out++ = '(';                \
                        *out++ = 'I';                \
                        state = JIS_HWKANA;        \
                }                                \
        } while (0)

/* switch to auxiliary kanji: ESC $ ( D */
#define AUX_IN()                                \
        do {                                        \
                if (state != JIS_AUXKANJI) {        \
                        *out++ = ESC;                \
                        *out++ = '$';                \
                        *out++ = '(';                \
                        *out++ = 'D';                \
                        state = JIS_AUXKANJI;        \
                }                                \
        } while (0)
104
105
/*
 * Convert an ISO-2022-JP (JIS) string to EUC-JP.
 *
 * outbuf: destination buffer
 * outlen: size of outbuf in bytes, including the terminating NUL
 * inbuf:  NUL-terminated source string
 *
 * Recognized shifts:
 *   ESC $ @, ESC $ B   -> double-byte kanji
 *   ESC $ ( D          -> auxiliary kanji (emitted with the 0x8f prefix)
 *   ESC ( B, ESC ( J   -> ASCII
 *   ESC ( I, 0x0e      -> half-width kana (emitted with the 0x8e prefix)
 *   0x0f               -> ASCII
 * Any other escape sequence drops back to ASCII; only the ESC byte itself
 * is consumed.
 *
 * At most outlen - 1 bytes are written, followed by NUL.  Conversion
 * stops before the first character that would not fit completely.
 * Nothing is written when outlen is smaller than 1.
 */
void conv_jistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
{
        const guchar *in = (const guchar *)inbuf;
        guchar *out = (guchar *)outbuf;
        guchar *out_end;
        JISState state = JIS_ASCII;

        if (outlen < 1)
                return;
        /* one byte is always kept back for the terminating NUL */
        out_end = out + outlen - 1;

        while (*in != '\0') {
                if (*in == ESC) {
                        in++;
                        if (*in == '$' &&
                            (*(in + 1) == '@' || *(in + 1) == 'B')) {
                                state = JIS_KANJI;
                                in += 2;
                        } else if (*in == '$' && *(in + 1) == '(' &&
                                   *(in + 2) == 'D') {
                                state = JIS_AUXKANJI;
                                in += 3;
                        } else if (*in == '(' &&
                                   (*(in + 1) == 'B' || *(in + 1) == 'J')) {
                                state = JIS_ASCII;
                                in += 2;
                        } else if (*in == '(' && *(in + 1) == 'I') {
                                state = JIS_HWKANA;
                                in += 2;
                        } else {
                                /* unknown escape sequence */
                                state = JIS_ASCII;
                        }
                } else if (*in == 0x0e) {
                        state = JIS_HWKANA;
                        in++;
                } else if (*in == 0x0f) {
                        state = JIS_ASCII;
                        in++;
                } else {
                        gint need;

                        /* bytes the next character occupies in EUC-JP; a
                           double-byte character cut off by the end of the
                           input yields one byte less */
                        switch (state) {
                        case JIS_KANJI:
                                need = *(in + 1) != '\0' ? 2 : 1;
                                break;
                        case JIS_HWKANA:
                                need = 2;
                                break;
                        case JIS_AUXKANJI:
                                need = *(in + 1) != '\0' ? 3 : 2;
                                break;
                        default:
                                need = 1;
                                break;
                        }
                        if (out_end - out < need)
                                break;        /* output buffer is full */

                        switch (state) {
                        case JIS_ASCII:
                                *out++ = *in++;
                                break;
                        case JIS_KANJI:
                                *out++ = *in++ | 0x80;
                                if (*in == '\0') break;
                                *out++ = *in++ | 0x80;
                                break;
                        case JIS_HWKANA:
                                *out++ = 0x8e;
                                *out++ = *in++ | 0x80;
                                break;
                        case JIS_AUXKANJI:
                                *out++ = 0x8f;
                                *out++ = *in++ | 0x80;
                                if (*in == '\0') break;
                                *out++ = *in++ | 0x80;
                                break;
                        }
                }
        }

        *out = '\0';
}
173
174
#define JIS_HWDAKUTEN                0x5e
175
#define JIS_HWHANDAKUTEN        0x5f
176
177
static gint conv_jis_hantozen(guchar *outbuf, guchar jis_code, guchar sound_sym)
178
{
179
        static guint16 h2z_tbl[] = {
180
                /* 0x20 - 0x2f */
181
                0x0000, 0x2123, 0x2156, 0x2157, 0x2122, 0x2126, 0x2572, 0x2521,
182
                0x2523, 0x2525, 0x2527, 0x2529, 0x2563, 0x2565, 0x2567, 0x2543,
183
                /* 0x30 - 0x3f */
184
                0x213c, 0x2522, 0x2524, 0x2526, 0x2528, 0x252a, 0x252b, 0x252d,
185
                0x252f, 0x2531, 0x2533, 0x2535, 0x2537, 0x2539, 0x253b, 0x253d,
186
                /* 0x40 - 0x4f */
187
                0x253f, 0x2541, 0x2544, 0x2546, 0x2548, 0x254a, 0x254b, 0x254c,
188
                0x254d, 0x254e, 0x254f, 0x2552, 0x2555, 0x2558, 0x255b, 0x255e,
189
                /* 0x50 - 0x5f */
190
                0x255f, 0x2560, 0x2561, 0x2562, 0x2564, 0x2566, 0x2568, 0x2569,
191
                0x256a, 0x256b, 0x256c, 0x256d, 0x256f, 0x2573, 0x212b, 0x212c
192
        };
193
194
        static guint16 dakuten_tbl[] = {
195
                /* 0x30 - 0x3f */
196
                0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x252c, 0x252e,
197
                0x2530, 0x2532, 0x2534, 0x2536, 0x2538, 0x253a, 0x253c, 0x253e,
198
                /* 0x40 - 0x4f */
199
                0x2540, 0x2542, 0x2545, 0x2547, 0x2549, 0x0000, 0x0000, 0x0000,
200
                0x0000, 0x0000, 0x2550, 0x2553, 0x2556, 0x2559, 0x255c, 0x0000
201
        };
202
203
        static guint16 handakuten_tbl[] = {
204
                /* 0x4a - 0x4e */
205
                0x2551, 0x2554, 0x2557, 0x255a, 0x255d
206
        };
207
208
        guint16 out_code;
209
210
        jis_code &= 0x7f;
211
        sound_sym &= 0x7f;
212
213
        if (jis_code < 0x21 || jis_code > 0x5f)
214
                return 0;
215
216
        if (sound_sym == JIS_HWDAKUTEN &&
217
            jis_code >= 0x36 && jis_code <= 0x4e) {
218
                out_code = dakuten_tbl[jis_code - 0x30];
219
                if (out_code != 0) {
220
                        *outbuf = out_code >> 8;
221
                        *(outbuf + 1) = out_code & 0xff;
222
                        return 2;
223
                }
224
        }
225
226
        if (sound_sym == JIS_HWHANDAKUTEN &&
227
            jis_code >= 0x4a && jis_code <= 0x4e) {
228
                out_code = handakuten_tbl[jis_code - 0x4a];
229
                *outbuf = out_code >> 8;
230
                *(outbuf + 1) = out_code & 0xff;
231
                return 2;
232
        }
233
234
        out_code = h2z_tbl[jis_code - 0x20];
235
        *outbuf = out_code >> 8;
236
        *(outbuf + 1) = out_code & 0xff;
237
        return 1;
238
}
239
240
/*
 * Convert an EUC-JP string to ISO-2022-JP (JIS).
 *
 * outbuf: destination buffer
 * outlen: size of outbuf in bytes, including the terminating NUL
 * inbuf:  NUL-terminated source string
 *
 * Half-width kana (0x8e prefix) are kept as such when
 * prefs_common.allow_jisx0201_kana is set; otherwise they are replaced
 * through conv_jis_hantozen().  Bytes that do not form a valid sequence
 * are replaced by SUBST_CHAR.
 *
 * The output never exceeds outlen bytes.  Conversion stops before the
 * first character that would not fit together with the closing
 * "ESC ( B" and the terminating NUL, so the result is always a complete
 * sequence ending in the ASCII state.  Nothing is written when outlen is
 * smaller than 1.
 */
void conv_euctojis(gchar *outbuf, gint outlen, const gchar *inbuf)
{
        const guchar *in = (const guchar *)inbuf;
        guchar *out = (guchar *)outbuf;
        JISState state = JIS_ASCII;

        if (outlen < 1)
                return;

        while (*in != '\0') {
                JISState next = JIS_ASCII;        /* state required by this step */
                guchar pay[2];                        /* bytes to emit */
                gint npay = 0;                        /* number of bytes in pay[] */
                gint nin = 1;                        /* input bytes consumed */
                gint need;
                gint i;

                if (isascii(*in)) {
                        pay[npay++] = *in;
                } else if (iseuckanji(*in)) {
                        if (iseuckanji(*(in + 1))) {
                                next = JIS_KANJI;
                                pay[npay++] = *in & 0x7f;
                                pay[npay++] = *(in + 1) & 0x7f;
                                nin = 2;
                        } else {
                                /* broken pair: substitute the lead byte and
                                   a non-ASCII byte after it, if any */
                                pay[npay++] = SUBST_CHAR;
                                if (*(in + 1) != '\0' &&
                                    !isascii(*(in + 1))) {
                                        pay[npay++] = SUBST_CHAR;
                                        nin = 2;
                                }
                        }
                } else if (iseuchwkana1(*in)) {
                        if (iseuchwkana2(*(in + 1))) {
                                nin = 2;
                                if (prefs_common.allow_jisx0201_kana) {
                                        next = JIS_HWKANA;
                                        pay[npay++] = *(in + 1) & 0x7f;
                                } else {
                                        guchar sound_sym = '\0';
                                        gint len;

                                        /* pass the next half-width kana too,
                                           so a sound mark can be merged */
                                        if (iseuchwkana1(*(in + 2)) &&
                                            iseuchwkana2(*(in + 3)))
                                                sound_sym = *(in + 3);
                                        len = conv_jis_hantozen
                                                (pay, *(in + 1), sound_sym);
                                        if (len == 0) {
                                                /* not convertible: skip it
                                                   without touching the state */
                                                next = state;
                                        } else {
                                                next = JIS_KANJI;
                                                npay = 2;
                                                nin = len * 2;
                                        }
                                }
                        } else if (*(in + 1) != '\0' && !isascii(*(in + 1))) {
                                /* stray 0x8e: dropped; a non-ASCII byte
                                   after it is substituted */
                                pay[npay++] = SUBST_CHAR;
                                nin = 2;
                        }
                } else if (iseucaux(*in)) {
                        if (iseuckanji(*(in + 1)) && iseuckanji(*(in + 2))) {
                                next = JIS_AUXKANJI;
                                pay[npay++] = *(in + 1) & 0x7f;
                                pay[npay++] = *(in + 2) & 0x7f;
                                nin = 3;
                        } else if (*(in + 1) != '\0' && !isascii(*(in + 1))) {
                                /* stray 0x8f: dropped; up to two non-ASCII
                                   bytes after it are substituted */
                                pay[npay++] = SUBST_CHAR;
                                nin = 2;
                                if (*(in + 2) != '\0' &&
                                    !isascii(*(in + 2))) {
                                        pay[npay++] = SUBST_CHAR;
                                        nin = 3;
                                }
                        }
                } else {
                        pay[npay++] = SUBST_CHAR;
                }

                /* space needed: shift sequence + payload + the closing
                   "ESC ( B" if we end up outside ASCII + NUL */
                need = npay + 1;
                if (next != state)
                        need += (next == JIS_AUXKANJI) ? 4 : 3;
                if (next != JIS_ASCII)
                        need += 3;
                if (outlen - (gint)(out - (guchar *)outbuf) < need)
                        break;

                switch (next) {
                case JIS_KANJI:
                        K_IN();
                        break;
                case JIS_HWKANA:
                        HW_IN();
                        break;
                case JIS_AUXKANJI:
                        AUX_IN();
                        break;
                default:
                        K_OUT();
                        break;
                }
                for (i = 0; i < npay; i++)
                        *out++ = pay[i];
                in += nin;
        }

        K_OUT();
        *out = '\0';
}
327
328
/*
 * Convert a Shift_JIS string to EUC-JP.
 *
 * outbuf: destination buffer
 * outlen: size of outbuf in bytes, including the terminating NUL
 * inbuf:  NUL-terminated source string
 *
 * Double-byte characters are remapped arithmetically, half-width kana
 * get the 0x8e prefix, and bytes that do not form a valid sequence are
 * replaced by SUBST_CHAR.
 *
 * At most outlen - 1 bytes are written, followed by NUL.  Conversion
 * stops before the first character that would not fit completely.
 * Nothing is written when outlen is smaller than 1.
 */
void conv_sjistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
{
        const guchar *in = (const guchar *)inbuf;
        guchar *out = (guchar *)outbuf;
        guchar *out_end;

        if (outlen < 1)
                return;
        /* one byte is always kept back for the terminating NUL */
        out_end = out + outlen - 1;

        while (*in != '\0') {
                guchar pay[2];                /* bytes to emit */
                gint npay = 0;                /* number of bytes in pay[] */
                gint nin = 1;                /* input bytes consumed */
                gint i;

                if (isascii(*in)) {
                        pay[npay++] = *in;
                } else if (issjiskanji1(*in)) {
                        if (issjiskanji2(*(in + 1))) {
                                guchar out1 = *in;
                                guchar out2 = *(in + 1);
                                guchar row;

                                row = out1 < 0xa0 ? 0x70 : 0xb0;
                                if (out2 < 0x9f) {
                                        out1 = (out1 - row) * 2 - 1;
                                        out2 -= out2 > 0x7f ? 0x20 : 0x1f;
                                } else {
                                        out1 = (out1 - row) * 2;
                                        out2 -= 0x7e;
                                }

                                pay[npay++] = out1 | 0x80;
                                pay[npay++] = out2 | 0x80;
                                nin = 2;
                        } else {
                                /* broken pair: substitute the lead byte and
                                   a non-ASCII byte after it, if any */
                                pay[npay++] = SUBST_CHAR;
                                if (*(in + 1) != '\0' &&
                                    !isascii(*(in + 1))) {
                                        pay[npay++] = SUBST_CHAR;
                                        nin = 2;
                                }
                        }
                } else if (issjishwkana(*in)) {
                        pay[npay++] = 0x8e;
                        pay[npay++] = *in;
                } else {
                        pay[npay++] = SUBST_CHAR;
                }

                if (out_end - out < npay)
                        break;        /* output buffer is full */

                for (i = 0; i < npay; i++)
                        *out++ = pay[i];
                in += nin;
        }

        *out = '\0';
}
373
374
/*
 * Convert an ISO-2022-JP (JIS) string to UTF-8.
 *
 * The input is first converted to EUC-JP with conv_jistoeuc() into a
 * temporary buffer of outlen bytes, then passed through iconv.  The
 * iconv descriptor is opened on first use (CS_EUC_JP_MS is tried before
 * CS_EUC_JP) and kept for later calls.  If no descriptor can be opened,
 * a warning is logged once and this and all later calls copy inbuf
 * unchanged.  The input is also copied unchanged when the iconv
 * conversion itself fails.
 *
 * outbuf: destination buffer
 * outlen: size of outbuf in bytes
 * inbuf:  NUL-terminated source string
 */
void conv_jistoutf8(gchar *outbuf, gint outlen, const gchar *inbuf)
{
        static iconv_t cd = (iconv_t)-1;
        static gboolean iconv_ok = TRUE;
        gchar *eucstr;
        gchar *utf8str;

        Xalloca(eucstr, outlen, return);

        conv_jistoeuc(eucstr, outlen, inbuf);

        if (cd == (iconv_t)-1 && iconv_ok) {
                cd = iconv_open(CS_UTF_8, CS_EUC_JP_MS);
                if (cd == (iconv_t)-1)
                        cd = iconv_open(CS_UTF_8, CS_EUC_JP);
                if (cd == (iconv_t)-1) {
                        g_warning("conv_jistoutf8(): %s\n",
                                  g_strerror(errno));
                        /* remember the failure so we do not retry */
                        iconv_ok = FALSE;
                }
        }

        if (cd == (iconv_t)-1) {
                strncpy2(outbuf, inbuf, outlen);
                return;
        }

        utf8str = conv_iconv_strdup_with_cd(eucstr, cd);
        if (utf8str) {
                strncpy2(outbuf, utf8str, outlen);
                g_free(utf8str);
        } else
                strncpy2(outbuf, inbuf, outlen);
}
410
411
/*
 * Convert a Shift_JIS string to UTF-8 through conv_iconv_strdup().
 * If the conversion fails, inbuf is copied to outbuf unchanged.
 *
 * outbuf: destination buffer
 * outlen: size of outbuf in bytes
 * inbuf:  NUL-terminated source string
 */
void conv_sjistoutf8(gchar *outbuf, gint outlen, const gchar *inbuf)
{
        gchar *utf8str;

        utf8str = conv_iconv_strdup(inbuf, CS_SHIFT_JIS, CS_UTF_8);
        if (!utf8str) {
                strncpy2(outbuf, inbuf, outlen);
                return;
        }

        strncpy2(outbuf, utf8str, outlen);
        g_free(utf8str);
}
422
423
/*
 * Convert an EUC-JP string to UTF-8 through conv_iconv_strdup().
 * If the conversion fails, inbuf is copied to outbuf unchanged.
 *
 * outbuf: destination buffer
 * outlen: size of outbuf in bytes
 * inbuf:  NUL-terminated source string
 */
void conv_euctoutf8(gchar *outbuf, gint outlen, const gchar *inbuf)
{
        gchar *utf8str;

        utf8str = conv_iconv_strdup(inbuf, CS_EUC_JP, CS_UTF_8);
        if (!utf8str) {
                strncpy2(outbuf, inbuf, outlen);
                return;
        }

        strncpy2(outbuf, utf8str, outlen);
        g_free(utf8str);
}
434
435
/*
 * Convert a Japanese string of unknown encoding to EUC-JP.
 *
 * The encoding is guessed with conv_guess_ja_encoding().  ISO-2022-JP
 * and Shift_JIS input is converted; anything else is copied unchanged.
 *
 * outbuf: destination buffer
 * outlen: size of outbuf in bytes
 * inbuf:  NUL-terminated source string
 */
void conv_anytoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
{
        switch (conv_guess_ja_encoding(inbuf)) {
        case C_ISO_2022_JP:
                conv_jistoeuc(outbuf, outlen, inbuf);
                break;
        case C_SHIFT_JIS:
                conv_sjistoeuc(outbuf, outlen, inbuf);
                break;
        default:
                strncpy2(outbuf, inbuf, outlen);
                break;
        }
}
449
450
/*
 * Convert a Japanese string of unknown encoding to UTF-8.
 *
 * The encoding is guessed with conv_guess_ja_encoding().  ISO-2022-JP,
 * Shift_JIS and EUC-JP input is converted; anything else is copied
 * unchanged.
 *
 * outbuf: destination buffer
 * outlen: size of outbuf in bytes
 * inbuf:  NUL-terminated source string
 */
void conv_anytoutf8(gchar *outbuf, gint outlen, const gchar *inbuf)
{
        switch (conv_guess_ja_encoding(inbuf)) {
        case C_ISO_2022_JP:
                conv_jistoutf8(outbuf, outlen, inbuf);
                break;
        case C_SHIFT_JIS:
                conv_sjistoutf8(outbuf, outlen, inbuf);
                break;
        case C_EUC_JP:
                conv_euctoutf8(outbuf, outlen, inbuf);
                break;
        default:
                strncpy2(outbuf, inbuf, outlen);
                break;
        }
}
467
468
/*
 * Convert a Japanese string of unknown encoding to ISO-2022-JP (JIS).
 *
 * Only input guessed to be EUC-JP by conv_guess_ja_encoding() is
 * converted; every other input (Shift_JIS included) is copied unchanged.
 *
 * outbuf: destination buffer
 * outlen: size of outbuf in bytes
 * inbuf:  NUL-terminated source string
 */
void conv_anytojis(gchar *outbuf, gint outlen, const gchar *inbuf)
{
        switch (conv_guess_ja_encoding(inbuf)) {
        case C_EUC_JP:
                conv_euctojis(outbuf, outlen, inbuf);
                break;
        default:
                strncpy2(outbuf, inbuf, outlen);
                break;
        }
}
479
480
/*
 * Printability table for EUC-JP lead bytes 0xa2 - 0xa8, used by
 * isprintableeuckanji().  The row index is (lead byte - 0xa2) and the
 * column index is (trail byte - 0xa0); a 1 marks a printable character.
 */
static const gchar valid_eucjp_tbl[][96] = {
        /* 0xa2a0 - 0xa2ff */
        { 0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 0,
          0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 1, 1, 1, 1, 1, 1,
          1, 1, 0, 0, 0, 0, 0, 0,  0, 0, 1, 1, 1, 1, 1, 1,
          1, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 0, 0, 0, 0, 0,
          0, 0, 1, 1, 1, 1, 1, 1,  1, 1, 0, 0, 0, 0, 1, 0 },

        /* 0xa3a0 - 0xa3ff */
        { 0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 0, 0, 0, 0, 0, 0,
          0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 0, 0, 0, 0, 0,
          0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 0, 0, 0, 0, 0 },

        /* 0xa4a0 - 0xa4ff */
        { 0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0 },

        /* 0xa5a0 - 0xa5ff */
        { 0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 0,  0, 0, 0, 0, 0, 0, 0, 0 },

        /* 0xa6a0 - 0xa6ff */
        { 0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 0, 0, 0, 0, 0, 0, 0,
          0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0 },

        /* 0xa7a0 - 0xa7ff */
        { 0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
          0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0 },

        /* 0xa8a0 - 0xa8ff */
        { 0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0 }
};
537
538
/*
 * Tell whether the byte pair (c1, c2) is a printable double-byte EUC-JP
 * character.
 *
 * Rejected: lead bytes outside 0xa1 - 0xf4, trail bytes outside
 * 0xa1 - 0xfe, lead bytes 0xa9 - 0xaf, 0xcfd4 - 0xcffe and
 * 0xf4a7 - 0xf4fe.  Lead bytes 0xa2 - 0xa8 are looked up in
 * valid_eucjp_tbl.  Everything else is accepted.
 */
static gboolean isprintableeuckanji(guchar c1, guchar c2)
{
        if (c1 <= 0xa0 || c1 >= 0xf5)
                return FALSE;
        if (c2 <= 0xa0 || c2 == 0xff)
                return FALSE;

        if (c1 >= 0xa9 && c1 <= 0xaf)
                return FALSE;

        /* c2 is within 0xa1 - 0xfe here, so the column index is 1 - 94 */
        if (c1 >= 0xa2 && c1 <= 0xa8)
                return (gboolean)valid_eucjp_tbl[c1 - 0xa2][c2 - 0xa0];

        if (c1 == 0xcf && c2 >= 0xd4 && c2 <= 0xfe)
                return FALSE;
        if (c1 == 0xf4 && c2 >= 0xa7 && c2 <= 0xfe)
                return FALSE;

        return TRUE;
}
561
562
/*
 * Clean up an EUC-JP string in place for display.
 *
 * - CR+LF is reduced to LF.
 * - ASCII bytes, printable double-byte characters (see
 *   isprintableeuckanji()), half-width kana (0x8e + 0xa1 - 0xdf) and
 *   auxiliary kanji (0x8f + two bytes in 0xa1 - 0xfe) are kept.
 * - Everything else is replaced by SUBST_CHAR.
 *
 * The string is compacted in a single pass; it never grows.
 */
void conv_unreadable_eucjp(gchar *str)
{
        const guchar *src = (const guchar *)str;
        guchar *dst = (guchar *)str;

        while (*src != '\0') {
                if (isascii(*src)) {
                        /* convert CR+LF -> LF */
                        if (*src == '\r' && *(src + 1) == '\n')
                                src++;
                        /* printable 7 bit code */
                        *dst++ = *src++;
                } else if (iseuckanji(*src)) {
                        if (isprintableeuckanji(*src, *(src + 1))) {
                                /* printable euc-jp code */
                                *dst++ = *src++;
                                *dst++ = *src++;
                        } else {
                                /* substitute unprintable code */
                                *dst++ = SUBST_CHAR;
                                src++;
                                if (*src != '\0') {
                                        /* an ASCII byte after the bad lead
                                           byte is kept as it is */
                                        if (isascii(*src))
                                                *dst++ = *src;
                                        else
                                                *dst++ = SUBST_CHAR;
                                        src++;
                                }
                        }
                } else if (iseuchwkana1(*src)) {
                        if (iseuchwkana2(*(src + 1))) {
                                /* euc-jp hankaku kana */
                                *dst++ = *src++;
                                *dst++ = *src++;
                        } else {
                                *dst++ = SUBST_CHAR;
                                src++;
                        }
                } else if (iseucaux(*src)) {
                        if (iseuckanji(*(src + 1)) && iseuckanji(*(src + 2))) {
                                /* auxiliary kanji */
                                *dst++ = *src++;
                                *dst++ = *src++;
                                *dst++ = *src++;
                        } else {
                                *dst++ = SUBST_CHAR;
                                src++;
                        }
                } else {
                        /* substitute unprintable 1 byte code */
                        *dst++ = SUBST_CHAR;
                        src++;
                }
        }

        *dst = '\0';
}
604
605
/*
 * Clean up a string in place for 7-bit display: CR+LF is reduced to LF
 * and every non-ASCII byte is replaced by SUBST_CHAR.
 *
 * The string is compacted in a single pass; it never grows.
 */
void conv_unreadable_8bit(gchar *str)
{
        const guchar *src = (const guchar *)str;
        guchar *dst = (guchar *)str;

        for (; *src != '\0'; src++) {
                /* convert CR+LF -> LF: drop the CR, the LF is copied by
                   the next iteration */
                if (*src == '\r' && *(src + 1) == '\n')
                        continue;
                *dst++ = isascii(*src) ? *src : SUBST_CHAR;
        }

        *dst = '\0';
}
617
618
/*
 * Clean up a single-byte (Latin) string in place for display: CR+LF is
 * reduced to LF and bytes in the range 0x7f - 0x9f are replaced by
 * SUBST_CHAR.
 *
 * The string is compacted in a single pass; it never grows.
 */
void conv_unreadable_latin(gchar *str)
{
        const guchar *src = (const guchar *)str;
        guchar *dst = (guchar *)str;

        for (; *src != '\0'; src++) {
                /* convert CR+LF -> LF: drop the CR, the LF is copied by
                   the next iteration */
                if (*src == '\r' && *(src + 1) == '\n')
                        continue;
                if (*src >= 0x7f && *src <= 0x9f)
                        *dst++ = SUBST_CHAR;
                else
                        *dst++ = *src;
        }

        *dst = '\0';
}
631
632
/*
 * Clean up a string in place according to the locale charset returned
 * by conv_get_locale_charset(): US-ASCII and the listed ISO-8859
 * charsets go through conv_unreadable_latin(), EUC-JP goes through
 * conv_unreadable_eucjp().  For any other charset the string is left
 * untouched.
 */
void conv_unreadable_locale(gchar *str)
{
        switch (conv_get_locale_charset()) {
        case C_US_ASCII:
        case C_ISO_8859_1:
        case C_ISO_8859_2:
        case C_ISO_8859_3:
        case C_ISO_8859_4:
        case C_ISO_8859_5:
        case C_ISO_8859_6:
        case C_ISO_8859_7:
        case C_ISO_8859_8:
        case C_ISO_8859_9:
        case C_ISO_8859_10:
        case C_ISO_8859_11:
        case C_ISO_8859_13:
        case C_ISO_8859_14:
        case C_ISO_8859_15:
                conv_unreadable_latin(str);
                break;
        case C_EUC_JP:
                conv_unreadable_eucjp(str);
                break;
        default:
                break;
        }
}
659
660
#define NCV        '\0'
661
662
void conv_mb_alnum(gchar *str)
663
{
664
        static guchar char_tbl[] = {
665
                /* 0xa0 - 0xaf */
666
                NCV, ' ', NCV, NCV, ',', '.', NCV, ':',
667
                ';', '?', '!', NCV, NCV, NCV, NCV, NCV,
668
                /* 0xb0 - 0xbf */
669
                NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
670
                NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
671
                /* 0xc0 - 0xcf */
672
                NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
673
                NCV, NCV, '(', ')', NCV, NCV, '[', ']',
674
                /* 0xd0 - 0xdf */
675
                '{', '}', NCV, NCV, NCV, NCV, NCV, NCV,
676
                NCV, NCV, NCV, NCV, '+', '-', NCV, NCV,
677
                /* 0xe0 - 0xef */
678
                NCV, '=', NCV, '<', '>', NCV, NCV, NCV,
679
                NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV
680
        };
681
682
        register guchar *p = str;
683
        register gint len;
684
685
        len = strlen(str);
686
687
        while (len > 1) {
688
                if (*p == 0xa3) {
689
                        register guchar ch = *(p + 1);
690
691
                        if (ch >= 0xb0 && ch <= 0xfa) {
692
                                /* [a-zA-Z] */
693
                                *p = ch & 0x7f;
694
                                p++;
695
                                len--;
696
                                memmove(p, p + 1, len);
697
                                len--;
698
                        } else  {
699
                                p += 2;
700
                                len -= 2;
701
                        }
702
                } else if (*p == 0xa1) {
703
                        register guchar ch = *(p + 1);
704
705
                        if (ch >= 0xa0 && ch <= 0xef &&
706
                            NCV != char_tbl[ch - 0xa0]) {
707
                                *p = char_tbl[ch - 0xa0];
708
                                p++;
709
                                len--;
710
                                memmove(p, p + 1, len);
711
                                len--;
712
                        } else {
713
                                p += 2;
714
                                len -= 2;
715
                        }
716
                } else if (iseuckanji(*p)) {
717
                        p += 2;
718
                        len -= 2;
719
                } else {
720
                        p++;
721
                        len--;
722
                }
723
        }
724
}
725
726
CharSet conv_guess_ja_encoding(const gchar *str)
727
{
728
        const guchar *p = str;
729
        CharSet guessed = C_US_ASCII;
730
731
        while (*p != '\0') {
732
                if (*p == ESC && (*(p + 1) == '$' || *(p + 1) == '(')) {
733
                        if (guessed == C_US_ASCII)
734
                                return C_ISO_2022_JP;
735
                        p += 2;
736
                } else if (isascii(*p)) {
737
                        p++;
738
                } else if (iseuckanji(*p) && iseuckanji(*(p + 1))) {
739
                        if (*p >= 0xfd && *p <= 0xfe)
740
                                return C_EUC_JP;
741
                        else if (guessed == C_SHIFT_JIS) {
742
                                if ((issjiskanji1(*p) &&
743
                                     issjiskanji2(*(p + 1))) ||
744
                                    issjishwkana(*p))
745
                                        guessed = C_SHIFT_JIS;
746
                                else
747
                                        guessed = C_EUC_JP;
748
                        } else
749
                                guessed = C_EUC_JP;
750
                        p += 2;
751
                } else if (issjiskanji1(*p) && issjiskanji2(*(p + 1))) {
752
                        if (iseuchwkana1(*p) && iseuchwkana2(*(p + 1)))
753
                                guessed = C_SHIFT_JIS;
754
                        else
755
                                return C_SHIFT_JIS;
756
                        p += 2;
757
                } else if (issjishwkana(*p)) {
758
                        guessed = C_SHIFT_JIS;
759
                        p++;
760
                } else {
761
                        p++;
762
                }
763
        }
764
765
        return guessed;
766
}
767
768
/* Convert an ISO-2022-JP string for display; forwards to conv_jistoutf8(). */
void conv_jistodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
{
        conv_jistoutf8(outbuf, outlen, inbuf);
}
772
773
/* Convert a Shift_JIS string for display; forwards to conv_sjistoutf8(). */
void conv_sjistodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
{
        conv_sjistoutf8(outbuf, outlen, inbuf);
}
777
778
/* Convert an EUC-JP string for display; forwards to conv_euctoutf8(). */
void conv_euctodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
{
        conv_euctoutf8(outbuf, outlen, inbuf);
}
782
783
/*
 * Convert a Japanese string of unknown encoding for display; forwards
 * to conv_anytoutf8().
 */
void conv_anytodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
{
        conv_anytoutf8(outbuf, outlen, inbuf);
}
787
788
/*
 * Prepare a US-ASCII string for display: copy inbuf to outbuf with
 * strncpy2(), then run conv_unreadable_8bit() on the copy.
 */
void conv_ustodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
{
        strncpy2(outbuf, inbuf, outlen);
        conv_unreadable_8bit(outbuf);
}
793
794
/*
 * Prepare a Latin (single-byte) string for display: inbuf is copied to
 * outbuf with strncpy2() and not filtered any further.
 * conv_unreadable_latin() is deliberately not applied here.
 */
void conv_latintodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
{
        strncpy2(outbuf, inbuf, outlen);
}
799
800
/* Display converter for locale-encoded input.
 *
 * Converts inbuf from the locale charset to the internal charset through
 * conv_iconv_strdup() and copies the result into outbuf (bounded by outlen).
 * When the conversion returns NULL, inbuf is copied verbatim instead. */
void conv_localetodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
{
        gchar *converted = conv_iconv_strdup(inbuf,
                                             conv_get_locale_charset_str(),
                                             conv_get_internal_charset_str());

        if (!converted) {
                /* conversion unavailable: fall back to the raw bytes */
                strncpy2(outbuf, inbuf, outlen);
                return;
        }

        strncpy2(outbuf, converted, outlen);
        g_free(converted);
}
812
813
/* Identity converter: bounded copy of inbuf into outbuf.  Its address also
 * serves as the "no dedicated converter" marker compared against by
 * conv_convert() and conv_codeset_strdup(). */
void conv_noconv(gchar *outbuf, gint outlen, const gchar *inbuf)
{
        strncpy2(outbuf, inbuf, outlen);
}
817
818
CodeConverter *conv_code_converter_new(const gchar *src_charset)
819
{
820
        CodeConverter *conv;
821
822
        conv = g_new0(CodeConverter, 1);
823
        conv->code_conv_func = conv_get_code_conv_func(src_charset, NULL);
824
        conv->charset_str = g_strdup(src_charset);
825
        conv->charset = conv_get_charset_from_str(src_charset);
826
827
        return conv;
828
}
829
830
/* Free a CodeConverter created by conv_code_converter_new(), including its
 * copy of the charset name.  Passing NULL is a no-op, mirroring g_free(). */
void conv_code_converter_destroy(CodeConverter *conv)
{
        if (!conv)
                return;

        g_free(conv->charset_str);
        g_free(conv);
}
835
836
gint conv_convert(CodeConverter *conv, gchar *outbuf, gint outlen,
837
                  const gchar *inbuf)
838
{
839
        if (conv->code_conv_func != conv_noconv)
840
                conv->code_conv_func(outbuf, outlen, inbuf);
841
        else {
842
                gchar *str;
843
844
                str = conv_iconv_strdup(inbuf, conv->charset_str, NULL);
845
                if (!str)
846
                        return -1;
847
                else {
848
                        strncpy2(outbuf, str, outlen);
849
                        g_free(str);
850
                }
851
        }
852
853
        return 0;
854
}
855
856
gchar *conv_codeset_strdup(const gchar *inbuf,
857
                           const gchar *src_code, const gchar *dest_code)
858
{
859
        gchar *buf;
860
        size_t len;
861
        CodeConvFunc conv_func;
862
863
        conv_func = conv_get_code_conv_func(src_code, dest_code);
864
        if (conv_func != conv_noconv) {
865
                len = (strlen(inbuf) + 1) * 3;
866
                buf = g_malloc(len);
867
                if (!buf) return NULL;
868
869
                conv_func(buf, len, inbuf);
870
                return g_realloc(buf, strlen(buf) + 1);
871
        }
872
873
        return conv_iconv_strdup(inbuf, src_code, dest_code);
874
}
875
876
CodeConvFunc conv_get_code_conv_func(const gchar *src_charset_str,
877
                                     const gchar *dest_charset_str)
878
{
879
        CodeConvFunc code_conv = conv_noconv;
880
        CharSet src_charset;
881
        CharSet dest_charset;
882
883
        if (!src_charset_str)
884
                src_charset = conv_get_locale_charset();
885
        else
886
                src_charset = conv_get_charset_from_str(src_charset_str);
887
888
        /* auto detection mode */
889
        if (!src_charset_str && !dest_charset_str) {
890
                if (src_charset == C_EUC_JP || src_charset == C_SHIFT_JIS)
891
                        return conv_anytodisp;
892
                else
893
                        return conv_noconv;
894
        }
895
896
        dest_charset = conv_get_charset_from_str(dest_charset_str);
897
898
        if (dest_charset == C_US_ASCII)
899
                return conv_ustodisp;
900
901
        switch (src_charset) {
902
        case C_ISO_2022_JP:
903
        case C_ISO_2022_JP_2:
904
        case C_ISO_2022_JP_3:
905
                if (dest_charset == C_AUTO)
906
                        code_conv = conv_jistodisp;
907
                else if (dest_charset == C_EUC_JP)
908
                        code_conv = conv_jistoeuc;
909
                else if (dest_charset == C_UTF_8)
910
                        code_conv = conv_jistoutf8;
911
                break;
912
        case C_US_ASCII:
913
                if (dest_charset == C_AUTO)
914
                        code_conv = conv_ustodisp;
915
                break;
916
        case C_ISO_8859_1:
917
        case C_ISO_8859_2:
918
        case C_ISO_8859_3:
919
        case C_ISO_8859_4:
920
        case C_ISO_8859_5:
921
        case C_ISO_8859_6:
922
        case C_ISO_8859_7:
923
        case C_ISO_8859_8:
924
        case C_ISO_8859_9:
925
        case C_ISO_8859_10:
926
        case C_ISO_8859_11:
927
        case C_ISO_8859_13:
928
        case C_ISO_8859_14:
929
        case C_ISO_8859_15:
930
                break;
931
        case C_SHIFT_JIS:
932
                if (dest_charset == C_AUTO)
933
                        code_conv = conv_sjistodisp;
934
                else if (dest_charset == C_EUC_JP)
935
                        code_conv = conv_sjistoeuc;
936
                else if (dest_charset == C_UTF_8)
937
                        code_conv = conv_sjistoutf8;
938
                break;
939
        case C_EUC_JP:
940
                if (dest_charset == C_AUTO)
941
                        code_conv = conv_euctodisp;
942
                else if (dest_charset == C_ISO_2022_JP   ||
943
                         dest_charset == C_ISO_2022_JP_2 ||
944
                         dest_charset == C_ISO_2022_JP_3)
945
                        code_conv = conv_euctojis;
946
                else if (dest_charset == C_UTF_8)
947
                        code_conv = conv_euctoutf8;
948
                break;
949
        default:
950
                break;
951
        }
952
953
        return code_conv;
954
}
955
956
gchar *conv_iconv_strdup(const gchar *inbuf,
957
                         const gchar *src_code, const gchar *dest_code)
958
{
959
        iconv_t cd;
960
        gchar *outbuf;
961
962
        if (!src_code)
963
                src_code = conv_get_outgoing_charset_str();
964
        if (!dest_code)
965
                dest_code = conv_get_internal_charset_str();
966
967
        /* don't convert if current codeset is US-ASCII */
968
        if (!strcasecmp(dest_code, CS_US_ASCII))
969
                return g_strdup(inbuf);
970
971
        /* don't convert if src and dest codeset are identical */
972
        if (!strcasecmp(src_code, dest_code))
973
                return g_strdup(inbuf);
974
975
        cd = iconv_open(dest_code, src_code);
976
        if (cd == (iconv_t)-1)
977
                return NULL;
978
979
        outbuf = conv_iconv_strdup_with_cd(inbuf, cd);
980
981
        iconv_close(cd);
982
983
        return outbuf;
984
}
985
986
gchar *conv_iconv_strdup_with_cd(const gchar *inbuf, iconv_t cd)
987
{
988
        const gchar *inbuf_p;
989
        gchar *outbuf;
990
        gchar *outbuf_p;
991
        size_t in_size;
992
        size_t in_left;
993
        size_t out_size;
994
        size_t out_left;
995
        size_t n_conv;
996
        size_t len;
997
998
        inbuf_p = inbuf;
999
        in_size = strlen(inbuf);
1000
        in_left = in_size;
1001
        out_size = (in_size + 1) * 2;
1002
        outbuf = g_malloc(out_size);
1003
        outbuf_p = outbuf;
1004
        out_left = out_size;
1005
1006
#define EXPAND_BUF()                                \
1007
{                                                \
1008
        len = outbuf_p - outbuf;                \
1009
        out_size *= 2;                                \
1010
        outbuf = g_realloc(outbuf, out_size);        \
1011
        outbuf_p = outbuf + len;                \
1012
        out_left = out_size - len;                \
1013
}
1014
1015
        while ((n_conv = iconv(cd, (ICONV_CONST gchar **)&inbuf_p, &in_left,
1016
                               &outbuf_p, &out_left)) == (size_t)-1) {
1017
                if (EILSEQ == errno) {
1018
                        g_print("iconv(): at %d: %s\n", in_size - in_left, g_strerror(errno));
1019
                        inbuf_p++;
1020
                        in_left--;
1021
                        if (out_left == 0) {
1022
                                EXPAND_BUF();
1023
                        }
1024
                        *outbuf_p++ = SUBST_CHAR;
1025
                        out_left--;
1026
                } else if (EINVAL == errno) {
1027
                        break;
1028
                } else if (E2BIG == errno) {
1029
                        EXPAND_BUF();
1030
                } else {
1031
                        g_warning("conv_iconv_strdup(): %s\n",
1032
                                  g_strerror(errno));
1033
                        break;
1034
                }
1035
        }
1036
1037
        while ((n_conv = iconv(cd, NULL, NULL, &outbuf_p, &out_left)) ==
1038
               (size_t)-1) {
1039
                if (E2BIG == errno) {
1040
                        EXPAND_BUF();
1041
                } else {
1042
                        g_warning("conv_iconv_strdup(): %s\n",
1043
                                  g_strerror(errno));
1044
                        break;
1045
                }
1046
        }
1047
1048
#undef EXPAND_BUF
1049
1050
        len = outbuf_p - outbuf;
1051
        outbuf = g_realloc(outbuf, len + 1);
1052
        outbuf[len] = '\0';
1053
1054
        return outbuf;
1055
}
1056
1057
static const struct {
1058
        CharSet charset;
1059
        gchar *const name;
1060
} charsets[] = {
1061
        {C_US_ASCII,                CS_US_ASCII},
1062
        {C_US_ASCII,                CS_ANSI_X3_4_1968},
1063
        {C_UTF_8,                CS_UTF_8},
1064
        {C_UTF_7,                CS_UTF_7},
1065
        {C_ISO_8859_1,                CS_ISO_8859_1},
1066
        {C_ISO_8859_2,                CS_ISO_8859_2},
1067
        {C_ISO_8859_3,                CS_ISO_8859_3},
1068
        {C_ISO_8859_4,                CS_ISO_8859_4},
1069
        {C_ISO_8859_5,                CS_ISO_8859_5},
1070
        {C_ISO_8859_6,                CS_ISO_8859_6},
1071
        {C_ISO_8859_7,                CS_ISO_8859_7},
1072
        {C_ISO_8859_8,                CS_ISO_8859_8},
1073
        {C_ISO_8859_9,                CS_ISO_8859_9},
1074
        {C_ISO_8859_10,                CS_ISO_8859_10},
1075
        {C_ISO_8859_11,                CS_ISO_8859_11},
1076
        {C_ISO_8859_13,                CS_ISO_8859_13},
1077
        {C_ISO_8859_14,                CS_ISO_8859_14},
1078
        {C_ISO_8859_15,                CS_ISO_8859_15},
1079
        {C_BALTIC,                CS_BALTIC},
1080
        {C_CP1250,                CS_CP1250},
1081
        {C_CP1251,                CS_CP1251},
1082
        {C_CP1252,                CS_CP1252},
1083
        {C_CP1253,                CS_CP1253},
1084
        {C_CP1254,                CS_CP1254},
1085
        {C_CP1255,                CS_CP1255},
1086
        {C_CP1256,                CS_CP1256},
1087
        {C_CP1257,                CS_CP1257},
1088
        {C_CP1258,                CS_CP1258},
1089
        {C_WINDOWS_1250,        CS_WINDOWS_1250},
1090
        {C_WINDOWS_1251,        CS_WINDOWS_1251},
1091
        {C_WINDOWS_1252,        CS_WINDOWS_1252},
1092
        {C_WINDOWS_1253,        CS_WINDOWS_1253},
1093
        {C_WINDOWS_1254,        CS_WINDOWS_1254},
1094
        {C_WINDOWS_1255,        CS_WINDOWS_1255},
1095
        {C_WINDOWS_1256,        CS_WINDOWS_1256},
1096
        {C_WINDOWS_1257,        CS_WINDOWS_1257},
1097
        {C_WINDOWS_1258,        CS_WINDOWS_1258},
1098
        {C_KOI8_R,                CS_KOI8_R},
1099
        {C_KOI8_T,                CS_KOI8_T},
1100
        {C_KOI8_U,                CS_KOI8_U},
1101
        {C_ISO_2022_JP,                CS_ISO_2022_JP},
1102
        {C_ISO_2022_JP_2,        CS_ISO_2022_JP_2},
1103
        {C_ISO_2022_JP_3,        CS_ISO_2022_JP_3},
1104
        {C_EUC_JP,                CS_EUC_JP},
1105
        {C_EUC_JP,                CS_EUCJP},
1106
        {C_EUC_JP_MS,                CS_EUC_JP_MS},
1107
        {C_SHIFT_JIS,                CS_SHIFT_JIS},
1108
        {C_SHIFT_JIS,                CS_SHIFT__JIS},
1109
        {C_SHIFT_JIS,                CS_SJIS},
1110
        {C_ISO_2022_KR,                CS_ISO_2022_KR},
1111
        {C_EUC_KR,                CS_EUC_KR},
1112
        {C_ISO_2022_CN,                CS_ISO_2022_CN},
1113
        {C_EUC_CN,                CS_EUC_CN},
1114
        {C_GB2312,                CS_GB2312},
1115
        {C_GBK,                        CS_GBK},
1116
        {C_EUC_TW,                CS_EUC_TW},
1117
        {C_BIG5,                CS_BIG5},
1118
        {C_BIG5_HKSCS,                CS_BIG5_HKSCS},
1119
        {C_TIS_620,                CS_TIS_620},
1120
        {C_WINDOWS_874,                CS_WINDOWS_874},
1121
        {C_GEORGIAN_PS,                CS_GEORGIAN_PS},
1122
        {C_TCVN5712_1,                CS_TCVN5712_1},
1123
};
1124
1125
static const struct {
1126
        gchar *const locale;
1127
        CharSet charset;
1128
        CharSet out_charset;
1129
} locale_table[] = {
1130
        {"ja_JP.eucJP"        , C_EUC_JP        , C_ISO_2022_JP},
1131
        {"ja_JP.EUC-JP"        , C_EUC_JP        , C_ISO_2022_JP},
1132
        {"ja_JP.EUC"        , C_EUC_JP        , C_ISO_2022_JP},
1133
        {"ja_JP.ujis"        , C_EUC_JP        , C_ISO_2022_JP},
1134
        {"ja_JP.SJIS"        , C_SHIFT_JIS        , C_ISO_2022_JP},
1135
        {"ja_JP.JIS"        , C_ISO_2022_JP        , C_ISO_2022_JP},
1136
        {"ja_JP"        , C_EUC_JP        , C_ISO_2022_JP},
1137
        {"ko_KR.EUC-KR"        , C_EUC_KR        , C_EUC_KR},
1138
        {"ko_KR"        , C_EUC_KR        , C_EUC_KR},
1139
        {"zh_CN.GB2312"        , C_GB2312        , C_GB2312},
1140
        {"zh_CN.GBK"        , C_GBK                , C_GB2312},
1141
        {"zh_CN"        , C_GB2312        , C_GB2312},
1142
        {"zh_HK"        , C_BIG5_HKSCS        , C_BIG5_HKSCS},
1143
        {"zh_TW.eucTW"        , C_EUC_TW        , C_BIG5},
1144
        {"zh_TW.EUC-TW"        , C_EUC_TW        , C_BIG5},
1145
        {"zh_TW.Big5"        , C_BIG5        , C_BIG5},
1146
        {"zh_TW"        , C_BIG5        , C_BIG5},
1147
1148
        {"ru_RU.KOI8-R"        , C_KOI8_R        , C_KOI8_R},
1149
        {"ru_RU.KOI8R"        , C_KOI8_R        , C_KOI8_R},
1150
        {"ru_RU.CP1251"        , C_WINDOWS_1251, C_KOI8_R},
1151
        {"ru_RU"        , C_ISO_8859_5        , C_KOI8_R},
1152
        {"tg_TJ"        , C_KOI8_T        , C_KOI8_T},
1153
        {"ru_UA"        , C_KOI8_U        , C_KOI8_U},
1154
        {"uk_UA.CP1251"        , C_WINDOWS_1251, C_KOI8_U},
1155
        {"uk_UA"        , C_KOI8_U        , C_KOI8_U},
1156
1157
        {"be_BY"        , C_WINDOWS_1251, C_WINDOWS_1251},
1158
        {"bg_BG"        , C_WINDOWS_1251, C_WINDOWS_1251},
1159
1160
        {"yi_US"        , C_WINDOWS_1255, C_WINDOWS_1255},
1161
1162
        {"af_ZA"        , C_ISO_8859_1  , C_ISO_8859_1},
1163
        {"br_FR"        , C_ISO_8859_1        , C_ISO_8859_1},
1164
        {"ca_ES"        , C_ISO_8859_1        , C_ISO_8859_1},
1165
        {"da_DK"        , C_ISO_8859_1        , C_ISO_8859_1},
1166
        {"de_AT"        , C_ISO_8859_1        , C_ISO_8859_1},
1167
        {"de_BE"        , C_ISO_8859_1        , C_ISO_8859_1},
1168
        {"de_CH"        , C_ISO_8859_1        , C_ISO_8859_1},
1169
        {"de_DE"        , C_ISO_8859_1        , C_ISO_8859_1},
1170
        {"de_LU"        , C_ISO_8859_1        , C_ISO_8859_1},
1171
        {"en_AU"        , C_ISO_8859_1        , C_ISO_8859_1},
1172
        {"en_BW"        , C_ISO_8859_1        , C_ISO_8859_1},
1173
        {"en_CA"        , C_ISO_8859_1        , C_ISO_8859_1},
1174
        {"en_DK"        , C_ISO_8859_1        , C_ISO_8859_1},
1175
        {"en_GB"        , C_ISO_8859_1        , C_ISO_8859_1},
1176
        {"en_HK"        , C_ISO_8859_1        , C_ISO_8859_1},
1177
        {"en_IE"        , C_ISO_8859_1        , C_ISO_8859_1},
1178
        {"en_NZ"        , C_ISO_8859_1        , C_ISO_8859_1},
1179
        {"en_PH"        , C_ISO_8859_1        , C_ISO_8859_1},
1180
        {"en_SG"        , C_ISO_8859_1        , C_ISO_8859_1},
1181
        {"en_US"        , C_ISO_8859_1        , C_ISO_8859_1},
1182
        {"en_ZA"        , C_ISO_8859_1        , C_ISO_8859_1},
1183
        {"en_ZW"        , C_ISO_8859_1        , C_ISO_8859_1},
1184
        {"es_AR"        , C_ISO_8859_1        , C_ISO_8859_1},
1185
        {"es_BO"        , C_ISO_8859_1        , C_ISO_8859_1},
1186
        {"es_CL"        , C_ISO_8859_1        , C_ISO_8859_1},
1187
        {"es_CO"        , C_ISO_8859_1        , C_ISO_8859_1},
1188
        {"es_CR"        , C_ISO_8859_1        , C_ISO_8859_1},
1189
        {"es_DO"        , C_ISO_8859_1        , C_ISO_8859_1},
1190
        {"es_EC"        , C_ISO_8859_1        , C_ISO_8859_1},
1191
        {"es_ES"        , C_ISO_8859_1        , C_ISO_8859_1},
1192
        {"es_GT"        , C_ISO_8859_1        , C_ISO_8859_1},
1193
        {"es_HN"        , C_ISO_8859_1        , C_ISO_8859_1},
1194
        {"es_MX"        , C_ISO_8859_1        , C_ISO_8859_1},
1195
        {"es_NI"        , C_ISO_8859_1        , C_ISO_8859_1},
1196
        {"es_PA"        , C_ISO_8859_1        , C_ISO_8859_1},
1197
        {"es_PE"        , C_ISO_8859_1        , C_ISO_8859_1},
1198
        {"es_PR"        , C_ISO_8859_1        , C_ISO_8859_1},
1199
        {"es_PY"        , C_ISO_8859_1        , C_ISO_8859_1},
1200
        {"es_SV"        , C_ISO_8859_1        , C_ISO_8859_1},
1201
        {"es_US"        , C_ISO_8859_1        , C_ISO_8859_1},
1202
        {"es_UY"        , C_ISO_8859_1        , C_ISO_8859_1},
1203
        {"es_VE"        , C_ISO_8859_1        , C_ISO_8859_1},
1204
        {"et_EE"        , C_ISO_8859_1        , C_ISO_8859_1},
1205
        {"eu_ES"        , C_ISO_8859_1        , C_ISO_8859_1},
1206
        {"fi_FI"        , C_ISO_8859_1        , C_ISO_8859_1},
1207
        {"fo_FO"        , C_ISO_8859_1        , C_ISO_8859_1},
1208
        {"fr_BE"        , C_ISO_8859_1        , C_ISO_8859_1},
1209
        {"fr_CA"        , C_ISO_8859_1        , C_ISO_8859_1},
1210
        {"fr_CH"        , C_ISO_8859_1        , C_ISO_8859_1},
1211
        {"fr_FR"        , C_ISO_8859_1        , C_ISO_8859_1},
1212
        {"fr_LU"        , C_ISO_8859_1        , C_ISO_8859_1},
1213
        {"ga_IE"        , C_ISO_8859_1        , C_ISO_8859_1},
1214
        {"gl_ES"        , C_ISO_8859_1        , C_ISO_8859_1},
1215
        {"gv_GB"        , C_ISO_8859_1        , C_ISO_8859_1},
1216
        {"id_ID"        , C_ISO_8859_1        , C_ISO_8859_1},
1217
        {"is_IS"        , C_ISO_8859_1        , C_ISO_8859_1},
1218
        {"it_CH"        , C_ISO_8859_1        , C_ISO_8859_1},
1219
        {"it_IT"        , C_ISO_8859_1        , C_ISO_8859_1},
1220
        {"kl_GL"        , C_ISO_8859_1        , C_ISO_8859_1},
1221
        {"kw_GB"        , C_ISO_8859_1        , C_ISO_8859_1},
1222
        {"ms_MY"        , C_ISO_8859_1        , C_ISO_8859_1},
1223
        {"nl_BE"        , C_ISO_8859_1        , C_ISO_8859_1},
1224
        {"nl_NL"        , C_ISO_8859_1        , C_ISO_8859_1},
1225
        {"nn_NO"        , C_ISO_8859_1        , C_ISO_8859_1},
1226
        {"no_NO"        , C_ISO_8859_1        , C_ISO_8859_1},
1227
        {"oc_FR"        , C_ISO_8859_1        , C_ISO_8859_1},
1228
        {"pt_BR"        , C_ISO_8859_1        , C_ISO_8859_1},
1229
        {"pt_PT"        , C_ISO_8859_1        , C_ISO_8859_1},
1230
        {"sq_AL"        , C_ISO_8859_1        , C_ISO_8859_1},
1231
        {"sv_FI"        , C_ISO_8859_1        , C_ISO_8859_1},
1232
        {"sv_SE"        , C_ISO_8859_1        , C_ISO_8859_1},
1233
        {"tl_PH"        , C_ISO_8859_1        , C_ISO_8859_1},
1234
        {"uz_UZ"        , C_ISO_8859_1        , C_ISO_8859_1},
1235
        {"wa_BE"        , C_ISO_8859_1        , C_ISO_8859_1},
1236
1237
        {"bs_BA"        , C_ISO_8859_2        , C_ISO_8859_2},
1238
        {"cs_CZ"        , C_ISO_8859_2        , C_ISO_8859_2},
1239
        {"hr_HR"        , C_ISO_8859_2        , C_ISO_8859_2},
1240
        {"hu_HU"        , C_ISO_8859_2        , C_ISO_8859_2},
1241
        {"pl_PL"        , C_ISO_8859_2        , C_ISO_8859_2},
1242
        {"ro_RO"        , C_ISO_8859_2        , C_ISO_8859_2},
1243
        {"sk_SK"        , C_ISO_8859_2        , C_ISO_8859_2},
1244
        {"sl_SI"        , C_ISO_8859_2        , C_ISO_8859_2},
1245
1246
        {"sr_YU@cyrillic"        , C_ISO_8859_5        , C_ISO_8859_5},
1247
        {"sr_YU"                , C_ISO_8859_2        , C_ISO_8859_2},
1248
1249
        {"mt_MT"                , C_ISO_8859_3        , C_ISO_8859_3},
1250
1251
        {"lt_LT.iso88594"        , C_ISO_8859_4        , C_ISO_8859_4},
1252
        {"lt_LT.ISO8859-4"        , C_ISO_8859_4        , C_ISO_8859_4},
1253
        {"lt_LT.ISO_8859-4"        , C_ISO_8859_4        , C_ISO_8859_4},
1254
        {"lt_LT"                , C_ISO_8859_13        , C_ISO_8859_13},
1255
1256
        {"mk_MK"        , C_ISO_8859_5        , C_ISO_8859_5},
1257
1258
        {"ar_AE"        , C_ISO_8859_6        , C_ISO_8859_6},
1259
        {"ar_BH"        , C_ISO_8859_6        , C_ISO_8859_6},
1260
        {"ar_DZ"        , C_ISO_8859_6        , C_ISO_8859_6},
1261
        {"ar_EG"        , C_ISO_8859_6        , C_ISO_8859_6},
1262
        {"ar_IQ"        , C_ISO_8859_6        , C_ISO_8859_6},
1263
        {"ar_JO"        , C_ISO_8859_6        , C_ISO_8859_6},
1264
        {"ar_KW"        , C_ISO_8859_6        , C_ISO_8859_6},
1265
        {"ar_LB"        , C_ISO_8859_6        , C_ISO_8859_6},
1266
        {"ar_LY"        , C_ISO_8859_6        , C_ISO_8859_6},
1267
        {"ar_MA"        , C_ISO_8859_6        , C_ISO_8859_6},
1268
        {"ar_OM"        , C_ISO_8859_6        , C_ISO_8859_6},
1269
        {"ar_QA"        , C_ISO_8859_6        , C_ISO_8859_6},
1270
        {"ar_SA"        , C_ISO_8859_6        , C_ISO_8859_6},
1271
        {"ar_SD"        , C_ISO_8859_6        , C_ISO_8859_6},
1272
        {"ar_SY"        , C_ISO_8859_6        , C_ISO_8859_6},
1273
        {"ar_TN"        , C_ISO_8859_6        , C_ISO_8859_6},
1274
        {"ar_YE"        , C_ISO_8859_6        , C_ISO_8859_6},
1275
1276
        {"el_GR"        , C_ISO_8859_7        , C_ISO_8859_7},
1277
        {"he_IL"        , C_ISO_8859_8        , C_ISO_8859_8},
1278
        {"iw_IL"        , C_ISO_8859_8        , C_ISO_8859_8},
1279
        {"tr_TR"        , C_ISO_8859_9        , C_ISO_8859_9},
1280
1281
        {"lv_LV"        , C_ISO_8859_13        , C_ISO_8859_13},
1282
        {"mi_NZ"        , C_ISO_8859_13        , C_ISO_8859_13},
1283
1284
        {"cy_GB"        , C_ISO_8859_14        , C_ISO_8859_14},
1285
1286
        {"ar_IN"        , C_UTF_8        , C_UTF_8},
1287
        {"en_IN"        , C_UTF_8        , C_UTF_8},
1288
        {"se_NO"        , C_UTF_8        , C_UTF_8},
1289
        {"ta_IN"        , C_UTF_8        , C_UTF_8},
1290
        {"te_IN"        , C_UTF_8        , C_UTF_8},
1291
        {"ur_PK"        , C_UTF_8        , C_UTF_8},
1292
1293
        {"th_TH"        , C_TIS_620        , C_TIS_620},
1294
        /* {"th_TH"        , C_WINDOWS_874}, */
1295
        /* {"th_TH"        , C_ISO_8859_11}, */
1296
1297
        {"ka_GE"        , C_GEORGIAN_PS        , C_GEORGIAN_PS},
1298
        {"vi_VN.TCVN"        , C_TCVN5712_1        , C_TCVN5712_1},
1299
1300
        {"C"                        , C_US_ASCII        , C_US_ASCII},
1301
        {"POSIX"                , C_US_ASCII        , C_US_ASCII},
1302
        {"ANSI_X3.4-1968"        , C_US_ASCII        , C_US_ASCII},
1303
};
1304
1305
static GHashTable *conv_get_charset_to_str_table(void)
1306
{
1307
        static GHashTable *table;
1308
        gint i;
1309
1310
        if (table)
1311
                return table;
1312
1313
        table = g_hash_table_new(NULL, g_direct_equal);
1314
1315
        for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
1316
                if (g_hash_table_lookup(table, GUINT_TO_POINTER(charsets[i].charset))
1317
                    == NULL) {
1318
                        g_hash_table_insert
1319
                                (table, GUINT_TO_POINTER(charsets[i].charset),
1320
                                 charsets[i].name);
1321
                }
1322
        }
1323
1324
        return table;
1325
}
1326
1327
static GHashTable *conv_get_charset_from_str_table(void)
1328
{
1329
        static GHashTable *table;
1330
        gint i;
1331
1332
        if (table)
1333
                return table;
1334
1335
        table = g_hash_table_new(str_case_hash, str_case_equal);
1336
1337
        for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
1338
                g_hash_table_insert(table, charsets[i].name,
1339
                                    GUINT_TO_POINTER(charsets[i].charset));
1340
        }
1341
1342
        return table;
1343
}
1344
1345
/* Look up the name registered for a CharSet id.  Returns NULL when the id
 * has no entry in the id -> name table. */
const gchar *conv_get_charset_str(CharSet charset)
{
        return g_hash_table_lookup(conv_get_charset_to_str_table(),
                                   GUINT_TO_POINTER(charset));
}
1352
1353
CharSet conv_get_charset_from_str(const gchar *charset)
1354
{
1355
        GHashTable *table;
1356
1357
        if (!charset) return C_AUTO;
1358
1359
        table = conv_get_charset_from_str_table();
1360
        return GPOINTER_TO_UINT(g_hash_table_lookup(table, charset));
1361
}
1362
1363
CharSet conv_get_locale_charset(void)
1364
{
1365
        static CharSet cur_charset = -1;
1366
        const gchar *cur_locale;
1367
        const gchar *p;
1368
        gint i;
1369
1370
        if (cur_charset != -1)
1371
                return cur_charset;
1372
1373
        cur_locale = conv_get_current_locale();
1374
        if (!cur_locale) {
1375
                cur_charset = C_US_ASCII;
1376
                return cur_charset;
1377
        }
1378
1379
        if (strcasestr(cur_locale, "UTF-8")) {
1380
                cur_charset = C_UTF_8;
1381
                return cur_charset;
1382
        }
1383
1384
        if ((p = strcasestr(cur_locale, "@euro")) && p[5] == '\0') {
1385
                cur_charset = C_ISO_8859_15;
1386
                return cur_charset;
1387
        }
1388
1389
        for (i = 0; i < sizeof(locale_table) / sizeof(locale_table[0]); i++) {
1390
                const gchar *p;
1391
1392
                /* "ja_JP.EUC" matches with "ja_JP.eucJP", "ja_JP.EUC" and
1393
                   "ja_JP". "ja_JP" matches with "ja_JP.xxxx" and "ja" */
1394
                if (!strncasecmp(cur_locale, locale_table[i].locale,
1395
                                 strlen(locale_table[i].locale))) {
1396
                        cur_charset = locale_table[i].charset;
1397
                        return cur_charset;
1398
                } else if ((p = strchr(locale_table[i].locale, '_')) &&
1399
                         !strchr(p + 1, '.')) {
1400
                        if (strlen(cur_locale) == 2 &&
1401
                            !strncasecmp(cur_locale, locale_table[i].locale, 2)) {
1402
                                cur_charset = locale_table[i].charset;
1403
                                return cur_charset;
1404
                        }
1405
                }
1406
        }
1407
1408
        cur_charset = C_AUTO;
1409
        return cur_charset;
1410
}
1411
1412
const gchar *conv_get_locale_charset_str(void)
1413
{
1414
        static const gchar *codeset = NULL;
1415
1416
        if (!codeset)
1417
                codeset = conv_get_charset_str(conv_get_locale_charset());
1418
1419
        return codeset ? codeset : CS_UTF_8;
1420
}
1421
1422
CharSet conv_get_internal_charset(void)
1423
{
1424
        return C_UTF_8;
1425
}
1426
1427
const gchar *conv_get_internal_charset_str(void)
1428
{
1429
        return CS_UTF_8;
1430
}
1431
1432
CharSet conv_get_outgoing_charset(void)
1433
{
1434
        static CharSet out_charset = -1;
1435
        const gchar *cur_locale;
1436
        const gchar *p;
1437
        gint i;
1438
1439
        if (out_charset != -1)
1440
                return out_charset;
1441
1442
        cur_locale = conv_get_current_locale();
1443
        if (!cur_locale) {
1444
                out_charset = C_AUTO;
1445
                return out_charset;
1446
        }
1447
1448
        if ((p = strcasestr(cur_locale, "@euro")) && p[5] == '\0') {
1449
                out_charset = C_ISO_8859_15;
1450
                return out_charset;
1451
        }
1452
1453
        for (i = 0; i < sizeof(locale_table) / sizeof(locale_table[0]); i++) {
1454
                const gchar *p;
1455
1456
                if (!strncasecmp(cur_locale, locale_table[i].locale,
1457
                                 strlen(locale_table[i].locale))) {
1458
                        out_charset = locale_table[i].out_charset;
1459
                        break;
1460
                } else if ((p = strchr(locale_table[i].locale, '_')) &&
1461
                         !strchr(p + 1, '.')) {
1462
                        if (strlen(cur_locale) == 2 &&
1463
                            !strncasecmp(cur_locale, locale_table[i].locale, 2)) {
1464
                                out_charset = locale_table[i].out_charset;
1465
                                break;
1466
                        }
1467
                }
1468
        }
1469
1470
        return out_charset;
1471
}
1472
1473
const gchar *conv_get_outgoing_charset_str(void)
1474
{
1475
        CharSet out_charset;
1476
        const gchar *str;
1477
1478
        if (prefs_common.outgoing_charset) {
1479
                if (!isalpha((guchar)prefs_common.outgoing_charset[0])) {
1480
                        g_free(prefs_common.outgoing_charset);
1481
                        prefs_common.outgoing_charset = g_strdup(CS_AUTO);
1482
                } else if (strcmp(prefs_common.outgoing_charset, CS_AUTO) != 0)
1483
                        return prefs_common.outgoing_charset;
1484
        }
1485
1486
        out_charset = conv_get_outgoing_charset();
1487
        str = conv_get_charset_str(out_charset);
1488
1489
        return str ? str : CS_UTF_8;
1490
}
1491
1492
/* Tell whether a charset can encode a character in more than one byte.
 * Returns TRUE for the CJK encodings and UTF-7/UTF-8 listed below, FALSE for
 * everything else.  GBK and Big5-HKSCS are double-byte supersets of GB2312
 * and Big5 respectively and are therefore included as well. */
gboolean conv_is_multibyte_encoding(CharSet encoding)
{
        switch (encoding) {
        case C_EUC_JP:
        case C_EUC_JP_MS:
        case C_EUC_KR:
        case C_EUC_TW:
        case C_EUC_CN:
        case C_ISO_2022_JP:
        case C_ISO_2022_JP_2:
        case C_ISO_2022_JP_3:
        case C_ISO_2022_KR:
        case C_ISO_2022_CN:
        case C_SHIFT_JIS:
        case C_GB2312:
        case C_GBK:
        case C_BIG5:
        case C_BIG5_HKSCS:
        case C_UTF_8:
        case C_UTF_7:
                return TRUE;
        default:
                return FALSE;
        }
}
1515
1516
/* Return the name of the current character-type locale, or NULL if none can
 * be determined.
 *
 * The environment is consulted in the order LC_ALL, LC_CTYPE, LANG; a
 * variable that is unset OR set to the empty string is skipped, as an empty
 * value must be treated like an unset one.  If none of them is usable,
 * setlocale(LC_CTYPE, NULL) is queried. */
const gchar *conv_get_current_locale(void)
{
        const gchar *cur_locale;

        cur_locale = g_getenv("LC_ALL");
        if (!cur_locale || *cur_locale == '\0')
                cur_locale = g_getenv("LC_CTYPE");
        if (!cur_locale || *cur_locale == '\0')
                cur_locale = g_getenv("LANG");
        if (!cur_locale || *cur_locale == '\0')
                cur_locale = setlocale(LC_CTYPE, NULL);

        debug_print("current locale: %s\n",
                    cur_locale ? cur_locale : "(none)");

        return cur_locale;
}
1530
1531
/* Decode MIME encoded-words of a header value in place.
 *
 * In an EUC-JP locale the text is first passed through conv_anytodisp() into
 * a stack buffer of 2 * strlen(str) + 1 bytes and unmime_header() then writes
 * its result straight back into str.  In every other locale unmime_header()
 * decodes into a stack buffer of strlen(str) + 1 bytes, which is then copied
 * back over str.
 *
 * NOTE(review): both paths assume the decoded text never needs more room
 * than the original str occupies - confirm against unmime_header(). */
void conv_unmime_header_overwrite(gchar *str)
{
        gchar *buf;
        gint buflen;

        if (conv_get_locale_charset() == C_EUC_JP) {
                buflen = strlen(str) * 2 + 1;
                Xalloca(buf, buflen, return);
                conv_anytodisp(buf, buflen, str);
                unmime_header(str, buf);
                return;
        }

        buflen = strlen(str) + 1;
        Xalloca(buf, buflen, return);
        unmime_header(buf, str);
        strncpy2(str, buf, buflen);
}
1551
1552
/*
 * conv_unmime_header:
 * @outbuf:  destination buffer that receives the decoded header.
 * @outlen:  size of @outbuf; not consulted by this function.
 * @str:     NUL-terminated header text to decode.
 * @charset: not consulted by this function.
 *
 * Decodes @str into @outbuf with unmime_header().  When the locale
 * charset is C_EUC_JP, @str is first run through conv_anytodisp() into
 * a scratch buffer of strlen(@str) * 2 + 1 bytes and that converted
 * text is what gets decoded.
 *
 * The scratch buffer is taken with Xalloca(); if that fails the function
 * returns without writing to @outbuf.
 */
void conv_unmime_header(gchar *outbuf, gint outlen, const gchar *str,
                        const gchar *charset)
{
        gchar *disp;
        gint disp_size;

        if (conv_get_locale_charset() != C_EUC_JP) {
                unmime_header(outbuf, str);
                return;
        }

        disp_size = strlen(str) * 2 + 1;
        Xalloca(disp, disp_size, return);
        conv_anytodisp(disp, disp_size, str);
        unmime_header(outbuf, disp);
}
1570
1571
/* Column budget for one output line when folding an encoded header. */
#define MAX_LINELEN                76
/* Larger limit; conv_encode_header() uses the difference between this
   and MAX_LINELEN as extra slack before breaking inside an ASCII word. */
#define MAX_HARD_LINELEN        996
/* Delimiters that open and close a MIME encoded block. */
#define MIMESEP_BEGIN                "=?"
#define MIMESEP_END                "?="

/* Size of the base64 text for `len' input bytes: four output characters
   per group of three input bytes, with a partial group rounded up. */
#define B64LEN(len)        ((len) / 3 * 4 + ((len) % 3 ? 4 : 0))

/*
 * LBREAK_IF_REQUIRED(cond, is_plain_text)
 *
 * Helper for conv_encode_header(); it reads and modifies that function's
 * locals `dest', `len', `destp', `srcp' and `left'.
 *
 *  - If fewer than MAX_LINELEN + 2 bytes remain in `dest', the output is
 *    NUL-terminated and the ENCLOSING FUNCTION RETURNS.
 *  - Otherwise, when `cond' holds, input remains, something has already
 *    been written and the current line is not a fresh one
 *    (left < MAX_LINELEN - 1), a fold is made: a trailing whitespace
 *    character already written is dropped, or (plain text only) a
 *    leading whitespace character of the remaining input is skipped;
 *    then, if input still remains, "\n " is emitted and `left' is reset
 *    to MAX_LINELEN - 1.
 *
 * The expansion is a bare brace block, not do { } while (0), so that it
 * also works at call sites written without a trailing semicolon.
 */
#define LBREAK_IF_REQUIRED(cond, is_plain_text)                                \
{                                                                        \
        if (len - (destp - (guchar *)dest) < MAX_LINELEN + 2) {                \
                *destp = '\0';                                                \
                return;                                                        \
        }                                                                \
                                                                        \
        if ((cond) && *srcp &&                                                \
            destp > (guchar *)dest && left < MAX_LINELEN - 1) {                \
                if (isspace(*(destp - 1)))                                \
                        destp--;                                        \
                else if (is_plain_text && isspace(*srcp))                \
                        srcp++;                                                \
                if (*srcp) {                                                \
                        *destp++ = '\n';                                \
                        *destp++ = ' ';                                        \
                        left = MAX_LINELEN - 1;                                \
                }                                                        \
        }                                                                \
}
1599
1600
/*
 * conv_encode_header:
 * @dest:       output buffer; always left NUL-terminated.
 * @len:        size of @dest in bytes.
 * @src:        header value to encode; must be valid UTF-8.
 * @header_len: number of columns already used on the first line (it is
 *              subtracted from MAX_LINELEN to get the initial budget).
 * @addr_field: when TRUE, '(' and ')' are copied literally and never
 *              placed inside an encoded block.
 *
 * Copies ASCII words of @src to @dest unchanged and converts every run
 * containing non-ASCII text from the internal charset to the outgoing
 * charset, emitting it as one or more "=?charset?B?...?=" (base64, used
 * when MB_CUR_MAX > 1) or "=?charset?Q?...?=" (Q-encoding) blocks.
 * Lines are folded with "\n " by LBREAK_IF_REQUIRED so that each stays
 * within MAX_LINELEN columns where possible.  US-ASCII as the outgoing
 * charset is replaced by ISO-8859-1.
 *
 * If @src is not valid UTF-8, @dest is set to the empty string and the
 * function returns.  If @dest runs short of space (fewer than
 * MAX_LINELEN + 2 bytes left), the output is terminated at that point
 * and the function returns early via LBREAK_IF_REQUIRED.
 */
void conv_encode_header(gchar *dest, gint len, const gchar *src,
                        gint header_len, gboolean addr_field)
{
        const gchar *cur_encoding;
        const gchar *out_encoding;
        gint mimestr_len;
        gchar *mimesep_enc;
        gint left;
        const guchar *srcp = (const guchar *)src;
        guchar *destp = (guchar *)dest;
        gboolean use_base64;

        /* make sure the caller never sees an unterminated buffer, even
           if the validation below bails out */
        *dest = '\0';

        g_return_if_fail(g_utf8_validate(src, -1, NULL) == TRUE);

        if (MB_CUR_MAX > 1) {
                use_base64 = TRUE;
                mimesep_enc = "?B?";
        } else {
                use_base64 = FALSE;
                mimesep_enc = "?Q?";
        }

        cur_encoding = conv_get_internal_charset_str();
        out_encoding = conv_get_outgoing_charset_str();
        if (!strcmp(out_encoding, CS_US_ASCII))
                out_encoding = CS_ISO_8859_1;

        /* fixed overhead of one encoded block: "=?" charset "?X?" "?=" */
        mimestr_len = strlen(MIMESEP_BEGIN) + strlen(out_encoding) +
                strlen(mimesep_enc) + strlen(MIMESEP_END);

        /* columns still available on the current output line */
        left = MAX_LINELEN - header_len;

        while (*srcp) {
                LBREAK_IF_REQUIRED(left <= 0, TRUE);

                /* copy whitespace through unchanged */
                while (isspace(*srcp)) {
                        *destp++ = *srcp++;
                        left--;
                        LBREAK_IF_REQUIRED(left <= 0, TRUE);
                }

                /* output as it is if the next word is ASCII string */
                if (!is_next_nonascii(srcp)) {
                        gint word_len;

                        word_len = get_next_word_len(srcp);
                        LBREAK_IF_REQUIRED(left < word_len, TRUE);
                        while (word_len > 0) {
                                /* inside a word, only break once the
                                   hard line limit is reached */
                                LBREAK_IF_REQUIRED(left + (MAX_HARD_LINELEN - MAX_LINELEN) <= 0, TRUE);
                                *destp++ = *srcp++;
                                left--;
                                word_len--;
                        }

                        continue;
                }

                /* don't include parentheses in encoded strings */
                if (addr_field && (*srcp == '(' || *srcp == ')')) {
                        LBREAK_IF_REQUIRED(left < 2, FALSE);
                        *destp++ = *srcp++;
                        left--;
                }

                /* each pass emits at most one encoded block */
                while (1) {
                        gint mb_len = 0;
                        gint cur_len = 0;
                        gchar *part_str;
                        gchar *out_str;
                        gchar *enc_str;
                        const guchar *p = srcp;
                        gint out_str_len;
                        gint out_enc_str_len;
                        gint mime_block_len;
                        gboolean cont = FALSE;

                        /* grow the candidate one UTF-8 character at a
                           time for as long as its encoded form still
                           fits on the current line */
                        while (*p != '\0') {
                                if (isspace(*p) && !is_next_nonascii(p + 1))
                                        break;
                                /* don't include parentheses in encoded
                                   strings */
                                if (addr_field && (*p == '(' || *p == ')'))
                                        break;

                                mb_len = g_utf8_skip[*p];

                                Xstrndup_a(part_str, srcp, cur_len + mb_len, );
                                out_str = conv_codeset_strdup
                                        (part_str, cur_encoding, out_encoding);
                                if (!out_str) {
                                        g_warning("conv_encode_header(): code conversion failed\n");
                                        conv_unreadable_8bit(part_str);
                                        out_str = g_strdup(part_str);
                                }
                                out_str_len = strlen(out_str);

                                if (use_base64)
                                        out_enc_str_len = B64LEN(out_str_len);
                                else
                                        out_enc_str_len =
                                                qp_get_q_encoding_len(out_str);

                                g_free(out_str);

                                if (mimestr_len + out_enc_str_len <= left) {
                                        cur_len += mb_len;
                                        p += mb_len;
                                } else if (cur_len == 0) {
                                        gint prev_left = left;

                                        /* not even one character fits:
                                           try to start a new line */
                                        LBREAK_IF_REQUIRED(1, FALSE);
                                        if (left == prev_left) {
                                                /* No fold was possible
                                                   (nothing written yet, or
                                                   the line is already a
                                                   fresh one).  Take the
                                                   character anyway so the
                                                   loop always advances
                                                   instead of spinning. */
                                                cur_len += mb_len;
                                                p += mb_len;
                                        }
                                        continue;
                                } else {
                                        /* block is full; more follows */
                                        cont = TRUE;
                                        break;
                                }
                        }

                        if (cur_len > 0) {
                                Xstrndup_a(part_str, srcp, cur_len, );
                                out_str = conv_codeset_strdup
                                        (part_str, cur_encoding, out_encoding);
                                if (!out_str) {
                                        g_warning("conv_encode_header(): code conversion failed\n");
                                        conv_unreadable_8bit(part_str);
                                        out_str = g_strdup(part_str);
                                }
                                out_str_len = strlen(out_str);

                                if (use_base64)
                                        out_enc_str_len = B64LEN(out_str_len);
                                else
                                        out_enc_str_len =
                                                qp_get_q_encoding_len(out_str);

                                Xalloca(enc_str, out_enc_str_len + 1, );
                                if (use_base64)
                                        base64_encode(enc_str, out_str, out_str_len);
                                else
                                        qp_q_encode(enc_str, out_str);

                                g_free(out_str);

                                /* output MIME-encoded string block */
                                mime_block_len = mimestr_len + strlen(enc_str);
                                g_snprintf((gchar *)destp, mime_block_len + 1,
                                           MIMESEP_BEGIN "%s%s%s" MIMESEP_END,
                                           out_encoding, mimesep_enc, enc_str);
                                destp += mime_block_len;
                                srcp += cur_len;

                                left -= mime_block_len;
                        }

                        /* fold before the next block of the same run */
                        LBREAK_IF_REQUIRED(cont, FALSE);

                        if (cur_len == 0)
                                break;
                }
        }

        *destp = '\0';
}
1761
1762
#undef LBREAK_IF_REQUIRED