Statistics
| Branch: | Tag: | Revision:

root / src / sylfilter.c @ aebfd4cc

History | View | Annotate | Download (7.49 KB)

1
/* SylFilter - a message filter
2
 *
3
 * Copyright (C) 2011 Hiroyuki Yamamoto
4
 * Copyright (C) 2011 Sylpheed Development Team
5
 */
6

    
7
#include <stdio.h>
8
#include <string.h>
9
#include <locale.h>
10

    
11
#include "filter.h"
12
#include "filter-manager.h"
13
#include "filter-utils.h"
14
#include "filter-kvs.h"
15

    
16
#define USE_QDBM 1
17
#ifdef USE_QDBM
18
#  include "filter-kvs-qdbm.h"
19
#endif
20

    
21
#include "textcontent-filter.h"
22
#include "blacklist-filter.h"
23
#include "whitelist-filter.h"
24
#include "wordsep-filter.h"
25
#include "ngram-filter.h"
26
#include "bayes-filter.h"
27

    
28
/* Operation modes; main() selects one from the command-line options. */
enum {
29
        MODE_TEST_JUNK,         /* -t, and the default: classify messages */
30
        MODE_LEARN_JUNK,        /* -j: learn messages as junk */
31
        MODE_LEARN_CLEAN,       /* -c: learn messages as clean */
32
        MODE_UNLEARN_JUNK,      /* -J: unlearn messages previously learned as junk */
33
        MODE_UNLEARN_CLEAN,     /* -C: unlearn messages previously learned as clean */
34
        MODE_SHOW_STATUS        /* -s: show the bayes database contents */
35
};
36

    
37
/* Directory name appended to the home directory in main() to form the
 * base directory / bayes database path. */
#define DB_DIR        ".sylfilter"
38

    
39

    
40
/* Non-zero when -v was given; enables the extra progress and result output. */
static int verbose = 0;
41

    
42
/* Forward declarations of the file-local helpers defined below main(). */
static int learn_filter(int mode, const char *file);
43
static int test_filter(int mode, const char *file);
44
static void print_message_data(XMessageData *msgdata);
45
static void usage(void);
46

    
47

    
48
int main(int argc, char *argv[])
49
{
50
        int retval = 2;
51
        int i;
52
        int mode = MODE_TEST_JUNK;
53
        int count = 0;
54
        char dbpath[1024];
55

    
56
        setlocale(LC_ALL, "");
57

    
58
        xfilter_init();
59

    
60
        for (i = 1; i < argc; i++) {
61
                if (!strcmp(argv[i], "-j"))
62
                        mode = MODE_LEARN_JUNK;
63
                else if (!strcmp(argv[i], "-c"))
64
                        mode = MODE_LEARN_CLEAN;
65
                if (!strcmp(argv[i], "-J"))
66
                        mode = MODE_UNLEARN_JUNK;
67
                else if (!strcmp(argv[i], "-C"))
68
                        mode = MODE_UNLEARN_CLEAN;
69
                else if (!strcmp(argv[i], "-t"))
70
                        mode = MODE_TEST_JUNK;
71
                else if (!strcmp(argv[i], "-s"))
72
                        mode = MODE_SHOW_STATUS;
73
                else if (!strcmp(argv[i], "-v"))
74
                        verbose = 1;
75
                else if (!strcmp(argv[i], "-d"))
76
                        xfilter_set_debug_mode(1);
77
                else if (!strcmp(argv[i], "-h")) {
78
                        usage();
79
                        xfilter_done();
80
                        return 0;
81
                }
82
        }
83

    
84
#ifdef USE_QDBM
85
        xfilter_kvs_qdbm_set_engine();
86
#endif
87

    
88
        snprintf(dbpath, sizeof(dbpath), "%s/" DB_DIR, xfilter_utils_get_home_dir());
89
        xfilter_utils_set_base_dir(dbpath);
90

    
91
        if (xfilter_bayes_db_init(dbpath) < 0) {
92
                fprintf(stderr, "Database initialization error.\n");
93
                xfilter_done();
94
                return 127;
95
        }
96

    
97
        if (mode == MODE_SHOW_STATUS) {
98
                retval = xfilter_bayes_db_show_contents();
99
        } else if (mode == MODE_LEARN_JUNK || mode == MODE_LEARN_CLEAN ||
100
                   mode == MODE_UNLEARN_JUNK || mode == MODE_UNLEARN_CLEAN) {
101
                for (i = 1; i < argc; i++) {
102
                        if (argv[i][0] != '-') {
103
                                retval = learn_filter(mode, argv[i]);
104
                                if (retval != 0)
105
                                        break;
106
                                count++;
107
                        }
108
                }
109
        } else {
110
                for (i = 1; i < argc; i++) {
111
                        if (argv[i][0] != '-') {
112
                                retval = test_filter(mode, argv[i]);
113
                                if (retval == 127)
114
                                        break;
115
                                count++;
116
                        }
117
                }
118
        }
119

    
120
        xfilter_bayes_db_done();
121
        xfilter_done();
122

    
123
        if (count == 0)
124
                fprintf(stderr, "No input file.\n");
125

    
126
        if (verbose)
127
                printf("return value: %d\n", retval);
128

    
129
        return retval;
130
}
131

    
132
static int learn_filter(int mode, const char *file)
133
{
134
        XFilterManager *mgr;
135
        XMessageData *msgdata;
136
        XMessageData *resdata;
137
        XFilterResult *res;
138
        XFilterStatus status;
139
        int retval = 0;
140

    
141
        static XFilterConstructorFunc learn_junk_ctors[] = {
142
                xfilter_textcontent_new,
143
                xfilter_wordsep_new,
144
                xfilter_ngram_new,
145
                xfilter_bayes_learn_junk_new,
146
                NULL
147
        };
148

    
149
        static XFilterConstructorFunc learn_nojunk_ctors[] = {
150
                xfilter_textcontent_new,
151
                xfilter_wordsep_new,
152
                xfilter_ngram_new,
153
                xfilter_bayes_learn_nojunk_new,
154
                NULL
155
        };
156

    
157
        static XFilterConstructorFunc unlearn_junk_ctors[] = {
158
                xfilter_textcontent_new,
159
                xfilter_wordsep_new,
160
                xfilter_ngram_new,
161
                xfilter_bayes_unlearn_junk_new,
162
                NULL
163
        };
164

    
165
        static XFilterConstructorFunc unlearn_nojunk_ctors[] = {
166
                xfilter_textcontent_new,
167
                xfilter_wordsep_new,
168
                xfilter_ngram_new,
169
                xfilter_bayes_unlearn_nojunk_new,
170
                NULL
171
        };
172

    
173
        if (verbose)
174
                printf("learning message file: %s\n", file);
175

    
176
        mgr = xfilter_manager_new();
177

    
178
        switch (mode) {
179
        case MODE_LEARN_JUNK:
180
                xfilter_manager_add_filters(mgr, learn_junk_ctors);
181
                break;
182
        case MODE_LEARN_CLEAN:
183
                xfilter_manager_add_filters(mgr, learn_nojunk_ctors);
184
                break;
185
        case MODE_UNLEARN_JUNK:
186
                xfilter_manager_add_filters(mgr, unlearn_junk_ctors);
187
                break;
188
        case MODE_UNLEARN_CLEAN:
189
                xfilter_manager_add_filters(mgr, unlearn_nojunk_ctors);
190
                break;
191
        default:
192
                fprintf(stderr, "Internal error: invalid learn mode\n");
193
                xfilter_manager_free(mgr);
194
                return 127;
195
        }
196

    
197
        msgdata = xfilter_message_data_read_file(file, "message/rfc822");
198

    
199
        res = xfilter_manager_run(mgr, msgdata);
200
        if (verbose)
201
                xfilter_result_print(res);
202
        status = xfilter_result_get_status(res);
203
        if (status == XF_UNSUPPORTED_TYPE || status == XF_ERROR) {
204
                fprintf(stderr, "%s: Error on learning mail\n", file);
205
                retval = 127;
206
        }
207

    
208
        if (xfilter_get_debug_mode()) {
209
                resdata = xfilter_result_get_message_data(res);
210
                print_message_data(resdata);
211
        }
212

    
213
        xfilter_result_free(res);
214
        xfilter_message_data_free(msgdata);
215
        xfilter_manager_free(mgr);
216

    
217
        return retval;
218
}
219

    
220
static int test_filter(int mode, const char *file)
221
{
222
        XFilterManager *mgr;
223
        XMessageData *msgdata;
224
        XMessageData *resdata;
225
        XFilterResult *res;
226
        XFilterStatus status;
227
        int retval = 0;
228

    
229
        XFilterConstructorFunc ctors[] = {
230
                xfilter_textcontent_new,
231
                xfilter_blacklist_new,
232
                xfilter_whitelist_new,
233
                xfilter_wordsep_new,
234
                xfilter_ngram_new,
235
                xfilter_bayes_new,
236
                NULL
237
        };
238

    
239
        if (verbose)
240
                printf("testing message file: %s\n", file);
241

    
242
        mgr = xfilter_manager_new();
243
        xfilter_manager_add_filters(mgr, ctors);
244

    
245
        msgdata = xfilter_message_data_read_file(file, "message/rfc822");
246

    
247
        res = xfilter_manager_run(mgr, msgdata);
248
        if (verbose)
249
                xfilter_result_print(res);
250
        status = xfilter_result_get_status(res);
251
        if (status == XF_JUNK) {
252
                printf("%s: This is a junk mail (prob: %f)\n", file, xfilter_result_get_probability(res));
253
                retval = 0;
254
        } else if (status == XF_UNCERTAIN) {
255
                printf("%s: This mail could not be classified (prob: %f)\n", file, xfilter_result_get_probability(res));
256
                retval = 2;
257
        } else if (status == XF_UNSUPPORTED_TYPE || status == XF_ERROR) {
258
                printf("%s: Error on testing mail\n", file);
259
                retval = 127;
260
        } else {
261
                printf("%s: This is a clean mail (prob: %f)\n", file, xfilter_result_get_probability(res));
262
                retval = 1;
263
        }
264

    
265
        if (xfilter_get_debug_mode()) {
266
                resdata = xfilter_result_get_message_data(res);
267
                print_message_data(resdata);
268
        }
269

    
270
        xfilter_result_free(res);
271
        xfilter_message_data_free(msgdata);
272

    
273
        xfilter_manager_free(mgr);
274

    
275
        return retval;
276
}
277

    
278
/*
 * Dump the from/to/cc/subject attributes and the content of a message to
 * stdout, one "name: value" line each, preceded by an empty line.
 *
 * Fields whose getter returns NULL are skipped. Does nothing when msgdata
 * is NULL.
 */
static void print_message_data(XMessageData *msgdata)
{
        const char *content;

        if (!msgdata)
                return;

        printf("\n");

        content = xfilter_message_data_get_attribute(msgdata, XM_FROM);
        if (content)
                printf("from: %s\n", content);
        content = xfilter_message_data_get_attribute(msgdata, XM_TO);
        if (content)
                printf("to: %s\n", content);
        content = xfilter_message_data_get_attribute(msgdata, XM_CC);
        if (content)
                printf("cc: %s\n", content);
        content = xfilter_message_data_get_attribute(msgdata, XM_SUBJECT);
        if (content)
                printf("subject: %s\n", content);
        content = xfilter_message_data_get_content(msgdata);
        /* Passing NULL for %s is undefined behaviour, so guard the content
         * the same way as the attributes above. */
        if (content)
                printf("content: %s\n", content);
}
302

    
303
/*
 * Print the command-line help text to stdout: version banner, synopsis,
 * the list of options, the database location and the exit codes.
 */
static void usage(void)
{
        fputs("SylFilter (tentative name) version 0.0.8\n"
              "\n"
              "Usage: sylfilter [options] message [message ...]\n"
              "\n"
              "Options:\n"
              "  -j  learn junk (spam) messages\n"
              "  -c  learn clean (non-spam) messages\n"
              "  -J  unlearn junk (spam) messages\n"
              "  -C  unlearn clean (non-spam) messages\n"
              "  -t  classify messages\n"
              /* -s is accepted by the option parser but was undocumented. */
              "  -s  show database status\n"
              "  -v  show verbose messages\n"
              "  -d  show debug messages\n"
              "  -h  print this help message\n"
              "\n"
              "Database will be created at ~/.sylfilter/*.db\n"
              "\n"
              "Return values:\n"
              "  0   junk (spam)\n"
              "  1   clean (non-spam)\n"
              "  2   uncertain\n"
              "  127 other errors\n",
              stdout);
}