Statistics
| Branch: | Tag: | Revision:

root / lib / filters / bayes-filter.c @ d75428f4

History | View | Annotate | Download (17.6 kB)

1
/* SylFilter - a message filter
2
 *
3
 * Copyright (C) 2011 Hiroyuki Yamamoto
4
 * Copyright (C) 2011 Sylpheed Development Team
5
 */
6
7
#include <glib.h>
8
#include <stdio.h>
9
#include <string.h>
10
#include <unistd.h>
11
#include <errno.h>
12
13
#include "filter.h"
14
#include "filter-kvs.h"
15
#include "filter-utils.h"
16
#include "bayes-filter.h"
17
18
#include "libsylph/utils.h"
19
20
#define N_TOKENS 15
21
#undef USE_STATUS_KVS
22
23
static XFilterKVS *junk_kvs;
24
static XFilterKVS *clean_kvs;
25
#ifdef USE_STATUS_KVS
26
static XFilterKVS *prob_kvs;
27
#else
28
static XFilterBayesLearnStatus learn_status;
29
static FILE *status_fp;
30
#endif
31
32
/* Test */
33
34
typedef struct _XFilterBayesProbData
35
{
36
        GArray *array;
37
        XFilterBayesLearnStatus status;
38
} XFilterBayesProbData;
39
40
/* One token of the message under test together with its score. */
typedef struct _XFilterKeyCount
{
        /* token text; points at the hash table key, not owned here */
        const char *key;
        /* occurrences of the token in the message */
        int count;
        /* junk probability assigned by xfilter_get_prob() */
        double prob;
} XFilterKeyCount;
46
47
/*
 * Split `text` on space characters and add one to the counter of every
 * word in `table`.
 *
 * table:  hash table mapping word -> occurrence count (stored as pointer)
 * prefix: when non-NULL, each word is stored as "<prefix>*<word>"
 * text:   space-separated words; NULL is accepted and ignored
 */
static void xfilter_bayes_content_word_freq(GHashTable *table, const char *prefix, const char *text)
{
        const char *cursor;

        if (text == NULL)
                return;

        cursor = text;
        for (;;) {
                const char *start;
                char *token;
                size_t len;
                int seen;

                /* step over the separators in front of the next word */
                cursor += strspn(cursor, " ");
                if (*cursor == '\0')
                        break;

                start = cursor;
                len = strcspn(start, " ");
                cursor = start + len;

                token = g_strndup(start, len);
                if (prefix != NULL) {
                        char *plain = token;

                        token = g_strconcat(prefix, "*", plain, NULL);
                        g_free(plain);
                }

                /* a missing entry looks up as NULL, i.e. a count of zero */
                seen = GPOINTER_TO_INT(g_hash_table_lookup(table, token));
                g_hash_table_insert(table, token, GINT_TO_POINTER(seen + 1));
        }
}
75
76
static GHashTable *xfilter_bayes_word_freq(const XMessageData *data)
77
{
78
        GHashTable *table;
79
        const char *content;
80
81
        table = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, NULL);
82
83
        content = xfilter_message_data_get_attribute(data, XM_FROM);
84
        xfilter_bayes_content_word_freq(table, "From", content);
85
        content = xfilter_message_data_get_attribute(data, XM_TO);
86
        xfilter_bayes_content_word_freq(table, "To", content);
87
        content = xfilter_message_data_get_attribute(data, XM_CC);
88
        xfilter_bayes_content_word_freq(table, "Cc", content);
89
        content = xfilter_message_data_get_attribute(data, XM_SUBJECT);
90
        xfilter_bayes_content_word_freq(table, "Subject", content);
91
92
        content = xfilter_message_data_get_content(data);
93
        xfilter_bayes_content_word_freq(table, NULL, content);
94
95
        return table;
96
}
97
98
static char *get_degenerated_word(const char *word)
99
{
100
        const char *p;
101
102
        if (!word)
103
                return NULL;
104
105
        if ((p = strchr(word, '*'))) {
106
                return g_strdup(p + 1);
107
        }
108
        if ((p = strchr(word, '!'))) {
109
                if (*(p + 1) == '!')
110
                        return g_strndup(word, p + 1 - word);
111
                else
112
                        return g_strndup(word, p - word);
113
        }
114
115
        for (p = word; *p != '\0'; p++) {
116
                if (g_ascii_isupper(*p))
117
                        return g_ascii_strdown(word, -1);
118
        }
119
120
        return NULL;
121
}
122
123
/*
 * Compute the junk probability of a single token.
 *
 * key:             token to look up in the junk and clean databases
 * status:          learn counters; junk_words / nojunk_words are the
 *                  denominators
 * do_degeneration: when TRUE and the token is unknown, retry with the
 *                  form returned by get_degenerated_word()
 *
 * Returns a clamped probability, or -1.0 when nothing has been learned
 * for one of the classes or the token (and its degenerated forms) is
 * unknown.
 */
static double xfilter_get_prob(const char *key, XFilterBayesLearnStatus *status, gboolean do_degeneration)
{
        /* clamp bounds for tokens seen 1..4 times (index = total - 1) */
        static const double few_upper[4] = { 0.6, 0.7, 0.8, 0.9 };
        static const double few_lower[4] = { 0.4, 0.3, 0.2, 0.1 };

        const int junk_total = status->junk_words;
        const int clean_total = status->nojunk_words;
        double ceiling = 0.999;
        double floor_ = 0.001;
        double junk_ratio, clean_ratio;
        double prob;
        int n_junk, n_clean, seen;

        if (junk_total < 1 || clean_total < 1)
                return -1.0;

        n_junk = xfilter_kvs_fetch_int(junk_kvs, key);
        /* the clean count is doubled before use */
        n_clean = xfilter_kvs_fetch_int(clean_kvs, key) * 2;
        seen = n_junk + n_clean;

        if (seen == 0) {
                char *deg_key;

                if (!do_degeneration)
                        return -1.0;

                deg_key = get_degenerated_word(key);
                if (deg_key == NULL)
                        return -1.0;

                xfilter_debug_print("[degen] %s -> %s\n", key, deg_key);
                prob = xfilter_get_prob(deg_key, status, TRUE);
                g_free(deg_key);
                return prob;
        }

        /* rarely seen tokens get a narrower allowed range */
        if (seen >= 1 && seen <= 4) {
                ceiling = few_upper[seen - 1];
                floor_ = few_lower[seen - 1];
        }

        junk_ratio = (double)n_junk / junk_total;
        clean_ratio = (double)n_clean / clean_total;
        prob = junk_ratio / (clean_ratio + junk_ratio);

        if (prob < floor_) {
                if (n_junk != 0)
                        prob = floor_ + 0.002;
                else if (n_clean > 10)
                        prob = floor_;
                else
                        prob = floor_ + 0.001;
        } else if (prob > ceiling) {
                if (n_clean != 0)
                        prob = ceiling - 0.002;
                else if (n_junk > 10)
                        prob = ceiling;
                else
                        prob = ceiling - 0.001;
        }

        xfilter_debug_print("%s: %4f (j: %d c: %d)\n", key, prob, n_junk, n_clean);

        return prob;
}
197
198
/*
 * GHFunc callback: score one token and append it to the result array.
 *
 * key:  token string (hash table key)
 * val:  occurrence count packed with GINT_TO_POINTER()
 * data: XFilterBayesProbData receiving the XFilterKeyCount entry
 *
 * Tokens without a usable probability are given 0.4.
 */
static void test_walk_func(gpointer key, gpointer val, gpointer data)
{
        XFilterBayesProbData *ctx = data;
        XFilterKeyCount entry;

        entry.key = (gchar *)key;
        entry.count = GPOINTER_TO_INT(val);
        entry.prob = xfilter_get_prob(entry.key, &ctx->status, TRUE);
        if (entry.prob < 0)
                entry.prob = 0.4;

        g_array_append_val(ctx->array, entry);
}
213
214
/*
 * GCompareFunc ordering XFilterKeyCount entries by how far their
 * probability is from the neutral value 0.5, most distant first.
 *
 * The distances are compared directly rather than by converting a scaled
 * difference to an integer: that conversion truncated differences below
 * 1e-4 to "equal", which made the ordering inconsistent for close values.
 *
 * Returns a negative value when `a` sorts before `b`, a positive value
 * when it sorts after, and 0 when both are equally distant from 0.5.
 */
static gint key_prob_compare_func(gconstpointer a, gconstpointer b)
{
        const XFilterKeyCount *kc1 = a;
        const XFilterKeyCount *kc2 = b;
        double da = ABS(0.5 - kc1->prob);
        double db = ABS(0.5 - kc2->prob);

        if (db > da)
                return 1;
        if (db < da)
                return -1;
        return 0;
}
224
225
/*
 * Test filter: classify a message from the learned token statistics.
 *
 * filter: unused
 * data:   message to classify; only "text/" MIME types are handled
 * result: receives the combined probability and the resulting status
 *
 * The N_TOKENS tokens whose probability is farthest from 0.5 are combined
 * into one probability. When either class has not been learned yet the
 * probability is fixed at 0.5.
 *
 * Returns XF_JUNK above 0.90, XF_NOJUNK below 0.10, XF_UNCERTAIN in
 * between, XF_UNSUPPORTED_TYPE for non-text messages, and XF_ERROR when
 * `result` is NULL or a token database is not open.
 */
static XFilterStatus xfilter_bayes_func(XFilter *filter, const XMessageData *data, XFilterResult *result)
{
        const char *type;
        GHashTable *table;
        XFilterBayesProbData pdata;
        guint i;
        double prod = 1.0, prod_rev = 1.0;
        double cmb_prob;
        XFilterStatus status;

        g_return_val_if_fail(result != NULL, XF_ERROR);

        type = xfilter_message_data_get_mime_type(data);
        if (!type || g_strncasecmp(type, "text/", 5) != 0) {
                xfilter_result_set_status(result, XF_UNSUPPORTED_TYPE);
                return XF_UNSUPPORTED_TYPE;
        }

        /* both databases are queried below, so both must be open */
        if (!junk_kvs || !clean_kvs) {
                g_warning("Cannot open %s database", !junk_kvs ? "junk" : "clean");
                xfilter_result_set_status(result, XF_ERROR);
                return XF_ERROR;
        }

        xfilter_debug_print("bayes-guessing message\n");

        xfilter_bayes_get_learn_status(&pdata.status);
        if (pdata.status.junk_learned_num < 1) {
                xfilter_debug_print("junk message not learned yet\n");
                cmb_prob = 0.5;
        } else if (pdata.status.nojunk_learned_num < 1) {
                xfilter_debug_print("clean message not learned yet\n");
                cmb_prob = 0.5;
        } else {
                xfilter_debug_print("\ncalculating probability for each tokens:\n");
                table = xfilter_bayes_word_freq(data);
                pdata.array = g_array_sized_new(FALSE, FALSE, sizeof(XFilterKeyCount), 128);

                xfilter_kvs_begin(junk_kvs);
                xfilter_kvs_begin(clean_kvs);
                g_hash_table_foreach(table, test_walk_func, &pdata);
                xfilter_kvs_end(junk_kvs);
                xfilter_kvs_end(clean_kvs);
                g_array_sort(pdata.array, key_prob_compare_func);

                xfilter_debug_print("\nmost interesting tokens:\n");
                for (i = 0; i < N_TOKENS && i < pdata.array->len; i++) {
                        XFilterKeyCount kc = g_array_index(pdata.array, XFilterKeyCount, i);

                        prod *= kc.prob;
                        prod_rev *= 1 - kc.prob;
                        xfilter_debug_print("%s: %d %4f\n", kc.key, kc.count, kc.prob);
                }

                /* with no tokens both products stay 1.0, giving 0.5 */
                cmb_prob = prod / (prod + prod_rev);
                xfilter_debug_print("\ncombined probability: %4f\n", cmb_prob);

                /* entries point into the table keys: free the array first */
                g_array_free(pdata.array, TRUE);
                g_hash_table_destroy(table);
        }

        xfilter_result_set_probability(result, cmb_prob);
        if (cmb_prob > 0.90)
                status = XF_JUNK;
        else if (cmb_prob < 0.10)
                status = XF_NOJUNK;
        else
                status = XF_UNCERTAIN;
        xfilter_result_set_status(result, status);

        return status;
}
297
298
XFilter *xfilter_bayes_new(void)
299
{
300
        XFilter *filter;
301
302
        filter = xfilter_new(XF_TEST, "bayes-test");
303
        xfilter_set_test_filter_func(X_TEST_FILTER(filter), xfilter_bayes_func);
304
305
        return filter;
306
}
307
308
309
/* Learning */
310
311
typedef struct _XFilterLearnWalkData
312
{
313
        XFilterKVS *kvs;
314
        int sum;
315
} XFilterLearnWalkData;
316
317
/*
 * GHFunc callback: add one token's count to the database in `data`
 * (an XFilterLearnWalkData) and to its running sum. A failed database
 * update is reported with a warning; the sum is increased regardless.
 */
static void learn_walk_func(gpointer key, gpointer val, gpointer data)
{
        XFilterLearnWalkData *walk = data;
        const int count = GPOINTER_TO_INT(val);
        const int rc = xfilter_kvs_increment(walk->kvs, (gchar *)key, count);

        if (rc < 0)
                g_warning("database update error");

        walk->sum += count;
}
326
327
/*
 * GHFunc callback: subtract one token's count from the database passed
 * as `data` (an XFilterKVS). A failed update is reported with a warning.
 */
static void unlearn_walk_func(gpointer key, gpointer val, gpointer data)
{
        XFilterKVS *db = data;
        const int count = GPOINTER_TO_INT(val);
        const int rc = xfilter_kvs_decrement(db, (gchar *)key, count);

        if (rc < 0)
                g_warning("database update error");
}
335
336
/*
 * Update the learn counters after a message was learned or unlearned and
 * persist them.
 *
 * is_junk:     TRUE to update the junk counters, FALSE for the clean ones
 * is_register: TRUE after learning, FALSE after unlearning
 * sum_add:     when learning, the number of word occurrences to add to
 *              the word sum; when unlearning, the new absolute word sum
 *
 * With USE_STATUS_KVS the counters live in prob_kvs. Otherwise they are
 * kept in learn_status and the status file is rewritten from scratch.
 * If the status file is not open, only the in-memory counters change.
 */
static void xfilter_update_status(gboolean is_junk, gboolean is_register, int sum_add)
{
#ifdef USE_STATUS_KVS
        xfilter_kvs_begin(prob_kvs);
        if (is_register) {
                if (is_junk) {
                        xfilter_kvs_increment(prob_kvs, "@junk_words_sum", sum_add);
                        xfilter_kvs_increment(prob_kvs, "@junk_learn_count", 1);
                } else {
                        xfilter_kvs_increment(prob_kvs, "@clean_words_sum", sum_add);
                        xfilter_kvs_increment(prob_kvs, "@clean_learn_count", 1);
                }
        } else {
                if (is_junk) {
                        xfilter_kvs_set_int(prob_kvs, "@junk_words_sum", sum_add);
                        xfilter_kvs_decrement(prob_kvs, "@junk_learn_count", 1);
                } else {
                        xfilter_kvs_set_int(prob_kvs, "@clean_words_sum", sum_add);
                        xfilter_kvs_decrement(prob_kvs, "@clean_learn_count", 1);
                }
        }
        xfilter_kvs_end(prob_kvs);
#else /* !USE_STATUS_KVS */
        if (is_register) {
                if (is_junk) {
                        learn_status.junk_words += sum_add;
                        learn_status.junk_learned_num++;
                } else {
                        learn_status.nojunk_words += sum_add;
                        learn_status.nojunk_learned_num++;
                }
        } else {
                /* on unlearn sum_add is the recomputed total, not a delta */
                if (is_junk) {
                        learn_status.junk_words = sum_add;
                        if (learn_status.junk_learned_num > 0)
                                learn_status.junk_learned_num--;
                } else {
                        learn_status.nojunk_words = sum_add;
                        if (learn_status.nojunk_learned_num > 0)
                                learn_status.nojunk_learned_num--;
                }
        }

        /* nothing to write to if the status file was never opened */
        if (!status_fp) {
                g_warning("xfilter_update_status: status file is not open");
                return;
        }

        /* best effort: report a failed truncation but still rewrite */
        if (ftruncate(fileno(status_fp), 0) < 0)
                perror("ftruncate");
        rewind(status_fp);
        fprintf(status_fp, "@junk_words_sum=%d\n", learn_status.junk_words);
        fprintf(status_fp, "@junk_learn_count=%d\n", learn_status.junk_learned_num);
        fprintf(status_fp, "@clean_words_sum=%d\n", learn_status.nojunk_words);
        fprintf(status_fp, "@clean_learn_count=%d\n", learn_status.nojunk_learned_num);

        xfilter_debug_print("xfilter_update_status: writing status to file\n");

        if (fflush(status_fp) < 0) {
                perror("fflush");
                return;
        }
#if HAVE_FSYNC
        if (fsync(fileno(status_fp)) < 0) {
                perror("fsync");
        }
#elif defined(G_OS_WIN32)
        if (_commit(_fileno(status_fp)) < 0) {
                perror("_commit");
        }
#endif

        xfilter_debug_print("xfilter_update_status: done\n");
#endif /* !USE_STATUS_KVS */
}
405
406
/*
 * Learn or unlearn one message as junk or clean.
 *
 * filter:      unused
 * data:        message to process; only "text/" MIME types are handled
 * result:      receives the resulting status
 * is_junk:     TRUE to use the junk database, FALSE for the clean one
 * is_register: TRUE to add the message's word counts, FALSE to subtract
 *
 * Returns XF_NONE on completion, XF_UNSUPPORTED_TYPE for non-text
 * messages, and XF_ERROR when `result` is NULL or the database is not
 * open.
 */
static XFilterStatus xfilter_bayes_learn(XFilter *filter, const XMessageData *data, XFilterResult *result, gboolean is_junk, gboolean is_register)
{
        const char *mime;
        GHashTable *words;
        XFilterKVS *db;
        int sum;

        g_return_val_if_fail(result != NULL, XF_ERROR);

        mime = xfilter_message_data_get_mime_type(data);
        if (mime == NULL || g_strncasecmp(mime, "text/", 5) != 0) {
                xfilter_result_set_status(result, XF_UNSUPPORTED_TYPE);
                return XF_UNSUPPORTED_TYPE;
        }

        db = is_junk ? junk_kvs : clean_kvs;
        if (db == NULL) {
                g_warning("xfilter_bayes_learn: Cannot open database");
                xfilter_result_set_status(result, XF_ERROR);
                return XF_ERROR;
        }

        xfilter_debug_print("%slearning %s message\n", is_register ? "" : "un", is_junk ? "junk" : "clean");

        words = xfilter_bayes_word_freq(data);
        xfilter_kvs_begin(db);
        if (!is_register) {
                /* unlearn: subtract, then take the new total from the db */
                g_hash_table_foreach(words, unlearn_walk_func, db);
                sum = xfilter_kvs_count_sum(db);
        } else {
                /* learn: add, collecting how many occurrences were added */
                XFilterLearnWalkData walk;

                walk.kvs = db;
                walk.sum = 0;
                g_hash_table_foreach(words, learn_walk_func, &walk);
                sum = walk.sum;
        }
        xfilter_kvs_end(db);
        g_hash_table_destroy(words);

        xfilter_update_status(is_junk, is_register, sum);

        xfilter_result_set_status(result, XF_NONE);
        return XF_NONE;
}
453
454
/* Content filter callback: learn the message as junk. */
static XFilterStatus xfilter_bayes_learn_junk_func(XFilter *filter, const XMessageData *data, XFilterResult *result)
{
        const gboolean is_junk = TRUE;
        const gboolean is_register = TRUE;

        return xfilter_bayes_learn(filter, data, result, is_junk, is_register);
}
458
459
/* Content filter callback: learn the message as clean. */
static XFilterStatus xfilter_bayes_learn_nojunk_func(XFilter *filter, const XMessageData *data, XFilterResult *result)
{
        const gboolean is_junk = FALSE;
        const gboolean is_register = TRUE;

        return xfilter_bayes_learn(filter, data, result, is_junk, is_register);
}
463
464
/* Content filter callback: unlearn a message previously learned as junk. */
static XFilterStatus xfilter_bayes_unlearn_junk_func(XFilter *filter, const XMessageData *data, XFilterResult *result)
{
        const gboolean is_junk = TRUE;
        const gboolean is_register = FALSE;

        return xfilter_bayes_learn(filter, data, result, is_junk, is_register);
}
468
469
/* Content filter callback: unlearn a message previously learned as clean. */
static XFilterStatus xfilter_bayes_unlearn_nojunk_func(XFilter *filter, const XMessageData *data, XFilterResult *result)
{
        const gboolean is_junk = FALSE;
        const gboolean is_register = FALSE;

        return xfilter_bayes_learn(filter, data, result, is_junk, is_register);
}
473
474
XFilter *xfilter_bayes_learn_junk_new(void)
475
{
476
        XFilter *filter;
477
478
        filter = xfilter_new(XF_CONTENT, "bayes-learn-junk");
479
        xfilter_set_content_filter_func(X_CONTENT_FILTER(filter), xfilter_bayes_learn_junk_func);
480
481
        return filter;
482
}
483
484
XFilter *xfilter_bayes_learn_nojunk_new(void)
485
{
486
        XFilter *filter;
487
488
        filter = xfilter_new(XF_CONTENT, "bayes-learn-clean");
489
        xfilter_set_content_filter_func(X_CONTENT_FILTER(filter), xfilter_bayes_learn_nojunk_func);
490
491
        return filter;
492
}
493
494
XFilter *xfilter_bayes_unlearn_junk_new(void)
495
{
496
        XFilter *filter;
497
498
        filter = xfilter_new(XF_CONTENT, "bayes-unlearn-junk");
499
        xfilter_set_content_filter_func(X_CONTENT_FILTER(filter), xfilter_bayes_unlearn_junk_func);
500
501
        return filter;
502
}
503
504
XFilter *xfilter_bayes_unlearn_nojunk_new(void)
505
{
506
        XFilter *filter;
507
508
        filter = xfilter_new(XF_CONTENT, "bayes-unlearn-clean");
509
        xfilter_set_content_filter_func(X_CONTENT_FILTER(filter), xfilter_bayes_unlearn_nojunk_func);
510
511
        return filter;
512
}
513
514
515
/*
 * Copy the current learn counters into `status`.
 *
 * Returns 0 on success, -1 when `status` is NULL.
 */
int xfilter_bayes_get_learn_status(XFilterBayesLearnStatus *status)
{
        g_return_val_if_fail(status != NULL, -1);

#ifdef USE_STATUS_KVS
        /* word sums */
        status->junk_words = xfilter_kvs_fetch_int(prob_kvs, "@junk_words_sum");
        status->nojunk_words = xfilter_kvs_fetch_int(prob_kvs, "@clean_words_sum");
        /* learned message counts */
        status->junk_learned_num = xfilter_kvs_fetch_int(prob_kvs, "@junk_learn_count");
        status->nojunk_learned_num = xfilter_kvs_fetch_int(prob_kvs, "@clean_learn_count");
#else
        /* counters are held in memory; hand out a copy */
        *status = learn_status;
#endif

        return 0;
}
530
531
/* Placeholder: currently performs no reset and always returns 0. */
int xfilter_bayes_reset_learn_count(void)
{
        const int ok = 0;

        return ok;
}
535
536
/* Placeholder: currently performs no reset and always returns 0. */
int xfilter_bayes_reset_all(void)
{
        const int ok = 0;

        return ok;
}
540
541
/*
 * xfilter_kvs_foreach() callback: print "key: value" for every entry
 * whose value is exactly four bytes long; other entries are skipped.
 *
 * The value is copied out with memcpy() instead of dereferencing a cast
 * pointer, because the buffer handed to the callback is untyped and its
 * alignment is not known here.
 *
 * Always returns 0.
 */
static int show_walk_func(XFilterKVS *kvs, const char *key, void *value, int size, void *data)
{
        gint32 ival;

        if (size == (int)sizeof(ival)) {
                memcpy(&ival, value, sizeof(ival));
                printf("%s: %d\n", key, (int)ival);
        }

        return 0;
}
552
553
int xfilter_bayes_db_show_contents(int verbose)
554
{
555
        XFilterBayesLearnStatus status = {0};
556
557
        if (!junk_kvs || !clean_kvs) {
558
                g_warning("Database not ready");
559
                return -1;
560
        }
561
562
        if (verbose) {
563
                printf("Junk tokens:\n");
564
                xfilter_kvs_foreach(junk_kvs, show_walk_func, NULL);
565
                printf("\nClean tokens:\n");
566
                xfilter_kvs_foreach(clean_kvs, show_walk_func, NULL);
567
        }
568
569
        printf("\nStatus:\n");
570
        xfilter_bayes_get_learn_status(&status);
571
        printf("junk_words: %d\n", status.junk_words);
572
        printf("nojunk_words: %d\n", status.nojunk_words);
573
        printf("junk_learned_num: %d\n", status.junk_learned_num);
574
        printf("nojunk_learned_num: %d\n", status.nojunk_learned_num);
575
576
        return 0;
577
}
578
579
#ifndef USE_STATUS_KVS
580
int xfilter_read_status_file(FILE *fp)
581
{
582
        char buf[1024];
583
        int n;
584
585
        while (fgets(buf, sizeof(buf), fp) != NULL) {
586
                if (sscanf(buf, "@junk_words_sum=%d", &n) == 1)
587
                        learn_status.junk_words = n;
588
                else if (sscanf(buf, "@junk_learn_count=%d", &n) == 1)
589
                        learn_status.junk_learned_num = n;
590
                else if (sscanf(buf, "@clean_words_sum=%d", &n) == 1)
591
                        learn_status.nojunk_words = n;
592
                else if (sscanf(buf, "@clean_learn_count=%d", &n) == 1)
593
                        learn_status.nojunk_learned_num = n;
594
        }
595
596
        return 0;
597
}
598
#endif
599
600
int xfilter_bayes_db_init(const char *path)
601
{
602
        char *file;
603
604
        xfilter_debug_print("xfilter_bayes_db_init: init database\n");
605
        xfilter_debug_print("xfilter_bayes_db_init: path: %s\n",
606
                            path ? path : "(current dir)");
607
608
        if (path) {
609
                xfilter_debug_print("xfilter_bayes_db_init: making directory: %s\n", path);
610
                if (xfilter_utils_mkdir(path) < 0) {
611
                        g_warning("Making directory failed: %s", path);
612
                        return -1;
613
                }
614
        }
615
616
        if (!junk_kvs) {
617
                if (path)
618
                        file = g_strconcat(path, G_DIR_SEPARATOR_S, "junk.db",
619
                                           NULL);
620
                else
621
                        file = g_strdup("junk.db");
622
                xfilter_debug_print("xfilter_bayes_db_init: opening db: %s\n", file);
623
                junk_kvs = xfilter_kvs_open(file);
624
                if (!junk_kvs) {
625
                        g_warning("Cannot open database: %s", file);
626
                        g_free(file);
627
                        return -1;
628
                }
629
                g_free(file);
630
        }
631
        if (!clean_kvs) {
632
                if (path)
633
                        file = g_strconcat(path, G_DIR_SEPARATOR_S, "clean.db",
634
                                           NULL);
635
                else
636
                        file = g_strdup("clean.db");
637
                xfilter_debug_print("xfilter_bayes_db_init: opening db: %s\n", file);
638
                clean_kvs = xfilter_kvs_open(file);
639
                if (!clean_kvs) {
640
                        g_warning("Cannot open database: %s", file);
641
                        xfilter_kvs_close(junk_kvs);
642
                        g_free(file);
643
                        return -1;
644
                }
645
                g_free(file);
646
        }
647
648
#ifdef USE_STATUS_KVS
649
        if (!prob_kvs) {
650
                if (path)
651
                        file = g_strconcat(path, G_DIR_SEPARATOR_S, "prob.db",
652
                                           NULL);
653
                else
654
                        file = g_strdup("prob.db");
655
                xfilter_debug_print("xfilter_bayes_db_init: opening db: %s\n", file);
656
                prob_kvs = xfilter_kvs_open(file);
657
                if (!prob_kvs) {
658
                        g_warning("Cannot open database: %s", file);
659
                        xfilter_kvs_close(clean_kvs);
660
                        xfilter_kvs_close(junk_kvs);
661
                        g_free(file);
662
                        return -1;
663
                }
664
                g_free(file);
665
        }
666
#else
667
        if (!status_fp) {
668
                if (path)
669
                        file = g_strconcat(path, G_DIR_SEPARATOR_S, "status.dat",
670
                                           NULL);
671
                else
672
                        file = g_strdup("status.dat");
673
                xfilter_debug_print("xfilter_bayes_db_init: opening data file: %s\n", file);
674
                status_fp = g_fopen(file, "rb");
675
                if (!status_fp) {
676
                        if (ENOENT == errno)
677
                                status_fp = g_fopen(file, "w+b");
678
679
                        if (!status_fp) {
680
                                g_warning("Cannot open data file: %s", file);
681
                                xfilter_kvs_close(clean_kvs);
682
                                xfilter_kvs_close(junk_kvs);
683
                                g_free(file);
684
                                return -1;
685
                        }
686
                } else {
687
                        xfilter_read_status_file(status_fp);
688
                        status_fp = freopen(file, "r+b", status_fp);
689
                        if (!status_fp) {
690
                                g_warning("Cannot reopen data file: %s", file);
691
                                xfilter_kvs_close(clean_kvs);
692
                                xfilter_kvs_close(junk_kvs);
693
                                g_free(file);
694
                                return -1;
695
                        }
696
                }
697
                g_free(file);
698
        }
699
#endif
700
701
        return 0;
702
}
703
704
int xfilter_bayes_db_done(void)
705
{
706
        int ret = 0;
707
708
        xfilter_debug_print("xfilter_bayes_db_init: close database\n");
709
710
#ifdef USE_STATUS_KVS
711
        if (prob_kvs)
712
                ret |= xfilter_kvs_close(prob_kvs);
713
#else
714
        if (status_fp)
715
                ret |= fclose(status_fp);
716
#endif
717
718
        if (clean_kvs)
719
                ret |= xfilter_kvs_close(clean_kvs);
720
        if (junk_kvs)
721
                ret |= xfilter_kvs_close(junk_kvs);
722
723
        return ret;
724
}