Revision d75428f4 lib/filters/bayes-filter.c

b/lib/filters/bayes-filter.c
7 7
#include <glib.h>
8 8
#include <stdio.h>
9 9
#include <string.h>
10
#include <unistd.h>
11
#include <errno.h>
10 12

  
11 13
#include "filter.h"
12 14
#include "filter-kvs.h"
13 15
#include "filter-utils.h"
14 16
#include "bayes-filter.h"
15 17

  
18
#include "libsylph/utils.h"
19

  
16 20
#define N_TOKENS 15
21
#undef USE_STATUS_KVS
17 22

  
18 23
static XFilterKVS *junk_kvs;
19 24
static XFilterKVS *clean_kvs;
25
#ifdef USE_STATUS_KVS
20 26
static XFilterKVS *prob_kvs;
21

  
27
#else
28
static XFilterBayesLearnStatus learn_status;
29
static FILE *status_fp;
30
#endif
22 31

  
23 32
/* Test */
24 33

  
......
324 333
		g_warning("database update error");
325 334
}
326 335

  
336
/*
 * Update the learn counters after a message has been learned or unlearned.
 *
 * is_junk:     TRUE to touch the junk counters, FALSE for the clean ones.
 * is_register: TRUE when learning: the word sum grows by sum_add and the
 *              learn count is incremented.
 *              FALSE when unlearning: the word sum is overwritten with
 *              sum_add and the learn count is decremented.
 * sum_add:     amount added to the word sum (register) or the new word sum
 *              (unregister).
 *
 * With USE_STATUS_KVS the counters are stored in prob_kvs.  Otherwise they
 * are kept in learn_status and the whole status file behind status_fp is
 * rewritten and flushed on every call.  Write failures are reported but are
 * not propagated to the caller.
 */
static void xfilter_update_status(gboolean is_junk, gboolean is_register, int sum_add)
{
#ifdef USE_STATUS_KVS
	xfilter_kvs_begin(prob_kvs);
	if (is_register) {
		if (is_junk) {
			xfilter_kvs_increment(prob_kvs, "@junk_words_sum", sum_add);
			xfilter_kvs_increment(prob_kvs, "@junk_learn_count", 1);
		} else {
			xfilter_kvs_increment(prob_kvs, "@clean_words_sum", sum_add);
			xfilter_kvs_increment(prob_kvs, "@clean_learn_count", 1);
		}
	} else {
		if (is_junk) {
			xfilter_kvs_set_int(prob_kvs, "@junk_words_sum", sum_add);
			xfilter_kvs_decrement(prob_kvs, "@junk_learn_count", 1);
		} else {
			xfilter_kvs_set_int(prob_kvs, "@clean_words_sum", sum_add);
			xfilter_kvs_decrement(prob_kvs, "@clean_learn_count", 1);
		}
	}
	xfilter_kvs_end(prob_kvs);
#else /* !USE_STATUS_KVS */
	if (is_register) {
		if (is_junk) {
			learn_status.junk_words += sum_add;
			learn_status.junk_learned_num++;
		} else {
			learn_status.nojunk_words += sum_add;
			learn_status.nojunk_learned_num++;
		}
	} else {
		if (is_junk) {
			learn_status.junk_words = sum_add;
			/* never let the learn count drop below zero */
			if (learn_status.junk_learned_num > 0)
				learn_status.junk_learned_num--;
		} else {
			learn_status.nojunk_words = sum_add;
			if (learn_status.nojunk_learned_num > 0)
				learn_status.nojunk_learned_num--;
		}
	}

	/* The in-memory counters are already updated; without an open status
	 * file there is nothing to persist them to. */
	if (!status_fp) {
		g_warning("status file is not open");
		return;
	}

	/* Rewrite the file from scratch.  A failed truncation is reported
	 * because it can leave stale bytes after the freshly written data;
	 * the write is still attempted so the new values reach the file. */
	if (ftruncate(fileno(status_fp), 0) < 0)
		perror("ftruncate");
	/* rewind() also clears any error indicator left on the stream */
	rewind(status_fp);

	if (fprintf(status_fp, "@junk_words_sum=%d\n", learn_status.junk_words) < 0 ||
	    fprintf(status_fp, "@junk_learn_count=%d\n", learn_status.junk_learned_num) < 0 ||
	    fprintf(status_fp, "@clean_words_sum=%d\n", learn_status.nojunk_words) < 0 ||
	    fprintf(status_fp, "@clean_learn_count=%d\n", learn_status.nojunk_learned_num) < 0) {
		perror("fprintf");
		return;
	}

	xfilter_debug_print("xfilter_update_status: writing status to file\n");

	if (fflush(status_fp) < 0) {
		perror("fflush");
		return;
	}
	/* Ask the OS to commit the data to storage where an API for that
	 * is available. */
#if HAVE_FSYNC
	if (fsync(fileno(status_fp)) < 0) {
		perror("fsync");
	}
#elif defined(G_OS_WIN32)
	if (_commit(_fileno(status_fp)) < 0) {
		perror("_commit");
	}
#endif

	xfilter_debug_print("xfilter_update_status: done\n");
#endif /* !USE_STATUS_KVS */
}
405

  
327 406
static XFilterStatus xfilter_bayes_learn(XFilter *filter, const XMessageData *data, XFilterResult *result, gboolean is_junk, gboolean is_register)
328 407
{
329 408
	const char *type;
......
365 444
	xfilter_kvs_end(kvs);
366 445
	g_hash_table_destroy(table);
367 446

  
368
	xfilter_kvs_begin(prob_kvs);
369
	if (is_register) {
370
		if (is_junk) {
371
			xfilter_kvs_increment(prob_kvs, "@junk_words_sum", sum_add);
372
			xfilter_kvs_increment(prob_kvs, "@junk_learn_count", 1);
373
		} else {
374
			xfilter_kvs_increment(prob_kvs, "@clean_words_sum", sum_add);
375
			xfilter_kvs_increment(prob_kvs, "@clean_learn_count", 1);
376
		}
377
	} else {
378
		if (is_junk) {
379
			xfilter_kvs_set_int(prob_kvs, "@junk_words_sum", sum_add);
380
			xfilter_kvs_decrement(prob_kvs, "@junk_learn_count", 1);
381
		} else {
382
			xfilter_kvs_set_int(prob_kvs, "@clean_words_sum", sum_add);
383
			xfilter_kvs_decrement(prob_kvs, "@clean_learn_count", 1);
384
		}
385
	}
386
	xfilter_kvs_end(prob_kvs);
447
	xfilter_update_status(is_junk, is_register, sum_add);
387 448

  
388 449
	xfilter_result_set_status(result, XF_NONE);
389 450

  
......
455 516
{
456 517
	g_return_val_if_fail(status != NULL, -1);
457 518

  
519
#ifdef USE_STATUS_KVS
458 520
	status->junk_words = xfilter_kvs_fetch_int(prob_kvs, "@junk_words_sum");
459 521
	status->nojunk_words = xfilter_kvs_fetch_int(prob_kvs, "@clean_words_sum");
460 522
	status->junk_learned_num = xfilter_kvs_fetch_int(prob_kvs, "@junk_learn_count");
461 523
	status->nojunk_learned_num = xfilter_kvs_fetch_int(prob_kvs, "@clean_learn_count");
524
#else
525
	*status = learn_status;
526
#endif
462 527

  
463 528
	return 0;
464 529
}
......
489 554
{
490 555
	XFilterBayesLearnStatus status = {0};
491 556

  
492
	if (!junk_kvs || !clean_kvs || !prob_kvs) {
557
	if (!junk_kvs || !clean_kvs) {
493 558
		g_warning("Database not ready");
494 559
		return -1;
495 560
	}
......
511 576
	return 0;
512 577
}
513 578

  
579
#ifndef USE_STATUS_KVS
580
int xfilter_read_status_file(FILE *fp)
581
{
582
	char buf[1024];
583
	int n;
584

  
585
	while (fgets(buf, sizeof(buf), fp) != NULL) {
586
		if (sscanf(buf, "@junk_words_sum=%d", &n) == 1)
587
			learn_status.junk_words = n;
588
		else if (sscanf(buf, "@junk_learn_count=%d", &n) == 1)
589
			learn_status.junk_learned_num = n;
590
		else if (sscanf(buf, "@clean_words_sum=%d", &n) == 1)
591
			learn_status.nojunk_words = n;
592
		else if (sscanf(buf, "@clean_learn_count=%d", &n) == 1)
593
			learn_status.nojunk_learned_num = n;
594
	}
595

  
596
	return 0;
597
}
598
#endif
599

  
514 600
int xfilter_bayes_db_init(const char *path)
515 601
{
516 602
	char *file;
......
558 644
		}
559 645
		g_free(file);
560 646
	}
647

  
648
#ifdef USE_STATUS_KVS
561 649
	if (!prob_kvs) {
562 650
		if (path)
563 651
			file = g_strconcat(path, G_DIR_SEPARATOR_S, "prob.db",
......
575 663
		}
576 664
		g_free(file);
577 665
	}
666
#else
667
	if (!status_fp) {
668
		if (path)
669
			file = g_strconcat(path, G_DIR_SEPARATOR_S, "status.dat",
670
					   NULL);
671
		else
672
			file = g_strdup("status.dat");
673
		xfilter_debug_print("xfilter_bayes_db_init: opening data file: %s\n", file);
674
		status_fp = g_fopen(file, "rb");
675
		if (!status_fp) {
676
			if (ENOENT == errno)
677
				status_fp = g_fopen(file, "w+b");
678

  
679
			if (!status_fp) {
680
				g_warning("Cannot open data file: %s", file);
681
				xfilter_kvs_close(clean_kvs);
682
				xfilter_kvs_close(junk_kvs);
683
				g_free(file);
684
				return -1;
685
			}
686
		} else {
687
			xfilter_read_status_file(status_fp);
688
			status_fp = freopen(file, "r+b", status_fp);
689
			if (!status_fp) {
690
				g_warning("Cannot reopen data file: %s", file);
691
				xfilter_kvs_close(clean_kvs);
692
				xfilter_kvs_close(junk_kvs);
693
				g_free(file);
694
				return -1;
695
			}
696
		}
697
		g_free(file);
698
	}
699
#endif
578 700

  
579 701
	return 0;
580 702
}
......
585 707

  
586 708
	xfilter_debug_print("xfilter_bayes_db_init: close database\n");
587 709

  
710
#ifdef USE_STATUS_KVS
588 711
	if (prob_kvs)
589 712
		ret |= xfilter_kvs_close(prob_kvs);
713
#else
714
	if (status_fp)
715
		ret |= fclose(status_fp);
716
#endif
717

  
590 718
	if (clean_kvs)
591 719
		ret |= xfilter_kvs_close(clean_kvs);
592 720
	if (junk_kvs)

Also available in: Unified diff