Revision d75428f4 lib/filters/bayes-filter.c
| b/lib/filters/bayes-filter.c | ||
|---|---|---|
| 7 | 7 |
#include <glib.h> |
| 8 | 8 |
#include <stdio.h> |
| 9 | 9 |
#include <string.h> |
| 10 |
#include <unistd.h> |
|
| 11 |
#include <errno.h> |
|
| 10 | 12 |
|
| 11 | 13 |
#include "filter.h" |
| 12 | 14 |
#include "filter-kvs.h" |
| 13 | 15 |
#include "filter-utils.h" |
| 14 | 16 |
#include "bayes-filter.h" |
| 15 | 17 |
|
| 18 |
#include "libsylph/utils.h" |
|
| 19 |
|
|
| 16 | 20 |
#define N_TOKENS 15 |
| 21 |
#undef USE_STATUS_KVS |
|
| 17 | 22 |
|
| 18 | 23 |
static XFilterKVS *junk_kvs; |
| 19 | 24 |
static XFilterKVS *clean_kvs; |
| 25 |
#ifdef USE_STATUS_KVS |
|
| 20 | 26 |
static XFilterKVS *prob_kvs; |
| 21 |
|
|
| 27 |
#else |
|
| 28 |
static XFilterBayesLearnStatus learn_status; |
|
| 29 |
static FILE *status_fp; |
|
| 30 |
#endif |
|
| 22 | 31 |
|
| 23 | 32 |
/* Test */ |
| 24 | 33 |
|
| ... | ... | |
| 324 | 333 |
g_warning("database update error");
|
| 325 | 334 |
} |
| 326 | 335 |
|
| 336 |
/*
 * Update the learn statistics after a message was registered or
 * unregistered, and persist them.
 *
 * is_junk:     TRUE to update the junk counters, FALSE for the clean ones.
 * is_register: TRUE when a message was learned, FALSE when it was unlearned.
 * sum_add:     when registering, added to the word sum; when unregistering,
 *              stored as the new word sum.
 *
 * With USE_STATUS_KVS the counters live in prob_kvs.  Otherwise they are
 * kept in learn_status and the whole status file is rewritten on every call.
 * Write failures are reported on stderr; the in-memory counters are updated
 * regardless.
 */
static void xfilter_update_status(gboolean is_junk, gboolean is_register, int sum_add)
{
#ifdef USE_STATUS_KVS
	xfilter_kvs_begin(prob_kvs);
	if (is_register) {
		if (is_junk) {
			xfilter_kvs_increment(prob_kvs, "@junk_words_sum", sum_add);
			xfilter_kvs_increment(prob_kvs, "@junk_learn_count", 1);
		} else {
			xfilter_kvs_increment(prob_kvs, "@clean_words_sum", sum_add);
			xfilter_kvs_increment(prob_kvs, "@clean_learn_count", 1);
		}
	} else {
		if (is_junk) {
			xfilter_kvs_set_int(prob_kvs, "@junk_words_sum", sum_add);
			xfilter_kvs_decrement(prob_kvs, "@junk_learn_count", 1);
		} else {
			xfilter_kvs_set_int(prob_kvs, "@clean_words_sum", sum_add);
			xfilter_kvs_decrement(prob_kvs, "@clean_learn_count", 1);
		}
	}
	xfilter_kvs_end(prob_kvs);
#else /* !USE_STATUS_KVS */
	if (is_register) {
		if (is_junk) {
			learn_status.junk_words += sum_add;
			learn_status.junk_learned_num++;
		} else {
			learn_status.nojunk_words += sum_add;
			learn_status.nojunk_learned_num++;
		}
	} else {
		if (is_junk) {
			learn_status.junk_words = sum_add;
			/* never let the learn count drop below zero */
			if (learn_status.junk_learned_num > 0)
				learn_status.junk_learned_num--;
		} else {
			learn_status.nojunk_words = sum_add;
			if (learn_status.nojunk_learned_num > 0)
				learn_status.nojunk_learned_num--;
		}
	}

	/* Without an open stream there is nothing to persist to. */
	if (!status_fp) {
		g_warning("status file is not open");
		return;
	}

	/*
	 * Empty the file before rewriting it.  If truncation fails, keep the
	 * previous contents instead of overwriting them in place, which could
	 * leave stale bytes behind the new data.
	 */
	if (ftruncate(fileno(status_fp), 0) < 0) {
		perror("ftruncate");
		return;
	}
	rewind(status_fp);

	if (fprintf(status_fp, "@junk_words_sum=%d\n", learn_status.junk_words) < 0 ||
	    fprintf(status_fp, "@junk_learn_count=%d\n", learn_status.junk_learned_num) < 0 ||
	    fprintf(status_fp, "@clean_words_sum=%d\n", learn_status.nojunk_words) < 0 ||
	    fprintf(status_fp, "@clean_learn_count=%d\n", learn_status.nojunk_learned_num) < 0) {
		perror("fprintf");
		return;
	}

	xfilter_debug_print("xfilter_update_status: writing status to file\n");

	if (fflush(status_fp) < 0) {
		perror("fflush");
		return;
	}
	/* Ask the OS to commit the data to storage where supported. */
#if HAVE_FSYNC
	if (fsync(fileno(status_fp)) < 0) {
		perror("fsync");
	}
#elif defined(G_OS_WIN32)
	if (_commit(_fileno(status_fp)) < 0) {
		perror("_commit");
	}
#endif

	xfilter_debug_print("xfilter_update_status: done\n");
#endif /* !USE_STATUS_KVS */
}
|
| 405 |
|
|
| 327 | 406 |
static XFilterStatus xfilter_bayes_learn(XFilter *filter, const XMessageData *data, XFilterResult *result, gboolean is_junk, gboolean is_register) |
| 328 | 407 |
{
|
| 329 | 408 |
const char *type; |
| ... | ... | |
| 365 | 444 |
xfilter_kvs_end(kvs); |
| 366 | 445 |
g_hash_table_destroy(table); |
| 367 | 446 |
|
| 368 |
xfilter_kvs_begin(prob_kvs); |
|
| 369 |
if (is_register) {
|
|
| 370 |
if (is_junk) {
|
|
| 371 |
xfilter_kvs_increment(prob_kvs, "@junk_words_sum", sum_add); |
|
| 372 |
xfilter_kvs_increment(prob_kvs, "@junk_learn_count", 1); |
|
| 373 |
} else {
|
|
| 374 |
xfilter_kvs_increment(prob_kvs, "@clean_words_sum", sum_add); |
|
| 375 |
xfilter_kvs_increment(prob_kvs, "@clean_learn_count", 1); |
|
| 376 |
} |
|
| 377 |
} else {
|
|
| 378 |
if (is_junk) {
|
|
| 379 |
xfilter_kvs_set_int(prob_kvs, "@junk_words_sum", sum_add); |
|
| 380 |
xfilter_kvs_decrement(prob_kvs, "@junk_learn_count", 1); |
|
| 381 |
} else {
|
|
| 382 |
xfilter_kvs_set_int(prob_kvs, "@clean_words_sum", sum_add); |
|
| 383 |
xfilter_kvs_decrement(prob_kvs, "@clean_learn_count", 1); |
|
| 384 |
} |
|
| 385 |
} |
|
| 386 |
xfilter_kvs_end(prob_kvs); |
|
| 447 |
xfilter_update_status(is_junk, is_register, sum_add); |
|
| 387 | 448 |
|
| 388 | 449 |
xfilter_result_set_status(result, XF_NONE); |
| 389 | 450 |
|
| ... | ... | |
| 455 | 516 |
{
|
| 456 | 517 |
g_return_val_if_fail(status != NULL, -1); |
| 457 | 518 |
|
| 519 |
#ifdef USE_STATUS_KVS |
|
| 458 | 520 |
status->junk_words = xfilter_kvs_fetch_int(prob_kvs, "@junk_words_sum"); |
| 459 | 521 |
status->nojunk_words = xfilter_kvs_fetch_int(prob_kvs, "@clean_words_sum"); |
| 460 | 522 |
status->junk_learned_num = xfilter_kvs_fetch_int(prob_kvs, "@junk_learn_count"); |
| 461 | 523 |
status->nojunk_learned_num = xfilter_kvs_fetch_int(prob_kvs, "@clean_learn_count"); |
| 524 |
#else |
|
| 525 |
*status = learn_status; |
|
| 526 |
#endif |
|
| 462 | 527 |
|
| 463 | 528 |
return 0; |
| 464 | 529 |
} |
| ... | ... | |
| 489 | 554 |
{
|
| 490 | 555 |
XFilterBayesLearnStatus status = {0};
|
| 491 | 556 |
|
| 492 |
if (!junk_kvs || !clean_kvs || !prob_kvs) {
|
|
| 557 |
if (!junk_kvs || !clean_kvs) {
|
|
| 493 | 558 |
g_warning("Database not ready");
|
| 494 | 559 |
return -1; |
| 495 | 560 |
} |
| ... | ... | |
| 511 | 576 |
return 0; |
| 512 | 577 |
} |
| 513 | 578 |
|
| 579 |
#ifndef USE_STATUS_KVS |
|
| 580 |
int xfilter_read_status_file(FILE *fp) |
|
| 581 |
{
|
|
| 582 |
char buf[1024]; |
|
| 583 |
int n; |
|
| 584 |
|
|
| 585 |
while (fgets(buf, sizeof(buf), fp) != NULL) {
|
|
| 586 |
if (sscanf(buf, "@junk_words_sum=%d", &n) == 1) |
|
| 587 |
learn_status.junk_words = n; |
|
| 588 |
else if (sscanf(buf, "@junk_learn_count=%d", &n) == 1) |
|
| 589 |
learn_status.junk_learned_num = n; |
|
| 590 |
else if (sscanf(buf, "@clean_words_sum=%d", &n) == 1) |
|
| 591 |
learn_status.nojunk_words = n; |
|
| 592 |
else if (sscanf(buf, "@clean_learn_count=%d", &n) == 1) |
|
| 593 |
learn_status.nojunk_learned_num = n; |
|
| 594 |
} |
|
| 595 |
|
|
| 596 |
return 0; |
|
| 597 |
} |
|
| 598 |
#endif |
|
| 599 |
|
|
| 514 | 600 |
int xfilter_bayes_db_init(const char *path) |
| 515 | 601 |
{
|
| 516 | 602 |
char *file; |
| ... | ... | |
| 558 | 644 |
} |
| 559 | 645 |
g_free(file); |
| 560 | 646 |
} |
| 647 |
|
|
| 648 |
#ifdef USE_STATUS_KVS |
|
| 561 | 649 |
if (!prob_kvs) {
|
| 562 | 650 |
if (path) |
| 563 | 651 |
file = g_strconcat(path, G_DIR_SEPARATOR_S, "prob.db", |
| ... | ... | |
| 575 | 663 |
} |
| 576 | 664 |
g_free(file); |
| 577 | 665 |
} |
| 666 |
#else |
|
| 667 |
if (!status_fp) {
|
|
| 668 |
if (path) |
|
| 669 |
file = g_strconcat(path, G_DIR_SEPARATOR_S, "status.dat", |
|
| 670 |
NULL); |
|
| 671 |
else |
|
| 672 |
file = g_strdup("status.dat");
|
|
| 673 |
xfilter_debug_print("xfilter_bayes_db_init: opening data file: %s\n", file);
|
|
| 674 |
status_fp = g_fopen(file, "rb"); |
|
| 675 |
if (!status_fp) {
|
|
| 676 |
if (ENOENT == errno) |
|
| 677 |
status_fp = g_fopen(file, "w+b"); |
|
| 678 |
|
|
| 679 |
if (!status_fp) {
|
|
| 680 |
g_warning("Cannot open data file: %s", file);
|
|
| 681 |
xfilter_kvs_close(clean_kvs); |
|
| 682 |
xfilter_kvs_close(junk_kvs); |
|
| 683 |
g_free(file); |
|
| 684 |
return -1; |
|
| 685 |
} |
|
| 686 |
} else {
|
|
| 687 |
xfilter_read_status_file(status_fp); |
|
| 688 |
status_fp = freopen(file, "r+b", status_fp); |
|
| 689 |
if (!status_fp) {
|
|
| 690 |
g_warning("Cannot reopen data file: %s", file);
|
|
| 691 |
xfilter_kvs_close(clean_kvs); |
|
| 692 |
xfilter_kvs_close(junk_kvs); |
|
| 693 |
g_free(file); |
|
| 694 |
return -1; |
|
| 695 |
} |
|
| 696 |
} |
|
| 697 |
g_free(file); |
|
| 698 |
} |
|
| 699 |
#endif |
|
| 578 | 700 |
|
| 579 | 701 |
return 0; |
| 580 | 702 |
} |
| ... | ... | |
| 585 | 707 |
|
| 586 | 708 |
xfilter_debug_print("xfilter_bayes_db_init: close database\n");
|
| 587 | 709 |
|
| 710 |
#ifdef USE_STATUS_KVS |
|
| 588 | 711 |
if (prob_kvs) |
| 589 | 712 |
ret |= xfilter_kvs_close(prob_kvs); |
| 713 |
#else |
|
| 714 |
if (status_fp) |
|
| 715 |
ret |= fclose(status_fp); |
|
| 716 |
#endif |
|
| 717 |
|
|
| 590 | 718 |
if (clean_kvs) |
| 591 | 719 |
ret |= xfilter_kvs_close(clean_kvs); |
| 592 | 720 |
if (junk_kvs) |
Also available in: Unified diff