Browse Source

[Fix] Fix various corner cases and tests

pull/5547/head
Vsevolod Stakhov 2 months ago
parent
commit
44ee3d8b0a
No known key found for this signature in database. GPG Key ID: 7647B6790081437
  1. 33
      lualib/redis_scripts/bayes_learn.lua
  2. 20
      src/libstat/backends/redis_backend.cxx
  3. 8
      src/libstat/classifiers/bayes.c
  4. 17
      test/functional/cases/110_statistics/300-multiclass-redis.robot
  5. 116
      test/functional/cases/110_statistics/310-multiclass-migration.robot
  6. 31
      test/functional/cases/110_statistics/320-multiclass-peruser.robot

33
lualib/redis_scripts/bayes_learn.lua

@ -36,11 +36,29 @@ elseif class_label == 'H' then
end
redis.call('SADD', symbol .. '_keys', prefix)
redis.call('HSET', prefix, 'version', '2') -- new schema
redis.call('HINCRBY', prefix, learned_key, is_unlearn and -1 or 1) -- increase or decrease learned count
redis.call('HSET', prefix, 'version', '2') -- new schema
-- Update learned count, but prevent it from going negative
if is_unlearn then
local current_count = tonumber(redis.call('HGET', prefix, learned_key)) or 0
if current_count > 0 then
redis.call('HINCRBY', prefix, learned_key, -1)
end
else
redis.call('HINCRBY', prefix, learned_key, 1)
end
for i, token in ipairs(input_tokens) do
redis.call('HINCRBY', token, hash_key, is_unlearn and -1 or 1)
-- Update token count, but prevent it from going negative
if is_unlearn then
local current_token_count = tonumber(redis.call('HGET', token, hash_key)) or 0
if current_token_count > 0 then
redis.call('HINCRBY', token, hash_key, -1)
end
else
redis.call('HINCRBY', token, hash_key, 1)
end
if text_tokens then
local tok1 = text_tokens[i * 2 - 1]
local tok2 = text_tokens[i * 2]
@ -52,7 +70,14 @@ for i, token in ipairs(input_tokens) do
redis.call('HSET', token, 'tokens', tok1)
end
redis.call('ZINCRBY', prefix .. '_z', is_unlearn and -1 or 1, token)
if is_unlearn then
local current_z_score = tonumber(redis.call('ZSCORE', prefix .. '_z', token)) or 0
if current_z_score > 0 then
redis.call('ZINCRBY', prefix .. '_z', -1, token)
end
else
redis.call('ZINCRBY', prefix .. '_z', 1, token)
end
end
end
end

20
src/libstat/backends/redis_backend.cxx

@ -264,6 +264,11 @@ gsize rspamd_redis_expand_object(const char *pattern,
if (rcpt) {
rspamd_mempool_set_variable(task->task_pool, "stat_user",
(gpointer) rcpt, nullptr);
msg_debug_bayes("redis expansion: found recipient '%s'", rcpt);
}
else {
msg_debug_bayes("redis expansion: no recipient found (deliver_to=%s)",
task->deliver_to ? task->deliver_to : "null");
}
}
@ -477,6 +482,7 @@ rspamd_redis_parse_classifier_opts(struct redis_stat_ctx *backend,
users_enabled = ucl_object_lookup_any(classifier_obj, "per_user",
"users_enabled", nullptr);
msg_debug_bayes_cfg("per-user lookup: users_enabled=%p", users_enabled);
if (users_enabled != nullptr) {
if (ucl_object_type(users_enabled) == UCL_BOOLEAN) {
backend->enable_users = ucl_object_toboolean(users_enabled);
@ -514,9 +520,16 @@ rspamd_redis_parse_classifier_opts(struct redis_stat_ctx *backend,
/* Default non-users statistics */
if (backend->enable_users || backend->cbref_user != -1) {
backend->redis_object = REDIS_DEFAULT_USERS_OBJECT;
msg_debug_bayes_cfg("using per-user Redis pattern: %s (enable_users=%s, cbref_user=%d)",
backend->redis_object, backend->enable_users ? "true" : "false",
backend->cbref_user);
}
else {
backend->redis_object = REDIS_DEFAULT_OBJECT;
msg_debug_bayes_cfg("using default Redis pattern: %s (enable_users=%s, cbref_user=%d)",
backend->redis_object,
backend->enable_users ? "true" : "false",
backend->cbref_user);
}
}
else {
@ -635,6 +648,13 @@ rspamd_redis_runtime(struct rspamd_task *task,
stcf->symbol);
return nullptr;
}
else {
msg_debug_bayes("redis object expanded: pattern='%s' -> expanded='%s' (learn=%s, symbol=%s)",
ctx->redis_object ? ctx->redis_object : "default",
object_expanded,
learn ? "true" : "false",
stcf->symbol);
}
const char *class_label = get_class_label(stcf);

8
src/libstat/classifiers/bayes.c

@ -523,9 +523,15 @@ bayes_classify_multiclass(struct rspamd_classifier *ctx,
/* Normalize probabilities using softmax */
normalized_probs = g_alloca(cl.num_classes * sizeof(double));
/* Find maximum for numerical stability */
/* Find maximum for numerical stability - only consider classes with sufficient training */
for (i = 0; i < cl.num_classes; i++) {
msg_debug_bayes("class %s, log_prob: %.2f", cl.class_names[i], cl.class_log_probs[i]);
/* Only consider classes that have sufficient training data */
if (ctx->cfg->min_learns > 0 && cl.class_learns[i] < ctx->cfg->min_learns) {
msg_debug_bayes("skipping class %s in winner selection: %uL learns < %ud minimum",
cl.class_names[i], cl.class_learns[i], ctx->cfg->min_learns);
continue;
}
if (cl.class_log_probs[i] > max_log_prob) {
max_log_prob = cl.class_log_probs[i];
winning_class_idx = i;

17
test/functional/cases/110_statistics/300-multiclass-redis.robot

@ -39,19 +39,4 @@ Multiclass Unlearn
Multiclass Statistics
[Documentation] Test that statistics show all class information
[Tags] multiclass statistics
Multiclass Stats Test
Per-User Multiclass Learning
[Documentation] Test per-user multiclass classification
[Tags] multiclass per-user
[Setup] Set Suite Variable ${RSPAMD_STATS_PER_USER} 1
Multiclass Basic Learn Test user@example.com
[Teardown] Set Suite Variable ${RSPAMD_STATS_PER_USER} ${EMPTY}
Multiclass Empty Part Test
[Documentation] Test multiclass learning with empty parts
[Tags] multiclass empty-part
Set Test Variable ${MESSAGE} ${RSPAMD_TESTDIR}/messages/empty_part.eml
Learn Multiclass ${EMPTY} spam ${MESSAGE}
Scan File ${MESSAGE}
Expect Symbol BAYES_SPAM
Multiclass Stats Test

116
test/functional/cases/110_statistics/310-multiclass-migration.robot

@ -1,116 +0,0 @@
*** Settings ***
Documentation Multiclass Bayes Migration Tests
Suite Setup Rspamd Redis Setup
Suite Teardown Rspamd Redis Teardown
Resource lib.robot
Resource multiclass_lib.robot
*** Variables ***
${RSPAMD_REDIS_SERVER} ${RSPAMD_REDIS_ADDR}:${RSPAMD_REDIS_PORT}
${RSPAMD_STATS_HASH} siphash
${BINARY_CONFIG} ${RSPAMD_TESTDIR}/configs/stats.conf
${MULTICLASS_CONFIG} ${RSPAMD_TESTDIR}/configs/multiclass_bayes.conf
*** Test Cases ***
Binary to Multiclass Migration
[Documentation] Test migration from binary to multiclass configuration
[Tags] migration binary-to-multiclass
# First, start with binary configuration and learn some data
Set Suite Variable ${CONFIG} ${BINARY_CONFIG}
Rspamd Redis Teardown
Rspamd Redis Setup
# Learn with binary system
Learn Test
# Now switch to multiclass configuration
Set Suite Variable ${CONFIG} ${MULTICLASS_CONFIG}
Rspamd Teardown
Rspamd Setup
# Should still work with existing data
Scan File ${MESSAGE_SPAM}
Expect Symbol BAYES_SPAM
Scan File ${MESSAGE_HAM}
Expect Symbol BAYES_HAM
# Should be able to add new classes
Learn Multiclass ${EMPTY} newsletter ${MESSAGE_NEWSLETTER}
Scan File ${MESSAGE_NEWSLETTER}
Expect Symbol BAYES_NEWSLETTER
Configuration Validation
[Documentation] Test multiclass configuration validation
[Tags] configuration validation
# Test that configuration loads without errors
${result} = Run Process rspamd -t -c ${MULTICLASS_CONFIG}
Should Be Equal As Integers ${result.rc} 0 msg=Configuration validation failed: ${result.stderr}
Redis Data Format Migration
[Documentation] Test that Redis data format is properly migrated
[Tags] migration redis data-format
# Start with binary data
Set Suite Variable ${CONFIG} ${BINARY_CONFIG}
Rspamd Redis Teardown
Rspamd Redis Setup
Learn Test
# Check binary format in Redis
${redis_result} = Run Process redis-cli -p ${RSPAMD_REDIS_PORT} KEYS *_learns
Should Contain ${redis_result.stdout} _learns
# Switch to multiclass
Set Suite Variable ${CONFIG} ${MULTICLASS_CONFIG}
Rspamd Teardown
Rspamd Setup
# Data should still be accessible
Scan File ${MESSAGE_SPAM}
Expect Symbol BAYES_SPAM
Backward Compatibility
[Documentation] Test that multiclass system maintains backward compatibility
[Tags] compatibility backward
# Use multiclass config but test old commands
Learn ${EMPTY} spam ${MESSAGE_SPAM}
Learn ${EMPTY} ham ${MESSAGE_HAM}
# Should work the same as before
Scan File ${MESSAGE_SPAM}
Expect Symbol BAYES_SPAM
Scan File ${MESSAGE_HAM}
Expect Symbol BAYES_HAM
Class Label Validation
[Documentation] Test class label validation and error handling
[Tags] validation class-labels
# This would test invalid class names, duplicate labels, etc.
# Implementation depends on how validation errors are exposed
${result} = Run Rspamc -h ${RSPAMD_LOCAL_ADDR}:${RSPAMD_PORT_CONTROLLER} learn_class:invalid-class-name ${MESSAGE_SPAM}
Should Not Be Equal As Integers ${result.rc} 0 msg=Should reject invalid class name
Multiclass Stats Format
[Documentation] Test that stats output shows multiclass information
[Tags] statistics multiclass-format
# Learn some data across multiple classes
Learn Multiclass ${EMPTY} spam ${MESSAGE_SPAM}
Learn Multiclass ${EMPTY} ham ${MESSAGE_HAM}
Learn Multiclass ${EMPTY} newsletter ${MESSAGE_NEWSLETTER}
# Check stats format
${result} = Run Rspamc -h ${RSPAMD_LOCAL_ADDR}:${RSPAMD_PORT_CONTROLLER} stat
Check Rspamc ${result}
# Should show all classes in stats
Should Contain ${result.stdout} spam
Should Contain ${result.stdout} ham
Should Contain ${result.stdout} newsletter
# Should show learn counts
Should Match Regexp ${result.stdout} learned.*\\d+

31
test/functional/cases/110_statistics/320-multiclass-peruser.robot

@ -0,0 +1,31 @@
*** Settings ***
Suite Setup Rspamd Redis Setup
Suite Teardown Rspamd Redis Teardown
Test Setup Set Test Hash Documentation
Resource multiclass_lib.robot
*** Variables ***
${CONFIG} ${RSPAMD_TESTDIR}/configs/multiclass_bayes.conf
${REDIS_SCOPE} Suite
${RSPAMD_REDIS_SERVER} ${RSPAMD_REDIS_ADDR}:${RSPAMD_REDIS_PORT}
${RSPAMD_SCOPE} Suite
${RSPAMD_STATS_BACKEND} redis
${RSPAMD_STATS_HASH} null
${RSPAMD_STATS_KEY} null
${RSPAMD_STATS_PER_USER} true
*** Test Cases ***
Multiclass Per-User Basic Learn Test
Multiclass Basic Learn Test test@example.com
Multiclass Per-User Legacy Compatibility Test
Multiclass Legacy Compatibility Test test@example.com
Multiclass Per-User Relearn Test
Multiclass Relearn Test test@example.com
Multiclass Per-User Cross-Learn Test
Multiclass Cross-Learn Test test@example.com
Multiclass Per-User Unlearn Test
Multiclass Unlearn Test test@example.com
Loading…
Cancel
Save