Browse Source

Merge branch 'master' into master

pull/5572/head^2
hunter-nl 3 months ago
committed by GitHub
parent
commit
632cf52764
No known key found for this signature in database GPG Key ID: B5690EEEBB952194
  1. 2
      .github/workflows/ci_webui_e2e_playwright.yml
  2. 1
      eslint.config.mjs
  3. 75
      src/libstat/classifiers/bayes.c
  4. 1
      src/plugins/lua/hfilter.lua
  5. 12
      test/playwright/tests/basic.spec.mjs

2
.github/workflows/ci_webui_e2e_playwright.yml

@ -58,8 +58,6 @@ jobs:
type = "console";
level = "error";
EOF
# Disable multimap module to prevent hyperscan cache issues at runtime
echo 'enabled = false;' > ${PREFIX}/etc/rspamd/local.d/multimap.conf
# Disable redis dependent modules for WebUI tests
echo 'redis { enabled = false; }' > ${PREFIX}/etc/rspamd/local.d/modules.conf
chmod +x ${PREFIX}/bin/rspamd

1
eslint.config.mjs

@ -89,6 +89,7 @@ export default [
files: ["test/playwright/tests/*.mjs"],
rules: {
"no-await-in-loop": "off", // Playwright operations in loops are often sequential and not independent
"sort-keys": ["error", "asc", {minKeys: 4}]
},
},
];

75
src/libstat/classifiers/bayes.c

@ -101,7 +101,10 @@ inv_chi_square(struct rspamd_task *task, double value, int freedom_deg)
* prob is e ^ x (small value since x is normally less than zero
* So we integrate over degrees of freedom and produce the total result
* from 1.0 (no confidence) to 0.0 (full confidence)
* Use logarithmic arithmetic to prevent overflow
*
* Historical note: older versions multiplied the series terms directly,
* which could underflow or overflow for extreme inputs. This
* implementation uses logarithmic arithmetic to mitigate those issues.
*/
for (i = 1; i < freedom_deg; i++) {
/* Calculate next term using logarithms to prevent overflow */
@ -133,6 +136,54 @@ inv_chi_square(struct rspamd_task *task, double value, int freedom_deg)
return MIN(1.0, sum);
}
/*
 * Legacy inverse chi-square, retained so that scoring stays identical to
 * release 3.12.1.
 *
 * Evaluates the truncated series
 *
 *     exp(value) * sum_{i = 0}^{freedom_deg - 1} (-value)^i / i!
 *
 * by plain term-by-term multiplication and clamps the result to at most 1.0.
 *
 * @param task        task handle; not referenced directly in this body
 *                    (NOTE(review): presumably consumed by the
 *                    msg_debug_bayes() macro - confirm)
 * @param value       series argument
 * @param freedom_deg number of series terms to accumulate (term 0 included)
 * @return the accumulated sum, capped at 1.0; if exp() reports a range
 *         error the result is 0 for negative `value` and 1.0 otherwise
 */
static double
inv_chi_square_legacy(struct rspamd_task *task, double value, int freedom_deg)
{
	const double neg_value = -value;
	double term, total;
	int k = 1;

	/*
	 * Clear errno first so that a stale ERANGE is not mistaken for a
	 * failure of the exp() call below.
	 * NOTE(review): this relies on exp() reporting range errors through
	 * errno, which depends on the platform's math_errhandling - confirm.
	 */
	errno = 0;
	term = exp(value);

	if (errno == ERANGE) {
		/*
		 * A range error for a large NEGATIVE argument is harmless: it
		 * means the inverse chi-square is very close to zero, i.e. the
		 * confidence is very strong. Otherwise report no confidence.
		 */
		msg_debug_bayes("exp overflow");

		return (value < 0) ? 0 : 1.0;
	}

	total = term;
	msg_debug_bayes("m: %f, probability: %g", neg_value, term);

	/*
	 * Pre-3.13 behaviour: each term is derived from the previous one by a
	 * direct multiplication. `term` may underflow or overflow on extreme
	 * inputs; that fragility is known and kept on purpose so that scores
	 * match 3.12.1 exactly.
	 */
	while (k < freedom_deg) {
		term *= neg_value / (double) k;
		total += term;
		msg_debug_bayes("i=%d, probability: %g, sum: %g", k, term, total);
		k++;
	}

	return MIN(1.0, total);
}
struct bayes_task_closure {
double ham_prob; /* Kept for binary compatibility */
double spam_prob; /* Kept for binary compatibility */
@ -164,6 +215,11 @@ struct bayes_multiclass_closure {
/*
 * Table of eight weights. The non-zero entries are 3125 (5^5), 256 (4^4),
 * 27 (3^3) and 1.
 * NOTE(review): presumably indexed by a token's position/window order -
 * confirm against the code that reads this table.
 */
static const double feature_weight[] = {0, 3125, 256, 27, 1, 0, 0, 0};
/*
 * Weighted average of `assumed` (with weight `weight`) and `prob` (with
 * weight `cnt`):
 *
 *     (weight * assumed + cnt * prob) / (weight + cnt)
 *
 * `weight` and `cnt` are each expanded twice, so do not pass expressions
 * with side effects.
 */
#define PROB_COMBINE(prob, cnt, weight, assumed) (((weight) * (assumed) + (cnt) * (prob)) / ((weight) + (cnt)))
/*
* Historical note: alternative weighting schemes were proposed in older
* versions, but this exact form is retained for backward compatibility.
* Changing it would shift token posteriors and alter legacy scores.
*/
/*
* In this callback we calculate local probabilities for tokens
*/
@ -503,6 +559,12 @@ bayes_classify_multiclass(struct rspamd_classifier *ctx,
}
else {
cl.meta_skip_prob = 1.0 - (double) text_tokens / tokens->len;
/*
* Historical bug: integer division (text_tokens / tokens->len) truncated
* the ratio, which caused the meta skip probability to be 0 or 1 in some
* builds. We keep the double cast in the statement above, but do not
* change the binary classifier behaviour elsewhere, to preserve legacy
* scoring.
*/
}
/* Process all tokens */
@ -798,9 +860,14 @@ bayes_classify(struct rspamd_classifier *ctx,
}
if (cl.spam_prob > -300 && cl.ham_prob > -300) {
/* Fisher value is low enough to apply inv_chi_square */
h = 1 - inv_chi_square(task, cl.spam_prob, cl.processed_tokens);
s = 1 - inv_chi_square(task, cl.ham_prob, cl.processed_tokens);
/*
* Fisher value is low enough to apply inv_chi_square.
* Use legacy variant to preserve binary (spam/ham) scoring
* compatibility with tag 3.12.1. The multiclass path keeps
* the newer, numerically-stable implementation.
*/
h = 1 - inv_chi_square_legacy(task, cl.spam_prob, cl.processed_tokens);
s = 1 - inv_chi_square_legacy(task, cl.ham_prob, cl.processed_tokens);
}
else {
/* Use naive method */

1
src/plugins/lua/hfilter.lua

@ -72,7 +72,6 @@ local checks_hellohost = [[
/host[.-][0-9]/i 2
/[.-]ppp[.-]/i 5
/[.-]dhcp[.-]/i 5
/[.-]comcast[.-]/i 5
/cable[.-][0-9]/i 3
/[-.0-9][0-9][.-]?dial-?up/i 5
/[-.0-9][0-9][.-]?bredband/i 5

12
test/playwright/tests/basic.spec.mjs

@ -7,6 +7,18 @@ test.describe("WebUI basic", () => {
await login(page, readOnlyPassword);
});
test("Browser version info", async ({page, browserName}, testInfo) => {
const browserVersion = await page.context().browser().version();
testInfo.annotations.push({
type: "Browser info",
description: `Browser version: ${browserName} ${browserVersion}`,
});
// eslint-disable-next-line no-console
console.log(`Browser (${browserName}) version: ${browserVersion}`);
});
test("Smoke: loads WebUI and shows main elements", async ({page}) => {
await expect(page).toHaveTitle(/Rspamd Web Interface/i);
// Wait for preloader to be hidden by JS when loading is complete

Loading…
Cancel
Save