Merge branch 'master' into master

3 months ago · 632cf52764
5 changed files with 84 additions and 7 deletions
--- a/.github/workflows/ci_webui_e2e_playwright.yml
+++ b/.github/workflows/ci_webui_e2e_playwright.yml
@@ -58,8 +58,6 @@ jobs:
          type = "console";
          level = "error";
          EOF
-          # Disable multimap module to prevent hyperscan cache issues at runtime
-          echo 'enabled = false;' > ${PREFIX}/etc/rspamd/local.d/multimap.conf
          # Disable redis dependent modules for WebUI tests
          echo 'redis { enabled = false; }' > ${PREFIX}/etc/rspamd/local.d/modules.conf
          chmod +x ${PREFIX}/bin/rspamd
--- a/eslint.config.mjs
+++ b/eslint.config.mjs
@@ -89,6 +89,7 @@ export default [
        files: ["test/playwright/tests/*.mjs"],
        rules: {
            "no-await-in-loop": "off", // Playwright operations in loops are often sequential and not independent
+            "sort-keys": ["error", "asc", {minKeys: 4}]
        },
    },
 ];
--- a/src/libstat/classifiers/bayes.c
+++ b/src/libstat/classifiers/bayes.c
@@ -101,7 +101,10 @@ inv_chi_square(struct rspamd_task *task, double value, int freedom_deg)
 	 * prob is e ^ x (small value since x is normally less than zero
 	 * So we integrate over degrees of freedom and produce the total result
 	 * from 1.0 (no confidence) to 0.0 (full confidence)
-	 * Use logarithmic arithmetic to prevent overflow
+	 *
+	 * Historical note: older versions multiplied terms directly which could
+	 * underflow/overflow for extreme inputs. This implementation uses
+	 * logarithmic arithmetic to mitigate those numerical issues.
 	 */
 	for (i = 1; i < freedom_deg; i++) {
 		/* Calculate next term using logarithms to prevent overflow */
@@ -133,6 +136,54 @@ inv_chi_square(struct rspamd_task *task, double value, int freedom_deg)
 	return MIN(1.0, sum);
 }

+/*
+ * Legacy implementation kept so that the binary (spam/ham) classifier
+ * scores identically to 3.12.1; it mirrors the historical behaviour.
+ */
+static double
+inv_chi_square_legacy(struct rspamd_task *task, double value, int freedom_deg)
+{
+	double prob, sum, m;
+	int i;
+
+	errno = 0;
+	m = -value;
+	prob = exp(value);
+
+	if (errno == ERANGE) {
+		/*
+		 * e^x where x is a large NEGATIVE number underflows, so we have a very
+		 * strong confidence that inv-chi-square is close to zero
+		 */
+		msg_debug_bayes("exp overflow");
+
+		if (value < 0) {
+			return 0;
+		}
+		else {
+			return 1.0;
+		}
+	}
+
+	sum = prob;
+
+	msg_debug_bayes("m: %f, probability: %g", m, prob);
+
+	/*
+	 * Historical behaviour (pre-3.13): series terms are multiplied and
+	 * accumulated directly. This is intentionally kept to preserve binary
+	 * (spam/ham) scoring compatibility with 3.12.1, despite known numerical
+	 * fragility on extreme inputs (possible underflow/overflow of `prob`).
+	 */
+	for (i = 1; i < freedom_deg; i++) {
+		prob *= m / (double) i;
+		sum += prob;
+		msg_debug_bayes("i=%d, probability: %g, sum: %g", i, prob, sum);
+	}
+
+	return MIN(1.0, sum);
+}
+
 struct bayes_task_closure {
 	double ham_prob;  /* Kept for binary compatibility */
 	double spam_prob; /* Kept for binary compatibility */
@@ -164,6 +215,11 @@ struct bayes_multiclass_closure {
 static const double feature_weight[] = {0, 3125, 256, 27, 1, 0, 0, 0};

 #define PROB_COMBINE(prob, cnt, weight, assumed) (((weight) * (assumed) + (cnt) * (prob)) / ((weight) + (cnt)))
+/*
+ * Historical note: alternative weighting schemes were proposed in older
+ * versions, but this exact form is retained for backward compatibility.
+ * Changing it would shift token posteriors and alter legacy scores.
+ */
 /*
 * In this callback we calculate local probabilities for tokens
 */
@@ -503,6 +559,12 @@ bayes_classify_multiclass(struct rspamd_classifier *ctx,
 	}
 	else {
 		cl.meta_skip_prob = 1.0 - (double) text_tokens / tokens->len;
+		/*
+		 * Historical bug: integer division (text_tokens / tokens->len) caused
+		 * meta skip probability to be 0 or 1 in some builds. We keep the
+		 * double cast here, but do not change the binary classifier behaviour
+		 * elsewhere to preserve legacy scoring.
+		 */
 	}

 	/* Process all tokens */
@@ -798,9 +860,14 @@ bayes_classify(struct rspamd_classifier *ctx,
 	}

 	if (cl.spam_prob > -300 && cl.ham_prob > -300) {
-		/* Fisher value is low enough to apply inv_chi_square */
-		h = 1 - inv_chi_square(task, cl.spam_prob, cl.processed_tokens);
-		s = 1 - inv_chi_square(task, cl.ham_prob, cl.processed_tokens);
+		/*
+		 * Fisher value is low enough to apply inv_chi_square.
+		 * Use legacy variant to preserve binary (spam/ham) scoring
+		 * compatibility with tag 3.12.1. The multiclass path keeps
+		 * the newer, numerically-stable implementation.
+		 */
+		h = 1 - inv_chi_square_legacy(task, cl.spam_prob, cl.processed_tokens);
+		s = 1 - inv_chi_square_legacy(task, cl.ham_prob, cl.processed_tokens);
 	}
 	else {
 		/* Use naive method */
--- a/src/plugins/lua/hfilter.lua
+++ b/src/plugins/lua/hfilter.lua
@@ -72,7 +72,6 @@ local checks_hellohost = [[
 /host[.-][0-9]/i 2
 /[.-]ppp[.-]/i 5
 /[.-]dhcp[.-]/i 5
-/[.-]comcast[.-]/i 5
 /cable[.-][0-9]/i 3
 /[-.0-9][0-9][.-]?dial-?up/i 5
 /[-.0-9][0-9][.-]?bredband/i 5
--- a/test/playwright/tests/basic.spec.mjs
+++ b/test/playwright/tests/basic.spec.mjs
@@ -7,6 +7,18 @@ test.describe("WebUI basic", () => {
        await login(page, readOnlyPassword);
    });

+    test("Browser version info", async ({page, browserName}, testInfo) => {
+        const browserVersion = await page.context().browser().version();
+
+        testInfo.annotations.push({
+            type: "Browser info",
+            description: `Browser version: ${browserName} ${browserVersion}`,
+        });
+
+        // eslint-disable-next-line no-console
+        console.log(`Browser (${browserName}) version: ${browserVersion}`);
+    });
+
    test("Smoke: loads WebUI and shows main elements", async ({page}) => {
        await expect(page).toHaveTitle(/Rspamd Web Interface/i);
        // Wait for preloader to be hidden by JS when loading is complete