Browse Source
[Feature] Add URL deep processing architecture
[Feature] Add URL deep processing architecture
This commit implements a two-level URL processing system that addresses issue #5731 and provides flexible URL analysis with multiple specific symbols. Core changes: * Modified src/libserver/url.c to handle oversized user fields (fixes #5731) * Added lualib/lua_url_filter.lua - Fast library filter during parsing * Added src/plugins/lua/url_suspect.lua - Deep inspection plugin * Added conf/modules.d/url_suspect.conf - Plugin configuration * Added conf/scores.d/url_suspect_group.conf - Symbol scores Key features: * No new C flags - uses existing URL flags (has_user, numeric, obscured, etc.) * Works without maps - built-in logic for common cases * 15+ specific symbols instead of generic R_SUSPICIOUS_URL * Backward compatible - keeps R_SUSPICIOUS_URL working * User extensible - custom filters and checks supported Optional features: * Example map files for advanced customization (disabled by default) * Whitelist, pattern matching, TLD lists Issue: #5731pull/5748/head
No known key found for this signature in database
GPG Key ID: 7647B6790081437
11 changed files with 1350 additions and 1 deletions
-
173conf/maps.d/url_suspect/README.md
-
18conf/maps.d/url_suspect/suspicious_ports.map.example
-
19conf/maps.d/url_suspect/suspicious_tlds.map.example
-
20conf/maps.d/url_suspect/user_patterns.map.example
-
19conf/maps.d/url_suspect/whitelist_domains.map.example
-
163conf/modules.d/url_suspect.conf
-
101conf/scores.d/url_suspect_group.conf
-
51local.d/url_filter.lua
-
180lualib/lua_url_filter.lua
-
5src/libserver/url.c
-
602src/plugins/lua/url_suspect.lua
@ -0,0 +1,173 @@ |
|||||
|
# URL Suspect Optional Maps |
||||
|
|
||||
|
This directory contains **optional** map files for the URL Suspect plugin. |
||||
|
|
||||
|
**Important**: These maps are **disabled by default**. The plugin works perfectly without them using built-in logic. |
||||
|
|
||||
|
## When to Use Maps |
||||
|
|
||||
|
Use maps only if you need to: |
||||
|
- Whitelist specific domains to skip checks |
||||
|
- Add custom user field patterns beyond built-in checks |
||||
|
- Blacklist specific user names |
||||
|
- Define additional suspicious TLDs beyond the built-in list |
||||
|
- Mark specific IP ranges as suspicious |
||||
|
- Define unusual ports as suspicious |
||||
|
|
||||
|
For most users, the built-in logic is sufficient. |
||||
|
|
||||
|
## Available Maps |
||||
|
|
||||
|
### 1. whitelist_domains.map |
||||
|
**Purpose**: Skip all URL suspect checks for trusted domains |
||||
|
|
||||
|
**Format**: One domain per line |
||||
|
``` |
||||
|
google.com |
||||
|
microsoft.com |
||||
|
github.com |
||||
|
``` |
||||
|
|
||||
|
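**Enable in** `local.d/url_suspect.conf` (put only the inner settings there and omit the outer `url_suspect { ... }` wrapper shown below, because that file is already included inside the `url_suspect` section): |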
||||
|
```lua |
||||
|
url_suspect { |
||||
|
use_whitelist = true; |
||||
|
whitelist_map = "$LOCAL_CONFDIR/local.d/maps.d/url_suspect/whitelist_domains.map"; |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
### 2. user_patterns.map |
||||
|
**Purpose**: Regex patterns for suspicious user fields |
||||
|
|
||||
|
**Format**: Regex pattern (one per line) |
||||
|
``` |
||||
|
^admin$ |
||||
|
^root$ |
||||
|
^test$ |
||||
|
^[0-9]{10,}$ |
||||
|
``` |
||||
|
|
||||
|
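**Enable in** `local.d/url_suspect.conf` (put only the inner settings there and omit the outer `url_suspect { ... }` wrapper shown below, because that file is already included inside the `url_suspect` section): |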
||||
|
```lua |
||||
|
url_suspect { |
||||
|
checks { |
||||
|
user_password { |
||||
|
use_pattern_map = true; |
||||
|
pattern_map = "$LOCAL_CONFDIR/local.d/maps.d/url_suspect/user_patterns.map"; |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
### 3. user_blacklist.map |
||||
|
**Purpose**: Exact user names to penalize |
||||
|
|
||||
|
**Format**: Exact match (one per line) |
||||
|
``` |
||||
|
admin |
||||
|
root |
||||
|
administrator |
||||
|
webmaster |
||||
|
``` |
||||
|
|
||||
|
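**Enable in** `local.d/url_suspect.conf` (put only the inner settings there and omit the outer `url_suspect { ... }` wrapper shown below, because that file is already included inside the `url_suspect` section): |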
||||
|
```lua |
||||
|
url_suspect { |
||||
|
checks { |
||||
|
user_password { |
||||
|
use_blacklist = true; |
||||
|
blacklist_map = "$LOCAL_CONFDIR/local.d/maps.d/url_suspect/user_blacklist.map"; |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
### 4. suspicious_tlds.map |
||||
|
**Purpose**: Additional TLDs beyond built-in list (.tk, .ml, .ga, .cf, .gq) |
||||
|
|
||||
|
**Format**: TLD with leading dot (one per line) |
||||
|
``` |
||||
|
.xyz |
||||
|
.top |
||||
|
.work |
||||
|
.date |
||||
|
.loan |
||||
|
``` |
||||
|
|
||||
|
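**Enable in** `local.d/url_suspect.conf` (put only the inner settings there and omit the outer `url_suspect { ... }` wrapper shown below, because that file is already included inside the `url_suspect` section): |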
||||
|
```lua |
||||
|
url_suspect { |
||||
|
checks { |
||||
|
tld { |
||||
|
use_tld_map = true; |
||||
|
tld_map = "$LOCAL_CONFDIR/local.d/maps.d/url_suspect/suspicious_tlds.map"; |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
### 5. suspicious_ip_ranges.map |
||||
|
**Purpose**: IP ranges to mark as suspicious (beyond built-in private IP detection) |
||||
|
|
||||
|
**Format**: CIDR notation (one per line) |
||||
|
``` |
||||
|
203.0.113.0/24 |
||||
|
198.51.100.0/24 |
||||
|
``` |
||||
|
|
||||
|
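**Enable in** `local.d/url_suspect.conf` (put only the inner settings there and omit the outer `url_suspect { ... }` wrapper shown below, because that file is already included inside the `url_suspect` section): |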
||||
|
```lua |
||||
|
url_suspect { |
||||
|
checks { |
||||
|
numeric_ip { |
||||
|
use_range_map = true; |
||||
|
range_map = "$LOCAL_CONFDIR/local.d/maps.d/url_suspect/suspicious_ip_ranges.map"; |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
### 6. suspicious_ports.map |
||||
|
**Purpose**: Unusual ports that indicate suspicious URLs |
||||
|
|
||||
|
**Format**: Port number (one per line) |
||||
|
``` |
||||
|
8080 |
||||
|
8443 |
||||
|
3128 |
||||
|
1080 |
||||
|
``` |
||||
|
|
||||
|
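**Enable in** `local.d/url_suspect.conf` (put only the inner settings there and omit the outer `url_suspect { ... }` wrapper shown below, because that file is already included inside the `url_suspect` section): |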
||||
|
```lua |
||||
|
url_suspect { |
||||
|
checks { |
||||
|
structure { |
||||
|
use_port_map = true; |
||||
|
port_map = "$LOCAL_CONFDIR/local.d/maps.d/url_suspect/suspicious_ports.map"; |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
## Map File Locations |
||||
|
|
||||
|
You can place map files in: |
||||
|
1. `$LOCAL_CONFDIR/local.d/maps.d/url_suspect/` (recommended) |
||||
|
2. `$LOCAL_CONFDIR/local.d/` (also works) |
||||
|
3. Any absolute path |
||||
|
4. Remote URL (e.g., `https://example.com/map.txt`) |
||||
|
|
||||
|
## Example Files |
||||
|
|
||||
|
See `.example` files in this directory for templates you can copy and modify. |
||||
|
|
||||
|
## Performance Note |
||||
|
|
||||
|
Maps are loaded once at startup and cached in memory. They don't add significant overhead even when enabled. |
||||
|
|
||||
|
## Support |
||||
|
|
||||
|
For questions or issues: |
||||
|
- Documentation: https://rspamd.com/doc/modules/url_suspect.html |
||||
|
- GitHub: https://github.com/rspamd/rspamd/issues |
||||
@ -0,0 +1,18 @@ |
|||||
|
# Suspicious Ports |
||||
|
# Unusual ports that may indicate malicious activity |
||||
|
# Format: Port number (one per line) |
||||
|
|
||||
|
# Common proxy ports |
||||
|
8080 |
||||
|
8443 |
||||
|
3128 |
||||
|
1080 |
||||
|
|
||||
|
# Unusual web ports |
||||
|
8888 |
||||
|
9999 |
||||
|
4444 |
||||
|
8000 |
||||
|
|
||||
|
# Add your suspicious ports below: |
||||
|
# 12345 |
||||
@ -0,0 +1,19 @@ |
|||||
|
# Suspicious TLDs |
||||
|
# These are in addition to the built-in list: .tk, .ml, .ga, .cf, .gq |
||||
|
# Format: TLD with leading dot (one per line) |
||||
|
|
||||
|
# Frequently abused TLDs |
||||
|
.xyz |
||||
|
.top |
||||
|
.work |
||||
|
.date |
||||
|
.loan |
||||
|
.win |
||||
|
.download |
||||
|
.stream |
||||
|
.click |
||||
|
.link |
||||
|
.racing |
||||
|
|
||||
|
# Add your suspicious TLDs below: |
||||
|
# .suspicious |
||||
@ -0,0 +1,20 @@ |
|||||
|
# Suspicious User Field Patterns (Regex) |
||||
|
# Format: Regex pattern (one per line) |
||||
|
|
||||
|
# Common suspicious usernames |
||||
|
^admin$ |
||||
|
^root$ |
||||
|
^test$ |
||||
|
^user$ |
||||
|
^administrator$ |
||||
|
^webmaster$ |
||||
|
^postmaster$ |
||||
|
|
||||
|
# Very long numeric usernames (10+ digits) |
||||
|
^[0-9]{10,}$ |
||||
|
|
||||
|
# Very long usernames in general |
||||
|
^.{128,}$ |
||||
|
|
||||
|
# Add your patterns below: |
||||
|
# ^mypattern$ |
||||
@ -0,0 +1,19 @@ |
|||||
|
# Whitelist Domains for URL Suspect Plugin |
||||
|
# URLs from these domains will skip all URL suspect checks |
||||
|
# Format: One domain per line |
||||
|
|
||||
|
# Major tech companies |
||||
|
google.com |
||||
|
microsoft.com |
||||
|
apple.com |
||||
|
amazon.com |
||||
|
|
||||
|
# Development platforms |
||||
|
github.com |
||||
|
gitlab.com |
||||
|
bitbucket.org |
||||
|
stackoverflow.com |
||||
|
|
||||
|
# Add your trusted domains below: |
||||
|
# example.com |
||||
|
# internal-cdn.mycompany.com |
||||
@ -0,0 +1,163 @@ |
|||||
|
# URL Suspect Plugin Configuration |
||||
|
# Module documentation: https://rspamd.com/doc/modules/url_suspect.html |
||||
|
|
||||
|
url_suspect { |
||||
|
# Enable the plugin |
||||
|
enabled = true; |
||||
|
|
||||
|
# Which URL flags trigger inspection (existing flags, no new flags needed) |
||||
|
# Available: has_user, numeric, obscured, zw_spaces, no_tld, unnormalised |
||||
|
process_flags = ["has_user", "numeric", "obscured", "zw_spaces", "no_tld"]; |
||||
|
|
||||
|
# Check configuration |
||||
|
checks { |
||||
|
# User/password field analysis |
||||
|
user_password { |
||||
|
enabled = true; |
||||
|
|
||||
|
# Length thresholds for scoring |
||||
|
length_thresholds { |
||||
|
suspicious = 64; # Score if user field > 64 chars |
||||
|
long = 128; # Higher score if > 128 |
||||
|
very_long = 256; # Even higher if > 256 |
||||
|
} |
||||
|
|
||||
|
# OPTIONAL: Advanced pattern matching (disabled by default) |
||||
|
# Enable only if you need custom user field patterns |
||||
|
use_pattern_map = false; |
||||
|
# pattern_map = "$LOCAL_CONFDIR/local.d/url_suspect_user_patterns.map"; |
||||
|
|
||||
|
# OPTIONAL: User blacklist (disabled by default) |
||||
|
use_blacklist = false; |
||||
|
# blacklist_map = "$LOCAL_CONFDIR/local.d/url_suspect_user_blacklist.map"; |
||||
|
} |
||||
|
|
||||
|
# Numeric IP address analysis |
||||
|
numeric_ip { |
||||
|
enabled = true; |
||||
|
|
||||
|
# Scoring for different scenarios |
||||
|
base_score = 1.5; # Basic numeric IP |
||||
|
with_user_score = 4.0; # Numeric IP + user field |
||||
|
|
||||
|
# Private IP ranges (10.x, 192.168.x, etc.) |
||||
|
allow_private_ranges = true; |
||||
|
private_score = 0.5; # Lower score for private IPs |
||||
|
|
||||
|
# OPTIONAL: Suspicious IP ranges map (disabled by default) |
||||
|
use_range_map = false; |
||||
|
# range_map = "$LOCAL_CONFDIR/local.d/url_suspect_ip_ranges.map"; |
||||
|
} |
||||
|
|
||||
|
# TLD (Top Level Domain) analysis |
||||
|
tld { |
||||
|
enabled = true; |
||||
|
|
||||
|
# Built-in suspicious TLDs (no map needed) |
||||
|
builtin_suspicious = [".tk", ".ml", ".ga", ".cf", ".gq"]; |
||||
|
builtin_score = 3.0; |
||||
|
|
||||
|
# Missing TLD score |
||||
|
missing_tld_score = 2.0; |
||||
|
|
||||
|
# OPTIONAL: Custom TLD map (disabled by default) |
||||
|
# Add this if you have additional TLDs to check |
||||
|
use_tld_map = false; |
||||
|
# tld_map = "$LOCAL_CONFDIR/local.d/url_suspect_tlds.map"; |
||||
|
} |
||||
|
|
||||
|
# Unicode and encoding analysis |
||||
|
unicode { |
||||
|
enabled = true; |
||||
|
|
||||
|
# All checks use built-in logic (no maps needed) |
||||
|
check_validity = true; # Invalid UTF-8 sequences |
||||
|
check_homographs = true; # Mixed script homograph attacks |
||||
|
check_rtl_override = true; # RTL Unicode override tricks |
||||
|
check_zero_width = true; # Zero-width space characters |
||||
|
} |
||||
|
|
||||
|
# URL structure analysis |
||||
|
structure { |
||||
|
enabled = true; |
||||
|
|
||||
|
# Multiple @ signs |
||||
|
check_multiple_at = true; |
||||
|
max_at_signs = 2; |
||||
|
|
||||
|
# Backslashes in URL |
||||
|
check_backslash = true; |
||||
|
|
||||
|
# Excessive dots in hostname |
||||
|
check_excessive_dots = true; |
||||
|
max_host_dots = 6; |
||||
|
|
||||
|
# URL length |
||||
|
check_length = true; |
||||
|
max_url_length = 2048; |
||||
|
|
||||
|
# OPTIONAL: Suspicious ports map (disabled by default) |
||||
|
use_port_map = false; |
||||
|
# port_map = "$LOCAL_CONFDIR/local.d/url_suspect_ports.map"; |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
# Symbol names (can be customized) |
||||
|
symbols { |
||||
|
# User/password symbols |
||||
|
user_password = "URL_USER_PASSWORD"; |
||||
|
user_long = "URL_USER_LONG"; |
||||
|
user_very_long = "URL_USER_VERY_LONG"; |
||||
|
|
||||
|
# Numeric IP symbols |
||||
|
numeric_ip = "URL_NUMERIC_IP"; |
||||
|
numeric_ip_user = "URL_NUMERIC_IP_USER"; |
||||
|
numeric_private = "URL_NUMERIC_PRIVATE_IP"; |
||||
|
|
||||
|
# TLD symbols |
||||
|
no_tld = "URL_NO_TLD"; |
||||
|
suspicious_tld = "URL_SUSPICIOUS_TLD"; |
||||
|
|
||||
|
# Unicode symbols |
||||
|
bad_unicode = "URL_BAD_UNICODE"; |
||||
|
homograph = "URL_HOMOGRAPH_ATTACK"; |
||||
|
rtl_override = "URL_RTL_OVERRIDE"; |
||||
|
zero_width = "URL_ZERO_WIDTH_SPACES"; |
||||
|
|
||||
|
# Structure symbols |
||||
|
multiple_at = "URL_MULTIPLE_AT_SIGNS"; |
||||
|
backslash = "URL_BACKSLASH_PATH"; |
||||
|
excessive_dots = "URL_EXCESSIVE_DOTS"; |
||||
|
very_long = "URL_VERY_LONG"; |
||||
|
} |
||||
|
|
||||
|
# ADVANCED: Global whitelist (disabled by default) |
||||
|
# Use only if you need to skip checks for specific domains |
||||
|
use_whitelist = false; |
||||
|
# whitelist_map = "$LOCAL_CONFDIR/local.d/url_suspect_whitelist.map"; |
||||
|
|
||||
|
# ADVANCED: Custom checks (disabled by default) |
||||
|
# Example: |
||||
|
# custom_checks { |
||||
|
# my_check = <<EOD |
||||
|
# return function(task, url, settings) |
||||
|
# local host = url:get_host() |
||||
|
# if host and host:match("suspicious") then |
||||
|
# return { |
||||
|
# symbol = "MY_SUSPICIOUS_URL", |
||||
|
# score = 5.0, |
||||
|
# options = {host} |
||||
|
# } |
||||
|
# end |
||||
|
# end |
||||
|
# EOD; |
||||
|
# } |
||||
|
|
||||
|
# Backward compatibility with R_SUSPICIOUS_URL |
||||
|
# When enabled, R_SUSPICIOUS_URL symbol is inserted if any URL_* symbols fire |
||||
|
compat_mode = true; |
||||
|
|
||||
|
.include(try=true,priority=5) "${DBDIR}/dynamic/url_suspect.conf" |
||||
|
.include(try=true,priority=1,duplicate=merge) "$LOCAL_CONFDIR/local.d/url_suspect.conf" |
||||
|
.include(try=true,priority=10) "$LOCAL_CONFDIR/override.d/url_suspect.conf" |
||||
|
} |
||||
@ -0,0 +1,101 @@ |
|||||
|
# URL Suspect Plugin Scores |
||||
|
# These scores are applied when suspicious URLs are detected |
||||
|
|
||||
|
symbols = { |
||||
|
# User/password in URL |
||||
|
"URL_USER_PASSWORD" { |
||||
|
weight = 2.0; |
||||
|
description = "URL contains user field"; |
||||
|
one_shot = false; |
||||
|
} |
||||
|
"URL_USER_LONG" { |
||||
|
weight = 3.0; |
||||
|
description = "URL user field is long (>128 chars)"; |
||||
|
one_shot = false; |
||||
|
} |
||||
|
"URL_USER_VERY_LONG" { |
||||
|
weight = 5.0; |
||||
|
description = "URL user field is very long (>256 chars)"; |
||||
|
one_shot = false; |
||||
|
} |
||||
|
|
||||
|
# Numeric IP in URL |
||||
|
"URL_NUMERIC_IP" { |
||||
|
weight = 1.5; |
||||
|
description = "URL uses numeric IP address"; |
||||
|
one_shot = false; |
||||
|
} |
||||
|
"URL_NUMERIC_IP_USER" { |
||||
|
weight = 4.0; |
||||
|
description = "URL uses numeric IP with user field"; |
||||
|
one_shot = false; |
||||
|
} |
||||
|
"URL_NUMERIC_PRIVATE_IP" { |
||||
|
weight = 0.5; |
||||
|
description = "URL uses private IP range"; |
||||
|
one_shot = false; |
||||
|
} |
||||
|
|
||||
|
# TLD issues |
||||
|
"URL_NO_TLD" { |
||||
|
weight = 2.0; |
||||
|
description = "URL has no TLD"; |
||||
|
one_shot = false; |
||||
|
} |
||||
|
"URL_SUSPICIOUS_TLD" { |
||||
|
weight = 3.0; |
||||
|
description = "URL uses suspicious TLD"; |
||||
|
one_shot = false; |
||||
|
} |
||||
|
|
||||
|
# Unicode and encoding issues |
||||
|
"URL_BAD_UNICODE" { |
||||
|
weight = 3.0; |
||||
|
description = "URL contains invalid Unicode"; |
||||
|
one_shot = false; |
||||
|
} |
||||
|
"URL_HOMOGRAPH_ATTACK" { |
||||
|
weight = 5.0; |
||||
|
description = "URL uses homograph attack (mixed scripts)"; |
||||
|
one_shot = false; |
||||
|
} |
||||
|
"URL_RTL_OVERRIDE" { |
||||
|
weight = 6.0; |
||||
|
description = "URL uses RTL override character"; |
||||
|
one_shot = false; |
||||
|
} |
||||
|
"URL_ZERO_WIDTH_SPACES" { |
||||
|
weight = 7.0; |
||||
|
description = "URL contains zero-width spaces"; |
||||
|
one_shot = false; |
||||
|
} |
||||
|
|
||||
|
# URL structure issues |
||||
|
"URL_MULTIPLE_AT_SIGNS" { |
||||
|
weight = 3.0; |
||||
|
description = "URL has multiple @ signs"; |
||||
|
one_shot = false; |
||||
|
} |
||||
|
"URL_BACKSLASH_PATH" { |
||||
|
weight = 2.0; |
||||
|
description = "URL uses backslashes"; |
||||
|
one_shot = false; |
||||
|
} |
||||
|
"URL_EXCESSIVE_DOTS" { |
||||
|
weight = 2.0; |
||||
|
description = "URL has excessive dots in hostname"; |
||||
|
one_shot = false; |
||||
|
} |
||||
|
"URL_VERY_LONG" { |
||||
|
weight = 1.5; |
||||
|
description = "URL is very long"; |
||||
|
one_shot = false; |
||||
|
} |
||||
|
|
||||
|
# Legacy symbol (backward compatibility) |
||||
|
"R_SUSPICIOUS_URL" { |
||||
|
weight = 5.0; |
||||
|
description = "Suspicious URL detected (legacy symbol)"; |
||||
|
one_shot = true; |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,51 @@ |
|||||
|
--[[ |
||||
|
URL Filter Configuration |
||||
|
This is a configuration template for the URL filter library. |
||||
|
|
||||
|
The URL filter runs during parsing (before URL objects are created). |
||||
|
It provides fast validation to reject obvious garbage URLs. |
||||
|
|
||||
|
Most users don't need to configure this - the defaults work well. |
||||
|
]]-- |
||||
|
|
||||
|
-- Enable/disable the filter |
||||
|
-- enabled = true; |
||||
|
|
||||
|
-- Built-in filter configuration |
||||
|
-- builtin_filters = { |
||||
|
-- -- Reject URLs with extremely long user fields |
||||
|
-- oversized_user = { |
||||
|
-- enabled = true; |
||||
|
-- max_length = 512; -- Absolute limit for user field length |
||||
|
-- }; |
||||
|
-- |
||||
|
-- -- Reject URLs with invalid UTF-8 |
||||
|
-- basic_unicode = { |
||||
|
-- enabled = true; |
||||
|
-- reject_invalid_utf8 = true; |
||||
|
-- }; |
||||
|
-- |
||||
|
-- -- Reject obvious garbage patterns |
||||
|
-- garbage_pattern = { |
||||
|
-- enabled = true; |
||||
|
-- max_at_signs = 20; -- URLs with >20 @ signs are garbage |
||||
|
-- }; |
||||
|
-- }; |
||||
|
|
||||
|
-- ADVANCED: Custom filters |
||||
|
-- You can add your own filters that run during URL parsing. |
||||
|
-- Filter function signature: function(url_text, url_obj, flags) |
||||
|
-- Return: "accept", "suspicious", or "reject" |
||||
|
-- |
||||
|
-- Example: |
||||
|
-- custom_filters = { |
||||
|
-- my_domain_filter = function(url_text, url_obj, flags) |
||||
|
-- if url_obj then |
||||
|
-- local host = url_obj:get_host() |
||||
|
-- if host == "blocked-domain.com" then |
||||
|
-- return "reject" -- Don't create URL object |
||||
|
-- end |
||||
|
-- end |
||||
|
-- return "accept" |
||||
|
-- end; |
||||
|
-- }; |
||||
@ -0,0 +1,180 @@ |
|||||
|
--[[ |
||||
|
Copyright (c) 2025, Vsevolod Stakhov <vsevolod@rspamd.com> |
||||
|
|
||||
|
Licensed under the Apache License, Version 2.0 (the "License"); |
||||
|
you may not use this file except in compliance with the License. |
||||
|
You may obtain a copy of the License at |
||||
|
|
||||
|
http://www.apache.org/licenses/LICENSE-2.0 |
||||
|
|
||||
|
Unless required by applicable law or agreed to in writing, software |
||||
|
distributed under the License is distributed on an "AS IS" BASIS, |
||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
|
See the License for the specific language governing permissions and |
||||
|
limitations under the License. |
||||
|
]]-- |
||||
|
|
||||
|
--[[[ |
||||
|
-- @module lua_url_filter |
||||
|
-- This module provides fast URL filtering during parsing phase. |
||||
|
-- Called from C code to decide whether to create URL object or reject text. |
||||
|
--]] |
||||
|
|
||||
|
-- Module table. The numeric verdict codes below are what the built-in
-- filters and exports.filter_url hand back to the caller.
local exports = {
  ACCEPT = 0,
  SUSPICIOUS = 1,
  REJECT = 2,
}
||||
|
|
||||
|
-- Default settings (work without configuration) |
||||
|
-- Built-in defaults; the filter is usable with no configuration at all.
local settings = {
  -- Master switch: when false, filter_url accepts everything
  enabled = true,

  -- Options for the filters shipped with this module, keyed by filter name
  builtin_filters = {
    -- User part of the URL longer than max_length => reject
    oversized_user = { enabled = true, max_length = 512 },
    -- URL text that is not valid UTF-8 => reject
    basic_unicode = { enabled = true, reject_invalid_utf8 = true },
    -- More than max_at_signs '@' characters in the URL text => reject
    garbage_pattern = { enabled = true, max_at_signs = 20 },
  },

  -- User supplied filters: name -> function(url_text, url_obj, flags)
  custom_filters = {},
}
||||
|
|
||||
|
-- Built-in filter: Check for extremely long user fields |
||||
|
--- Built-in filter: reject URLs whose user part is longer than the limit.
-- @param url_text URL text (not used by this filter)
-- @param url_obj URL object exposing `get_user()`, or nil/false when absent
-- @param flags URL flags (not used by this filter)
-- @param cfg filter options; `cfg.max_length` is the largest accepted `#user`
-- @return exports.REJECT when the user part exceeds the limit,
--         exports.ACCEPT otherwise (including no object / no user part)
local function filter_oversized_user(url_text, url_obj, flags, cfg)
  -- Falls through to ACCEPT when there is no URL object or no user part
  local user_part = url_obj and url_obj:get_user()

  if user_part and #user_part > cfg.max_length then
    return exports.REJECT
  end

  return exports.ACCEPT
end
||||
|
|
||||
|
-- Built-in filter: Check for invalid UTF-8 |
||||
|
--- Built-in filter: reject URL text that is not valid UTF-8.
-- The check is skipped (URL accepted) when `cfg.reject_invalid_utf8` is off,
-- when `rspamd_util` cannot be loaded, or when it has no `is_valid_utf8`.
-- @param url_text URL text to validate
-- @param url_obj URL object (not used by this filter)
-- @param flags URL flags (not used by this filter)
-- @param cfg filter options; only `cfg.reject_invalid_utf8` is read
-- @return exports.REJECT for invalid UTF-8, exports.ACCEPT otherwise
local function filter_basic_unicode(url_text, url_obj, flags, cfg)
  if cfg.reject_invalid_utf8 then
    -- pcall keeps a missing module from turning into a filter failure
    local loaded, util = pcall(require, "rspamd_util")
    local validate = loaded and util.is_valid_utf8

    if validate and not validate(url_text) then
      return exports.REJECT
    end
  end

  return exports.ACCEPT
end
||||
|
|
||||
|
-- Built-in filter: Check for obvious garbage patterns |
||||
|
--- Built-in filter: reject URL text containing too many '@' characters.
-- @param url_text URL text to inspect
-- @param url_obj URL object (not used by this filter)
-- @param flags URL flags (not used by this filter)
-- @param cfg filter options; `cfg.max_at_signs` is the largest accepted count
-- @return exports.REJECT when the count exceeds the limit, else exports.ACCEPT
local function filter_garbage_pattern(url_text, url_obj, flags, cfg)
  -- gsub's second result is the number of substitutions, i.e. the '@' count
  local at_signs = select(2, url_text:gsub("@", ""))

  if at_signs > cfg.max_at_signs then
    return exports.REJECT
  end

  return exports.ACCEPT
end
||||
|
|
||||
|
-- Main entry point (called from C) |
||||
|
-- Built-in filters in the order they are run: { settings key, implementation }
local builtin_chain = {
  { 'oversized_user', filter_oversized_user },
  { 'basic_unicode', filter_basic_unicode },
  { 'garbage_pattern', filter_garbage_pattern },
}

-- String verdicts a custom filter may return, mapped to the numeric codes
local verdict_by_name = {
  accept = exports.ACCEPT,
  suspicious = exports.SUSPICIOUS,
  reject = exports.REJECT,
}

--- Main entry point of the URL filter (the module header says it is called
-- from C code).
--
-- Runs every enabled built-in filter, then every registered custom filter.
-- The first REJECT from any filter ends processing immediately.
--
-- Custom filters may return either the strings "accept" / "suspicious" /
-- "reject" or the numeric constants exports.ACCEPT / exports.SUSPICIOUS /
-- exports.REJECT; any other return value is treated as "accept". A custom
-- filter that raises an error is logged and skipped.
--
-- @param url_text URL text being examined
-- @param url_obj URL object, or nil when not available
-- @param flags URL flags, passed through to every filter
-- @return exports.REJECT, exports.SUSPICIOUS or exports.ACCEPT
function exports.filter_url(url_text, url_obj, flags)
  if not settings.enabled then
    return exports.ACCEPT
  end

  -- Built-in filters: only a REJECT verdict is acted upon
  local builtin_cfg = settings.builtin_filters or {}
  for _, entry in ipairs(builtin_chain) do
    local filter_cfg = builtin_cfg[entry[1]]
    if filter_cfg and filter_cfg.enabled then
      local verdict = entry[2](url_text, url_obj, flags, filter_cfg)
      if verdict == exports.REJECT then
        return verdict
      end
    end
  end

  local result = exports.ACCEPT

  -- Custom filters (if any); pcall isolates user code from the parser
  for name, filter_func in pairs(settings.custom_filters or {}) do
    local ok, r = pcall(filter_func, url_text, url_obj, flags)
    if not ok then
      -- Log error but don't fail
      local rspamd_logger = require "rspamd_logger"
      rspamd_logger.errx("Error in custom URL filter %s: %s", name, r)
    else
      -- Normalise string verdicts; numeric constants pass through unchanged
      local verdict = verdict_by_name[r]
      if verdict == nil then
        verdict = r
      end

      if verdict == exports.REJECT then
        return exports.REJECT
      elseif verdict == exports.SUSPICIOUS then
        result = exports.SUSPICIOUS
      end
    end
  end

  return result
end
||||
|
|
||||
|
-- Initialize from configuration |
||||
|
--- Load the `url_filter` options from the configuration object.
-- When the section exists it is merged over the current settings via
-- `lua_util.override_defaults`; the resulting enabled state is then logged.
-- @param cfg configuration object providing `get_all_opt`
function exports.init(cfg)
  local override_defaults = require("lua_util").override_defaults

  local user_opts = cfg:get_all_opt('url_filter')
  if user_opts then
    settings = override_defaults(settings, user_opts)
  end

  require("rspamd_logger").infox(cfg, "URL filter initialized (enabled=%s)", settings.enabled)
end
||||
|
|
||||
|
-- Allow runtime registration of custom filters |
||||
|
--- Register a custom filter at runtime.
-- The filter is stored in `settings.custom_filters` under `name`, replacing
-- any filter already stored under that name.
-- @param name key identifying the filter (also used in log messages)
-- @param func filter function; anything that is not a function is refused
-- @return true when the filter was stored, false when `func` was refused
function exports.register_custom_filter(name, func)
  local logger = require "rspamd_logger"

  if type(func) == 'function' then
    settings.custom_filters[name] = func
    logger.infox("Registered custom URL filter: %s", name)
    return true
  end

  logger.errx("Cannot register custom filter %s: not a function", name)
  return false
end
||||
|
|
||||
|
return exports |
||||
@ -0,0 +1,602 @@ |
|||||
|
--[[ |
||||
|
Copyright (c) 2025, Vsevolod Stakhov <vsevolod@rspamd.com> |
||||
|
|
||||
|
Licensed under the Apache License, Version 2.0 (the "License"); |
||||
|
you may not use this file except in compliance with the License. |
||||
|
You may obtain a copy of the License at |
||||
|
|
||||
|
http://www.apache.org/licenses/LICENSE-2.0 |
||||
|
|
||||
|
Unless required by applicable law or agreed to in writing, software |
||||
|
distributed under the License is distributed on an "AS IS" BASIS, |
||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
|
See the License for the specific language governing permissions and |
||||
|
limitations under the License. |
||||
|
]]-- |
||||
|
|
||||
|
--[[[ |
||||
|
-- @module url_suspect |
||||
|
-- This module performs deep introspection of suspicious URLs. |
||||
|
-- Works with existing URL flags, no new flags needed. |
||||
|
-- Provides multiple specific symbols for different URL issues. |
||||
|
--]] |
||||
|
|
||||
|
if confighelp then |
||||
|
return |
||||
|
end |
||||
|
|
||||
|
local N = "url_suspect" |
||||
|
local rspamd_logger = require "rspamd_logger" |
||||
|
local lua_util = require "lua_util" |
||||
|
local rspamd_url = require "rspamd_url" |
||||
|
local rspamd_util = require "rspamd_util" |
||||
|
local bit = require "bit" |
||||
|
|
||||
|
-- Default settings (work without any maps) |
||||
|
-- Built-in defaults; every map-based feature is switched off here.
local settings = {
  enabled = true,

  -- Names of URL flags that select URLs for inspection
  process_flags = { 'has_user', 'numeric', 'obscured', 'zw_spaces', 'no_tld' },

  -- Per-check options
  checks = {
    -- User/password part of the URL
    user_password = {
      enabled = true,
      -- Length limits (in `#user` units) compared against the user part
      length_thresholds = { suspicious = 64, long = 128, very_long = 256 },
      use_pattern_map = false,
      use_blacklist = false,
    },

    -- Numeric IP used as the host
    numeric_ip = {
      enabled = true,
      base_score = 1.5,
      with_user_score = 4.0,
      allow_private_ranges = true,
      private_score = 0.5,
      use_range_map = false,
    },

    -- Top level domain
    tld = {
      enabled = true,
      builtin_suspicious = { ".tk", ".ml", ".ga", ".cf", ".gq" },
      builtin_score = 3.0,
      missing_tld_score = 2.0,
      use_tld_map = false,
    },

    -- Unicode and encoding
    unicode = {
      enabled = true,
      check_validity = true,
      check_homographs = true,
      check_rtl_override = true,
      check_zero_width = true,
    },

    -- Overall URL structure
    structure = {
      enabled = true,
      check_multiple_at = true,
      max_at_signs = 2,
      check_backslash = true,
      check_excessive_dots = true,
      max_host_dots = 6,
      check_length = true,
      max_url_length = 2048,
      use_port_map = false,
    },
  },

  -- Symbol name used for each kind of finding
  symbols = {
    -- user/password
    user_password = "URL_USER_PASSWORD",
    user_long = "URL_USER_LONG",
    user_very_long = "URL_USER_VERY_LONG",
    -- numeric IP
    numeric_ip = "URL_NUMERIC_IP",
    numeric_ip_user = "URL_NUMERIC_IP_USER",
    numeric_private = "URL_NUMERIC_PRIVATE_IP",
    -- TLD
    no_tld = "URL_NO_TLD",
    suspicious_tld = "URL_SUSPICIOUS_TLD",
    -- unicode
    bad_unicode = "URL_BAD_UNICODE",
    homograph = "URL_HOMOGRAPH_ATTACK",
    rtl_override = "URL_RTL_OVERRIDE",
    zero_width = "URL_ZERO_WIDTH_SPACES",
    -- structure
    multiple_at = "URL_MULTIPLE_AT_SIGNS",
    backslash = "URL_BACKSLASH_PATH",
    excessive_dots = "URL_EXCESSIVE_DOTS",
    very_long = "URL_VERY_LONG",
  },

  use_whitelist = false,
  custom_checks = {},
  compat_mode = true,
}
||||
|
|
||||
|
-- Optional maps (only loaded if enabled) |
||||
|
-- Optional maps, all unset initially. Slots used by this module:
--   whitelist, user_patterns, user_blacklist,
--   suspicious_ips, suspicious_tlds, suspicious_ports
local maps = {}
||||
|
|
||||
|
-- Check implementations |
||||
|
local checks = {} |
||||
|
|
||||
|
-- Check: User/password in URL |
||||
|
--- Check: user/password part in a URL.
--
-- Produces exactly one finding for every URL that carries the `has_user`
-- flag and a user part, chosen by the length of that part:
--   * longer than `length_thresholds.very_long` -> `user_very_long` symbol
--   * longer than `length_thresholds.long`      -> `user_long` symbol
--   * anything else                             -> `user_password` symbol
--
-- @param task task object (used for debug logging only)
-- @param url URL object exposing get_flags_num(), get_user(), get_host()
-- @param cfg the `user_password` check options
-- @return list of findings, each `{ symbol = ..., score = ..., options = {...} }`;
--         empty when the URL has no user part
function checks.user_password_analysis(task, url, cfg)
  local findings = {}

  -- Nothing to do unless the URL is flagged as having a user part
  if bit.band(url:get_flags_num(), rspamd_url.flags.has_user) == 0 then
    return findings
  end

  local user = url:get_user()
  if not user then
    return findings
  end

  local user_len = #user
  lua_util.debugm(N, task, "Checking user field length: %d chars", user_len)

  -- Length-based scoring (built-in, no map needed)
  local thresholds = cfg.length_thresholds
  local finding
  if user_len > thresholds.very_long then
    finding = {
      symbol = settings.symbols.user_very_long,
      score = 5.0,
      options = { string.format("%d", user_len) }
    }
  elseif user_len > thresholds.long then
    finding = {
      symbol = settings.symbols.user_long,
      score = 3.0,
      options = { string.format("%d", user_len) }
    }
  else
    -- A user part above `thresholds.suspicious` and a normal-length one are
    -- reported identically, so that threshold does not alter the outcome and
    -- both cases share this branch.
    finding = {
      symbol = settings.symbols.user_password,
      score = 2.0,
      options = { url:get_host() or "unknown" }
    }
  end
  findings[#findings + 1] = finding

  -- Optional: pattern map lookup; a hit is only logged at debug level
  if cfg.use_pattern_map and maps.user_patterns
      and maps.user_patterns:get_key(user) then
    lua_util.debugm(N, task, "User field matches suspicious pattern")
  end

  -- Optional: blacklist lookup; a hit is only logged at debug level
  if cfg.use_blacklist and maps.user_blacklist
      and maps.user_blacklist:get_key(user) then
    lua_util.debugm(N, task, "User field is blacklisted")
  end

  return findings
end
||||
|
|
||||
|
-- Check: Numeric IP as hostname |
||||
|
--- Report a finding for a URL whose host is a numeric IP address.
-- Exactly one finding is produced for such a URL: the "private" symbol when
-- the host starts with a private IPv4 prefix and cfg.allow_private_ranges is
-- set; otherwise the "IP with user" or plain "IP" symbol, depending on the
-- has_user flag.
-- @param task task object, used for debug logging
-- @param url URL object (get_flags_num and get_host are called on it)
-- @param cfg options of this check: allow_private_ranges, private_score,
--   with_user_score, base_score, use_range_map
-- @return list of findings ({symbol, score, options}); empty when the URL is
--   not flagged numeric or has no host
function checks.numeric_ip_analysis(task, url, cfg)
  local results = {}
  local flag_bits = rspamd_url.flags
  local url_flags = url:get_flags_num()

  -- Nothing to do unless the URL carries the numeric flag
  if bit.band(url_flags, flag_bits.numeric) == 0 then
    return results
  end

  local ip_host = url:get_host()
  if not ip_host then
    return results
  end

  lua_util.debugm(N, task, "Checking numeric IP: %s", ip_host)

  -- Private IPv4 prefixes: 10.x, 192.168.x and 172.16.x - 172.31.x
  local private_prefixes = {
    "^10%.",
    "^192%.168%.",
    "^172%.1[6-9]%.",
    "^172%.2[0-9]%.",
    "^172%.3[0-1]%.",
  }
  local in_private_range = false
  for _, prefix in ipairs(private_prefixes) do
    if ip_host:match(prefix) then
      in_private_range = true
      break
    end
  end

  -- Pick the single symbol/score pair that applies to this host
  local symbol, score
  if in_private_range and cfg.allow_private_ranges then
    symbol, score = settings.symbols.numeric_private, cfg.private_score
  elseif bit.band(url_flags, flag_bits.has_user) ~= 0 then
    -- A user part in front of a bare IP is treated as more suspicious
    symbol, score = settings.symbols.numeric_ip_user, cfg.with_user_score
  else
    symbol, score = settings.symbols.numeric_ip, cfg.base_score
  end
  results[#results + 1] = { symbol = symbol, score = score, options = { ip_host } }

  -- Optional range map: a hit is only logged for now
  if cfg.use_range_map and maps.suspicious_ips then
    if maps.suspicious_ips:get_key(ip_host) then
      lua_util.debugm(N, task, "IP is in suspicious range")
      -- Could add additional penalty
    end
  end

  return results
end
||||
|
|
||||
|
-- Check: TLD validation |
||||
|
--- Check the TLD of a URL: missing TLD, or a TLD from the built-in list.
-- @param task task object, used for debug logging
-- @param url URL object (get_flags_num, get_host and get_tld are called)
-- @param cfg options of this check: missing_tld_score, builtin_suspicious,
--   builtin_score, use_tld_map
-- @return list of findings ({symbol, score, options}); empty when the URL has
--   no host, no TLD, or nothing matched
function checks.tld_analysis(task, url, cfg)
  local results = {}
  local flag_bits = rspamd_url.flags
  local url_flags = url:get_flags_num()
  local hostname = url:get_host()

  if not hostname then
    return results
  end

  -- URLs flagged no_tld end here, whether or not a finding is produced
  if bit.band(url_flags, flag_bits.no_tld) ~= 0 then
    -- Numeric hosts are left to the numeric IP check
    if bit.band(url_flags, flag_bits.numeric) == 0 then
      lua_util.debugm(N, task, "URL has no TLD: %s", hostname)
      results[#results + 1] = {
        symbol = settings.symbols.no_tld,
        score = cfg.missing_tld_score,
        options = { hostname },
      }
    end
    return results
  end

  local tld = url:get_tld()
  if not tld then
    return results
  end

  -- True when the TLD equals the entry or ends with it
  local function matches_entry(entry)
    return tld == entry or tld:sub(-#entry) == entry
  end

  -- Built-in list (works without any map); at most one finding is reported
  for _, entry in ipairs(cfg.builtin_suspicious) do
    if matches_entry(entry) then
      lua_util.debugm(N, task, "URL uses suspicious TLD: %s", tld)
      results[#results + 1] = {
        symbol = settings.symbols.suspicious_tld,
        score = cfg.builtin_score,
        options = { tld },
      }
      break
    end
  end

  -- Optional TLD map: a hit is only logged for now
  if cfg.use_tld_map and maps.suspicious_tlds then
    if maps.suspicious_tlds:get_key(tld) then
      lua_util.debugm(N, task, "URL TLD in suspicious map: %s", tld)
      -- Already handled by built-in check, or could add extra penalty
    end
  end

  return results
end
||||
|
|
||||
|
-- Check: Unicode anomalies |
||||
|
--- Look for Unicode-related anomalies in a URL.
-- Four independent sub-checks, each gated by its own cfg switch: invalid
-- UTF-8 in the URL text, the zw_spaces URL flag, a spoofed (homograph) host,
-- and the presence of U+202E (right-to-left override) in the URL text.
-- @param task task object, used for debug logging
-- @param url URL object (get_flags_num, get_text and get_host are called)
-- @param cfg options of this check: check_validity, check_zero_width,
--   check_homographs, check_rtl_override
-- @return list of findings ({symbol, score, options}), possibly empty
function checks.unicode_analysis(task, url, cfg)
  local results = {}
  local flag_bits = rspamd_url.flags
  local url_flags = url:get_flags_num()

  local text = url:get_text()
  local hostname = url:get_host()

  -- Append one finding carrying a single option string
  local function report(symbol, score, option)
    results[#results + 1] = { symbol = symbol, score = score, options = { option } }
  end

  -- Invalid UTF-8 anywhere in the URL text
  if cfg.check_validity and not rspamd_util.is_valid_utf8(text) then
    lua_util.debugm(N, task, "URL has invalid UTF-8")
    report(settings.symbols.bad_unicode, 3.0, hostname or "unknown")
  end

  -- Zero-width spaces are detected through the existing URL flag
  if cfg.check_zero_width then
    if bit.band(url_flags, flag_bits.zw_spaces) ~= 0 then
      lua_util.debugm(N, task, "URL contains zero-width spaces")
      report(settings.symbols.zero_width, 7.0, hostname or "unknown")
    end
  end

  -- Homograph detection needs a host to inspect
  if cfg.check_homographs and hostname and rspamd_util.is_utf_spoofed(hostname) then
    lua_util.debugm(N, task, "URL uses homograph attack: %s", hostname)
    report(settings.symbols.homograph, 5.0, hostname)
  end

  -- "\226\128\174" is the UTF-8 encoding of U+202E (RTL override)
  if cfg.check_rtl_override and text:find("\226\128\174") then
    lua_util.debugm(N, task, "URL contains RTL override")
    report(settings.symbols.rtl_override, 6.0, hostname or "unknown")
  end

  return results
end
||||
|
|
||||
|
-- Check: URL structure anomalies |
||||
|
--- Look for structural anomalies in a URL.
-- Four independent sub-checks, each gated by its own cfg switch: too many
-- '@' signs, backslashes in an obscured URL, too many dots in the host, and
-- an overly long URL text.
-- @param task task object, used for debug logging
-- @param url URL object (get_text, get_host and get_flags_num are called)
-- @param cfg options of this check: check_multiple_at, max_at_signs,
--   check_backslash, check_excessive_dots, max_host_dots, check_length,
--   max_url_length
-- @return list of findings ({symbol, score, options}), possibly empty
function checks.structure_analysis(task, url, cfg)
  local results = {}
  local text = url:get_text()
  local hostname = url:get_host()

  -- Append one finding carrying a single option string
  local function report(symbol, score, option)
    results[#results + 1] = { symbol = symbol, score = score, options = { option } }
  end

  -- Count '@' characters; gsub's second result is the number of matches
  if cfg.check_multiple_at then
    local at_total = select(2, text:gsub("@", ""))
    if at_total > cfg.max_at_signs then
      lua_util.debugm(N, task, "URL has %d @ signs", at_total)
      report(settings.symbols.multiple_at, 3.0, string.format("%d", at_total))
    end
  end

  -- Backslashes only count when the URL is also flagged as obscured
  if cfg.check_backslash then
    local flag_bits = rspamd_url.flags
    local url_flags = url:get_flags_num()
    if bit.band(url_flags, flag_bits.obscured) ~= 0 then
      if text:find("\\", 1, true) then
        lua_util.debugm(N, task, "URL contains backslashes")
        report(settings.symbols.backslash, 2.0, hostname or "unknown")
      end
    end
  end

  -- Count dots in the hostname
  if cfg.check_excessive_dots and hostname then
    local dot_total = select(2, hostname:gsub("%.", ""))
    if dot_total > cfg.max_host_dots then
      lua_util.debugm(N, task, "URL hostname has %d dots", dot_total)
      report(settings.symbols.excessive_dots, 2.0, string.format("%d", dot_total))
    end
  end

  -- Overall URL length in bytes
  if cfg.check_length then
    local text_len = #text
    if text_len > cfg.max_url_length then
      lua_util.debugm(N, task, "URL is very long: %d chars", text_len)
      report(settings.symbols.very_long, 1.5, string.format("%d", text_len))
    end
  end

  return results
end
||||
|
|
||||
|
-- Main analysis function |
||||
|
-- Built-in checks in execution order. Each name is both the key of the
-- per-check section in cfg.checks and the prefix of the implementing
-- function checks.<name>_analysis.
local builtin_check_order = {
  'user_password',
  'numeric_ip',
  'tld',
  'unicode',
  'structure',
}

--- Run every enabled check against one URL and collect all findings.
-- A whitelisted host (when the whitelist map is enabled and loaded) skips all
-- checks. Built-in checks run in a fixed order, followed by user-supplied
-- custom checks, each of which is isolated with pcall.
-- @param task task object, used for logging and passed through to the checks
-- @param url URL object to analyse
-- @param cfg plugin settings; reads use_whitelist, checks and custom_checks
-- @return list of findings ({symbol, score, options}), possibly empty
local function analyze_url(task, url, cfg)
  local all_findings = {}

  -- Optional: check whitelist first
  if cfg.use_whitelist and maps.whitelist then
    local host = url:get_host()
    if host and maps.whitelist:get_key(host) then
      lua_util.debugm(N, task, "URL host is whitelisted: %s", host)
      return all_findings
    end
  end

  -- Run all enabled built-in checks (no maps required)
  local check_settings = cfg.checks or {}
  for _, check_name in ipairs(builtin_check_order) do
    local check_cfg = check_settings[check_name]
    if check_cfg and check_cfg.enabled then
      local findings = checks[check_name .. '_analysis'](task, url, check_cfg)
      for _, f in ipairs(findings) do
        all_findings[#all_findings + 1] = f
      end
    end
  end

  -- Run custom checks (advanced users). A custom check receives the whole
  -- plugin settings table and may return a single finding table or nothing.
  for name, check_func in pairs(cfg.custom_checks or {}) do
    local ok, res = pcall(check_func, task, url, cfg)
    if not ok then
      -- Only a raised error is reported; a check that simply returns
      -- nil/false has found nothing and must not be logged as a failure
      rspamd_logger.errx(task, "Error in custom check %s: %s", name, res)
    elseif type(res) == 'table' and res.symbol then
      all_findings[#all_findings + 1] = res
    end
  end

  return all_findings
end
||||
|
|
||||
|
-- Main callback |
||||
|
--- Symbol callback: analyse URLs selected by settings.process_flags and
-- insert one result per finding.
-- @param task task object being scanned
-- @return always false; results are inserted explicitly through
--   task:insert_result
local function url_suspect_callback(task)
  -- Get URLs with suspicious flags (using existing flags)
  local suspect_urls = task:get_urls_filtered(settings.process_flags)

  if not suspect_urls or #suspect_urls == 0 then
    return false
  end

  lua_util.debugm(N, task, "Processing %s URLs with suspicious flags", #suspect_urls)

  local total_findings = 0

  for _, url in ipairs(suspect_urls) do
    for _, finding in ipairs(analyze_url(task, url, settings)) do
      -- NOTE(review): the second argument of insert_result is a weight that
      -- rspamd multiplies by the symbol's configured score - confirm that
      -- finding.score is meant to be used as such a multiplier
      task:insert_result(finding.symbol, finding.score, finding.options or {})
      total_findings = total_findings + 1
    end
  end

  -- Backward compatibility: R_SUSPICIOUS_URL
  if settings.compat_mode and total_findings > 0 then
    -- Only add the legacy symbol if at least one of our symbols is actually
    -- present in the task results
    for _, symbol_name in pairs(settings.symbols) do
      if task:has_symbol(symbol_name) then
        -- Weight 1.0: the symbol is registered with score 5.0, so a weight of
        -- 5.0 here would yield 25 points instead of the intended 5
        task:insert_result('R_SUSPICIOUS_URL', 1.0)
        break
      end
    end
  end

  return false
end
||||
|
|
||||
|
-- Initialize maps (only if enabled) |
||||
|
--- Load the optional maps into the file-level `maps` table.
-- A map is loaded only when its feature switch is on and a map source is
-- configured. Missing per-check sections are tolerated, and a map that fails
-- to load is logged instead of being ignored silently.
-- @param cfg plugin settings; reads use_whitelist, whitelist_map and the
--   user_password, numeric_ip, tld and structure sections of cfg.checks
local function init_maps(cfg)
  -- Load one map into maps[slot] if it is enabled and has a source
  local function add_map(slot, enabled, source, map_type, description)
    if not (enabled and source) then
      return
    end

    local lua_maps = require "lua_maps"
    maps[slot] = lua_maps.map_add_from_ucl(source, map_type, description)
    if not maps[slot] then
      rspamd_logger.errx(rspamd_config, "cannot load map %s", description)
    end
  end

  -- Per-check sections may be absent; treat a missing one as "all disabled"
  local check_settings = cfg.checks or {}
  local user_password = check_settings.user_password or {}
  local numeric_ip = check_settings.numeric_ip or {}
  local tld = check_settings.tld or {}
  local structure = check_settings.structure or {}

  add_map('whitelist', cfg.use_whitelist, cfg.whitelist_map,
      'set', 'url_suspect_whitelist')
  add_map('user_patterns', user_password.use_pattern_map, user_password.pattern_map,
      'regexp', 'url_suspect_user_patterns')
  add_map('user_blacklist', user_password.use_blacklist, user_password.blacklist_map,
      'set', 'url_suspect_user_blacklist')
  add_map('suspicious_ips', numeric_ip.use_range_map, numeric_ip.range_map,
      'radix', 'url_suspect_ip_ranges')
  add_map('suspicious_tlds', tld.use_tld_map, tld.tld_map,
      'set', 'url_suspect_tlds')
  add_map('suspicious_ports', structure.use_port_map, structure.port_map,
      'set', 'url_suspect_ports')
end
||||
|
|
||||
|
-- Plugin registration |
||||
|
-- Merge user configuration (if any) over the built-in defaults
local opts = rspamd_config:get_all_opt(N)
if opts then
  settings = lua_util.override_defaults(settings, opts)
end

if settings.enabled then
  init_maps(settings)

  -- The callback symbol does the work; every reported symbol is registered
  -- below as a virtual child of it
  local parent_id = rspamd_config:register_symbol({
    name = 'URL_SUSPECT_CHECK',
    type = 'callback',
    callback = url_suspect_callback,
    priority = 10,
    group = 'url',
    flags = 'empty,nice'
  })

  -- Register `spec` as a virtual symbol of the callback in group 'url'
  local function register_virtual(spec)
    spec.type = 'virtual'
    spec.parent = parent_id
    spec.group = 'url'
    rspamd_config:register_symbol(spec)
  end

  -- Register all symbol names as virtual
  for _, symbol_name in pairs(settings.symbols) do
    register_virtual({ name = symbol_name })
  end

  -- Backward compat symbol
  if settings.compat_mode then
    register_virtual({
      name = 'R_SUSPICIOUS_URL',
      score = 5.0,
      description = 'Suspicious URL (legacy symbol)'
    })
  end
end
||||
Write
Preview
Loading…
Cancel
Save
Reference in new issue