Browse Source

[Feature] Add symbol categories for MetaDefender and VirusTotal

Implemented a category-based symbol system for hash lookup antivirus
scanners (MetaDefender and VirusTotal) to replace dynamic scoring:

- Added 4 symbol categories: CLEAN (-0.5), LOW (2.0), MEDIUM (5.0), HIGH (8.0)
- Replaced full_score_engines with threshold-based categorization (low_category, medium_category)
- Fixed symbol registration in antivirus.lua to use rule instead of config
- Updated cache format to preserve symbol category across requests
- Added backward compatibility for old cache format
- Added symbols registration and metric score assignment
- Updated configuration documentation with examples

The new system provides:
- Clear threat categorization instead of linear interpolation
- Proper symbol weights applied automatically
- Consistent behavior between MetaDefender and VirusTotal
- Cache that preserves symbol categories

Configuration example:
metadefender {
  apikey = "KEY";
  type = "metadefender";
  minimum_engines = 3;
  low_category = 5;
  medium_category = 10;
}
pull/5656/head
Vsevolod Stakhov 3 months ago
parent
commit
64fc71440b
No known key found for this signature in database GPG Key ID: 7647B6790081437
  1. 39
      conf/local.d/antivirus.conf.example
  2. 84
      conf/modules.d/antivirus.conf
  3. 26
      lualib/lua_scanners/common.lua
  4. 76
      lualib/lua_scanners/metadefender.lua
  5. 100
      lualib/lua_scanners/virustotal.lua
  6. 66
      src/plugins/lua/antivirus.lua

39
conf/local.d/antivirus.conf.example

@ -5,8 +5,8 @@ metadefender {
# Required: Your MetaDefender API key from https://metadefender.opswat.com/
apikey = "YOUR_API_KEY_HERE";
# Symbol name (default: METADEFENDER_VIRUS)
symbol = "METADEFENDER_VIRUS";
# Main symbol name (for compatibility, usually not used directly)
symbol = "METADEFENDER";
# Scanner type - must be "metadefender"
type = "metadefender";
@ -28,9 +28,14 @@ metadefender {
# Lower value = more sensitive, may have more false positives
minimum_engines = 3;
# Number of engines at which maximum score is assigned (default: 7)
# Scores scale linearly between minimum_engines and full_score_engines
full_score_engines = 7;
# Exclusive upper bound of the LOW category, i.e. the detection count at which MEDIUM starts (default: 5)
# Detections from minimum_engines to low_category-1 = LOW
low_category = 5;
# Exclusive upper bound of the MEDIUM category, i.e. the detection count at which HIGH starts (default: 10)
# Detections from low_category to medium_category-1 = MEDIUM
# Detections >= medium_category = HIGH
medium_category = 10;
# HTTP request timeout in seconds
timeout = 5.0;
@ -39,6 +44,30 @@ metadefender {
# Longer cache reduces API calls but may miss new detections
cache_expire = 7200;
# Symbol categories with scores (can be customized)
symbols = {
clean = {
symbol = "METADEFENDER_CLEAN";
score = -0.5;
description = "MetaDefender decided attachment to be clean";
};
low = {
symbol = "METADEFENDER_LOW";
score = 2.0;
description = "MetaDefender found low number of threats (3-4 engines)";
};
medium = {
symbol = "METADEFENDER_MEDIUM";
score = 5.0;
description = "MetaDefender found medium number of threats (5-9 engines)";
};
high = {
symbol = "METADEFENDER_HIGH";
score = 8.0;
description = "MetaDefender found high number of threats (10+ engines)";
};
}
# Optional: Force an action when malware is detected
# action = "reject";

84
conf/modules.d/antivirus.conf

@ -59,8 +59,8 @@ antivirus {
#
# If `max_size` is set, messages > n bytes in size are not scanned
#max_size = 20000000;
# symbol to add
#symbol = "METADEFENDER_VIRUS";
# Main symbol (for compatibility, usually not used directly)
#symbol = "METADEFENDER";
# type of scanner
#type = "metadefender";
# Your MetaDefender API key (required)
@ -71,12 +71,88 @@ antivirus {
#log_clean = false;
# Minimum number of engines detecting malware for a hit (default 3)
#minimum_engines = 3;
# Number of engines at which we assign full score (default 7)
#full_score_engines = 7;
# Exclusive upper bound of the LOW category: detections >= this value are at least MEDIUM (default 5)
#low_category = 5;
# Exclusive upper bound of the MEDIUM category: detections >= this value are HIGH (default 10)
#medium_category = 10;
# Request timeout
#timeout = 5.0;
# Redis cache expiration time in seconds (default 7200 = 2 hours)
#cache_expire = 7200;
# Symbol categories with scores (can be overridden)
#symbols = {
# clean = {
# symbol = "METADEFENDER_CLEAN";
# score = -0.5;
# description = "MetaDefender decided attachment to be clean";
# };
# low = {
# symbol = "METADEFENDER_LOW";
# score = 2.0;
# description = "MetaDefender found low number of threats";
# };
# medium = {
# symbol = "METADEFENDER_MEDIUM";
# score = 5.0;
# description = "MetaDefender found medium number of threats";
# };
# high = {
# symbol = "METADEFENDER_HIGH";
# score = 8.0;
# description = "MetaDefender found high number of threats";
# };
#}
#}
#virustotal {
# VirusTotal API (hash lookup)
# Get your API key at https://www.virustotal.com/
#
# If `max_size` is set, messages > n bytes in size are not scanned
#max_size = 20000000;
# Main symbol (for compatibility, usually not used directly)
#symbol = "VIRUSTOTAL";
# type of scanner
#type = "virustotal";
# Your VirusTotal API key (required)
#apikey = "YOUR_API_KEY_HERE";
# Scan mime_parts separately (default true)
#scan_mime_parts = true;
# You can enable logging for clean messages
#log_clean = false;
# Minimum number of engines detecting malware for a hit (default 3)
#minimum_engines = 3;
# Exclusive upper bound of the LOW category: detections >= this value are at least MEDIUM (default 5)
#low_category = 5;
# Exclusive upper bound of the MEDIUM category: detections >= this value are HIGH (default 10)
#medium_category = 10;
# Request timeout
#timeout = 5.0;
# Redis cache expiration time in seconds (default 7200 = 2 hours)
#cache_expire = 7200;
# Symbol categories with scores (can be overridden)
#symbols = {
# clean = {
# symbol = "VIRUSTOTAL_CLEAN";
# score = -0.5;
# description = "VirusTotal decided attachment to be clean";
# };
# low = {
# symbol = "VIRUSTOTAL_LOW";
# score = 2.0;
# description = "VirusTotal found low number of threats";
# };
# medium = {
# symbol = "VIRUSTOTAL_MEDIUM";
# score = 5.0;
# description = "VirusTotal found medium number of threats";
# };
# high = {
# symbol = "VIRUSTOTAL_HIGH";
# score = 8.0;
# description = "VirusTotal found high number of threats";
# };
#}
#}
.include(try=true,priority=5) "${DBDIR}/dynamic/antivirus.conf"

26
lualib/lua_scanners/common.lua

@ -13,7 +13,7 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
]]--
]] --
--[[[
-- @module lua_scanners_common
@ -30,7 +30,6 @@ local fun = require "fun"
local exports = {}
local function log_clean(task, rule, msg)
msg = msg or 'message or mime_part is clean'
if rule.log_clean then
@ -38,7 +37,6 @@ local function log_clean(task, rule, msg)
else
lua_util.debugm(rule.name, task, '%s: %s', rule.log_prefix, msg)
end
end
local function match_patterns(default_sym, found, patterns, dyn_weight)
@ -120,7 +118,6 @@ local function yield_result(task, rule, vname, dyn_weight, is_fail, maybe_part)
else
task:insert_result(symname, symscore, tm)
end
end
end
@ -217,7 +214,6 @@ local function dynamic_scan(task, rule)
end
local function need_check(task, content, rule, digest, fn, maybe_part)
local uncached = true
local key = digest
@ -236,11 +232,22 @@ local function need_check(task, content, rule, digest, fn, maybe_part)
yield_result(task, rule, 'File is encrypted',
0.0, 'encrypted', maybe_part)
else
-- Check if cached data contains symbol name (for category-based scanners)
-- Format: "SYMBOL_NAME\vdetails" or just "details"
if #threat_string >= 2 and rule.symbols then
-- New format with symbol name
local symbol_name = threat_string[1]
local details = threat_string[2]
lua_util.debugm(rule.name, task, '%s: got cached threat result for %s: %s - %s',
rule.log_prefix, key, symbol_name, details)
task:insert_result(symbol_name, 1.0, details)
else
-- Old format without symbol name
lua_util.debugm(rule.name, task, '%s: got cached threat result for %s: %s - score: %s',
rule.log_prefix, key, threat_string[1], score)
yield_result(task, rule, threat_string, score, false, maybe_part)
end
end
else
lua_util.debugm(rule.name, task, '%s: got cached negative result for %s: %s',
rule.log_prefix, key, threat_string[1])
@ -262,15 +269,11 @@ local function need_check(task, content, rule, digest, fn, maybe_part)
f_message_not_too_small and
f_message_min_words and
f_dynamic_scan then
fn()
end
end
if rule.redis_params and not rule.no_cache then
key = rule.prefix .. key
if lua_redis.redis_make_request(task,
@ -286,7 +289,6 @@ local function need_check(task, content, rule, digest, fn, maybe_part)
end
return false
end
local function save_cache(task, digest, rule, to_save, dyn_weight, maybe_part)
@ -396,7 +398,6 @@ local function gen_extension(fname)
end
local function check_parts_match(task, rule)
local filter_func = function(p)
local mtype, msubtype = p:get_type()
local detected_ext = p:get_detected_ext()
@ -489,7 +490,6 @@ local function check_parts_match(task, rule)
end
local function check_metric_results(task, rule)
if rule.action ~= 'reject' then
local metric_result = task:get_metric_score()
local metric_action = task:get_metric_action()

76
lualib/lua_scanners/metadefender.lua

@ -12,7 +12,7 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
]]--
]] --
--[[[
-- @module metadefender
@ -29,7 +29,6 @@ local common = require "lua_scanners/common"
local N = 'metadefender'
local function metadefender_config(opts)
local default_conf = {
name = N,
url = 'https://api.metadefender.com/v4/hash',
@ -47,7 +46,33 @@ local function metadefender_config(opts)
apikey = nil, -- Required to set by user
-- Specific for metadefender
minimum_engines = 3, -- Minimum required to get scored
full_score_engines = 7, -- After this number we set max score
-- Threshold-based categorization
low_category = 5, -- Low threat: minimum_engines to low_category-1
medium_category = 10, -- Medium threat: low_category to medium_category-1
-- High threat: medium_category and above
-- Symbol categories
symbols = {
clean = {
symbol = 'METADEFENDER_CLEAN',
score = -0.5,
description = 'MetaDefender decided attachment to be clean'
},
low = {
symbol = 'METADEFENDER_LOW',
score = 2.0,
description = 'MetaDefender found low number of threats'
},
medium = {
symbol = 'METADEFENDER_MEDIUM',
score = 5.0,
description = 'MetaDefender found medium number of threats'
},
high = {
symbol = 'METADEFENDER_HIGH',
score = 8.0,
description = 'MetaDefender found high number of threats'
},
},
}
default_conf = lua_util.override_defaults(default_conf, opts)
@ -102,7 +127,6 @@ local function metadefender_check(task, content, digest, rule, maybe_part)
task:insert_result(rule.symbol_fail, 1.0, 'HTTP error: ' .. http_err)
else
local cached
local dyn_score
-- Parse the response
if code ~= 200 then
if code == 404 then
@ -152,7 +176,6 @@ local function metadefender_check(task, content, digest, rule, maybe_part)
local total = scan_results.total_avs or 0
if detected == 0 then
cached = 'OK'
if rule['log_clean'] then
rspamd_logger.infox(task, '%s: hash %s clean',
rule.log_prefix, hash)
@ -160,27 +183,44 @@ local function metadefender_check(task, content, digest, rule, maybe_part)
lua_util.debugm(rule.name, task, '%s: hash %s clean',
rule.log_prefix, hash)
end
-- Insert CLEAN symbol
if rule.symbols and rule.symbols.clean then
local clean_sym = rule.symbols.clean.symbol or 'METADEFENDER_CLEAN'
local sopt = string.format("%s:0/%s", hash, total)
task:insert_result(clean_sym, 1.0, sopt)
-- Save with symbol name for proper cache retrieval
cached = string.format("%s\v%s", clean_sym, sopt)
else
cached = 'OK'
end
else
if detected < rule.minimum_engines then
lua_util.debugm(rule.name, task, '%s: hash %s has not enough hits: %s where %s is min',
rule.log_prefix, hash, detected, rule.minimum_engines)
cached = 'OK'
else
if detected >= rule.full_score_engines then
dyn_score = 1.0
-- Determine category based on detection count
local category
local category_sym
local sopt = string.format("%s:%s/%s", hash, detected, total)
if detected >= rule.medium_category then
category = 'high'
category_sym = rule.symbols.high.symbol or 'METADEFENDER_HIGH'
elseif detected >= rule.low_category then
category = 'medium'
category_sym = rule.symbols.medium.symbol or 'METADEFENDER_MEDIUM'
else
local norm_detected = detected - rule.minimum_engines
dyn_score = norm_detected / (rule.full_score_engines - rule.minimum_engines)
category = 'low'
category_sym = rule.symbols.low.symbol or 'METADEFENDER_LOW'
end
if dyn_score < 0 or dyn_score > 1 then
dyn_score = 1.0
end
rspamd_logger.infox(task, '%s: result - %s: "%s" - category: %s',
rule.log_prefix, rule.detection_category .. 'found', sopt, category)
local sopt = string.format("%s:%s/%s",
hash, detected, total)
common.yield_result(task, rule, sopt, dyn_score, nil, maybe_part)
cached = sopt
task:insert_result(category_sym, 1.0, sopt)
-- Save with symbol name for proper cache retrieval
cached = string.format("%s\v%s", category_sym, sopt)
end
end
else
@ -193,7 +233,7 @@ local function metadefender_check(task, content, digest, rule, maybe_part)
end
if cached then
common.save_cache(task, digest, rule, cached, dyn_score, maybe_part)
common.save_cache(task, digest, rule, cached, 1.0, maybe_part)
end
end
end
@ -206,10 +246,8 @@ local function metadefender_check(task, content, digest, rule, maybe_part)
metadefender_check_uncached) then
return
else
metadefender_check_uncached()
end
end
return {

100
lualib/lua_scanners/virustotal.lua

@ -12,7 +12,7 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
]]--
]] --
--[[[
-- @module virustotal
@ -29,7 +29,6 @@ local common = require "lua_scanners/common"
local N = 'virustotal'
local function virustotal_config(opts)
local default_conf = {
name = N,
url = 'https://www.virustotal.com/vtapi/v2/file',
@ -47,7 +46,33 @@ local function virustotal_config(opts)
apikey = nil, -- Required to set by user
-- Specific for virustotal
minimum_engines = 3, -- Minimum required to get scored
full_score_engines = 7, -- After this number we set max score
-- Threshold-based categorization
low_category = 5, -- Low threat: minimum_engines to low_category-1
medium_category = 10, -- Medium threat: low_category to medium_category-1
-- High threat: medium_category and above
-- Symbol categories
symbols = {
clean = {
symbol = 'VIRUSTOTAL_CLEAN',
score = -0.5,
description = 'VirusTotal decided attachment to be clean'
},
low = {
symbol = 'VIRUSTOTAL_LOW',
score = 2.0,
description = 'VirusTotal found low number of threats'
},
medium = {
symbol = 'VIRUSTOTAL_MEDIUM',
score = 5.0,
description = 'VirusTotal found medium number of threats'
},
high = {
symbol = 'VIRUSTOTAL_HIGH',
score = 8.0,
description = 'VirusTotal found high number of threats'
},
},
}
default_conf = lua_util.override_defaults(default_conf, opts)
@ -98,7 +123,6 @@ local function virustotal_check(task, content, digest, rule, maybe_part)
rspamd_logger.errx(task, 'HTTP error: %s, body: %s, headers: %s', http_err, body, headers)
else
local cached
local dyn_score
-- Parse the response
if code ~= 200 then
if code == 404 then
@ -133,7 +157,6 @@ local function virustotal_check(task, content, digest, rule, maybe_part)
if not obj.positives or type(obj.positives) ~= 'number' then
if obj.response_code then
if obj.response_code == 0 then
cached = 'OK'
if rule['log_clean'] then
rspamd_logger.infox(task, '%s: hash %s clean (not found)',
rule.log_prefix, hash)
@ -141,6 +164,16 @@ local function virustotal_check(task, content, digest, rule, maybe_part)
lua_util.debugm(rule.name, task, '%s: hash %s clean (not found)',
rule.log_prefix, hash)
end
-- Insert CLEAN symbol
if rule.symbols and rule.symbols.clean then
local clean_sym = rule.symbols.clean.symbol or 'VIRUSTOTAL_CLEAN'
local sopt = string.format("%s:0", hash)
task:insert_result(clean_sym, 1.0, sopt)
-- Save with symbol name for proper cache retrieval
cached = string.format("%s\v%s", clean_sym, sopt)
else
cached = 'OK'
end
else
rspamd_logger.errx(task, 'invalid JSON reply: %s, body: %s, headers: %s',
'bad response code: ' .. tostring(obj.response_code), body, headers)
@ -154,26 +187,51 @@ local function virustotal_check(task, content, digest, rule, maybe_part)
return
end
else
if obj.positives < rule.minimum_engines then
if obj.positives == 0 then
if rule['log_clean'] then
rspamd_logger.infox(task, '%s: hash %s clean',
rule.log_prefix, hash)
else
lua_util.debugm(rule.name, task, '%s: hash %s clean',
rule.log_prefix, hash)
end
-- Insert CLEAN symbol
if rule.symbols and rule.symbols.clean then
local clean_sym = rule.symbols.clean.symbol or 'VIRUSTOTAL_CLEAN'
local sopt = string.format("%s:0/%s", hash, obj.total or 0)
task:insert_result(clean_sym, 1.0, sopt)
-- Save with symbol name for proper cache retrieval
cached = string.format("%s\v%s", clean_sym, sopt)
else
cached = 'OK'
end
elseif obj.positives < rule.minimum_engines then
lua_util.debugm(rule.name, task, '%s: hash %s has not enough hits: %s where %s is min',
rule.log_prefix, obj.positives, rule.minimum_engines)
-- TODO: add proper hashing!
rule.log_prefix, hash, obj.positives, rule.minimum_engines)
cached = 'OK'
else
if obj.positives > rule.full_score_engines then
dyn_score = 1.0
-- Determine category based on detection count
local category
local category_sym
local sopt = string.format("%s:%s/%s", hash, obj.positives, obj.total)
if obj.positives >= rule.medium_category then
category = 'high'
category_sym = rule.symbols.high.symbol or 'VIRUSTOTAL_HIGH'
elseif obj.positives >= rule.low_category then
category = 'medium'
category_sym = rule.symbols.medium.symbol or 'VIRUSTOTAL_MEDIUM'
else
local norm_pos = obj.positives - rule.minimum_engines
dyn_score = norm_pos / (rule.full_score_engines - rule.minimum_engines)
category = 'low'
category_sym = rule.symbols.low.symbol or 'VIRUSTOTAL_LOW'
end
if dyn_score < 0 or dyn_score > 1 then
dyn_score = 1.0
end
local sopt = string.format("%s:%s/%s",
hash, obj.positives, obj.total)
common.yield_result(task, rule, sopt, dyn_score, nil, maybe_part)
cached = sopt
rspamd_logger.infox(task, '%s: result - %s: "%s" - category: %s',
rule.log_prefix, rule.detection_category .. 'found', sopt, category)
task:insert_result(category_sym, 1.0, sopt)
-- Save with symbol name for proper cache retrieval
cached = string.format("%s\v%s", category_sym, sopt)
end
end
else
@ -186,7 +244,7 @@ local function virustotal_check(task, content, digest, rule, maybe_part)
end
if cached then
common.save_cache(task, digest, rule, cached, dyn_score, maybe_part)
common.save_cache(task, digest, rule, cached, 1.0, maybe_part)
end
end
end
@ -199,10 +257,8 @@ local function virustotal_check(task, content, digest, rule, maybe_part)
virustotal_check_uncached) then
return
else
virustotal_check_uncached()
end
end
return {

66
src/plugins/lua/antivirus.lua

@ -123,6 +123,9 @@ local function add_antivirus_rule(sym, opts)
rule.symbol_encrypted = opts.symbol_encrypted
rule.redis_params = redis_params
-- Remember the main symbol name on the rule for symbol registration later
rule.symbol_main = opts.symbol
if not rule then
rspamd_logger.errx(rspamd_config, 'cannot configure %s for %s',
opts.type, opts.symbol)
@ -157,9 +160,9 @@ local function add_antivirus_rule(sym, opts)
rule.whitelist = rspamd_config:add_hash_map(opts.whitelist)
end
return function(task)
-- Return both callback and rule for symbol registration
local cb = function(task)
if rule.scan_mime_parts then
fun.each(function(p)
local content = p:get_content()
local clen = #content
@ -180,11 +183,12 @@ local function add_antivirus_rule(sym, opts)
cfg.check(task, content, p:get_digest(), rule, p)
end
end, common.check_parts_match(task, rule))
else
cfg.check(task, task:get_content(), task:get_digest(), rule)
end
end
return cb, rule
end
-- Registration
@ -200,15 +204,15 @@ if opts and type(opts) == 'table' then
if not m.name then
m.name = k
end
local cb = add_antivirus_rule(k, m)
local cb, rule = add_antivirus_rule(k, m)
if not cb then
rspamd_logger.errx(rspamd_config, 'cannot add rule: "' .. k .. '"')
lua_util.config_utils.push_config_error(N, 'cannot add AV rule: "' .. k .. '"')
else
rspamd_logger.infox(rspamd_config, 'added antivirus engine %s -> %s', k, m.symbol)
rspamd_logger.infox(rspamd_config, 'added antivirus engine %s -> %s', k, rule.symbol or m.symbol)
local t = {
name = m.symbol,
name = rule.symbol or m.symbol,
callback = cb,
score = 0.0,
group = N
@ -233,27 +237,27 @@ if opts and type(opts) == 'table' then
rspamd_config:register_symbol({
type = 'virtual',
name = m['symbol_fail'],
name = rule.symbol_fail or m['symbol_fail'],
parent = id,
score = 0.0,
group = N
})
rspamd_config:register_symbol({
type = 'virtual',
name = m['symbol_encrypted'],
name = rule.symbol_encrypted or m['symbol_encrypted'],
parent = id,
score = 0.0,
group = N
})
rspamd_config:register_symbol({
type = 'virtual',
name = m['symbol_macro'],
name = rule.symbol_macro or m['symbol_macro'],
parent = id,
score = 0.0,
group = N
})
has_valid = true
if type(m['patterns']) == 'table' then
if type(rule.patterns) == 'table' and type(m['patterns']) == 'table' then
if m['patterns'][1] then
for _, p in ipairs(m['patterns']) do
if type(p) == 'table' then
@ -321,6 +325,48 @@ if opts and type(opts) == 'table' then
end
end
end
if rule.symbols then
rspamd_logger.infox(rspamd_config, 'registering category symbols for %s', rule.name)
-- Walks a symbols table and registers every symbol it describes as a virtual
-- symbol attached to the parent `id` in group `N`.
--
-- Each value of `tbl` is handled according to its type:
--   * string: taken as a symbol name and registered as is;
--   * table with a `symbol` field: registered under that name and, when a
--     `score` field is present, also given a metric entry (score,
--     description, group);
--   * table without a `symbol` field: treated as a nested container and
--     walked recursively;
--   * anything else is ignored.
local function reg_symbols(tbl)
  -- Shared registration call for both the string and the table form.
  local function add_virtual(name)
    rspamd_config:register_symbol({
      type = 'virtual',
      name = name,
      parent = id,
      group = N
    })
  end

  for _, entry in pairs(tbl) do
    local kind = type(entry)

    if kind == 'string' then
      rspamd_logger.infox(rspamd_config, 'registering symbol: %s (string)', entry)
      add_virtual(entry)
    elseif kind == 'table' then
      if not entry.symbol then
        -- No symbol name at this level: descend into the nested table
        reg_symbols(entry)
      else
        rspamd_logger.infox(rspamd_config, 'registering symbol: %s with score %s',
            entry.symbol, entry.score or 'default')
        add_virtual(entry.symbol)

        -- A metric entry is only created when a score is explicitly given
        if entry.score then
          rspamd_config:set_metric_symbol({
            name = entry.symbol,
            score = entry.score,
            description = entry.description,
            group = entry.group or N,
          })
        end
      end
    end
  end
end
reg_symbols(rule.symbols)
else
rspamd_logger.infox(rspamd_config, 'no category symbols defined for %s', rule.name)
end
if m['score'] then
-- Register metric symbol
local description = 'antivirus symbol'

Loading…
Cancel
Save