Merge pull request #5656 from rspamd/cursor/RSP-271-implement-metadefender-hash-lookup-module-09c0

Implement Metadefender hash lookup module
2 weeks ago · 1355b2afb8
7 changed files with 639 additions and 98 deletions
--- a/conf/local.d/antivirus.conf.example
+++ b/conf/local.d/antivirus.conf.example
@ -0,0 +1,76 @@
+# Example MetaDefender configuration
+# Copy relevant sections to local.d/antivirus.conf and customize
+
+metadefender {
+  # Required: Your MetaDefender API key from https://metadefender.opswat.com/
+  apikey = "YOUR_API_KEY_HERE";
+
+  # Main symbol name (for compatibility, usually not used directly)
+  symbol = "METADEFENDER";
+
+  # Scanner type - must be "metadefender"
+  type = "metadefender";
+
+  # Scan MIME parts separately instead of full message (recommended: true)
+  scan_mime_parts = true;
+
+  # Don't scan text or image MIME parts (saves API quota)
+  scan_text_mime = false;
+  scan_image_mime = false;
+
+  # Maximum file size to scan (20MB default)
+  max_size = 20000000;
+
+  # Log when files are clean (default: false to reduce noise)
+  log_clean = false;
+
+  # Minimum AV engines that must detect malware before flagging (default: 3)
+  # Lower value = more sensitive, may have more false positives
+  minimum_engines = 3;
+
+  # Threshold for low category (default: 5)
+  # Detections from minimum_engines to low_category-1 = LOW
+  low_category = 5;
+
+  # Threshold for medium category (default: 10)
+  # Detections from low_category to medium_category-1 = MEDIUM
+  # Detections >= medium_category = HIGH
+  medium_category = 10;
+
+  # HTTP request timeout in seconds
+  timeout = 5.0;
+
+  # Redis cache expiration (2 hours = 7200 seconds)
+  # Longer cache reduces API calls but may miss new detections
+  cache_expire = 7200;
+
+  # Symbol categories with scores (can be customized)
+  symbols = {
+    clean = {
+      symbol = "METADEFENDER_CLEAN";
+      score = -0.5;
+      description = "MetaDefender decided attachment to be clean";
+    };
+    low = {
+      symbol = "METADEFENDER_LOW";
+      score = 2.0;
+      description = "MetaDefender found low number of threats (3-4 engines)";
+    };
+    medium = {
+      symbol = "METADEFENDER_MEDIUM";
+      score = 5.0;
+      description = "MetaDefender found medium number of threats (5-9 engines)";
+    };
+    high = {
+      symbol = "METADEFENDER_HIGH";
+      score = 8.0;
+      description = "MetaDefender found high number of threats (10+ engines)";
+    };
+  }
+
+  # Optional: Force an action when malware is detected
+  # action = "reject";
+
+  # Optional: Custom message template
+  # message = '${SCANNER}: virus found: "${VIRUS}"';
+}
--- a/conf/modules.d/antivirus.conf
+++ b/conf/modules.d/antivirus.conf
@ -52,6 +52,109 @@ antivirus {
    #whitelist = "/etc/rspamd/antivirus.wl";
  #}

+  #metadefender {
+    # MetaDefender Cloud API (hash lookup)
+    # Get your free API key at https://metadefender.opswat.com/
+    # Free plan allows: 150 prevention requests/day, 1000 reputation requests/day
+    #
+    # If `max_size` is set, messages larger than `max_size` bytes are not scanned
+    #max_size = 20000000;
+    # Main symbol (for compatibility, usually not used directly)
+    #symbol = "METADEFENDER";
+    # type of scanner
+    #type = "metadefender";
+    # Your MetaDefender API key (required)
+    #apikey = "YOUR_API_KEY_HERE";
+    # Scan mime_parts separately (default true)
+    #scan_mime_parts = true;
+    # You can enable logging for clean messages
+    #log_clean = false;
+    # Minimum number of engines detecting malware for a hit (default 3)
+    #minimum_engines = 3;
+    # Threshold for low category (default 5)
+    #low_category = 5;
+    # Threshold for medium category (default 10)
+    #medium_category = 10;
+    # Request timeout
+    #timeout = 5.0;
+    # Redis cache expiration time in seconds (default 7200 = 2 hours)
+    #cache_expire = 7200;
+    # Symbol categories with scores (can be overridden)
+    #symbols = {
+    #  clean = {
+    #    symbol = "METADEFENDER_CLEAN";
+    #    score = -0.5;
+    #    description = "MetaDefender decided attachment to be clean";
+    #  };
+    #  low = {
+    #    symbol = "METADEFENDER_LOW";
+    #    score = 2.0;
+    #    description = "MetaDefender found low number of threats";
+    #  };
+    #  medium = {
+    #    symbol = "METADEFENDER_MEDIUM";
+    #    score = 5.0;
+    #    description = "MetaDefender found medium number of threats";
+    #  };
+    #  high = {
+    #    symbol = "METADEFENDER_HIGH";
+    #    score = 8.0;
+    #    description = "MetaDefender found high number of threats";
+    #  };
+    #}
+  #}
+
+  #virustotal {
+    # VirusTotal API (hash lookup)
+    # Get your API key at https://www.virustotal.com/
+    #
+    # If `max_size` is set, messages larger than `max_size` bytes are not scanned
+    #max_size = 20000000;
+    # Main symbol (for compatibility, usually not used directly)
+    #symbol = "VIRUSTOTAL";
+    # type of scanner
+    #type = "virustotal";
+    # Your VirusTotal API key (required)
+    #apikey = "YOUR_API_KEY_HERE";
+    # Scan mime_parts separately (default true)
+    #scan_mime_parts = true;
+    # You can enable logging for clean messages
+    #log_clean = false;
+    # Minimum number of engines detecting malware for a hit (default 3)
+    #minimum_engines = 3;
+    # Threshold for low category (default 5)
+    #low_category = 5;
+    # Threshold for medium category (default 10)
+    #medium_category = 10;
+    # Request timeout
+    #timeout = 5.0;
+    # Redis cache expiration time in seconds (default 7200 = 2 hours)
+    #cache_expire = 7200;
+    # Symbol categories with scores (can be overridden)
+    #symbols = {
+    #  clean = {
+    #    symbol = "VIRUSTOTAL_CLEAN";
+    #    score = -0.5;
+    #    description = "VirusTotal decided attachment to be clean";
+    #  };
+    #  low = {
+    #    symbol = "VIRUSTOTAL_LOW";
+    #    score = 2.0;
+    #    description = "VirusTotal found low number of threats";
+    #  };
+    #  medium = {
+    #    symbol = "VIRUSTOTAL_MEDIUM";
+    #    score = 5.0;
+    #    description = "VirusTotal found medium number of threats";
+    #  };
+    #  high = {
+    #    symbol = "VIRUSTOTAL_HIGH";
+    #    score = 8.0;
+    #    description = "VirusTotal found high number of threats";
+    #  };
+    #}
+  #}
+
  .include(try=true,priority=5) "${DBDIR}/dynamic/antivirus.conf"
  .include(try=true,priority=1,duplicate=merge) "$LOCAL_CONFDIR/local.d/antivirus.conf"
  .include(try=true,priority=10) "$LOCAL_CONFDIR/override.d/antivirus.conf"
--- a/lualib/lua_scanners/common.lua
+++ b/lualib/lua_scanners/common.lua
@ -13,7 +13,7 @@ distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
-]]--
+]] --

 --[[[
 -- @module lua_scanners_common
@ -30,7 +30,6 @@ local fun = require "fun"
 local exports = {}

 local function log_clean(task, rule, msg)
-
  msg = msg or 'message or mime_part is clean'

  if rule.log_clean then
@ -38,7 +37,6 @@ local function log_clean(task, rule, msg)
  else
    lua_util.debugm(rule.name, task, '%s: %s', rule.log_prefix, msg)
  end
-
 end

 local function match_patterns(default_sym, found, patterns, dyn_weight)
@ -111,16 +109,15 @@ local function yield_result(task, rule, vname, dyn_weight, is_fail, maybe_part)
    else
      all_whitelisted = false
      rspamd_logger.infox(task, '%s: result - %s: "%s - score: %s"',
-          rule.log_prefix, threat_info, tm, symscore)
+        rule.log_prefix, threat_info, tm, symscore)

      if maybe_part and rule.show_attachments and maybe_part:get_filename() then
        local fname = maybe_part:get_filename()
        task:insert_result(symname, symscore, string.format("%s|%s",
-            tm, fname))
+          tm, fname))
      else
        task:insert_result(symname, symscore, tm)
      end
-
    end
  end

@ -130,10 +127,10 @@ local function yield_result(task, rule, vname, dyn_weight, is_fail, maybe_part)
      flags = 'least'
    end
    task:set_pre_result(rule.action,
-        lua_util.template(rule.message or 'Rejected', {
-          SCANNER = rule.name,
-          VIRUS = threat_table,
-        }), rule.name, nil, nil, flags)
+      lua_util.template(rule.message or 'Rejected', {
+        SCANNER = rule.name,
+        VIRUS = threat_table,
+      }), rule.name, nil, nil, flags)
  end
 end

@ -144,7 +141,7 @@ local function message_not_too_large(task, content, rule)
  end
  if #content > max_size then
    rspamd_logger.infox(task, "skip %s check as it is too large: %s (%s is allowed)",
-        rule.log_prefix, #content, max_size)
+      rule.log_prefix, #content, max_size)
    return false
  end
  return true
@ -157,7 +154,7 @@ local function message_not_too_small(task, content, rule)
  end
  if #content < min_size then
    rspamd_logger.infox(task, "skip %s check as it is too small: %s (%s is allowed)",
-        rule.log_prefix, #content, min_size)
+      rule.log_prefix, #content, min_size)
    return false
  end
  return true
@ -178,7 +175,7 @@ local function message_min_words(task, rule)

    if not text_part_above_limit then
      rspamd_logger.infox(task, '%s: #words in all text parts is below text_part_min_words limit: %s',
-          rule.log_prefix, rule.text_part_min_words)
+        rule.log_prefix, rule.text_part_min_words)
    end

    return text_part_above_limit
@ -217,7 +214,6 @@ local function dynamic_scan(task, rule)
 end

 local function need_check(task, content, rule, digest, fn, maybe_part)
-
  local uncached = true
  local key = digest

@ -231,19 +227,30 @@ local function need_check(task, content, rule, digest, fn, maybe_part)
      if threat_string[1] ~= 'OK' then
        if threat_string[1] == 'MACRO' then
          yield_result(task, rule, 'File contains macros',
-              0.0, 'macro', maybe_part)
+            0.0, 'macro', maybe_part)
        elseif threat_string[1] == 'ENCRYPTED' then
          yield_result(task, rule, 'File is encrypted',
-              0.0, 'encrypted', maybe_part)
+            0.0, 'encrypted', maybe_part)
        else
-          lua_util.debugm(rule.name, task, '%s: got cached threat result for %s: %s - score: %s',
+          -- Check if cached data contains symbol name (for category-based scanners)
+          -- Format: "SYMBOL_NAME\vdetails" or just "details"
+          if #threat_string >= 2 and rule.symbols then
+            -- New format with symbol name
+            local symbol_name = threat_string[1]
+            local details = threat_string[2]
+            lua_util.debugm(rule.name, task, '%s: got cached threat result for %s: %s - %s',
+              rule.log_prefix, key, symbol_name, details)
+            task:insert_result(symbol_name, 1.0, details)
+          else
+            -- Old format without symbol name
+            lua_util.debugm(rule.name, task, '%s: got cached threat result for %s: %s - score: %s',
              rule.log_prefix, key, threat_string[1], score)
-          yield_result(task, rule, threat_string, score, false, maybe_part)
+            yield_result(task, rule, threat_string, score, false, maybe_part)
+          end
        end
-
      else
        lua_util.debugm(rule.name, task, '%s: got cached negative result for %s: %s',
-            rule.log_prefix, key, threat_string[1])
+          rule.log_prefix, key, threat_string[1])
      end
      uncached = false
    else
@ -262,31 +269,26 @@ local function need_check(task, content, rule, digest, fn, maybe_part)
        f_message_not_too_small and
        f_message_min_words and
        f_dynamic_scan then
-
      fn()
-
    end
-
  end

  if rule.redis_params and not rule.no_cache then
-
    key = rule.prefix .. key

    if lua_redis.redis_make_request(task,
-        rule.redis_params, -- connect params
-        key, -- hash key
-        false, -- is write
-        redis_av_cb, --callback
-        'GET', -- command
-        { key } -- arguments)
-    ) then
+          rule.redis_params, -- connect params
+          key,             -- hash key
+          false,           -- is write
+          redis_av_cb,     --callback
+          'GET',           -- command
+          { key }          -- arguments)
+        ) then
      return true
    end
  end

  return false
-
 end

 local function save_cache(task, digest, rule, to_save, dyn_weight, maybe_part)
@ -299,10 +301,10 @@ local function save_cache(task, digest, rule, to_save, dyn_weight, maybe_part)
    -- Do nothing
    if err then
      rspamd_logger.errx(task, 'failed to save %s cache for %s -> "%s": %s',
-          rule.detection_category, to_save, key, err)
+        rule.detection_category, to_save, key, err)
    else
      lua_util.debugm(rule.name, task, '%s: saved cached result for %s: %s - score %s - ttl %s',
-          rule.log_prefix, key, to_save, dyn_weight, rule.cache_expire)
+        rule.log_prefix, key, to_save, dyn_weight, rule.cache_expire)
    end
  end

@ -321,12 +323,12 @@ local function save_cache(task, digest, rule, to_save, dyn_weight, maybe_part)
    key = rule.prefix .. key

    lua_redis.redis_make_request(task,
-        rule.redis_params, -- connect params
-        key, -- hash key
-        true, -- is write
-        redis_set_cb, --callback
-        'SETEX', -- command
-        { key, rule.cache_expire or 0, value }
+      rule.redis_params,   -- connect params
+      key,                 -- hash key
+      true,                -- is write
+      redis_set_cb,        --callback
+      'SETEX',             -- command
+      { key, rule.cache_expire or 0, value }
    )
  end

@ -396,7 +398,6 @@ local function gen_extension(fname)
 end

 local function check_parts_match(task, rule)
-
  local filter_func = function(p)
    local mtype, msubtype = p:get_type()
    local detected_ext = p:get_detected_ext()
@ -434,7 +435,7 @@ local function check_parts_match(task, rule)
          return true
        elseif magic.ct and match_filter(task, rule, magic.ct, rule.mime_parts_filter_regex, 'regex') then
          lua_util.debugm(rule.name, task, '%s: regex detected libmagic content-type: %s',
-              rule.log_prefix, magic.ct)
+            rule.log_prefix, magic.ct)
          return true
        end
      end
@ -489,7 +490,6 @@ local function check_parts_match(task, rule)
 end

 local function check_metric_results(task, rule)
-
  if rule.action ~= 'reject' then
    local metric_result = task:get_metric_score()
    local metric_action = task:get_metric_action()
--- a/lualib/lua_scanners/init.lua
+++ b/lualib/lua_scanners/init.lua
@ -38,6 +38,7 @@ require_scanner('kaspersky_se')
 require_scanner('savapi')
 require_scanner('sophos')
 require_scanner('virustotal')
+require_scanner('metadefender')
 require_scanner('avast')

 -- Other scanners
--- a/lualib/lua_scanners/metadefender.lua
+++ b/lualib/lua_scanners/metadefender.lua
@ -0,0 +1,259 @@
+--[[
+Copyright (c) 2025, Vsevolod Stakhov <vsevolod@rspamd.com>
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+]] --
+
+--[[[
+-- @module metadefender
+-- This module contains MetaDefender Cloud integration support for hash lookups
+-- https://metadefender.opswat.com/
+--]]
+
+local lua_util = require "lua_util"
+local http = require "rspamd_http"
+local rspamd_cryptobox_hash = require "rspamd_cryptobox_hash"
+local rspamd_logger = require "rspamd_logger"
+local common = require "lua_scanners/common"
+
+local N = 'metadefender'
+
+--- Build the effective rule configuration for a MetaDefender scanner block.
+-- User options are merged over the built-in defaults, then the redis key
+-- prefix and the log prefix are derived when not set explicitly, and the rule
+-- name is registered as a debug alias of `external_services`.
+-- @param opts user supplied options for this scanner block
+-- @return the merged configuration table, or nil when no `apikey` is set
+local function metadefender_config(opts)
+  local default_conf = {
+    name = N,
+    url = 'https://api.metadefender.com/v4/hash',
+    timeout = 5.0,
+    log_clean = false,
+    retransmits = 1,
+    cache_expire = 7200, -- expire redis in 2h
+    -- Rejection message template; this rule reports viruses
+    -- (detection_category = "virus"), so the wording must say so as well
+    message = '${SCANNER}: virus found: "${VIRUS}"',
+    detection_category = "virus",
+    default_score = 1,
+    action = false,
+    scan_mime_parts = true,
+    scan_text_mime = false,
+    scan_image_mime = false,
+    apikey = nil,         -- Required to set by user
+    -- Specific for metadefender
+    minimum_engines = 3,  -- Minimum required to get scored
+    -- Threshold-based categorization
+    low_category = 5,     -- Low threat: minimum_engines to low_category-1
+    medium_category = 10, -- Medium threat: low_category to medium_category-1
+    -- High threat: medium_category and above
+    -- Symbol categories
+    symbols = {
+      clean = {
+        symbol = 'METADEFENDER_CLEAN',
+        score = -0.5,
+        description = 'MetaDefender decided attachment to be clean'
+      },
+      low = {
+        symbol = 'METADEFENDER_LOW',
+        score = 2.0,
+        description = 'MetaDefender found low number of threats'
+      },
+      medium = {
+        symbol = 'METADEFENDER_MEDIUM',
+        score = 5.0,
+        description = 'MetaDefender found medium number of threats'
+      },
+      high = {
+        symbol = 'METADEFENDER_HIGH',
+        score = 8.0,
+        description = 'MetaDefender found high number of threats'
+      },
+    },
+  }
+
+  default_conf = lua_util.override_defaults(default_conf, opts)
+
+  -- No lookup is possible without an API key, so give up before deriving
+  -- anything else from the configuration
+  if not default_conf.apikey then
+    rspamd_logger.errx(rspamd_config, 'no apikey defined for metadefender, disable checks')
+
+    return nil
+  end
+
+  if not default_conf.prefix then
+    default_conf.prefix = 'rs_' .. default_conf.name .. '_'
+  end
+
+  if not default_conf.log_prefix then
+    -- `type` is not part of the defaults above, it only comes from `opts`;
+    -- fall back to the module name so a missing value cannot raise here
+    local scanner_type = default_conf.type or N
+    if default_conf.name:lower() == scanner_type:lower() then
+      default_conf.log_prefix = default_conf.name
+    else
+      default_conf.log_prefix = default_conf.name .. ' (' .. scanner_type .. ')'
+    end
+  end
+
+  lua_util.add_debug_alias('external_services', default_conf.name)
+  return default_conf
+end
+
+--- Look up `content` in MetaDefender Cloud by its SHA256 hash.
+-- The HTTP reply is mapped to one of the rule's category symbols
+-- (clean/low/medium/high) based on how many engines reported a detection.
+-- Definite answers are stored through `common.save_cache`; failures insert
+-- `rule.symbol_fail` and are not cached.
+-- @param task rspamd task being processed
+-- @param content data whose hash is looked up
+-- @param digest key handed to the shared cache helpers
+-- @param rule rule configuration as returned by `metadefender_config`
+-- @param maybe_part optional mime part, forwarded to the cache helper
+local function metadefender_check(task, content, digest, rule, maybe_part)
+  local function metadefender_check_uncached()
+    -- MetaDefender uses SHA256 hash
+    local hash = rspamd_cryptobox_hash.create_specific('sha256')
+    hash:update(content)
+    hash = hash:hex()
+
+    local url = string.format('%s/%s', rule.url, hash)
+    -- Use the rule name (not the module constant) so this message follows the
+    -- same debug module as every other debug line of this rule
+    lua_util.debugm(rule.name, task, "send request %s", url)
+
+    -- Report a failed lookup through the rule's fail symbol
+    local function insert_fail(reason)
+      task:insert_result(rule.symbol_fail, 1.0, reason)
+    end
+
+    -- Log a clean verdict: info level when `log_clean` is enabled, debug
+    -- otherwise. `fmt` receives the log prefix and the hash.
+    local function note_clean(fmt)
+      if rule['log_clean'] then
+        rspamd_logger.infox(task, fmt, rule.log_prefix, hash)
+      else
+        lua_util.debugm(rule.name, task, fmt, rule.log_prefix, hash)
+      end
+    end
+
+    -- Resolve the symbol name configured for a category, falling back to the
+    -- built-in name when the category entry or its `symbol` field is missing
+    local function category_symbol(category, fallback)
+      local def = rule.symbols and rule.symbols[category]
+      if type(def) == 'table' and def.symbol then
+        return def.symbol
+      end
+      return fallback
+    end
+
+    local function md_http_callback(http_err, code, body, headers)
+      if http_err then
+        rspamd_logger.errx(task, 'HTTP error: %s, body: %s, headers: %s', http_err, body, headers)
+        insert_fail('HTTP error: ' .. tostring(http_err))
+        return
+      end
+
+      local cached
+
+      if code == 404 then
+        -- Unknown hash: treated as clean and cached, but no symbol is inserted
+        cached = 'OK'
+        note_clean('%s: hash %s clean (not found)')
+      elseif code == 429 then
+        -- Request rate limit exceeded
+        rspamd_logger.infox(task, 'metadefender request rate limit exceeded')
+        insert_fail('rate limit exceeded')
+        return
+      elseif code ~= 200 then
+        rspamd_logger.errx(task, 'invalid HTTP code: %s, body: %s, headers: %s', code, body, headers)
+        insert_fail('Bad HTTP code: ' .. tostring(code))
+        return
+      else
+        local ucl = require "ucl"
+        local parser = ucl.parser()
+        local res, json_err = parser:parse_string(body)
+
+        lua_util.debugm(rule.name, task, '%s: got reply data: "%s"',
+          rule.log_prefix, body)
+
+        if not res then
+          rspamd_logger.errx(task, 'invalid JSON reply: %s, body: %s, headers: %s',
+            json_err, body, headers)
+          insert_fail('Bad JSON reply: ' .. tostring(json_err))
+          return
+        end
+
+        local obj = parser:get_object()
+
+        -- MetaDefender API response structure:
+        -- scan_results.scan_all_result_a: 'Clean', 'Infected', 'Suspicious'
+        -- scan_results.scan_all_result_i: numeric result (0=clean)
+        -- scan_results.total_detected_avs: number of engines detecting malware
+        -- scan_results.total_avs: total number of engines
+
+        -- A reply that is not an object, or whose scan_results is not an
+        -- object, is malformed: it must not raise here nor count as clean
+        if type(obj) ~= 'table' or type(obj.scan_results) ~= 'table' then
+          rspamd_logger.errx(task, 'invalid JSON reply: no scan_results field, body: %s', body)
+          insert_fail('Bad JSON reply: no scan_results')
+          return
+        end
+
+        local scan_results = obj.scan_results
+        -- Coerce the counters so a non-numeric value cannot break the
+        -- threshold comparisons below
+        local detected = tonumber(scan_results.total_detected_avs) or 0
+        local total = tonumber(scan_results.total_avs) or 0
+
+        if detected == 0 then
+          note_clean('%s: hash %s clean')
+          -- Insert CLEAN symbol
+          if rule.symbols and rule.symbols.clean then
+            local clean_sym = category_symbol('clean', 'METADEFENDER_CLEAN')
+            local sopt = string.format("%s:0/%s", hash, total)
+            task:insert_result(clean_sym, 1.0, sopt)
+            -- Save with symbol name for proper cache retrieval
+            cached = string.format("%s\v%s", clean_sym, sopt)
+          else
+            cached = 'OK'
+          end
+        elseif detected < rule.minimum_engines then
+          lua_util.debugm(rule.name, task, '%s: hash %s has not enough hits: %s where %s is min',
+            rule.log_prefix, hash, detected, rule.minimum_engines)
+          cached = 'OK'
+        else
+          -- Determine category based on detection count
+          local category
+          local category_sym
+          local sopt = string.format("%s:%s/%s", hash, detected, total)
+
+          if detected >= rule.medium_category then
+            category = 'high'
+            category_sym = category_symbol('high', 'METADEFENDER_HIGH')
+          elseif detected >= rule.low_category then
+            category = 'medium'
+            category_sym = category_symbol('medium', 'METADEFENDER_MEDIUM')
+          else
+            category = 'low'
+            category_sym = category_symbol('low', 'METADEFENDER_LOW')
+          end
+
+          rspamd_logger.infox(task, '%s: result - %s: "%s" - category: %s',
+            rule.log_prefix, rule.detection_category .. 'found', sopt, category)
+
+          task:insert_result(category_sym, 1.0, sopt)
+          -- Save with symbol name for proper cache retrieval
+          cached = string.format("%s\v%s", category_sym, sopt)
+        end
+      end
+
+      if cached then
+        common.save_cache(task, digest, rule, cached, 1.0, maybe_part)
+      end
+    end
+
+    http.request({
+      task = task,
+      url = url,
+      timeout = rule.timeout,
+      headers = {
+        ['apikey'] = rule.apikey,
+      },
+      callback = md_http_callback,
+    })
+  end
+
+  -- NOTE(review): condition_check_and_continue presumably returns true when it
+  -- has taken over (cache lookup scheduled, the uncached check being invoked
+  -- from its callback) - confirm against lua_scanners/common
+  if not common.condition_check_and_continue(task, content, rule, digest,
+        metadefender_check_uncached) then
+    metadefender_check_uncached()
+  end
+end
+
+-- Scanner descriptor returned to the code that requires this module:
+-- identification first, then the configure/check entry points
+return {
+  name = N,
+  type = 'antivirus',
+  description = 'MetaDefender Cloud integration',
+  check = metadefender_check,
+  configure = metadefender_config,
+}
--- a/lualib/lua_scanners/virustotal.lua
+++ b/lualib/lua_scanners/virustotal.lua
@ -12,7 +12,7 @@ distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
-]]--
+]] --

 --[[[
 -- @module virustotal
@ -29,7 +29,6 @@ local common = require "lua_scanners/common"
 local N = 'virustotal'

 local function virustotal_config(opts)
-
  local default_conf = {
    name = N,
    url = 'https://www.virustotal.com/vtapi/v2/file',
@ -44,10 +43,36 @@ local function virustotal_config(opts)
    scan_mime_parts = true,
    scan_text_mime = false,
    scan_image_mime = false,
-    apikey = nil, -- Required to set by user
+    apikey = nil,         -- Required to set by user
    -- Specific for virustotal
-    minimum_engines = 3, -- Minimum required to get scored
-    full_score_engines = 7, -- After this number we set max score
+    minimum_engines = 3,  -- Minimum required to get scored
+    -- Threshold-based categorization
+    low_category = 5,     -- Low threat: minimum_engines to low_category-1
+    medium_category = 10, -- Medium threat: low_category to medium_category-1
+    -- High threat: medium_category and above
+    -- Symbol categories
+    symbols = {
+      clean = {
+        symbol = 'VIRUSTOTAL_CLEAN',
+        score = -0.5,
+        description = 'VirusTotal decided attachment to be clean'
+      },
+      low = {
+        symbol = 'VIRUSTOTAL_LOW',
+        score = 2.0,
+        description = 'VirusTotal found low number of threats'
+      },
+      medium = {
+        symbol = 'VIRUSTOTAL_MEDIUM',
+        score = 5.0,
+        description = 'VirusTotal found medium number of threats'
+      },
+      high = {
+        symbol = 'VIRUSTOTAL_HIGH',
+        score = 8.0,
+        description = 'VirusTotal found high number of threats'
+      },
+    },
  }

  default_conf = lua_util.override_defaults(default_conf, opts)
@ -78,7 +103,7 @@ local function virustotal_check(task, content, digest, rule, maybe_part)
  local function virustotal_check_uncached()
    local function make_url(hash)
      return string.format('%s/report?apikey=%s&resource=%s',
-          rule.url, rule.apikey, hash)
+        rule.url, rule.apikey, hash)
    end

    local hash = rspamd_cryptobox_hash.create_specific('md5')
@ -98,17 +123,16 @@ local function virustotal_check(task, content, digest, rule, maybe_part)
        rspamd_logger.errx(task, 'HTTP error: %s, body: %s, headers: %s', http_err, body, headers)
      else
        local cached
-        local dyn_score
        -- Parse the response
        if code ~= 200 then
          if code == 404 then
            cached = 'OK'
            if rule['log_clean'] then
              rspamd_logger.infox(task, '%s: hash %s clean (not found)',
-                  rule.log_prefix, hash)
+                rule.log_prefix, hash)
            else
              lua_util.debugm(rule.name, task, '%s: hash %s clean (not found)',
-                  rule.log_prefix, hash)
+                rule.log_prefix, hash)
            end
          elseif code == 204 then
            -- Request rate limit exceeded
@ -126,67 +150,101 @@ local function virustotal_check(task, content, digest, rule, maybe_part)
          local res, json_err = parser:parse_string(body)

          lua_util.debugm(rule.name, task, '%s: got reply data: "%s"',
-              rule.log_prefix, body)
+            rule.log_prefix, body)

          if res then
            local obj = parser:get_object()
            if not obj.positives or type(obj.positives) ~= 'number' then
              if obj.response_code then
                if obj.response_code == 0 then
-                  cached = 'OK'
                  if rule['log_clean'] then
                    rspamd_logger.infox(task, '%s: hash %s clean (not found)',
-                        rule.log_prefix, hash)
+                      rule.log_prefix, hash)
                  else
                    lua_util.debugm(rule.name, task, '%s: hash %s clean (not found)',
-                        rule.log_prefix, hash)
+                      rule.log_prefix, hash)
+                  end
+                  -- Insert CLEAN symbol
+                  if rule.symbols and rule.symbols.clean then
+                    local clean_sym = rule.symbols.clean.symbol or 'VIRUSTOTAL_CLEAN'
+                    local sopt = string.format("%s:0", hash)
+                    task:insert_result(clean_sym, 1.0, sopt)
+                    -- Save with symbol name for proper cache retrieval
+                    cached = string.format("%s\v%s", clean_sym, sopt)
+                  else
+                    cached = 'OK'
                  end
                else
                  rspamd_logger.errx(task, 'invalid JSON reply: %s, body: %s, headers: %s',
-                      'bad response code: ' .. tostring(obj.response_code), body, headers)
+                    'bad response code: ' .. tostring(obj.response_code), body, headers)
                  task:insert_result(rule.symbol_fail, 1.0, 'Bad JSON reply: no `positives` element')
                  return
                end
              else
                rspamd_logger.errx(task, 'invalid JSON reply: %s, body: %s, headers: %s',
-                    'no response_code', body, headers)
+                  'no response_code', body, headers)
                task:insert_result(rule.symbol_fail, 1.0, 'Bad JSON reply: no `positives` element')
                return
              end
            else
-              if obj.positives < rule.minimum_engines then
+              if obj.positives == 0 then
+                if rule['log_clean'] then
+                  rspamd_logger.infox(task, '%s: hash %s clean',
+                    rule.log_prefix, hash)
+                else
+                  lua_util.debugm(rule.name, task, '%s: hash %s clean',
+                    rule.log_prefix, hash)
+                end
+                -- Insert CLEAN symbol
+                if rule.symbols and rule.symbols.clean then
+                  local clean_sym = rule.symbols.clean.symbol or 'VIRUSTOTAL_CLEAN'
+                  local sopt = string.format("%s:0/%s", hash, obj.total or 0)
+                  task:insert_result(clean_sym, 1.0, sopt)
+                  -- Save with symbol name for proper cache retrieval
+                  cached = string.format("%s\v%s", clean_sym, sopt)
+                else
+                  cached = 'OK'
+                end
+              elseif obj.positives < rule.minimum_engines then
                lua_util.debugm(rule.name, task, '%s: hash %s has not enough hits: %s where %s is min',
-                    rule.log_prefix, obj.positives, rule.minimum_engines)
-                -- TODO: add proper hashing!
+                  rule.log_prefix, hash, obj.positives, rule.minimum_engines)
                cached = 'OK'
              else
-                if obj.positives > rule.full_score_engines then
-                  dyn_score = 1.0
+                -- Determine category based on detection count
+                local category
+                local category_sym
+                local sopt = string.format("%s:%s/%s", hash, obj.positives, obj.total)
+
+                if obj.positives >= rule.medium_category then
+                  category = 'high'
+                  category_sym = rule.symbols.high.symbol or 'VIRUSTOTAL_HIGH'
+                elseif obj.positives >= rule.low_category then
+                  category = 'medium'
+                  category_sym = rule.symbols.medium.symbol or 'VIRUSTOTAL_MEDIUM'
                else
-                  local norm_pos = obj.positives - rule.minimum_engines
-                  dyn_score = norm_pos / (rule.full_score_engines - rule.minimum_engines)
+                  category = 'low'
+                  category_sym = rule.symbols.low.symbol or 'VIRUSTOTAL_LOW'
                end

-                if dyn_score < 0 or dyn_score > 1 then
-                  dyn_score = 1.0
-                end
-                local sopt = string.format("%s:%s/%s",
-                    hash, obj.positives, obj.total)
-                common.yield_result(task, rule, sopt, dyn_score, nil, maybe_part)
-                cached = sopt
+                rspamd_logger.infox(task, '%s: result - %s: "%s" - category: %s',
+                  rule.log_prefix, rule.detection_category .. 'found', sopt, category)
+
+                task:insert_result(category_sym, 1.0, sopt)
+                -- Save with symbol name for proper cache retrieval
+                cached = string.format("%s\v%s", category_sym, sopt)
              end
            end
          else
            -- not res
            rspamd_logger.errx(task, 'invalid JSON reply: %s, body: %s, headers: %s',
-                json_err, body, headers)
+              json_err, body, headers)
            task:insert_result(rule.symbol_fail, 1.0, 'Bad JSON reply: ' .. json_err)
            return
          end
        end

        if cached then
-          common.save_cache(task, digest, rule, cached, dyn_score, maybe_part)
+          common.save_cache(task, digest, rule, cached, 1.0, maybe_part)
        end
      end
    end
@ -196,13 +254,11 @@ local function virustotal_check(task, content, digest, rule, maybe_part)
  end

  if common.condition_check_and_continue(task, content, rule, digest,
-      virustotal_check_uncached) then
+        virustotal_check_uncached) then
    return
  else
-
    virustotal_check_uncached()
  end
-
 end

 return {
--- a/src/plugins/lua/antivirus.lua
+++ b/src/plugins/lua/antivirus.lua
@ -27,8 +27,8 @@ local N = "antivirus"

 if confighelp then
  rspamd_config:add_example(nil, 'antivirus',
-      "Check messages for viruses",
-      [[
+    "Check messages for viruses",
+    [[
  antivirus {
    # multiple scanners could be checked, for each we create a configuration block with an arbitrary name
    clamav {
@ -75,7 +75,7 @@ end

 -- Encode as base32 in the source to avoid crappy stuff
 local eicar_pattern = rspamd_util.decode_base32(
-    [[akp6woykfbonrepmwbzyfpbmibpone3mj3pgwbffzj9e1nfjdkorisckwkohrnfe1nt41y3jwk1cirjki4w4nkieuni4ndfjcktnn1yjmb1wn]]
+  [[akp6woykfbonrepmwbzyfpbmibpone3mj3pgwbffzj9e1nfjdkorisckwkohrnfe1nt41y3jwk1cirjki4w4nkieuni4ndfjcktnn1yjmb1wn]]
 )

 local function add_antivirus_rule(sym, opts)
@ -91,7 +91,7 @@ local function add_antivirus_rule(sym, opts)

  if not cfg then
    rspamd_logger.errx(rspamd_config, 'unknown antivirus type: %s',
-        opts.type)
+      opts.type)
    return nil
  end

@ -109,7 +109,7 @@ local function add_antivirus_rule(sym, opts)
  if opts.attachments_only ~= nil then
    opts.scan_mime_parts = opts.attachments_only
    rspamd_logger.warnx(rspamd_config, '%s [%s]: Using attachments_only is deprecated. ' ..
-        'Please use scan_mime_parts = %s instead', opts.symbol, opts.type, opts.attachments_only)
+      'Please use scan_mime_parts = %s instead', opts.symbol, opts.type, opts.attachments_only)
  end
  -- WORKAROUND for deprecated attachments_only

@ -123,9 +123,12 @@ local function add_antivirus_rule(sym, opts)
  rule.symbol_encrypted = opts.symbol_encrypted
  rule.redis_params = redis_params

+  -- Keep the configured main symbol name on the rule (for symbol registration later)
+  rule.symbol_main = opts.symbol
+
  if not rule then
    rspamd_logger.errx(rspamd_config, 'cannot configure %s for %s',
-        opts.type, opts.symbol)
+      opts.type, opts.symbol)
    return nil
  end

@ -133,10 +136,10 @@ local function add_antivirus_rule(sym, opts)
  rule.patterns_fail = common.create_regex_table(opts.patterns_fail or {})

  lua_redis.register_prefix(rule.prefix .. '_*', N,
-      string.format('Antivirus cache for rule "%s"',
-          rule.type), {
-        type = 'string',
-      })
+    string.format('Antivirus cache for rule "%s"',
+      rule.type), {
+      type = 'string',
+    })

  -- if any mime_part filter defined, do not scan all attachments
  if opts.mime_parts_filter_regex ~= nil
@ -157,9 +160,9 @@ local function add_antivirus_rule(sym, opts)
    rule.whitelist = rspamd_config:add_hash_map(opts.whitelist)
  end

-  return function(task)
+  -- Scan callback; it is returned together with the rule for symbol registration
+  local cb = function(task)
    if rule.scan_mime_parts then
-
      fun.each(function(p)
        local content = p:get_content()
        local clen = #content
@ -173,18 +176,19 @@ local function add_antivirus_rule(sym, opts)

            if clen == #opts.eicar_fake_pattern and content == opts.eicar_fake_pattern then
              rspamd_logger.infox(task, 'found eicar fake replacement part in the part (filename="%s")',
-                  p:get_filename())
+                p:get_filename())
              content = eicar_pattern
            end
          end
          cfg.check(task, content, p:get_digest(), rule, p)
        end
      end, common.check_parts_match(task, rule))
-
    else
      cfg.check(task, task:get_content(), task:get_digest(), rule)
    end
  end
+
+  return cb, rule
 end

 -- Registration
@ -200,15 +204,15 @@ if opts and type(opts) == 'table' then
      if not m.name then
        m.name = k
      end
-      local cb = add_antivirus_rule(k, m)
+      local cb, rule = add_antivirus_rule(k, m)

      if not cb then
        rspamd_logger.errx(rspamd_config, 'cannot add rule: "' .. k .. '"')
        lua_util.config_utils.push_config_error(N, 'cannot add AV rule: "' .. k .. '"')
      else
-        rspamd_logger.infox(rspamd_config, 'added antivirus engine %s -> %s', k, m.symbol)
+        rspamd_logger.infox(rspamd_config, 'added antivirus engine %s -> %s', k, rule.symbol or m.symbol)
        local t = {
-          name = m.symbol,
+          name = rule.symbol or m.symbol,
          callback = cb,
          score = 0.0,
          group = N
@ -233,27 +237,27 @@ if opts and type(opts) == 'table' then

        rspamd_config:register_symbol({
          type = 'virtual',
-          name = m['symbol_fail'],
+          name = rule.symbol_fail or m['symbol_fail'],
          parent = id,
          score = 0.0,
          group = N
        })
        rspamd_config:register_symbol({
          type = 'virtual',
-          name = m['symbol_encrypted'],
+          name = rule.symbol_encrypted or m['symbol_encrypted'],
          parent = id,
          score = 0.0,
          group = N
        })
        rspamd_config:register_symbol({
          type = 'virtual',
-          name = m['symbol_macro'],
+          name = rule.symbol_macro or m['symbol_macro'],
          parent = id,
          score = 0.0,
          group = N
        })
        has_valid = true
-        if type(m['patterns']) == 'table' then
+        if type(rule.patterns) == 'table' and type(m['patterns']) == 'table' then
          if m['patterns'][1] then
            for _, p in ipairs(m['patterns']) do
              if type(p) == 'table' then
@ -321,6 +325,48 @@ if opts and type(opts) == 'table' then
            end
          end
        end
+        if rule.symbols then
+          rspamd_logger.infox(rspamd_config, 'registering category symbols for %s', rule.name)
+          -- Register each entry of a (possibly nested) symbols table under parent `id`
+          local function reg_symbols(tbl)
+            local function add_virtual(sym_name)
+              rspamd_config:register_symbol({
+                type = 'virtual',
+                name = sym_name,
+                parent = id,
+                group = N
+              })
+            end
+            for _, entry in pairs(tbl) do
+              local kind = type(entry)
+              if kind == 'string' then
+                -- Bare string: the value itself is the symbol name
+                rspamd_logger.infox(rspamd_config, 'registering symbol: %s (string)', entry)
+                add_virtual(entry)
+              elseif kind == 'table' and not entry.symbol then
+                -- No `symbol` field: treat as a nested group and descend
+                reg_symbols(entry)
+              elseif kind == 'table' then
+                rspamd_logger.infox(rspamd_config, 'registering symbol: %s with score %s',
+                  entry.symbol, entry.score or 'default')
+                add_virtual(entry.symbol)
+                -- Metric settings are only applied when a score is configured
+                if entry.score then
+                  rspamd_config:set_metric_symbol({
+                    name = entry.symbol,
+                    score = entry.score,
+                    description = entry.description,
+                    group = entry.group or N,
+                  })
+                end
+              end
+            end
+          end
+
+          reg_symbols(rule.symbols)
+        else
+          rspamd_logger.infox(rspamd_config, 'no category symbols defined for %s', rule.name)
+        end
        if m['score'] then
          -- Register metric symbol
          local description = 'antivirus symbol'