mirror of https://github.com/rspamd/rspamd.git
Rapid spam filtering system
https://rspamd.com/
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
436 lines
13 KiB
436 lines
13 KiB
--[[
|
|
Copyright (c) 2021, Alexander Moisseev <moiseev@mezonplus.ru>
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License.
|
|
]]--
|
|
|
|
--[[[
|
|
-- @module cloudmark
|
|
-- This module implements the Cloudmark v2 interface
|
|
--]]
|
|
|
|
local lua_util = require "lua_util"
|
|
local http = require "rspamd_http"
|
|
local upstream_list = require "rspamd_upstream_list"
|
|
local rspamd_logger = require "rspamd_logger"
|
|
local ucl = require "ucl"
|
|
local rspamd_util = require "rspamd_util"
|
|
local common = require "lua_scanners/common"
|
|
local fun = require "fun"
|
|
local lua_mime = require "lua_mime"
|
|
|
|
-- Module tag: used as the default scanner name and as the logging/debug module id
local N = 'cloudmark'

-- Multipart boundary shared by every request; produced once, when this module
-- is loaded, by rspamd_util.random_hex(32)
local static_boundary = rspamd_util.random_hex(32)
|
|
|
|
--- Builds the full request URL for an upstream address.
-- @param rule scanner rule; reads `url`, `default_port` and `use_https`
-- @param addr upstream address object; must provide `get_port()` and `tostring()`
-- @param maybe_url optional path overriding `rule.url`
-- @return string of the form `scheme://addr:port/path`
local function cloudmark_url(rule, addr, maybe_url)
  local port = addr:get_port()
  local path = maybe_url or rule.url

  -- Port 0 means the address carries no explicit port: use the rule default
  if port == 0 then
    port = rule.default_port
  end

  local fmt = rule.use_https and 'https://%s:%d%s' or 'http://%s:%d%s'
  return string.format(fmt, tostring(addr), port, path)
end
|
|
|
|
-- Detect cloudmark max size
-- Preload hook: requests `/score/v2/max-message-size` from one upstream and,
-- when the reply is a parseable document with a `maxMessageSize` field, stores
-- that value in `rule.max_size`. Every failure is logged and leaves the rule
-- untouched.
-- @param rule scanner rule; reads `rule.upstreams`, writes `rule.max_size`
-- @param cfg config object handed to the HTTP client and to the debug logger
-- @param ev_base event base handed to the HTTP client; also the log context
local function cloudmark_preload(rule, cfg, ev_base, _)
  local upstream = rule.upstreams:get_upstream_round_robin()
  local addr = upstream:get_addr()

  local function max_message_size_cb(http_err, code, body, _)
    if http_err then
      rspamd_logger.errx(ev_base, 'HTTP error when getting max message size: %s',
          http_err)
      return
    end
    if code ~= 200 then
      rspamd_logger.errx(ev_base, 'bad HTTP code when getting max message size: %s', code)
      -- Do not try to interpret the body of an error reply as a size document
      return
    end

    local parser = ucl.parser()
    local ret, err = parser:parse_string(body)
    if not ret then
      rspamd_logger.errx(ev_base, 'could not parse response body [%s]: %s', body, err)
      return
    end

    local obj = parser:get_object()
    local ms = obj.maxMessageSize
    if not ms then
      rspamd_logger.errx(ev_base, 'missing maxMessageSize in the response body (JSON): %s', obj)
      return
    end

    rule.max_size = ms
    lua_util.debugm(N, cfg, 'set maximum message size set to %s bytes', ms)
  end

  http.request({
    ev_base = ev_base,
    config = cfg,
    url = cloudmark_url(rule, addr, '/score/v2/max-message-size'),
    callback = max_message_size_cb,
  })
end
|
|
|
|
--- Converts, in place, every string key of `d` that parses as a number into
-- the corresponding numeric key.
-- Keys that are already numbers are left alone (re-assigning and then clearing
-- the same slot would delete the entry), as are keys that do not parse.
-- @param d table to normalise; modified in place
local function numerify(d)
  -- Snapshot the keys first: new keys must not be added to a table while it
  -- is being traversed with pairs()
  local keys = {}
  for k in pairs(d) do
    keys[#keys + 1] = k
  end

  for _, k in ipairs(keys) do
    if type(k) ~= 'number' then
      local new_key = tonumber(k)
      -- `new_key == new_key` rejects NaN, which cannot be used as a table key
      if new_key and new_key == new_key then
        d[new_key] = d[k]
        d[k] = nil
      end
    end
  end
end
|
|
|
|
--- Builds the scanner rule from user options.
-- Starts from the built-in defaults, merges `opts` through
-- lua_util.override_defaults, fills in derived fields (`prefix`, `log_prefix`,
-- `servers`) and creates the upstream list.
-- @param opts user supplied options; `type` is read from the merged result
-- @return the rule table, or nil when no servers are configured or the
--         upstream list cannot be created
local function cloudmark_config(opts)
  local conf = lua_util.override_defaults({
    name = N,
    default_port = 2713,
    url = '/score/v2/message',
    use_https = false,
    timeout = 5.0,
    log_clean = false,
    retransmits = 1,
    score_threshold = 90, -- minimum score to considerate reply
    message = '${SCANNER}: spam message found: "${VIRUS}"',
    max_message = 0,
    detection_category = "hash",
    default_score = 1,
    action = false,
    log_spamcause = true,
    symbol_fail = 'CLOUDMARK_FAIL',
    symbol = 'CLOUDMARK_CHECK',
    symbol_spam = 'CLOUDMARK_SPAM',
    add_score_header = false, -- Add X-CMAE-Score header
    add_headers = false, -- allow addition of the headers from Cloudmark
    scores_symbols = nil, -- a table with match { [score_threshold] = symbol, ... }
  }, opts)

  -- Thresholds may arrive as string keys; turn them into numbers
  if type(conf.scores_symbols) == 'table' then
    numerify(conf.scores_symbols)
  end

  conf.prefix = conf.prefix or ('rs_' .. conf.name .. '_')

  if not conf.log_prefix then
    local same_name = conf.name:lower() == conf.type:lower()
    if same_name then
      conf.log_prefix = conf.name
    else
      conf.log_prefix = conf.name .. ' (' .. conf.type .. ')'
    end
  end

  -- `socket` is used as a fallback when `servers` is not given
  if conf.socket and not conf.servers then
    conf.servers = conf.socket
  end

  if not conf.servers then
    rspamd_logger.errx(rspamd_config, 'no servers defined')

    return nil
  end

  conf.upstreams = upstream_list.create(rspamd_config, conf.servers, conf.default_port)

  if not conf.upstreams then
    rspamd_logger.errx(rspamd_config, 'cannot parse servers %s', conf.servers)
    return nil
  end

  conf.symbols = { { symbol = conf.symbol_spam, score = 5.0 } }
  conf.preloads = { cloudmark_preload }
  lua_util.add_debug_alias('external_services', conf.name)

  return conf
end
|
|
|
|
-- Returns an optional per-part setting. An exact (lowercase) key wins;
-- otherwise string keys are compared case-insensitively, so `Content-Type`
-- and `content-type` are treated as the same setting.
local function multipart_part_option(part, name)
  local exact = part[name]
  if exact ~= nil then
    return exact
  end
  for key, value in pairs(part) do
    if type(key) == 'string' and key:lower() == name then
      return value
    end
  end
  return nil
end

-- Converts a key-value map to the table representing multipart body, with the following values:
-- `data`: data of the part
-- `filename`: optional filename
-- `content-type`: content type of the element (optional, defaults to text/plain)
-- `content-transfer-encoding`: optional CTE header (defaults to binary)
-- The two header settings are matched case-insensitively. Entries that are not
-- tables, or that have no `data`, are skipped. Parts are emitted in pairs()
-- order, which is unspecified.
-- @param tbl map of form field name -> part description
-- @param boundary multipart boundary string (without the leading dashes)
-- @return array of body fragments; empty when no entry carried `data`
local function table_to_multipart_body(tbl, boundary)
  local seen_data = false
  local out = {}

  for k, v in pairs(tbl) do
    -- Guard on the type: indexing a number or boolean would raise
    if type(v) == 'table' and v.data then
      seen_data = true
      table.insert(out, string.format('--%s\r\n', boundary))
      if v.filename then
        table.insert(out,
            string.format('Content-Disposition: form-data; name="%s"; filename="%s"\r\n',
                k, v.filename))
      else
        table.insert(out,
            string.format('Content-Disposition: form-data; name="%s"\r\n', k))
      end

      local ctype = multipart_part_option(v, 'content-type')
      if ctype then
        table.insert(out,
            string.format('Content-Type: %s\r\n', ctype))
      else
        table.insert(out, 'Content-Type: text/plain\r\n')
      end

      local cte = multipart_part_option(v, 'content-transfer-encoding')
      if cte then
        table.insert(out,
            string.format('Content-Transfer-Encoding: %s\r\n', cte))
      else
        table.insert(out, 'Content-Transfer-Encoding: binary\r\n')
      end

      -- Blank line separates the part headers from the part payload
      table.insert(out, '\r\n')
      table.insert(out, v.data)
      table.insert(out, '\r\n')
    end
  end

  -- The closing delimiter is only written when at least one part was emitted
  if seen_data then
    table.insert(out, string.format('--%s--\r\n', boundary))
  end

  return out
end
|
|
|
|
--- Picks the symbol whose threshold is the highest one not exceeding `score`.
-- @param scores_symbols map { [threshold] = symbol }
-- @param score numeric score to classify
-- @return the matching symbol, or nil when no threshold is <= score
local function get_specific_symbol(scores_symbols, score)
  local best_sym
  -- Starts below zero so that a threshold of 0 can still be selected
  local best_thr = -1

  for thr, sym in pairs(scores_symbols) do
    if thr > best_thr and thr <= score then
      best_thr, best_sym = thr, sym
    end
  end

  return best_sym
end
|
|
|
|
-- Self-checks for get_specific_symbol, executed when the module is loaded.
-- Each case is { thresholds, score, expected symbol }; a missing third element
-- means that no symbol must be selected.
do
  local single = { [90] = 'CLOUDMARK_SPAM' }
  local double = { [90] = 'CLOUDMARK_SPAM', [80] = 'CLOUDMARK_SPAM2' }
  local cases = {
    { single, 100, 'CLOUDMARK_SPAM' },
    { single, 80 },
    { double, 100, 'CLOUDMARK_SPAM' },
    { double, 80, 'CLOUDMARK_SPAM2' },
    { double, 70 },
    { double, 90, 'CLOUDMARK_SPAM' },
    { { }, 80 },
    { { [100] = 'CLOUDMARK_SPAM' }, 100, 'CLOUDMARK_SPAM' },
    { { [0] = 'CLOUDMARK_SPAM' }, 0, 'CLOUDMARK_SPAM' },
  }

  for _, case in ipairs(cases) do
    assert(get_specific_symbol(case[1], case[2]) == case[3])
  end
end
|
|
|
|
--- Interprets a reply body and records the outcome on the task.
-- Inserts `rule.symbol_fail` when the body cannot be parsed or has no `score`.
-- Otherwise, in this order: inserts `rule.symbol_spam` when the score reaches
-- `rule.score_threshold`, adds the headers listed in `appendHeaders` (when
-- `rule.add_headers` is set), adds `X-CMAE-Score` (when `rule.add_score_header`
-- is set) and inserts the per-threshold symbol from `rule.scores_symbols`.
-- @param task task to log against and to attach results/headers to
-- @param rule scanner rule
-- @param body raw reply body
local function parse_cloudmark_reply(task, rule, body)
  local reply_parser = ucl.parser()
  local parsed, parse_err = reply_parser:parse_string(body)
  if not parsed then
    rspamd_logger.errx(task, '%s: bad response body (raw): %s', N, body)
    task:insert_result(rule.symbol_fail, 1.0, 'Parser error: ' .. parse_err)
    return
  end

  local reply = reply_parser:get_object()
  lua_util.debugm(N, task, 'cloudmark reply is: %s', reply)

  if not reply.score then
    rspamd_logger.errx(task, '%s: bad response body (raw): %s', N, body)
    task:insert_result(rule.symbol_fail, 1.0, 'Parser error: no score')
    return
  end

  local analysis = reply.analysis
  if analysis then
    -- Report analysis string
    rspamd_logger.infox(task, 'cloudmark report string: %s', analysis)
  end

  -- A score that is not numeric is treated as 0
  local score = tonumber(reply.score) or 0
  local score_text = tostring(score)

  if score >= rule.score_threshold then
    task:insert_result(rule.symbol_spam, 1.0, score_text)
  end

  if rule.add_headers and type(reply.appendHeaders) == 'table' then
    -- Maps one appendHeaders element to a (header name, header spec) pair
    local function to_header(h)
      return h.headerField, {
        order = 1, value = h.body
      }
    end
    local extra_headers = fun.tomap(fun.map(to_header, reply.appendHeaders))
    lua_mime.modify_headers(task, { add = extra_headers })
  end

  if rule.add_score_header then
    local score_header = {
      ['X-CMAE-Score'] = { order = 1, value = score_text }
    }
    lua_mime.modify_headers(task, { add = score_header })
  end

  if type(rule.scores_symbols) == 'table' then
    local specific = get_specific_symbol(rule.scores_symbols, score)
    if specific then
      task:insert_result(specific, 1.0, score_text)
    end
  end
end
|
|
|
|
--- Scans a message: posts it, with envelope data, to an upstream as
-- multipart/form-data and hands a 200 reply to parse_cloudmark_reply.
-- Transport errors mark the upstream as failed and are retried on another
-- upstream up to `rule.retransmits` times; after that `rule.symbol_fail` is
-- inserted.
-- @param task task being scanned
-- @param content passed through to common.condition_check_and_continue
-- @param digest passed through to common.condition_check_and_continue
-- @param rule scanner rule produced by cloudmark_config
-- @param maybe_part passed through to common.condition_check_and_continue
local function cloudmark_check(task, content, digest, rule, maybe_part)
  local function cloudmark_check_uncached()
    local upstream = rule.upstreams:get_upstream_round_robin()
    local addr = upstream:get_addr()
    local retransmits = rule.retransmits

    local url = cloudmark_url(rule, addr)
    local message_data = task:get_content()
    if rule.max_message and rule.max_message > 0 and #message_data > rule.max_message then
      task:insert_result(rule['symbol_fail'], 0.0, 'Message too large: ' .. #message_data)
      return
    end

    -- Every form field is a table with `data` plus optional lowercase
    -- `content-type` / `content-transfer-encoding` settings, which is the
    -- shape table_to_multipart_body reads
    local request = {
      rfc822 = {
        ['content-type'] = 'message/rfc822',
        data = message_data,
      }
    }

    local helo = task:get_helo()
    if helo then
      request['heloDomain'] = {
        data = helo,
      }
    end

    local mail_from = task:get_from('smtp') or {}
    if mail_from[1] and #mail_from[1].addr > 1 then
      request['mailFrom'] = {
        data = mail_from[1].addr
      }
    end

    local rcpt_to = task:get_recipients('smtp')
    if rcpt_to then
      request['rcptTo'] = {
        data = table.concat(fun.totable(fun.map(function(r)
          return r.addr
        end, rcpt_to)), ',')
      }
    end

    local fip = task:get_from_ip()
    if fip and fip:is_valid() then
      request['connIp'] = {
        data = tostring(fip)
      }
    end

    local hostname = task:get_hostname()
    if hostname then
      -- Must be wrapped like the other fields: a bare string has no `data`
      -- and would be dropped from the multipart body
      request['fromHost'] = {
        data = hostname,
      }
    end

    local request_data = {
      task = task,
      url = url,
      body = table_to_multipart_body(request, static_boundary),
      headers = {
        ['Content-Type'] = string.format('multipart/form-data; boundary="%s"', static_boundary)
      },
      timeout = rule.timeout,
    }

    local function cloudmark_callback(http_err, code, body, headers)

      local function cloudmark_requery()
        -- set current upstream to fail because an error occurred
        -- (this is the only failure mark for this attempt)
        upstream:fail()

        -- retry with another upstream until retransmits exceeds
        if retransmits > 0 then

          retransmits = retransmits - 1

          lua_util.debugm(rule.name, task,
              '%s: request Error: %s - retries left: %s',
              rule.log_prefix, http_err, retransmits)

          -- Select a different upstream!
          upstream = rule.upstreams:get_upstream_round_robin()
          addr = upstream:get_addr()
          url = cloudmark_url(rule, addr)

          lua_util.debugm(rule.name, task, '%s: retry IP: %s:%s',
              rule.log_prefix, addr, addr:get_port())
          request_data.url = url

          -- request_data still carries this callback, so the retry re-enters here
          http.request(request_data)
        else
          rspamd_logger.errx(task, '%s: failed to scan, maximum retransmits ' ..
              'exceed', rule.log_prefix)
          task:insert_result(rule['symbol_fail'], 0.0, 'failed to scan and ' ..
              'retransmits exceed')
        end
      end

      if http_err then
        cloudmark_requery()
      else
        -- Parse the response
        if upstream then
          upstream:ok()
        end
        if code ~= 200 then
          rspamd_logger.errx(task, 'invalid HTTP code: %s, body: %s, headers: %s', code, body, headers)
          task:insert_result(rule.symbol_fail, 1.0, 'Bad HTTP code: ' .. code)
          return
        end
        parse_cloudmark_reply(task, rule, body)
      end
    end

    request_data.callback = cloudmark_callback
    http.request(request_data)
  end

  -- When the common helper returns true nothing more is done here; otherwise
  -- the scan is started directly.
  -- NOTE(review): presumably the helper handles preconditions/caching and
  -- invokes the callback itself in that case - confirm in lua_scanners/common
  if common.condition_check_and_continue(task, content, rule, digest,
      cloudmark_check_uncached, maybe_part) then
    return
  else
    cloudmark_check_uncached()
  end
end
|
|
|
|
-- Scanner descriptor exported by this module
local scanner = {
  name = N,
  description = 'Cloudmark cartridge interface',
  type = { 'cloudmark', 'scanner' },
  configure = cloudmark_config,
  check = cloudmark_check,
}

return scanner
|