Browse Source

[Feature] Detect part types in mime parser

pull/5619/head
Vsevolod Stakhov 4 weeks ago
parent
commit
476ff023a2
No known key found for this signature in database GPG Key ID: 7647B6790081437
  1. 57
      lualib/lua_magic/init.lua
  2. 120
      src/libmime/message.c
  3. 247
      src/libmime/mime_parser.c
  4. 10
      src/libmime/mime_parser.h
  5. 4
      src/libserver/cfg_file.h

57
lualib/lua_magic/init.lua

@ -12,7 +12,7 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
]]--
]] --
--[[[
-- @module lua_magic
@ -57,17 +57,17 @@ local function process_patterns(log_obj)
end
lua_util.debugm(N, log_obj, 'add tail pattern %s for ext %s',
str, pattern.ext)
str, pattern.ext)
elseif match.position < short_match_limit then
short_patterns[#short_patterns + 1] = {
str, match, pattern
}
if str:sub(1, 1) == '^' then
lua_util.debugm(N, log_obj, 'add head pattern %s for ext %s',
str, pattern.ext)
str, pattern.ext)
else
lua_util.debugm(N, log_obj, 'add short pattern %s for ext %s',
str, pattern.ext)
str, pattern.ext)
end
if max_short_offset < match.position then
@ -79,7 +79,7 @@ local function process_patterns(log_obj)
}
lua_util.debugm(N, log_obj, 'add long pattern %s for ext %s',
str, pattern.ext)
str, pattern.ext)
end
else
processed_patterns[#processed_patterns + 1] = {
@ -87,7 +87,7 @@ local function process_patterns(log_obj)
}
lua_util.debugm(N, log_obj, 'add long pattern %s for ext %s',
str, pattern.ext)
str, pattern.ext)
end
end
@ -133,25 +133,25 @@ local function process_patterns(log_obj)
fun.map(function(t)
return t[1]
end, processed_patterns)),
compile_flags
compile_flags
)
compiled_short_patterns = rspamd_trie.create(fun.totable(
fun.map(function(t)
return t[1]
end, short_patterns)),
compile_flags
compile_flags
)
compiled_tail_patterns = rspamd_trie.create(fun.totable(
fun.map(function(t)
return t[1]
end, tail_patterns)),
compile_flags
compile_flags
)
lua_util.debugm(N, log_obj,
'compiled %s (%s short; %s long; %s tail) patterns',
#processed_patterns + #short_patterns + #tail_patterns,
#short_patterns, #processed_patterns, #tail_patterns)
'compiled %s (%s short; %s long; %s tail) patterns',
#processed_patterns + #short_patterns + #tail_patterns,
#short_patterns, #processed_patterns, #tail_patterns)
end
end
@ -173,7 +173,7 @@ local function match_chunk(chunk, input, tlen, offset, trie, processed_tbl, log_
end
lua_util.debugm(N, log_obj, 'add pattern for %s, weight %s, total weight %s',
ext, weight, res[ext])
ext, weight, res[ext])
end
local function match_position(pos, expected)
@ -224,7 +224,7 @@ local function match_chunk(chunk, input, tlen, offset, trie, processed_tbl, log_
for _, pos in ipairs(matched_positions) do
lua_util.debugm(N, log_obj, 'found match %s at offset %s(from %s)',
pattern.ext, pos, offset)
pattern.ext, pos, offset)
if match_position(pos + offset, position) then
if match.heuristic then
local ext, weight = match.heuristic(input, log_obj, pos + offset, part)
@ -247,7 +247,7 @@ local function match_chunk(chunk, input, tlen, offset, trie, processed_tbl, log_
local matched = false
for _, pos in ipairs(matched_positions) do
lua_util.debugm(N, log_obj, 'found match %s at offset %s(from %s)',
pattern.ext, pos, offset)
pattern.ext, pos, offset)
if not match_position(pos + offset, position) then
matched = true
matched_pos = pos
@ -275,7 +275,6 @@ local function match_chunk(chunk, input, tlen, offset, trie, processed_tbl, log_
end
end
end
end
local function process_detected(res)
@ -312,13 +311,13 @@ exports.detect = function(part, log_obj)
if inplen > min_tail_offset then
local tail = input:span(inplen - min_tail_offset, min_tail_offset)
match_chunk(tail, input, inplen, inplen - min_tail_offset,
compiled_tail_patterns, tail_patterns, log_obj, res, part)
compiled_tail_patterns, tail_patterns, log_obj, res, part)
end
-- Try short match
local head = input:span(1, math.min(max_short_offset, inplen))
match_chunk(head, input, inplen, 0,
compiled_short_patterns, short_patterns, log_obj, res, part)
compiled_short_patterns, short_patterns, log_obj, res, part)
-- Check if we have enough data or go to long patterns
local extensions, confidence = process_detected(res)
@ -332,17 +331,17 @@ exports.detect = function(part, log_obj)
if #input > exports.chunk_size * 3 then
-- Chunked version as input is too long
local chunk1, chunk2 = input:span(1, exports.chunk_size * 2),
input:span(inplen - exports.chunk_size, exports.chunk_size)
input:span(inplen - exports.chunk_size, exports.chunk_size)
local offset1, offset2 = 0, inplen - exports.chunk_size
match_chunk(chunk1, input, inplen,
offset1, compiled_patterns, processed_patterns, log_obj, res, part)
offset1, compiled_patterns, processed_patterns, log_obj, res, part)
match_chunk(chunk2, input, inplen,
offset2, compiled_patterns, processed_patterns, log_obj, res, part)
offset2, compiled_patterns, processed_patterns, log_obj, res, part)
else
-- Input is short enough to be matched in full
match_chunk(input, input, inplen, 0,
compiled_patterns, processed_patterns, log_obj, res, part)
compiled_patterns, processed_patterns, log_obj, res, part)
end
else
-- Table input is NYI
@ -372,6 +371,18 @@ exports.detect_mime_part = function(part, log_obj)
return ext, types[ext]
end
-- Fallback by filename extension (e.g. .eml attachments with generic content-type)
local fname
if part.get_filename then
fname = part:get_filename()
end
if type(fname) == 'string' then
local lfn = fname:lower()
if #lfn > 4 and lfn:sub(-4) == '.eml' then
return 'eml', types['eml']
end
end
-- Text/html and other parts
ext, weight = heuristics.text_part_heuristic(part, log_obj)
if ext and weight and weight > 20 then
@ -385,4 +396,4 @@ exports.chunk_size = 32768
exports.types = types
return exports
return exports

120
src/libmime/message.c

@ -36,12 +36,12 @@
#include <unicode/uchar.h>
#include "sodium.h"
#include "libserver/cfg_file_private.h"
#include "lua/lua_common.h"
#define RSPAMD_TOKENIZER_INTERNAL
#include "contrib/uthash/utlist.h"
#include "contrib/t1ha/t1ha.h"
#include "received.h"
#define RSPAMD_TOKENIZER_INTERNAL
#include "mime_parser.h"
#include "libstat/tokenizers/custom_tokenizer.h"
#include "received.h"
#define GTUBE_SYMBOL "GTUBE"
@ -989,8 +989,38 @@ rspamd_message_from_data(struct rspamd_task *task, const unsigned char *start,
else if (task->cfg && task->cfg->libs_ctx) {
lua_State *L = task->cfg->lua_state;
if (rspamd_lua_require_function(L,
"lua_magic", "detect_mime_part")) {
if (task->cfg->mime_parser_cfg &&
rspamd_mime_parser_get_lua_magic_cbref(task->cfg->mime_parser_cfg) != -1) {
struct rspamd_mime_part **pmime;
struct rspamd_task **ptask;
lua_rawgeti(L, LUA_REGISTRYINDEX, rspamd_mime_parser_get_lua_magic_cbref(task->cfg->mime_parser_cfg));
pmime = lua_newuserdata(L, sizeof(struct rspamd_mime_part *));
rspamd_lua_setclass(L, rspamd_mimepart_classname, -1);
*pmime = part;
ptask = lua_newuserdata(L, sizeof(struct rspamd_task *));
rspamd_lua_setclass(L, rspamd_task_classname, -1);
*ptask = task;
if (lua_pcall(L, 2, 2, 0) != 0) {
msg_err_task("cannot detect type: %s", lua_tostring(L, -1));
}
else {
if (lua_istable(L, -1)) {
lua_pushstring(L, "ct");
lua_gettable(L, -2);
if (lua_isstring(L, -1)) {
mb = rspamd_mempool_strdup(task->task_pool,
lua_tostring(L, -1));
}
}
}
lua_settop(L, 0);
}
else if (rspamd_lua_require_function(L,
"lua_magic", "detect_mime_part")) {
struct rspamd_mime_part **pmime;
struct rspamd_task **ptask;
@ -1405,7 +1435,7 @@ void rspamd_message_process(struct rspamd_task *task)
unsigned int tw, *ptw, dw;
struct rspamd_mime_part *part;
lua_State *L = NULL;
int magic_func_pos = -1, content_func_pos = -1, old_top = -1, funcs_top = -1;
int content_func_pos = -1, old_top = -1, funcs_top = -1;
if (task->cfg) {
L = task->cfg->lua_state;
@ -1417,13 +1447,7 @@ void rspamd_message_process(struct rspamd_task *task)
old_top = lua_gettop(L);
}
if (L && rspamd_lua_require_function(L,
"lua_magic", "detect_mime_part")) {
magic_func_pos = lua_gettop(L);
}
else {
msg_err_task("cannot require lua_magic.detect_mime_part");
}
/* lua_magic is preloaded by mime parser init; do not require here */
if (L && rspamd_lua_require_function(L,
"lua_content", "maybe_process_mime_part")) {
@ -1441,75 +1465,7 @@ void rspamd_message_process(struct rspamd_task *task)
PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, parts), i, part)
{
if (magic_func_pos != -1 && part->parsed_data.len > 0) {
struct rspamd_mime_part **pmime;
struct rspamd_task **ptask;
lua_pushcfunction(L, &rspamd_lua_traceback);
int err_idx = lua_gettop(L);
lua_pushvalue(L, magic_func_pos);
pmime = lua_newuserdata(L, sizeof(struct rspamd_mime_part *));
rspamd_lua_setclass(L, rspamd_mimepart_classname, -1);
*pmime = part;
ptask = lua_newuserdata(L, sizeof(struct rspamd_task *));
rspamd_lua_setclass(L, rspamd_task_classname, -1);
*ptask = task;
if (lua_pcall(L, 2, 2, err_idx) != 0) {
msg_err_task("cannot detect type: %s", lua_tostring(L, -1));
}
else {
if (lua_istable(L, -1)) {
const char *mb;
/* First returned value */
part->detected_ext = rspamd_mempool_strdup(task->task_pool,
lua_tostring(L, -2));
lua_pushstring(L, "ct");
lua_gettable(L, -2);
if (lua_isstring(L, -1)) {
mb = lua_tostring(L, -1);
if (mb) {
rspamd_ftok_t srch;
srch.begin = mb;
srch.len = strlen(mb);
part->detected_ct = rspamd_content_type_parse(srch.begin,
srch.len,
task->task_pool);
}
}
lua_pop(L, 1);
lua_pushstring(L, "type");
lua_gettable(L, -2);
if (lua_isstring(L, -1)) {
part->detected_type = rspamd_mempool_strdup(task->task_pool,
lua_tostring(L, -1));
}
lua_pop(L, 1);
lua_pushstring(L, "no_text");
lua_gettable(L, -2);
if (lua_isboolean(L, -1)) {
if (!!lua_toboolean(L, -1)) {
part->flags |= RSPAMD_MIME_PART_NO_TEXT_EXTRACTION;
}
}
lua_pop(L, 1);
}
}
lua_settop(L, funcs_top);
}
/* detected_* are already set by mime_parser; no extra lua_magic call here */
/* Now detect content */
if (content_func_pos != -1 && part->parsed_data.len > 0 &&

247
src/libmime/mime_parser.c

@ -23,17 +23,69 @@
#include "multipattern.h"
#include "contrib/libottery/ottery.h"
#include "contrib/uthash/utlist.h"
#include "lua/lua_common.h"
#include "lua/lua_classnames.h"
#include <openssl/cms.h>
#include <openssl/pkcs7.h>
#include "rspamd_simdutf.h"
struct rspamd_mime_parser_lib_ctx {
struct rspamd_mime_parser_config {
struct rspamd_multipattern *mp_boundary;
unsigned char hkey[rspamd_cryptobox_SIPKEYBYTES]; /* Key for hashing */
unsigned int key_usages;
int lua_magic_detect_cbref;
lua_State *L;
};
struct rspamd_mime_parser_lib_ctx *lib_ctx = NULL;
static struct rspamd_mime_parser_config *mime_parser_cfg = NULL;
struct rspamd_mime_parser_config *
rspamd_mime_parser_init_shared(struct rspamd_config *cfg)
{
if (mime_parser_cfg == NULL) {
mime_parser_cfg = g_malloc0(sizeof(*mime_parser_cfg));
mime_parser_cfg->mp_boundary = rspamd_multipattern_create(RSPAMD_MULTIPATTERN_DEFAULT);
g_assert(mime_parser_cfg->mp_boundary != NULL);
rspamd_multipattern_add_pattern(mime_parser_cfg->mp_boundary, "\r--", 0);
rspamd_multipattern_add_pattern(mime_parser_cfg->mp_boundary, "\n--", 0);
GError *err = NULL;
if (!rspamd_multipattern_compile(mime_parser_cfg->mp_boundary, RSPAMD_MULTIPATTERN_COMPILE_NO_FS, &err)) {
msg_err("fatal error: cannot compile multipattern for mime parser boundaries: %e", err);
g_error_free(err);
g_abort();
}
ottery_rand_bytes(mime_parser_cfg->hkey, sizeof(mime_parser_cfg->hkey));
mime_parser_cfg->key_usages = 0;
mime_parser_cfg->lua_magic_detect_cbref = -1;
}
mime_parser_cfg->L = (lua_State *) cfg->lua_state;
if (mime_parser_cfg->L && mime_parser_cfg->lua_magic_detect_cbref == -1) {
int old_top = lua_gettop(mime_parser_cfg->L);
if (rspamd_lua_require_function(mime_parser_cfg->L, "lua_magic", "detect_mime_part")) {
mime_parser_cfg->lua_magic_detect_cbref = luaL_ref(mime_parser_cfg->L, LUA_REGISTRYINDEX);
}
lua_settop(mime_parser_cfg->L, old_top);
}
cfg->mime_parser_cfg = mime_parser_cfg;
return mime_parser_cfg;
}
/**
 * Counterpart of rspamd_mime_parser_init_shared().
 *
 * Intentionally does nothing: the shared config is kept for the lifetime of
 * the process, so there is nothing to release here.
 *
 * @param unused ignored
 */
void rspamd_mime_parser_free_shared(struct rspamd_mime_parser_config *unused)
{
	/* noop: lifetime tied to process */
	(void) unused;
}
int rspamd_mime_parser_get_lua_magic_cbref(const struct rspamd_mime_parser_config *cfg)
{
if (cfg) {
return cfg->lua_magic_detect_cbref;
}
return -1;
}
static const unsigned int max_nested = 64;
static const unsigned int max_key_usages = 10000;
@ -56,7 +108,7 @@ struct rspamd_mime_boundary {
int flags;
};
struct rspamd_mime_parser_ctx {
struct rspamd_mime_parser_runtime {
GPtrArray *stack; /* Stack of parts */
GArray *boundaries; /* Boundaries found in the whole message */
const char *start;
@ -69,23 +121,23 @@ struct rspamd_mime_parser_ctx {
static enum rspamd_mime_parse_error
rspamd_mime_parse_multipart_part(struct rspamd_task *task,
struct rspamd_mime_part *part,
struct rspamd_mime_parser_ctx *st,
struct rspamd_mime_parser_runtime *st,
GError **err);
static enum rspamd_mime_parse_error
rspamd_mime_parse_message(struct rspamd_task *task,
struct rspamd_mime_part *part,
struct rspamd_mime_parser_ctx *st,
struct rspamd_mime_parser_runtime *st,
GError **err);
static enum rspamd_mime_parse_error
rspamd_mime_parse_normal_part(struct rspamd_task *task,
struct rspamd_mime_part *part,
struct rspamd_mime_parser_ctx *st,
struct rspamd_mime_parser_runtime *st,
struct rspamd_content_type *ct,
GError **err);
static enum rspamd_mime_parse_error
rspamd_mime_process_multipart_node(struct rspamd_task *task,
struct rspamd_mime_parser_ctx *st,
struct rspamd_mime_parser_runtime *st,
struct rspamd_mime_part *multipart,
const char *start, const char *end,
gboolean is_finished,
@ -162,19 +214,22 @@ rspamd_cte_from_string(const char *str)
static void
rspamd_mime_parser_init_lib(void)
{
lib_ctx = g_malloc0(sizeof(*lib_ctx));
lib_ctx->mp_boundary = rspamd_multipattern_create(RSPAMD_MULTIPATTERN_DEFAULT);
g_assert(lib_ctx->mp_boundary != NULL);
rspamd_multipattern_add_pattern(lib_ctx->mp_boundary, "\r--", 0);
rspamd_multipattern_add_pattern(lib_ctx->mp_boundary, "\n--", 0);
mime_parser_cfg = g_malloc0(sizeof(*mime_parser_cfg));
mime_parser_cfg->mp_boundary = rspamd_multipattern_create(RSPAMD_MULTIPATTERN_DEFAULT);
g_assert(mime_parser_cfg->mp_boundary != NULL);
rspamd_multipattern_add_pattern(mime_parser_cfg->mp_boundary, "\r--", 0);
rspamd_multipattern_add_pattern(mime_parser_cfg->mp_boundary, "\n--", 0);
GError *err = NULL;
if (!rspamd_multipattern_compile(lib_ctx->mp_boundary, RSPAMD_MULTIPATTERN_COMPILE_NO_FS, &err)) {
if (!rspamd_multipattern_compile(mime_parser_cfg->mp_boundary, RSPAMD_MULTIPATTERN_COMPILE_NO_FS, &err)) {
msg_err("fatal error: cannot compile multipattern for mime parser boundaries: %e", err);
g_error_free(err);
g_abort();
}
ottery_rand_bytes(lib_ctx->hkey, sizeof(lib_ctx->hkey));
ottery_rand_bytes(mime_parser_cfg->hkey, sizeof(mime_parser_cfg->hkey));
mime_parser_cfg->key_usages = 0;
mime_parser_cfg->L = NULL;
mime_parser_cfg->lua_magic_detect_cbref = -1;
}
static enum rspamd_cte
@ -398,7 +453,8 @@ rspamd_mime_part_get_cte(struct rspamd_task *task,
enum rspamd_cte cte = RSPAMD_CTE_UNKNOWN;
gboolean parent_propagated = FALSE;
hdr = rspamd_message_get_header_from_hash(hdrs, "Content-Transfer-Encoding", FALSE);
hdr = rspamd_message_get_header_from_hash(hdrs,
"Content-Transfer-Encoding", FALSE);
if (hdr == NULL) {
if (part->parent_part && part->parent_part->cte != RSPAMD_CTE_UNKNOWN &&
@ -648,7 +704,7 @@ void rspamd_mime_parser_calc_digest(struct rspamd_mime_part *part)
static enum rspamd_mime_parse_error
rspamd_mime_parse_normal_part(struct rspamd_task *task,
struct rspamd_mime_part *part,
struct rspamd_mime_parser_ctx *st,
struct rspamd_mime_parser_runtime *st,
struct rspamd_content_type *ct,
GError **err)
{
@ -845,10 +901,11 @@ rspamd_mime_parse_normal_part(struct rspamd_task *task,
return RSPAMD_MIME_PARSE_OK;
}
struct rspamd_mime_multipart_cbdata {
struct rspamd_task *task;
struct rspamd_mime_part *multipart;
struct rspamd_mime_parser_ctx *st;
struct rspamd_mime_parser_runtime *st;
const char *part_start;
rspamd_ftok_t *cur_boundary;
uint64_t bhash;
@ -857,7 +914,7 @@ struct rspamd_mime_multipart_cbdata {
static enum rspamd_mime_parse_error
rspamd_mime_process_multipart_node(struct rspamd_task *task,
struct rspamd_mime_parser_ctx *st,
struct rspamd_mime_parser_runtime *st,
struct rspamd_mime_part *multipart,
const char *start, const char *end,
gboolean is_finished,
@ -996,7 +1053,123 @@ rspamd_mime_process_multipart_node(struct rspamd_task *task,
}
}
else {
/* First, decode the part normally */
ret = rspamd_mime_parse_normal_part(task, npart, st, sel, err);
if (ret == RSPAMD_MIME_PARSE_OK) {
/* Ask lua_magic if this is a message (e.g. .eml) */
lua_State *L = NULL;
int old_top = -1, err_idx;
gboolean promote_to_message = FALSE;
if (task->cfg) {
L = task->cfg->lua_state;
}
if (L) {
old_top = lua_gettop(L);
lua_pushcfunction(L, &rspamd_lua_traceback);
err_idx = lua_gettop(L);
if (task->cfg->mime_parser_cfg && task->cfg->mime_parser_cfg->lua_magic_detect_cbref != -1) {
lua_rawgeti(L, LUA_REGISTRYINDEX, task->cfg->mime_parser_cfg->lua_magic_detect_cbref);
struct rspamd_mime_part **pmime;
struct rspamd_task **ptask;
pmime = lua_newuserdata(L, sizeof(struct rspamd_mime_part *));
rspamd_lua_setclass(L, rspamd_mimepart_classname, -1);
*pmime = npart;
ptask = lua_newuserdata(L, sizeof(struct rspamd_task *));
rspamd_lua_setclass(L, rspamd_task_classname, -1);
*ptask = task;
if (lua_pcall(L, 2, 2, err_idx) != 0) {
msg_err_task("cannot detect type (lua_magic): %s", lua_tostring(L, -1));
}
else {
/* Stack: [traceback][ext][table] */
if (lua_istable(L, -1)) {
/* Fill detected_ext */
if (lua_isstring(L, -2)) {
npart->detected_ext = rspamd_mempool_strdup(task->task_pool,
lua_tostring(L, -2));
}
/* detected_ct */
lua_pushstring(L, "ct");
lua_gettable(L, -2);
if (lua_isstring(L, -1)) {
const char *mb = lua_tostring(L, -1);
if (mb) {
rspamd_ftok_t srch;
srch.begin = mb;
srch.len = strlen(mb);
npart->detected_ct = rspamd_content_type_parse(srch.begin,
srch.len,
task->task_pool);
}
}
lua_pop(L, 1);
/* detected_type and promotion */
lua_pushstring(L, "type");
lua_gettable(L, -2);
if (lua_isstring(L, -1)) {
const char *t = lua_tostring(L, -1);
if (t) {
npart->detected_type = rspamd_mempool_strdup(task->task_pool, t);
if (strcmp(t, "message") == 0) {
promote_to_message = TRUE;
}
}
}
lua_pop(L, 1);
/* no_text flag */
lua_pushstring(L, "no_text");
lua_gettable(L, -2);
if (lua_isboolean(L, -1)) {
if (!!lua_toboolean(L, -1)) {
npart->flags |= RSPAMD_MIME_PART_NO_TEXT_EXTRACTION;
}
}
lua_pop(L, 1);
/* ext fallback for promotion */
if (!promote_to_message && lua_isstring(L, -2)) {
const char *ext = lua_tostring(L, -2);
if (ext && g_ascii_strcasecmp(ext, "eml") == 0) {
promote_to_message = TRUE;
}
}
}
}
/* Clean stack */
lua_settop(L, old_top);
}
else {
/* Pop traceback */
lua_settop(L, old_top);
}
}
if (promote_to_message) {
msg_debug_mime("treat part as embedded message (lua_magic)");
st->nesting++;
g_ptr_array_add(st->stack, npart);
npart->part_type = RSPAMD_MIME_PART_MESSAGE;
ret = rspamd_mime_parse_message(task, npart, st, err);
}
}
}
return ret;
@ -1005,7 +1178,7 @@ rspamd_mime_process_multipart_node(struct rspamd_task *task,
static enum rspamd_mime_parse_error
rspamd_mime_parse_multipart_cb(struct rspamd_task *task,
struct rspamd_mime_part *multipart,
struct rspamd_mime_parser_ctx *st,
struct rspamd_mime_parser_runtime *st,
struct rspamd_mime_multipart_cbdata *cb,
struct rspamd_mime_boundary *b)
{
@ -1048,7 +1221,7 @@ rspamd_mime_parse_multipart_cb(struct rspamd_task *task,
static enum rspamd_mime_parse_error
rspamd_multipart_boundaries_filter(struct rspamd_task *task,
struct rspamd_mime_part *multipart,
struct rspamd_mime_parser_ctx *st,
struct rspamd_mime_parser_runtime *st,
struct rspamd_mime_multipart_cbdata *cb)
{
struct rspamd_mime_boundary *cur;
@ -1162,7 +1335,7 @@ rspamd_multipart_boundaries_filter(struct rspamd_task *task,
static enum rspamd_mime_parse_error
rspamd_mime_parse_multipart_part(struct rspamd_task *task,
struct rspamd_mime_part *part,
struct rspamd_mime_parser_ctx *st,
struct rspamd_mime_parser_runtime *st,
GError **err)
{
struct rspamd_mime_multipart_cbdata cbdata;
@ -1192,7 +1365,7 @@ rspamd_mime_parse_multipart_part(struct rspamd_task *task,
cbdata.cur_boundary = &part->ct->boundary;
rspamd_cryptobox_siphash((unsigned char *) &cbdata.bhash,
cbdata.cur_boundary->begin, cbdata.cur_boundary->len,
lib_ctx->hkey);
mime_parser_cfg->hkey);
msg_debug_mime("hash: %T -> %L", cbdata.cur_boundary, cbdata.bhash);
}
else {
@ -1223,7 +1396,7 @@ rspamd_mime_preprocess_cb(struct rspamd_multipattern *mp,
gsize blen;
gboolean closing = FALSE;
struct rspamd_mime_boundary b;
struct rspamd_mime_parser_ctx *st = context;
struct rspamd_mime_parser_runtime *st = context;
struct rspamd_task *task;
task = st->task;
@ -1307,7 +1480,7 @@ rspamd_mime_preprocess_cb(struct rspamd_multipattern *mp,
}
rspamd_cryptobox_siphash((unsigned char *) &b.hash, lc_copy, blen,
lib_ctx->hkey);
mime_parser_cfg->hkey);
msg_debug_mime("normal hash: %*s -> %L, %d boffset, %d data offset",
(int) blen, lc_copy, b.hash, (int) b.boundary, (int) b.start);
@ -1315,7 +1488,7 @@ rspamd_mime_preprocess_cb(struct rspamd_multipattern *mp,
b.flags = RSPAMD_MIME_BOUNDARY_FLAG_CLOSED;
rspamd_cryptobox_siphash((unsigned char *) &b.closed_hash, lc_copy,
blen + 2,
lib_ctx->hkey);
mime_parser_cfg->hkey);
msg_debug_mime("closing hash: %*s -> %L, %d boffset, %d data offset",
(int) blen + 2, lc_copy,
b.closed_hash,
@ -1406,17 +1579,17 @@ end:
static void
rspamd_mime_preprocess_message(struct rspamd_task *task,
struct rspamd_mime_part *top,
struct rspamd_mime_parser_ctx *st)
struct rspamd_mime_parser_runtime *st)
{
if (top->raw_data.begin >= st->pos) {
rspamd_multipattern_lookup(lib_ctx->mp_boundary,
rspamd_multipattern_lookup(mime_parser_cfg->mp_boundary,
top->raw_data.begin - 1,
top->raw_data.len + 1,
rspamd_mime_preprocess_cb, st, NULL);
}
else {
rspamd_multipattern_lookup(lib_ctx->mp_boundary,
rspamd_multipattern_lookup(mime_parser_cfg->mp_boundary,
st->pos,
st->end - st->pos,
rspamd_mime_preprocess_cb, st, NULL);
@ -1424,7 +1597,7 @@ rspamd_mime_preprocess_message(struct rspamd_task *task,
}
static void
rspamd_mime_parse_stack_free(struct rspamd_mime_parser_ctx *st)
rspamd_mime_parse_stack_free(struct rspamd_mime_parser_runtime *st)
{
if (st) {
g_ptr_array_free(st->stack, TRUE);
@ -1436,7 +1609,7 @@ rspamd_mime_parse_stack_free(struct rspamd_mime_parser_ctx *st)
static enum rspamd_mime_parse_error
rspamd_mime_parse_message(struct rspamd_task *task,
struct rspamd_mime_part *part,
struct rspamd_mime_parser_ctx *st,
struct rspamd_mime_parser_runtime *st,
GError **err)
{
struct rspamd_content_type *ct, *sel = NULL;
@ -1448,7 +1621,7 @@ rspamd_mime_parse_message(struct rspamd_task *task,
unsigned int i;
enum rspamd_mime_parse_error ret = RSPAMD_MIME_PARSE_OK;
GString str;
struct rspamd_mime_parser_ctx *nst = st;
struct rspamd_mime_parser_runtime *nst = st;
if (st->nesting > max_nested) {
g_set_error(err, RSPAMD_MIME_QUARK, E2BIG, "Nesting level is too high: %d",
@ -1732,17 +1905,17 @@ rspamd_mime_parse_message(struct rspamd_task *task,
enum rspamd_mime_parse_error
rspamd_mime_parse_task(struct rspamd_task *task, GError **err)
{
struct rspamd_mime_parser_ctx *st;
struct rspamd_mime_parser_runtime *st;
enum rspamd_mime_parse_error ret = RSPAMD_MIME_PARSE_OK;
if (lib_ctx == NULL) {
rspamd_mime_parser_init_lib();
if (mime_parser_cfg == NULL) {
rspamd_mime_parser_init_shared(task->cfg);
}
if (++lib_ctx->key_usages > max_key_usages) {
if (++mime_parser_cfg->key_usages > max_key_usages) {
/* Regenerate siphash key */
ottery_rand_bytes(lib_ctx->hkey, sizeof(lib_ctx->hkey));
lib_ctx->key_usages = 0;
ottery_rand_bytes(mime_parser_cfg->hkey, sizeof(mime_parser_cfg->hkey));
mime_parser_cfg->key_usages = 0;
}
st = g_malloc0(sizeof(*st));

10
src/libmime/mime_parser.h

@ -18,6 +18,16 @@
#include "config.h"
struct rspamd_config;
struct rspamd_mime_parser_config;
/* Initialize shared mime parser config (stores Lua refs, precompiled data) */
struct rspamd_mime_parser_config *rspamd_mime_parser_init_shared(struct rspamd_config *cfg);
void rspamd_mime_parser_free_shared(struct rspamd_mime_parser_config *cfg);
/* Accessors */
int rspamd_mime_parser_get_lua_magic_cbref(const struct rspamd_mime_parser_config *cfg);
#ifdef __cplusplus
extern "C" {

4
src/libserver/cfg_file.h

@ -49,6 +49,7 @@ struct rspamd_external_libs_ctx;
struct rspamd_cryptobox_pubkey;
struct rspamd_dns_resolver;
struct rspamd_tokenizer_manager;
struct rspamd_mime_parser_config;
/**
* Logging type
@ -490,7 +491,8 @@ struct rspamd_config {
struct rspamd_monitored_ctx *monitored_ctx; /**< context for monitored resources */
void *redis_pool; /**< redis connection pool */
struct rspamd_re_cache *re_cache; /**< static regexp cache */
struct rspamd_re_cache *re_cache; /**< static regexp cache */
struct rspamd_mime_parser_config *mime_parser_cfg; /**< mime parser shared config */
GHashTable *trusted_keys; /**< list of trusted public keys */

Loading…
Cancel
Save