Browse Source

[Project] Further content module work

pull/3166/head
Vsevolod Stakhov 6 years ago
parent
commit
9744716ba7
  1. 9
      lualib/lua_content/ical.lua
  2. 73
      lualib/lua_content/init.lua
  3. 113
      src/libmime/message.c

9
lualib/lua_content/ical.lua

@ -15,6 +15,7 @@ limitations under the License.
]]--
local l = require 'lpeg'
local rspamd_text = require "rspamd_text"
local wsp = l.P" "
local crlf = l.P"\r"^-1 * l.P"\n"
@ -25,7 +26,7 @@ local elt = name * ":" * wsp^0 * value * eol
local exports = {}
local function ical_txt_values(input)
local function process_ical(input, _, _)
local control={n='\n', r='\r'}
local escaper = l.Ct((elt / function(_,b) return (b:gsub("\\(.)", control)) end)^1)
@ -35,13 +36,13 @@ local function ical_txt_values(input)
return nil
end
return table.concat(values, "\n")
return rspamd_text.fromtable(values, "\n")
end
--[[[
-- @function lua_ical.ical_txt_values(input)
-- @function lua_ical.process(input)
-- Returns all values from ical as a plain text. Names are completely ignored.
--]]
exports.ical_txt_values = ical_txt_values
exports.process = process_ical
return exports

73
lualib/lua_content/init.lua

@ -17,4 +17,75 @@ limitations under the License.
--[[[
-- @module lua_content
-- This module contains content processing logic
--]]
--]]
-- Table of functions exported by this module
local exports = {}
-- Name handed to lua_util.debugm as the first argument of every debug message
local N = "lua_content"
local lua_util = require "lua_util"
-- Registry of known content processors, keyed by a short module name.
-- Each entry carries:
--   mime_type  - MIME type string used as a lookup key for a part
--   module     - the processor module; its `process` function is what gets called
--   extensions - list of file extensions used as alternative lookup keys
--   output     - not read by the code visible here; presumably describes the
--                kind of data `process` returns (TODO confirm)
local content_modules = {
ical = {
mime_type = "text/calendar",
module = require "lua_content/ical",
extensions = {'ical'},
output = "text"
},
}
-- Lookup tables derived from content_modules; they stay nil until init() builds them
local modules_by_mime_type
local modules_by_extension
--- Builds the two lookup tables from `content_modules`.
-- Every registered module is indexed by its MIME type (when it declares one)
-- and by each of its file extensions (when it declares any). Each index slot
-- holds a `{name, definition}` pair.
local function init()
  modules_by_mime_type = {}
  modules_by_extension = {}

  for mod_name, mod_def in pairs(content_modules) do
    local mime_type = mod_def.mime_type
    if mime_type then
      modules_by_mime_type[mime_type] = { mod_name, mod_def }
    end

    local known_exts = mod_def.extensions
    if known_exts then
      for _, extension in ipairs(known_exts) do
        modules_by_extension[extension] = { mod_name, mod_def }
      end
    end
  end
end
--- Runs a registered content processor on a MIME part, if one matches.
-- The processor is looked up by the part's `type/subtype` string first
-- (missing pieces default to `application` / `octet-stream`); when that
-- fails, the part's detected extension is tried. If the processor returns
-- data, it is stored on the part via `part:set_specific`.
-- @param part the MIME part object to inspect
-- @param log_obj object passed through to `lua_util.debugm` and to the processor
exports.maybe_process_mime_part = function(part, log_obj)
  -- Lookup tables are built lazily on the first call
  if not modules_by_mime_type then
    init()
  end

  local ctype, csubtype = part:get_type()
  local mt = string.format("%s/%s", ctype or 'application',
      csubtype or 'octet-stream')

  local found = modules_by_mime_type[mt]
  if not found then
    -- No match by declared type: fall back to the detected extension
    local detected_ext = part:get_detected_ext()
    if detected_ext then
      found = modules_by_extension[detected_ext]
    end
  end

  if not found then
    return
  end

  local mod_name, mod_def = found[1], found[2]
  lua_util.debugm(N, log_obj, "found known content of type %s: %s",
      mt, mod_name)

  local data = mod_def.module.process(part:get_content(), part, log_obj)
  if not data then
    lua_util.debugm(N, log_obj, "failed to extract anything from %s",
        mod_name)
    return
  end

  lua_util.debugm(N, log_obj, "extracted content from %s: %s type",
      mod_name, type(data))
  part:set_specific(data)
end
return exports

113
src/libmime/message.c

@ -694,71 +694,8 @@ rspamd_message_process_plain_text_part (struct rspamd_task *task,
rspamd_mime_text_part_maybe_convert (task, text_part);
if (text_part->utf_raw_content != NULL) {
/* Check for ical */
rspamd_ftok_t cal_ct;
/*
* TODO: If we want to process more than that, we need
* to create some generic framework that accepts a part
* and returns a processed data
*/
RSPAMD_FTOK_ASSIGN (&cal_ct, "calendar");
if (rspamd_ftok_casecmp (&cal_ct, &text_part->mime_part->ct->subtype) == 0) {
lua_State *L = task->cfg->lua_state;
gint err_idx;
lua_pushcfunction (L, &rspamd_lua_traceback);
err_idx = lua_gettop (L);
/* Obtain function */
if (!rspamd_lua_require_function (L, "lua_ical", "ical_txt_values")) {
msg_err_task ("cannot require lua_ical.ical_txt_values");
lua_settop (L, err_idx - 1);
return FALSE;
}
lua_pushlstring (L, text_part->utf_raw_content->data,
text_part->utf_raw_content->len);
if (lua_pcall (L, 1, 1, err_idx) != 0) {
msg_err_task ("cannot call lua lua_ical.ical_txt_values: %s",
lua_tostring (L, -1));
lua_settop (L, err_idx - 1);
return FALSE;
}
if (lua_type (L, -1) == LUA_TSTRING) {
const char *ndata;
gsize nsize;
ndata = lua_tolstring (L, -1, &nsize);
text_part->utf_content = g_byte_array_sized_new (nsize);
g_byte_array_append (text_part->utf_content, ndata, nsize);
rspamd_mempool_add_destructor (task->task_pool,
(rspamd_mempool_destruct_t) free_byte_array_callback,
text_part->utf_content);
}
else if (lua_type (L, -1) == LUA_TNIL) {
msg_info_task ("cannot convert text/calendar to plain text");
text_part->utf_content = text_part->utf_raw_content;
}
else {
msg_err_task ("invalid return type when calling lua_ical.ical_txt_values: %s",
lua_typename (L, lua_type (L, -1)));
lua_settop (L, err_idx - 1);
return FALSE;
}
lua_settop (L, err_idx - 1);
}
else {
/* Just have the same content */
text_part->utf_content = text_part->utf_raw_content;
}
/* Just have the same content */
text_part->utf_content = text_part->utf_raw_content;
}
else {
/*
@ -1378,7 +1315,7 @@ rspamd_message_process (struct rspamd_task *task)
guint tw, *ptw, dw;
struct rspamd_mime_part *part;
lua_State *L = NULL;
gint func_pos = -1;
gint magic_func_pos = -1, content_func_pos = -1, old_top = -1;
if (task->cfg) {
L = task->cfg->lua_state;
@ -1386,20 +1323,32 @@ rspamd_message_process (struct rspamd_task *task)
rspamd_archives_process (task);
if (L) {
old_top = lua_gettop (L);
}
if (L && rspamd_lua_require_function (L,
"lua_magic", "detect_mime_part")) {
func_pos = lua_gettop (L);
magic_func_pos = lua_gettop (L);
}
else {
msg_err_task ("cannot require lua_magic.detect_mime_part");
}
if (L && rspamd_lua_require_function (L,
"lua_content", "maybe_process_mime_part")) {
content_func_pos = lua_gettop (L);
}
else {
msg_err_task ("cannot require lua_content.maybe_process_mime_part");
}
PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, parts), i, part) {
if (func_pos != -1 && part->parsed_data.len > 0) {
if (magic_func_pos != -1 && part->parsed_data.len > 0) {
struct rspamd_mime_part **pmime;
struct rspamd_task **ptask;
lua_pushvalue (L, func_pos);
lua_pushvalue (L, magic_func_pos);
pmime = lua_newuserdata (L, sizeof (struct rspamd_mime_part *));
rspamd_lua_setclass (L, "rspamd{mimepart}", -1);
*pmime = part;
@ -1447,7 +1396,27 @@ rspamd_message_process (struct rspamd_task *task)
}
}
lua_settop (L, func_pos);
lua_settop (L, magic_func_pos);
}
/* Now detect content */
if (content_func_pos != -1 && part->parsed_data.len > 0) {
struct rspamd_mime_part **pmime;
struct rspamd_task **ptask;
lua_pushvalue (L, content_func_pos);
pmime = lua_newuserdata (L, sizeof (struct rspamd_mime_part *));
rspamd_lua_setclass (L, "rspamd{mimepart}", -1);
*pmime = part;
ptask = lua_newuserdata (L, sizeof (struct rspamd_task *));
rspamd_lua_setclass (L, "rspamd{task}", -1);
*ptask = task;
if (lua_pcall (L, 2, 2, 0) != 0) {
msg_err_task ("cannot detect content: %s", lua_tostring (L, -1));
}
lua_settop (L, magic_func_pos);
}
if (part->part_type == RSPAMD_MIME_PART_UNDEFINED) {
@ -1455,8 +1424,8 @@ rspamd_message_process (struct rspamd_task *task)
}
}
if (func_pos != -1) {
lua_settop (L, func_pos - 1);
if (old_top != -1) {
lua_settop (L, old_top);
}
/* Calculate average words length and number of short words */

Loading…
Cancel
Save