Browse Source

Rewrite fuzzy check plugin.

pull/121/head
Vsevolod Stakhov 11 years ago
parent
commit
18984aadd0
  1. 2
      config.h.in
  2. 639
      src/plugins/fuzzy_check.c

2
config.h.in

@ -96,7 +96,7 @@
#elif defined(__GNUC__)
# define RSPAMD_PACKED(name) struct __attribute__((packed)) name
#else
# define RSPAMD_PACKED(name)
# define RSPAMD_PACKED(name) struct name
#endif

639
src/plugins/fuzzy_check.c

@ -48,6 +48,7 @@
#include "utlist.h"
#include "main.h"
#include "blake2.h"
#include "ottery.h"
#define DEFAULT_SYMBOL "R_FUZZY_HASH"
#define DEFAULT_UPSTREAM_ERROR_TIME 10
@ -97,7 +98,7 @@ struct fuzzy_ctx {
struct fuzzy_client_session {
gint state;
rspamd_fuzzy_t *h;
GPtrArray *commands;
struct event ev;
struct timeval tv;
struct rspamd_task *task;
@ -108,13 +109,9 @@ struct fuzzy_client_session {
struct fuzzy_learn_session {
struct event ev;
rspamd_fuzzy_t *h;
gint cmd;
gint value;
gint flag;
GPtrArray *commands;
gint *saved;
GError **err;
struct fuzzy_mapping *map;
struct timeval tv;
struct rspamd_http_connection_entry *http_entry;
struct upstream *server;
@ -492,6 +489,9 @@ fuzzy_io_fin (void *ud)
{
	/*
	 * Finalizer for a fuzzy check session: `ud` is the
	 * struct fuzzy_client_session. Releases the command array owned by the
	 * session, removes its libevent event and closes its socket.
	 */
	struct fuzzy_client_session *session = ud;

	if (session->commands) {
		/*
		 * Free the pointer segment together with the array wrapper. With
		 * FALSE g_ptr_array_free() hands the segment back to the caller, and
		 * as the return value is not kept here it would simply be leaked.
		 * The stored commands themselves are not touched by this call as
		 * long as the array has no element free function.
		 */
		g_ptr_array_free (session->commands, TRUE);
	}

	event_del (&session->ev);
	close (session->fd);
}
@ -519,24 +519,16 @@ fuzzy_cmd_from_text_part (struct fuzzy_rule *rule,
if (legacy || part->words == NULL) {
cmd = rspamd_mempool_alloc0 (pool, sizeof (*cmd));
cmd->cmd = c;
cmd->version = RSPAMD_FUZZY_VERSION;
if (c != FUZZY_CHECK) {
cmd->flag = flag;
cmd->value = weight;
}
cmd->shingles_count = 0;
rspamd_strlcpy (cmd->digest, part->fuzzy->hash_pipe, sizeof (cmd->digest));
*size = sizeof (struct rspamd_fuzzy_cmd);
if (size != NULL) {
*size = sizeof (struct rspamd_fuzzy_cmd);
}
}
else {
shcmd = rspamd_mempool_alloc0 (pool, sizeof (*shcmd));
shcmd->basic.cmd = c;
shcmd->basic.version = RSPAMD_FUZZY_VERSION;
if (c != FUZZY_CHECK) {
shcmd->basic.flag = flag;
shcmd->basic.value = weight;
}
/*
* Generate hash from all words in the part
@ -557,7 +549,18 @@ fuzzy_cmd_from_text_part (struct fuzzy_rule *rule,
}
cmd = (struct rspamd_fuzzy_cmd *)shcmd;
*size = sizeof (struct rspamd_fuzzy_shingle_cmd);
if (size != NULL) {
*size = sizeof (struct rspamd_fuzzy_shingle_cmd);
}
}
cmd->tag = ottery_rand_uint32 ();
cmd->cmd = c;
cmd->version = RSPAMD_FUZZY_VERSION;
if (c != FUZZY_CHECK) {
cmd->flag = flag;
cmd->value = weight;
}
return cmd;
@ -584,6 +587,7 @@ fuzzy_cmd_from_data_part (struct fuzzy_rule *rule,
cmd->value = weight;
}
cmd->shingles_count = 0;
cmd->tag = ottery_rand_uint32 ();
if (legacy) {
GChecksum *cksum;
@ -603,7 +607,9 @@ fuzzy_cmd_from_data_part (struct fuzzy_rule *rule,
blake2b_final (&st, cmd->digest, sizeof (cmd->digest));
}
*size = sizeof (struct rspamd_fuzzy_cmd);
if (size != NULL) {
*size = sizeof (struct rspamd_fuzzy_cmd);
}
return cmd;
}
@ -623,33 +629,81 @@ fuzzy_cmd_to_wire (gint fd, const struct rspamd_fuzzy_cmd *cmd, gsize len)
return TRUE;
}
/*
 * Write every command stored in `v` to `fd`, in array order.
 *
 * A command whose shingles_count is positive is sent with the size of
 * struct rspamd_fuzzy_shingle_cmd, any other command with the size of
 * struct rspamd_fuzzy_cmd.
 *
 * Returns FALSE as soon as fuzzy_cmd_to_wire() fails for one command (the
 * remaining commands are not attempted) and TRUE when all of them were sent,
 * which includes the case of an empty array.
 */
static gboolean
fuzzy_cmd_vector_to_wire (gint fd, GPtrArray *v)
{
	guint idx = 0;

	while (idx < v->len) {
		const struct rspamd_fuzzy_cmd *cur = g_ptr_array_index (v, idx);
		gsize wire_len;

		if (cur->shingles_count > 0) {
			wire_len = sizeof (struct rspamd_fuzzy_shingle_cmd);
		}
		else {
			wire_len = sizeof (struct rspamd_fuzzy_cmd);
		}

		if (!fuzzy_cmd_to_wire (fd, cur, wire_len)) {
			return FALSE;
		}

		idx ++;
	}

	return TRUE;
}
/*
 * Take one reply from the receive buffer and match it to a pending request.
 *
 * `*pos` points at the unread part of the buffer and `*r` holds the number of
 * unread bytes. When the reply's tag equals the tag of a command in `req`,
 * that command is removed from `req`, `*pos` and `*r` are moved past the
 * reply and a pointer to the reply (inside the caller's buffer) is returned.
 *
 * Returns NULL, leaving `*pos`, `*r` and `req` untouched, when fewer bytes
 * than one struct rspamd_fuzzy_reply remain or when no pending command
 * carries the reply's tag (the latter case is logged).
 */
static const struct rspamd_fuzzy_reply *
fuzzy_process_reply (guchar **pos, gint *r, GPtrArray *req)
{
	const struct rspamd_fuzzy_reply *reply;
	const struct rspamd_fuzzy_cmd *pending;
	const gsize reply_len = sizeof (struct rspamd_fuzzy_reply);
	guint idx;

	/* Also covers the "nothing left" case, as zero is below the reply size */
	if ((guint)*r < reply_len) {
		return NULL;
	}

	reply = (const struct rspamd_fuzzy_reply *)*pos;

	/* Look for the request this reply answers */
	for (idx = 0; idx < req->len; idx ++) {
		pending = g_ptr_array_index (req, idx);

		if (pending->tag != reply->tag) {
			continue;
		}

		g_ptr_array_remove_index (req, idx);
		*pos += sizeof (struct rspamd_fuzzy_reply);
		*r -= sizeof (struct rspamd_fuzzy_reply);

		return reply;
	}

	msg_info ("unexpected tag: %ud", reply->tag);

	return NULL;
}
/* Call this whenever we got data from fuzzy storage */
static void
fuzzy_io_callback (gint fd, short what, void *arg)
{
struct fuzzy_client_session *session = arg;
struct legacy_fuzzy_cmd cmd;
const struct rspamd_fuzzy_reply *rep;
struct fuzzy_mapping *map;
gchar buf[62], *err_str;
guchar buf[2048], *p;
const gchar *symbol;
gint value = 0, flag = 0, r;
gint r;
double nval;
gint ret = 0;
if (what == EV_WRITE) {
/* Send command to storage */
memset (&cmd, 0, sizeof (cmd));
cmd.blocksize = session->h->block_size;
cmd.value = 0;
memcpy (cmd.hash, session->h->hash_pipe, sizeof (cmd.hash));
cmd.cmd = FUZZY_CHECK;
cmd.flag = 0;
if (write (fd, &cmd, sizeof (struct legacy_fuzzy_cmd)) == -1) {
if (!fuzzy_cmd_vector_to_wire (fd, session->commands)) {
ret = -1;
}
else {
event_del (&session->ev);
event_set (&session->ev, fd, EV_READ, fuzzy_io_callback, session);
event_set (&session->ev, fd, EV_READ | EV_PERSIST,
fuzzy_io_callback, session);
event_add (&session->ev, &session->tv);
session->state = 1;
}
@ -659,48 +713,44 @@ fuzzy_io_callback (gint fd, short what, void *arg)
if ((r = read (fd, buf, sizeof (buf) - 1)) == -1) {
ret = -1;
}
else if (buf[0] == 'O' && buf[1] == 'K') {
buf[r] = 0;
/* Now try to get value */
value = strtol (buf + 3, &err_str, 10);
if (*err_str == ' ') {
/* Now read flag */
flag = strtol (err_str + 1, &err_str, 10);
}
*err_str = '\0';
/* Get mapping by flag */
if ((map =
g_hash_table_lookup (session->rule->mappings,
GINT_TO_POINTER (flag))) == NULL) {
/* Default symbol and default weight */
symbol = session->rule->symbol;
nval = fuzzy_normalize (value, session->rule->max_score);
}
else {
/* Get symbol and weight from map */
symbol = map->symbol;
nval = fuzzy_normalize (value, map->weight);
}
msg_info (
"<%s>, found fuzzy hash '%s' with weight: %.2f, in list: %s:%d%s",
session->task->message_id,
fuzzy_to_string (session->h),
nval,
symbol,
flag,
map == NULL ? "(unknown)" : "");
if (map != NULL || !session->rule->skip_unknown) {
rspamd_snprintf (buf,
sizeof (buf),
"%d: %d / %.2f",
flag,
value,
nval);
rspamd_task_insert_result_single (session->task,
symbol,
nval,
g_list_prepend (NULL,
rspamd_mempool_strdup (session->task->task_pool, buf)));
else {
p = buf;
while ((rep = fuzzy_process_reply (&p, &r, session->commands)) != NULL) {
/* Get mapping by flag */
if ((map =
g_hash_table_lookup (session->rule->mappings,
GINT_TO_POINTER (rep->flag))) == NULL) {
/* Default symbol and default weight */
symbol = session->rule->symbol;
}
else {
/* Get symbol and weight from map */
symbol = map->symbol;
}
nval = fuzzy_normalize (rep->value, session->rule->max_score);
nval *= rep->prob;
msg_info (
"<%s>, found fuzzy hash with weight: %.2f, in list: %s:%d%s",
session->task->message_id,
nval,
symbol,
rep->flag,
map == NULL ? "(unknown)" : "");
if (map != NULL || !session->rule->skip_unknown) {
rspamd_snprintf (buf,
sizeof (buf),
"%d: %.2f / %.2f",
rep->flag,
rep->prob,
nval);
rspamd_task_insert_result_single (session->task,
symbol,
nval,
g_list_prepend (NULL,
rspamd_mempool_strdup (session->task->task_pool, buf)));
}
}
}
ret = 1;
@ -719,32 +769,33 @@ fuzzy_io_callback (gint fd, short what, void *arg)
errno,
strerror (errno));
rspamd_upstream_fail (session->server);
remove_normal_event (session->task->s, fuzzy_io_fin, session);
}
else {
rspamd_upstream_ok (session->server);
if (session->commands->len == 0) {
/*
* All requests are processed now
*/
remove_normal_event (session->task->s, fuzzy_io_fin, session);
}
}
remove_normal_event (session->task->s, fuzzy_io_fin, session);
}
static void
fuzzy_learn_callback (gint fd, short what, void *arg)
{
struct fuzzy_learn_session *session = arg;
struct legacy_fuzzy_cmd cmd;
gchar buf[512];
const gchar *cmd_name, *symbol;
const struct rspamd_fuzzy_reply *rep;
struct fuzzy_mapping *map;
guchar buf[2048], *p;
const gchar *symbol;
gint r;
gint ret = 0;
cmd_name = (session->cmd == FUZZY_WRITE ? "add" : "delete");
if (what == EV_WRITE) {
/* Send command to storage */
cmd.blocksize = session->h->block_size;
memcpy (cmd.hash, session->h->hash_pipe, sizeof (cmd.hash));
cmd.cmd = session->cmd;
cmd.value = session->value;
cmd.flag = session->flag;
if (write (fd, &cmd, sizeof (struct legacy_fuzzy_cmd)) == -1) {
/* Send commands to storage */
if (!fuzzy_cmd_vector_to_wire (fd, session->commands)) {
if (*(session->err) == NULL) {
g_set_error (session->err,
g_quark_from_static_string ("fuzzy check"),
@ -754,61 +805,65 @@ fuzzy_learn_callback (gint fd, short what, void *arg)
}
else {
event_del (&session->ev);
event_set (&session->ev, fd, EV_READ, fuzzy_learn_callback,
session);
event_set (&session->ev, fd, EV_READ | EV_PERSIST,
fuzzy_learn_callback, session);
event_add (&session->ev, &session->tv);
}
}
else if (what == EV_READ) {
if (session->map) {
symbol = session->map->symbol;
}
else {
symbol = session->rule->symbol;
}
if (read (fd, buf, sizeof (buf)) == -1) {
msg_info ("cannot %s fuzzy hash for message <%s>, list %s:%d",
cmd_name,
session->task->message_id,
symbol,
session->flag);
if ((r = read (fd, buf, sizeof (buf) - 1)) == -1) {
msg_info ("cannot process fuzzy hash for message <%s>",
session->task->message_id);
if (*(session->err) == NULL) {
g_set_error (session->err,
g_quark_from_static_string ("fuzzy check"),
errno, "read socket error: %s", strerror (errno));
g_quark_from_static_string ("fuzzy check"),
errno, "read socket error: %s", strerror (errno));
}
ret = -1;
}
else if (buf[0] == 'O' && buf[1] == 'K') {
msg_info ("%s fuzzy hash '%s', list: %s:%d for message <%s>",
cmd_name,
fuzzy_to_string (session->h),
symbol,
session->flag,
session->task->message_id);
ret = 1;
}
else {
msg_info ("cannot %s fuzzy hash '%s' for message <%s>, list %s:%d",
cmd_name,
fuzzy_to_string (session->h),
session->task->message_id,
symbol,
session->flag);
if (*(session->err) == NULL) {
g_set_error (session->err,
g_quark_from_static_string (
"fuzzy check"), EINVAL, "%s fuzzy error", cmd_name);
p = buf;
while ((rep = fuzzy_process_reply (&p, &r, session->commands)) != NULL) {
if ((map =
g_hash_table_lookup (session->rule->mappings,
GINT_TO_POINTER (rep->flag))) == NULL) {
/* Default symbol and default weight */
symbol = session->rule->symbol;
}
else {
/* Get symbol and weight from map */
symbol = map->symbol;
}
if (rep->prob > 0.5) {
msg_info ("processed fuzzy hash, list: %s:%d for message <%s>",
symbol,
rep->flag,
session->task->message_id);
ret = 1;
}
else {
msg_info ("cannot process fuzzy hash for message <%s>, list %s:%d",
session->task->message_id,
symbol,
rep->flag);
if (*(session->err) == NULL) {
g_set_error (session->err,
g_quark_from_static_string ("fuzzy check"),
EINVAL, "process fuzzy error");
}
ret = 1;
}
}
ret = 1;
}
}
else {
errno = ETIMEDOUT;
if (*(session->err) == NULL) {
g_set_error (session->err,
g_quark_from_static_string (
"fuzzy check"), EINVAL, "%s fuzzy, IO timeout", cmd_name);
g_quark_from_static_string ("fuzzy check"), EINVAL,
"process fuzzy, IO timeout");
}
ret = -1;
}
@ -825,11 +880,14 @@ fuzzy_learn_callback (gint fd, short what, void *arg)
rspamd_upstream_ok (session->server);
}
rspamd_http_connection_unref (session->http_entry->conn);
event_del (&session->ev);
close (session->fd);
if (ret == -1 || session->commands->len == 0) {
(*session->saved) --;
rspamd_http_connection_unref (session->http_entry->conn);
event_del (&session->ev);
close (session->fd);
}
if (--(*(session->saved)) == 0) {
if (*session->saved == 0) {
if (*(session->err) != NULL) {
rspamd_controller_send_error (session->http_entry,
(*session->err)->code, (*session->err)->message);
@ -843,61 +901,20 @@ fuzzy_learn_callback (gint fd, short what, void *arg)
}
}
/*
 * Schedule an asynchronous check of the single fuzzy hash `h` for `rule`.
 *
 * The upstream is requested with RSPAMD_UPSTREAM_HASHED using h->hash_pipe as
 * the key. When no upstream is returned nothing happens; when the datagram
 * socket cannot be connected a warning is logged and nothing is scheduled.
 * Otherwise a session is allocated from the task pool, armed for EV_WRITE
 * with fuzzy_io_callback and registered as an async event of the task with
 * fuzzy_io_fin as its finalizer.
 */
static inline void
register_fuzzy_call (struct rspamd_task *task,
	struct fuzzy_rule *rule,
	rspamd_fuzzy_t *h)
{
	struct fuzzy_client_session *sess;
	struct upstream *up;
	gint fd;

	up = rspamd_upstream_get (rule->servers, RSPAMD_UPSTREAM_HASHED,
			h->hash_pipe, sizeof (h->hash_pipe));

	if (!up) {
		return;
	}

	fd = rspamd_inet_address_connect (rspamd_upstream_addr (up),
			SOCK_DGRAM, TRUE);

	if (fd == -1) {
		msg_warn ("cannot connect to %s, %d, %s",
			rspamd_upstream_name (up),
			errno,
			strerror (errno));
		return;
	}

	/* The socket is ready: build the session that owns it */
	sess = rspamd_mempool_alloc (task->task_pool,
			sizeof (struct fuzzy_client_session));
	sess->state = 0;
	sess->h = h;
	sess->task = task;
	sess->fd = fd;
	sess->server = up;
	sess->rule = rule;

	/* Wait until the socket is writable, bounded by the module IO timeout */
	event_set (&sess->ev, fd, EV_WRITE, fuzzy_io_callback, sess);
	msec_to_tv (fuzzy_module_ctx->io_timeout, &sess->tv);
	event_add (&sess->ev, &sess->tv);

	register_async_event (task->s,
		fuzzy_io_fin,
		sess,
		g_quark_from_static_string ("fuzzy check"));
}
static void
fuzzy_check_rule (struct rspamd_task *task, struct fuzzy_rule *rule)
static GPtrArray *
fuzzy_generate_commands (struct rspamd_task *task, struct fuzzy_rule *rule,
gint c, gint flag, guint32 value)
{
struct mime_text_part *part;
struct mime_part *mime_part;
struct rspamd_image *image;
gchar *checksum;
struct rspamd_fuzzy_cmd *cmd;
gsize hashlen;
GList *cur;
rspamd_fuzzy_t *fake_fuzzy;
GPtrArray *res;
cur = task->text_parts;
res = g_ptr_array_new ();
while (cur) {
part = cur->data;
@ -932,8 +949,19 @@ fuzzy_check_rule (struct rspamd_task *task, struct fuzzy_rule *rule)
continue;
}
register_fuzzy_call (task, rule, part->fuzzy);
register_fuzzy_call (task, rule, part->double_fuzzy);
/*
* Try legacy first
*/
cmd = fuzzy_cmd_from_text_part (rule, c, flag, value, task->task_pool,
part, TRUE, NULL);
if (cmd) {
g_ptr_array_add (res, cmd);
}
cmd = fuzzy_cmd_from_text_part (rule, c, flag, value, task->task_pool,
part, FALSE, NULL);
if (cmd) {
g_ptr_array_add (res, cmd);
}
cur = g_list_next (cur);
}
@ -946,16 +974,20 @@ fuzzy_check_rule (struct rspamd_task *task, struct fuzzy_rule *rule)
fuzzy_module_ctx->min_height) {
if (fuzzy_module_ctx->min_width <= 0 || image->width >=
fuzzy_module_ctx->min_width) {
checksum = g_compute_checksum_for_data (G_CHECKSUM_MD5,
image->data->data,
image->data->len);
/* Construct fake fuzzy hash */
fake_fuzzy = rspamd_mempool_alloc0 (task->task_pool,
sizeof (rspamd_fuzzy_t));
rspamd_strlcpy (fake_fuzzy->hash_pipe, checksum,
sizeof (fake_fuzzy->hash_pipe));
register_fuzzy_call (task, rule, fake_fuzzy);
g_free (checksum);
cmd = fuzzy_cmd_from_data_part (rule, c, flag, value,
task->task_pool,
image->data->data, image->data->len,
TRUE, NULL);
if (cmd) {
g_ptr_array_add (res, cmd);
}
cmd = fuzzy_cmd_from_data_part (rule, c, flag, value,
task->task_pool,
image->data->data, image->data->len,
FALSE, NULL);
if (cmd) {
g_ptr_array_add (res, cmd);
}
}
}
}
@ -969,28 +1001,83 @@ fuzzy_check_rule (struct rspamd_task *task, struct fuzzy_rule *rule)
fuzzy_check_content_type (rule, mime_part->type)) {
if (fuzzy_module_ctx->min_bytes <= 0 || mime_part->content->len >=
fuzzy_module_ctx->min_bytes) {
checksum = g_compute_checksum_for_data (G_CHECKSUM_MD5,
mime_part->content->data, mime_part->content->len);
/* Construct fake fuzzy hash */
fake_fuzzy =
rspamd_mempool_alloc0 (task->task_pool,
sizeof (rspamd_fuzzy_t));
rspamd_strlcpy (fake_fuzzy->hash_pipe, checksum,
sizeof (fake_fuzzy->hash_pipe));
register_fuzzy_call (task, rule, fake_fuzzy);
g_free (checksum);
cmd = fuzzy_cmd_from_data_part (rule, c, flag, value,
task->task_pool,
mime_part->content->data, mime_part->content->len,
TRUE, NULL);
if (cmd) {
g_ptr_array_add (res, cmd);
}
cmd = fuzzy_cmd_from_data_part (rule, c, flag, value,
task->task_pool,
mime_part->content->data, mime_part->content->len,
FALSE, NULL);
if (cmd) {
g_ptr_array_add (res, cmd);
}
}
}
cur = g_list_next (cur);
}
if (res->len == 0) {
g_ptr_array_free (res, FALSE);
return NULL;
}
return res;
}
/*
 * Schedule an asynchronous check of `commands` against a storage of `rule`.
 *
 * An upstream is requested with RSPAMD_UPSTREAM_ROUND_ROBIN and a datagram
 * socket is connected to it. On success a session is allocated from the task
 * pool, armed for EV_WRITE with fuzzy_io_callback and registered as an async
 * event of the task with fuzzy_io_fin as its finalizer.
 *
 * Ownership: this function takes over `commands`. On success the session
 * keeps the array (fuzzy_io_fin releases it); when no session can be created
 * the array is freed here, as nobody else would do it. The commands stored in
 * the array are not freed by this function.
 */
static inline void
register_fuzzy_client_call (struct rspamd_task *task,
	struct fuzzy_rule *rule,
	GPtrArray *commands)
{
	struct fuzzy_client_session *session;
	struct upstream *selected;
	gint sock;

	/* Get upstream */
	selected = rspamd_upstream_get (rule->servers, RSPAMD_UPSTREAM_ROUND_ROBIN);

	if (selected == NULL) {
		/* No session will own the array, so drop it now */
		g_ptr_array_free (commands, TRUE);
		return;
	}

	if ((sock = rspamd_inet_address_connect (rspamd_upstream_addr (selected),
			SOCK_DGRAM, TRUE)) == -1) {
		/* Log first: errno must still describe the connect failure */
		msg_warn ("cannot connect to %s, %d, %s",
			rspamd_upstream_name (selected),
			errno,
			strerror (errno));
		g_ptr_array_free (commands, TRUE);
		return;
	}

	/* Create session for a socket */
	session =
		rspamd_mempool_alloc (task->task_pool,
			sizeof (struct fuzzy_client_session));
	event_set (&session->ev, sock, EV_WRITE, fuzzy_io_callback,
		session);
	msec_to_tv (fuzzy_module_ctx->io_timeout, &session->tv);
	session->state = 0;
	session->commands = commands;
	session->task = task;
	session->fd = sock;
	session->server = selected;
	session->rule = rule;
	event_add (&session->ev, &session->tv);
	register_async_event (task->s,
		fuzzy_io_fin,
		session,
		g_quark_from_static_string ("fuzzy check"));
}
/* This callback is called when we check message in fuzzy hashes storage */
static void
fuzzy_symbol_callback (struct rspamd_task *task, void *unused)
{
struct fuzzy_rule *rule;
GList *cur;
GPtrArray *commands;
/* Check whitelist */
if (fuzzy_module_ctx->whitelist) {
@ -1006,23 +1093,28 @@ fuzzy_symbol_callback (struct rspamd_task *task, void *unused)
cur = fuzzy_module_ctx->fuzzy_rules;
while (cur) {
rule = cur->data;
fuzzy_check_rule (task, rule);
commands = fuzzy_generate_commands (task, rule, FUZZY_CHECK, 0, 0);
if (commands != NULL) {
register_fuzzy_client_call (task, rule, commands);
}
cur = g_list_next (cur);
}
}
static inline gboolean
register_fuzzy_controller_call (struct rspamd_http_connection_entry *entry,
struct fuzzy_rule *rule, struct rspamd_task *task, rspamd_fuzzy_t *h,
gint cmd, gint value, gint flag, gint *saved, GError **err)
struct fuzzy_rule *rule,
struct rspamd_task *task,
GPtrArray *commands,
gint *saved,
GError **err)
{
struct fuzzy_learn_session *s;
struct upstream *selected;
gint sock;
/* Get upstream */
selected = rspamd_upstream_get (rule->servers, RSPAMD_UPSTREAM_HASHED,
h->hash_pipe, sizeof (h->hash_pipe));
selected = rspamd_upstream_get (rule->servers, RSPAMD_UPSTREAM_ROUND_ROBIN);
if (selected) {
/* Create UDP socket */
if ((sock = rspamd_inet_address_connect (rspamd_upstream_addr (selected),
@ -1030,7 +1122,6 @@ register_fuzzy_controller_call (struct rspamd_http_connection_entry *entry,
return FALSE;
}
else {
/* Socket is made, create session */
s =
rspamd_mempool_alloc (task->task_pool,
sizeof (struct fuzzy_learn_session));
@ -1038,20 +1129,13 @@ register_fuzzy_controller_call (struct rspamd_http_connection_entry *entry,
event_base_set (entry->rt->ev_base, &s->ev);
msec_to_tv (fuzzy_module_ctx->io_timeout, &s->tv);
s->task = task;
s->h =
rspamd_mempool_alloc (task->task_pool, sizeof (rspamd_fuzzy_t));
memcpy (s->h, h, sizeof (rspamd_fuzzy_t));
s->commands = commands;
s->http_entry = entry;
s->server = selected;
s->cmd = cmd;
s->value = value;
s->flag = flag;
s->saved = saved;
s->fd = sock;
s->err = err;
s->rule = rule;
s->map = g_hash_table_lookup (rule->mappings,
GINT_TO_POINTER (flag));
/* We ref connection to avoid freeing before we process fuzzy rule */
rspamd_http_connection_ref (entry->conn);
event_add (&s->ev, &s->tv);
@ -1063,131 +1147,17 @@ register_fuzzy_controller_call (struct rspamd_http_connection_entry *entry,
return FALSE;
}
/*
 * Register controller (learn) calls for every suitable piece of `task`
 * against `rule`, using command `cmd` with the given `flag` and `value`.
 *
 * Three sources are walked in order:
 *  - text parts: both part->fuzzy and part->double_fuzzy are registered;
 *    empty parts, parts without a fuzzy hash and parts shorter than the
 *    configured min_bytes are skipped with a log message;
 *  - images that are non-empty and satisfy the configured min_height and
 *    min_width: the MD5 checksum of the image data is used as the hash;
 *  - other mime parts that are non-empty, pass fuzzy_check_content_type()
 *    and satisfy min_bytes: the MD5 checksum of the content is used.
 *
 * `entry`, `saved` and `err` are passed through unchanged to
 * register_fuzzy_controller_call().
 *
 * Returns the number of items processed (a text part counts once although
 * two calls are registered for it), or -1 as soon as one registration fails.
 */
static int
fuzzy_process_rule (struct rspamd_http_connection_entry *entry,
	struct fuzzy_rule *rule,
	struct rspamd_task *task,
	GError **err,
	gint cmd,
	gint flag,
	gint value,
	gint *saved)
{
	struct mime_text_part *part;
	struct mime_part *mime_part;
	struct rspamd_image *image;
	GList *cur;
	gchar *checksum;
	rspamd_fuzzy_t fake_fuzzy;
	gint processed = 0;

	/* Plan new event for writing */
	cur = task->text_parts;

	while (cur) {
		part = cur->data;
		if (part->is_empty || part->fuzzy == NULL ||
			part->fuzzy->hash_pipe[0] == '\0' ||
			(fuzzy_module_ctx->min_bytes > 0 && part->content->len <
			fuzzy_module_ctx->min_bytes)) {
			/* Skip empty parts */
			msg_info ("<%s>: part %Xd is too short for fuzzy process, skip it",
				task->message_id, part->fuzzy ? part->fuzzy->h : 0);
			cur = g_list_next (cur);
			continue;
		}
		if (!register_fuzzy_controller_call (entry, rule, task,
			part->fuzzy, cmd, value, flag, saved, err)) {
			goto err;
		}
		if (!register_fuzzy_controller_call (entry, rule, task,
			part->double_fuzzy, cmd, value, flag, saved, err)) {
			/* Cannot write hash */
			goto err;
		}
		processed++;
		cur = g_list_next (cur);
	}

	/* Process images */
	cur = task->images;

	while (cur) {
		image = cur->data;
		if (image->data->len > 0) {
			if (fuzzy_module_ctx->min_height <= 0 || image->height >=
				fuzzy_module_ctx->min_height) {
				if (fuzzy_module_ctx->min_width <= 0 || image->width >=
					fuzzy_module_ctx->min_width) {
					checksum = g_compute_checksum_for_data (G_CHECKSUM_MD5,
							image->data->data,
							image->data->len);
					/* Construct fake fuzzy hash */
					fake_fuzzy.block_size = 0;
					memset (fake_fuzzy.hash_pipe, 0,
						sizeof (fake_fuzzy.hash_pipe));
					rspamd_strlcpy (fake_fuzzy.hash_pipe, checksum,
						sizeof (fake_fuzzy.hash_pipe));
					if (!register_fuzzy_controller_call (entry, rule, task,
						&fake_fuzzy, cmd, value, flag, saved, err)) {
						g_free (checksum);
						goto err;
					}

					msg_info ("save hash of image: [%s] to list: %d",
						checksum,
						flag);
					g_free (checksum);
					processed++;
				}
			}
		}
		cur = g_list_next (cur);
	}

	/* Process other parts */
	cur = task->parts;

	while (cur) {
		mime_part = cur->data;
		if (mime_part->content->len > 0 &&
			fuzzy_check_content_type (rule, mime_part->type)) {
			if (fuzzy_module_ctx->min_bytes <= 0 || mime_part->content->len >=
				fuzzy_module_ctx->min_bytes) {
				checksum = g_compute_checksum_for_data (G_CHECKSUM_MD5,
						mime_part->content->data, mime_part->content->len);
				/* Construct fake fuzzy hash */
				fake_fuzzy.block_size = 0;
				memset (fake_fuzzy.hash_pipe, 0, sizeof (fake_fuzzy.hash_pipe));
				rspamd_strlcpy (fake_fuzzy.hash_pipe, checksum,
					sizeof (fake_fuzzy.hash_pipe));
				if (!register_fuzzy_controller_call (entry, rule, task,
					&fake_fuzzy, cmd, value, flag, saved, err)) {
					/*
					 * The checksum string is heap allocated by GLib; release
					 * it on the failure path too, as the image loop does.
					 */
					g_free (checksum);
					goto err;
				}
				msg_info ("save hash of part of type: %s/%s: [%s] to list %d",
					mime_part->type->type, mime_part->type->subtype,
					checksum, flag);
				g_free (checksum);
				processed++;
			}
		}
		cur = g_list_next (cur);
	}

	return processed;

err:
	return -1;
}
static void
fuzzy_process_handler (struct rspamd_http_connection_entry *conn_ent,
struct rspamd_http_message *msg, gint cmd, gint value, gint flag,
struct fuzzy_ctx *ctx)
{
struct fuzzy_rule *rule;
struct rspamd_task *task;
gboolean processed = FALSE, res = TRUE;
GList *cur;
struct rspamd_task *task;
GError **err;
GPtrArray *commands;
gint r, *saved, rules = 0;
/* Prepare task */
@ -1223,8 +1193,13 @@ fuzzy_process_handler (struct rspamd_http_connection_entry *conn_ent,
continue;
}
rules ++;
res = fuzzy_process_rule (conn_ent, rule, task, err, cmd, flag,
value, saved);
res = 0;
commands = fuzzy_generate_commands (task, rule, cmd, flag, value);
if (commands != NULL) {
res = register_fuzzy_controller_call (conn_ent, rule, task, commands,
saved, err);
}
if (res) {
processed = TRUE;

Loading…
Cancel
Save