Browse Source
* Major cleanup of cmake build system
* Major cleanup of cmake build system
* Add initial version of statshow utility for statfiles debugging
* Add debugging for statistics
* Remove unused utilities
rspamd-0.5
15 changed files with 356 additions and 269 deletions
-
131CMakeLists.txt
-
2config.h.in
-
7src/classifiers/bayes.c
-
1src/classifiers/classifiers.h
-
3src/controller.c
-
9src/filter.c
-
5src/tokenizers/osb.c
-
4src/tokenizers/tokenizers.c
-
4src/tokenizers/tokenizers.h
-
41test/CMakeLists.txt
-
7utils/CMakeLists.txt
-
54utils/expression_parser.c
-
22utils/statshow/CMakeLists.txt
-
262utils/statshow/statshow.c
-
73utils/url_extracter.c
@ -0,0 +1,41 @@ |
|||
# Unit test sources.
set(TESTSRC
	rspamd_expression_test.c
	rspamd_memcached_test.c
	rspamd_mem_pool_test.c
	rspamd_statfile_test.c
	rspamd_fuzzy_test.c
	rspamd_test_suite.c
	rspamd_url_test.c
	rspamd_dns_test.c)

# Sources from ../src that are compiled directly into the test binary.
set(TESTDEPENDS
	../src/mem_pool.c
	../src/hash.c
	../src/url.c
	../src/trie.c
	../src/util.c
	../src/radix.c
	../src/fuzzy.c
	../src/map.c
	../src/logger.c
	../src/memcached.c
	../src/message.c
	../src/html.c
	../src/expressions.c
	../src/statfile.c
	../src/events.c
	../src/upstream.c
	../src/dns.c)

# The test binary is excluded from the default "all" target and is built
# with RSPAMD_TEST defined.
add_executable(rspamd-test EXCLUDE_FROM_ALL ${TESTDEPENDS} ${CONTRIBSRC} ${TESTSRC})
set_target_properties(rspamd-test PROPERTIES
	LINKER_LANGUAGE C
	COMPILE_FLAGS "-DRSPAMD_TEST")

target_link_libraries(rspamd-test event)
target_link_libraries(rspamd-test ${GLIB2_LIBRARIES})
target_link_libraries(rspamd-test ${CMAKE_REQUIRED_LIBRARIES})

# Link against whichever gmime variant was found.
if(GMIME2_FOUND)
	target_link_libraries(rspamd-test ${GMIME2_LIBRARIES})
else()
	target_link_libraries(rspamd-test ${GMIME24_LIBRARIES})
endif()

# PCRE is linked explicitly only when ENABLE_STATIC is ON.
if(ENABLE_STATIC MATCHES "ON")
	target_link_libraries(rspamd-test ${PCRE_LIBRARIES})
endif()
|||
@ -0,0 +1,7 @@ |
|||
# Statfile debugging utility
add_subdirectory(statshow)

# Redirector: generated from its template and installed as rspamd-redirector
if(ENABLE_REDIRECTOR MATCHES "ON")
	configure_file(redirector.pl.in redirector.pl @ONLY)
	install(PROGRAMS redirector.pl DESTINATION bin RENAME rspamd-redirector)
endif()
|||
@ -1,54 +0,0 @@ |
|||
#include "../src/config.h" |
|||
#include "../src/main.h" |
|||
#include "../src/cfg_file.h" |
|||
#include "../src/expressions.h" |
|||
|
|||
rspamd_hash_t *counters = NULL; |
|||
|
|||
int |
|||
main (int argc, char **argv) |
|||
{ |
|||
memory_pool_t *pool; |
|||
struct expression *cur; |
|||
char *line, *outstr; |
|||
int r, s; |
|||
char buf[BUFSIZ]; |
|||
|
|||
pool = memory_pool_new (memory_pool_get_size ()); |
|||
|
|||
line = fgets (buf, sizeof (buf), stdin); |
|||
while (line) { |
|||
s = strlen (line); |
|||
if (buf[s - 1] == '\n') { |
|||
buf[s - 1] = '\0'; |
|||
} |
|||
if (buf[s - 2] == '\r') { |
|||
buf[s - 2] = '\0'; |
|||
} |
|||
|
|||
r = 0; |
|||
cur = parse_expression (pool, line); |
|||
s = strlen (line) * 4; |
|||
outstr = memory_pool_alloc (pool, s); |
|||
while (cur) { |
|||
if (cur->type == EXPR_REGEXP) { |
|||
r += snprintf (outstr + r, s - r, "OP:%s ", (char *)cur->content.operand); |
|||
} else if (cur->type == EXPR_STR) { |
|||
r += snprintf (outstr + r, s - r, "S:%s ", (char *)cur->content.operand); |
|||
|
|||
} else if (cur->type == EXPR_FUNCTION) { |
|||
r += snprintf (outstr + r, s - r, "F:%s ", ((struct expression_function *)cur->content.operand)->name); |
|||
} |
|||
else { |
|||
r += snprintf (outstr + r, s - r, "O:%c ", cur->content.operation); |
|||
} |
|||
cur = cur->next; |
|||
} |
|||
printf ("Parsed expression: '%s' -> '%s'\n", line, outstr); |
|||
line = fgets (buf, sizeof (buf), stdin); |
|||
} |
|||
|
|||
memory_pool_delete (pool); |
|||
|
|||
return 0; |
|||
} |
|||
@ -0,0 +1,22 @@ |
|||
# statshow: utility for statfiles debugging.
set(STATSHOWSRC statshow.c)

# Built together with the classifier and tokenizer sources; excluded from
# the default "all" target.
add_executable(statshow EXCLUDE_FROM_ALL ${CLASSIFIERSSRC} ${TOKENIZERSSRC} ${STATSHOWSRC})
set_target_properties(statshow PROPERTIES
	LINKER_LANGUAGE C
	COMPILE_FLAGS "-I../../src")

target_link_libraries(statshow event)
target_link_libraries(statshow ${GLIB2_LIBRARIES})
target_link_libraries(statshow ${CMAKE_REQUIRED_LIBRARIES})

# Link against whichever gmime variant was found.
if(GMIME2_FOUND)
	target_link_libraries(statshow ${GMIME2_LIBRARIES})
else()
	target_link_libraries(statshow ${GMIME24_LIBRARIES})
endif()

# Lua bindings plus the selected Lua implementation.
target_link_libraries(statshow rspamd_lua)
if(ENABLE_LUAJIT MATCHES "ON")
	target_link_libraries(statshow "${LUAJIT_LIBRARY}")
else()
	target_link_libraries(statshow "${LUA_LIBRARY}")
endif()

# PCRE is linked explicitly only when ENABLE_STATIC is ON.
if(ENABLE_STATIC MATCHES "ON")
	target_link_libraries(statshow ${PCRE_LIBRARIES})
endif()
|||
@ -0,0 +1,262 @@ |
|||
/* Copyright (c) 2010, Vsevolod Stakhov |
|||
* All rights reserved. |
|||
* |
|||
* Redistribution and use in source and binary forms, with or without |
|||
* modification, are permitted provided that the following conditions are met: |
|||
* * Redistributions of source code must retain the above copyright |
|||
* notice, this list of conditions and the following disclaimer. |
|||
* * Redistributions in binary form must reproduce the above copyright |
|||
* notice, this list of conditions and the following disclaimer in the |
|||
* documentation and/or other materials provided with the distribution. |
|||
* |
|||
* THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY |
|||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
|||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
|||
* DISCLAIMED. IN NO EVENT SHALL Rambler BE LIABLE FOR ANY |
|||
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
|||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
|||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
|||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
|||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
|||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|||
*/ |
|||
|
|||
#include "config.h" |
|||
#include "main.h" |
|||
#include "cfg_file.h" |
|||
#include "util.h" |
|||
#include "map.h" |
|||
#include "cfg_xml.h" |
|||
#include "classifiers/classifiers.h" |
|||
#include "tokenizers/tokenizers.h" |
|||
#include "message.h" |
|||
|
|||
|
|||
static gchar *cfg_name; |
|||
|
|||
static GOptionEntry entries[] = |
|||
{ |
|||
{ "config", 'c', 0, G_OPTION_ARG_STRING, &cfg_name, "Specify config file", NULL }, |
|||
{ NULL, 0, 0, G_OPTION_ARG_NONE, NULL, NULL, NULL } |
|||
}; |
|||
|
|||
static void |
|||
read_cmd_line (gint *argc, gchar ***argv, struct config_file *cfg) |
|||
{ |
|||
GError *error = NULL; |
|||
GOptionContext *context; |
|||
|
|||
context = g_option_context_new ("- run statshow utility"); |
|||
g_option_context_set_summary (context, "Summary:\n Statshow utility version " RVERSION "\n Release id: " RID); |
|||
g_option_context_add_main_entries (context, entries, NULL); |
|||
if (!g_option_context_parse (context, argc, argv, &error)) { |
|||
fprintf (stderr, "option parsing failed: %s\n", error->message); |
|||
exit (1); |
|||
} |
|||
cfg->cfg_name = cfg_name; |
|||
} |
|||
|
|||
static gboolean |
|||
load_rspamd_config (struct config_file *cfg) |
|||
{ |
|||
if (! read_xml_config (cfg, cfg->cfg_name)) { |
|||
return FALSE; |
|||
} |
|||
|
|||
/* Do post-load actions */ |
|||
post_load_config (cfg); |
|||
|
|||
return TRUE; |
|||
} |
|||
|
|||
static void |
|||
classifiers_callback (gpointer value, void *arg) |
|||
{ |
|||
struct worker_task *task = arg; |
|||
struct classifier_config *cl = value; |
|||
struct classifier_ctx *ctx; |
|||
struct mime_text_part *text_part; |
|||
GTree *tokens = NULL; |
|||
GList *cur; |
|||
f_str_t c; |
|||
gchar *header = NULL; |
|||
|
|||
ctx = cl->classifier->init_func (task->task_pool, cl); |
|||
ctx->debug = TRUE; |
|||
|
|||
if ((tokens = g_hash_table_lookup (task->tokens, cl->tokenizer)) == NULL) { |
|||
while (cur != NULL) { |
|||
if (header) { |
|||
c.len = strlen (cur->data); |
|||
if (c.len > 0) { |
|||
c.begin = cur->data; |
|||
if (!cl->tokenizer->tokenize_func (cl->tokenizer, task->task_pool, &c, &tokens, FALSE)) { |
|||
msg_info ("cannot tokenize input"); |
|||
return; |
|||
} |
|||
} |
|||
} |
|||
else { |
|||
text_part = (struct mime_text_part *)cur->data; |
|||
if (text_part->is_empty) { |
|||
cur = g_list_next (cur); |
|||
continue; |
|||
} |
|||
c.begin = text_part->content->data; |
|||
c.len = text_part->content->len; |
|||
/* Tree would be freed at task pool freeing */ |
|||
if (!cl->tokenizer->tokenize_func (cl->tokenizer, task->task_pool, &c, &tokens, FALSE)) { |
|||
msg_info ("cannot tokenize input"); |
|||
return; |
|||
} |
|||
} |
|||
cur = g_list_next (cur); |
|||
} |
|||
g_hash_table_insert (task->tokens, cl->tokenizer, tokens); |
|||
} |
|||
|
|||
if (tokens == NULL) { |
|||
return; |
|||
} |
|||
|
|||
/* Take care of subject */ |
|||
tokenize_subject (task, &tokens); |
|||
cl->classifier->classify_func (ctx, task->worker->srv->statfile_pool, tokens, task); |
|||
} |
|||
|
|||
static void |
|||
process_buffer (gchar *buf, gsize len, struct rspamd_main *rspamd) |
|||
{ |
|||
struct worker_task *task; |
|||
struct rspamd_worker *fake_worker; |
|||
|
|||
|
|||
/* Make fake worker for task */ |
|||
fake_worker = g_malloc (sizeof (struct rspamd_worker)); |
|||
fake_worker->srv = rspamd; |
|||
|
|||
/* Make task */ |
|||
task = construct_task (fake_worker); |
|||
/* Copy message */ |
|||
task->msg = memory_pool_alloc (task->task_pool, sizeof (f_str_t)); |
|||
task->msg->begin = buf; |
|||
task->msg->len = len; |
|||
|
|||
/* Process message */ |
|||
if (process_message (task) != 0) { |
|||
return; |
|||
} |
|||
|
|||
g_list_foreach (task->cfg->classifiers, classifiers_callback, task); |
|||
|
|||
g_free (fake_worker); |
|||
} |
|||
|
|||
static void |
|||
process_stdin (struct rspamd_main *rspamd) |
|||
{ |
|||
gchar *in_buf; |
|||
gint r = 0, len; |
|||
|
|||
/* Allocate input buffer */ |
|||
len = BUFSIZ; |
|||
in_buf = g_malloc (len); |
|||
|
|||
/* Read stdin */ |
|||
while (!feof (stdin)) { |
|||
r += fread (in_buf + r, 1, len - r, stdin); |
|||
if (len - r < len / 2) { |
|||
/* Grow buffer */ |
|||
len *= 2; |
|||
in_buf = g_realloc (in_buf, len); |
|||
} |
|||
} |
|||
|
|||
process_buffer (in_buf, r, rspamd); |
|||
g_free (in_buf); |
|||
} |
|||
|
|||
static void |
|||
process_file (const gchar *filename, struct rspamd_main *rspamd) |
|||
{ |
|||
struct stat st; |
|||
char *in_buf; |
|||
gsize r = 0; |
|||
gint fd; |
|||
|
|||
if (stat (filename, &st) == -1) { |
|||
msg_err ("stat failed: %s", strerror (errno)); |
|||
return; |
|||
} |
|||
|
|||
if ((fd = open (filename, O_RDONLY)) == -1) { |
|||
msg_err ("stat failed: %s", strerror (errno)); |
|||
return; |
|||
} |
|||
|
|||
in_buf = g_malloc (st.st_size); |
|||
|
|||
while (r < st.st_size) { |
|||
r += read (fd, in_buf + r, r - st.st_size); |
|||
} |
|||
|
|||
process_buffer (in_buf, r, rspamd); |
|||
g_free (in_buf); |
|||
} |
|||
|
|||
gint |
|||
main (gint argc, gchar **argv, gchar **env) |
|||
{ |
|||
struct config_file *cfg; |
|||
struct rspamd_main *rspamd; |
|||
gchar **arg; |
|||
|
|||
rspamd = (struct rspamd_main *)g_malloc (sizeof (struct rspamd_main)); |
|||
bzero (rspamd, sizeof (struct rspamd_main)); |
|||
rspamd->server_pool = memory_pool_new (memory_pool_get_size ()); |
|||
rspamd->cfg = (struct config_file *)g_malloc (sizeof (struct config_file)); |
|||
if (!rspamd || !rspamd->cfg) { |
|||
fprintf (stderr, "Cannot allocate memory\n"); |
|||
exit (-errno); |
|||
} |
|||
|
|||
bzero (rspamd->cfg, sizeof (struct config_file)); |
|||
rspamd->cfg->cfg_pool = memory_pool_new (memory_pool_get_size ()); |
|||
init_defaults (rspamd->cfg); |
|||
|
|||
read_cmd_line (&argc, &argv, rspamd->cfg); |
|||
if (rspamd->cfg->cfg_name == NULL) { |
|||
rspamd->cfg->cfg_name = FIXED_CONFIG_FILE; |
|||
} |
|||
|
|||
/* First set logger to console logger */ |
|||
rspamd_set_logger (RSPAMD_LOG_CONSOLE, TYPE_MAIN, rspamd->cfg); |
|||
(void)open_log (); |
|||
g_log_set_default_handler (rspamd_glib_log_function, rspamd->cfg); |
|||
|
|||
/* Init classifiers options */ |
|||
register_classifier_opt ("bayes", "min_tokens"); |
|||
register_classifier_opt ("winnow", "min_tokens"); |
|||
register_classifier_opt ("winnow", "learn_threshold"); |
|||
/* Load config */ |
|||
if (! load_rspamd_config (rspamd->cfg)) { |
|||
exit (EXIT_FAILURE); |
|||
} |
|||
|
|||
/* Init statfile pool */ |
|||
rspamd->statfile_pool = statfile_pool_new (rspamd->server_pool, rspamd->cfg->max_statfile_size); |
|||
|
|||
/* Check argc */ |
|||
if (argc > 1) { |
|||
arg = argv[1]; |
|||
while (*arg) { |
|||
process_file (*arg, rspamd); |
|||
arg ++; |
|||
} |
|||
} |
|||
else { |
|||
process_stdin (rspamd); |
|||
} |
|||
|
|||
return 0; |
|||
} |
|||
@ -1,73 +0,0 @@ |
|||
#include <sys/types.h> |
|||
#include <sys/time.h> |
|||
#include <sys/wait.h> |
|||
#include <sys/param.h> |
|||
|
|||
#include <netinet/in.h> |
|||
#include <arpa/inet.h> |
|||
#include <netdb.h> |
|||
#include <syslog.h> |
|||
#include <fcntl.h> |
|||
#include <stdlib.h> |
|||
#include <string.h> |
|||
|
|||
#include <gmime/gmime.h> |
|||
|
|||
#include "../src/config.h" |
|||
#if !defined(HAVE_OWN_QUEUE_H) && defined(HAVE_SYS_QUEUE_H) |
|||
#include <sys/queue.h> |
|||
#endif |
|||
#ifdef HAVE_OWN_QUEUE_H |
|||
#include "../src/queue.h" |
|||
#endif |
|||
|
|||
#include "../src/main.h" |
|||
#include "../src/cfg_file.h" |
|||
#include "../src/url.h" |
|||
#include "../src/util.h" |
|||
#include "../src/message.h" |
|||
|
|||
rspamd_hash_t *counters = NULL; |
|||
|
|||
int |
|||
main (int argc, char **argv) |
|||
{ |
|||
struct worker_task task; |
|||
struct uri *url; |
|||
char *buf = NULL; |
|||
size_t pos = 0, size = 65535; |
|||
GList *cur; |
|||
|
|||
g_mem_set_vtable(glib_mem_profiler_table); |
|||
g_mime_init (0); |
|||
bzero (&task, sizeof (struct worker_task)); |
|||
task.task_pool = memory_pool_new (memory_pool_get_size ()); |
|||
|
|||
/* Preallocate buffer */ |
|||
buf = g_malloc (size); |
|||
|
|||
while (!feof (stdin)) { |
|||
*(buf + pos) = getchar (); |
|||
pos ++; |
|||
if (pos == size) { |
|||
size *= 2; |
|||
buf = g_realloc (buf, size); |
|||
} |
|||
} |
|||
|
|||
task.cfg = memory_pool_alloc0 (task.task_pool, sizeof (struct config_file)); |
|||
|
|||
task.msg = memory_pool_alloc (task.task_pool, sizeof (f_str_t)); |
|||
task.msg->begin = buf; |
|||
task.msg->len = pos; |
|||
process_message (&task); |
|||
|
|||
cur = task.urls; |
|||
while (cur) { |
|||
url = cur->data; |
|||
printf ("%s\n", struri (url)); |
|||
cur = g_list_next (cur); |
|||
} |
|||
|
|||
return 0; |
|||
} |
|||
Write
Preview
Loading…
Cancel
Save
Reference in new issue