Browse Source

* Major cleanup of cmake build system

* Add initial version of statshow utility for statfiles debugging
* Add debugging for statistics
* Remove unused utilities
rspamd-0.5
Vsevolod Stakhov 15 years ago
parent
commit
683b90f4c6
  1. 131
      CMakeLists.txt
  2. 2
      config.h.in
  3. 7
      src/classifiers/bayes.c
  4. 1
      src/classifiers/classifiers.h
  5. 3
      src/controller.c
  6. 9
      src/filter.c
  7. 5
      src/tokenizers/osb.c
  8. 4
      src/tokenizers/tokenizers.c
  9. 4
      src/tokenizers/tokenizers.h
  10. 41
      test/CMakeLists.txt
  11. 7
      utils/CMakeLists.txt
  12. 54
      utils/expression_parser.c
  13. 22
      utils/statshow/CMakeLists.txt
  14. 262
      utils/statshow/statshow.c
  15. 73
      utils/url_extracter.c

131
CMakeLists.txt

@ -7,7 +7,7 @@ PROJECT(rspamd C)
SET(RSPAMD_VERSION_MAJOR 0)
SET(RSPAMD_VERSION_MINOR 3)
SET(RSPAMD_VERSION_PATCH 12)
SET(RSPAMD_VERSION_PATCH 13)
SET(RSPAMD_VERSION "${RSPAMD_VERSION_MAJOR}.${RSPAMD_VERSION_MINOR}.${RSPAMD_VERSION_PATCH}")
@ -19,7 +19,6 @@ CMAKE_MINIMUM_REQUIRED(VERSION 2.6.0 FATAL_ERROR)
OPTION(DEBUG_MODE "Enable debug output [default: ON]" ON)
OPTION(ENABLE_OPTIMIZATION "Enable optimization [default: OFF]" OFF)
OPTION(ENABLE_PERL "Enable perl client API [default: OFF]" OFF)
OPTION(SKIP_RELINK_RPATH "Skip relinking and full RPATH for the install tree" OFF)
OPTION(ENABLE_REDIRECTOR "Enable redirector install [default: OFF]" OFF)
OPTION(ENABLE_PROFILING "Enable profiling [default: OFF]" OFF)
@ -159,32 +158,6 @@ IF(CMAKE_SYSTEM_NAME STREQUAL "SunOS")
MESSAGE(STATUS "Configuring for Solaris")
ENDIF(CMAKE_SYSTEM_NAME STREQUAL "SunOS")
IF(ENABLE_PERL MATCHES "ON")
IF(NOT PERL_EXECUTABLE)
MESSAGE(FATAL_ERROR "Error: Perl is not found but is required")
ENDIF(NOT PERL_EXECUTABLE)
# Find perl libraries and cflags
EXECUTE_PROCESS(COMMAND ${PERL_EXECUTABLE} -MExtUtils::Embed -e ccopts OUTPUT_VARIABLE PERL_CFLAGS)
EXECUTE_PROCESS(COMMAND ${PERL_EXECUTABLE} -MExtUtils::Embed -e ldopts OUTPUT_VARIABLE PERL_LDFLAGS)
STRING(REGEX REPLACE "[\r\n]" " " PERL_CFLAGS ${PERL_CFLAGS})
STRING(REGEX REPLACE " +$" "" PERL_CFLAGS ${PERL_CFLAGS})
STRING(REGEX REPLACE "[\r\n]" " " PERL_LDFLAGS ${PERL_LDFLAGS})
STRING(REGEX REPLACE " +$" "" PERL_LDFLAGS ${PERL_LDFLAGS})
# Handle DynaLoader
STRING(REGEX MATCH "/[^ ]*/DynaLoader.a" PERL_DYNALOADER ${PERL_LDFLAGS})
STRING(REGEX REPLACE "/[^ ]*/DynaLoader.a " "" PERL_LDFLAGS ${PERL_LDFLAGS})
IF(PERL_DYNALOADER)
EXECUTE_PROCESS(COMMAND ${CMAKE_COMMAND} -E copy ${PERL_DYNALOADER} ${rspamd_BINARY_DIR}/compat/libdynaloader.so)
LINK_DIRECTORIES(${rspamd_BINARY_DIR}/compat/)
ENDIF(PERL_DYNALOADER)
ELSE(ENABLE_PERL MATCHES "ON")
SET(WITHOUT_PERL 1)
ENDIF(ENABLE_PERL MATCHES "ON")
INCLUDE(FindLua51)
# Check for luajit
IF(ENABLE_LUAJIT MATCHES "ON")
@ -525,11 +498,6 @@ ENDIF(SUPPORT_STD_FLAG)
SET(CMAKE_C_FLAGS "${CMAKE_C_OPT_FLAGS} ${CMAKE_C_FLAGS} ${CMAKE_C_WARN_FLAGS}")
IF(ENABLE_REDIRECTOR MATCHES "ON")
CONFIGURE_FILE(utils/redirector.pl.in utils/redirector.pl @ONLY)
ENDIF(ENABLE_REDIRECTOR MATCHES "ON")
IF(DEBUG_MODE MATCHES "ON")
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -g")
ENDIF(DEBUG_MODE MATCHES "ON")
@ -620,8 +588,8 @@ ADD_SUBDIRECTORY(src/client)
ADD_SUBDIRECTORY(src/json)
ADD_SUBDIRECTORY(src/cdb)
# ADD_SUBDIRECTORY(src/evdns)
# ADD_SUBDIRECTORY(src/plugins/custom)
ADD_SUBDIRECTORY(utils)
ADD_SUBDIRECTORY(test)
SET(TOKENIZERSSRC src/tokenizers/tokenizers.c
src/tokenizers/osb.c)
@ -636,47 +604,6 @@ SET(PLUGINSSRC src/plugins/surbl.c
src/plugins/fuzzy_check.c
src/plugins/spf.c)
SET(TESTSRC test/rspamd_expression_test.c
test/rspamd_memcached_test.c
test/rspamd_mem_pool_test.c
test/rspamd_statfile_test.c
test/rspamd_fuzzy_test.c
test/rspamd_test_suite.c
test/rspamd_url_test.c
test/rspamd_dns_test.c)
SET(TESTDEPENDS src/mem_pool.c
src/hash.c
src/url.c
src/trie.c
src/util.c
src/radix.c
src/fuzzy.c
src/map.c
src/logger.c
src/memcached.c
src/message.c
src/html.c
src/expressions.c
src/statfile.c
src/events.c
src/upstream.c
src/dns.c)
SET(UTILSSRC utils/url_extracter.c)
SET(EXPRSRC utils/expression_parser.c)
SET(UTILSDEPENDS src/mem_pool.c
src/hash.c
src/url.c
src/trie.c
src/fuzzy.c
src/expressions.c
src/message.c
src/html.c
src/util.c
src/radix.c)
LIST(LENGTH PLUGINSSRC RSPAMD_MODULES_NUM)
############################ TARGETS SECTION ###############################
@ -685,20 +612,6 @@ ADD_CUSTOM_COMMAND(OUTPUT src/modules.c
COMMAND ../utils/gen-modules.sh ${PLUGINSSRC}
WORKING_DIRECTORY src)
IF(ENABLE_PERL MATCHES "ON")
ADD_CUSTOM_COMMAND(OUTPUT ${CMAKE_SOURCE_DIR}/perl/Makefile
DEPENDS ${CMAKE_SOURCE_DIR}/perl/Makefile.PL
COMMAND ${PERL_EXECUTABLE} ./Makefile.PL DESTDIR=${DESTDIR} PREFIX=${PREFIX} INSTALLMAN3DIR=${MAN_PREFIX}/man3
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/perl)
CONFIGURE_FILE(perl/Makefile.PL.in perl/Makefile.PL)
ADD_CUSTOM_TARGET(perlmodule
COMMAND ${CMAKE_MAKE_PROGRAM} DESTDIR=${DESTDIR}
DEPENDS ${CMAKE_SOURCE_DIR}/perl/Makefile
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/perl
VERBATIM)
ENDIF(ENABLE_PERL MATCHES "ON")
CONFIGURE_FILE(config.h.in src/config.h)
CONFIGURE_FILE(contrib/exim/local_scan.c.in contrib/exim/local_scan_rspamd.c @ONLY)
CONFIGURE_FILE(rspamd.xml.sample conf/rspamd.xml.sample @ONLY)
@ -710,19 +623,6 @@ SET_TARGET_PROPERTIES(rspamd PROPERTIES LINKER_LANGUAGE C)
SET_TARGET_PROPERTIES(rspamd PROPERTIES COMPILE_FLAGS "-DRSPAMD_MAIN")
SET_TARGET_PROPERTIES(rspamd PROPERTIES VERSION ${RSPAMD_VERSION})
IF(ENABLE_PERL MATCHES "ON")
SET_TARGET_PROPERTIES(rspamd PROPERTIES COMPILE_FLAGS ${PERL_CFLAGS}
LINK_FLAGS ${PERL_LDFLAGS})
IF(PERL_DYNALOADER)
TARGET_LINK_LIBRARIES(rspamd dynaloader)
ENDIF(PERL_DYNALOADER)
ENDIF(ENABLE_PERL MATCHES "ON")
IF(ENABLE_PERL MATCHES "ON")
ADD_DEPENDENCIES(rspamd perlmodule)
ENDIF(ENABLE_PERL MATCHES "ON")
TARGET_LINK_LIBRARIES(rspamd rspamd_lua)
IF(ENABLE_LUAJIT MATCHES "ON")
TARGET_LINK_LIBRARIES(rspamd "${LUAJIT_LIBRARY}")
@ -752,21 +652,6 @@ IF(ENABLE_GPERF_TOOLS MATCHES "ON")
TARGET_LINK_LIBRARIES(rspamd profiler)
ENDIF(ENABLE_GPERF_TOOLS MATCHES "ON")
ADD_EXECUTABLE(test/rspamd-test ${TESTDEPENDS} ${CONTRIBSRC} ${TESTSRC})
SET_TARGET_PROPERTIES(test/rspamd-test PROPERTIES LINKER_LANGUAGE C)
SET_TARGET_PROPERTIES(test/rspamd-test PROPERTIES COMPILE_FLAGS "-DRSPAMD_TEST")
TARGET_LINK_LIBRARIES(test/rspamd-test event)
TARGET_LINK_LIBRARIES(test/rspamd-test ${GLIB2_LIBRARIES})
TARGET_LINK_LIBRARIES(test/rspamd-test ${CMAKE_REQUIRED_LIBRARIES})
IF(GMIME2_FOUND)
TARGET_LINK_LIBRARIES(test/rspamd-test ${GMIME2_LIBRARIES})
ELSE(GMIME2_FOUND)
TARGET_LINK_LIBRARIES(test/rspamd-test ${GMIME24_LIBRARIES})
ENDIF(GMIME2_FOUND)
IF(ENABLE_STATIC MATCHES "ON")
TARGET_LINK_LIBRARIES(test/rspamd-test ${PCRE_LIBRARIES})
ENDIF(ENABLE_STATIC MATCHES "ON")
##################### INSTALLATION ##########################################
@ -823,16 +708,6 @@ ENDFOREACH(LUA_CONF)
INSTALL(FILES "doc/rspamd.8" DESTINATION man/man8)
INSTALL(FILES "doc/rspamc.1" DESTINATION man/man1)
# Perl lib
IF(ENABLE_PERL MATCHES "ON")
INSTALL(CODE "EXECUTE_PROCESS(COMMAND make install WORKING_DIRECTORY perl)")
ENDIF(ENABLE_PERL MATCHES "ON")
# Redirector
IF(ENABLE_REDIRECTOR MATCHES "ON")
INSTALL(PROGRAMS utils/redirector.pl DESTINATION bin RENAME rspamd-redirector)
ENDIF(ENABLE_REDIRECTOR MATCHES "ON")
# Start scripts
IF(CMAKE_SYSTEM_NAME STREQUAL "FreeBSD" AND NOT BUILD_PORT)
INSTALL(PROGRAMS freebsd/rspamd.sh DESTINATION etc/rc.d)

2
config.h.in

@ -113,7 +113,7 @@
#cmakedefine HAVE_SETITIMER 1
#cmakedefine WITHOUT_PERL 1
#define WITHOUT_PERL 1
#cmakedefine WITH_LUA 1

7
src/classifiers/bayes.c

@ -131,6 +131,7 @@ bayes_classify_callback (gpointer key, gpointer value, gpointer data)
if (cur->post_probability < G_MINDOUBLE * 100) {
cur->post_probability = G_MINDOUBLE * 100;
}
}
renorm = 0;
for (i = 0; i < cd->statfiles_num; i ++) {
@ -144,6 +145,10 @@ bayes_classify_callback (gpointer key, gpointer value, gpointer data)
if (cur->post_probability < G_MINDOUBLE * 10) {
cur->post_probability = G_MINDOUBLE * 100;
}
if (cd->ctx->debug) {
msg_info ("token: %s, statfile: %s, probability: %.4f, post_probability: %.4f",
node->extra, cur->st->symbol, cur->value, cur->post_probability);
}
}
return FALSE;
@ -156,7 +161,7 @@ bayes_init (memory_pool_t *pool, struct classifier_config *cfg)
ctx->pool = pool;
ctx->cfg = cfg;
ctx->debug = FALSE;
return ctx;
}

1
src/classifiers/classifiers.h

@ -15,6 +15,7 @@ struct worker_task;
struct classifier_ctx {
memory_pool_t *pool;
GHashTable *results;
gboolean debug;
struct classifier_config *cfg;
};

3
src/controller.c

@ -850,7 +850,8 @@ controller_read_socket (f_str_t * in, void *arg)
c.begin = part->content->data;
c.len = part->content->len;
if (!session->learn_classifier->tokenizer->tokenize_func (session->learn_classifier->tokenizer, session->session_pool, &c, &tokens)) {
if (!session->learn_classifier->tokenizer->tokenize_func (session->learn_classifier->tokenizer,
session->session_pool, &c, &tokens, FALSE)) {
i = rspamd_snprintf (out_buf, sizeof (out_buf), "weights failed, tokenizer error" CRLF END);
free_task (task, FALSE);
if (!rspamd_dispatcher_write (session->dispatcher, out_buf, i, FALSE, FALSE)) {

9
src/filter.c

@ -36,9 +36,6 @@
#include "classifiers/classifiers.h"
#include "tokenizers/tokenizers.h"
#ifndef WITHOUT_PERL
# include "perl.h"
#endif
#ifdef WITH_LUA
# include "lua/lua_common.h"
#endif
@ -615,7 +612,7 @@ classifiers_callback (gpointer value, void *arg)
c.len = strlen (cur->data);
if (c.len > 0) {
c.begin = cur->data;
if (!cl->tokenizer->tokenize_func (cl->tokenizer, task->task_pool, &c, &tokens)) {
if (!cl->tokenizer->tokenize_func (cl->tokenizer, task->task_pool, &c, &tokens, FALSE)) {
msg_info ("cannot tokenize input");
return;
}
@ -630,7 +627,7 @@ classifiers_callback (gpointer value, void *arg)
c.begin = text_part->content->data;
c.len = text_part->content->len;
/* Tree would be freed at task pool freeing */
if (!cl->tokenizer->tokenize_func (cl->tokenizer, task->task_pool, &c, &tokens)) {
if (!cl->tokenizer->tokenize_func (cl->tokenizer, task->task_pool, &c, &tokens, FALSE)) {
msg_info ("cannot tokenize input");
return;
}
@ -857,7 +854,7 @@ learn_task (const gchar *statfile, struct worker_task *task, GError **err)
/* Get tokens */
if (!cl->tokenizer->tokenize_func (
cl->tokenizer, task->task_pool,
&c, &tokens)) {
&c, &tokens, FALSE)) {
g_set_error (err, filter_error_quark(), 2, "Cannot tokenize message");
return FALSE;
}

5
src/tokenizers/osb.c

@ -35,7 +35,7 @@
extern const int primes[];
int
osb_tokenize_text (struct tokenizer *tokenizer, memory_pool_t * pool, f_str_t * input, GTree ** tree)
osb_tokenize_text (struct tokenizer *tokenizer, memory_pool_t * pool, f_str_t * input, GTree ** tree, gboolean save_token)
{
token_node_t *new = NULL;
f_str_t token = { NULL, 0, 0 }, *res;
@ -69,6 +69,9 @@ osb_tokenize_text (struct tokenizer *tokenizer, memory_pool_t * pool, f_str_t *
new = memory_pool_alloc0 (pool, sizeof (token_node_t));
new->h1 = h1;
new->h2 = h2;
if (save_token) {
new->extra = (uintptr_t)memory_pool_fstrdup (pool, &token);
}
if (g_tree_lookup (*tree, new) == NULL) {
g_tree_insert (*tree, new, new);

4
src/tokenizers/tokenizers.c

@ -239,13 +239,13 @@ tokenize_subject (struct worker_task *task, GTree ** tree)
new = memory_pool_alloc (task->task_pool, sizeof (token_node_t));
subject.begin = task->subject;
subject.len = strlen (task->subject);
osb_tokenizer->tokenize_func (osb_tokenizer, task->task_pool, &subject, tree);
osb_tokenizer->tokenize_func (osb_tokenizer, task->task_pool, &subject, tree, FALSE);
}
if ((sub = g_mime_message_get_subject (task->message)) != NULL) {
new = memory_pool_alloc (task->task_pool, sizeof (token_node_t));
subject.begin = (gchar *)sub;
subject.len = strlen (sub);
osb_tokenizer->tokenize_func (osb_tokenizer, task->task_pool, &subject, tree);
osb_tokenizer->tokenize_func (osb_tokenizer, task->task_pool, &subject, tree, FALSE);
}
}

4
src/tokenizers/tokenizers.h

@ -24,7 +24,7 @@ typedef struct token_node_s {
/* Common tokenizer structure */
struct tokenizer {
char *name;
int (*tokenize_func)(struct tokenizer *tokenizer, memory_pool_t *pool, f_str_t *input, GTree **cur);
int (*tokenize_func)(struct tokenizer *tokenizer, memory_pool_t *pool, f_str_t *input, GTree **cur, gboolean save_token);
f_str_t* (*get_next_word)(f_str_t *buf, f_str_t *token);
};
@ -35,7 +35,7 @@ struct tokenizer* get_tokenizer (char *name);
/* Get next word from specified f_str_t buf */
f_str_t *get_next_word (f_str_t *buf, f_str_t *token);
/* OSB tokenize function */
int osb_tokenize_text (struct tokenizer *tokenizer, memory_pool_t *pool, f_str_t *input, GTree **cur);
int osb_tokenize_text (struct tokenizer *tokenizer, memory_pool_t *pool, f_str_t *input, GTree **cur, gboolean save_token);
/* Common tokenizer for headers */
int tokenize_headers (memory_pool_t *pool, struct worker_task *task, GTree **cur);
/* Make tokens for a subject */

41
test/CMakeLists.txt

@ -0,0 +1,41 @@
# Unit-test sources located in this directory.
SET(TESTSRC
    rspamd_expression_test.c
    rspamd_memcached_test.c
    rspamd_mem_pool_test.c
    rspamd_statfile_test.c
    rspamd_fuzzy_test.c
    rspamd_test_suite.c
    rspamd_url_test.c
    rspamd_dns_test.c)

# Sources from ../src that are compiled directly into the test binary.
SET(TESTDEPENDS
    ../src/mem_pool.c
    ../src/hash.c
    ../src/url.c
    ../src/trie.c
    ../src/util.c
    ../src/radix.c
    ../src/fuzzy.c
    ../src/map.c
    ../src/logger.c
    ../src/memcached.c
    ../src/message.c
    ../src/html.c
    ../src/expressions.c
    ../src/statfile.c
    ../src/events.c
    ../src/upstream.c
    ../src/dns.c)

# Test runner. EXCLUDE_FROM_ALL means it is only built when requested
# explicitly (e.g. `make rspamd-test`), not by the default target.
ADD_EXECUTABLE(rspamd-test EXCLUDE_FROM_ALL
    ${TESTDEPENDS}
    ${CONTRIBSRC}
    ${TESTSRC})
SET_TARGET_PROPERTIES(rspamd-test PROPERTIES
    LINKER_LANGUAGE C
    COMPILE_FLAGS "-DRSPAMD_TEST")

# Libraries common to every configuration.
TARGET_LINK_LIBRARIES(rspamd-test
    event
    ${GLIB2_LIBRARIES}
    ${CMAKE_REQUIRED_LIBRARIES})

# Link whichever GMime flavour was detected.
IF(GMIME2_FOUND)
    TARGET_LINK_LIBRARIES(rspamd-test ${GMIME2_LIBRARIES})
ELSE()
    TARGET_LINK_LIBRARIES(rspamd-test ${GMIME24_LIBRARIES})
ENDIF()

# PCRE is added explicitly only when ENABLE_STATIC is ON.
IF(ENABLE_STATIC MATCHES "ON")
    TARGET_LINK_LIBRARIES(rspamd-test ${PCRE_LIBRARIES})
ENDIF()

7
utils/CMakeLists.txt

@ -0,0 +1,7 @@
ADD_SUBDIRECTORY(statshow)

# Redirector
IF(ENABLE_REDIRECTOR MATCHES "ON")
    # CONFIGURE_FILE places a relative output path in the current *binary*
    # directory, whereas INSTALL(PROGRAMS ...) resolves a relative path against
    # the current *source* directory. Name the generated script explicitly so
    # that installation also works for out-of-source builds.
    CONFIGURE_FILE(redirector.pl.in redirector.pl @ONLY)
    INSTALL(PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/redirector.pl
            DESTINATION bin
            RENAME rspamd-redirector)
ENDIF(ENABLE_REDIRECTOR MATCHES "ON")

54
utils/expression_parser.c

@ -1,54 +0,0 @@
#include "../src/config.h"
#include "../src/main.h"
#include "../src/cfg_file.h"
#include "../src/expressions.h"
rspamd_hash_t *counters = NULL;

/*
 * Expression parser test driver.
 *
 * Reads expressions from stdin one line at a time, parses each with
 * parse_expression and prints the resulting element chain to stdout as a
 * sequence of "OP:", "S:", "F:" and "O:" items.
 * Returns 0 when stdin is exhausted.
 */
int
main (int argc, char **argv)
{
	memory_pool_t *pool;
	struct expression *cur;
	char *line, *outstr;
	int r, s, w;
	char buf[BUFSIZ];

	pool = memory_pool_new (memory_pool_get_size ());

	line = fgets (buf, sizeof (buf), stdin);
	while (line) {
		/* Strip a trailing LF and then a trailing CR, never indexing before buf[0] */
		s = strlen (line);
		if (s > 0 && buf[s - 1] == '\n') {
			buf[--s] = '\0';
		}
		if (s > 0 && buf[s - 1] == '\r') {
			buf[--s] = '\0';
		}

		r = 0;
		cur = parse_expression (pool, line);
		/* +1 guarantees room for the terminator even when the line is empty */
		s = strlen (line) * 4 + 1;
		outstr = memory_pool_alloc (pool, s);
		/* The pool does not zero memory; keep outstr printable if nothing is appended */
		outstr[0] = '\0';

		/* Stop as soon as the buffer is full so that (s - r) never goes negative */
		while (cur && r < s - 1) {
			if (cur->type == EXPR_REGEXP) {
				w = snprintf (outstr + r, s - r, "OP:%s ", (char *)cur->content.operand);
			} else if (cur->type == EXPR_STR) {
				w = snprintf (outstr + r, s - r, "S:%s ", (char *)cur->content.operand);
			} else if (cur->type == EXPR_FUNCTION) {
				w = snprintf (outstr + r, s - r, "F:%s ", ((struct expression_function *)cur->content.operand)->name);
			}
			else {
				w = snprintf (outstr + r, s - r, "O:%c ", cur->content.operation);
			}
			if (w < 0) {
				/* Encoding error reported by snprintf: keep what we have */
				break;
			}
			/* On truncation w exceeds the free space; the loop condition then ends the walk */
			r += w;
			cur = cur->next;
		}

		printf ("Parsed expression: '%s' -> '%s'\n", line, outstr);
		line = fgets (buf, sizeof (buf), stdin);
	}

	memory_pool_delete (pool);

	return 0;
}

22
utils/statshow/CMakeLists.txt

@ -0,0 +1,22 @@
# Sources that belong to the statshow utility itself.
SET(STATSHOWSRC statshow.c)

# statshow is built from the classifier and tokenizer sources plus its own
# file. EXCLUDE_FROM_ALL keeps it out of the default build target.
ADD_EXECUTABLE(statshow EXCLUDE_FROM_ALL
    ${CLASSIFIERSSRC}
    ${TOKENIZERSSRC}
    ${STATSHOWSRC})
SET_TARGET_PROPERTIES(statshow PROPERTIES LINKER_LANGUAGE C)
# NOTE(review): the include path is relative to the compiler's working
# directory - confirm it still resolves for out-of-source builds.
SET_TARGET_PROPERTIES(statshow PROPERTIES COMPILE_FLAGS "-I../../src")

# Libraries common to every configuration.
TARGET_LINK_LIBRARIES(statshow
    event
    ${GLIB2_LIBRARIES}
    ${CMAKE_REQUIRED_LIBRARIES})

# Link whichever GMime flavour was detected.
IF(GMIME2_FOUND)
    TARGET_LINK_LIBRARIES(statshow ${GMIME2_LIBRARIES})
ELSE()
    TARGET_LINK_LIBRARIES(statshow ${GMIME24_LIBRARIES})
ENDIF()

# Lua bindings, then either LuaJIT or the stock Lua library.
TARGET_LINK_LIBRARIES(statshow rspamd_lua)
IF(ENABLE_LUAJIT MATCHES "ON")
    TARGET_LINK_LIBRARIES(statshow "${LUAJIT_LIBRARY}")
ELSE()
    TARGET_LINK_LIBRARIES(statshow "${LUA_LIBRARY}")
ENDIF()

# PCRE is added explicitly only when ENABLE_STATIC is ON.
IF(ENABLE_STATIC MATCHES "ON")
    TARGET_LINK_LIBRARIES(statshow ${PCRE_LIBRARIES})
ENDIF()

262
utils/statshow/statshow.c

@ -0,0 +1,262 @@
/* Copyright (c) 2010, Vsevolod Stakhov
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL Rambler BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "config.h"
#include "main.h"
#include "cfg_file.h"
#include "util.h"
#include "map.h"
#include "cfg_xml.h"
#include "classifiers/classifiers.h"
#include "tokenizers/tokenizers.h"
#include "message.h"
/* Config file path taken from -c/--config; remains NULL when the option is not given */
static gchar *cfg_name;
/* Command-line option table handed to GOption in read_cmd_line; the all-NULL row terminates it */
static GOptionEntry entries[] =
{
{ "config", 'c', 0, G_OPTION_ARG_STRING, &cfg_name, "Specify config file", NULL },
{ NULL, 0, 0, G_OPTION_ARG_NONE, NULL, NULL, NULL }
};
/*
 * Parse the command line using the option table `entries`.
 *
 * argc/argv are passed by pointer to g_option_context_parse. On a parse
 * failure the error is printed to stderr and the process exits with status 1.
 * On success cfg->cfg_name receives the value of -c/--config (NULL when the
 * option was not supplied).
 */
static void
read_cmd_line (gint *argc, gchar ***argv, struct config_file *cfg)
{
	GError *error = NULL;
	GOptionContext *context;

	context = g_option_context_new ("- run statshow utility");
	g_option_context_set_summary (context, "Summary:\n Statshow utility version " RVERSION "\n Release id: " RID);
	g_option_context_add_main_entries (context, entries, NULL);

	if (!g_option_context_parse (context, argc, argv, &error)) {
		fprintf (stderr, "option parsing failed: %s\n",
				error != NULL ? error->message : "unknown error");
		exit (1);
	}

	/* The context is needed only while parsing; release it instead of leaking it */
	g_option_context_free (context);

	cfg->cfg_name = cfg_name;
}
/*
 * Load the XML configuration named by cfg->cfg_name into cfg.
 *
 * Returns FALSE when read_xml_config fails; otherwise runs post_load_config
 * and returns TRUE.
 */
static gboolean
load_rspamd_config (struct config_file *cfg)
{
	gboolean loaded = read_xml_config (cfg, cfg->cfg_name) ? TRUE : FALSE;

	if (loaded) {
		/* Do post-load actions */
		post_load_config (cfg);
	}

	return loaded;
}
/*
 * g_list_foreach callback: run one configured classifier over a task with
 * classifier debugging switched on.
 *
 * value - struct classifier_config * for the classifier to run
 * arg   - struct worker_task * holding the processed message
 *
 * Tokens for the classifier's tokenizer are taken from task->tokens when
 * already present; otherwise every non-empty text part is tokenized and the
 * resulting tree is stored in task->tokens. Subject tokens are then added and
 * the classifier's classify_func is invoked.
 */
static void
classifiers_callback (gpointer value, void *arg)
{
	struct worker_task *task = arg;
	struct classifier_config *cl = value;
	struct classifier_ctx *ctx;
	struct mime_text_part *text_part;
	GTree *tokens = NULL;
	GList *cur;
	f_str_t c;

	ctx = cl->classifier->init_func (task->task_pool, cl);
	/* Make the classifier log per-token details */
	ctx->debug = TRUE;

	if ((tokens = g_hash_table_lookup (task->tokens, cl->tokenizer)) == NULL) {
		/*
		 * Walk the text parts of the message. Previously `cur` was read
		 * without ever being assigned; the header-based branch was unreachable
		 * because `header` was always NULL, so it has been dropped.
		 */
		cur = g_list_first (task->text_parts);
		while (cur != NULL) {
			text_part = (struct mime_text_part *)cur->data;
			if (text_part->is_empty) {
				cur = g_list_next (cur);
				continue;
			}
			c.begin = text_part->content->data;
			c.len = text_part->content->len;
			/*
			 * Tree would be freed at task pool freeing.
			 * save_token is TRUE so that each node keeps its token text for
			 * the debug output enabled above.
			 */
			if (!cl->tokenizer->tokenize_func (cl->tokenizer, task->task_pool, &c, &tokens, TRUE)) {
				msg_info ("cannot tokenize input");
				return;
			}
			cur = g_list_next (cur);
		}
		g_hash_table_insert (task->tokens, cl->tokenizer, tokens);
	}

	if (tokens == NULL) {
		return;
	}

	/* Take care of subject */
	tokenize_subject (task, &tokens);
	cl->classifier->classify_func (ctx, task->worker->srv->statfile_pool, tokens, task);
}
/*
 * Build a task around an in-memory message and run every configured
 * classifier on it.
 *
 * buf/len - message bytes; the buffer is referenced, not copied, so it must
 *           stay valid until this function returns
 * rspamd  - main context, attached to a temporary worker for the task
 *
 * Nothing is classified when process_message reports a failure. The task
 * itself is not released here.
 */
static void
process_buffer (gchar *buf, gsize len, struct rspamd_main *rspamd)
{
	struct worker_task *task;
	struct rspamd_worker *fake_worker;

	/* Make fake worker for task; zeroed so that fields we do not set are not garbage */
	fake_worker = g_malloc0 (sizeof (struct rspamd_worker));
	fake_worker->srv = rspamd;

	/* Make task */
	task = construct_task (fake_worker);
	/* Point the task at the caller's buffer */
	task->msg = memory_pool_alloc (task->task_pool, sizeof (f_str_t));
	task->msg->begin = buf;
	task->msg->len = len;

	/* Process message */
	if (process_message (task) == 0) {
		g_list_foreach (task->cfg->classifiers, classifiers_callback, task);
	}

	/* Released on both the success and the failure path */
	g_free (fake_worker);
}
static void
process_stdin (struct rspamd_main *rspamd)
{
gchar *in_buf;
gint r = 0, len;
/* Allocate input buffer */
len = BUFSIZ;
in_buf = g_malloc (len);
/* Read stdin */
while (!feof (stdin)) {
r += fread (in_buf + r, 1, len - r, stdin);
if (len - r < len / 2) {
/* Grow buffer */
len *= 2;
in_buf = g_realloc (in_buf, len);
}
}
process_buffer (in_buf, r, rspamd);
g_free (in_buf);
}
static void
process_file (const gchar *filename, struct rspamd_main *rspamd)
{
struct stat st;
char *in_buf;
gsize r = 0;
gint fd;
if (stat (filename, &st) == -1) {
msg_err ("stat failed: %s", strerror (errno));
return;
}
if ((fd = open (filename, O_RDONLY)) == -1) {
msg_err ("stat failed: %s", strerror (errno));
return;
}
in_buf = g_malloc (st.st_size);
while (r < st.st_size) {
r += read (fd, in_buf + r, r - st.st_size);
}
process_buffer (in_buf, r, rspamd);
g_free (in_buf);
}
/*
 * statshow entry point.
 *
 * Sets up a minimal rspamd_main with default configuration, parses the
 * command line, loads the XML config (FIXED_CONFIG_FILE unless -c/--config
 * is given), creates the statfile pool and then classifies either every file
 * named on the command line or, when none is given, the message on stdin.
 *
 * Exits with EXIT_FAILURE when the config cannot be loaded; returns 0
 * otherwise.
 */
gint
main (gint argc, gchar **argv, gchar **env)
{
	struct rspamd_main *rspamd;
	gchar **arg;

	/* g_malloc0 aborts the program on allocation failure, so no NULL checks are needed */
	rspamd = (struct rspamd_main *)g_malloc0 (sizeof (struct rspamd_main));
	rspamd->server_pool = memory_pool_new (memory_pool_get_size ());
	rspamd->cfg = (struct config_file *)g_malloc0 (sizeof (struct config_file));
	rspamd->cfg->cfg_pool = memory_pool_new (memory_pool_get_size ());
	init_defaults (rspamd->cfg);

	read_cmd_line (&argc, &argv, rspamd->cfg);
	if (rspamd->cfg->cfg_name == NULL) {
		rspamd->cfg->cfg_name = FIXED_CONFIG_FILE;
	}

	/* First set logger to console logger */
	rspamd_set_logger (RSPAMD_LOG_CONSOLE, TYPE_MAIN, rspamd->cfg);
	(void)open_log ();
	g_log_set_default_handler (rspamd_glib_log_function, rspamd->cfg);

	/* Init classifiers options */
	register_classifier_opt ("bayes", "min_tokens");
	register_classifier_opt ("winnow", "min_tokens");
	register_classifier_opt ("winnow", "learn_threshold");

	/* Load config */
	if (! load_rspamd_config (rspamd->cfg)) {
		exit (EXIT_FAILURE);
	}

	/* Init statfile pool */
	rspamd->statfile_pool = statfile_pool_new (rspamd->server_pool, rspamd->cfg->max_statfile_size);

	/* Check argc */
	if (argc > 1) {
		/*
		 * Iterate over the remaining arguments; argv is NULL-terminated.
		 * The cursor must point into argv (&argv[1]), not at the first
		 * file name's characters (argv[1]).
		 */
		for (arg = &argv[1]; *arg != NULL; arg ++) {
			process_file (*arg, rspamd);
		}
	}
	else {
		process_stdin (rspamd);
	}

	return 0;
}

73
utils/url_extracter.c

@ -1,73 +0,0 @@
#include <sys/types.h>
#include <sys/time.h>
#include <sys/wait.h>
#include <sys/param.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <netdb.h>
#include <syslog.h>
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <gmime/gmime.h>
#include "../src/config.h"
#if !defined(HAVE_OWN_QUEUE_H) && defined(HAVE_SYS_QUEUE_H)
#include <sys/queue.h>
#endif
#ifdef HAVE_OWN_QUEUE_H
#include "../src/queue.h"
#endif
#include "../src/main.h"
#include "../src/cfg_file.h"
#include "../src/url.h"
#include "../src/util.h"
#include "../src/message.h"
rspamd_hash_t *counters = NULL;

/*
 * URL extraction test driver.
 *
 * Reads a whole message from stdin, runs process_message on it and prints
 * every entry of task.urls, one per line. Returns 0.
 */
int
main (int argc, char **argv)
{
	struct worker_task task;
	struct uri *url;
	char *buf = NULL;
	size_t pos = 0, size = 65535;
	GList *cur;
	int ch;

	g_mem_set_vtable(glib_mem_profiler_table);
	g_mime_init (0);
	bzero (&task, sizeof (struct worker_task));
	task.task_pool = memory_pool_new (memory_pool_get_size ());

	/* Preallocate buffer */
	buf = g_malloc (size);

	/*
	 * Test the value returned by getchar itself: checking feof before the
	 * read stored the EOF sentinel as a spurious last byte of the message.
	 */
	while ((ch = getchar ()) != EOF) {
		buf[pos ++] = (char)ch;
		if (pos == size) {
			size *= 2;
			buf = g_realloc (buf, size);
		}
	}

	task.cfg = memory_pool_alloc0 (task.task_pool, sizeof (struct config_file));
	task.msg = memory_pool_alloc (task.task_pool, sizeof (f_str_t));
	task.msg->begin = buf;
	task.msg->len = pos;

	process_message (&task);

	for (cur = task.urls; cur != NULL; cur = g_list_next (cur)) {
		url = cur->data;
		printf ("%s\n", struri (url));
	}

	return 0;
}
Loading…
Cancel
Save