From 0a449230d76840ec6b40cd06c8e93f2e6febb038 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Mon, 6 Jan 2025 09:58:25 +0000 Subject: [PATCH] [Feature] Allow to store shingles as opaque Lua data --- src/lua/CMakeLists.txt | 3 +- src/lua/lua_common.c | 3 +- src/lua/lua_common.h | 4 +- src/lua/lua_shingles.cxx | 122 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 129 insertions(+), 3 deletions(-) create mode 100644 src/lua/lua_shingles.cxx diff --git a/src/lua/CMakeLists.txt b/src/lua/CMakeLists.txt index 46de053ba..135a21da2 100644 --- a/src/lua/CMakeLists.txt +++ b/src/lua/CMakeLists.txt @@ -35,6 +35,7 @@ SET(LUASRC ${CMAKE_CURRENT_SOURCE_DIR}/lua_common.c ${CMAKE_CURRENT_SOURCE_DIR}/lua_tensor.c ${CMAKE_CURRENT_SOURCE_DIR}/lua_parsers.c ${CMAKE_CURRENT_SOURCE_DIR}/lua_compress.c - ${CMAKE_CURRENT_SOURCE_DIR}/lua_classnames.c) + ${CMAKE_CURRENT_SOURCE_DIR}/lua_classnames.c + ${CMAKE_CURRENT_SOURCE_DIR}/lua_shingles.cxx) SET(RSPAMD_LUA ${LUASRC} PARENT_SCOPE) \ No newline at end of file diff --git a/src/lua/lua_common.c b/src/lua/lua_common.c index d79efc308..3a0f1a06c 100644 --- a/src/lua/lua_common.c +++ b/src/lua/lua_common.c @@ -1,5 +1,5 @@ /* - * Copyright 2024 Vsevolod Stakhov + * Copyright 2025 Vsevolod Stakhov * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -985,6 +985,7 @@ rspamd_lua_init(bool wipe_mem) luaopen_tensor(L); luaopen_parsers(L); luaopen_compress(L); + luaopen_shingle(L); #ifndef WITH_LUAJIT rspamd_lua_add_preload(L, "bit", luaopen_bit); lua_settop(L, 0); diff --git a/src/lua/lua_common.h b/src/lua/lua_common.h index 1d39d0c52..accc6be86 100644 --- a/src/lua/lua_common.h +++ b/src/lua/lua_common.h @@ -1,5 +1,5 @@ /* - * Copyright 2024 Vsevolod Stakhov + * Copyright 2025 Vsevolod Stakhov * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -421,6 +421,8 @@ void luaopen_tensor(lua_State *L); void luaopen_parsers(lua_State *L); +void luaopen_shingle(lua_State *L); + void rspamd_lua_dostring(const char *line); double rspamd_lua_normalize(struct rspamd_config *cfg, diff --git a/src/lua/lua_shingles.cxx b/src/lua/lua_shingles.cxx new file mode 100644 index 000000000..355a9e7f2 --- /dev/null +++ b/src/lua/lua_shingles.cxx @@ -0,0 +1,122 @@ +/* + * Copyright 2025 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include "lua_common.h"
+#include "shingles.h"
+#include "fmt/format.h"
+
+#define RSPAMD_SHINGLE_CLASS "rspamd{shingle}"
+
+/***
+ * @module rspamd_shingle
+ * This module provides methods to work with text shingles
+ */
+
+/***
+ * @method shingle:to_table()
+ * Converts shingle to table of decimal strings
+ * @return {table} table of RSPAMD_SHINGLE_SIZE decimal strings
+ */
+LUA_FUNCTION_DEF(shingle, to_table);
+
+/***
+ * @method shingle:get(index)
+ * Gets element at index as two lua_Integer values (high and low 32 bits).
+ * Raises an error if index is outside of 1..RSPAMD_SHINGLE_SIZE
+ * @param {number} index 1-based index
+ * @return {number,number} high and low 32-bit parts
+ */
+LUA_FUNCTION_DEF(shingle, get);
+
+/***
+ * @method shingle:get_string(index)
+ * Gets element at index as decimal string.
+ * Raises an error if index is outside of 1..RSPAMD_SHINGLE_SIZE
+ * @param {number} index 1-based index
+ * @return {string} decimal representation
+ */
+LUA_FUNCTION_DEF(shingle, get_string);
+
+/* Methods table passed to rspamd_lua_new_class() in luaopen_shingle() */
+static const struct luaL_reg shinglelib_m[] = {
+	LUA_INTERFACE_DEF(shingle, to_table),
+	LUA_INTERFACE_DEF(shingle, get),
+	LUA_INTERFACE_DEF(shingle, get_string),
+	{"__tostring", rspamd_lua_class_tostring},
+	{nullptr, nullptr}};
+
+/*
+ * Fetches the shingle userdata at stack position `pos`.
+ * Raises a Lua argument error when rspamd_lua_check_udata() yields nothing
+ * for RSPAMD_SHINGLE_CLASS at that position.
+ */
+static struct rspamd_shingle *
+lua_check_shingle(lua_State *L, int pos)
+{
+	void *ud = rspamd_lua_check_udata(L, pos, RSPAMD_SHINGLE_CLASS);
+	luaL_argcheck(L, ud != nullptr, pos, "'shingle' expected");
+	/* The userdata block is used directly as the shingle structure */
+	return static_cast<struct rspamd_shingle *>(ud);
+}
+
+/*
+ * Reads the 1-based element index from stack position `pos` and returns the
+ * matching 0-based offset into `hashes`.
+ * Raises a Lua error when the index is outside of 1..RSPAMD_SHINGLE_SIZE.
+ */
+static int
+lua_shingle_check_index(lua_State *L, int pos)
+{
+	const auto lua_idx = luaL_checkinteger(L, pos);
+
+	/* Validate the 1-based value first so that no subtraction can overflow */
+	if (lua_idx < 1 || lua_idx > RSPAMD_SHINGLE_SIZE) {
+		/*
+		 * `%d` consumes an `int` from the varargs, whilst lua_Integer may be
+		 * wider than that, so the argument must be narrowed explicitly.
+		 * luaL_error() does not return; `return` merely follows the Lua idiom.
+		 */
+		return luaL_error(L, "index out of bounds: %d", static_cast<int>(lua_idx));
+	}
+
+	return static_cast<int>(lua_idx - 1);
+}
+
+/* Implementation of shingle:to_table(): array of hashes as decimal strings */
+static int
+lua_shingle_to_table(lua_State *L)
+{
+	LUA_TRACE_POINT;
+	auto *sh = lua_check_shingle(L, 1);
+
+	/* Preallocate the array part: exactly one slot per hash */
+	lua_createtable(L, RSPAMD_SHINGLE_SIZE, 0);
+
+	for (int i = 0; i < RSPAMD_SHINGLE_SIZE; i++) {
+		const auto str = fmt::format("{}", sh->hashes[i]);
+		/* The length is already known, so avoid strlen() in lua_pushstring() */
+		lua_pushlstring(L, str.data(), str.size());
+		/* Lua arrays are 1-based */
+		lua_rawseti(L, -2, i + 1);
+	}
+
+	return 1;
+}
+
+/* Implementation of shingle:get(index): one hash as (high, low) 32-bit halves */
+static int
+lua_shingle_get(lua_State *L)
+{
+	LUA_TRACE_POINT;
+	auto *sh = lua_check_shingle(L, 1);
+	const auto idx = lua_shingle_check_index(L, 2);
+
+	/*
+	 * The value is returned as two halves; presumably because a single Lua
+	 * number cannot represent every 64-bit value on all supported Lua flavours.
+	 */
+	const uint64_t val = sh->hashes[idx];
+	lua_pushinteger(L, static_cast<lua_Integer>(val >> 32));
+	lua_pushinteger(L, static_cast<lua_Integer>(val & 0xFFFFFFFF));
+
+	return 2;
+}
+
+/* Implementation of shingle:get_string(index): one hash as a decimal string */
+static int
+lua_shingle_get_string(lua_State *L)
+{
+	LUA_TRACE_POINT;
+	auto *sh = lua_check_shingle(L, 1);
+	const auto idx = lua_shingle_check_index(L, 2);
+
+	const auto str = fmt::format("{}", sh->hashes[idx]);
+	lua_pushlstring(L, str.data(), str.size());
+
+	return 1;
+}
+
+/*
+ * Registers RSPAMD_SHINGLE_CLASS together with its methods table and then
+ * pops one value from the stack (presumably the one left there by
+ * rspamd_lua_new_class()).
+ */
+void luaopen_shingle(lua_State *L)
+{
+	rspamd_lua_new_class(L, RSPAMD_SHINGLE_CLASS, shinglelib_m);
+	lua_pop(L, 1);
+}