Rapid spam filtering system https://rspamd.com/
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

571 lines
16 KiB

  1. --[[
  2. Copyright (c) 2022, Vsevolod Stakhov <vsevolod@rspamd.com>
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. ]] --
  13. local fun = require 'fun'
  14. local lua_util = require "lua_util"
  15. local rspamd_util = require "rspamd_util"
  16. local ts = require("tableshape").types
  17. local logger = require 'rspamd_logger'
  18. local common = require "lua_selectors/common"
  19. local M = "selectors"
  20. local maps = require "lua_selectors/maps"
  -- Strips a trailing `_list` from a type name and returns what precedes it
  -- (e.g. `string_list` -> `string`). Returns nil when `ltype` does not end
  -- with `_list`.
  local function pure_type(ltype)
    local element_type = ltype:match('^(.*)_list$')
    return element_type
  end
  24. local transform_function = {
  -- lower: lowercases a single string through its `lower` method and always
  -- reports the result type as 'string'.
  lower = {
    description = 'Returns the lowercased string',
    types = { string = true },
    map_type = 'string',
    process = function(inp, _)
      local lowered = inp:lower()
      return lowered, 'string'
    end,
  },
  -- lower_utf8: lowercases a string via rspamd_util.lower_utf8; the incoming
  -- type tag is passed through unchanged.
  lower_utf8 = {
    description = 'Returns the lowercased utf8 string',
    types = { string = true },
    map_type = 'string',
    process = function(inp, t)
      local lowered = rspamd_util.lower_utf8(inp)
      return lowered, t
    end,
  },
  -- first: returns the first element of a list together with the element
  -- type (the list type with its `_list` suffix removed).
  -- Returns nil when the list is empty.
  ['first'] = {
    ['types'] = {
      ['list'] = true,
    },
    ['process'] = function(inp, t)
      -- luafun documents `head` as raising an error on an empty iterator,
      -- whereas `nth` yields nil; use `nth(1, ...)` so that an empty list
      -- produces "no value" like the other transforms do on failure.
      local head = fun.nth(1, inp)
      if head == nil then
        return nil
      end
      return head, pure_type(t)
    end,
    ['description'] = 'Returns the first element',
  },
  -- last: returns the final element of a list together with the element
  -- type (the list type with its `_list` suffix removed).
  -- Returns nil when the list is empty.
  ['last'] = {
    ['types'] = {
      ['list'] = true,
    },
    ['process'] = function(inp, t)
      local count = fun.length(inp)
      -- An empty list would ask for element 0, which is not a valid index
      -- for `fun.nth`; report "no value" instead.
      if count == 0 then
        return nil
      end
      return fun.nth(count, inp), pure_type(t)
    end,
    ['description'] = 'Returns the last element',
  },
  -- nth: picks the element at the position given by the first argument
  -- (position 1 when no argument is supplied) and returns it with the
  -- element type derived from the list type.
  nth = {
    description = 'Returns the nth element',
    types = { list = true },
    args_schema = { ts.number + ts.string / tonumber },
    process = function(inp, t, args)
      local position = args[1] or 1
      local element = fun.nth(position, inp)
      return element, pure_type(t)
    end,
  },
  -- take_n: keeps the leading elements of a list; the count is the first
  -- argument (1 when omitted). The list type is passed through unchanged.
  take_n = {
    description = 'Returns the n first elements',
    types = { list = true },
    args_schema = { ts.number + ts.string / tonumber },
    process = function(inp, t, args)
      local count = args[1] or 1
      local head_part = fun.take_n(count, inp)
      return head_part, t
    end,
  },
  -- drop_n: discards the leading elements of a list; the count is the first
  -- argument (1 when omitted). The list type is passed through unchanged.
  drop_n = {
    description = 'Returns list without the first n elements',
    types = { list = true },
    args_schema = { ts.number + ts.string / tonumber },
    process = function(inp, t, args)
      local count = args[1] or 1
      local tail_part = fun.drop_n(count, inp)
      return tail_part, t
    end,
  },
  -- join: materialises the string list into a table and concatenates it,
  -- using the first argument as separator (empty separator by default).
  join = {
    description = 'Joins strings into a single string using separator in the argument',
    types = { string_list = true },
    args_schema = { ts.string:is_optional() },
    process = function(inp, _, args)
      local separator = args[1] or ''
      local pieces = fun.totable(inp)
      return table.concat(pieces, separator), 'string'
    end,
  },
  -- join_nth: splits a string list into consecutive, non-overlapping groups
  -- of `step` elements (first argument) and joins each group with the
  -- separator given as second argument (empty by default). The final group
  -- may be shorter when the list length is not a multiple of `step`.
  -- Returns nil (after logging) when `step` is not a number >= 1.
  ['join_nth'] = {
    ['types'] = {
      ['string_list'] = true
    },
    ['process'] = function(inp, _, args)
      local step = args[1]
      local sep = args[2] or ''

      -- A zero/negative/missing step cannot form groups (and a zero step
      -- would never advance the loop below).
      if type(step) ~= 'number' or step < 1 then
        logger.errx('invalid step for join_nth: %s', step)
        return nil
      end
      step = math.floor(step)

      local inp_t = fun.totable(inp)
      local total = #inp_t
      local res = {}

      for i = 1, total, step do
        -- Group covers [i, i + step - 1]; clamp to the table length because
        -- table.concat raises an error when the range contains a nil slot.
        local last = math.min(i + step - 1, total)
        res[#res + 1] = table.concat(inp_t, sep, i, last)
      end
      return res, 'string_list'
    end,
    ['description'] = 'Joins strings into a set of strings using N elements and a separator in the argument',
    ['args_schema'] = { ts.number + ts.string / tonumber, ts.string:is_optional() }
  },
  -- join_tables: maps every element (expected to be a table accepted by
  -- table.concat) to a single string, joined with the first argument
  -- (empty separator by default).
  join_tables = {
    description = 'Joins tables into a table of strings',
    types = { list = true },
    args_schema = { ts.string:is_optional() },
    process = function(inp, _, args)
      local separator = args[1] or ''
      local function concat_row(row)
        return table.concat(row, separator)
      end
      return fun.map(concat_row, inp), 'string_list'
    end,
  },
  -- sort: orders the list in place with table.sort's default comparison and
  -- returns that same (now sorted) object with its type unchanged.
  sort = {
    description = 'Sort strings lexicographically',
    types = { list = true },
    process = function(inp, t, _)
      local items = inp
      table.sort(items)
      return items, t
    end,
  },
  -- uniq: collects every element as a key of a scratch table (so duplicates
  -- collapse) and returns an iterator over those keys. No sorting is needed,
  -- but the resulting order follows table traversal, not input order.
  uniq = {
    description = 'Returns a list of unique elements (using a hash table)',
    types = { list = true },
    process = function(inp, t, _)
      local seen = {}
      local function remember(value)
        seen[value] = true
      end
      local function key_only(key, _)
        return key
      end

      fun.each(remember, inp)
      return fun.map(key_only, seen), t
    end,
  },
  -- digest: hashes a string by delegating to common.create_digest, handing
  -- over the whole argument list (encoding and optional hash type).
  digest = {
    description = [[Create a digest from a string.
The first argument is encoding (`hex`, `base32` (and forms `bleach32`, `rbase32`), `base64`),
the second argument is optional hash type (`blake2`, `sha256`, `sha1`, `sha512`, `md5`)]],
    types = { string = true },
    map_type = 'string',
    args_schema = common.digest_schema(),
    process = function(inp, _, args)
      local encoded = common.create_digest(inp, args)
      return encoded, 'string'
    end,
  },
  -- substring: slices the input with the `sub` method. The first argument is
  -- the start index (default 1), the second the end index (default -1, i.e.
  -- through the last character).
  substring = {
    description = 'Extracts substring; the first argument is start, the second is the last (like in Lua)',
    types = { string = true },
    map_type = 'string',
    args_schema = { (ts.number + ts.string / tonumber):is_optional(),
                    (ts.number + ts.string / tonumber):is_optional() },
    process = function(inp, _, args)
      return inp:sub(args[1] or 1, args[2] or -1), 'string'
    end,
  },
  -- prepend: concatenates all arguments (no separator) and puts the result
  -- in front of the input string.
  prepend = {
    description = 'Prepends a string or a strings list',
    types = { string = true },
    map_type = 'string',
    process = function(inp, _, args)
      return table.concat(args, '') .. inp, 'string'
    end,
  },
  -- append: concatenates all arguments (no separator) and puts the result
  -- after the input string.
  append = {
    description = 'Appends a string or a strings list',
    types = { string = true },
    map_type = 'string',
    process = function(inp, _, args)
      return inp .. table.concat(args, ''), 'string'
    end,
  },
  -- regexp: compiles (with caching) the pattern given as first argument,
  -- searches the input and returns every leaf value of the (possibly nested)
  -- search result as one flat list, in traversal order.
  -- Returns nil when the pattern cannot be created (logged) or nothing matches.
  regexp = {
    description = 'Regexp matching, returns all matches flattened in a single list',
    types = { string = true },
    map_type = 'string',
    args_schema = { ts.string },
    process = function(inp, _, args)
      local rspamd_regexp = require "rspamd_regexp"
      local pattern = args[1]

      local re = rspamd_regexp.create_cached(pattern)
      if not re then
        logger.errx('invalid regexp: %s', pattern)
        return nil
      end

      local matches = re:search(inp, false, true)
      if not matches then
        return nil
      end

      -- Depth-first walk: nested tables are descended into, anything else
      -- is appended to the flat result.
      local flat = {}
      local function collect(node)
        for _, item in ipairs(node) do
          if type(item) == 'table' then
            collect(item)
          else
            flat[#flat + 1] = item
          end
        end
      end
      collect(matches)

      return flat, 'string_list'
    end,
  },
  -- filter_map: passes the input through only when looking it up in the map
  -- named by the first argument yields a truthy result; otherwise nil.
  -- An unknown map name is logged and also yields nil.
  filter_map = {
    description = 'Returns a value if it exists in some map (or acts like a `filter` function)',
    types = { string = true },
    map_type = 'string',
    args_schema = { ts.string },
    process = function(inp, t, args)
      local map_name = args[1]
      local map = maps[map_name]
      if not map then
        logger.errx('invalid map name: %s', map_name)
        return nil
      end

      if map:get_key(inp) then
        return inp, t
      end
      return nil
    end,
  },
  -- except_map: the inverse of a map filter - passes the input through only
  -- when looking it up in the map named by the first argument yields nothing;
  -- a hit produces nil. An unknown map name is logged and yields nil.
  except_map = {
    description = 'Returns a value if it does not exists in some map (or acts like a `except` function)',
    types = { string = true },
    map_type = 'string',
    args_schema = { ts.string },
    process = function(inp, t, args)
      local map_name = args[1]
      local map = maps[map_name]
      if not map then
        logger.errx('invalid map name: %s', map_name)
        return nil
      end

      if map:get_key(inp) then
        return nil
      end
      return inp, t
    end,
  },
  -- apply_map: replaces the input with whatever the map named by the first
  -- argument returns for it; nil when the lookup yields nothing.
  -- An unknown map name is logged and yields nil.
  apply_map = {
    description = 'Returns a value from some map corresponding to some key (or acts like a `map` function)',
    types = { string = true },
    map_type = 'string',
    args_schema = { ts.string },
    process = function(inp, t, args)
      local map_name = args[1]
      local map = maps[map_name]
      if not map then
        logger.errx('invalid map name: %s', map_name)
        return nil
      end

      local value = map:get_key(inp)
      if not value then
        return nil
      end
      return value, t
    end,
  },
  -- id: ignores the input entirely and produces a value from the arguments:
  --   * no arguments          -> empty string
  --   * exactly one argument  -> that argument, typed 'string'
  --   * two or more arguments -> all arguments stringified, typed 'string_list'
  id = {
    description = 'Drops input value and return values from function\'s arguments or an empty string',
    types = {
      string = true,
      list = true,
    },
    map_type = 'string',
    args_schema = (ts.string + ts.array_of(ts.string)):is_optional(),
    process = function(_, _, args)
      local first_arg, second_arg = args[1], args[2]
      if not first_arg then
        return '', 'string'
      end
      if second_arg then
        return fun.map(tostring, args), 'string_list'
      end
      return first_arg, 'string'
    end,
  },
  -- equal: passes the input through when it equals the first argument,
  -- otherwise yields nil.
  equal = {
    description = [[Boolean function equal.
Returns either nil or its argument if input is equal to argument]],
    types = { string = true },
    map_type = 'string',
    args_schema = { ts.string },
    process = function(inp, _, args)
      if inp ~= args[1] then
        return nil
      end
      return inp, 'string'
    end,
  },
  -- in: membership test - passes the input (and its type) through when it
  -- equals any of the arguments, otherwise yields nil.
  ['in'] = {
    description = [[Boolean function in.
Returns either nil or its input if input is in args list]],
    types = { string = true },
    map_type = 'string',
    args_schema = ts.array_of(ts.string),
    process = function(inp, t, args)
      for _, candidate in ipairs(args) do
        if candidate == inp then
          return inp, t
        end
      end
      -- No argument matched
      return nil
    end,
  },
  -- not_in: negated membership test - yields nil as soon as the input equals
  -- one of the arguments, otherwise passes the input (and its type) through.
  not_in = {
    description = [[Boolean function not in.
Returns either nil or its input if input is not in args list]],
    types = { string = true },
    map_type = 'string',
    args_schema = ts.array_of(ts.string),
    process = function(inp, t, args)
      for _, rejected in ipairs(args) do
        if rejected == inp then
          return nil
        end
      end
      -- None of the arguments matched
      return inp, t
    end,
  },
  -- inverse: logical negation for selector values.
  --   * missing or empty-string input -> first argument, or 'true' if none
  --   * any non-empty input           -> nil
  ['inverse'] = {
    ['types'] = {
      ['string'] = true,
    },
    ['map_type'] = 'string',
    ['process'] = function(inp, _, args)
      -- In Lua the empty string is truthy, so a bare `if inp` would treat ''
      -- as "present" and contradict the documented contract; test for it
      -- explicitly.
      if inp and inp ~= '' then
        return nil
      end
      return (args[1] or 'true'), 'string'
    end,
    ['description'] = [[Inverses input.
Empty string comes the first argument or 'true', non-empty string comes nil]],
    ['args_schema'] = { ts.string:is_optional() }
  },
  -- ipmask: parses the input as an IP address, applies a mask and returns
  -- the string form of the result. IPv4 addresses use the first argument as
  -- mask; other addresses use the second argument, falling back to the first.
  -- Returns nil (with a debug message) when the input is not a valid IP.
  ipmask = {
    description = 'Applies mask to IP address.' ..
        ' The first argument is the mask for IPv4 addresses, the second is the mask for IPv6 addresses.',
    types = { string = true },
    map_type = 'string',
    args_schema = { (ts.number + ts.string / tonumber),
                    (ts.number + ts.string / tonumber):is_optional() },
    process = function(inp, _, args)
      local rspamd_ip = require "rspamd_ip"

      -- Non optimal: the address is re-parsed from its string form
      local ip = rspamd_ip.from_string(inp)
      if not (ip and ip:is_valid()) then
        lua_util.debugm(M, "cannot convert %s to IP", inp)
        return nil
      end

      local mask_arg
      if ip:get_version() == 4 then
        mask_arg = args[1]
      else
        mask_arg = args[2] or args[1]
      end

      return ip:apply_mask(tonumber(mask_arg)):to_string(), 'string'
    end,
  },
  -- to_ascii: replaces every byte in the range 128..255 with a replacement
  -- string (the transform's argument, `?` when omitted). Works on a single
  -- value or, when given a table/iterator object, on each element.
  ['to_ascii'] = {
    ['types'] = {
      ['string'] = true,
      ['list'] = true,
    },
    ['map_type'] = 'string',
    ['process'] = function(inp, _, args)
      -- Honour the caller-supplied replacement for both scalar and list
      -- inputs (the scalar path previously always used '?').
      local replacement = args[1] or '?'

      local function strip_high_bytes(s)
        -- The parentheses keep only gsub's first result; its second result
        -- (the substitution count) must not leak to the caller/iterator.
        return (string.gsub(tostring(s), '[\128-\255]', replacement))
      end

      if type(inp) == 'table' then
        return fun.map(strip_high_bytes, inp), 'string_list'
      end
      return strip_high_bytes(inp), 'string'
    end,
    ['description'] = 'Returns the string with all non-ascii bytes replaced with the character ' ..
        'given as second argument or `?`',
    ['args_schema'] = { ts.string:is_optional() }
  },
  -- get_tld: hands the hostname to rspamd_util.get_tld and returns the
  -- result typed as 'string'. Takes no arguments.
  get_tld = {
    description = 'Returns effective second-level domain (eSLD) using the Public Suffix List',
    types = { string = true },
    map_type = 'string',
    args_schema = {},
    process = function(inp, _, _)
      local domain = rspamd_util.get_tld(inp)
      return domain, 'string'
    end,
  },
  -- pack_numbers: converts each list element with tonumber and packs the
  -- collected numbers via rspamd_util.pack. The per-number format code is
  -- the first argument ('f' by default) and is repeated once per number.
  pack_numbers = {
    description = 'Converts a list of strings to numbers & returns a packed string',
    types = { string_list = true },
    map_type = 'string',
    args_schema = { ts.string:is_optional() },
    process = function(inp, _, args)
      local item_format = args[1] or 'f'

      local numbers = {}
      for _, raw in ipairs(inp) do
        table.insert(numbers, tonumber(raw))
      end

      local full_format = string.rep(item_format, #numbers)
      local packed = rspamd_util.pack(full_format, lua_util.unpack(numbers))
      return packed, 'string'
    end,
  },
  -- filter_string_nils: keeps only elements that are strings and are not the
  -- literal text 'nil' (what a nil becomes after implicit stringification).
  filter_string_nils = {
    description = 'Removes all nils from a list of strings (when converted implicitly)',
    types = { string_list = true },
    args_schema = {},
    process = function(inp, _, _)
      local function is_real_string(val)
        return type(val) == 'string' and val ~= 'nil'
      end
      return fun.filter(is_real_string, inp), 'string_list'
    end,
  },
  -- apply_methods: treats every argument as a method name, calls that method
  -- on the userdata input (no extra parameters) and collects the stringified
  -- truthy results, in argument order, into a string list.
  -- Names that do not resolve to a function are logged and skipped instead
  -- of aborting the whole transform with a "call a nil value" error.
  ['apply_methods'] = {
    ['types'] = {
      ['userdata'] = true,
    },
    ['process'] = function(inp, _, args)
      local res = {}
      for _, arg in ipairs(args) do
        local meth = inp[arg]
        if type(meth) == 'function' then
          local ret = meth(inp)
          -- false/nil results are dropped rather than stringified
          if ret then
            res[#res + 1] = tostring(ret)
          end
        else
          logger.errx('invalid method name: %s', arg)
        end
      end
      return res, 'string_list'
    end,
    ['description'] = 'Apply a list of method calls to the userdata object',
  },
  -- filter_method: keeps the elements of a userdata list for which calling
  -- the method named by the first argument returns a truthy value.
  -- A missing method name is logged and yields nil.
  filter_method = {
    description = 'Apply method to list of userdata and use it as a filter,' ..
        ' excluding elements for which method returns false/nil',
    types = { userdata_list = true },
    args_schema = { ts.string },
    process = function(inp, t, args)
      local meth = args[1]
      if not meth then
        logger.errx('invalid method name: %s', args[1])
        return nil
      end

      local function passes(val)
        return val[meth](val)
      end
      return fun.filter(passes, inp), 'userdata_list'
    end,
  },
  530. }
  -- `match` is an alias: both keys point at the very same `regexp`
  -- definition table.
  transform_function['match'] = transform_function['regexp']

  -- Module value: the table of transform definitions.
  return transform_function