mirror of https://github.com/rspamd/rspamd.git
Rapid spam filtering system
https://rspamd.com/
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
161 lines
4.0 KiB
161 lines
4.0 KiB
--[[
Copyright (c) 2022, Vsevolod Stakhov <vsevolod@rspamd.com>

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
]]--
|
|
|
|
--[[ Lua LPEG grammar based on https://github.com/xolox/lua-lxsh/ ]]
|
|
|
|
|
|
local lpeg = require "lpeg"
|
|
|
|
-- Shorthands for the LPeg constructors used throughout the grammar.
local P = lpeg.P
local R = lpeg.R
local S = lpeg.S

-- Character classes shared by several token patterns.
local D = R('09') -- one decimal digit
local I = R('AZ', 'az', '\127\255') + P('_') -- one identifier byte: letter, byte 127-255 or underscore
local B = -(I + D) -- word boundary: succeeds only if no identifier byte or digit follows
local EOS = -P(1) -- end of subject: succeeds only if no byte is left
|
|
|
|
-- Long brackets (`[[ ... ]]`, `[=[ ... ]=]`, ...), shared by long strings and
-- long comments.
-- The lookahead only verifies that an opening long bracket is present. The
-- match-time function then reads the bracket level, does a plain-text search
-- for the closing bracket of the same level and returns the position just
-- past it. When the closing bracket is missing it returns nothing, so the
-- pattern fails.
local longstring = #(P('[[') + (P('[') * P('=') ^ 0 * P('['))) * P(function(subject, pos)
  local equals = subject:match('^%[(=*)%[', pos)
  if not equals then
    return
  end

  local _, stop = subject:find(']' .. equals .. ']', pos, true)
  if not stop then
    return
  end

  return stop + 1
end)
|
|
|
|
-- Quoted string literals.
-- The body of a literal is any mix of plain bytes (anything except the
-- delimiter, CR, LF, form feed and backslash) and backslash escapes
-- (a backslash followed by any single byte).
local function quoted(delimiter)
  local plain = 1 - S(delimiter .. '\r\n\f\\')
  local escaped = P('\\') * 1
  return P(delimiter) * (plain + escaped) ^ 0 * delimiter
end

local singlequoted = quoted("'")
local doublequoted = quoted('"')
|
|
|
|
-- Comments.
-- A short comment is `--` followed by the rest of the line, including the
-- line terminator when one is present. A long comment is `--` followed by a
-- long bracket.
local eol = P('\r\n') + P('\n')
local line = (P(1) - S('\r\n\f')) ^ 0 * eol ^ -1
local singleline = P('--') * line
local multiline = P('--') * longstring
|
|
|
|
-- Numeric literals.
local sign = S('+-') ^ -1 -- optional sign (used by the exponent part)
local decimal = D ^ 1 -- one or more digits
local hexadecimal = P('0') * S('xX') * R('09', 'AF', 'af') ^ 1 -- 0x / 0X plus hex digits
local float = (D ^ 1 * P('.') * D ^ 0) + (P('.') * D ^ 1) -- `1.`, `1.5` or `.5`
local maybeexp = (float + decimal) * (S('eE') * sign * D ^ 1) ^ -1 -- mantissa, optional exponent
|
|
|
|
--- Builds a pattern that matches any of the given keywords as a whole word.
-- @param keywords string of keywords separated by whitespace
-- @return LPeg pattern: an ordered choice of the keywords, followed by a
-- check that the match ends at a word boundary or at the end of the subject
local function compile_keywords(keywords)
  local words = {}
  for w in keywords:gmatch('%S+') do
    words[#words + 1] = w
  end

  -- Longest words first, so that a keyword is tried before any shorter
  -- keyword that is its prefix (e.g. `elseif` before `else`).
  table.sort(words, function(lhs, rhs)
    return #rhs < #lhs
  end)

  local alternatives
  for i = 1, #words do
    local literal = lpeg.P(words[i])
    if alternatives then
      alternatives = alternatives + literal
    else
      alternatives = literal
    end
  end

  local ending = B + EOS -- word boundary or end of subject
  return alternatives * ending
end
|
|
|
|
-- Identifiers.
-- `ident` is a single name: one identifier byte followed by any number of
-- identifier bytes or digits.
-- `expr` is any number of `.name` suffixes, which lets a dotted chain such as
-- `a.b.c` be consumed in one go.
local ident = I * (I + D) ^ 0
local expr = (P('.') * ident) ^ 0
|
|
|
|
-- Token definitions as {kind, pattern} pairs.
-- Order matters: compile_patterns joins the entries with LPeg ordered choice,
-- so at every position the first entry that matches determines the token.
local patterns = {
  { 'whitespace', S('\r\n\f\t\v ') ^ 1 },
  -- The trailing boundary keeps names such as `nilable` from being split.
  { 'constant', (P('true') + 'false' + 'nil') * B },
  { 'string', singlequoted + doublequoted + longstring },
  -- Comments are listed before operators because both may start with `-`.
  { 'comment', multiline + singleline },
  { 'number', hexadecimal + maybeexp },
  -- Word operators must end at a word boundary. Operators are tried before
  -- keywords and identifiers, so without the boundary a name such as
  -- `nothing`, `order` or `android` would be split into an operator followed
  -- by the rest of the name.
  -- For the symbolic operators, longer spellings come first (`...` before
  -- `..` before the single `.` in the set).
  { 'operator', (P('not') + 'and' + 'or') * B
      + '...' + '..' + '~=' + '==' + '>=' + '<='
      + S(']{=>^[<;)*(%}+-:,/.#') },
  { 'keyword', compile_keywords([[
    break do else elseif end for function if in local repeat return then until while
  ]]) },
  -- After the first name has matched, extend the token over any `.name`
  -- suffixes by returning the position where `expr` stops.
  { 'identifier', lpeg.Cmt(ident, function(input, index)
    return expr:match(input, index)
  end) },
  -- Fallback: any single byte that nothing else recognised.
  { 'error', 1 },
}
|
|
|
|
-- Cache for the combined parser; filled on the first compile_patterns call.
local compiled

--- Returns the combined token parser, building it on first use.
-- Each entry of `patterns` becomes `Cc(kind) * pattern * Cp()`, so a
-- successful match yields the token kind and the position just past the
-- token. The entries are joined with ordered choice in table order.
-- @return LPeg pattern
local function compile_patterns()
  if compiled then
    return compiled
  end

  local function tagged(entry)
    local kind, grammar = entry[1], entry[2]
    return lpeg.Cc(kind) * lpeg.P(grammar) * lpeg.Cp()
  end

  local alternatives = tagged(patterns[1])
  for i = 2, #patterns do
    alternatives = alternatives + tagged(patterns[i])
  end

  compiled = alternatives
  return compiled
end
|
|
|
|
--- Advances a line/column position over the text of a token.
-- @param token text that has just been consumed
-- @param lnum line number at the start of the token (defaults to 1)
-- @param cnum column number at the start of the token (defaults to 1)
-- @return line number and column number of the position right after the
-- token; columns are counted in bytes and restart at 1 after every "\n"
local function sync(token, lnum, cnum)
  lnum = lnum or 1
  cnum = cnum or 1

  -- Count the newlines and remember where the last one sits.
  local last_newline
  for pos in token:gmatch('()\n') do
    lnum = lnum + 1
    last_newline = pos
  end

  if last_newline then
    -- Bytes after the last newline, plus one for the 1-based column.
    cnum = #token - last_newline + 1
  else
    cnum = cnum + #token
  end

  return lnum, cnum
end
|
|
|
|
-- Public module table.
local exports = {}

--- Returns an iterator over the tokens of `input`.
-- Every call of the iterator yields four values: the token kind, the token
-- text, and the line and column at which the token starts. The iteration ends
-- as soon as the parser does not match at the current position.
-- @param input string to tokenize
-- @return iterator function
function exports.gmatch(input)
  local parser = compile_patterns()
  local pos, line_no, col_no = 1, 1, 1

  local function next_token()
    local kind, stop = parser:match(input, pos)
    if not (kind and stop) then
      return
    end

    local text = input:sub(pos, stop - 1)
    local start_line, start_col = line_no, col_no

    -- Move the cursor and the line/column position past this token.
    pos = stop
    line_no, col_no = sync(text, line_no, col_no)

    return kind, text, start_line, start_col
  end

  return next_token
end
|
|
|
|
--- Tokenizes `input` and collects all tokens into an array.
-- @param input string to tokenize
-- @return array of `{kind, text, lnum, cnum}` entries in input order
function exports.lex_to_table(input)
  local tokens, count = {}, 0

  for kind, text, lnum, cnum in exports.gmatch(input) do
    count = count + 1
    tokens[count] = { kind, text, lnum, cnum }
  end

  return tokens
end

return exports
|
|
|