|
|
--[[
Copyright (c) 2022, Vsevolod Stakhov <vsevolod@rspamd.com>

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
]]--
--[[ Lua LPEG grammar based on https://github.com/xolox/lua-lxsh/ ]]
local lpeg = require "lpeg"
-- Short aliases for the LPeg constructors used throughout this file.
local P = lpeg.P
local R = lpeg.R
local S = lpeg.S

-- Shared building blocks for the token patterns below.
local D = R'09' -- Digits
-- Identifiers: ASCII letters, underscore and any byte in the \127-\255 range
local I = R('AZ', 'az', '\127\255') + '_'
local B = -(I + D) -- Word boundary: next character is neither identifier nor digit
local EOS = -lpeg.P(1) -- end of string
-- Pattern for long strings and long comments.
-- The `#(...)` look-ahead checks for an opening long bracket (two `[` with any
-- number of `=` between them) without consuming it. The match-time function
-- then looks for the closing bracket of the same level and returns the
-- position just past it. If there is no closing bracket the function returns
-- nothing, so the pattern fails.
local longstring = #(P'[[' + (P'[' * P'='^0 * '[')) * P(function(input, index)
  -- Capture the run of `=` signs that defines the bracket level.
  local level = input:match('^%[(=*)%[', index)
  if level then
    -- Plain (non-pattern) search for the matching closing bracket.
    local _, last = input:find(']' .. level .. ']', index, true)
    if last then
      return last + 1
    end
  end
end)
-- String literals.
-- A quoted string runs up to the matching quote. A backslash escapes whatever
-- single character follows it; an unescaped CR, LF or form feed ends the match
-- attempt (the pattern then fails because the closing quote is missing).
local singlequoted = P"'" * ((1 - S"'\r\n\f\\") + (P'\\' * 1))^0 * "'"
local doublequoted = P'"' * ((1 - S'"\r\n\f\\') + (P'\\' * 1))^0 * '"'
-- Comments.
local eol = P'\r\n' + '\n'
-- Everything up to (not including) CR/LF/FF, plus the line terminator if present
local line = (1 - S'\r\n\f')^0 * eol^-1
-- `--` followed by the rest of the line
local singleline = P'--' * line
-- `--` immediately followed by a long bracket block
local multiline = P'--' * longstring
-- Numbers.
local sign = S'+-'^-1 -- optional sign (used for the exponent only)
local decimal = D^1
local hexadecimal = P'0' * S'xX' * R('09', 'AF', 'af') ^ 1
-- Either `digits.` with optional fraction digits, or `.digits`
local float = D^1 * P'.' * D^0 + P'.' * D^1
-- Float or integer, optionally followed by an exponent part
local maybeexp = (float + decimal) * (S'eE' * sign * D^1)^-1
--- Builds a pattern that matches any one of the given keywords as a whole word.
-- @param keywords string with whitespace-separated keywords
-- @return LPeg pattern: alternation of all keywords followed by a word boundary
local function compile_keywords(keywords)
  local list = {}
  for word in keywords:gmatch('%S+') do
    list[#list + 1] = word
  end

  -- Sort by length, longest first, so that a keyword which is a prefix of
  -- another one (e.g. `else` / `elseif`) is tried after the longer one.
  table.sort(list, function(a, b)
    return #a > #b
  end)

  local pattern
  for _, word in ipairs(list) do
    local p = lpeg.P(word)
    pattern = pattern and (pattern + p) or p
  end

  local AB = B + EOS -- ending boundary
  return pattern * AB
end
-- Identifiers
-- An identifier starts with an identifier character and continues with
-- identifier characters or digits.
local ident = I * (I + D)^0
-- Zero or more `.name` continuations (dotted access such as `a.b.c`)
local expr = ('.' * ident)^0
-- Token definitions as {kind, pattern} pairs. The order is significant:
-- compile_patterns() joins them with `+` in this order, so earlier entries
-- take precedence over later ones (e.g. comments are tried before the `-`
-- operator, and the catch-all `error` entry comes last).
local patterns = {
  {'whitespace', S'\r\n\f\t\v '^1},
  {'constant', (P'true' + 'false' + 'nil') * B},
  {'string', singlequoted + doublequoted + longstring},
  {'comment', multiline + singleline},
  {'number', hexadecimal + maybeexp},
  -- Word operators require a trailing word boundary; without it identifiers
  -- that merely start with `not`/`and`/`or` (e.g. `order`, `nothing`) would be
  -- split into an operator followed by an identifier.
  {'operator', (P'not' + 'and' + 'or') * B
      + '...' + '..' + '~=' + '==' + '>=' + '<='
      + S']{=>^[<;)*(%}+-:,/.#'},
  {'keyword', compile_keywords([[
      break do else elseif end for function if in local repeat return then until while
      ]])},
  -- After a plain identifier, extend the match over any `.name` continuations
  -- by returning the position where `expr` stops.
  {'identifier', lpeg.Cmt(ident,
      function(input, index)
        return expr:match(input, index)
      end)
  },
  -- Fallback: any single character that nothing else accepted.
  {'error', 1},
}
-- Cached result of compile_patterns(); built on first use.
local compiled

--- Combines all entries of `patterns` into a single LPeg pattern.
-- Each alternative produces two captures: the token kind (constant capture)
-- and the position right after the matched token.
-- @return the combined pattern (built once, then reused)
local function compile_patterns()
  if not compiled then
    local function process(elt)
      local n, grammar = elt[1], elt[2]
      return lpeg.Cc(n) * lpeg.P(grammar) * lpeg.Cp()
    end

    -- Alternatives are joined in table order, so earlier entries win.
    local any = process(patterns[1])
    for i = 2, #patterns do
      any = any + process(patterns[i])
    end
    compiled = any
  end

  return compiled
end
--- Advances a line/column position past the text of one token.
-- @param token text of the token that was just consumed
-- @param lnum line number at the start of the token (defaults to 1)
-- @param cnum column number at the start of the token (defaults to 1)
-- @return line number and column number right after the token
local function sync(token, lnum, cnum)
  lnum, cnum = lnum or 1, cnum or 1

  -- Count the newlines and remember where the last one is (single pass).
  local lastidx
  for i in token:gmatch('()\n') do
    lnum = lnum + 1
    lastidx = i
  end

  if lastidx then
    -- Column restarts after the last newline: 1 + number of characters following it.
    cnum = #token - lastidx + 1
  else
    cnum = cnum + #token
  end

  return lnum, cnum
end
-- Module table; the functions assigned to it below form the public API.
local exports = {}
--- Returns an iterator over the tokens of `input`.
-- Each call of the iterator yields four values: the token kind, the token
-- text, and the line and column numbers at which the token starts. The
-- iterator returns nothing once the parser no longer matches.
-- @param input string to tokenize
-- @return iterator function suitable for a generic `for` loop
exports.gmatch = function(input)
  local parser = compile_patterns()
  local index, lnum, cnum = 1, 1, 1

  return function()
    local kind, after = parser:match(input, index)
    if kind and after then
      local text = input:sub(index, after - 1)
      -- Report the position where this token begins, then advance past it.
      local oldlnum, oldcnum = lnum, cnum
      index = after
      lnum, cnum = sync(text, lnum, cnum)
      return kind, text, oldlnum, oldcnum
    end
  end
end
--- Tokenizes `input` and collects all tokens into an array.
-- @param input string to tokenize
-- @return array of `{kind, text, lnum, cnum}` entries, in input order
exports.lex_to_table = function(input)
  local out = {}

  for kind, text, lnum, cnum in exports.gmatch(input) do
    out[#out + 1] = {kind, text, lnum, cnum}
  end

  return out
end
-- Hand the module table back to `require`.
return exports
|