mirror of
https://github.com/onionltd/EndGame.git
synced 2024-10-01 04:15:36 -04:00
1196 lines
33 KiB
Lua
1196 lines
33 KiB
Lua
-- Copyright (C) Yichun Zhang (agentzh)
|
|
|
|
|
|
local ffi = require 'ffi'
|
|
local base = require "resty.core.base"
|
|
local bit = require "bit"
|
|
require "resty.core.time" -- for ngx.now used by resty.lrucache
|
|
local lrucache = require "resty.lrucache"
|
|
|
|
local lrucache_get = lrucache.get
|
|
local lrucache_set = lrucache.set
|
|
local ffi_string = ffi.string
|
|
local ffi_gc = ffi.gc
|
|
local ffi_copy = ffi.copy
|
|
local ffi_cast = ffi.cast
|
|
local C = ffi.C
|
|
local bor = bit.bor
|
|
local band = bit.band
|
|
local lshift = bit.lshift
|
|
local sub = string.sub
|
|
local fmt = string.format
|
|
local byte = string.byte
|
|
local ngx = ngx
|
|
local type = type
|
|
local tostring = tostring
|
|
local error = error
|
|
local setmetatable = setmetatable
|
|
local tonumber = tonumber
|
|
local get_string_buf = base.get_string_buf
|
|
local get_string_buf_size = base.get_string_buf_size
|
|
local new_tab = base.new_tab
|
|
local subsystem = ngx.config.subsystem
|
|
local ngx_phase = ngx.get_phase
|
|
local ngx_log = ngx.log
|
|
local ngx_NOTICE = ngx.NOTICE
|
|
|
|
|
|
local _M = {
|
|
version = base.version
|
|
}
|
|
|
|
|
|
ngx.re = new_tab(0, 5)
|
|
|
|
|
|
ffi.cdef[[
|
|
const char *pcre_version(void);
|
|
]]
|
|
|
|
|
|
local pcre_ver
|
|
|
|
if not pcall(function() pcre_ver = ffi_string(C.pcre_version()) end) then
|
|
setmetatable(ngx.re, {
|
|
__index = function(_, key)
|
|
error("no support for 'ngx.re." .. key .. "': OpenResty was " ..
|
|
"compiled without PCRE support", 2)
|
|
end
|
|
})
|
|
|
|
_M.no_pcre = true
|
|
|
|
return _M
|
|
end
|
|
|
|
|
|
local MAX_ERR_MSG_LEN = 128
|
|
|
|
|
|
local FLAG_COMPILE_ONCE = 0x01
|
|
local FLAG_DFA = 0x02
|
|
local FLAG_JIT = 0x04
|
|
local FLAG_DUPNAMES = 0x08
|
|
local FLAG_NO_UTF8_CHECK = 0x10
|
|
|
|
|
|
local PCRE_CASELESS = 0x0000001
|
|
local PCRE_MULTILINE = 0x0000002
|
|
local PCRE_DOTALL = 0x0000004
|
|
local PCRE_EXTENDED = 0x0000008
|
|
local PCRE_ANCHORED = 0x0000010
|
|
local PCRE_UTF8 = 0x0000800
|
|
local PCRE_DUPNAMES = 0x0080000
|
|
local PCRE_JAVASCRIPT_COMPAT = 0x2000000
|
|
|
|
|
|
local PCRE_ERROR_NOMATCH = -1
|
|
|
|
|
|
local regex_match_cache
|
|
local regex_sub_func_cache = new_tab(0, 4)
|
|
local regex_sub_str_cache = new_tab(0, 4)
|
|
local max_regex_cache_size
|
|
local regex_cache_size = 0
|
|
local script_engine
|
|
local ngx_lua_ffi_max_regex_cache_size
|
|
local ngx_lua_ffi_destroy_regex
|
|
local ngx_lua_ffi_compile_regex
|
|
local ngx_lua_ffi_exec_regex
|
|
local ngx_lua_ffi_create_script_engine
|
|
local ngx_lua_ffi_destroy_script_engine
|
|
local ngx_lua_ffi_init_script_engine
|
|
local ngx_lua_ffi_compile_replace_template
|
|
local ngx_lua_ffi_script_eval_len
|
|
local ngx_lua_ffi_script_eval_data
|
|
|
|
-- PCRE 8.43 on macOS introduced the MAP_JIT option when creating the memory
|
|
-- region used to store JIT compiled code, which does not survive across
|
|
-- `fork()`, causing further usage of PCRE JIT compiler to segfault in worker
|
|
-- processes.
|
|
--
|
|
-- This flag prevents any regex used in the init phase to be JIT compiled or
|
|
-- cached when running under macOS, even if the user requests so. Caching is
|
|
-- thus disabled to prevent further calls of same regex in worker to have poor
|
|
-- performance.
|
|
--
|
|
-- TODO: improve this workaround when PCRE allows for unspecifying the MAP_JIT
|
|
-- option.
|
|
local no_jit_in_init
|
|
|
|
if jit.os == "OSX" then
|
|
local maj, min = string.match(pcre_ver, "^(%d+)%.(%d+)")
|
|
if maj and min then
|
|
local pcre_ver_num = tonumber(maj .. min)
|
|
|
|
if pcre_ver_num >= 843 then
|
|
no_jit_in_init = true
|
|
end
|
|
|
|
else
|
|
-- assume this version is faulty as well
|
|
no_jit_in_init = true
|
|
end
|
|
end
|
|
|
|
|
|
if subsystem == 'http' then
|
|
ffi.cdef[[
|
|
|
|
typedef struct {
|
|
ngx_str_t value;
|
|
void *lengths;
|
|
void *values;
|
|
} ngx_http_lua_complex_value_t;
|
|
|
|
typedef struct {
|
|
void *pool;
|
|
unsigned char *name_table;
|
|
int name_count;
|
|
int name_entry_size;
|
|
|
|
int ncaptures;
|
|
int *captures;
|
|
|
|
void *regex;
|
|
void *regex_sd;
|
|
|
|
ngx_http_lua_complex_value_t *replace;
|
|
|
|
const char *pattern;
|
|
} ngx_http_lua_regex_t;
|
|
|
|
ngx_http_lua_regex_t *
|
|
ngx_http_lua_ffi_compile_regex(const unsigned char *pat,
|
|
size_t pat_len, int flags,
|
|
int pcre_opts, unsigned char *errstr,
|
|
size_t errstr_size);
|
|
|
|
int ngx_http_lua_ffi_exec_regex(ngx_http_lua_regex_t *re, int flags,
|
|
const unsigned char *s, size_t len, int pos);
|
|
|
|
void ngx_http_lua_ffi_destroy_regex(ngx_http_lua_regex_t *re);
|
|
|
|
int ngx_http_lua_ffi_compile_replace_template(ngx_http_lua_regex_t *re,
|
|
const unsigned char
|
|
*replace_data,
|
|
size_t replace_len);
|
|
|
|
struct ngx_http_lua_script_engine_s;
|
|
typedef struct ngx_http_lua_script_engine_s *ngx_http_lua_script_engine_t;
|
|
|
|
ngx_http_lua_script_engine_t *ngx_http_lua_ffi_create_script_engine(void);
|
|
|
|
void ngx_http_lua_ffi_init_script_engine(ngx_http_lua_script_engine_t *e,
|
|
const unsigned char *subj,
|
|
ngx_http_lua_regex_t *compiled,
|
|
int count);
|
|
|
|
void ngx_http_lua_ffi_destroy_script_engine(
|
|
ngx_http_lua_script_engine_t *e);
|
|
|
|
size_t ngx_http_lua_ffi_script_eval_len(ngx_http_lua_script_engine_t *e,
|
|
ngx_http_lua_complex_value_t *cv);
|
|
|
|
size_t ngx_http_lua_ffi_script_eval_data(ngx_http_lua_script_engine_t *e,
|
|
ngx_http_lua_complex_value_t *cv,
|
|
unsigned char *dst);
|
|
|
|
uint32_t ngx_http_lua_ffi_max_regex_cache_size(void);
|
|
]]
|
|
|
|
ngx_lua_ffi_max_regex_cache_size = C.ngx_http_lua_ffi_max_regex_cache_size
|
|
ngx_lua_ffi_destroy_regex = C.ngx_http_lua_ffi_destroy_regex
|
|
ngx_lua_ffi_compile_regex = C.ngx_http_lua_ffi_compile_regex
|
|
ngx_lua_ffi_exec_regex = C.ngx_http_lua_ffi_exec_regex
|
|
ngx_lua_ffi_create_script_engine = C.ngx_http_lua_ffi_create_script_engine
|
|
ngx_lua_ffi_init_script_engine = C.ngx_http_lua_ffi_init_script_engine
|
|
ngx_lua_ffi_destroy_script_engine = C.ngx_http_lua_ffi_destroy_script_engine
|
|
ngx_lua_ffi_compile_replace_template =
|
|
C.ngx_http_lua_ffi_compile_replace_template
|
|
ngx_lua_ffi_script_eval_len = C.ngx_http_lua_ffi_script_eval_len
|
|
ngx_lua_ffi_script_eval_data = C.ngx_http_lua_ffi_script_eval_data
|
|
|
|
elseif subsystem == 'stream' then
|
|
ffi.cdef[[
|
|
|
|
typedef struct {
|
|
ngx_str_t value;
|
|
void *lengths;
|
|
void *values;
|
|
} ngx_stream_lua_complex_value_t;
|
|
|
|
typedef struct {
|
|
void *pool;
|
|
unsigned char *name_table;
|
|
int name_count;
|
|
int name_entry_size;
|
|
|
|
int ncaptures;
|
|
int *captures;
|
|
|
|
void *regex;
|
|
void *regex_sd;
|
|
|
|
ngx_stream_lua_complex_value_t *replace;
|
|
|
|
const char *pattern;
|
|
} ngx_stream_lua_regex_t;
|
|
|
|
ngx_stream_lua_regex_t *
|
|
ngx_stream_lua_ffi_compile_regex(const unsigned char *pat,
|
|
size_t pat_len, int flags,
|
|
int pcre_opts, unsigned char *errstr,
|
|
size_t errstr_size);
|
|
|
|
int ngx_stream_lua_ffi_exec_regex(ngx_stream_lua_regex_t *re, int flags,
|
|
const unsigned char *s, size_t len, int pos);
|
|
|
|
void ngx_stream_lua_ffi_destroy_regex(ngx_stream_lua_regex_t *re);
|
|
|
|
int ngx_stream_lua_ffi_compile_replace_template(ngx_stream_lua_regex_t *re,
|
|
const unsigned char
|
|
*replace_data,
|
|
size_t replace_len);
|
|
|
|
struct ngx_stream_lua_script_engine_s;
|
|
typedef struct ngx_stream_lua_script_engine_s
|
|
*ngx_stream_lua_script_engine_t;
|
|
|
|
ngx_stream_lua_script_engine_t *
|
|
ngx_stream_lua_ffi_create_script_engine(void);
|
|
|
|
void ngx_stream_lua_ffi_init_script_engine(
|
|
ngx_stream_lua_script_engine_t *e, const unsigned char *subj,
|
|
ngx_stream_lua_regex_t *compiled, int count);
|
|
|
|
void ngx_stream_lua_ffi_destroy_script_engine(
|
|
ngx_stream_lua_script_engine_t *e);
|
|
|
|
size_t ngx_stream_lua_ffi_script_eval_len(
|
|
ngx_stream_lua_script_engine_t *e, ngx_stream_lua_complex_value_t *cv);
|
|
|
|
size_t ngx_stream_lua_ffi_script_eval_data(
|
|
ngx_stream_lua_script_engine_t *e, ngx_stream_lua_complex_value_t *cv,
|
|
unsigned char *dst);
|
|
|
|
uint32_t ngx_stream_lua_ffi_max_regex_cache_size(void);
|
|
]]
|
|
|
|
ngx_lua_ffi_max_regex_cache_size = C.ngx_stream_lua_ffi_max_regex_cache_size
|
|
ngx_lua_ffi_destroy_regex = C.ngx_stream_lua_ffi_destroy_regex
|
|
ngx_lua_ffi_compile_regex = C.ngx_stream_lua_ffi_compile_regex
|
|
ngx_lua_ffi_exec_regex = C.ngx_stream_lua_ffi_exec_regex
|
|
ngx_lua_ffi_create_script_engine = C.ngx_stream_lua_ffi_create_script_engine
|
|
ngx_lua_ffi_init_script_engine = C.ngx_stream_lua_ffi_init_script_engine
|
|
ngx_lua_ffi_destroy_script_engine =
|
|
C.ngx_stream_lua_ffi_destroy_script_engine
|
|
ngx_lua_ffi_compile_replace_template =
|
|
C.ngx_stream_lua_ffi_compile_replace_template
|
|
ngx_lua_ffi_script_eval_len = C.ngx_stream_lua_ffi_script_eval_len
|
|
ngx_lua_ffi_script_eval_data = C.ngx_stream_lua_ffi_script_eval_data
|
|
end
|
|
|
|
|
|
local c_str_type = ffi.typeof("const char *")
|
|
|
|
local cached_re_opts = new_tab(0, 4)
|
|
|
|
local buf_grow_ratio = 2
|
|
|
|
|
|
function _M.set_buf_grow_ratio(ratio)
|
|
buf_grow_ratio = ratio
|
|
end
|
|
|
|
|
|
local function get_max_regex_cache_size()
|
|
if max_regex_cache_size then
|
|
return max_regex_cache_size
|
|
end
|
|
max_regex_cache_size = ngx_lua_ffi_max_regex_cache_size()
|
|
return max_regex_cache_size
|
|
end
|
|
|
|
|
|
local regex_cache_is_empty = true
|
|
|
|
|
|
function _M.is_regex_cache_empty()
|
|
return regex_cache_is_empty
|
|
end
|
|
|
|
|
|
local function lrucache_set_wrapper(...)
|
|
regex_cache_is_empty = false
|
|
lrucache_set(...)
|
|
end
|
|
|
|
|
|
local parse_regex_opts = function (opts)
|
|
local t = cached_re_opts[opts]
|
|
if t then
|
|
return t[1], t[2]
|
|
end
|
|
|
|
local flags = 0
|
|
local pcre_opts = 0
|
|
local len = #opts
|
|
|
|
for i = 1, len do
|
|
local opt = byte(opts, i)
|
|
if opt == byte("o") then
|
|
flags = bor(flags, FLAG_COMPILE_ONCE)
|
|
|
|
elseif opt == byte("j") then
|
|
flags = bor(flags, FLAG_JIT)
|
|
|
|
elseif opt == byte("i") then
|
|
pcre_opts = bor(pcre_opts, PCRE_CASELESS)
|
|
|
|
elseif opt == byte("s") then
|
|
pcre_opts = bor(pcre_opts, PCRE_DOTALL)
|
|
|
|
elseif opt == byte("m") then
|
|
pcre_opts = bor(pcre_opts, PCRE_MULTILINE)
|
|
|
|
elseif opt == byte("u") then
|
|
pcre_opts = bor(pcre_opts, PCRE_UTF8)
|
|
|
|
elseif opt == byte("U") then
|
|
pcre_opts = bor(pcre_opts, PCRE_UTF8)
|
|
flags = bor(flags, FLAG_NO_UTF8_CHECK)
|
|
|
|
elseif opt == byte("x") then
|
|
pcre_opts = bor(pcre_opts, PCRE_EXTENDED)
|
|
|
|
elseif opt == byte("d") then
|
|
flags = bor(flags, FLAG_DFA)
|
|
|
|
elseif opt == byte("a") then
|
|
pcre_opts = bor(pcre_opts, PCRE_ANCHORED)
|
|
|
|
elseif opt == byte("D") then
|
|
pcre_opts = bor(pcre_opts, PCRE_DUPNAMES)
|
|
flags = bor(flags, FLAG_DUPNAMES)
|
|
|
|
elseif opt == byte("J") then
|
|
pcre_opts = bor(pcre_opts, PCRE_JAVASCRIPT_COMPAT)
|
|
|
|
else
|
|
error(fmt('unknown flag "%s" (flags "%s")', sub(opts, i, i), opts),
|
|
3)
|
|
end
|
|
end
|
|
|
|
cached_re_opts[opts] = {flags, pcre_opts}
|
|
return flags, pcre_opts
|
|
end
|
|
|
|
|
|
if no_jit_in_init then
|
|
local parse_regex_opts_ = parse_regex_opts
|
|
|
|
parse_regex_opts = function (opts)
|
|
if ngx_phase() ~= "init" then
|
|
-- past init_by_lua* phase now
|
|
parse_regex_opts = parse_regex_opts_
|
|
return parse_regex_opts(opts)
|
|
end
|
|
|
|
local t = cached_re_opts[opts]
|
|
if t then
|
|
return t[1], t[2]
|
|
end
|
|
|
|
local flags = 0
|
|
local pcre_opts = 0
|
|
local len = #opts
|
|
|
|
for i = 1, len do
|
|
local opt = byte(opts, i)
|
|
if opt == byte("o") then
|
|
ngx_log(ngx_NOTICE, "regex compilation cache disabled in init ",
|
|
"phase under macOS")
|
|
|
|
elseif opt == byte("j") then
|
|
ngx_log(ngx_NOTICE, "regex compilation disabled in init ",
|
|
"phase under macOS")
|
|
|
|
elseif opt == byte("i") then
|
|
pcre_opts = bor(pcre_opts, PCRE_CASELESS)
|
|
|
|
elseif opt == byte("s") then
|
|
pcre_opts = bor(pcre_opts, PCRE_DOTALL)
|
|
|
|
elseif opt == byte("m") then
|
|
pcre_opts = bor(pcre_opts, PCRE_MULTILINE)
|
|
|
|
elseif opt == byte("u") then
|
|
pcre_opts = bor(pcre_opts, PCRE_UTF8)
|
|
|
|
elseif opt == byte("U") then
|
|
pcre_opts = bor(pcre_opts, PCRE_UTF8)
|
|
flags = bor(flags, FLAG_NO_UTF8_CHECK)
|
|
|
|
elseif opt == byte("x") then
|
|
pcre_opts = bor(pcre_opts, PCRE_EXTENDED)
|
|
|
|
elseif opt == byte("d") then
|
|
flags = bor(flags, FLAG_DFA)
|
|
|
|
elseif opt == byte("a") then
|
|
pcre_opts = bor(pcre_opts, PCRE_ANCHORED)
|
|
|
|
elseif opt == byte("D") then
|
|
pcre_opts = bor(pcre_opts, PCRE_DUPNAMES)
|
|
flags = bor(flags, FLAG_DUPNAMES)
|
|
|
|
elseif opt == byte("J") then
|
|
pcre_opts = bor(pcre_opts, PCRE_JAVASCRIPT_COMPAT)
|
|
|
|
else
|
|
error(fmt('unknown flag "%s" (flags "%s")', sub(opts, i, i),
|
|
opts), 3)
|
|
end
|
|
end
|
|
|
|
cached_re_opts[opts] = {flags, pcre_opts}
|
|
return flags, pcre_opts
|
|
end
|
|
end
|
|
|
|
|
|
local function collect_named_captures(compiled, flags, res)
|
|
local name_count = compiled.name_count
|
|
local name_table = compiled.name_table
|
|
local entry_size = compiled.name_entry_size
|
|
|
|
local ind = 0
|
|
local dup_names = (band(flags, FLAG_DUPNAMES) ~= 0)
|
|
for i = 1, name_count do
|
|
local n = bor(lshift(name_table[ind], 8), name_table[ind + 1])
|
|
-- ngx.say("n = ", n)
|
|
local name = ffi_string(name_table + ind + 2)
|
|
local cap = res[n]
|
|
if dup_names then
|
|
-- unmatched captures (false) are not collected
|
|
if cap then
|
|
local old = res[name]
|
|
if old then
|
|
old[#old + 1] = cap
|
|
else
|
|
res[name] = {cap}
|
|
end
|
|
end
|
|
else
|
|
res[name] = cap
|
|
end
|
|
|
|
ind = ind + entry_size
|
|
end
|
|
end
|
|
|
|
|
|
local function collect_captures(compiled, rc, subj, flags, res)
|
|
local cap = compiled.captures
|
|
local ncap = compiled.ncaptures
|
|
local name_count = compiled.name_count
|
|
|
|
if not res then
|
|
res = new_tab(ncap, name_count)
|
|
end
|
|
|
|
local i = 0
|
|
local n = 0
|
|
while i <= ncap do
|
|
if i > rc then
|
|
res[i] = false
|
|
else
|
|
local from = cap[n]
|
|
if from >= 0 then
|
|
local to = cap[n + 1]
|
|
res[i] = sub(subj, from + 1, to)
|
|
else
|
|
res[i] = false
|
|
end
|
|
end
|
|
i = i + 1
|
|
n = n + 2
|
|
end
|
|
|
|
if name_count > 0 then
|
|
collect_named_captures(compiled, flags, res)
|
|
end
|
|
|
|
return res
|
|
end
|
|
|
|
|
|
_M.collect_captures = collect_captures
|
|
|
|
|
|
local function destroy_compiled_regex(compiled)
|
|
ngx_lua_ffi_destroy_regex(ffi_gc(compiled, nil))
|
|
end
|
|
|
|
|
|
_M.destroy_compiled_regex = destroy_compiled_regex
|
|
|
|
|
|
local function re_match_compile(regex, opts)
|
|
local flags = 0
|
|
local pcre_opts = 0
|
|
|
|
if opts then
|
|
flags, pcre_opts = parse_regex_opts(opts)
|
|
else
|
|
opts = ""
|
|
end
|
|
|
|
local compiled, key
|
|
local compile_once = (band(flags, FLAG_COMPILE_ONCE) == 1)
|
|
|
|
-- FIXME: better put this in the outer scope when fixing the ngx.re API's
|
|
-- compatibility in the init_by_lua* context.
|
|
if not regex_match_cache then
|
|
local sz = get_max_regex_cache_size()
|
|
if sz <= 0 then
|
|
compile_once = false
|
|
else
|
|
regex_match_cache = lrucache.new(sz)
|
|
end
|
|
end
|
|
|
|
if compile_once then
|
|
key = regex .. '\0' .. opts
|
|
compiled = lrucache_get(regex_match_cache, key)
|
|
end
|
|
|
|
-- compile the regex
|
|
|
|
if compiled == nil then
|
|
-- print("compiled regex not found, compiling regex...")
|
|
local errbuf = get_string_buf(MAX_ERR_MSG_LEN)
|
|
|
|
compiled = ngx_lua_ffi_compile_regex(regex, #regex, flags,
|
|
pcre_opts, errbuf,
|
|
MAX_ERR_MSG_LEN)
|
|
|
|
if compiled == nil then
|
|
return nil, ffi_string(errbuf)
|
|
end
|
|
|
|
ffi_gc(compiled, ngx_lua_ffi_destroy_regex)
|
|
|
|
-- print("ncaptures: ", compiled.ncaptures)
|
|
|
|
if compile_once then
|
|
-- print("inserting compiled regex into cache")
|
|
lrucache_set_wrapper(regex_match_cache, key, compiled)
|
|
end
|
|
end
|
|
|
|
return compiled, compile_once, flags
|
|
end
|
|
|
|
|
|
_M.re_match_compile = re_match_compile
|
|
|
|
|
|
local function re_match_helper(subj, regex, opts, ctx, want_caps, res, nth)
|
|
-- we need to cast this to strings to avoid exceptions when they are
|
|
-- something else.
|
|
subj = tostring(subj)
|
|
|
|
local compiled, compile_once, flags = re_match_compile(regex, opts)
|
|
if compiled == nil then
|
|
-- compiled_once holds the error string
|
|
if not want_caps then
|
|
return nil, nil, compile_once
|
|
end
|
|
return nil, compile_once
|
|
end
|
|
|
|
-- exec the compiled regex
|
|
|
|
local rc
|
|
do
|
|
local pos
|
|
if ctx then
|
|
pos = ctx.pos
|
|
if not pos or pos <= 0 then
|
|
pos = 0
|
|
else
|
|
pos = pos - 1
|
|
end
|
|
|
|
else
|
|
pos = 0
|
|
end
|
|
|
|
rc = ngx_lua_ffi_exec_regex(compiled, flags, subj, #subj, pos)
|
|
end
|
|
|
|
if rc == PCRE_ERROR_NOMATCH then
|
|
if not compile_once then
|
|
destroy_compiled_regex(compiled)
|
|
end
|
|
return nil
|
|
end
|
|
|
|
if rc < 0 then
|
|
if not compile_once then
|
|
destroy_compiled_regex(compiled)
|
|
end
|
|
if not want_caps then
|
|
return nil, nil, "pcre_exec() failed: " .. rc
|
|
end
|
|
return nil, "pcre_exec() failed: " .. rc
|
|
end
|
|
|
|
if rc == 0 then
|
|
if band(flags, FLAG_DFA) == 0 then
|
|
if not want_caps then
|
|
return nil, nil, "capture size too small"
|
|
end
|
|
return nil, "capture size too small"
|
|
end
|
|
|
|
rc = 1
|
|
end
|
|
|
|
-- print("cap 0: ", compiled.captures[0])
|
|
-- print("cap 1: ", compiled.captures[1])
|
|
|
|
if ctx then
|
|
ctx.pos = compiled.captures[1] + 1
|
|
end
|
|
|
|
if not want_caps then
|
|
if not nth or nth < 0 then
|
|
nth = 0
|
|
end
|
|
|
|
if nth > compiled.ncaptures then
|
|
return nil, nil, "nth out of bound"
|
|
end
|
|
|
|
if nth >= rc then
|
|
return nil, nil
|
|
end
|
|
|
|
local from = compiled.captures[nth * 2] + 1
|
|
local to = compiled.captures[nth * 2 + 1]
|
|
|
|
if from < 0 or to < 0 then
|
|
return nil, nil
|
|
end
|
|
|
|
return from, to
|
|
end
|
|
|
|
res = collect_captures(compiled, rc, subj, flags, res)
|
|
|
|
if not compile_once then
|
|
destroy_compiled_regex(compiled)
|
|
end
|
|
|
|
return res
|
|
end
|
|
|
|
|
|
function ngx.re.match(subj, regex, opts, ctx, res)
|
|
return re_match_helper(subj, regex, opts, ctx, true, res)
|
|
end
|
|
|
|
|
|
function ngx.re.find(subj, regex, opts, ctx, nth)
|
|
return re_match_helper(subj, regex, opts, ctx, false, nil, nth)
|
|
end
|
|
|
|
|
|
do
|
|
local function destroy_re_gmatch_iterator(iterator)
|
|
if not iterator._compile_once then
|
|
destroy_compiled_regex(iterator._compiled)
|
|
end
|
|
iterator._compiled = nil
|
|
iterator._pos = nil
|
|
iterator._subj = nil
|
|
end
|
|
|
|
|
|
local function iterate_re_gmatch(self)
|
|
local compiled = self._compiled
|
|
local subj = self._subj
|
|
local subj_len = self._subj_len
|
|
local flags = self._flags
|
|
local pos = self._pos
|
|
|
|
if not pos then
|
|
-- The iterator is exhausted.
|
|
return nil
|
|
end
|
|
|
|
local rc = ngx_lua_ffi_exec_regex(compiled, flags, subj, subj_len, pos)
|
|
|
|
if rc == PCRE_ERROR_NOMATCH then
|
|
destroy_re_gmatch_iterator(self)
|
|
return nil
|
|
end
|
|
|
|
if rc < 0 then
|
|
destroy_re_gmatch_iterator(self)
|
|
return nil, "pcre_exec() failed: " .. rc
|
|
end
|
|
|
|
if rc == 0 then
|
|
if band(flags, FLAG_DFA) == 0 then
|
|
destroy_re_gmatch_iterator(self)
|
|
return nil, "capture size too small"
|
|
end
|
|
|
|
rc = 1
|
|
end
|
|
|
|
local cp_pos = tonumber(compiled.captures[1])
|
|
if cp_pos == compiled.captures[0] then
|
|
cp_pos = cp_pos + 1
|
|
if cp_pos > subj_len then
|
|
local res = collect_captures(compiled, rc, subj, flags)
|
|
destroy_re_gmatch_iterator(self)
|
|
return res
|
|
end
|
|
end
|
|
self._pos = cp_pos
|
|
return collect_captures(compiled, rc, subj, flags)
|
|
end
|
|
|
|
|
|
local re_gmatch_iterator_mt = { __call = iterate_re_gmatch }
|
|
|
|
function ngx.re.gmatch(subj, regex, opts)
|
|
subj = tostring(subj)
|
|
|
|
local compiled, compile_once, flags = re_match_compile(regex, opts)
|
|
if compiled == nil then
|
|
-- compiled_once holds the error string
|
|
return nil, compile_once
|
|
end
|
|
|
|
local re_gmatch_iterator = {
|
|
_compiled = compiled,
|
|
_compile_once = compile_once,
|
|
_subj = subj,
|
|
_subj_len = #subj,
|
|
_flags = flags,
|
|
_pos = 0,
|
|
}
|
|
|
|
return setmetatable(re_gmatch_iterator, re_gmatch_iterator_mt)
|
|
end
|
|
end -- do
|
|
|
|
|
|
local function new_script_engine(subj, compiled, count)
|
|
if not script_engine then
|
|
script_engine = ngx_lua_ffi_create_script_engine()
|
|
if script_engine == nil then
|
|
return nil
|
|
end
|
|
ffi_gc(script_engine, ngx_lua_ffi_destroy_script_engine)
|
|
end
|
|
|
|
ngx_lua_ffi_init_script_engine(script_engine, subj, compiled, count)
|
|
return script_engine
|
|
end
|
|
|
|
|
|
local function check_buf_size(buf, buf_size, pos, len, new_len, must_alloc)
|
|
if new_len > buf_size then
|
|
buf_size = buf_size * buf_grow_ratio
|
|
if buf_size < new_len then
|
|
buf_size = new_len
|
|
end
|
|
local new_buf = get_string_buf(buf_size, must_alloc)
|
|
ffi_copy(new_buf, buf, len)
|
|
buf = new_buf
|
|
pos = buf + len
|
|
end
|
|
return buf, buf_size, pos, new_len
|
|
end
|
|
|
|
|
|
_M.check_buf_size = check_buf_size
|
|
|
|
|
|
local function re_sub_compile(regex, opts, replace, func)
|
|
local flags = 0
|
|
local pcre_opts = 0
|
|
|
|
if opts then
|
|
flags, pcre_opts = parse_regex_opts(opts)
|
|
else
|
|
opts = ""
|
|
end
|
|
|
|
local compiled
|
|
local compile_once = (band(flags, FLAG_COMPILE_ONCE) == 1)
|
|
if compile_once then
|
|
if func then
|
|
local subcache = regex_sub_func_cache[opts]
|
|
if subcache then
|
|
-- print("cache hit!")
|
|
compiled = subcache[regex]
|
|
end
|
|
|
|
else
|
|
local subcache = regex_sub_str_cache[opts]
|
|
if subcache then
|
|
local subsubcache = subcache[regex]
|
|
if subsubcache then
|
|
-- print("cache hit!")
|
|
compiled = subsubcache[replace]
|
|
end
|
|
end
|
|
end
|
|
end
|
|
|
|
-- compile the regex
|
|
|
|
if compiled == nil then
|
|
-- print("compiled regex not found, compiling regex...")
|
|
local errbuf = get_string_buf(MAX_ERR_MSG_LEN)
|
|
|
|
compiled = ngx_lua_ffi_compile_regex(regex, #regex, flags, pcre_opts,
|
|
errbuf, MAX_ERR_MSG_LEN)
|
|
|
|
if compiled == nil then
|
|
return nil, ffi_string(errbuf)
|
|
end
|
|
|
|
ffi_gc(compiled, ngx_lua_ffi_destroy_regex)
|
|
|
|
if func == nil then
|
|
local rc =
|
|
ngx_lua_ffi_compile_replace_template(compiled, replace,
|
|
#replace)
|
|
if rc ~= 0 then
|
|
if not compile_once then
|
|
destroy_compiled_regex(compiled)
|
|
end
|
|
return nil, "failed to compile the replacement template"
|
|
end
|
|
end
|
|
|
|
-- print("ncaptures: ", compiled.ncaptures)
|
|
|
|
if compile_once then
|
|
if regex_cache_size < get_max_regex_cache_size() then
|
|
-- print("inserting compiled regex into cache")
|
|
if func then
|
|
local subcache = regex_sub_func_cache[opts]
|
|
if not subcache then
|
|
regex_sub_func_cache[opts] = {[regex] = compiled}
|
|
|
|
else
|
|
subcache[regex] = compiled
|
|
end
|
|
|
|
else
|
|
local subcache = regex_sub_str_cache[opts]
|
|
if not subcache then
|
|
regex_sub_str_cache[opts] =
|
|
{[regex] = {[replace] = compiled}}
|
|
|
|
else
|
|
local subsubcache = subcache[regex]
|
|
if not subsubcache then
|
|
subcache[regex] = {[replace] = compiled}
|
|
|
|
else
|
|
subsubcache[replace] = compiled
|
|
end
|
|
end
|
|
end
|
|
|
|
regex_cache_size = regex_cache_size + 1
|
|
else
|
|
compile_once = false
|
|
end
|
|
end
|
|
end
|
|
|
|
return compiled, compile_once, flags
|
|
end
|
|
|
|
|
|
_M.re_sub_compile = re_sub_compile
|
|
|
|
|
|
local function re_sub_func_helper(subj, regex, replace, opts, global)
|
|
local compiled, compile_once, flags =
|
|
re_sub_compile(regex, opts, nil, replace)
|
|
if not compiled then
|
|
-- error string is in compile_once
|
|
return nil, nil, compile_once
|
|
end
|
|
|
|
-- exec the compiled regex
|
|
|
|
subj = tostring(subj)
|
|
local subj_len = #subj
|
|
local count = 0
|
|
local pos = 0
|
|
local cp_pos = 0
|
|
|
|
local dst_buf_size = get_string_buf_size()
|
|
-- Note: we have to always allocate the string buffer because
|
|
-- the user might call whatever resty.core's API functions recursively
|
|
-- in the user callback function.
|
|
local dst_buf = get_string_buf(dst_buf_size, true)
|
|
local dst_pos = dst_buf
|
|
local dst_len = 0
|
|
|
|
while true do
|
|
local rc = ngx_lua_ffi_exec_regex(compiled, flags, subj, subj_len, pos)
|
|
if rc == PCRE_ERROR_NOMATCH then
|
|
break
|
|
end
|
|
|
|
if rc < 0 then
|
|
if not compile_once then
|
|
destroy_compiled_regex(compiled)
|
|
end
|
|
return nil, nil, "pcre_exec() failed: " .. rc
|
|
end
|
|
|
|
if rc == 0 then
|
|
if band(flags, FLAG_DFA) == 0 then
|
|
if not compile_once then
|
|
destroy_compiled_regex(compiled)
|
|
end
|
|
return nil, nil, "capture size too small"
|
|
end
|
|
|
|
rc = 1
|
|
end
|
|
|
|
count = count + 1
|
|
local prefix_len = compiled.captures[0] - cp_pos
|
|
|
|
local res = collect_captures(compiled, rc, subj, flags)
|
|
|
|
local piece = tostring(replace(res))
|
|
local piece_len = #piece
|
|
|
|
local new_dst_len = dst_len + prefix_len + piece_len
|
|
dst_buf, dst_buf_size, dst_pos, dst_len =
|
|
check_buf_size(dst_buf, dst_buf_size, dst_pos, dst_len,
|
|
new_dst_len, true)
|
|
|
|
if prefix_len > 0 then
|
|
ffi_copy(dst_pos, ffi_cast(c_str_type, subj) + cp_pos,
|
|
prefix_len)
|
|
dst_pos = dst_pos + prefix_len
|
|
end
|
|
|
|
if piece_len > 0 then
|
|
ffi_copy(dst_pos, piece, piece_len)
|
|
dst_pos = dst_pos + piece_len
|
|
end
|
|
|
|
cp_pos = compiled.captures[1]
|
|
pos = cp_pos
|
|
if pos == compiled.captures[0] then
|
|
pos = pos + 1
|
|
if pos > subj_len then
|
|
break
|
|
end
|
|
end
|
|
|
|
if not global then
|
|
break
|
|
end
|
|
end
|
|
|
|
if not compile_once then
|
|
destroy_compiled_regex(compiled)
|
|
end
|
|
|
|
if count > 0 then
|
|
if pos < subj_len then
|
|
local suffix_len = subj_len - cp_pos
|
|
|
|
local new_dst_len = dst_len + suffix_len
|
|
local _
|
|
dst_buf, _, dst_pos, dst_len =
|
|
check_buf_size(dst_buf, dst_buf_size, dst_pos, dst_len,
|
|
new_dst_len, true)
|
|
|
|
ffi_copy(dst_pos, ffi_cast(c_str_type, subj) + cp_pos,
|
|
suffix_len)
|
|
end
|
|
return ffi_string(dst_buf, dst_len), count
|
|
end
|
|
|
|
return subj, 0
|
|
end
|
|
|
|
|
|
local function re_sub_str_helper(subj, regex, replace, opts, global)
|
|
local compiled, compile_once, flags =
|
|
re_sub_compile(regex, opts, replace, nil)
|
|
if not compiled then
|
|
-- error string is in compile_once
|
|
return nil, nil, compile_once
|
|
end
|
|
|
|
-- exec the compiled regex
|
|
|
|
subj = tostring(subj)
|
|
local subj_len = #subj
|
|
local count = 0
|
|
local pos = 0
|
|
local cp_pos = 0
|
|
|
|
local dst_buf_size = get_string_buf_size()
|
|
local dst_buf = get_string_buf(dst_buf_size)
|
|
local dst_pos = dst_buf
|
|
local dst_len = 0
|
|
|
|
while true do
|
|
local rc = ngx_lua_ffi_exec_regex(compiled, flags, subj, subj_len, pos)
|
|
if rc == PCRE_ERROR_NOMATCH then
|
|
break
|
|
end
|
|
|
|
if rc < 0 then
|
|
if not compile_once then
|
|
destroy_compiled_regex(compiled)
|
|
end
|
|
return nil, nil, "pcre_exec() failed: " .. rc
|
|
end
|
|
|
|
if rc == 0 then
|
|
if band(flags, FLAG_DFA) == 0 then
|
|
if not compile_once then
|
|
destroy_compiled_regex(compiled)
|
|
end
|
|
return nil, nil, "capture size too small"
|
|
end
|
|
|
|
rc = 1
|
|
end
|
|
|
|
count = count + 1
|
|
local prefix_len = compiled.captures[0] - cp_pos
|
|
|
|
local cv = compiled.replace
|
|
if cv.lengths ~= nil then
|
|
local e = new_script_engine(subj, compiled, rc)
|
|
if e == nil then
|
|
return nil, nil, "failed to create script engine"
|
|
end
|
|
|
|
local bit_len = ngx_lua_ffi_script_eval_len(e, cv)
|
|
local new_dst_len = dst_len + prefix_len + bit_len
|
|
dst_buf, dst_buf_size, dst_pos, dst_len =
|
|
check_buf_size(dst_buf, dst_buf_size, dst_pos, dst_len,
|
|
new_dst_len)
|
|
|
|
if prefix_len > 0 then
|
|
ffi_copy(dst_pos, ffi_cast(c_str_type, subj) + cp_pos,
|
|
prefix_len)
|
|
dst_pos = dst_pos + prefix_len
|
|
end
|
|
|
|
if bit_len > 0 then
|
|
ngx_lua_ffi_script_eval_data(e, cv, dst_pos)
|
|
dst_pos = dst_pos + bit_len
|
|
end
|
|
|
|
else
|
|
local bit_len = cv.value.len
|
|
|
|
dst_buf, dst_buf_size, dst_pos, dst_len =
|
|
check_buf_size(dst_buf, dst_buf_size, dst_pos, dst_len,
|
|
dst_len + prefix_len + bit_len)
|
|
|
|
if prefix_len > 0 then
|
|
ffi_copy(dst_pos, ffi_cast(c_str_type, subj) + cp_pos,
|
|
prefix_len)
|
|
dst_pos = dst_pos + prefix_len
|
|
end
|
|
|
|
if bit_len > 0 then
|
|
ffi_copy(dst_pos, cv.value.data, bit_len)
|
|
dst_pos = dst_pos + bit_len
|
|
end
|
|
end
|
|
|
|
cp_pos = compiled.captures[1]
|
|
pos = cp_pos
|
|
if pos == compiled.captures[0] then
|
|
pos = pos + 1
|
|
if pos > subj_len then
|
|
break
|
|
end
|
|
end
|
|
|
|
if not global then
|
|
break
|
|
end
|
|
end
|
|
|
|
if not compile_once then
|
|
destroy_compiled_regex(compiled)
|
|
end
|
|
|
|
if count > 0 then
|
|
if pos < subj_len then
|
|
local suffix_len = subj_len - cp_pos
|
|
|
|
local new_dst_len = dst_len + suffix_len
|
|
local _
|
|
dst_buf, _, dst_pos, dst_len =
|
|
check_buf_size(dst_buf, dst_buf_size, dst_pos, dst_len,
|
|
new_dst_len)
|
|
|
|
ffi_copy(dst_pos, ffi_cast(c_str_type, subj) + cp_pos,
|
|
suffix_len)
|
|
end
|
|
return ffi_string(dst_buf, dst_len), count
|
|
end
|
|
|
|
return subj, 0
|
|
end
|
|
|
|
|
|
local function re_sub_helper(subj, regex, replace, opts, global)
|
|
local repl_type = type(replace)
|
|
if repl_type == "function" then
|
|
return re_sub_func_helper(subj, regex, replace, opts, global)
|
|
end
|
|
|
|
if repl_type ~= "string" then
|
|
replace = tostring(replace)
|
|
end
|
|
|
|
return re_sub_str_helper(subj, regex, replace, opts, global)
|
|
end
|
|
|
|
|
|
function ngx.re.sub(subj, regex, replace, opts)
|
|
return re_sub_helper(subj, regex, replace, opts, false)
|
|
end
|
|
|
|
|
|
function ngx.re.gsub(subj, regex, replace, opts)
|
|
return re_sub_helper(subj, regex, replace, opts, true)
|
|
end
|
|
|
|
|
|
return _M
|