Skip to content

Commit

Permalink
optimize: regex: improved heuristic enabling the PCRE MAP_JIT workaro…
Browse files Browse the repository at this point in the history
…und.

This is a follow-up to d046562 that improves the heuristics used to decide
whether PCRE JIT compilation should be disabled in the init phase under
macOS. The heuristics are now:

- If under macOS
- If using PCRE 8.43 or greater
- Then PCRE JIT compilation will be disabled in the init phase

While PCRE JIT compilation is disabled as such, a notice log will notify
users attempting to use the `j` or `o` flags.

Once past the init phase, we revert the workaround and allow for the `j`
and `o` flags to be used, without any branching or additional
performance penalty.

New tests and minor refactorings have been included as well.
  • Loading branch information
thibaultcha committed May 7, 2019
1 parent feaffc1 commit f45c32e
Show file tree
Hide file tree
Showing 2 changed files with 172 additions and 33 deletions.
133 changes: 105 additions & 28 deletions lib/resty/core/regex.lua
Original file line number Diff line number Diff line change
Expand Up @@ -79,24 +79,40 @@ local ngx_lua_ffi_init_script_engine
local ngx_lua_ffi_compile_replace_template
local ngx_lua_ffi_script_eval_len
local ngx_lua_ffi_script_eval_data
-- PCRE 8.43 on macOS introduced the MAP_JIT option when creating memory region
-- used to store JIT compiled code, which does not survive across `fork()`,
-- causing further usage of PCRE JIT compiler to segfault in worker processes

-- PCRE 8.43 on macOS introduced the MAP_JIT option when creating the memory
-- region used to store JIT compiled code, which does not survive across
-- `fork()`, causing further usage of PCRE JIT compiler to segfault in worker
-- processes.
--
-- This flag prevents any regex used in the init phase from being JIT compiled
-- or cached when running under macOS, even if the user requests so. Caching is
-- thus disabled to prevent further calls of the same regex in workers from
-- having poor performance.
--
-- this flag prevents any regex used in init phase to be JIT compiled or cached
-- when running under macOS even if the user requests so. caching is disabled
-- to prevent further calls of same regex in worker to have poor performance.
local pcre_map_jit_fix = true
-- TODO: improve this workaround when PCRE allows for unspecifying the MAP_JIT
-- option.
local no_jit_in_init

if jit.os == "OSX" then
ffi.cdef[[
const char *pcre_version(void);
]]

local function get_pcre_map_jit_fix()
if not pcre_map_jit_fix then
return pcre_map_jit_fix
end
local pcre_ver = ffi_string(C.pcre_version())

pcre_map_jit_fix = (jit.os == "OSX" and ngx_phase() == "init")
local maj, min = string.match(pcre_ver, "^(%d+)%.(%d+)")
if maj and min then
local pcre_ver_num = tonumber(maj .. min)

return pcre_map_jit_fix
if pcre_ver_num >= 843 then
no_jit_in_init = true
end

else
-- assume this version is faulty as well
no_jit_in_init = true
end
end


Expand Down Expand Up @@ -298,7 +314,7 @@ local function lrucache_set_wrapper(...)
end


local function parse_regex_opts(opts)
local parse_regex_opts = function (opts)
local t = cached_re_opts[opts]
if t then
return t[1], t[2]
Expand All @@ -311,22 +327,10 @@ local function parse_regex_opts(opts)
for i = 1, len do
local opt = byte(opts, i)
if opt == byte("o") then
if get_pcre_map_jit_fix() then
ngx_log(ngx_NOTICE, "running regex in init phase under macOS, ",
"compilation cache temporarily disabled")

else
flags = bor(flags, FLAG_COMPILE_ONCE)
end
flags = bor(flags, FLAG_COMPILE_ONCE)

elseif opt == byte("j") then
if get_pcre_map_jit_fix() then
ngx_log(ngx_NOTICE, "running regex in init phase under macOS, ",
"PCRE JIT compilation temporarily disabled")

else
flags = bor(flags, FLAG_JIT)
end
flags = bor(flags, FLAG_JIT)

elseif opt == byte("i") then
pcre_opts = bor(pcre_opts, PCRE_CASELESS)
Expand Down Expand Up @@ -371,6 +375,79 @@ local function parse_regex_opts(opts)
end


-- Init-phase override of `parse_regex_opts`, installed only when
-- `no_jit_in_init` is set (PCRE MAP_JIT workaround).
--
-- While `ngx_phase()` reports "init", the "j" and "o" options do not set
-- FLAG_JIT / FLAG_COMPILE_ONCE; a notice is logged instead. On the first call
-- made outside of the init phase the override puts the regular parser back in
-- place and delegates to it, so later calls pay no extra branching cost.
if no_jit_in_init then
    local parse_regex_opts_ = parse_regex_opts

    -- Results parsed during the init phase are memoized in this private table
    -- rather than in `cached_re_opts`. The regular parser looks up
    -- `cached_re_opts` before parsing, so an entry stored there during init
    -- would keep returning flags without FLAG_JIT / FLAG_COMPILE_ONCE for the
    -- same option string even after the workaround has been reverted.
    local init_re_opts = {}

    -- Parses a regex option string during the init phase.
    --
    -- @param opts  option string, one character per option (e.g. "jo")
    -- @return flags, pcre_opts  the two bit masks built from `opts`
    -- Raises an error (level 3) on an unknown option character.
    parse_regex_opts = function (opts)
        if ngx_phase() ~= "init" then
            -- past init_by_lua* phase now
            parse_regex_opts = parse_regex_opts_
            return parse_regex_opts(opts)
        end

        -- memoizing also keeps the notices below to one per option string
        local t = init_re_opts[opts]
        if t then
            return t[1], t[2]
        end

        local flags = 0
        local pcre_opts = 0
        local len = #opts

        for i = 1, len do
            local opt = byte(opts, i)
            if opt == byte("o") then
                -- FLAG_COMPILE_ONCE deliberately not set
                ngx_log(ngx_NOTICE, "regex compilation cache disabled in init ",
                                    "phase under macOS")

            elseif opt == byte("j") then
                -- FLAG_JIT deliberately not set
                ngx_log(ngx_NOTICE, "regex compilation disabled in init ",
                                    "phase under macOS")

            elseif opt == byte("i") then
                pcre_opts = bor(pcre_opts, PCRE_CASELESS)

            elseif opt == byte("s") then
                pcre_opts = bor(pcre_opts, PCRE_DOTALL)

            elseif opt == byte("m") then
                pcre_opts = bor(pcre_opts, PCRE_MULTILINE)

            elseif opt == byte("u") then
                pcre_opts = bor(pcre_opts, PCRE_UTF8)

            elseif opt == byte("U") then
                pcre_opts = bor(pcre_opts, PCRE_UTF8)
                flags = bor(flags, FLAG_NO_UTF8_CHECK)

            elseif opt == byte("x") then
                pcre_opts = bor(pcre_opts, PCRE_EXTENDED)

            elseif opt == byte("d") then
                flags = bor(flags, FLAG_DFA)

            elseif opt == byte("a") then
                pcre_opts = bor(pcre_opts, PCRE_ANCHORED)

            elseif opt == byte("D") then
                pcre_opts = bor(pcre_opts, PCRE_DUPNAMES)
                flags = bor(flags, FLAG_DUPNAMES)

            elseif opt == byte("J") then
                pcre_opts = bor(pcre_opts, PCRE_JAVASCRIPT_COMPAT)

            else
                error(fmt('unknown flag "%s" (flags "%s")', sub(opts, i, i),
                          opts), 3)
            end
        end

        init_re_opts[opts] = {flags, pcre_opts}
        return flags, pcre_opts
    end
end


local function collect_named_captures(compiled, flags, res)
local name_count = compiled.name_count
local name_table = compiled.name_table
Expand Down
72 changes: 67 additions & 5 deletions t/re-bugs.t
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ run_tests();

__DATA__

=== TEST 1: PCRE MAP_JIT bug on macOS
=== TEST 1: PCRE MAP_JIT workaround on macOS
--- init_by_lua_block
ngx.re.match("c", "test", "jo")
--- config
Expand All @@ -45,17 +45,79 @@ GET /re
c0
c0
--- grep_error_log eval
qr/.+parse_regex_opts\(\): running regex in init phase under macOS,.+/
qr/.+parse_regex_opts\(\): .*? disabled in init phase under macOS/
--- grep_error_log_out eval
qr/(:?.+parse_regex_opts\(\): running regex in init phase under macOS,.+){2}/s
qr/parse_regex_opts\(\): regex compilation disabled in init phase under macOS
.*?parse_regex_opts\(\): regex compilation cache disabled in init phase under macOS/s
--- no_error_log
[error]
--- skip_eval
4: $^O ne 'darwin'



=== TEST 2: PCRE MAP_JIT bug fix does not affect other OSes
=== TEST 2: PCRE MAP_JIT workaround on macOS logs only once per flag
--- init_by_lua_block
jit.off() -- must disable in this test or logs will be fuzzy

for i = 1, 2 do
ngx.re.match("c", "test", "j")
end

for i = 1, 2 do
ngx.re.match("c", "test", "o")
end

for i = 1, 2 do
ngx.re.match("c", "test", "jo")
end
--- config
location /re {
content_by_lua_block {
ngx.say(ngx.re.sub("c", "a", "b", ""))
ngx.say(ngx.re.sub("c", "a", "b", "jo"))
}
}
--- request
GET /re
--- response_body
c0
c0
--- grep_error_log eval
qr/parse_regex_opts\(\): .*? disabled in init phase under macOS/
--- grep_error_log_out eval
qr/\A(?:parse_regex_opts\(\): regex compilation (?:cache )?disabled in init phase under macOS\s*){4}\z/
--- no_error_log
[error]
--- skip_eval
4: $^O ne 'darwin'



=== TEST 3: PCRE MAP_JIT workaround is reverted after init phase
--- init_by_lua_block
ngx.re.match("c", "test", "jo")
--- config
location /re {
content_by_lua_block {
ngx.say(ngx.re.sub("c", "a", "b", ""))
ngx.say(ngx.re.sub("c", "a", "b", "jo"))
}
}
--- request
GET /re
--- response_body
c0
c0
--- no_error_log
[error]
disabled in init phase under macOS, client:
--- skip_eval
4: $^O ne 'darwin'



=== TEST 4: PCRE MAP_JIT workaround is not in effect under other OSs
--- init_by_lua_block
ngx.re.match("c", "test", "jo")
--- config
Expand All @@ -72,6 +134,6 @@ c0
c0
--- no_error_log
[error]
running regex in init phase under macOS
disabled in init phase under macOS
--- skip_eval
4: $^O ne 'linux'

0 comments on commit f45c32e

Please sign in to comment.