Skip to content

Commit

Permalink
fix(perf): HTML4::EncodingReader detection
Browse files Browse the repository at this point in the history
  • Loading branch information
flavorjones committed Apr 10, 2022
1 parent 1eb5580 commit e444525
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 1 deletion.
2 changes: 1 addition & 1 deletion lib/nokogiri/html4/document.rb
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,7 @@ def start_element(name, attrs = [])
end

def self.detect_encoding(chunk)
(m = chunk.match(/\A(<\?xml[ \t\r\n]+[^>]*>)/)) &&
(m = chunk.match(/\A(<\?xml[ \t\r\n][^>]*>)/)) &&
(return Nokogiri.XML(m[1]).encoding)

if Nokogiri.jruby?
Expand Down
12 changes: 12 additions & 0 deletions test/html4/test_document_encoding.rb
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,18 @@ def binopen(file)
end
end
end

it "does not start backtracking during detection of XHTML encoding" do
  # This test is a quick and dirty version of the more complete perf test
  # that is on main.
  #
  # The input is an unterminated "<?xml " declaration followed by a long run
  # of spaces and no closing ">". Parsing it must finish in under a second.
  n = 40_000
  redos_string = "<?xml " + (" " * n)
  redos_string.encode!("ASCII-8BIT")

  # Measure with the monotonic clock rather than Time.now: wall-clock time can
  # jump (NTP sync, manual adjustment) and make the elapsed value meaningless.
  start_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  Nokogiri::HTML4(redos_string)
  elapsed_time = Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time

  assert_operator(elapsed_time, :<, 1)
end
end
end
end
Expand Down

0 comments on commit e444525

Please sign in to comment.