Skip to content

Commit

Permalink
Merge pull request #637 from gjtorikian/double-ua
Browse files Browse the repository at this point in the history
Address wild double UA bug
  • Loading branch information
gjtorikian authored Apr 18, 2021
2 parents 3e79177 + ef3d8bd commit 3b2e7cc
Show file tree
Hide file tree
Showing 6 changed files with 194 additions and 25 deletions.
4 changes: 2 additions & 2 deletions bin/htmlproofer
Original file line number Diff line number Diff line change
Expand Up @@ -88,8 +88,8 @@ Mercenary.program(:htmlproofer) do |p|
options[:validation][:report_eof_tags] = opts['report_eof_tags'] unless opts['report_eof_tags'].nil?
options[:validation][:report_mismatched_tags] = opts['report_mismatched_tags'] unless opts['report_mismatched_tags'].nil?

options[:typhoeus] = HTMLProofer::Configuration.parse_json_option('typhoeus_config', opts['typhoeus_config']) unless opts['typhoeus_config'].nil?
options[:hydra] = HTMLProofer::Configuration.parse_json_option('hydra_config', opts['hydra_config']) unless opts['hydra_config'].nil?
options[:typhoeus] = HTMLProofer::Configuration.parse_json_option('typhoeus_config', opts['typhoeus_config'], symbolize_names: false) unless opts['typhoeus_config'].nil?
options[:hydra] = HTMLProofer::Configuration.parse_json_option('hydra_config', opts['hydra_config'], symbolize_names: false) unless opts['hydra_config'].nil?

unless opts['timeframe'].nil?
options[:cache] ||= {}
Expand Down
4 changes: 2 additions & 2 deletions lib/html-proofer/configuration.rb
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def self.to_regex?(item)
end
end

def self.parse_json_option(option_name, config)
def self.parse_json_option(option_name, config, symbolize_names: true)
raise ArgumentError, 'Must provide an option name in string format.' unless option_name.is_a?(String)
raise ArgumentError, 'Must provide an option name in string format.' if option_name.strip.empty?

Expand All @@ -78,7 +78,7 @@ def self.parse_json_option(option_name, config)
return {} if config.strip.empty?

begin
JSON.parse(config, { symbolize_names: true })
JSON.parse(config, { symbolize_names: symbolize_names })
rescue StandardError
raise ArgumentError, "Option '#{option_name} did not contain valid JSON."
end
Expand Down
43 changes: 25 additions & 18 deletions spec/html-proofer/command_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,105 +10,112 @@

it 'works with alt-ignore' do
ignorable_links = "#{FIXTURES_DIR}/images/ignorable_alt_via_options.html"
output = make_bin('--alt-ignore /wikimedia/,gpl.png', ignorable_links)
output = make_bin("--alt-ignore /wikimedia/,gpl.png #{ignorable_links}")
expect(output).to match('successfully')
end

it 'works with checks-to-ignore' do
external = "#{FIXTURES_DIR}/links/file.foo"
output = make_bin('--extension .foo --checks-to-ignore LinkCheck', external)
output = make_bin("--extension .foo --checks-to-ignore 'LinkCheck' #{external}")
expect(output).to match('successfully')
expect(output).to_not match('LinkCheck')
expect(output).to_not match(/Running.+?LinkCheck/)
end

it 'works with check-external-hash' do
broken_hash_on_the_web = "#{FIXTURES_DIR}/links/broken_hash_on_the_web.html"
output = make_bin('--check-external-hash', broken_hash_on_the_web)
output = make_bin("--check-external-hash #{broken_hash_on_the_web}")
expect(output).to match('1 failure')
end

it 'works with directory-index-file' do
link_pointing_to_directory = "#{FIXTURES_DIR}/links/link_pointing_to_directory.html"
output = make_bin('--directory-index-file index.php', link_pointing_to_directory)
output = make_bin("--directory-index-file index.php #{link_pointing_to_directory}")
expect(output).to match('successfully')
end

it 'works with disable-external' do
external = "#{FIXTURES_DIR}/links/broken_link_external.html"
output = make_bin('--disable-external', external)
output = make_bin("--disable-external #{external}")
expect(output).to match('successfully')
end

it 'works with extension' do
external = "#{FIXTURES_DIR}/links/file.foo"
output = make_bin('--extension .foo', external)
output = make_bin("--extension .foo #{external}")
expect(output).to match('1 failure')
expect(output).to match('LinkCheck')
end

it 'works with file-ignore' do
external = "#{FIXTURES_DIR}/links/broken_hash_internal.html"
output = make_bin("--file-ignore #{external}", external)
output = make_bin("--file-ignore #{external} #{external}")
expect(output).to match('successfully')
end

it 'works with internal-domains' do
translated_link = "#{FIXTURES_DIR}/links/link_translated_internal_domains.html"
output = make_bin('--internal-domains www.example.com,example.com', translated_link)
output = make_bin("--internal-domains www.example.com,example.com #{translated_link}")
expect(output).to match('successfully')
end

it 'works with url-ignore' do
ignorable_links = "#{FIXTURES_DIR}/links/ignorable_links_via_options.html"
output = make_bin('--url-ignore /^http:\/\//,/sdadsad/,../whaadadt.html', ignorable_links)
output = make_bin("--url-ignore /^http:\/\//,/sdadsad/,../whaadadt.html #{ignorable_links}")
expect(output).to match('successfully')
end

it 'works with url-swap' do
translated_link = "#{FIXTURES_DIR}/links/link_translated_via_href_swap.html"
output = make_bin('--url-swap "\A/articles/([\w-]+):\1.html"', translated_link)
output = make_bin(%|--url-swap "\\A/articles/([\\w-]+):\\1.html" #{translated_link}|)
expect(output).to match('successfully')
end

it 'works with url-swap and colon' do
translated_link = "#{FIXTURES_DIR}/links/link_translated_via_href_swap2.html"
output = make_bin('--url-swap "http\://www.example.com:"', translated_link)
output = make_bin(%(--url-swap "http\\://www.example.com:" #{translated_link}))
expect(output).to match('successfully')
end

it 'works with only-4xx' do
broken_hash_on_the_web = "#{FIXTURES_DIR}/links/broken_hash_on_the_web.html"
output = make_bin('--only-4xx', broken_hash_on_the_web)
output = make_bin("--only-4xx #{broken_hash_on_the_web}")
expect(output).to match('successfully')
end

it 'works with check-favicon' do
broken = "#{FIXTURES_DIR}/favicon/favicon_broken.html"
output = make_bin('--check-favicon', broken)
output = make_bin("--check-favicon #{broken}")
expect(output).to match('1 failure')
end

it 'works with check-html' do
broken = "#{FIXTURES_DIR}/html/missing_closing_quotes.html"
output = make_bin('--check-html --report-eof-tags', broken)
output = make_bin("--check-html --report-eof-tags #{broken}")
expect(output).to match('1 failure')
end

it 'works with empty-alt-ignore' do
broken = "#{FIXTURES_DIR}/images/empty_image_alt_text.html"
output = make_bin('--empty-alt-ignore', broken)
output = make_bin("--empty-alt-ignore #{broken}")
expect(output).to match('successfully')
end

it 'works with allow-hash-href' do
broken = "#{FIXTURES_DIR}/links/hash_href.html"
output = make_bin('--allow-hash-href', broken)
output = make_bin("--allow-hash-href #{broken}")
expect(output).to match('successfully')
end

# VCR doesn't work with the bin executable, for some reason, so parse its captured output instead
skip 'has only one UA [does not work in CI]' do
http = make_bin(%|--typhoeus-config='{"verbose":true,"headers":{"User-Agent":"Mozilla/5.0 (Macintosh; My New User-Agent)"}}' --as-links https://linkedin.com|)
expect(http.scan(/User-Agent: Typhoeus/).count).to eq 0
expect(http.scan(%r{User-Agent: Mozilla/5.0 \(Macintosh; My New User-Agent\)}).count).to eq 2
end

it 'navigates above itself in a subdirectory' do
real_link = "#{FIXTURES_DIR}/links/root_folder/documentation-from-my-project/"
output = make_bin("--root-dir #{FIXTURES_DIR}/links/root_folder/", real_link)
output = make_bin("--root-dir #{FIXTURES_DIR}/links/root_folder/ #{real_link}")
expect(output).to match('successfully')
end

Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions spec/html-proofer/proofer_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,12 @@
expect(proofer.options[:typhoeus][:in_processes]).to eq(nil)
end

it 'only has one UA with file' do
github_hash = "#{FIXTURES_DIR}/links/github_hash.html"
http = capture_proofer_http(github_hash, :file, typhoeus: { verbose: true, headers: { 'User-Agent' => 'Mozilla/5.0 (compatible; My New User-Agent)' } })
expect(http['request']['headers']['User-Agent']).to eq(['Mozilla/5.0 (compatible; My New User-Agent)'])
end

describe 'sorting' do
it 'understands sorting by path' do
output = send_proofer_output("#{FIXTURES_DIR}/sorting/path", :directory, log_level: :info)
Expand Down
15 changes: 12 additions & 3 deletions spec/spec_helper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -68,9 +68,18 @@ def send_proofer_output(file, type, opts = {})
end
end

def make_bin(cmd, path = nil)
stdout, stderr = Open3.capture3("bin/htmlproofer #{cmd} #{path}")
"#{stdout}\n#{stderr}"
# Runs a proofer for +item+ inside a VCR cassette and evaluates to the last
# HTTP interaction held by that cassette, in its serializable-hash form.
#
# item - the file/link input handed to make_proofer
# type - the input type handed to make_proofer (e.g. :file)
# opts - proofer options; also used to derive the cassette name
def capture_proofer_http(item, type, opts = {})
  runner = make_proofer(item, type, opts)
  VCR.use_cassette(make_cassette_name(item, opts), record: :new_episodes) do
    # The captured stderr is discarded; only the recorded interaction matters.
    capture_stderr { runner.run }
    VCR.current_cassette.serializable_hash['http_interactions'].last
  end
end

# Runs bin/htmlproofer with the given argument string and returns its stdout
# and stderr joined by a newline, encoded as UTF-8 with invalid or undefined
# byte sequences replaced.
#
# args - the full command-line argument string, including any path to check
def make_bin(args)
  command = "bin/htmlproofer #{args}"
  out, err = Open3.capture3(command)
  combined = "#{out}\n#{err}"
  combined.encode('UTF-8', invalid: :replace, undef: :replace)
end

def make_cassette_name(file, opts)
Expand Down

0 comments on commit 3b2e7cc

Please sign in to comment.