Skip to content

Commit

Permalink
Ignore empty rules (#18)
Browse files Browse the repository at this point in the history
  • Loading branch information
AntoineGagne committed Nov 21, 2023
1 parent 6405fdb commit 839025a
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 4 deletions.
1 change: 1 addition & 0 deletions .github/workflows/erlang.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ jobs:
strategy:
matrix:
otp:
- '26.1.2'
- '25.2.1'
- '24.3.4'
- '23.3.4'
Expand Down
12 changes: 9 additions & 3 deletions src/robots.erl
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ handle_line(Line) ->
%% Normalises the value stored under one key of the rules index.
%%
%% * `{allowed, all}' is returned unchanged.
%% * An `{Allowed, Disallowed}' pair gets both lists sorted in descending
%%   term order.
%% * Any other value stored under the `sitemap' key is returned unchanged.
sort_rules(_, Value = {allowed, all}) ->
    Value;
sort_rules(_, {Allowed, Disallowed}) ->
    %% `Compare' must be bound exactly once: matching a second, different fun
    %% expression against it raises `badmatch'.
    %% Descending order puts a rule before any of its own prefixes
    %% (e.g. <<"/a/b">> before <<"/a">>).
    %% NOTE(review): presumably so the more specific rule is tried first;
    %% confirm against the matcher.
    Compare = fun(R1, R2) -> R1 > R2 end,
    {lists:sort(Compare, Allowed), lists:sort(Compare, Disallowed)};
sort_rules(sitemap, Value) ->
    Value.
Expand All @@ -143,17 +143,23 @@ sort_rules(sitemap, Value) ->
trim(String) ->
string:trim(String, both).

%% Folds one parsed `{Directive, Value}' line into the parsing state
%% `{Agents, Flag, RulesIndex}'.
%%
%% The boolean flag is `false' right after a `user-agent' line and `true' after
%% an `allow'/`disallow' line. When it is `true', the next `user-agent' line
%% replaces the current agent list instead of being prepended to it.
%%
%% Only one -spec may exist per function/arity, so a single declaration is kept.
-spec build_rules({binary(), binary()}, {[agent()], IsFirstAgent, rules_index()}) ->
    {[agent()], IsFirstAgent, rules_index()}
when
    IsFirstAgent :: boolean().
build_rules({<<"user-agent">>, RawAgent}, {Agents, false, RulesIndex}) ->
Reversed = to_agent(RawAgent),
{[Reversed | Agents], false, RulesIndex};
build_rules({<<"user-agent">>, RawAgent}, {_Agents, true, RulesIndex}) ->
Reversed = to_agent(RawAgent),
{[Reversed], false, RulesIndex};
build_rules({<<"allow">>, <<>>}, {Agents, _, RulesIndex}) ->
{Agents, true, RulesIndex};
build_rules({<<"allow">>, Rule}, {Agents, _, RulesIndex}) ->
{_, UpdatedIndex} = lists:foldl(fun update_index/2, {{allowed, Rule}, RulesIndex}, Agents),
{Agents, true, UpdatedIndex};
build_rules({<<"disallow">>, <<>>}, {Agents, _, RulesIndex}) ->
{Agents, true, RulesIndex};
build_rules({<<"disallow">>, Rule}, {Agents, _, RulesIndex}) ->
{_, UpdatedIndex} = lists:foldl(fun update_index/2, {{disallowed, Rule}, RulesIndex}, Agents),
{Agents, true, UpdatedIndex};
Expand Down
21 changes: 20 additions & 1 deletion test/robots_SUITE.erl
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,14 @@
-define(ANOTHER_RULE, <<"/bar">>).
-define(A_VALID_CODE, 200).
-define(A_VALID_CONTENT, <<"User-Agent: ", ?USER_AGENT/binary, "\nAllow: ", ?A_RULE/binary>>).
%% robots.txt fixture: a wildcard `User-agent: *' group whose only directive is
%% a `Disallow:' line with no value, surrounded by `#' comment lines.
-define(SOME_CONTENT_WITH_EMPTY_RULES, <<
"# START YOAST BLOCK\n"
"# ---------------------------\n"
"User-agent: *\n"
"Disallow:\n"
"# ---------------------------\n"
"# END YOAST BLOCK\n"
>>).
-define(ANOTHER_VALID_CONTENT,
<<"User-Agent: ", ?USER_AGENT/binary, "\nAllow: ", ?A_RULE/binary, "\nDisallow: ",
?ANOTHER_RULE/binary>>
Expand Down Expand Up @@ -58,7 +66,8 @@ groups() ->
match_independently_of_the_casing_of_the_agent,
return_false_if_agent_is_disallowed,
return_true_if_no_matching_rules_can_be_found,
return_true_if_everything_is_allowed_for_the_corresponding_agent
return_true_if_everything_is_allowed_for_the_corresponding_agent,
ignore_empty_rules
]}
].

Expand Down Expand Up @@ -240,6 +249,16 @@ return_true_if_everything_is_allowed_for_the_corresponding_agent(_Config) ->

?assert(robots:is_allowed(?USER_AGENT, ?AN_URL, RulesIndex)).

%% Common Test info function: supplies the human-readable description of the
%% `ignore_empty_rules' test case.
ignore_empty_rules() ->
    [
        {doc,
            "Given a robots.txt with a wildcard agent associated with an empty rule, "
            "when parsing, then allow everything."}
    ].
%% Parses a robots.txt body whose wildcard group contains only an empty
%% `Disallow:' directive and checks that the URL is still allowed.
ignore_empty_rules(_Config) ->
    %% Assertive match: the test crashes here if parsing does not succeed.
    {ok, RulesIndex} = robots:parse(?SOME_CONTENT_WITH_EMPTY_RULES, ?A_VALID_CODE),

    ?assert(robots:is_allowed(?USER_AGENT, ?AN_URL, RulesIndex)).

%%%===================================================================
%%% Internal functions
%%%===================================================================

0 comments on commit 839025a

Please sign in to comment.