Skip to content

Commit

Permalink
Bugfix: make the parser handle the case where zero repetitions occur,…
Browse files Browse the repository at this point in the history
… by handling parse results on the basis of what names the matcher expects to bind, not on what names are actually bound.
  • Loading branch information
paulstansifer committed Jul 24, 2012
1 parent 1c47256 commit f785ccc
Show file tree
Hide file tree
Showing 5 changed files with 35 additions and 15 deletions.
5 changes: 3 additions & 2 deletions src/libsyntax/ast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -378,8 +378,9 @@ type matcher = spanned<matcher_>;
enum matcher_ {
/* match one token */
mtc_tok(token::token),
/* match repetitions of a sequence: body, separator, zero ok? : */
mtc_rep(~[matcher], option<token::token>, bool),
/* match repetitions of a sequence: body, separator, zero ok?,
lo, hi: half-open range [lo, hi) of match-array positions bound by the body */
mtc_rep(~[matcher], option<token::token>, bool, uint, uint),
/* parse a Rust NT: name to bind, name of NT, position in match array : */
mtc_bb(ident, ident, uint)
}
Expand Down
38 changes: 27 additions & 11 deletions src/libsyntax/ext/tt/earley_parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ type matcher_pos = ~{
mut idx: uint,
mut up: matcher_pos_up, // mutable for swapping only
matches: ~[dvec<@arb_depth>],
match_lo: uint, match_hi: uint,
sp_lo: uint,
};

Expand All @@ -55,17 +56,25 @@ fn count_names(ms: &[matcher]) -> uint {
vec::foldl(0u, ms, |ct, m| {
ct + alt m.node {
mtc_tok(_) { 0u }
mtc_rep(more_ms, _, _) { count_names(more_ms) }
mtc_rep(more_ms, _, _, _, _) { count_names(more_ms) }
mtc_bb(_,_,_) { 1u }
}})
}

#[warn(no_non_implicitly_copyable_typarams)]
fn new_matcher_pos(ms: ~[matcher], sep: option<token>, lo: uint)
fn initial_matcher_pos(ms: ~[matcher], sep: option<token>, lo: uint)
-> matcher_pos {
let mut match_idx_hi = 0u;
for ms.each() |elt| {
alt elt.node {
mtc_tok(_) {}
mtc_rep(_,_,_,_,hi) { match_idx_hi = hi; } //it is monotonic...
mtc_bb(_,_,pos) { match_idx_hi = pos+1u; } //...so latest is highest
}
}
~{elts: ms, sep: sep, mut idx: 0u, mut up: matcher_pos_up(none),
matches: copy vec::from_fn(count_names(ms), |_i| dvec::dvec()),
sp_lo: lo}
match_lo: 0u, match_hi: match_idx_hi, sp_lo: lo}
}

/* logically, an arb_depth should contain only one kind of nonterminal */
Expand All @@ -79,7 +88,7 @@ fn nameize(p_s: parse_sess, ms: ~[matcher], res: ~[@arb_depth])
ret_val: hashmap<ident, @arb_depth>) {
alt m {
{node: mtc_tok(_), span: _} { }
{node: mtc_rep(more_ms, _, _), span: _} {
{node: mtc_rep(more_ms, _, _, _, _), span: _} {
for more_ms.each() |next_m| { n_rec(p_s, next_m, res, ret_val) };
}
{node: mtc_bb(bind_name, _, idx), span: sp} {
Expand All @@ -104,7 +113,7 @@ enum parse_result {
fn parse(sess: parse_sess, cfg: ast::crate_cfg, rdr: reader, ms: ~[matcher])
-> parse_result {
let mut cur_eis = ~[];
vec::push(cur_eis, new_matcher_pos(ms, none, rdr.peek().sp.lo));
vec::push(cur_eis, initial_matcher_pos(ms, none, rdr.peek().sp.lo));
loop {
let mut bb_eis = ~[]; // black-box parsed by parser.rs
Expand Down Expand Up @@ -141,10 +150,10 @@ fn parse(sess: parse_sess, cfg: ast::crate_cfg, rdr: reader, ms: ~[matcher])
// I bet this is a perf problem: we're preemptively
// doing a lot of array work that will get thrown away
// most of the time.
for ei.matches.eachi() |idx, elt| {
let sub = elt.get();
// Some subtrees don't contain the name at all
if sub.len() == 0u { again; }

// Only touch the binders this matcher is expected to bind
// (match_lo..match_hi), whether or not anything was actually bound
for uint::range(ei.match_lo, ei.match_hi) |idx| {
let sub = ei.matches[idx].get();
new_pos.matches[idx]
.push(@seq(sub, mk_sp(ei.sp_lo,sp.hi)));
}
Expand Down Expand Up @@ -176,10 +185,15 @@ fn parse(sess: parse_sess, cfg: ast::crate_cfg, rdr: reader, ms: ~[matcher])
} else {
alt copy ei.elts[idx].node {
/* need to descend into sequence */
mtc_rep(matchers, sep, zero_ok) {
mtc_rep(matchers, sep, zero_ok, match_idx_lo, match_idx_hi){
if zero_ok {
let new_ei = copy ei;
new_ei.idx += 1u;
//we specifically matched zero repeats.
for uint::range(match_idx_lo, match_idx_hi) |idx| {
new_ei.matches[idx].push(@seq(~[], sp));
}

vec::push(cur_eis, new_ei);
}

Expand All @@ -189,7 +203,9 @@ fn parse(sess: parse_sess, cfg: ast::crate_cfg, rdr: reader, ms: ~[matcher])
vec::push(cur_eis, ~{
elts: matchers, sep: sep, mut idx: 0u,
mut up: matcher_pos_up(some(ei_t)),
matches: matches, sp_lo: sp.lo
matches: matches,
match_lo: match_idx_lo, match_hi: match_idx_hi,
sp_lo: sp.lo
});
}
mtc_bb(_,_,_) { vec::push(bb_eis, ei) }
Expand Down
2 changes: 1 addition & 1 deletion src/libsyntax/ext/tt/macro_rules.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ fn add_new_extension(cx: ext_ctxt, sp: span, name: ident,
ms(mtc_bb(@~"lhs",@~"mtcs", 0u)),
ms(mtc_tok(FAT_ARROW)),
ms(mtc_bb(@~"rhs",@~"tt", 1u)),
], some(SEMI), false))];
], some(SEMI), false, 0u, 2u))];

let arg_reader = new_tt_reader(cx.parse_sess().span_diagnostic,
cx.parse_sess().interner, none, arg);
Expand Down
2 changes: 2 additions & 0 deletions src/libsyntax/ext/tt/transcribe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,8 @@ pure fn lookup_cur_ad_by_ad(r: tt_reader, start: @arb_depth) -> @arb_depth {
seq(ads, _) { ads[idx] }
}
}
unchecked {io::println(#fmt["%? / %?", copy r.repeat_idx,
copy r.repeat_len]);};
vec::foldl(start, r.repeat_idx, red)
}

Expand Down
3 changes: 2 additions & 1 deletion src/libsyntax/parse/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1194,13 +1194,14 @@ class parser {
let m = if self.token == token::DOLLAR {
self.bump();
if self.token == token::LPAREN {
let name_idx_lo = *name_idx;
let ms = self.parse_matcher_subseq(name_idx, token::LPAREN,
token::RPAREN);
if ms.len() == 0u {
self.fatal(~"repetition body must be nonempty");
}
let (sep, zerok) = self.parse_sep_and_zerok();
mtc_rep(ms, sep, zerok)
mtc_rep(ms, sep, zerok, name_idx_lo, *name_idx)
} else {
let bound_to = self.parse_ident();
self.expect(token::COLON);
Expand Down

0 comments on commit f785ccc

Please sign in to comment.