Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Explicit control flow forces code duplication #96

Open
madmann91 opened this issue Aug 22, 2018 · 1 comment
Open

Explicit control flow forces code duplication #96

madmann91 opened this issue Aug 22, 2018 · 1 comment

Comments

@madmann91
Copy link
Contributor

The explicit control flow in the match (and therefore also in the branch()) intrinsic functions will duplicate code when specialization:

fn main(i: i32, j: i32) -> i32 {
    let light = match j {
        0 => @ || 5,
        1 => @ || 6,
        2 => @ || 7,
        3 => @ || 8,
        _ => @ || 9
    };
    let shader = match i {
        0 => @ || 1,
        1 => @ || 2,
        2 => @ || 3,
        3 => @ || 4,
        _ => @ || 5
    };
    light() + shader()
}

In the following example, a programmer would logically expect the compiler to generate two switches. Unfortunately, this is not what happens, as one switch will become embedded inside each and every branch of the other (thereby creating 1 + 5 switches). See the thorin IR at the bottom of this post for reference.

The solution to this problem would be (in my opinion), to stop using the branch() and match() intrinsics, and use a combination of select + jump (and maybe a new primop that selects between more than two options) instead (which is what we were doing at some point before introducing branch()). This will indeed complicate a bit code generation but is also more natural (the IR is then smaller) and I don't think it is too hard to implement.

Module before PE (2 switches):

module 'test_pe'


main_136(mem mem_137, qs32 __138, qs32 __139, fn(mem, qs32) return_140) extern  @(bool 0, bool 0, bool 0, bool 0)
    (qs32, fn()) match_232 = tuple qs32 1, matchcase_225
    (qs32, fn()) match_240 = tuple qs32 2, matchcase_233
    (qs32, fn()) match_248 = tuple qs32 3, matchcase_241
    (qs32, fn()) match_224 = tuple qs32 0, matchcase_218
    (mem, frame) _162 = enter mem_137
    mem _164 = extract _162, qu32 0
    match_142(__139, matchotherwise_149, ((qs32, fn()) tuple qs32 0, matchcase_218), ((qs32, fn()) tuple qs32 1, matchcase_225), ((qs32, fn()) tuple qs32 2, matchcase_233), ((qs32, fn()) tuple qs32 3, matchcase_241))

    matchcase_218()
        next_150(lambda_219)
    
    matchotherwise_149()
        next_150(lambda_212)
    
    matchcase_233()
        next_150(lambda_234)
    
    matchcase_225()
        next_150(lambda_226)
    
    matchcase_241()
        next_150(lambda_242)
    
    next_150(fn(mem, fn(mem, qs32)) light_151)
        (qs32, fn()) match_211 = tuple qs32 3, matchcase_204
        (qs32, fn()) match_195 = tuple qs32 1, matchcase_188
        (qs32, fn()) match_187 = tuple qs32 0, matchcase_180
        (qs32, fn()) match_203 = tuple qs32 2, matchcase_196
        match_152(__138, matchotherwise_159, ((qs32, fn()) tuple qs32 0, matchcase_180), ((qs32, fn()) tuple qs32 1, matchcase_188), ((qs32, fn()) tuple qs32 2, matchcase_196), ((qs32, fn()) tuple qs32 3, matchcase_204))
    
    matchcase_188()
        next_160(lambda_189)
    
    matchotherwise_159()
        next_160(lambda_172)
    
    matchcase_204()
        next_160(lambda_205)
    
    matchcase_180()
        next_160(lambda_181)
    
    matchcase_196()
        next_160(lambda_197)
    
    next_160(fn(mem, fn(mem, qs32)) shader_161)
        light_151(_164, light_cont_165)
    
    light_cont_165(mem mem_166, qs32 light_167)
        shader_161(mem_166, shader_cont_168)
    
    shader_cont_168(mem mem_169, qs32 shader_170)
        qs32 _171 = add light_167, shader_170
        return_140(mem_169, _171)

Module after PE (6 switches):

module 'test_pe'


main_2626(mem mem_2627, qs32 __2628, qs32 __2629, fn(mem, qs32) return_2630) extern 
    (qs32, fn()) match_2700 = tuple qs32 2, matchcase_2690
    (qs32, fn()) match_2689 = tuple qs32 1, matchcase_2679
    (qs32, fn()) match_2711 = tuple qs32 3, matchcase_2701
    (qs32, fn()) match_2678 = tuple qs32 0, matchcase_2664
    match_2631(__2629, matchotherwise_2638, ((qs32, fn()) tuple qs32 0, matchcase_2664), ((qs32, fn()) tuple qs32 1, matchcase_2679), ((qs32, fn()) tuple qs32 2, matchcase_2690), ((qs32, fn()) tuple qs32 3, matchcase_2701))

    matchotherwise_2638()
        (qs32, fn()) match_2651 = tuple qs32 0, matchcase_2649
        (qs32, fn()) match_2659 = tuple qs32 2, matchcase_2657
        (qs32, fn()) match_2663 = tuple qs32 3, matchcase_2661
        (qs32, fn()) match_2655 = tuple qs32 1, matchcase_2653
        match_2639(__2628, matchotherwise_2646, ((qs32, fn()) tuple qs32 0, matchcase_2649), ((qs32, fn()) tuple qs32 1, matchcase_2653), ((qs32, fn()) tuple qs32 2, matchcase_2657), ((qs32, fn()) tuple qs32 3, matchcase_2661))
    
    matchcase_2649()
        return_2630(mem_2627, qs32 10)
    
    matchcase_2653()
        return_2630(mem_2627, qs32 11)
    
    matchotherwise_2646()
        return_2630(mem_2627, qs32 14)
    
    matchcase_2657()
        return_2630(mem_2627, qs32 12)
    
    matchcase_2661()
        return_2630(mem_2627, qs32 13)
    
    matchcase_2679()
        (qs32, fn()) match_2684 = tuple qs32 1, matchcase_2683
        (qs32, fn()) match_2682 = tuple qs32 0, matchcase_2681
        (qs32, fn()) match_2686 = tuple qs32 2, matchcase_2685
        (qs32, fn()) match_2688 = tuple qs32 3, matchcase_2687
        match_2639(__2628, matchotherwise_2680, ((qs32, fn()) tuple qs32 0, matchcase_2681), ((qs32, fn()) tuple qs32 1, matchcase_2683), ((qs32, fn()) tuple qs32 2, matchcase_2685), ((qs32, fn()) tuple qs32 3, matchcase_2687))
    
    matchcase_2687()
        return_2630(mem_2627, qs32 10)
    
    matchcase_2685()
        return_2630(mem_2627, qs32 9)
    
    matchotherwise_2680()
        return_2630(mem_2627, qs32 11)
    
    matchcase_2683()
        return_2630(mem_2627, qs32 8)
    
    matchcase_2681()
        return_2630(mem_2627, qs32 7)
    
    matchcase_2690()
        (qs32, fn()) match_2695 = tuple qs32 1, matchcase_2694
        (qs32, fn()) match_2693 = tuple qs32 0, matchcase_2692
        (qs32, fn()) match_2697 = tuple qs32 2, matchcase_2696
        (qs32, fn()) match_2699 = tuple qs32 3, matchcase_2698
        match_2639(__2628, matchotherwise_2691, ((qs32, fn()) tuple qs32 0, matchcase_2692), ((qs32, fn()) tuple qs32 1, matchcase_2694), ((qs32, fn()) tuple qs32 2, matchcase_2696), ((qs32, fn()) tuple qs32 3, matchcase_2698))
    
    matchotherwise_2691()
        return_2630(mem_2627, qs32 12)
    
    matchcase_2694()
        return_2630(mem_2627, qs32 9)
    
    matchcase_2692()
        return_2630(mem_2627, qs32 8)
    
    matchcase_2696()
        return_2630(mem_2627, qs32 10)
    
    matchcase_2698()
        return_2630(mem_2627, qs32 11)
    
    matchcase_2664()
        (qs32, fn()) match_2671 = tuple qs32 1, matchcase_2669
        (qs32, fn()) match_2674 = tuple qs32 2, matchcase_2672
        (qs32, fn()) match_2668 = tuple qs32 0, matchcase_2666
        (qs32, fn()) match_2677 = tuple qs32 3, matchcase_2675
        match_2639(__2628, matchotherwise_2665, ((qs32, fn()) tuple qs32 0, matchcase_2666), ((qs32, fn()) tuple qs32 1, matchcase_2669), ((qs32, fn()) tuple qs32 2, matchcase_2672), ((qs32, fn()) tuple qs32 3, matchcase_2675))
    
    matchcase_2666()
        return_2630(mem_2627, qs32 6)
    
    matchotherwise_2665()
        return_2630(mem_2627, qs32 10)
    
    matchcase_2669()
        return_2630(mem_2627, qs32 7)
    
    matchcase_2675()
        return_2630(mem_2627, qs32 9)
    
    matchcase_2672()
        return_2630(mem_2627, qs32 8)
    
    matchcase_2701()
        (qs32, fn()) match_2706 = tuple qs32 1, matchcase_2705
        (qs32, fn()) match_2704 = tuple qs32 0, matchcase_2703
        (qs32, fn()) match_2708 = tuple qs32 2, matchcase_2707
        (qs32, fn()) match_2710 = tuple qs32 3, matchcase_2709
        match_2639(__2628, matchotherwise_2702, ((qs32, fn()) tuple qs32 0, matchcase_2703), ((qs32, fn()) tuple qs32 1, matchcase_2705), ((qs32, fn()) tuple qs32 2, matchcase_2707), ((qs32, fn()) tuple qs32 3, matchcase_2709))
    
    matchcase_2707()
        return_2630(mem_2627, qs32 11)
    
    matchcase_2705()
        return_2630(mem_2627, qs32 10)
    
    matchcase_2703()
        return_2630(mem_2627, qs32 9)
    
    matchotherwise_2702()
        return_2630(mem_2627, qs32 13)
    
    matchcase_2709()
        return_2630(mem_2627, qs32 12)
@leissa
Copy link
Member

leissa commented Aug 22, 2018

We used to do the select + jump trick back in the days of yore. IMHO the branch intrinsic is a bit more concise and easier to work with. At least I never found my self looking back to select + jump. That being said, I don't quite see why select + jump would help?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants