This repository has been archived by the owner on Jun 10, 2024. It is now read-only.

operand #N does not dominate this use #502

Closed
KronicDeth opened this issue Aug 13, 2020 · 11 comments

@KronicDeth (Collaborator)

init.erl

-module(init).
-export([start/0]).
-import(erlang, [demonitor/1, display/1, process_info/2, spawn_monitor/1]).

start() ->
  {ChildPid, MonitorReference} = spawn_monitor(fun () ->
    ok
  end),
  wait(2),
  Message = {'DOWN', MonitorReference, process, ChildPid, normal},
  display(has_message(Message)),
  display(demonitor(MonitorReference)),
  display(has_message(Message)).

has_message(Message) ->
  Messages = process_info(self(), messages),
  lists:member(Message, Messages).

wait(Milliseconds) ->
  receive
  after Milliseconds -> ok
  end,
  ok.

Error

Compiling tests/lib/erlang/demonitor_1/with_reference/with_monitor/does_not_flush_existing_message/init.erl
loc("tests/lib/erlang/demonitor_1/with_reference/with_monitor/does_not_flush_existing_message/init.erl":11:11): error: operand #0 does not dominate this use


module @init {
  eir.func @lumen_eh_personality() -> i32 attributes {std.varargs = true}
  eir.func @"init:start-fun-0-0/0"() -> !eir.term attributes {personality = @lumen_eh_personality} {
    %0 = eir.constant.atom #eir.atom<{ id = 71, value = 'ok' }>
    %1 = eir.cast %0 : !eir.atom to !eir.term {from = !eir.atom, to = !eir.term}
    eir.return %1 : !eir.term
  }
  eir.func @"init:start/0"() -> !eir.term attributes {personality = @lumen_eh_personality} {
    %0 = eir.closure @"init:start-fun-0-0/0"() {arity = 0 : i8, env_len = 0 : i8, index = 0 : i32, module = #eir.atom<{ id = 62, value = 'init' }>, old_unique = 0 : i32, unique = "\FD\D4\0CF\BB$\CEg\00\00\00\00\00\00\00\00"} : () -> !eir.term
    %1 = eir.call @"erlang:spawn_monitor/1"(%0) : (!eir.term) -> !eir.term
    eir.br ^bb11(%1 : !eir.term)
  ^bb1(%2: !eir.term):  // pred: ^bb9
    %3 = eir.call @"erlang:display/1"(%2) : (!eir.term) -> !eir.term
    eir.br ^bb2(%3 : !eir.term)
  ^bb2(%4: !eir.term):  // pred: ^bb1
    %5 = eir.call @"erlang:demonitor/1"(%31) : (!eir.term) -> !eir.term
    eir.br ^bb3(%5 : !eir.term)
  ^bb3(%6: !eir.term):  // pred: ^bb2
    %7 = eir.call @"erlang:display/1"(%6) : (!eir.term) -> !eir.term
    eir.br ^bb4(%7 : !eir.term)
  ^bb4(%8: !eir.term):  // pred: ^bb3
    %9 = eir.constant.atom #eir.atom<{ id = 74, value = 'DOWN' }>
    %10 = eir.cast %9 : !eir.atom to !eir.term {from = !eir.atom, to = !eir.term}
    %11 = eir.constant.atom #eir.atom<{ id = 75, value = 'process' }>
    %12 = eir.cast %11 : !eir.atom to !eir.term {from = !eir.atom, to = !eir.term}
    %13 = eir.constant.atom #eir.atom<{ id = 76, value = 'normal' }>
    %14 = eir.cast %13 : !eir.atom to !eir.term {from = !eir.atom, to = !eir.term}
    %15 = eir.tuple(%10, %31, %12, %30, %14) {alloca = false} : (!eir.term, !eir.term, !eir.term, !eir.term, !eir.term) -> !eir<"box<!eir<"tuple<5x!eir.term">>">
    %16 = eir.cast %15 : !eir<"box<!eir<"tuple<5x!eir.term">>"> to !eir.term {from = !eir<"box<!eir<"tuple<5x!eir.term">>">, to = !eir.term}
    %17 = eir.call @"init:has_message/1"(%16) : (!eir.term) -> !eir.term
    eir.br ^bb5(%17 : !eir.term)
  ^bb5(%18: !eir.term):  // pred: ^bb4
    %19 = eir.call @"erlang:display/1"(%18) : (!eir.term) -> !eir.term
    eir.return %19 : !eir.term
  ^bb6(%20: !eir.term):  // pred: ^bb7
    %21 = eir.constant.atom #eir.atom<{ id = 46, value = 'error' }>
    %22 = eir.cast %21 : !eir.atom to !eir.term {from = !eir.atom, to = !eir.term}
    %23 = eir.constant.atom #eir.atom<{ id = 200, value = 'badmatch' }>
    %24 = eir.cast %23 : !eir.atom to !eir.term {from = !eir.atom, to = !eir.term}
    %25 = eir.tuple(%24, %35) {alloca = false} : (!eir.term, !eir.term) -> !eir<"box<!eir<"tuple<2x!eir.term">>">
    %26 = eir.cast %25 : !eir<"box<!eir<"tuple<2x!eir.term">>"> to !eir.term {from = !eir<"box<!eir<"tuple<2x!eir.term">>">, to = !eir.term}
    eir.throw(%22, %26, %20) : (!eir.term, !eir.term, !eir.term)
  ^bb7:  // pred: ^bb8
    %27 = eir.trace_capture : !eir.term
    eir.br ^bb6(%27 : !eir.term)
  ^bb8:  // pred: ^bb13
    eir.br ^bb7
  ^bb9(%28: !eir.term):  // pred: ^bb10
    %29 = eir.call @"init:has_message/1"(%16) : (!eir.term) -> !eir.term
    eir.br ^bb1(%29 : !eir.term)
  ^bb10(%30: !eir.term, %31: !eir.term):  // pred: ^bb12
    %32 = eir.constant.int #eir.int<{ value = 2 }>
    %33 = eir.cast %32 : !eir.fixnum to !eir.term {from = !eir.fixnum, to = !eir.term}
    %34 = eir.call @"init:wait/1"(%33) : (!eir.term) -> !eir.term
    eir.br ^bb9(%34 : !eir.term)
  ^bb11(%35: !eir.term):  // pred: ^bb0
    %36 = eir.is_type(%35) {type = !eir<"box<!eir<"tuple<2x!eir.term">>">} : (!eir.term) -> i1
    eir.cond_br %36 : i1, ^bb12, ^bb13(%35 : !eir.term)
  ^bb12:  // pred: ^bb11
    %37 = eir.cast %35 : !eir.term to !eir<"box<!eir<"tuple<2x!eir.term">>"> {from = !eir.term, to = !eir<"box<!eir<"tuple<2x!eir.term">>">}
    %38 = eir.getelementptr %37[] {element = !eir.term, index = 1 : index, pointee = !eir<"tuple<2x!eir.term">} : (!eir<"box<!eir<"tuple<2x!eir.term">>">) -> !eir.ref<!eir.term>
    %39 = eir.load(%38) : (!eir.ref<!eir.term>) -> !eir.term
    %40 = eir.getelementptr %37[] {element = !eir.term, index = 2 : index, pointee = !eir<"tuple<2x!eir.term">} : (!eir<"box<!eir<"tuple<2x!eir.term">>">) -> !eir.ref<!eir.term>
    %41 = eir.load(%40) : (!eir.ref<!eir.term>) -> !eir.term
    eir.br ^bb10(%39, %41 : !eir.term, !eir.term)
  ^bb13(%42: !eir.term):  // pred: ^bb11
    eir.br ^bb8
  }
  eir.func @"init:wait/1"(%arg0: !eir.term) -> !eir.term attributes {personality = @lumen_eh_personality} {
    %0 = eir.receive.start %arg0 : (!eir.term) -> !eir.receive_ref
    br ^bb4(%0 : !eir.receive_ref)
  ^bb1:  // pred: ^bb4
    %1 = eir.receive.message %3 : (!eir.receive_ref) -> !eir.term
    br ^bb5(%1 : !eir.term)
  ^bb2:  // pred: ^bb4
    %c3_i8 = constant 3 : i8
    %2 = cmpi "eq", %4, %c3_i8 : i8
    cond_br %2, ^bb6, ^bb3
  ^bb3:  // pred: ^bb2
    eir.unreachable
  ^bb4(%3: !eir.receive_ref):  // pred: ^bb0
    %4 = eir.receive.wait %3 : (!eir.receive_ref) -> i8
    %c2_i8 = constant 2 : i8
    %5 = cmpi "eq", %4, %c2_i8 : i8
    cond_br %5, ^bb1, ^bb2
  ^bb5(%6: !eir.term):  // pred: ^bb1
    %7 = eir.constant.atom #eir.atom<{ id = 71, value = 'ok' }>
    %8 = eir.cast %7 : !eir.atom to !eir.term {from = !eir.atom, to = !eir.term}
    eir.return %8 : !eir.term
  ^bb6:  // pred: ^bb2
    %9 = eir.constant.atom #eir.atom<{ id = 71, value = 'ok' }>
    %10 = eir.cast %9 : !eir.atom to !eir.term {from = !eir.atom, to = !eir.term}
    eir.return %10 : !eir.term
  }
  eir.func @"init:has_message/1"(%arg0: !eir.term) -> !eir.term attributes {personality = @lumen_eh_personality} {
    %0 = eir.call @"erlang:self/0"() : () -> !eir.term
    eir.br ^bb1(%0 : !eir.term)
  ^bb1(%1: !eir.term):  // pred: ^bb0
    %2 = eir.constant.atom #eir.atom<{ id = 80, value = 'messages' }>
    %3 = eir.cast %2 : !eir.atom to !eir.term {from = !eir.atom, to = !eir.term}
    %4 = eir.call @"erlang:process_info/2"(%1, %3) : (!eir.term, !eir.term) -> !eir.term
    eir.br ^bb2(%4 : !eir.term)
  ^bb2(%5: !eir.term):  // pred: ^bb1
    %6 = eir.call @"lists:member/2"(%arg0, %5) : (!eir.term, !eir.term) -> !eir.term
    eir.return %6 : !eir.term
  }
}
loc("tests/lib/erlang/demonitor_1/with_reference/with_monitor/does_not_flush_existing_message/init.erl":1:1): error: module verification error
error: unexpected error occurred when lowering EIR module
@KronicDeth added the bug and compiler labels on Aug 13, 2020
@KronicDeth added this to the ElixirConf 2020 milestone on Aug 13, 2020
@KronicDeth (Collaborator, Author)

This version where Message is inlined does not cause the error:

init.erl

-module(init).
-export([start/0]).
-import(erlang, [demonitor/1, display/1, process_info/2, spawn_monitor/1]).

start() ->
  {ChildPid, MonitorReference} = spawn_monitor(fun () ->
    ok
  end),
  wait(2),
  display(has_message({'DOWN', MonitorReference, process, ChildPid, normal})),
  display(demonitor(MonitorReference)),
  display(has_message({'DOWN', MonitorReference, process, ChildPid, normal})).

has_message(Message) ->
  Messages = process_info(self(), messages),
  lists:member(Message, Messages).

wait(Milliseconds) ->
  receive
  after Milliseconds -> ok
  end,
  ok.

@bitwalker (Collaborator)

@hansihe Can you take a look at this in EIR? The code I'm generating indicates that the Message value is defined in a block that isn't a predecessor of the blocks which call has_message/1, and I'm not sure why that is. Since the lowering at this stage (prior to conversion to the LLVM dialect) is basically a reflection of how things are defined in EIR, I suspect this bug is happening in graph simplification somewhere.

Of course it could be a weird bug in codegen, but I'm not sure how a case like this would even happen: I don't ever define things in blocks other than where they were defined in EIR unless I'm creating new successor blocks, and those are inserted between the current block in EIR and its EIR successors, which automatically ensures that anything defined in those extra blocks dominates the EIR successors as well.

@KronicDeth (Collaborator, Author)

Another example

init.erl

-module(init).
-export([start/0]).
-import(erlang, [display/1]).
-import(lumen, [log_exit/1]).

start() ->
  log_exit(false),
  StartPid = self(),
  {ParentPid, ParentMonitorReference} = spawn_monitor(fun () ->
    ChildPid = spawn(fun () ->
      wait_to_shutdown()
    end),
    display(link(ChildPid)),
    StartPid ! {child_pid, ChildPid},
    wait_to_shutdown()
  end),
  {ChildPid, ChildMonitorReference} = receive
    {child_pid, ChildPid} ->
      {ChildPid, monitor(process, ChildPid)}
  end,
  shutdown(ParentPid),
  receive
    {'DOWN', ParentMonitorReference, process, _, Reason} ->
      display({parent, exited, Reason});
    10 ->
      display({parent, alive, is_process_alive(ParentPid)})
  end,
  receive
    {'DOWN', ChildMonitorReference, process, _, Reason} ->
      display({child, exited, Reason});
    10 ->
      display({child, alive, is_process_alive(ChildPid)})
  end.

shutdown(Pid) ->
  Pid ! shutdown.

wait_to_shutdown() ->
  receive
    shutdown -> ok
  end.

Error

    Compiling tests/lib/erlang/link_1/with_local_pid/when_the_process_does_not_exit_normal_linked_processes_exit_too/init.erl
loc("tests/lib/erlang/link_1/with_local_pid/when_the_process_does_not_exit_normal_linked_processes_exit_too/init.erl":21:3): error: operand #0 does not dominate this use

Line 21 is shutdown(ParentPid),

@hansihe (Collaborator) commented Aug 24, 2020

Sorry I didn't look at this earlier; I had totally forgotten I was mentioned here.

The resulting IR does pass the eir validation pass, which validates value visibility. Looking at the IR also seems to confirm that things are pretty well formed:

[attached graph: start dot]

Does seem like a strange issue though. I'll take a look at the new example Luke gave as well.

@hansihe (Collaborator) commented Aug 24, 2020

> This version where Message is inlined does not cause the error: […]

This doesn't cause the same error, but it does segfault the lumen compiler when I try to compile it.

The strange part is that the IR produced is pretty much identical, even to the point that the two calls to has_message still refer to the same PrimOp. Maybe the segfault here is happening somewhere before that MLIR validation would have been performed?

[attached graph: start dot]

@hansihe (Collaborator) commented Aug 24, 2020

@bitwalker Could a bug in the live value data provided to codegen cause something like this? It does seem a bit unlikely to me since that part of eir is pretty well tested, but could be worth investigating at least.

@hansihe (Collaborator) commented Aug 25, 2020

I looked into this some more and I found what the problem is.

PrimOps are singletons: if two tuple-construction primops in the same function container read the same values, they will always map to the same primop handle, and therefore to the same value.

This also means that primops aren't rigidly located anywhere in the eir block graph; they are conceptually floating and can be scheduled anywhere between where their reads are defined and where their result is read. They can be scheduled anywhere from zero times to several, since PrimOps aren't allowed to have any side effects at the language level.

In MLIR codegen right now, it seems like primops are only ever constructed the first time they are encountered. If they are encountered again later, the value constructed earlier is reused.

The issue with this is that, with the way the builder iterates over eir blocks, there is no guarantee that blocks are visited in any particular order. When the same PrimOp is used multiple times, this means there is no guarantee at all that the bb where the primop is first constructed is visible to all the other places where it is used. That is exactly what happens in the dump above: the 'DOWN' message tuple is materialized in ^bb4 (%15, cast to %16), but the has_message/1 call in ^bb9 also uses %16, and ^bb4 does not dominate ^bb9. (Using a different iteration order is not a fix: as long as we construct the primop on its first use, there will always be cases where other execution paths cannot see the constructed primop.)

In the long term, we want to implement more sophisticated scheduling of primops, meaning a primop is constructed at most once on every execution path, and only on paths where it is actually used. The code for this is not really all that tricky; it should be a pretty simple graph operation. It does, however, make sense to me to implement this scheduling in eir, since it would be easy to expose to any backend via lowerutils.

As a temporary fix, I propose we just construct the primop in every bb where it's used, and don't attempt to deduplicate the construction in codegen. That should fix this issue.
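
For illustration only (this is a hand-written sketch, not compiler output, and the %t* value names are invented): in the dump above, that would mean ^bb9 re-materializes the 'DOWN' tuple locally instead of referring to %16 from ^bb4, roughly:

  ^bb9(%28: !eir.term):  // pred: ^bb10
    %t0 = eir.constant.atom #eir.atom<{ id = 74, value = 'DOWN' }>
    %t1 = eir.cast %t0 : !eir.atom to !eir.term {from = !eir.atom, to = !eir.term}
    %t2 = eir.constant.atom #eir.atom<{ id = 75, value = 'process' }>
    %t3 = eir.cast %t2 : !eir.atom to !eir.term {from = !eir.atom, to = !eir.term}
    %t4 = eir.constant.atom #eir.atom<{ id = 76, value = 'normal' }>
    %t5 = eir.cast %t4 : !eir.atom to !eir.term {from = !eir.atom, to = !eir.term}
    // %30 and %31 are ^bb10's block arguments, so they already dominate ^bb9
    %t6 = eir.tuple(%t1, %31, %t3, %30, %t5) {alloca = false} : (!eir.term, !eir.term, !eir.term, !eir.term, !eir.term) -> !eir<"box<!eir<"tuple<5x!eir.term">>">
    %t7 = eir.cast %t6 : !eir<"box<!eir<"tuple<5x!eir.term">>"> to !eir.term {from = !eir<"box<!eir<"tuple<5x!eir.term">>">, to = !eir.term}
    %29 = eir.call @"init:has_message/1"(%t7) : (!eir.term) -> !eir.term
    eir.br ^bb1(%29 : !eir.term)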

Thoughts @bitwalker?

@hansihe (Collaborator) commented Aug 25, 2020

Regarding the long-term proper fix: I remember we talked earlier about introducing a new IR in eir that is easier to lower from, so I opened an issue for it: eirproject/eir#28

I think going for something like this would simplify the Lumen codegen quite a bit, make it less error-prone, and also make it easier to build other eir backends in the future.

@bitwalker (Collaborator)

> The issue with this is that, with the way the builder iterates over eir blocks, there is no guarantee that blocks are visited in any particular order. When the same PrimOp is used multiple times, this means there is no guarantee at all that the bb where the primop is first constructed is visible to all the other places where it is used.

It was my expectation that walking the scope provided by the lowering analysis would traverse the blocks in breadth-first fashion, such that the introduction of any value would strictly dominate any uses both within a block and in subsequent blocks. If the traversal order is not already breadth-first, I'd make it so that it is (or at least an option to do so). That is obviously separate from the issue of unscheduled operations, but is still important.

> As a temporary fix, I propose we just construct the primop in every bb where it's used, and don't attempt to deduplicate the construction in codegen

I don't think it will work to lower each primop as a new instance naively, since the way values are tracked currently relies on some form of identity; i.e. if I encounter a use of value123, which is the result of some primop that I've already lowered, then I expect that I can use that value to refer to the result of that operation; if each primop gets lowered to a new value, then I can no longer do that. I'm fairly certain the resulting IR would be horrible, assuming it was valid. Obviously, code generation has a bug under the current set of assumptions, but it is relatively easy to address that bug by rescheduling operations before verification.

To that point, it makes more sense for me to use the dominance analysis in MLIR to drive a pass that reschedules operations that do not dominate all of their uses. It would be fairly trivial to write, and wouldn't have nearly as negative an effect on the resulting IR. I have other uses for such an analysis anyway, so it's useful to me one way or the other.
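
For concreteness, here is a minimal sketch of that kind of rescheduling, written against upstream MLIR's C++ API rather than Lumen's actual codegen; the function name is invented, and it assumes the offending definitions are side-effect free (as EIR primops are), so re-materializing them per use is safe:

#include "mlir/IR/Builders.h"
#include "mlir/IR/Dominance.h"
#include "mlir/IR/Operation.h"

// Hypothetical helper: for every operand whose defining op does not dominate
// the use, re-materialize the definition immediately before the user.
// Assumes the rescheduled defs are pure (e.g. eir.tuple, eir.constant.*,
// eir.cast). A real pass would also need to recursively reschedule the
// clone's own operands if they don't dominate the new location.
static void rescheduleNonDominatingDefs(mlir::Operation *root) {
  mlir::DominanceInfo domInfo(root);
  root->walk([&](mlir::Operation *user) {
    for (mlir::OpOperand &operand : user->getOpOperands()) {
      mlir::Value value = operand.get();
      mlir::Operation *def = value.getDefiningOp();
      // Block arguments and defs that already dominate the use are fine.
      if (!def || domInfo.properlyDominates(value, user))
        continue;
      mlir::OpBuilder builder(user); // insertion point: right before the use
      mlir::Operation *clone = builder.clone(*def);
      unsigned idx = value.cast<mlir::OpResult>().getResultNumber();
      operand.set(clone->getResult(idx));
    }
  });
}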

The unscheduled/floating representation isn't an issue per se, but the lack of any use-def analysis is a problem. I was under the impression that the liveness analysis effectively provided that, albeit indirectly (and maybe it does and I'm just misunderstanding). Definitely a priority to address that as soon as possible.

@KronicDeth modified the milestones: ElixirConf 2020, In 2020 on Sep 3, 2020
@KronicDeth (Collaborator, Author)

This affects the following files in OTP:

  • lib/asn1/src/asn1ct_tok.erl
  • lib/common_test/src/ct_config_plain.erl
  • lib/common_test/src/ct_ftp.erl
  • otp/lib/common_test/src/ct_ssh.erl
  • lib/common_test/src/erl2html2.erl
  • lib/compiler/src/core_lint.erl
  • lib/compiler/src/rec_env.erl
  • lib/compiler/src/sys_pre_attributes.erl
  • lib/debugger/src/dbg_idb.erl
  • lib/debugger/src/i.erl
  • lib/eunit/src/eunit_striptests.erl
  • lib/inets/src/http_lib/http_transport.erl
  • lib/inets/src/http_server/httpd_acceptor_sup.erl
  • lib/inets/src/http_server/httpd_esi.erl
  • lib/inets/src/http_server/httpd_log.erl
  • lib/inets/src/http_server/mod_actions.erl
  • lib/inets/src/http_server/mod_alias.erl
  • lib/inets/src/http_server/mod_auth_server.erl
  • lib/inets/src/http_server/mod_get.erl
  • lib/kernel/src/global_search.erl
  • lib/kernel/src/kernel_refc.erl
  • lib/kernel/src/wrap_log_reader.erl
  • lib/megaco/src/engine/megaco_timer.erl
  • lib/mnesia/src/mnesia_backup.erl
  • lib/mnesia/src/mnesia_frag_hash.erl
  • lib/mnesia/src/mnesia_snmp_hook.erl
  • lib/observer/src/etop_tr.erl
  • lib/observer/src/observer_html_lib.erl
  • lib/sasl/src/alarm_handler.erl
  • lib/sasl/src/rb_format_supp.erl
  • lib/sasl/src/release_handler_1.erl
  • lib/stdlib/src/array.erl
  • lib/stdlib/src/error_logger_file_h.erl
  • lib/stdlib/src/gb_trees.erl
  • lib/stdlib/src/orddict.erl
  • lib/xmerl/src/xmerl_html.erl
  • lib/xmerl/src/xmerl_xml.erl

@KronicDeth changed the title from "operand #0 does not dominate this use" to "operand #N does not dominate this use" on Sep 21, 2020
@bitwalker (Collaborator)

The original examples are fixed as of #582. If it comes up again, it will be due to a different cause, so I'm going to close this for now and we can open new issues should that occur.
