Erlang supervisor tree terminating-Collection of common programming errors

I’m trying to build a supervisor tree where I have this structure:

1 root supervisor -> 3 “level” supervisors -> each level supervisor has an initializer supervisor -> X number of workers (right now only 1 for the examples sake)

But for some reason starting the second level supervisor makes the whole tree terminate, if I only start 1 root -> 1 level -> 1 init -> 1 worker (or more workers) it’s fine, but as soon as I try to add more supervisors the tree terminates.

-module(otp_supervisor).
-behavior(supervisor).

-export([start_cell/1]).
-export([init/1]).

start_cell(root) ->
    supervisor:start_link({local, root}, ?MODULE, [root]);
start_cell({Type, Role}) ->
    supervisor:start_link({local, Type}, ?MODULE, [{Type, Role}]).

init([root]) -> 
    init_root(one_for_one, 3, 60);
init([{level, Param}]) -> 
    init_level(one_for_one, 3, 60, {member, Param});
init([{member, Param}]) -> 
    init_member(one_for_one, 3, 60, Param).

init_root(RestartStrategy, MaxRestart, MaxTime) ->
    io:format("~p ~s: Spawning...~n", [self(), root_supervisor]),
    {ok, {
        {RestartStrategy, MaxRestart, MaxTime},
        [
            {olevel,
                {otp_supervisor, start_cell, [{level, overseer}]},
                permanent, 1000, supervisor, [otp_supervisor]
            },
            {slevel,
                {otp_supervisor, start_cell, [{level, supervisor}]},
              permanent, 1000, supervisor, [otp_supervisor]
            },
            {wlevel,
                {otp_supervisor, start_cell, [{level, worker}]},
                permanent, 1000, supervisor, [otp_supervisor]
            }
        ]
        }
    }.

init_level(RestartStrategy, MaxRestart, MaxTime, {member, overseer}) ->
    io:format("~p ~s: Spawning...~n", [self(), overseer_level_supervisor]),
    {ok, {
        {RestartStrategy, MaxRestart, MaxTime},
        [
            {oinit,
                {otp_supervisor, start_cell, [{member, overseer}]},
                permanent, 1000, supervisor, [otp_supervisor]}
        ]
        }
    };
init_level(RestartStrategy, MaxRestart, MaxTime, {member, supervisor}) ->
    io:format("~p ~s: Spawning...~n", [self(), supervisor_level_supervisor]),
    {ok, {
        {RestartStrategy, MaxRestart, MaxTime},
        [
            {sinit,
                {otp_supervisor, start_cell, [{member, supervisor}]},
                permanent, 1000, supervisor, [otp_supervisor]}
        ]
        }
    };
init_level(RestartStrategy, MaxRestart, MaxTime, {member, worker}) ->
    io:format("~p ~s: Spawning...~n", [self(), worker_level_supervisor]),
    {ok, {
        {RestartStrategy, MaxRestart, MaxTime},
        [
            {winit,
                {otp_supervisor, start_cell, [{member, worker}]},
                permanent, 1000, supervisor, [otp_supervisor]}
        ]
        }
    }.

init_member(RestartStrategy, MaxRestart, MaxTime, overseer) ->
    io:format("~p ~s: Spawning...~n", [self(), init_overseer]),
    {ok, {
        {RestartStrategy, MaxRestart, MaxTime},
        [
            {ol_core,
                {aux_datasocket, start, [ol_overseer1]},
                permanent, 1000, worker, [aux_datasocket]
            }
        ]
        }
    };
init_member(RestartStrategy, MaxRestart, MaxTime, supervisor) ->
    io:format("~p ~s: Spawning...~n", [self(), init_supervisor]),
    {ok, {
        {RestartStrategy, MaxRestart, MaxTime},
        [
            {sl_core,
                {aux_datasocket, start, [sl_overseer1]},
                permanent, 1000, worker, [aux_datasocket]
            }
        ]
        }
    };
init_member(RestartStrategy, MaxRestart, MaxTime, worker) ->
    io:format("~p ~s: Spawning...~n", [self(), init_worker]),
    {ok, {
        {RestartStrategy, MaxRestart, MaxTime},
        [
            {wl_core,
                {aux_datasocket, start, [wl_overseer1]},
                permanent, 1000, worker, [aux_datasocket]
            }
        ]
        }
    }.

The aux_datasocket module is a very simple gen_server that works perfectly fine by itself (as it does nothing but start up the gen_server right now) so I am positive the error does not lie in that module.

  1. Guess: You are making multiple registrations of the same supervisor with the {local, Type} registration point. The way to debug this is to run rel -boot start_sasl and then look for the crash/progress reports and try to figure out what is wrong. The multiplicity-problem suggests this is what is wrong.

    Another important thing is that running this from the shell links the tree to the shell. So if you end up killing the shell due to an error, poof goes the supervisor tree as well. You need to move it out of the linked network:

    Pid = spawn(fun() -> {ok, _} = supervisor_tree_start(), receive stop -> ok end end),
    ...
    Pid ! stop.