Current State

sequenceDiagram
    participant E as entrypoint
    participant ME as MontyExperiment
    participant M as Monty
    participant SM as SensorModule(s)
    participant LM as LearningModule(s)
    participant MS as MotorSystem
    participant P as Policy
    participant DL as Dataloader
    participant D as Dataset

    E ->>+ ME : train
    loop range(train_epochs)
        ME ->>+ ME : run_epoch
            loop various criteria
                ME ->>+ ME : run_episode
                    ME ->>+ ME : pre_episode
                        ME ->>+ M : pre_episode
                            M ->>+ M : reset_episode_steps
                            deactivate M
                            M ->>+ M : switch_to_matching_step
                            deactivate M
                            M ->>+ LM : pre_episode
                                # TODO
                            LM -->>- M : .
                            M ->>+ SM : pre_episode
                                # TODO
                            SM -->>- M : .
                        M -->>- ME : .
                        ME ->>+ DL : pre_episode
                            DL ->>+ MS : pre_episode
                            MS -->>- DL : .
                        DL -->>- ME : .
                    deactivate ME
                    ME ->>+ DL : __iter__
                        DL ->>+ D : reset
                            # TODO
                        D -->>- DL : (observation, state)
                        DL ->> MS : self._state = state
                    DL -->>- ME : .
                    loop enumerate
                        ME ->>+ DL : __next__
                            DL ->>+ MS : __call__
                                # TODO
                            MS -->>- DL : action
                            DL ->>+ D : __getitem__(action)
                                # TODO
                            D -->>- DL : (observation, state)
                            DL ->> MS : self._state = state
                        DL -->>- ME : (step, observation)
                        ME ->>+ ME : pre_step(step, observation)
                            # logger stuff
                        deactivate ME
                        ME ->>+ M : step(observation)
                            alt step_type == "matching_step"
                                M ->>+ M : _matching_step(observation)
                                    M ->>+ M : aggregate_sensory_inputs(observation)
                                        M ->>+ M : get_agent_state
                                            M ->>+ MS : _state
                                            MS -->>- M : _state
                                            M ->>+ MS : _policy
                                            MS -->>- M : _policy
                                            M ->>+ P : get_agent_state(_state)
                                            P -->>- M : agent_state
                                        deactivate M
                                        M ->>+ SM : update_state(agent_state)
                                            # TODO
                                        SM -->>- M : .
                                        M ->>+ SM : step(raw_obs)
                                            # TODO
                                        SM -->>- M : .
                                    deactivate M
                                    M ->>+ M : _step_learning_modules
                                        M ->>+ LM : matching_step(sensor_inputs)
                                            # TODO
                                        LM -->>- M : .
                                    deactivate M
                                    M ->>+ M : _vote
                                        M ->>+ LM : send_out_votes
                                        LM -->>- M : votes
                                        M ->>+ LM : receive_votes(votes)
                                        LM -->>- M : .
                                    deactivate M
                                    M ->>+ M : _pass_goal_states
                                        M ->>+ LM : propose_goal_state
                                        LM -->>- M : goal_state
                                    deactivate M
                                    M ->>+ M : _pass_infos_to_motor_system
                                        alt MontyForGraphMatching | MontyForEvidenceGraphMatching
                                            M ->>+ M : _pass_input_obs_to_motor_system(infos)
                                                M ->>+ MS : _policy
                                                MS -->>- M : _policy
                                                M ->> P : self.processed_observations = infos
                                                alt hasattr(_policy, "tangent_locs")
                                                    M ->> P : self.tangent_locs.append(visited)
                                                    M ->> P : self.tangent_norms.append(visited)
                                                end
                                            deactivate M
                                        end
                                        alt MontyForEvidenceGraphMatching
                                            M ->>+ MS : _policy
                                            MS -->>- M : _policy
                                            alt _policy.use_goal_state_driven_actions
                                                M ->>+ MS : set_driving_goal_state(best_goal_state)
                                                MS -->>- M : .
                                            end
                                        end
                                    deactivate M
                                    M ->>+ M : _set_step_type_and_check_if_done
                                        M ->>+ M : update_step_counters
                                        deactivate M
                                    deactivate M
                                deactivate M
                            else step_type == "exploratory_step"
                                Note right of M : Omitted
                            end
                        M -->>- ME : .
                        ME ->>+ ME : post_step(step, observation)
                            # logger stuff
                        deactivate ME
                    end
                deactivate ME
            end
        deactivate ME
    end
    ME -->>- E : .

Without Using MotorSystem to pass state and more RL-like

sequenceDiagram
    participant E as entrypoint
    participant ME as MontyExperiment
    participant M as Monty
    participant SM as SensorModule(s)
    participant LM as LearningModule(s)
    participant MS as MotorSystem
    participant P as Policy
    participant DL as Dataloader
    participant PP as PositioningProcedure
    participant D as Dataset

    E ->>+ ME : train
    loop range(train_epochs)
        ME ->>+ ME : run_epoch
            loop various criteria
                ME ->>+ ME : run_episode
                    ME ->>+ ME : pre_episode
                        ME ->>+ M : pre_episode
                            M ->>+ M : reset_episode_steps
                            deactivate M
                            M ->>+ M : switch_to_matching_step
                            deactivate M
                            M ->>+ LM : pre_episode
                                # TODO
                            LM -->>- M : .
                            M ->>+ SM : pre_episode
                                # TODO
                            SM -->>- M : .
                            M ->>+ MS : pre_episode
                            MS -->>- M : .
                        M -->>- ME : .
                        ME ->>+ DL : pre_episode
                        DL -->>- ME : .
                        ME ->>+ DL : reset
                            DL ->>+ D : reset
                                # TODO
                            D -->>- DL : (observation, state)
                            DL ->>+ DL : position(observation, state)
                                loop while positioning
                                    DL ->>+ PP : position(observation, state)
                                    PP -->>- DL : action
                                    DL ->>+ D : __getitem__(action)
                                        # TODO
                                    D -->>- DL : (observation, state)
                                end
                            deactivate DL
                        DL -->>- ME : (observation, state)
                    deactivate ME
                    loop
                        ME ->>+ ME : pre_step(observation, state)
                            # logger stuff
                        deactivate ME
                        ME ->>+ M : step(observation, state)
                            alt step_type == "matching_step"
                                M ->>+ M : _matching_step(observation)
                                    M ->>+ M : aggregate_sensory_inputs(observation)
                                        M ->>+ SM : update_state(agent_state)
                                            # TODO
                                        SM -->>- M : .
                                        M ->>+ SM : step(raw_obs)
                                            # TODO
                                        SM -->>- M : .
                                    deactivate M
                                    M ->>+ M : _step_learning_modules
                                        M ->>+ LM : matching_step(sensor_inputs)
                                            # TODO
                                        LM -->>- M : .
                                    deactivate M
                                    M ->>+ M : _vote
                                        M ->>+ LM : send_out_votes
                                        LM -->>- M : votes
                                        M ->>+ LM : receive_votes(votes)
                                        LM -->>- M : .
                                    deactivate M
                                    M ->>+ M : _pass_goal_states
                                        M ->>+ LM : propose_goal_state
                                        LM -->>- M : goal_state
                                    deactivate M
                                    M ->>+ M : _step_motor_system
                                        alt MontyForEvidenceGraphMatching
                                            M ->>+ MS : _policy
                                            MS -->>- M : _policy
                                            alt _policy.use_goal_state_driven_actions
                                                M ->>+ P : set_driving_goal_state(best_goal_state)
                                                P -->>- M : .
                                            end
                                        end
                                        M ->>+ MS : __call__(infos, state)
                                        MS -->>- M : action
                                    deactivate M
                                    M ->>+ M : _set_step_type_and_check_if_done
                                        M ->>+ M : update_step_counters
                                        deactivate M
                                    deactivate M
                                deactivate M
                            else step_type == "exploratory_step"
                                Note right of M : Omitted
                            end
                        M -->>- ME : action
                        ME ->>+ DL : step(action)
                            DL ->>+ D : __getitem__(action)
                                # TODO
                            D -->>- DL : (observation, state)
                        DL -->>- ME : (observation, state)
                        ME ->>+ ME : post_step(observation, state)
                            # logger stuff
                        deactivate ME
                    end
                deactivate ME
            end
        deactivate ME
    end
    ME -->>- E : .

Minimal change for positioning policies only

sequenceDiagram
    participant E as entrypoint
    participant ME as MontyExperiment
    participant M as Monty
    participant SM as SensorModule(s)
    participant LM as LearningModule(s)
    participant MS as MotorSystem
    participant P as Policy
    participant DL as Dataloader
    participant PP as PositioningPolicy
    participant D as Dataset

    E ->>+ ME : train
    loop range(train_epochs)
        ME ->>+ ME : run_epoch
            loop various criteria
                ME ->>+ ME : run_episode
                    ME ->>+ ME : pre_episode
                        ME ->>+ M : pre_episode
                            M ->>+ M : reset_episode_steps
                            deactivate M
                            M ->>+ M : switch_to_matching_step
                            deactivate M
                            M ->>+ LM : pre_episode
                                # TODO
                            LM -->>- M : .
                            M ->>+ SM : pre_episode
                                # TODO
                            SM -->>- M : .
                        M -->>- ME : .
                        ME ->>+ DL : pre_episode
                            DL ->>+ MS : pre_episode
                            MS -->>- DL : .
                        DL -->>- ME : .
                    deactivate ME
                    ME ->>+ DL : __iter__
                        DL ->>+ D : reset
                            # TODO
                        D -->>- DL : (observation, state)
                        DL ->>+ DL : position(observation, state)
                            loop while positioning
                                DL ->>+ PP : position(observation, state)
                                PP -->>- DL : action
                                DL ->>+ D : __getitem__(action)
                                    # TODO
                                D -->>- DL : (observation, state)
                            end
                        deactivate DL
                        DL ->> MS : self._state = state
                    DL -->>- ME : .
                    loop enumerate
                        ME ->>+ DL : __next__
                            DL ->>+ MS : __call__
                                # TODO
                            MS -->>- DL : action
                            DL ->>+ D : __getitem__(action)
                                # TODO
                            D -->>- DL : (observation, state)
                            DL ->> MS : self._state = state
                        DL -->>- ME : (step, observation)
                        ME ->>+ ME : pre_step(step, observation)
                            # logger stuff
                        deactivate ME
                        ME ->>+ M : step(observation)
                            alt step_type == "matching_step"
                                M ->>+ M : _matching_step(observation)
                                    M ->>+ M : aggregate_sensory_inputs(observation)
                                        M ->>+ M : get_agent_state
                                            M ->>+ MS : _state
                                            MS -->>- M : _state
                                            M ->>+ MS : _policy
                                            MS -->>- M : _policy
                                            M ->>+ P : get_agent_state(_state)
                                            P -->>- M : agent_state
                                        deactivate M
                                        M ->>+ SM : update_state(agent_state)
                                            # TODO
                                        SM -->>- M : .
                                        M ->>+ SM : step(raw_obs)
                                            # TODO
                                        SM -->>- M : .
                                    deactivate M
                                    M ->>+ M : _step_learning_modules
                                        M ->>+ LM : matching_step(sensor_inputs)
                                            # TODO
                                        LM -->>- M : .
                                    deactivate M
                                    M ->>+ M : _vote
                                        M ->>+ LM : send_out_votes
                                        LM -->>- M : votes
                                        M ->>+ LM : receive_votes(votes)
                                        LM -->>- M : .
                                    deactivate M
                                    M ->>+ M : _pass_goal_states
                                        M ->>+ LM : propose_goal_state
                                        LM -->>- M : goal_state
                                    deactivate M
                                    M ->>+ M : _pass_infos_to_motor_system
                                        alt MontyForGraphMatching | MontyForEvidenceGraphMatching
                                            M ->>+ M : _pass_input_obs_to_motor_system(infos)
                                                M ->>+ MS : _policy
                                                MS -->>- M : _policy
                                                M ->> P : self.processed_observations = infos
                                                alt hasattr(_policy, "tangent_locs")
                                                    M ->> P : self.tangent_locs.append(visited)
                                                    M ->> P : self.tangent_norms.append(visited)
                                                end
                                            deactivate M
                                        end
                                        alt MontyForEvidenceGraphMatching
                                            M ->>+ MS : _policy
                                            MS -->>- M : _policy
                                            alt _policy.use_goal_state_driven_actions
                                                M ->>+ MS : set_driving_goal_state(best_goal_state)
                                                MS -->>- M : .
                                            end
                                        end
                                    deactivate M
                                    M ->>+ M : _set_step_type_and_check_if_done
                                        M ->>+ M : update_step_counters
                                        deactivate M
                                    deactivate M
                                deactivate M
                            else step_type == "exploratory_step"
                                Note right of M : Omitted
                            end
                        M -->>- ME : .
                        ME ->>+ ME : post_step(step, observation)
                            # logger stuff
                        deactivate ME
                    end
                deactivate ME
            end
        deactivate ME
    end
    ME -->>- E : .

tristanls/monty_main_loop_20250418.md

Current State

Without Using MotorSystem to pass state and more RL-like

Minimal change for positioning policies only