Skip to content


Added examples.
Browse files Browse the repository at this point in the history
  • Loading branch information
perara committed Nov 4, 2024
1 parent 11c944d commit 0f65a09
Show file tree
Hide file tree
Showing 9 changed files with 807 additions and 244 deletions.
3 changes: 2 additions & 1 deletion jsp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ find_package(GLEW REQUIRED)
find_package(glfw3 REQUIRED)
find_package(CURL REQUIRED)
find_package(fmt REQUIRED)

find_package(nlohmann_json CONFIG REQUIRED)

Expand Down Expand Up @@ -92,6 +92,7 @@ target_link_libraries(jobshop PRIVATE

# Installation
Expand Down
33 changes: 24 additions & 9 deletions jsp/bindings/jobshop_bindings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,6 @@
#include <nanobind/ndarray.h>
#include <nanobind/stl/array.h>
#include <nanobind/stl/optional.h>
#include <nanobind/stl/unique_ptr.h>
#include <nanobind/stl/shared_ptr.h>
#include <optional>
#include "environment/job_shop_environment.h"
#include "algorithms/job_shop_qlearning.h"
Expand Down Expand Up @@ -91,8 +89,16 @@ NB_MODULE(jobshop, m) {
op.machine = nb::cast<int>(state[1]);
op.eligibleMachines = std::bitset<MAX_MACHINES>(nb::cast<std::string>(state[2])); // Convert string back to bitset
op.dependentOperations = nb::cast<std::vector<std::pair<int, int>>>(state[3]);
.def("isEligible", [](const Operation& op, int machine) {
return op.eligibleMachines[machine];
.def("setEligible", [](Operation& op, int machine, bool eligible) {
op.eligibleMachines[machine] = eligible;

// Bind Job struct
nb::class_<Job>(m, "Job")
Expand Down Expand Up @@ -128,23 +134,32 @@ NB_MODULE(jobshop, m) {
.def_rw("completedJobs", &State::completedJobs)
.def_rw("jobStartTimes", &State::jobStartTimes);

// Bind ScheduleEntry struct
nb::class_<ScheduleEntry>(m, "ScheduleEntry")
.def(nb::init<int, int, int, int>(), nb::arg("job"), nb::arg("operation"), nb::arg("start"), nb::arg("duration"))
.def(nb::init<int, int, int, int, int>(),
.def_rw("job", &ScheduleEntry::job)
.def_rw("operation", &ScheduleEntry::operation)
.def_rw("machine", &ScheduleEntry::machine)
.def_rw("start", &ScheduleEntry::start)
.def_rw("duration", &ScheduleEntry::duration)
.def("__getstate__", [](const ScheduleEntry &se) {
return std::make_tuple(se.job, se.operation, se.start, se.duration);
return std::make_tuple(se.job, se.operation, se.machine, se.start, se.duration);
.def("__setstate__", [](ScheduleEntry &se, const std::tuple<int, int, int, int> &state) {
.def("__setstate__", [](ScheduleEntry &se, const std::tuple<int, int, int, int, int> &state) {
new (&se) ScheduleEntry{
std::get<0>(state), // job
std::get<1>(state), // operation
std::get<2>(state), // machine
std::get<3>(state), // start
std::get<4>(state) // duration

Expand Down
245 changes: 13 additions & 232 deletions jsp/examples/
Original file line number Diff line number Diff line change
@@ -1,238 +1,15 @@
from __future__ import annotations

import argparse
from abc import ABC, abstractmethod
from typing import Any, Dict, List, Tuple, Optional, Callable

from typing import List, Optional, Callable
import jobshop
import numpy as np
import gymnasium as gym
from gymnasium import spaces
from sb3_contrib import MaskablePPO
from stable_baselines3.common.callbacks import BaseCallback
from stable_baselines3.common.vec_env import DummyVecEnv

class ObservationSpace(ABC):
def get_observation_space(self, env: JobShopGymEnv) -> spaces.Space:

def get_observation(self, env: JobShopGymEnv) -> np.ndarray:

class DefaultObservationSpace(ObservationSpace):
def get_observation_space(self, env: JobShopGymEnv) -> spaces.Space:
num_jobs = len(env.env.getJobs())
num_machines = env.env.getNumMachines()
max_operations = max(len(job.operations) for job in env.env.getJobs())

low = np.zeros(num_jobs * max_operations + num_jobs * 3 + num_machines)
high = np.inf * np.ones(num_jobs * max_operations + num_jobs * 3 + num_machines)

return spaces.Box(low=low, high=high, dtype=np.float32)

def get_observation(self, env: JobShopGymEnv) -> np.ndarray:
state: jobshop.JobShopState = env.env.getState()
job_progress = np.array(state.jobProgress, copy=False).flatten()
completed_jobs = np.array(state.completedJobs, dtype=np.float32)
job_start_times = np.array(state.jobStartTimes, dtype=np.float32)
machine_availability = np.array(state.machineAvailability, dtype=np.float32)
next_operation_for_job = np.array(state.nextOperationForJob, dtype=np.float32)

return np.concatenate([

class NormalizedObservationSpace(ObservationSpace):
def get_observation_space(self, env: JobShopGymEnv) -> spaces.Space:
num_jobs = len(env.env.getJobs())
num_machines = env.env.getNumMachines()
max_operations = max(len(job.operations) for job in env.env.getJobs())

return spaces.Box(low=0, high=1, shape=(num_jobs * max_operations + num_jobs * 3 + num_machines,), dtype=np.float32)

def get_observation(self, env: JobShopGymEnv) -> np.ndarray:
state: jobshop.JobShopState = env.env.getState()
total_time = env.env.getTotalTime()
max_time = sum(op.duration for job in env.env.getJobs() for op in job.operations)

job_progress = np.array(state.jobProgress, copy=False).flatten() / max_time
completed_jobs = np.array(state.completedJobs, dtype=np.float32)
job_start_times = np.array(state.jobStartTimes, dtype=np.float32) / max_time
machine_availability = np.array(state.machineAvailability, dtype=np.float32) / max_time
next_operation_for_job = np.array(state.nextOperationForJob, dtype=np.float32) / max(len(job.operations) for job in env.env.getJobs())

return np.concatenate([

class RewardFunction(ABC):
def calculate_reward(self, env: JobShopGymEnv, done: bool) -> float:

class MakespanRewardFunction(RewardFunction):
    """Sparse reward: the negated total time at episode end, zero on every other step."""

    def calculate_reward(self, env: JobShopGymEnv, done: bool) -> float:
        """Return the reward for the current step.

        Args:
            env: Gym wrapper whose ``env`` attribute is the wrapped job-shop environment.
            done: Whether the episode has ended on this step.

        Returns:
            ``-env.env.getTotalTime()`` when ``done`` is true, otherwise ``0``.
        """
        # The total time is only queried on the final step; intermediate steps score zero.
        return -env.env.getTotalTime() if done else 0

class ProgressRewardFunction(RewardFunction):
    """Dense reward built from scheduling progress, a utilization term and a completion bonus.

    The instance is stateful: ``last_progress`` persists between calls and is
    never reset by this class, so a fresh instance (or a manual reset of
    ``last_progress``) is needed if progress deltas should restart at zero.
    """

    def __init__(self, completion_bonus: float = 1000):
        """Store the completion bonus and initialise the progress tracker.

        Args:
            completion_bonus: Amount added to the reward on the step where
                ``done`` is true.
        """
        self.completion_bonus = completion_bonus
        self.last_progress = 0
        self.utilization_weight = 0.5  # Match C++ utilization reward weight

    def calculate_reward(self, env: JobShopGymEnv, done: bool) -> float:
        """Return the reward for the current step.

        Args:
            env: Gym wrapper whose ``env`` attribute is the wrapped job-shop environment.
            done: Whether the episode has ended on this step.

        Returns:
            ``progress_reward + utilization_reward``; when ``done`` is true the
            completion bonus is added and the total time is subtracted.
        """
        state = env.env.getState()
        # Fraction of all operations reached so far, summed over jobs.
        current_progress = sum(state.nextOperationForJob) / sum(len(job.operations) for job in env.env.getJobs())

        # Match C++ reward calculation components
        progress_reward = (current_progress - self.last_progress) * 100
        self.last_progress = current_progress

        # Add machine utilization component to match C++ implementation
        total_time = env.env.getTotalTime()
        if total_time > 0:
            utilization_reward = sum(state.machineAvailability) / (total_time * len(state.machineAvailability))
            utilization_reward *= self.utilization_weight
        else:
            # Nothing has been scheduled yet; avoid dividing by zero.
            utilization_reward = 0

        if done:
            return progress_reward + self.completion_bonus - total_time + utilization_reward
        return progress_reward + utilization_reward

class JobShopGymEnv(gym.Env):
metadata: Dict[str, List[str]] = {'render.modes': ['human']}

def __init__(self, jobshop_env: jobshop.JobShopEnvironment, max_steps: int = 200,
observation_space: ObservationSpace = DefaultObservationSpace(),
reward_function: RewardFunction = MakespanRewardFunction()):
self.env: jobshop.JobShopEnvironment = jobshop_env
self.num_jobs: int = len(self.env.getJobs())
self.num_machines: int = self.env.getNumMachines()
self.max_operations: int = max(len(job.operations) for job in self.env.getJobs())
self.max_num_actions: int = self.num_jobs * self.num_machines * self.max_operations
self.action_space: spaces.Discrete = spaces.Discrete(self.max_num_actions)
from jsp.jsp_env import DefaultObservationSpace, MakespanRewardFunction, JobShopGymEnv, NormalizedObservationSpace, \

self.observation_space_impl = observation_space
self.observation_space = self.observation_space_impl.get_observation_space(self)
self.reward_function = reward_function

# Match C++ action indices calculation
self.action_indices = np.array([
[job * self.num_machines * self.max_operations + machine * self.max_operations
for machine in range(self.num_machines)]
for job in range(self.num_jobs)

# Match C++ environment state tracking
self.action_map: Dict[int, jobshop.Action] = {}
self.use_masking: bool = True
self._action_mask: Optional[np.ndarray] = None
self.max_steps: int = max_steps
self.current_step: int = 0
self.best_time: float = float('inf')
self.best_schedule: List[jobshop.Action] = []

def reset(self, **kwargs: Any) -> Tuple[np.ndarray, Dict[str, Any]]:
    """Start a new episode and return the initial observation.

    Args:
        **kwargs: Accepted for Gym API compatibility; not used here.

    Returns:
        A tuple ``(observation, info)`` where ``info`` is an empty dict.
    """
    # Match C++ reset behavior
    self.current_step = 0
    # NOTE(review): the wrapped jobshop environment itself is not reset in this
    # method — confirm whether the binding exposes a reset call that belongs here.
    # Rebuild the mask instead of leaving it as None: step() indexes into it and
    # action_masks() hands it to the caller, so it must exist before the first step.
    self._update_action_mask()
    obs: np.ndarray = self.observation_space_impl.get_observation(self)
    return obs, {}

def step(self, action_idx: int) -> Tuple[np.ndarray, float, bool, bool, Dict[str, Any]]:
    """Apply one action and advance the episode by a step.

    Args:
        action_idx: Flat index into the discrete action space.

    Returns:
        A tuple ``(observation, reward, done, False, info)``. ``done`` is true
        when the wrapped environment reports completion or ``max_steps`` is
        reached. ``info`` always carries ``makespan``, ``current_step`` and
        ``max_steps``; it gains ``invalid_action`` for masked-out actions and
        ``schedule_data`` / ``isDone`` on the final step.
    """
    self.current_step += 1

    # The mask may not have been built yet (e.g. step() called straight after construction).
    if self._action_mask is None:
        self._update_action_mask()

    # Match C++ invalid action handling
    if self._action_mask[action_idx] == 0:
        reward: float = -1  # Penalty for invalid action
        done: bool = self.current_step >= self.max_steps
        obs: np.ndarray = self.observation_space_impl.get_observation(self)
        info: Dict[str, Any] = {
            'invalid_action': True,
            'makespan': self.env.getTotalTime(),
            'current_step': self.current_step,
            'max_steps': self.max_steps,
        }

        if done:
            info["schedule_data"] = self.env.getScheduleData()
            info["isDone"] = True

        return obs, reward, done, False, info

    # Match C++ action execution and state updates
    action: jobshop.Action = self.action_map[action_idx]
    self.env.step(action)
    # The set of possible actions changes once the environment advances, so the
    # mask and the index -> action map must be rebuilt before the next decision.
    self._update_action_mask()

    # Match C++ termination criteria
    done = self.env.isDone() or self.current_step >= self.max_steps
    obs = self.observation_space_impl.get_observation(self)

    # Match C++ reward calculation
    makespan: int = self.env.getTotalTime()
    if makespan < self.best_time and self.env.isDone():
        self.best_time = makespan
        self.best_schedule = self.get_current_schedule()

    info = {
        'makespan': makespan,
        'current_step': self.current_step,
        'max_steps': self.max_steps,
    }

    if done:
        info["schedule_data"] = self.env.getScheduleData()
        info["isDone"] = self.env.isDone()

    reward = self.reward_function.calculate_reward(self, done)

    return obs, reward, done, False, info

def _update_action_mask(self) -> None:
    """Rebuild ``_action_mask`` and extend ``action_map`` from the currently possible actions.

    The mask is a fresh ``int8`` vector of length ``max_num_actions`` with a 1
    at the flat index of every action returned by ``env.getPossibleActions()``.
    ``action_map`` entries are added or overwritten, never removed.
    """
    # Match C++ action masking logic
    candidates: List[jobshop.Action] = self.env.getPossibleActions()
    mask = np.zeros(self.max_num_actions, dtype=np.int8)
    self._action_mask = mask

    for candidate in candidates:
        flat_idx: int = self._action_to_index(candidate)
        mask[flat_idx] = 1
        self.action_map[flat_idx] = candidate

def _action_to_index(self, action: jobshop.Action) -> int:
    """Map an action to its flat index in the discrete action space.

    The precomputed ``action_indices`` table gives the base offset for the
    action's (job, machine) pair; the operation number is added on top.
    """
    # Match C++ action index calculation
    base_offset = self.action_indices[action.job, action.machine]
    return base_offset + action.operation

def get_current_schedule(self) -> List[jobshop.Action]:
    """Return the schedule data reported by the wrapped environment right now."""
    # Match C++ schedule tracking
    schedule = self.env.getScheduleData()
    return schedule

def get_best_schedule(self) -> Tuple[List[jobshop.Action], float]:
    """Return ``(best_schedule, best_time)`` as recorded so far by this wrapper."""
    # Match C++ best schedule tracking
    best = (self.best_schedule, self.best_time)
    return best

def action_masks(self) -> np.ndarray:
    """Return the stored action mask (``None`` until the mask has been built)."""
    current_mask = self._action_mask
    return current_mask

def get_jobshop_env(self) -> jobshop.JobShopEnvironment:
    """Return the wrapped job-shop environment object held in ``self.env``."""
    wrapped = self.env
    return wrapped

class MakespanCallback(BaseCallback):
def __init__(self, verbose: int = 0, plotter: Optional[jobshop.LivePlotter] = None):
Expand Down Expand Up @@ -288,6 +65,7 @@ def _on_training_end(self) -> None:
print("No best schedule data available.")

def run_experiment(algorithm_name: str, taillard_instance: str, use_gui: bool, max_steps: int,
observation_space: str, reward_function: str) -> None:
def make_env() -> Callable[[], gym.Env]:
Expand Down Expand Up @@ -318,16 +96,19 @@ def make_env() -> Callable[[], gym.Env]:
#print(f"Optimal makespan: {ta_optimal}")
#print(f"Gap: {(makespan_callback.best_makespan - ta_optimal) / ta_optimal * 100:.2f}%")

if __name__ == "__main__":
    # Command-line entry point: parse the experiment options and hand them to run_experiment().
    parser: argparse.ArgumentParser = argparse.ArgumentParser(description="Run Job Shop Scheduling experiment with PPO")
    parser.add_argument("--algorithm", choices=["PPO"], default="PPO", help="Algorithm type")
    parser.add_argument("--taillard_instance", default="TA01", choices=[f"TA{i:02d}" for i in range(1, 81)],
                        help="Taillard instance")
    # store_true: args.no_gui is False by default and True only when the flag is given,
    # so `not args.no_gui` below enables the GUI unless --no-gui is passed.
    parser.add_argument("--no-gui", action="store_true", help="Disable GUI")
    parser.add_argument("--max-steps", type=int, default=1000, help="Maximum number of steps per episode")
    parser.add_argument("--observation-space", choices=["default", "normalized"], default="normalized",
                        help="Observation space type")
    parser.add_argument("--reward-function", choices=["makespan", "progress"], default="progress",
                        help="Reward function type")
    args: argparse.Namespace = parser.parse_args()

    run_experiment(args.algorithm, args.taillard_instance, not args.no_gui, args.max_steps,
                   args.observation_space, args.reward_function)

0 comments on commit 0f65a09

Please sign in to comment.