import numpy as np import placo from gymnasium import utils from gymnasium.envs.mujoco import MujocoEnv from gymnasium.spaces import Box from mini_bdx.walk_engine import WalkEngine init_pos = { "right_hip_yaw": 0, "right_hip_roll": 0, "right_hip_pitch": np.deg2rad(45), "right_knee_pitch": np.deg2rad(-90), "right_ankle_pitch": np.deg2rad(45), "left_hip_yaw": 0, "left_hip_roll": 0, "left_hip_pitch": np.deg2rad(45), "left_knee_pitch": np.deg2rad(-90), "left_ankle_pitch": np.deg2rad(45), "head_pitch1": np.deg2rad(-45), "head_pitch2": np.deg2rad(-45), "head_yaw": 0, } dofs = { "right_hip_yaw": 0, "right_hip_roll": 1, "right_hip_pitch": 2, "right_knee_pitch": 3, "right_ankle_pitch": 4, "left_hip_yaw": 5, "left_hip_roll": 6, "left_hip_pitch": 7, "left_knee_pitch": 8, "left_ankle_pitch": 9, "head_pitch1": 10, "head_pitch2": 11, "head_yaw": 12, } class BDXEnv(MujocoEnv, utils.EzPickle): """ ## Action space | Num | Action | Control Min | Control Max | Name (in corresponding XML file) | Joint | Unit | | ---- | ----------------------------------------------------------------- | ----------- | ----------- | -------------------------------- | -------- |------------- | | 0 | Set position of right_hip_yaw | -0.58TODO | 0.58TODO | right_hip_yaw | cylinder | pos (rad) | | 1 | Set position of right_hip_roll | -0.58TODO | 0.58TODO | right_hip_roll | cylinder | pos (rad) | | 2 | Set position of right_hip_pitch | -0.58TODO | 0.58TODO | right_hip_pitch | cylinder | pos (rad) | | 3 | Set position of right_knee_pitch | -0.58TODO | 0.58TODO | right_knee_pitch | cylinder | pos (rad) | | 4 | Set position of right_ankle_pitch | -0.58TODO | 0.58TODO | right_ankle_pitch | cylinder | pos (rad) | | 5 | Set position of left_hip_yaw | -0.58TODO | 0.58TODO | left_hip_yaw | cylinder | pos (rad) | | 6 | Set position of left_hip_roll | -0.58TODO | 0.58TODO | left_hip_roll | cylinder | pos (rad) | | 7 | Set position of left_hip_pitch | -0.58TODO | 0.58TODO | left_hip_pitch | cylinder | pos (rad) | | 8 | Set position of left_knee_pitch | -0.58TODO | 0.58TODO | left_knee_pitch | cylinder | pos (rad) | | 9 | Set position of left_ankle_pitch | -0.58TODO | 0.58TODO | left_ankle_pitch | cylinder | pos (rad) | | 9 | Set position of head_pitch1 | -0.58TODO | 0.58TODO | left_ankle_pitch | cylinder | pos (rad) | | 9 | Set position of head_pitch2 | -0.58TODO | 0.58TODO | left_ankle_pitch | cylinder | pos (rad) | | 9 | Set position of head_yaw | -0.58TODO | 0.58TODO | left_ankle_pitch | cylinder | pos (rad) | ## Observation space | Num | Observation | Min | Max | Name (in corresponding XML file) | Joint | Unit | | --- | -------------------------------------------------------- | ---- | --- | -------------------------------- | -------- | ------------------------ | | 0 | Rotation right_hip_yaw | -Inf | Inf | right_hip_yaw | cylinder | angle (rad) | | 1 | Rotation right_hip_roll | -Inf | Inf | right_hip_roll | cylinder | angle (rad) | | 2 | Rotation right_hip_pitch | -Inf | Inf | right_hip_pitch | cylinder | angle (rad) | | 3 | Rotation right_knee_pitch | -Inf | Inf | right_knee_pitch | cylinder | angle (rad) | | 4 | Rotation right_ankle_pitch | -Inf | Inf | right_ankle_pitch | cylinder | angle (rad) | | 5 | Rotation left_hip_yaw | -Inf | Inf | left_hip_yaw | cylinder | angle (rad) | | 6 | Rotation left_hip_roll | -Inf | Inf | left_hip_roll | cylinder | angle (rad) | | 7 | Rotation left_hip_pitch | -Inf | Inf | left_hip_pitch | cylinder | angle (rad) | | 8 | Rotation left_knee_pitch | -Inf | Inf | left_knee_pitch | cylinder | angle (rad) | | 9 | Rotation left_ankle_pitch | -Inf | Inf | left_ankle_pitch | cylinder | angle (rad) | | 10 | Rotation head_pitch1 | -Inf | Inf | head_pitch1 | cylinder | angle (rad) | | 11 | Rotation head_pitch2 | -Inf | Inf | head_pitch2 | cylinder | angle (rad) | | 12 | Rotation head_yaw | -Inf | Inf | head_yaw | cylinder | angle (rad) | | 13 | Velocity of right_hip_yaw | -Inf | Inf | right_hip_yaw | cylinder | speed (rad/s) | | 14 | Velocity of right_hip_roll | -Inf | Inf | right_hip_roll | cylinder | speed (rad/s) | | 15 | Velocity of right_hip_pitch | -Inf | Inf | right_hip_pitch | cylinder | speed (rad/s) | | 16 | Velocity of right_knee_pitch | -Inf | Inf | right_knee_pitch | cylinder | speed (rad/s) | | 17 | Velocity of right_ankle_pitch | -Inf | Inf | right_ankle_pitch | cylinder | speed (rad/s) | | 18 | Velocity of left_hip_yaw | -Inf | Inf | left_hip_yaw | cylinder | speed (rad/s) | | 19 | Velocity of left_hip_roll | -Inf | Inf | left_hip_roll | cylinder | speed (rad/s) | | 20 | Velocity of left_hip_pitch | -Inf | Inf | left_hip_pitch | cylinder | speed (rad/s) | | 21 | Velocity of left_knee_pitch | -Inf | Inf | left_knee_pitch | cylinder | speed (rad/s) | | 22 | Velocity of left_ankle_pitch | -Inf | Inf | left_ankle_pitch | cylinder | speed (rad/s) | | 23 | Velocity of head_pitch1 | -Inf | Inf | head_pitch1 | cylinder | speed (rad/s) | | 24 | Velocity of head_pitch2 | -Inf | Inf | head_pitch2 | cylinder | speed (rad/s) | | 25 | Velocity of head_yaw | -Inf | Inf | head_yaw | cylinder | speed (rad/s) | | 26 | roll angular velocity | -Inf | Inf | | | |e | 27 | pitch angular velocity | -Inf | Inf | | | | | 28 | yaw angular velocity | -Inf | Inf | | | | | 29 | current x linear velocity | -Inf | Inf | | | | | 30 | current y linear velocity | -Inf | Inf | | | | | 31 | current z linear velocity | -Inf | Inf | | | | | 32 | current x target linear velocity | -Inf | Inf | | | | | 33 | current y target linear velocity | -Inf | Inf | | | | | 34 | current yaw target angular velocity | -Inf | Inf | | | | | 35 | t-1 right_hip_yaw rotation error | -Inf | Inf | | | | | 36 | t-1 right_hip_roll rotation error | -Inf | Inf | | | | | 37 | t-1 right_hip_pitch rotation error | -Inf | Inf | | | | | 38 | t-1 right_knee_pitch rotation error | -Inf | Inf | | | | | 39 | t-1 right_ankle_pitch rotation error | -Inf | Inf | | | | | 40 | t-1 left_hip_yaw rotation error | -Inf | Inf | | | | | 41 | t-1 left_hip_roll rotation error | -Inf | Inf | | | | | 42 | t-1 left_hip_pitch rotation error | -Inf | Inf | | | | | 43 | t-1 left_knee_pitch rotation error | -Inf | Inf | | | | | 44 | t-1 left_ankle_pitch rotation error | -Inf | Inf | | | | | 45 | t-1 head_pitch1 rotation error | -Inf | Inf | | | | | 46 | t-1 head_pitch2 rotation error | -Inf | Inf | | | | | 47 | t-1 head_yaw rotation error | -Inf | Inf | | | | | 48 | t-2 right_hip_yaw rotation error | -Inf | Inf | | | | | 49 | t-2 right_hip_roll rotation error | -Inf | Inf | | | | | 50 | t-2 right_hip_pitch rotation error | -Inf | Inf | | | | | 51 | t-2 right_knee_pitch rotation error | -Inf | Inf | | | | | 52 | t-2 right_ankle_pitch rotation error | -Inf | Inf | | | | | 53 | t-2 left_hip_yaw rotation error | -Inf | Inf | | | | | 54 | t-2 left_hip_roll rotation error | -Inf | Inf | | | | | 55 | t-2 left_hip_pitch rotation error | -Inf | Inf | | | | | 56 | t-2 left_knee_pitch rotation error | -Inf | Inf | | | | | 57 | t-2 left_ankle_pitch rotation error | -Inf | Inf | | | | | 58 | t-2 head_pitch1 rotation error | -Inf | Inf | | | | | 59 | t-2 head_pitch2 rotation error | -Inf | Inf | | | | | 60 | t-2 head_yaw rotation error | -Inf | Inf | | | | | 61 | t-3 right_hip_yaw rotation error | -Inf | Inf | | | | | 62 | t-3 right_hip_roll rotation error | -Inf | Inf | | | | | 63 | t-3 right_hip_pitch rotation error | -Inf | Inf | | | | | 64 | t-3 right_knee_pitch rotation error | -Inf | Inf | | | | | 65 | t-3 right_ankle_pitch rotation error | -Inf | Inf | | | | | 66 | t-3 left_hip_yaw rotation error | -Inf | Inf | | | | | 67 | t-3 left_hip_roll rotation error | -Inf | Inf | | | | | 68 | t-3 left_hip_pitch rotation error | -Inf | Inf | | | | | 69 | t-3 left_knee_pitch rotation error | -Inf | Inf | | | | | 70 | t-3 left_ankle_pitch rotation error | -Inf | Inf | | | | | 71 | t-3 head_pitch1 rotation error | -Inf | Inf | | | | | 72 | t-3 head_pitch2 rotation error | -Inf | Inf | | | | | 73 | t-3 head_yaw rotation error | -Inf | Inf | | | | | 74 | left foot in contact with the floor | -Inf | Inf | | | | | 75 | right foot in contact with the floor | -Inf | Inf | | | | | 76 | t | -Inf | Inf | | | | | x74 | sinus | -Inf | Inf | | | | """ metadata = { "render_modes": [ "human", "rgb_array", "depth_array", ], "render_fps": 100, } def __init__(self, **kwargs): utils.EzPickle.__init__(self, **kwargs) observation_space = Box(low=-np.inf, high=np.inf, shape=(77,), dtype=np.float64) self.target_velocity = np.asarray([1, 0, 0]) # x, y, yaw self.joint_history_length = 3 self.joint_error_history = self.joint_history_length * [13 * [0]] self.joint_ctrl_history = self.joint_history_length * [13 * [0]] self.left_foot_in_contact = 0 self.right_foot_in_contact = 0 self.prev_t = 0 robot = placo.RobotWrapper( "../../mini_bdx/robots/bdx/robot.urdf", placo.Flags.ignore_collisions ) self.walk_engine = WalkEngine(robot) MujocoEnv.__init__( self, "/home/antoine/MISC/mini_BDX/mini_bdx/robots/bdx/scene.xml", 5, observation_space=observation_space, **kwargs, ) # self.frame_skip = 30 def check_contact(self, body1_name, body2_name): body1_id = self.data.body(body1_name).id body2_id = self.data.body(body2_name).id for i in range(self.data.ncon): contact = self.data.contact[i] if ( self.model.geom_bodyid[contact.geom1] == body1_id and self.model.geom_bodyid[contact.geom2] == body2_id ) or ( self.model.geom_bodyid[contact.geom1] == body2_id and self.model.geom_bodyid[contact.geom2] == body1_id ): return True return False def smoothness_reward(self): # Warning, this function only works if the history is 3 :) smooth = 0 t0 = self.joint_ctrl_history[0] t_minus1 = self.joint_ctrl_history[1] t_minus2 = self.joint_ctrl_history[2] for i in range(13): smooth += 2.5 * np.square(t0[i] - t_minus1[i]) + 1.5 * np.square( t0[i] - 2 * t_minus1[i] + t_minus2[i] ) return -smooth def feet_contact_reward(self): return self.right_foot_in_contact + self.left_foot_in_contact def velocity_tracking_reward(self): base_velocity = list(self.data.body("base").cvel[3:][:2]) + [ self.data.body("base").cvel[:3][2] ] base_velocity = np.asarray(base_velocity) return np.exp(-np.square(base_velocity - self.target_velocity).sum()) def joint_angle_deviation_reward(self): current_ctrl = self.data.ctrl init_ctrl = np.array(list(init_pos.values())) return -np.square(current_ctrl - init_ctrl).sum() def walking_height_reward(self): return ( -np.square((self.get_body_com("base")[2] - 0.14)) * 100 ) # "normal" walking height is about 0.14m def upright_reward(self): # angular distance to upright position in reward Z_vec = np.array(self.data.body("base").xmat).reshape(3, 3)[:, 2] return np.square(np.dot(np.array([0, 0, 1]), Z_vec)) def is_terminated(self) -> bool: rot = np.array(self.data.body("base").xmat).reshape(3, 3) Z_vec = rot[:, 2] upright = np.array([0, 0, 1]) return ( self.data.body("base").xpos[2] < 0.08 or np.dot(upright, Z_vec) <= 0 ) # base z is below 0.08m or base has more than 90 degrees of tilt def follow_walk_engine_reward(self, dt): self.walk_engine.update( True, [0, 0, 0], [0, 0, 0], self.left_foot_in_contact, self.right_foot_in_contact, 0.03, 0, 0, 0, 0, 0, dt, ignore_feet_contact=True, ) angles = self.walk_engine.get_angles() return -np.square(self.data.ctrl - list(angles.values())).sum() def step(self, a): # https://www.nature.com/articles/s41598-023-38259-7.pdf # add random force # if np.random.rand() < 0.05: # self.data.xfrc_applied[self.data.body("base").id][:3] = [ # np.random.randint(-5, 5), # np.random.randint(-5, 5), # np.random.randint(-5, 5), # ] # absolute t = self.data.time dt = t - self.prev_t self.do_simulation(a, 1) # self.do_simulation( # a, self.frame_skip # ) # TODO maybe set frame_skip to 1 when bootstrapping with walk engine self.right_foot_in_contact = self.check_contact("foot_module", "floor") self.left_foot_in_contact = self.check_contact("foot_module_2", "floor") reward = ( 0.005 # time reward # + 0.1 * self.walking_height_reward() # + 0.1 * self.upright_reward() # + 0.1 * self.velocity_tracking_reward() # + 0.1 * self.smoothness_reward() # + 0.1 * self.feet_contact_reward() # + 0.1 * self.joint_angle_deviation_reward() # + 0.01 * self.follow_walk_engine_reward(dt) ) # print("time reward", 0.005) # print("walking height reward", 0.1 * self.walking_height_reward()) # print("upright reward", 0.1 * self.upright_reward()) # print("velocity tracking reward", 0.1 * self.velocity_tracking_reward()) # # print("smoothness reward", 0.1 * self.smoothness_reward()) # print("feet contact reward", 0.1 * self.feet_contact_reward()) # # print("joint angle deviation reward", 0.1 * self.joint_angle_deviation_reward()) # # print("follow walk engine reward", 0.01 * self.follow_walk_engine_reward(dt)) # print("reward", reward) # print("===") # if self.is_terminated(): # reward = -10 ob = self._get_obs() if self.render_mode == "human": self.render() self.prev_t = t return ( ob, reward, self.is_terminated(), # terminated False, # truncated dict( time_reward=0.5, walking_height_reward=0.5 * self.walking_height_reward(), upright_reward=0.5 * self.upright_reward(), velocity_tracking_reward=1.0 * self.velocity_tracking_reward(), smoothness_reward=0.1 * self.smoothness_reward(), feet_contact_reward=0.2 * self.feet_contact_reward(), # joint_angle_deviation_reward=0.1 * self.joint_angle_deviation_reward(), ), ) def reset_model(self): self.prev_t = self.data.time # self.model.opt.gravity[:] = [0, 0, 0] # no gravity qpos = self.data.qpos # LATEST # added randomization to the initial position for i in range(7, len(qpos)): qpos[i] = np.random.uniform(-0.3, 0.3) self.init_qpos = qpos.copy().flatten() # Randomize later # self.target_velocity = np.asarray([0, 0, 0]) # x, y, yaw self.target_velocity = np.asarray([2, 0, 0]) # x, y, yaw self.set_state(qpos, self.init_qvel) return self._get_obs() def goto_init(self): self.data.qpos[:] = self.init_qpos[:] for i, value in enumerate(init_pos.values()): self.data.qpos[i + 7] = value def _get_obs(self): joints_rotations = self.data.qpos[7 : 7 + 13] joints_velocities = self.data.qvel[6 : 6 + 13] joints_error = self.data.ctrl - self.data.qpos[7 : 7 + 13] self.joint_error_history.append(joints_error) self.joint_error_history = self.joint_error_history[ -self.joint_history_length : ] angular_velocity = self.data.body("base").cvel[ :3 ] # TODO this is imu, add noise to it later linear_velocity = self.data.body("base").cvel[3:] self.joint_ctrl_history.append(self.data.ctrl.copy()) self.joint_ctrl_history = self.joint_ctrl_history[-self.joint_history_length :] return np.concatenate( [ joints_rotations, joints_velocities, angular_velocity, linear_velocity, self.target_velocity, np.array(self.joint_error_history).flatten(), [self.left_foot_in_contact, self.right_foot_in_contact], [self.data.time], ] )