Merge pull request #160 from MyoHub/dev

Update Inverse Dynamics Tutorial
MyoHub · May 1, 2024 · 48879cb · 48879cb
2 parents 4e05093 + 29e1144
commit 48879cb
Show file tree

Hide file tree

Showing 3 changed files with 130 additions and 111 deletions.
diff --git a/.github/CHANGELOG.md b/.github/CHANGELOG.md
@@ -4,9 +4,10 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
-## [Unreleased]
+## [2.3.0] - 2024-05-01
 [FEATURE] Support for both Gym/Gymnasium (#142)
 [FEATURE] Add support for TorchRL by @vmoens (5efdf93)
+[FEATURE] Improve Inverse Dynamics tutorial (98daff2). Thanks to @andreh1111
 
 ## [2.2.0] - 2024-01-20
 [FEATURE] Inverse dynamics tutorial. Thanks to @andreh1111 #121

diff --git a/docs/source/tutorials/6_Inverse_Dynamics.ipynb b/docs/source/tutorials/6_Inverse_Dynamics.ipynb
@@ -11,14 +11,13 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 80,
+      "execution_count": null,
       "metadata": {
         "id": "HnDtiLaLUDOQ"
       },
       "outputs": [],
       "source": [
         "from myosuite.simhive.myo_sim.test_sims import TestSims as loader\n",
-        "from scipy.signal import butter, filtfilt\n",
         "from IPython.display import HTML\n",
         "import matplotlib.pyplot as plt\n",
         "from base64 import b64encode\n",
@@ -42,7 +41,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 81,
+      "execution_count": null,
       "metadata": {},
       "outputs": [],
       "source": [
@@ -66,35 +65,53 @@
         "    res = m.solve()\n",
         "    return res.x\n",
         "\n",
-        "def lowpass_filter(signal, cutoff, fs, order=5):\n",
-        "    \"\"\"\n",
-        "    Low-pass filter a signal.\n",
-        "    \"\"\"\n",
-        "    norm_cutoff = cutoff / (0.5 * fs)\n",
-        "    b, a = butter(order, norm_cutoff, btype='low', analog=False)\n",
-        "    return filtfilt(b, a, signal)\n",
-        "\n",
-        "def plot_qxxx(qxxx, joint_names, label1, label2):\n",
+        "def plot_qxxx(qxxx, joint_names, labels):\n",
         "    \"\"\"\n",
         "    Plot generalized variables to be compared.\n",
-        "    qxxx[:,0,:] = time axis\n",
-        "    qxxx[:,1:,0] = reference sequence\n",
+        "    qxxx[:,0,-1] = time axis\n",
         "    qxxx[:,1:,n] = n-th sequence\n",
+        "    qxxx[:,1:,-1] = reference sequence\n",
         "    \"\"\"\n",
         "    fig, axs = plt.subplots(4, 6, figsize=(12, 8))\n",
         "    axs = axs.flatten()\n",
         "    line_objects = []\n",
+        "    linestyle = ['-'] * qxxx.shape[2]\n",
+        "    linestyle[-1] = '--'\n",
         "    for j in range(1, len(joint_names)+1):\n",
         "        ax = axs[j-1]\n",
         "        for i in range(qxxx.shape[2]):\n",
-        "            line, = ax.plot(qxxx[:, 0, 0], qxxx[:, j, i])\n",
+        "            line, = ax.plot(qxxx[:, 0, -1], qxxx[:, j, i], linestyle[i])\n",
         "            if j == 1: # add only one set of lines to the legend\n",
         "                line_objects.append(line)\n",
+        "        ax.set_xlim([qxxx[:, 0].min(), qxxx[:, 0].max()])\n",
         "        ax.set_ylim([qxxx[:, 1:, :].min(), qxxx[:, 1:, :].max()])\n",
         "        ax.set_title(joint_names[j-1])\n",
         "    legend_ax = axs[len(joint_names)] # create legend in the 24th subplot area\n",
         "    legend_ax.axis('off')\n",
-        "    labels = [label1, label2]\n",
+        "    legend_ax.legend(line_objects, labels, loc='center')\n",
+        "    plt.tight_layout()\n",
+        "    plt.show()\n",
+        "\n",
+        "def plot_uxxx(uxxx, muscle_names, labels):\n",
+        "    \"\"\"\n",
+        "    Plot actuator variables to be compared.\n",
+        "    uxxx[:,0,-1] = time axis\n",
+        "    uxxx[:,1:,n] = n-th sequence\n",
+        "    \"\"\"\n",
+        "    fig, axs = plt.subplots(5, 8, figsize=(12, 8))\n",
+        "    axs = axs.flatten()\n",
+        "    line_objects = []\n",
+        "    for j in range(1, len(muscle_names)+1):\n",
+        "        ax = axs[j-1]\n",
+        "        for i in range(uxxx.shape[2]):\n",
+        "            line, = ax.plot(uxxx[:, 0, -1], uxxx[:, j, i])\n",
+        "            if j == 1: # add only one set of lines to the legend\n",
+        "                line_objects.append(line)\n",
+        "        ax.set_xlim([uxxx[:, 0].min(), uxxx[:, 0].max()])\n",
+        "        ax.set_ylim([uxxx[:, 1:, :].min(), uxxx[:, 1:, :].max()])\n",
+        "        ax.set_title(muscle_names[j-1])\n",
+        "    legend_ax = axs[len(muscle_names)] # create legend in the 40th subplot area\n",
+        "    legend_ax.axis('off')\n",
         "    legend_ax.legend(line_objects, labels, loc='center')\n",
         "    plt.tight_layout()\n",
         "    plt.show()"
@@ -124,12 +141,12 @@
       "metadata": {},
       "source": [
         "# Computation of the generalized force\n",
-        "The computation of *ctrl* is dependent on *qfrc*, which can be obtained using inverse dynamics."
+        "The computation of *ctrl* is dependent on *qfrc*, which can be obtained using inverse dynamics. Disabling the constraint solver during this phase avoids simulation divergence."
       ]
     },
     {
       "cell_type": "code",
-      "execution_count": 82,
+      "execution_count": null,
       "metadata": {},
       "outputs": [],
       "source": [
@@ -138,11 +155,10 @@
         "    Compute the generalized force needed to reach the target position in the next mujoco step.\n",
         "    \"\"\"\n",
         "    data_copy = deepcopy(data)\n",
-        "    data_copy.qpos = target_qpos\n",
-        "    data_copy.qvel = (target_qpos - data.qpos) / model.opt.timestep\n",
-        "    mujoco.mj_forward(model, data_copy)\n",
-        "    data_copy.qacc = 0\n",
+        "    data_copy.qacc = (((target_qpos - data.qpos) / model.opt.timestep) - data.qvel) / model.opt.timestep\n",
+        "    model.opt.disableflags += mujoco.mjtDisableBit.mjDSBL_CONSTRAINT\n",
         "    mujoco.mj_inverse(model, data_copy)\n",
+        "    model.opt.disableflags -= mujoco.mjtDisableBit.mjDSBL_CONSTRAINT\n",
         "    return data_copy.qfrc_inverse"
       ]
     },
@@ -161,68 +177,25 @@
       "source": [
         "model0 = loader.get_sim(None, 'hand/myohand.xml')\n",
         "data0 = mujoco.MjData(model0)\n",
-        "all_qpos = np.zeros_like(traj)\n",
-        "all_qfrc = np.zeros_like(traj)\n",
-        "for idx in (pbar := tqdm(range(traj.shape[0]))):\n",
+        "qpos_eval = np.zeros((traj.shape[0], traj.shape[1], 2))\n",
+        "qpos_eval[:,:,-1] = traj\n",
+        "for idx in tqdm(range(traj.shape[0])):\n",
         "    target_qpos = traj[idx, 1:]\n",
         "    qfrc = get_qfrc(model0, data0, target_qpos)\n",
         "    data0.qfrc_applied = qfrc\n",
         "    mujoco.mj_step(model0, data0)\n",
-        "    all_qpos[idx,:] = np.hstack((data0.time, data0.qpos))\n",
-        "    all_qfrc[idx,:] = np.hstack((data0.time, qfrc))\n",
-        "qpos_eval = np.zeros((traj.shape[0], traj.shape[1], 2))\n",
-        "qpos_eval[:,:,0] = traj\n",
-        "qpos_eval[:,:,1] = all_qpos\n",
-        "error = ((qpos_eval[:,1:,1] - qpos_eval[:,1:,0])**2).mean(axis=0)\n",
-        "print(f'Error max (rad): {error.max()}')\n",
+        "    qpos_eval[idx,:,0] = np.hstack((data0.time, data0.qpos))\n",
+        "error = ((qpos_eval[:,1:,0] - qpos_eval[:,1:,-1])**2).mean(axis=0)\n",
+        "print(f'error max (rad): {error.max()}')\n",
         "joint_names = [model0.joint(i).name for i in range(model0.nq)]\n",
-        "plot_qxxx(qpos_eval, joint_names, 'Reference qpos', 'Achieved qpos')"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-        "The difference between the reference trajectory and the achieved one is negligible. However, looking at the computed *qfrc* (e.g., shown below for mcp2_flexion over a limited time interval for the sake of visualization), its trend is highly oscillatory, and its use to compute *ctrl* would result in oscillatory and unphysiological muscle activations."
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {},
-      "outputs": [],
-      "source": [
-        "x1 = 2850; x2 = 3500; qidx = 7\n",
-        "plt.plot(all_qfrc[x1:x2,0], all_qfrc[x1:x2,1+qidx])\n",
-        "plt.title(f'Obtained qfrc for {joint_names[qidx]}')\n",
-        "plt.show()"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-        "Therefore, *qfrc* is low-pass filtered with a cutoff frequency of 5 Hz."
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": 85,
-      "metadata": {},
-      "outputs": [],
-      "source": [
-        "filt_qfrc = np.zeros_like(all_qfrc)\n",
-        "filt_qfrc[:,0] = all_qfrc[:,0]\n",
-        "fs = 1/model0.opt.timestep\n",
-        "cutoff = 5 # Hz\n",
-        "filt_qfrc[:,1:] = np.apply_along_axis(lowpass_filter, 0, all_qfrc[:,1:], cutoff, fs)"
+        "plot_qxxx(qpos_eval, joint_names, ['Achieved qpos', 'Reference qpos'])"
       ]
     },
     {
       "cell_type": "markdown",
       "metadata": {},
       "source": [
-        "The effectiveness of the filtered *qfrc* can be tested below. The obtained error is slightly higher than the previous one, but still negligible. The performance variation w.r.t. cutoff frequency changes is up to the reader."
+        "The difference between the reference trajectory and the achieved one is practically zero. It was observed, however, that by scaling the computed *qfrc* it is possible to achieve an equally valid replication with a larger but still negligible error. Below are examples of the result that can be obtained by dividing the computed *qfrc* by 10, 100, and 1000. Among the three, using a scaler up to 100 allows good replication, while 1000 does not. The advantage in using lower *qfrc* is the easier solution of the optimization problem during the computation of *ctrl* in the next phase."
       ]
     },
     {
@@ -231,17 +204,23 @@
       "metadata": {},
       "outputs": [],
       "source": [
-        "model0 = loader.get_sim(None, 'hand/myohand.xml')\n",
-        "data0 = mujoco.MjData(model0)\n",
-        "all_qpos = np.zeros_like(traj)\n",
-        "for idx in (pbar := tqdm(range(traj.shape[0]))):\n",
-        "    data0.qfrc_applied = filt_qfrc[idx, 1:]\n",
-        "    mujoco.mj_step(model0, data0)\n",
-        "    all_qpos[idx,:] = np.hstack((data0.time, data0.qpos))\n",
-        "qpos_eval[:,:,1] = all_qpos\n",
-        "error = ((qpos_eval[:,1:,1] - qpos_eval[:,1:,0])**2).mean(axis=0)\n",
-        "print(f'Error max (rad): {error.max()}')\n",
-        "plot_qxxx(qpos_eval, joint_names, 'Reference qpos', 'Achieved qpos')"
+        "all_qfrc_scaler = [10, 100, 1000]\n",
+        "qpos_eval = np.zeros((traj.shape[0], traj.shape[1], len(all_qfrc_scaler)+1))\n",
+        "qpos_eval[:,:,-1] = traj\n",
+        "labels = []\n",
+        "for i_scaler, scaler in enumerate(all_qfrc_scaler):\n",
+        "    data0 = mujoco.MjData(model0)\n",
+        "    for idx in tqdm(range(traj.shape[0])):\n",
+        "        target_qpos = traj[idx, 1:]\n",
+        "        qfrc = get_qfrc(model0, data0, target_qpos)\n",
+        "        data0.qfrc_applied = qfrc/scaler\n",
+        "        mujoco.mj_step(model0, data0)\n",
+        "        qpos_eval[idx,:,i_scaler] = np.hstack((data0.time, data0.qpos))\n",
+        "    error = ((qpos_eval[:,1:,i_scaler] - qpos_eval[:,1:,-1])**2).mean(axis=0)\n",
+        "    print(f'qfrc scaler: {scaler} - error max (rad): {error.max()}')\n",
+        "    labels.append(f'Achieved qpos\\nscaler:{scaler}')\n",
+        "labels.append('Reference qpos')\n",
+        "plot_qxxx(qpos_eval, joint_names, labels)"
       ]
     },
     {
@@ -299,13 +278,13 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 60,
+      "execution_count": null,
       "metadata": {
         "id": "gqTH6NTJUDOV"
       },
       "outputs": [],
       "source": [
-        "def get_ctrl(model, data, target_qpos, qfrc):\n",
+        "def get_ctrl(model, data, target_qpos, qfrc, qfrc_scaler, qvel_scaler):\n",
         "    \"\"\"\n",
         "    Compute the control needed to reach the target position in the next mujoco step.\n",
         "    qfrc: generalized force resulting from inverse dynamics.\n",
@@ -316,28 +295,28 @@
         "    tA = model.actuator_dynprm[:,0] * (0.5 + 1.5 * act)\n",
         "    tD = model.actuator_dynprm[:,1] / (0.5 + 1.5 * act)\n",
         "    tausmooth = model.actuator_dynprm[:,2]\n",
+        "    t1 = (tA - tD) * 1.875 / tausmooth\n",
+        "    t2 = (tA + tD) * 0.5\n",
         "    # ---- gain, bias, and moment computation\n",
         "    data_copy = deepcopy(data)\n",
         "    data_copy.qpos = target_qpos\n",
-        "    data_copy.qvel = (target_qpos - data.qpos) / model.opt.timestep\n",
+        "    data_copy.qvel = ((target_qpos - data.qpos) / model.opt.timestep) / qvel_scaler\n",
         "    mujoco.mj_step1(model, data_copy) # gain, bias, and moment depend on qpos and qvel\n",
-        "    gain = np.array([])\n",
-        "    bias = np.array([])\n",
+        "    gain = np.zeros(model.nu)\n",
+        "    bias = np.zeros(model.nu)\n",
         "    for idx_actuator in range(model.nu):\n",
         "        length = data_copy.actuator_length[idx_actuator]\n",
         "        lengthrange = model.actuator_lengthrange[idx_actuator]\n",
         "        velocity = data_copy.actuator_velocity[idx_actuator]\n",
         "        acc0 = model.actuator_acc0[idx_actuator]\n",
         "        prmb = model.actuator_biasprm[idx_actuator,:9]\n",
         "        prmg = model.actuator_gainprm[idx_actuator,:9]\n",
-        "        bias = np.append(bias, mujoco.mju_muscleBias(length, lengthrange, acc0, prmb))\n",
-        "        gain = np.append(gain, min(-1, mujoco.mju_muscleGain(length, velocity, lengthrange, acc0, prmg)))\n",
+        "        bias[idx_actuator] = mujoco.mju_muscleBias(length, lengthrange, acc0, prmb)\n",
+        "        gain[idx_actuator] = min(-1, mujoco.mju_muscleGain(length, velocity, lengthrange, acc0, prmg))\n",
         "    AM = data_copy.actuator_moment.T\n",
         "    # ---- ctrl computation\n",
-        "    t1 = (tA - tD) * 1.875 / tausmooth\n",
-        "    t2 = (tA + tD) * 0.5\n",
         "    P = 2 * AM.T @ AM\n",
-        "    k = AM @ (gain * act) + AM @ bias - qfrc\n",
+        "    k = AM @ (gain * act) + AM @ bias - (qfrc / qfrc_scaler)\n",
         "    q = 2 * k @ AM\n",
         "    lb = gain * (1 - act) * ts / (t2 + t1 * (1 - act))\n",
         "    ub = - gain * act * ts / (t2 - t1 * act)\n",
@@ -358,7 +337,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 45,
+      "execution_count": null,
       "metadata": {
         "id": "mHhY8syiUDOW"
       },
@@ -369,15 +348,55 @@
         "model1.actuator_dynprm[:,2] = tausmooth\n",
         "data1 = mujoco.MjData(model1)\n",
         "all_ctrl = np.zeros((traj.shape[0], 1+model1.nu))\n",
-        "for idx in (pbar := tqdm(range(traj.shape[0]))):\n",
+        "for idx in tqdm(range(traj.shape[0])):\n",
         "    target_qpos = traj[idx, 1:]\n",
-        "    qfrc = filt_qfrc[idx, 1:]\n",
-        "    ctrl = get_ctrl(model1, data1, target_qpos, qfrc)\n",
+        "    qfrc = get_qfrc(model1, data1, target_qpos)\n",
+        "    ctrl = get_ctrl(model1, data1, target_qpos, qfrc, 100, 5)\n",
         "    data1.ctrl = ctrl\n",
         "    mujoco.mj_step(model1, data1)\n",
         "    all_ctrl[idx,:] = np.hstack((data1.time, ctrl))"
       ]
     },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "The use of a new scaler is also included in this phase. Indeed, it was observed that by reducing the velocity set for *gain* computation, the obtained *ctrl* is more stable. Below is an example to compare the results using a scaler equal to 5."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "all_qvel_scaler = [1, 5]\n",
+        "qpos_eval = np.zeros((traj.shape[0], traj.shape[1], len(all_qvel_scaler)+1))\n",
+        "qpos_eval[:,:,-1] = traj\n",
+        "labels_qxxx = []\n",
+        "ctrl_eval = np.zeros((traj.shape[0], 1+model1.nu, len(all_qvel_scaler)))\n",
+        "labels_uxxx = []\n",
+        "for i_scaler, scaler in enumerate(all_qvel_scaler):\n",
+        "    data1 = mujoco.MjData(model1)\n",
+        "    for idx in tqdm(range(traj.shape[0])):\n",
+        "        target_qpos = traj[idx, 1:]\n",
+        "        qfrc = get_qfrc(model1, data1, target_qpos)\n",
+        "        ctrl = get_ctrl(model1, data1, target_qpos, qfrc, 100, scaler)\n",
+        "        data1.ctrl = ctrl\n",
+        "        mujoco.mj_step(model1, data1)\n",
+        "        qpos_eval[idx,:,i_scaler] = np.hstack((data0.time, data1.qpos))\n",
+        "        ctrl_eval[idx,:,i_scaler] = np.hstack((data1.time, ctrl))\n",
+        "    error = ((qpos_eval[:,1:,i_scaler] - qpos_eval[:,1:,-1])**2).mean(axis=0)\n",
+        "    print(f'qvel scaler: {scaler} - error max (rad): {error.max()}')\n",
+        "    labels_qxxx.append(f'Achieved qpos\\nscaler:{scaler}')\n",
+        "    labels_uxxx.append(f'Achieved ctrl\\nscaler:{scaler}')\n",
+        "labels_qxxx.append('Reference qpos')\n",
+        "joint_names = [model1.joint(i).name for i in range(model0.nq)]\n",
+        "plot_qxxx(qpos_eval, joint_names, labels_qxxx)\n",
+        "muscle_names = [model1.actuator(i).name for i in range(model0.nu)]\n",
+        "plot_uxxx(ctrl_eval, muscle_names, labels_uxxx)"
+      ]
+    },
     {
       "cell_type": "markdown",
       "metadata": {
@@ -426,22 +445,21 @@
         "renderer_test.scene.flags[:] = 0\n",
         "# ---- generation loop\n",
         "frames = []\n",
-        "all_qpos = np.zeros_like(traj)\n",
-        "for idx in (pbar := tqdm(range(traj.shape[0]))):\n",
+        "for idx in tqdm(range(traj.shape[0])):\n",
         "    # -- reference trajectory\n",
         "    data_ref.qpos = traj[idx, 1:]\n",
-        "    mujoco.mj_step(model_ref, data_ref)\n",
-        "    renderer_ref.update_scene(data_ref, camera=camera, scene_option=options_ref)\n",
-        "    frame_ref = renderer_ref.render()\n",
+        "    mujoco.mj_step1(model_ref, data_ref)\n",
         "    # -- achieved trajectory\n",
         "    data_test.ctrl = all_ctrl[idx, 1:]\n",
         "    mujoco.mj_step(model_test, data_test)\n",
-        "    all_qpos[idx,:] = np.hstack((data_test.time, data_test.qpos))\n",
-        "    renderer_test.update_scene(data_test, camera=camera, scene_option=options_test)\n",
-        "    frame_test = renderer_test.render()\n",
-        "    # -- frames merging \n",
-        "    frame_merged = np.append(frame_ref, frame_test, axis=1)\n",
-        "    frames.append(frame_merged)\n",
+        "    # -- frames generation\n",
+        "    if not idx % round(0.3/(model_test.opt.timestep*25)):\n",
+        "        renderer_ref.update_scene(data_ref, camera=camera, scene_option=options_ref)\n",
+        "        frame_ref = renderer_ref.render()\n",
+        "        renderer_test.update_scene(data_test, camera=camera, scene_option=options_test)\n",
+        "        frame_test = renderer_test.render()\n",
+        "        frame_merged = np.append(frame_ref, frame_test, axis=1)\n",
+        "        frames.append(frame_merged)\n",
         "# -- frames writing\n",
         "os.makedirs('videos', exist_ok = True)\n",
         "output_name = 'videos/myohand_freemovement.mp4'\n",