From 77b8cac74379496f29b29e52d6b2c6cdcb9973ff Mon Sep 17 00:00:00 2001
From: diaby24 <tomzw11@users.noreply.github.com>
Date: Thu, 9 Jan 2025 00:27:08 +0800
Subject: [PATCH 1/4] upload nn

---
 nn api.ipynb | 262 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 262 insertions(+)
 create mode 100644 nn api.ipynb

diff --git a/nn api.ipynb b/nn api.ipynb
new file mode 100644
index 0000000..293a879
--- /dev/null
+++ b/nn api.ipynb	
@@ -0,0 +1,262 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import torch\n",
+    "from torch import Tensor\n",
+    "from torch import nn"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "ModuleDict(\n",
+      "  (cond2d): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1))\n",
+      ")\n",
+      "ModuleDict(\n",
+      "  (relu): ReLU()\n",
+      "  (dict1): ModuleDict(\n",
+      "    (cond2d): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1))\n",
+      "  )\n",
+      ")\n"
+     ]
+    }
+   ],
+   "source": [
+    "# moduledict\n",
+    "moduledict1 = nn.ModuleDict({'cond2d': nn.Conv2d(10,10,3)})\n",
+    "moduledict2 = nn.ModuleDict({'relu': nn.ReLU(), 'dict1': moduledict1})\n",
+    "print(moduledict1)\n",
+    "print(moduledict2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "tensor([1.6873, 1.1444, 0.3280, 0.0000, 0.2289, 0.2976, 0.0000, 0.4835, 1.1768,\n",
+      "        2.1066])\n"
+     ]
+    }
+   ],
+   "source": [
+    "# ReLU\n",
+    "input = torch.randn(10)\n",
+    "output = nn.ReLU()(input)\n",
+    "print(output)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "torch.Size([128, 30])\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Linear\n",
+    "input = torch.randn(128, 20)\n",
+    "output = nn.Linear(20,30)(input)\n",
+    "print(output.size())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "tensor(0.7983, grad_fn=<BinaryCrossEntropyBackward0>)\n"
+     ]
+    }
+   ],
+   "source": [
+    "# BCELoss\n",
+    "m = nn.Sigmoid()\n",
+    "input = torch.randn(3, 2, requires_grad=True)\n",
+    "target = torch.rand(3, 2, requires_grad=False)\n",
+    "output = nn.BCELoss()(m(input), target)\n",
+    "print(output)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "tensor([[[[1., 2.],\n",
+      "          [3., 4.]]]])\n",
+      "tensor([[[[1., 1., 2., 2.],\n",
+      "          [1., 1., 2., 2.],\n",
+      "          [3., 3., 4., 4.],\n",
+      "          [3., 3., 4., 4.]]]])\n",
+      "tensor([[[[1.0000, 1.2500, 1.7500, 2.0000],\n",
+      "          [1.5000, 1.7500, 2.2500, 2.5000],\n",
+      "          [2.5000, 2.7500, 3.2500, 3.5000],\n",
+      "          [3.0000, 3.2500, 3.7500, 4.0000]]]])\n",
+      "tensor([[[[1.0000, 1.3333, 1.6667, 2.0000],\n",
+      "          [1.6667, 2.0000, 2.3333, 2.6667],\n",
+      "          [2.3333, 2.6667, 3.0000, 3.3333],\n",
+      "          [3.0000, 3.3333, 3.6667, 4.0000]]]])\n"
+     ]
+    }
+   ],
+   "source": [
+    "input = torch.arange(1, 5, dtype=torch.float32).view(1, 1, 2, 2)\n",
+    "print(input)\n",
+    "\n",
+    "m = nn.Upsample(scale_factor=2, mode='nearest')\n",
+    "print(m(input))\n",
+    "\n",
+    "m = nn.Upsample(scale_factor=2, mode='bilinear')  # align_corners=False\n",
+    "print(m(input))\n",
+    "\n",
+    "m = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)\n",
+    "print(m(input))\n",
+    "\n",
+    "# # Try scaling the same data in a larger tensor\n",
+    "# input_3x3 = torch.zeros(3, 3).view(1, 1, 3, 3)\n",
+    "# input_3x3[:, :, :2, :2].copy_(input)\n",
+    "# input_3x3\n",
+    "\n",
+    "# m = nn.Upsample(scale_factor=2, mode='bilinear')  # align_corners=False\n",
+    "# # Notice that values in top left corner are the same with the small input (except at boundary)\n",
+    "# m(input_3x3)\n",
+    "\n",
+    "# m = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)\n",
+    "# # Notice that values in top left corner are now changed\n",
+    "# m(input_3x3)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 33,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "torch.Size([128, 20])\n"
+     ]
+    }
+   ],
+   "source": [
+    "m = nn.Identity(54, unused_argument1=0.1, unused_argument2=False)\n",
+    "input = torch.randn(128, 20)\n",
+    "output = nn.Identity(54, unused_argument1=0.1, unused_argument2=False)(input)\n",
+    "print(output.size())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 39,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "tensor([[[-0.0402,  0.0576,  0.2065, -0.1296,  0.1422, -0.0145, -0.5225,\n",
+      "          -0.3057,  0.1891,  0.5322, -0.0604,  0.0228, -0.0846, -0.5631,\n",
+      "           0.1770,  0.2748, -0.0023,  0.2098,  0.0771,  0.2433, -0.0380,\n",
+      "           0.1194,  0.0113,  0.4350, -0.0378, -0.0177,  0.0755,  0.1787,\n",
+      "           0.2030, -0.0697,  0.2827, -0.6169]],\n",
+      "\n",
+      "        [[ 0.0134, -0.0031,  0.2839, -0.1390,  0.2343,  0.0092, -0.6052,\n",
+      "          -0.1563,  0.1205,  0.5575, -0.0382,  0.0488, -0.1163, -0.5516,\n",
+      "           0.1896,  0.2201,  0.0050,  0.2661, -0.0560,  0.0487,  0.0055,\n",
+      "          -0.0456, -0.0158,  0.5236, -0.0962, -0.0434,  0.0006,  0.2224,\n",
+      "           0.1588, -0.1222,  0.3569, -0.5909]]], grad_fn=<ViewBackward0>)\n"
+     ]
+    }
+   ],
+   "source": [
+    "multihead_attn = nn.MultiheadAttention(32, 8)\n",
+    "query = torch.randn(2, 1, 32)\n",
+    "key = torch.randn(2, 1, 32)\n",
+    "value = torch.randn(2, 1, 32)\n",
+    "attn_output, attn_output_weights = multihead_attn(query, key, value)\n",
+    "print(attn_output)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 42,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "torch.Size([20, 32, 512])\n"
+     ]
+    }
+   ],
+   "source": [
+    "transformer_model = nn.Transformer(nhead=16, num_encoder_layers=12)\n",
+    "src = torch.rand((10, 32, 512))\n",
+    "tgt = torch.rand((20, 32, 512))\n",
+    "out = transformer_model(src, tgt)\n",
+    "print(out.size())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}

From ed2c02fd4bc0d2589763a890b82ba73e2b0612ab Mon Sep 17 00:00:00 2001
From: tomzwang11 <tomzwang11@gmail.com>
Date: Thu, 9 Jan 2025 11:53:57 +0800
Subject: [PATCH 2/4] first update

---
 api-examples/nn/nn.celldict.ipynb | 131 +++++++++++++++
 nn api.ipynb                      | 262 ------------------------------
 2 files changed, 131 insertions(+), 262 deletions(-)
 create mode 100644 api-examples/nn/nn.celldict.ipynb
 delete mode 100644 nn api.ipynb

diff --git a/api-examples/nn/nn.celldict.ipynb b/api-examples/nn/nn.celldict.ipynb
new file mode 100644
index 0000000..26cfffd
--- /dev/null
+++ b/api-examples/nn/nn.celldict.ipynb
@@ -0,0 +1,131 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import torch\n",
+    "from torch import Tensor\n",
+    "from torch import nn"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "ModuleDict(\n",
+      "  (cond2d): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1))\n",
+      ")\n",
+      "ModuleDict(\n",
+      "  (relu): ReLU()\n",
+      "  (dict1): ModuleDict(\n",
+      "    (cond2d): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1))\n",
+      "  )\n",
+      ")\n"
+     ]
+    }
+   ],
+   "source": [
+    "# moduledict\n",
+    "moduledict1 = nn.ModuleDict({'cond2d': nn.Conv2d(10,10,3)})\n",
+    "moduledict2 = nn.ModuleDict({'relu': nn.ReLU(), 'dict1': moduledict1})\n",
+    "print(moduledict1)\n",
+    "print(moduledict2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "CellDict<\n",
+      "  (cond2d): Conv2d<input_channels=10, output_channels=10, kernel_size=(3, 3), stride=(1, 1), pad_mode=same, padding=0, dilation=(1, 1), group=1, has_bias=False, weight_init=<mindspore.common.initializer.HeUniform object at 0x000001D5E2CC5A10>, bias_init=None, format=NCHW>\n",
+      "  >\n"
+     ]
+    }
+   ],
+   "source": [
+    "import mindspore as ms\n",
+    "from mindspore import nn\n",
+    "celldict1 = nn.CellDict({'cond2d': nn.Conv2d(10,10,3)})\n",
+    "print(celldict1)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**MindSpore支持CellDict(Cell)**\n",
+    "- 打印出的结构没有缩进；weight_init过于详细；\n",
+    "- 建议默认参数不打印，提供选择开关（print_all）"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "TypeError",
+     "evalue": "For 'CellDict', the type of cell can not be CellDict, CellList or SequentialCell, but got CellDict.",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[1;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
+      "Cell \u001b[1;32mIn[7], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m celldict2 \u001b[38;5;241m=\u001b[39m nn\u001b[38;5;241m.\u001b[39mCellDict({\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mrelu\u001b[39m\u001b[38;5;124m'\u001b[39m: nn\u001b[38;5;241m.\u001b[39mReLU(), \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdict1\u001b[39m\u001b[38;5;124m'\u001b[39m: celldict1})\n\u001b[0;32m      2\u001b[0m \u001b[38;5;28mprint\u001b[39m(celldict2)\n",
+      "File \u001b[1;32m~\\anaconda3\\envs\\tutorial\\Lib\\site-packages\\mindspore\\nn\\layer\\container.py:583\u001b[0m, in \u001b[0;36mCellDict.__init__\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m    581\u001b[0m Cell\u001b[38;5;241m.\u001b[39m\u001b[38;5;21m__init__\u001b[39m(\u001b[38;5;28mself\u001b[39m, auto_prefix)\n\u001b[0;32m    582\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(args) \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m1\u001b[39m:\n\u001b[1;32m--> 583\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mupdate(args[\u001b[38;5;241m0\u001b[39m])\n",
+      "File \u001b[1;32m~\\anaconda3\\envs\\tutorial\\Lib\\site-packages\\mindspore\\nn\\layer\\container.py:720\u001b[0m, in \u001b[0;36mCellDict.update\u001b[1;34m(self, cells)\u001b[0m\n\u001b[0;32m    718\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(cells, (OrderedDict, CellDict, abc\u001b[38;5;241m.\u001b[39mMapping)):\n\u001b[0;32m    719\u001b[0m     \u001b[38;5;28;01mfor\u001b[39;00m key, cell \u001b[38;5;129;01min\u001b[39;00m cells\u001b[38;5;241m.\u001b[39mitems():\n\u001b[1;32m--> 720\u001b[0m         \u001b[38;5;28mself\u001b[39m[key] \u001b[38;5;241m=\u001b[39m cell\n\u001b[0;32m    721\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m    722\u001b[0m     \u001b[38;5;28;01mfor\u001b[39;00m \u001b[38;5;28mid\u001b[39m, k_v \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28menumerate\u001b[39m(cells):\n",
+      "File \u001b[1;32m~\\anaconda3\\envs\\tutorial\\Lib\\site-packages\\mindspore\\nn\\layer\\container.py:590\u001b[0m, in \u001b[0;36mCellDict.__setitem__\u001b[1;34m(self, key, cell)\u001b[0m\n\u001b[0;32m    588\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__setitem__\u001b[39m(\u001b[38;5;28mself\u001b[39m, key, cell):\n\u001b[0;32m    589\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_validate_key(key)\n\u001b[1;32m--> 590\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_validate_cell_type(cell)\n\u001b[0;32m    591\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_update_cell_para_name(key, cell)\n\u001b[0;32m    592\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_cells[key] \u001b[38;5;241m=\u001b[39m cell\n",
+      "File \u001b[1;32m~\\anaconda3\\envs\\tutorial\\Lib\\site-packages\\mindspore\\nn\\layer\\container.py:631\u001b[0m, in \u001b[0;36mCellDict._validate_cell_type\u001b[1;34m(self, cell)\u001b[0m\n\u001b[0;32m    628\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mFor \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mcls_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m, the type of cell should be Cell, \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m    629\u001b[0m                     \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbut got \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mtype\u001b[39m(cell)\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m    630\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(cell, (CellDict, CellList, SequentialCell)):\n\u001b[1;32m--> 631\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mFor \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mcls_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m, the type of cell can not be CellDict, CellList or SequentialCell, \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m    632\u001b[0m                     \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbut got \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mtype\u001b[39m(cell)\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
+      "\u001b[1;31mTypeError\u001b[0m: For 'CellDict', the type of cell can not be CellDict, CellList or SequentialCell, but got CellDict."
+     ]
+    }
+   ],
+   "source": [
+    "celldict2 = nn.CellDict({'relu': nn.ReLU(), 'dict1': celldict1})\n",
+    "print(celldict2)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**MindSpore不支持CellDict(CellDict)**\n",
+    "- PyTorch支持ModuleDict/Sequential/ModuleList嵌套"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.11"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/nn api.ipynb b/nn api.ipynb
deleted file mode 100644
index 293a879..0000000
--- a/nn api.ipynb	
+++ /dev/null
@@ -1,262 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 11,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import numpy as np\n",
-    "import torch\n",
-    "from torch import Tensor\n",
-    "from torch import nn"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 16,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "ModuleDict(\n",
-      "  (cond2d): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1))\n",
-      ")\n",
-      "ModuleDict(\n",
-      "  (relu): ReLU()\n",
-      "  (dict1): ModuleDict(\n",
-      "    (cond2d): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1))\n",
-      "  )\n",
-      ")\n"
-     ]
-    }
-   ],
-   "source": [
-    "# moduledict\n",
-    "moduledict1 = nn.ModuleDict({'cond2d': nn.Conv2d(10,10,3)})\n",
-    "moduledict2 = nn.ModuleDict({'relu': nn.ReLU(), 'dict1': moduledict1})\n",
-    "print(moduledict1)\n",
-    "print(moduledict2)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 20,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "tensor([1.6873, 1.1444, 0.3280, 0.0000, 0.2289, 0.2976, 0.0000, 0.4835, 1.1768,\n",
-      "        2.1066])\n"
-     ]
-    }
-   ],
-   "source": [
-    "# ReLU\n",
-    "input = torch.randn(10)\n",
-    "output = nn.ReLU()(input)\n",
-    "print(output)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 23,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "torch.Size([128, 30])\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Linear\n",
-    "input = torch.randn(128, 20)\n",
-    "output = nn.Linear(20,30)(input)\n",
-    "print(output.size())"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 25,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "tensor(0.7983, grad_fn=<BinaryCrossEntropyBackward0>)\n"
-     ]
-    }
-   ],
-   "source": [
-    "# BCELoss\n",
-    "m = nn.Sigmoid()\n",
-    "input = torch.randn(3, 2, requires_grad=True)\n",
-    "target = torch.rand(3, 2, requires_grad=False)\n",
-    "output = nn.BCELoss()(m(input), target)\n",
-    "print(output)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 31,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "tensor([[[[1., 2.],\n",
-      "          [3., 4.]]]])\n",
-      "tensor([[[[1., 1., 2., 2.],\n",
-      "          [1., 1., 2., 2.],\n",
-      "          [3., 3., 4., 4.],\n",
-      "          [3., 3., 4., 4.]]]])\n",
-      "tensor([[[[1.0000, 1.2500, 1.7500, 2.0000],\n",
-      "          [1.5000, 1.7500, 2.2500, 2.5000],\n",
-      "          [2.5000, 2.7500, 3.2500, 3.5000],\n",
-      "          [3.0000, 3.2500, 3.7500, 4.0000]]]])\n",
-      "tensor([[[[1.0000, 1.3333, 1.6667, 2.0000],\n",
-      "          [1.6667, 2.0000, 2.3333, 2.6667],\n",
-      "          [2.3333, 2.6667, 3.0000, 3.3333],\n",
-      "          [3.0000, 3.3333, 3.6667, 4.0000]]]])\n"
-     ]
-    }
-   ],
-   "source": [
-    "input = torch.arange(1, 5, dtype=torch.float32).view(1, 1, 2, 2)\n",
-    "print(input)\n",
-    "\n",
-    "m = nn.Upsample(scale_factor=2, mode='nearest')\n",
-    "print(m(input))\n",
-    "\n",
-    "m = nn.Upsample(scale_factor=2, mode='bilinear')  # align_corners=False\n",
-    "print(m(input))\n",
-    "\n",
-    "m = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)\n",
-    "print(m(input))\n",
-    "\n",
-    "# # Try scaling the same data in a larger tensor\n",
-    "# input_3x3 = torch.zeros(3, 3).view(1, 1, 3, 3)\n",
-    "# input_3x3[:, :, :2, :2].copy_(input)\n",
-    "# input_3x3\n",
-    "\n",
-    "# m = nn.Upsample(scale_factor=2, mode='bilinear')  # align_corners=False\n",
-    "# # Notice that values in top left corner are the same with the small input (except at boundary)\n",
-    "# m(input_3x3)\n",
-    "\n",
-    "# m = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)\n",
-    "# # Notice that values in top left corner are now changed\n",
-    "# m(input_3x3)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 33,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "torch.Size([128, 20])\n"
-     ]
-    }
-   ],
-   "source": [
-    "m = nn.Identity(54, unused_argument1=0.1, unused_argument2=False)\n",
-    "input = torch.randn(128, 20)\n",
-    "output = nn.Identity(54, unused_argument1=0.1, unused_argument2=False)(input)\n",
-    "print(output.size())"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 39,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "tensor([[[-0.0402,  0.0576,  0.2065, -0.1296,  0.1422, -0.0145, -0.5225,\n",
-      "          -0.3057,  0.1891,  0.5322, -0.0604,  0.0228, -0.0846, -0.5631,\n",
-      "           0.1770,  0.2748, -0.0023,  0.2098,  0.0771,  0.2433, -0.0380,\n",
-      "           0.1194,  0.0113,  0.4350, -0.0378, -0.0177,  0.0755,  0.1787,\n",
-      "           0.2030, -0.0697,  0.2827, -0.6169]],\n",
-      "\n",
-      "        [[ 0.0134, -0.0031,  0.2839, -0.1390,  0.2343,  0.0092, -0.6052,\n",
-      "          -0.1563,  0.1205,  0.5575, -0.0382,  0.0488, -0.1163, -0.5516,\n",
-      "           0.1896,  0.2201,  0.0050,  0.2661, -0.0560,  0.0487,  0.0055,\n",
-      "          -0.0456, -0.0158,  0.5236, -0.0962, -0.0434,  0.0006,  0.2224,\n",
-      "           0.1588, -0.1222,  0.3569, -0.5909]]], grad_fn=<ViewBackward0>)\n"
-     ]
-    }
-   ],
-   "source": [
-    "multihead_attn = nn.MultiheadAttention(32, 8)\n",
-    "query = torch.randn(2, 1, 32)\n",
-    "key = torch.randn(2, 1, 32)\n",
-    "value = torch.randn(2, 1, 32)\n",
-    "attn_output, attn_output_weights = multihead_attn(query, key, value)\n",
-    "print(attn_output)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 42,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "torch.Size([20, 32, 512])\n"
-     ]
-    }
-   ],
-   "source": [
-    "transformer_model = nn.Transformer(nhead=16, num_encoder_layers=12)\n",
-    "src = torch.rand((10, 32, 512))\n",
-    "tgt = torch.rand((20, 32, 512))\n",
-    "out = transformer_model(src, tgt)\n",
-    "print(out.size())"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.8.5"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 4
-}

From f528ee359263cb39ee666c9d1fb061eec9584b29 Mon Sep 17 00:00:00 2001
From: tomzwang11 <tomzwang11@gmail.com>
Date: Thu, 9 Jan 2025 21:21:18 +0800
Subject: [PATCH 3/4] update nn

---
 .../nn.bceloss-checkpoint.ipynb               |  93 ++++++++++
 .../nn.celldict-checkpoint.ipynb              | 131 ++++++++++++++
 .../nn.dense-checkpoint.ipynb                 | 113 +++++++++++++
 .../nn.embedding-checkpoint.ipynb             | 151 +++++++++++++++++
 .../nn.identity-checkpoint.ipynb              |  87 ++++++++++
 .../nn.multiheadattention-checkpoint.ipynb    | 160 ++++++++++++++++++
 .../nn.relu-checkpoint.ipynb                  | 131 ++++++++++++++
 .../nn.transformer-checkpoint.ipynb           | 108 ++++++++++++
 .../nn.upsample-checkpoint.ipynb              | 136 +++++++++++++++
 api-examples/nn/nn.bceloss.ipynb              |  93 ++++++++++
 api-examples/nn/nn.dense.ipynb                | 113 +++++++++++++
 api-examples/nn/nn.embedding.ipynb            | 151 +++++++++++++++++
 api-examples/nn/nn.identity.ipynb             |  87 ++++++++++
 api-examples/nn/nn.multiheadattention.ipynb   | 160 ++++++++++++++++++
 api-examples/nn/nn.relu.ipynb                 | 109 ++++++++++++
 api-examples/nn/nn.transformer.ipynb          | 108 ++++++++++++
 api-examples/nn/nn.upsample.ipynb             | 136 +++++++++++++++
 17 files changed, 2067 insertions(+)
 create mode 100644 api-examples/nn/.ipynb_checkpoints/nn.bceloss-checkpoint.ipynb
 create mode 100644 api-examples/nn/.ipynb_checkpoints/nn.celldict-checkpoint.ipynb
 create mode 100644 api-examples/nn/.ipynb_checkpoints/nn.dense-checkpoint.ipynb
 create mode 100644 api-examples/nn/.ipynb_checkpoints/nn.embedding-checkpoint.ipynb
 create mode 100644 api-examples/nn/.ipynb_checkpoints/nn.identity-checkpoint.ipynb
 create mode 100644 api-examples/nn/.ipynb_checkpoints/nn.multiheadattention-checkpoint.ipynb
 create mode 100644 api-examples/nn/.ipynb_checkpoints/nn.relu-checkpoint.ipynb
 create mode 100644 api-examples/nn/.ipynb_checkpoints/nn.transformer-checkpoint.ipynb
 create mode 100644 api-examples/nn/.ipynb_checkpoints/nn.upsample-checkpoint.ipynb
 create mode 100644 api-examples/nn/nn.bceloss.ipynb
 create mode 100644 api-examples/nn/nn.dense.ipynb
 create mode 100644 api-examples/nn/nn.embedding.ipynb
 create mode 100644 api-examples/nn/nn.identity.ipynb
 create mode 100644 api-examples/nn/nn.multiheadattention.ipynb
 create mode 100644 api-examples/nn/nn.relu.ipynb
 create mode 100644 api-examples/nn/nn.transformer.ipynb
 create mode 100644 api-examples/nn/nn.upsample.ipynb

diff --git a/api-examples/nn/.ipynb_checkpoints/nn.bceloss-checkpoint.ipynb b/api-examples/nn/.ipynb_checkpoints/nn.bceloss-checkpoint.ipynb
new file mode 100644
index 0000000..aea0394
--- /dev/null
+++ b/api-examples/nn/.ipynb_checkpoints/nn.bceloss-checkpoint.ipynb
@@ -0,0 +1,93 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import torch\n",
+    "from torch import Tensor\n",
+    "from torch import nn"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "tensor(0.8536, grad_fn=<BinaryCrossEntropyBackward0>)\n"
+     ]
+    }
+   ],
+   "source": [
+    "# BCELoss\n",
+    "m = nn.Sigmoid()\n",
+    "input_np = np.random.rand(3,2)\n",
+    "target_np = np.random.rand(3,2)\n",
+    "output = nn.BCELoss()(m(Tensor(input)), Tensor(target))\n",
+    "print(output)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "0.71798706\n"
+     ]
+    }
+   ],
+   "source": [
+    "import mindspore as ms\n",
+    "from mindspore import nn\n",
+    "from mindspore import Tensor\n",
+    "\n",
+    "m = nn.Sigmoid()\n",
+    "loss = nn.BCELoss(weight=None, reduction='mean')\n",
+    "input = ms.Tensor(input_np, ms.float32)\n",
+    "target = ms.Tensor(target_np, ms.float32)\n",
+    "output = loss(m(input), target)\n",
+    "print(output)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
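+    "- 默认reduction=mean下输出与PyTorch似乎不一致；但上方PyTorch单元传入的是未在本notebook中定义的`input`/`target`，而非`input_np`/`target_np`，与MindSpore单元的输入不同，需用相同输入重新验证后再下结论\n",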
+    "- mint.nn.BCELoss待验证，CPU上不支持"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.11"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/api-examples/nn/.ipynb_checkpoints/nn.celldict-checkpoint.ipynb b/api-examples/nn/.ipynb_checkpoints/nn.celldict-checkpoint.ipynb
new file mode 100644
index 0000000..26cfffd
--- /dev/null
+++ b/api-examples/nn/.ipynb_checkpoints/nn.celldict-checkpoint.ipynb
@@ -0,0 +1,131 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import torch\n",
+    "from torch import Tensor\n",
+    "from torch import nn"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "ModuleDict(\n",
+      "  (cond2d): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1))\n",
+      ")\n",
+      "ModuleDict(\n",
+      "  (relu): ReLU()\n",
+      "  (dict1): ModuleDict(\n",
+      "    (cond2d): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1))\n",
+      "  )\n",
+      ")\n"
+     ]
+    }
+   ],
+   "source": [
+    "# moduledict\n",
+    "moduledict1 = nn.ModuleDict({'cond2d': nn.Conv2d(10,10,3)})\n",
+    "moduledict2 = nn.ModuleDict({'relu': nn.ReLU(), 'dict1': moduledict1})\n",
+    "print(moduledict1)\n",
+    "print(moduledict2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "CellDict<\n",
+      "  (cond2d): Conv2d<input_channels=10, output_channels=10, kernel_size=(3, 3), stride=(1, 1), pad_mode=same, padding=0, dilation=(1, 1), group=1, has_bias=False, weight_init=<mindspore.common.initializer.HeUniform object at 0x000001D5E2CC5A10>, bias_init=None, format=NCHW>\n",
+      "  >\n"
+     ]
+    }
+   ],
+   "source": [
+    "import mindspore as ms\n",
+    "from mindspore import nn\n",
+    "celldict1 = nn.CellDict({'cond2d': nn.Conv2d(10,10,3)})\n",
+    "print(celldict1)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**MindSpore支持CellDict(Cell)**\n",
+    "- 打印出的结构没有缩进；weight_init过于详细；\n",
+    "- 建议默认参数不打印，提供选择开关（print_all）"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "TypeError",
+     "evalue": "For 'CellDict', the type of cell can not be CellDict, CellList or SequentialCell, but got CellDict.",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[1;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
+      "Cell \u001b[1;32mIn[7], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m celldict2 \u001b[38;5;241m=\u001b[39m nn\u001b[38;5;241m.\u001b[39mCellDict({\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mrelu\u001b[39m\u001b[38;5;124m'\u001b[39m: nn\u001b[38;5;241m.\u001b[39mReLU(), \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdict1\u001b[39m\u001b[38;5;124m'\u001b[39m: celldict1})\n\u001b[0;32m      2\u001b[0m \u001b[38;5;28mprint\u001b[39m(celldict2)\n",
+      "File \u001b[1;32m~\\anaconda3\\envs\\tutorial\\Lib\\site-packages\\mindspore\\nn\\layer\\container.py:583\u001b[0m, in \u001b[0;36mCellDict.__init__\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m    581\u001b[0m Cell\u001b[38;5;241m.\u001b[39m\u001b[38;5;21m__init__\u001b[39m(\u001b[38;5;28mself\u001b[39m, auto_prefix)\n\u001b[0;32m    582\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(args) \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m1\u001b[39m:\n\u001b[1;32m--> 583\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mupdate(args[\u001b[38;5;241m0\u001b[39m])\n",
+      "File \u001b[1;32m~\\anaconda3\\envs\\tutorial\\Lib\\site-packages\\mindspore\\nn\\layer\\container.py:720\u001b[0m, in \u001b[0;36mCellDict.update\u001b[1;34m(self, cells)\u001b[0m\n\u001b[0;32m    718\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(cells, (OrderedDict, CellDict, abc\u001b[38;5;241m.\u001b[39mMapping)):\n\u001b[0;32m    719\u001b[0m     \u001b[38;5;28;01mfor\u001b[39;00m key, cell \u001b[38;5;129;01min\u001b[39;00m cells\u001b[38;5;241m.\u001b[39mitems():\n\u001b[1;32m--> 720\u001b[0m         \u001b[38;5;28mself\u001b[39m[key] \u001b[38;5;241m=\u001b[39m cell\n\u001b[0;32m    721\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m    722\u001b[0m     \u001b[38;5;28;01mfor\u001b[39;00m \u001b[38;5;28mid\u001b[39m, k_v \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28menumerate\u001b[39m(cells):\n",
+      "File \u001b[1;32m~\\anaconda3\\envs\\tutorial\\Lib\\site-packages\\mindspore\\nn\\layer\\container.py:590\u001b[0m, in \u001b[0;36mCellDict.__setitem__\u001b[1;34m(self, key, cell)\u001b[0m\n\u001b[0;32m    588\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__setitem__\u001b[39m(\u001b[38;5;28mself\u001b[39m, key, cell):\n\u001b[0;32m    589\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_validate_key(key)\n\u001b[1;32m--> 590\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_validate_cell_type(cell)\n\u001b[0;32m    591\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_update_cell_para_name(key, cell)\n\u001b[0;32m    592\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_cells[key] \u001b[38;5;241m=\u001b[39m cell\n",
+      "File \u001b[1;32m~\\anaconda3\\envs\\tutorial\\Lib\\site-packages\\mindspore\\nn\\layer\\container.py:631\u001b[0m, in \u001b[0;36mCellDict._validate_cell_type\u001b[1;34m(self, cell)\u001b[0m\n\u001b[0;32m    628\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mFor \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mcls_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m, the type of cell should be Cell, \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m    629\u001b[0m                     \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbut got \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mtype\u001b[39m(cell)\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m    630\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(cell, (CellDict, CellList, SequentialCell)):\n\u001b[1;32m--> 631\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mFor \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mcls_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m, the type of cell can not be CellDict, CellList or SequentialCell, \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m    632\u001b[0m                     \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbut got \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mtype\u001b[39m(cell)\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
+      "\u001b[1;31mTypeError\u001b[0m: For 'CellDict', the type of cell can not be CellDict, CellList or SequentialCell, but got CellDict."
+     ]
+    }
+   ],
+   "source": [
+    "celldict2 = nn.CellDict({'relu': nn.ReLU(), 'dict1': celldict1})\n",
+    "print(celldict2)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**MindSpore不支持CellDict(CellDict)**\n",
+    "- PyTorch支持ModuleDict/Sequential/ModuleList嵌套（对应MindSpore的CellDict/SequentialCell/CellList）"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.11"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/api-examples/nn/.ipynb_checkpoints/nn.dense-checkpoint.ipynb b/api-examples/nn/.ipynb_checkpoints/nn.dense-checkpoint.ipynb
new file mode 100644
index 0000000..e4bb35f
--- /dev/null
+++ b/api-examples/nn/.ipynb_checkpoints/nn.dense-checkpoint.ipynb
@@ -0,0 +1,113 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import torch\n",
+    "from torch import Tensor\n",
+    "from torch import nn"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "torch.Size([128, 30])\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Linear\n",
+    "input = np.random.randn(128, 20)\n",
+    "output = nn.Linear(20,30)(Tensor(input))\n",
+    "print(output.size())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 32,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "(128, 30)\n"
+     ]
+    }
+   ],
+   "source": [
+    "import jax\n",
+    "import flax.linen as nn\n",
+    "from jax import numpy as jnp\n",
+    "from jax import random\n",
+    "\n",
+    "input = jnp.ones((128,20))\n",
+    "dense = nn.Dense(features=30)\n",
+    "variables = dense.init(random.key(42), input)\n",
+    "output = dense.apply(variables, input)\n",
+    "print(output.shape)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "(128, 30)\n"
+     ]
+    }
+   ],
+   "source": [
+    "import mindspore as ms\n",
+    "from mindspore import nn\n",
+    "from mindspore import Tensor\n",
+    "output = nn.Dense(20,30)(Tensor(input, dtype=ms.float32))\n",
+    "print(output.shape)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**MindSpore Dense的用法和输出与PyTorch的Linear一致**\n",
+    "- MindSpore的命名与jax（flax.linen）保持一致（Dense），PyTorch对应的是Linear\n",
+    "- jax（flax.linen）需要先用init显式初始化参数，再通过apply把参数传入计算"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.11"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/api-examples/nn/.ipynb_checkpoints/nn.embedding-checkpoint.ipynb b/api-examples/nn/.ipynb_checkpoints/nn.embedding-checkpoint.ipynb
new file mode 100644
index 0000000..69871f1
--- /dev/null
+++ b/api-examples/nn/.ipynb_checkpoints/nn.embedding-checkpoint.ipynb
@@ -0,0 +1,151 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import torch\n",
+    "from torch import Tensor\n",
+    "from torch import nn\n",
+    "import time"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "tensor([[[ 1.1268, -0.9513,  0.6823],\n",
+      "         [-0.3245, -0.8497, -0.7579],\n",
+      "         [-0.6075,  2.7415, -2.0689],\n",
+      "         [-1.4612, -1.0289, -0.4242]],\n",
+      "\n",
+      "        [[-0.6075,  2.7415, -2.0689],\n",
+      "         [ 2.1071,  0.1938,  0.0255],\n",
+      "         [-0.3245, -0.8497, -0.7579],\n",
+      "         [-0.4831, -1.7133,  0.5985]]], grad_fn=<EmbeddingBackward0>)\n",
+      "tensor([[[ 0.0000,  0.0000,  0.0000],\n",
+      "         [-1.0849,  1.1815,  1.0977],\n",
+      "         [ 0.9290,  1.8014, -0.3725],\n",
+      "         [ 0.0787,  1.9454,  1.1112]],\n",
+      "\n",
+      "        [[ 1.7580, -0.3959,  0.9574],\n",
+      "         [ 1.7580, -0.3959,  0.9574],\n",
+      "         [-1.0849,  1.1815,  1.0977],\n",
+      "         [ 0.3496,  1.7060, -0.4346]]], grad_fn=<EmbeddingBackward0>)\n",
+      "0.004005908966064453\n"
+     ]
+    }
+   ],
+   "source": [
+    "start = time.time()\n",
+    "# an Embedding module containing 10 tensors of size 3\n",
+    "embedding = nn.Embedding(10, 3)\n",
+    "# a batch of 2 samples of 4 indices each\n",
+    "input = torch.LongTensor([[1, 2, 4, 5], [4, 3, 2, 9]])\n",
+    "print(embedding(input))\n",
+    "\n",
+    "# example with padding_idx\n",
+    "embedding = nn.Embedding(10, 3, padding_idx=0)\n",
+    "input = torch.LongTensor([[0, 2, 4, 5], [3, 3, 2, 9]])\n",
+    "print(embedding(input))\n",
+    "end = time.time()\n",
+    "print(end-start)\n",
+    "\n",
+    "# # example of changing `pad` vector\n",
+    "# padding_idx = 0\n",
+    "# embedding = nn.Embedding(3, 3, padding_idx=padding_idx)\n",
+    "# print(embedding.weight)\n",
+    "# with torch.no_grad():\n",
+    "#     embedding.weight[padding_idx] = torch.ones(3)\n",
+    "# print(embedding.weight)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[[[-0.0065871   0.01535338  0.01434076]\n",
+      "  [ 0.00331824  0.00791553 -0.02024699]\n",
+      "  [ 0.01239492  0.00385856  0.00478753]\n",
+      "  [-0.01227701  0.00011452  0.0142579 ]]\n",
+      "\n",
+      " [[ 0.01239492  0.00385856  0.00478753]\n",
+      "  [-0.00323425  0.00501913 -0.00874054]\n",
+      "  [ 0.00331824  0.00791553 -0.02024699]\n",
+      "  [ 0.01642694 -0.0065531   0.00681009]]]\n",
+      "[[[ 0.          0.          0.        ]\n",
+      "  [ 0.00261477  0.01124485  0.00828796]\n",
+      "  [-0.00046545  0.00450285  0.00737254]\n",
+      "  [-0.01147073  0.00254146  0.00321731]]\n",
+      "\n",
+      " [[-0.00656487 -0.00497597 -0.00669393]\n",
+      "  [-0.00656487 -0.00497597 -0.00669393]\n",
+      "  [ 0.00261477  0.01124485  0.00828796]\n",
+      "  [ 0.00989333  0.02307425  0.01142865]]]\n",
+      "0.00896143913269043\n"
+     ]
+    }
+   ],
+   "source": [
+    "import mindspore as ms\n",
+    "from mindspore import nn\n",
+    "from mindspore import Tensor\n",
+    "\n",
+    "start = time.time()\n",
+    "embedding = nn.Embedding(10, 3)\n",
+    "# a batch of 2 samples of 4 indices each\n",
+    "input = Tensor([[1, 2, 4, 5], [4, 3, 2, 9]])\n",
+    "print(embedding(input))\n",
+    "\n",
+    "# example with padding_idx\n",
+    "embedding = nn.Embedding(10, 3, padding_idx=0)\n",
+    "input = Tensor([[0, 2, 4, 5], [3, 3, 2, 9]])\n",
+    "print(embedding(input))\n",
+    "end = time.time()\n",
+    "print(end-start)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**MindSpore Embedding的用法和输出形式与PyTorch一致**\n",
+    "- mint.nn.Embedding尚待验证（CPU上不支持）"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.11"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/api-examples/nn/.ipynb_checkpoints/nn.identity-checkpoint.ipynb b/api-examples/nn/.ipynb_checkpoints/nn.identity-checkpoint.ipynb
new file mode 100644
index 0000000..699b21e
--- /dev/null
+++ b/api-examples/nn/.ipynb_checkpoints/nn.identity-checkpoint.ipynb
@@ -0,0 +1,87 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import torch\n",
+    "from torch import Tensor\n",
+    "from torch import nn"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "torch.Size([128, 20])\n"
+     ]
+    }
+   ],
+   "source": [
+    "input = np.random.rand(128, 20)\n",
+    "\n",
+    "m = nn.Identity(54, unused_argument1=0.1, unused_argument2=False)\n",
+    "output = nn.Identity(54, unused_argument1=0.1, unused_argument2=False)(Tensor(input))\n",
+    "print(output.size())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "(128, 20)\n"
+     ]
+    }
+   ],
+   "source": [
+    "import mindspore as ms\n",
+    "from mindspore import nn\n",
+    "from mindspore import Tensor\n",
+    "output = nn.Identity()(Tensor(input, dtype=ms.float32))\n",
+    "print(output.shape)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**MindSpore Identity的用法和输出与PyTorch一致**\n",
+    "- MindSpore的shape打印为元组(128, 20)，比PyTorch的torch.Size([128, 20])简洁一点"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.11"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/api-examples/nn/.ipynb_checkpoints/nn.multiheadattention-checkpoint.ipynb b/api-examples/nn/.ipynb_checkpoints/nn.multiheadattention-checkpoint.ipynb
new file mode 100644
index 0000000..5ab3e11
--- /dev/null
+++ b/api-examples/nn/.ipynb_checkpoints/nn.multiheadattention-checkpoint.ipynb
@@ -0,0 +1,160 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import torch\n",
+    "from torch import Tensor\n",
+    "from torch import nn"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "tensor([[[ 0.0109,  0.3142,  0.3060, -0.0961, -0.2895,  0.2136, -0.1708,\n",
+      "          -0.0997,  0.0912, -0.0031, -0.1519,  0.0259,  0.0421,  0.1709,\n",
+      "           0.2150, -0.1000,  0.3393,  0.2651, -0.0085, -0.0135,  0.0730,\n",
+      "           0.0116,  0.1105, -0.1352, -0.0823,  0.3899,  0.1814,  0.1009,\n",
+      "          -0.2097, -0.3307,  0.1143,  0.2149]],\n",
+      "\n",
+      "        [[ 0.0072,  0.3131,  0.3063, -0.0994, -0.2912,  0.2116, -0.1724,\n",
+      "          -0.1033,  0.0965,  0.0024, -0.1512,  0.0233,  0.0406,  0.1782,\n",
+      "           0.2066, -0.0972,  0.3377,  0.2652, -0.0051, -0.0142,  0.0742,\n",
+      "           0.0048,  0.1180, -0.1363, -0.0826,  0.3924,  0.1792,  0.0996,\n",
+      "          -0.2099, -0.3327,  0.1132,  0.2122]]], grad_fn=<ViewBackward0>)\n",
+      "torch.Size([2, 1, 32])\n"
+     ]
+    }
+   ],
+   "source": [
+    "multihead_attn = nn.MultiheadAttention(32, 8)\n",
+    "query = np.random.rand(2, 1, 32)\n",
+    "key = np.random.rand(2, 1, 32)\n",
+    "value = np.random.rand(2, 1, 32)\n",
+    "attn_output, attn_output_weights = multihead_attn(Tensor(query), Tensor(key), Tensor(value))\n",
+    "print(attn_output)\n",
+    "print(attn_output.shape)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "(2, 1, 32)\n",
+      "[[[ 0.21025246 -1.3815589   1.1856667   0.5420509  -0.03878744\n",
+      "    0.36193654 -0.30533046 -1.1700163  -0.57054275  0.46141142\n",
+      "   -0.30795845  0.39367586 -0.54798263  0.19380882 -0.5910839\n",
+      "   -0.4666907  -0.97481775  0.45212376 -0.10193141 -0.6575114\n",
+      "    0.25120044  0.122086   -1.1403954  -0.42054623 -0.61952204\n",
+      "   -0.6566897   0.73287785 -0.35822588 -0.5958375  -0.32572877\n",
+      "    1.2399867  -0.22128716]]\n",
+      "\n",
+      " [[ 0.03517252 -0.6995417   0.97694     0.07682744  0.24728103\n",
+      "    0.08959051  0.01169628 -0.8235637  -0.2518189   0.59149474\n",
+      "   -0.40409014 -0.5977189   0.09025317  0.08469346 -0.29417813\n",
+      "   -0.34428942 -0.61110383  0.7962756   0.02414452 -0.67949474\n",
+      "    0.6065721  -0.24253327 -0.3226804   0.27078897 -1.3271445\n",
+      "    0.25307137  0.3808884   0.18227924 -0.38555723 -0.42521483\n",
+      "    1.2446275  -0.62895083]]]\n"
+     ]
+    }
+   ],
+   "source": [
+    "import jax\n",
+    "import flax.linen as nn\n",
+    "from jax import numpy as jnp\n",
+    "from jax import random\n",
+    "\n",
+    "input = jnp.ones((2,1,32))\n",
+    "layer = nn.MultiHeadAttention(num_heads=8, qkv_features=32)\n",
+    "variables = layer.init(random.key(42), input)\n",
+    "output = layer.apply(variables, Tensor(query), Tensor(key), Tensor(value))\n",
+    "print(output.shape)\n",
+    "print(output)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[[[ 0.00069493 -0.03911137 -0.17373352 -0.08817849  0.00585917\n",
+      "    0.0038011   0.13423112  0.12640306 -0.13031635 -0.10004204\n",
+      "   -0.22439755 -0.07857274 -0.31675625 -0.07869545  0.18808702\n",
+      "    0.18783928  0.21366805  0.16640219 -0.2740562   0.02806476\n",
+      "   -0.16295458  0.29520345  0.17730789  0.13442674  0.35357815\n",
+      "    0.15538093  0.08111625 -0.09462467 -0.03054206  0.1470299\n",
+      "    0.03899852  0.20147526]]\n",
+      "\n",
+      " [[-0.00058219 -0.03654152 -0.17798103 -0.08727801  0.00280233\n",
+      "    0.00116073  0.1349529   0.12789461 -0.13350655 -0.10066106\n",
+      "   -0.2256635  -0.07729645 -0.31817535 -0.0761395   0.19332808\n",
+      "    0.19098276  0.21913132  0.16640013 -0.27903354  0.03269039\n",
+      "   -0.16899411  0.2947319   0.18080507  0.13609359  0.34764683\n",
+      "    0.16034965  0.08051767 -0.09571798 -0.03570132  0.15065208\n",
+      "    0.03331698  0.19594805]]]\n",
+      "(2, 1, 32)\n"
+     ]
+    }
+   ],
+   "source": [
+    "import mindspore as ms\n",
+    "from mindspore import nn\n",
+    "from mindspore import Tensor\n",
+    "\n",
+    "multihead_attn = nn.MultiheadAttention(32, 8)\n",
+    "attn_output, attn_output_weights = multihead_attn(Tensor(query, ms.float32), Tensor(key, ms.float32), Tensor(value, ms.float32))\n",
+    "print(attn_output)\n",
+    "print(attn_output.shape)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**MindSpore MultiheadAttention（MHA）的用法和输出形式与PyTorch一致**\n",
+    "- MindSpore的Tensor()没有像torch.Tensor()一样内置类型转换，传入float64会报错，需要显式指定ms.float32"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.11"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/api-examples/nn/.ipynb_checkpoints/nn.relu-checkpoint.ipynb b/api-examples/nn/.ipynb_checkpoints/nn.relu-checkpoint.ipynb
new file mode 100644
index 0000000..26cfffd
--- /dev/null
+++ b/api-examples/nn/.ipynb_checkpoints/nn.relu-checkpoint.ipynb
@@ -0,0 +1,131 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import torch\n",
+    "from torch import Tensor\n",
+    "from torch import nn"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "ModuleDict(\n",
+      "  (cond2d): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1))\n",
+      ")\n",
+      "ModuleDict(\n",
+      "  (relu): ReLU()\n",
+      "  (dict1): ModuleDict(\n",
+      "    (cond2d): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1))\n",
+      "  )\n",
+      ")\n"
+     ]
+    }
+   ],
+   "source": [
+    "# moduledict\n",
+    "moduledict1 = nn.ModuleDict({'cond2d': nn.Conv2d(10,10,3)})\n",
+    "moduledict2 = nn.ModuleDict({'relu': nn.ReLU(), 'dict1': moduledict1})\n",
+    "print(moduledict1)\n",
+    "print(moduledict2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "CellDict<\n",
+      "  (cond2d): Conv2d<input_channels=10, output_channels=10, kernel_size=(3, 3), stride=(1, 1), pad_mode=same, padding=0, dilation=(1, 1), group=1, has_bias=False, weight_init=<mindspore.common.initializer.HeUniform object at 0x000001D5E2CC5A10>, bias_init=None, format=NCHW>\n",
+      "  >\n"
+     ]
+    }
+   ],
+   "source": [
+    "import mindspore as ms\n",
+    "from mindspore import nn\n",
+    "celldict1 = nn.CellDict({'cond2d': nn.Conv2d(10,10,3)})\n",
+    "print(celldict1)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**MindSpore支持CellDict(Cell)**\n",
+    "- 打印出的结构没有缩进；weight_init过于详细；\n",
+    "- 建议默认参数不打印，提供选择开关（print_all）"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "TypeError",
+     "evalue": "For 'CellDict', the type of cell can not be CellDict, CellList or SequentialCell, but got CellDict.",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[1;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
+      "Cell \u001b[1;32mIn[7], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m celldict2 \u001b[38;5;241m=\u001b[39m nn\u001b[38;5;241m.\u001b[39mCellDict({\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mrelu\u001b[39m\u001b[38;5;124m'\u001b[39m: nn\u001b[38;5;241m.\u001b[39mReLU(), \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdict1\u001b[39m\u001b[38;5;124m'\u001b[39m: celldict1})\n\u001b[0;32m      2\u001b[0m \u001b[38;5;28mprint\u001b[39m(celldict2)\n",
+      "File \u001b[1;32m~\\anaconda3\\envs\\tutorial\\Lib\\site-packages\\mindspore\\nn\\layer\\container.py:583\u001b[0m, in \u001b[0;36mCellDict.__init__\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m    581\u001b[0m Cell\u001b[38;5;241m.\u001b[39m\u001b[38;5;21m__init__\u001b[39m(\u001b[38;5;28mself\u001b[39m, auto_prefix)\n\u001b[0;32m    582\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(args) \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m1\u001b[39m:\n\u001b[1;32m--> 583\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mupdate(args[\u001b[38;5;241m0\u001b[39m])\n",
+      "File \u001b[1;32m~\\anaconda3\\envs\\tutorial\\Lib\\site-packages\\mindspore\\nn\\layer\\container.py:720\u001b[0m, in \u001b[0;36mCellDict.update\u001b[1;34m(self, cells)\u001b[0m\n\u001b[0;32m    718\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(cells, (OrderedDict, CellDict, abc\u001b[38;5;241m.\u001b[39mMapping)):\n\u001b[0;32m    719\u001b[0m     \u001b[38;5;28;01mfor\u001b[39;00m key, cell \u001b[38;5;129;01min\u001b[39;00m cells\u001b[38;5;241m.\u001b[39mitems():\n\u001b[1;32m--> 720\u001b[0m         \u001b[38;5;28mself\u001b[39m[key] \u001b[38;5;241m=\u001b[39m cell\n\u001b[0;32m    721\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m    722\u001b[0m     \u001b[38;5;28;01mfor\u001b[39;00m \u001b[38;5;28mid\u001b[39m, k_v \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28menumerate\u001b[39m(cells):\n",
+      "File \u001b[1;32m~\\anaconda3\\envs\\tutorial\\Lib\\site-packages\\mindspore\\nn\\layer\\container.py:590\u001b[0m, in \u001b[0;36mCellDict.__setitem__\u001b[1;34m(self, key, cell)\u001b[0m\n\u001b[0;32m    588\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__setitem__\u001b[39m(\u001b[38;5;28mself\u001b[39m, key, cell):\n\u001b[0;32m    589\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_validate_key(key)\n\u001b[1;32m--> 590\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_validate_cell_type(cell)\n\u001b[0;32m    591\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_update_cell_para_name(key, cell)\n\u001b[0;32m    592\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_cells[key] \u001b[38;5;241m=\u001b[39m cell\n",
+      "File \u001b[1;32m~\\anaconda3\\envs\\tutorial\\Lib\\site-packages\\mindspore\\nn\\layer\\container.py:631\u001b[0m, in \u001b[0;36mCellDict._validate_cell_type\u001b[1;34m(self, cell)\u001b[0m\n\u001b[0;32m    628\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mFor \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mcls_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m, the type of cell should be Cell, \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m    629\u001b[0m                     \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbut got \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mtype\u001b[39m(cell)\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m    630\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(cell, (CellDict, CellList, SequentialCell)):\n\u001b[1;32m--> 631\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mFor \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mcls_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m, the type of cell can not be CellDict, CellList or SequentialCell, \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m    632\u001b[0m                     \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbut got \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mtype\u001b[39m(cell)\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
+      "\u001b[1;31mTypeError\u001b[0m: For 'CellDict', the type of cell can not be CellDict, CellList or SequentialCell, but got CellDict."
+     ]
+    }
+   ],
+   "source": [
+    "celldict2 = nn.CellDict({'relu': nn.ReLU(), 'dict1': celldict1})\n",
+    "print(celldict2)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**MindSpore不支持CellDict(CellDict)**\n",
+    "- PyTorch支持ModuleDict/Sequential/ModuleList嵌套（对应MindSpore的CellDict/SequentialCell/CellList）"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.11"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/api-examples/nn/.ipynb_checkpoints/nn.transformer-checkpoint.ipynb b/api-examples/nn/.ipynb_checkpoints/nn.transformer-checkpoint.ipynb
new file mode 100644
index 0000000..3ec48af
--- /dev/null
+++ b/api-examples/nn/.ipynb_checkpoints/nn.transformer-checkpoint.ipynb
@@ -0,0 +1,108 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import torch\n",
+    "from torch import Tensor\n",
+    "from torch import nn\n",
+    "\n",
+    "import time"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "C:\\Users\\wangzeyangyi\\anaconda3\\envs\\tutorial\\Lib\\site-packages\\torch\\nn\\modules\\transformer.py:379: UserWarning: enable_nested_tensor is True, but self.use_nested_tensor is False because encoder_layer.self_attn.batch_first was not True(use batch_first for better inference performance)\n",
+      "  warnings.warn(\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "torch.Size([20, 32, 512])\n",
+      "2.4219019412994385\n"
+     ]
+    }
+   ],
+   "source": [
+    "start = time.time()\n",
+    "transformer_model = nn.Transformer(nhead=16, num_encoder_layers=12)\n",
+    "src = np.random.rand(10, 32, 512)\n",
+    "tgt = np.random.rand(20, 32, 512)\n",
+    "out = transformer_model(Tensor(src), Tensor(tgt))\n",
+    "print(out.size())\n",
+    "end = time.time()\n",
+    "print(end-start)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "(20, 32, 512)\n",
+      "13.059895038604736\n"
+     ]
+    }
+   ],
+   "source": [
+    "import mindspore as ms\n",
+    "from mindspore import nn\n",
+    "from mindspore import Tensor\n",
+    "\n",
+    "start = time.time()\n",
+    "transformer_model = nn.Transformer(nhead=16, num_encoder_layers=12)\n",
+    "out = transformer_model(Tensor(src, ms.float32), Tensor(tgt, ms.float32))\n",
+    "print(out.shape)\n",
+    "end = time.time()\n",
+    "print(end-start)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**MindSpore Transformer的用法和输出形状与PyTorch一致**\n",
+    "- MindSpore Transformer在CPU上的性能待优化（本次测试含模型构建耗时13.06s，PyTorch为2.42s，约5.4x）\n",
+    "- jax没有官方Transformer API"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.11"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/api-examples/nn/.ipynb_checkpoints/nn.upsample-checkpoint.ipynb b/api-examples/nn/.ipynb_checkpoints/nn.upsample-checkpoint.ipynb
new file mode 100644
index 0000000..794c7b5
--- /dev/null
+++ b/api-examples/nn/.ipynb_checkpoints/nn.upsample-checkpoint.ipynb
@@ -0,0 +1,136 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import torch\n",
+    "from torch import Tensor\n",
+    "from torch import nn"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[[[[1 1]\n",
+      "   [1 0]]]]\n",
+      "tensor([[[[1., 1., 1., 1.],\n",
+      "          [1., 1., 1., 1.],\n",
+      "          [1., 1., 0., 0.],\n",
+      "          [1., 1., 0., 0.]]]])\n",
+      "tensor([[[[1.0000, 1.0000, 1.0000, 1.0000],\n",
+      "          [1.0000, 0.9375, 0.8125, 0.7500],\n",
+      "          [1.0000, 0.8125, 0.4375, 0.2500],\n",
+      "          [1.0000, 0.7500, 0.2500, 0.0000]]]])\n",
+      "tensor([[[[1.0000, 1.0000, 1.0000, 1.0000],\n",
+      "          [1.0000, 0.8889, 0.7778, 0.6667],\n",
+      "          [1.0000, 0.7778, 0.5556, 0.3333],\n",
+      "          [1.0000, 0.6667, 0.3333, 0.0000]]]])\n"
+     ]
+    }
+   ],
+   "source": [
+    "# input = torch.arange(1, 5, dtype=torch.float32).view(1, 1, 2, 2)\n",
+    "input = np.random.randint(2, size=(1,1,2,2))\n",
+    "print(input)\n",
+    "\n",
+    "m = nn.Upsample(scale_factor=2, mode='nearest')\n",
+    "print(m(Tensor(input)))\n",
+    "\n",
+    "m = nn.Upsample(scale_factor=2, mode='bilinear')  # align_corners=False\n",
+    "print(m(Tensor(input)))\n",
+    "\n",
+    "m = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)\n",
+    "print(m(Tensor(input)))\n",
+    "\n",
+    "# # Try scaling the same data in a larger tensor\n",
+    "# input_3x3 = torch.zeros(3, 3).view(1, 1, 3, 3)\n",
+    "# input_3x3[:, :, :2, :2].copy_(input)\n",
+    "# input_3x3\n",
+    "\n",
+    "# m = nn.Upsample(scale_factor=2, mode='bilinear')  # align_corners=False\n",
+    "# # Notice that values in top left corner are the same with the small input (except at boundary)\n",
+    "# m(input_3x3)\n",
+    "\n",
+    "# m = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)\n",
+    "# # Notice that values in top left corner are now changed\n",
+    "# m(input_3x3)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "ValueError",
+     "evalue": "For 'interpolate', 'scale_factor' option cannot currently be set with the mode = nearest and dim = 4D.",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[1;31mValueError\u001b[0m                                Traceback (most recent call last)",
+      "Cell \u001b[1;32mIn[6], line 6\u001b[0m\n\u001b[0;32m      3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mmindspore\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Tensor\n\u001b[0;32m      5\u001b[0m m \u001b[38;5;241m=\u001b[39m nn\u001b[38;5;241m.\u001b[39mUpsample(scale_factor\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m2.0\u001b[39m, mode\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mnearest\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m----> 6\u001b[0m \u001b[38;5;28mprint\u001b[39m(m(Tensor(\u001b[38;5;28minput\u001b[39m)))\n\u001b[0;32m      8\u001b[0m m \u001b[38;5;241m=\u001b[39m nn\u001b[38;5;241m.\u001b[39mUpsample(scale_factor\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m2.0\u001b[39m, mode\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mbilinear\u001b[39m\u001b[38;5;124m'\u001b[39m)  \u001b[38;5;66;03m# align_corners=False\u001b[39;00m\n\u001b[0;32m      9\u001b[0m \u001b[38;5;28mprint\u001b[39m(m(Tensor(\u001b[38;5;28minput\u001b[39m)))\n",
+      "File \u001b[1;32m~\\anaconda3\\envs\\tutorial\\Lib\\site-packages\\mindspore\\nn\\cell.py:733\u001b[0m, in \u001b[0;36mCell.__call__\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m    730\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mrequires_grad \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_dynamic_shape_inputs \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmixed_precision_type):\n\u001b[0;32m    731\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hook \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hook \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hook \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hook \u001b[38;5;129;01mor\u001b[39;00m\n\u001b[0;32m    732\u001b[0m             \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_shard_fn \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_recompute_cell \u001b[38;5;129;01mor\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhas_bprop \u001b[38;5;129;01mand\u001b[39;00m _pynative_executor\u001b[38;5;241m.\u001b[39mrequires_grad())):\n\u001b[1;32m--> 733\u001b[0m         \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mconstruct(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m    735\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_run_construct(\u001b[38;5;241m*\u001b[39margs, 
\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m    737\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_complex_call(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n",
+      "File \u001b[1;32m~\\anaconda3\\envs\\tutorial\\Lib\\site-packages\\mindspore\\nn\\layer\\basic.py:473\u001b[0m, in \u001b[0;36mUpsample.construct\u001b[1;34m(self, x)\u001b[0m\n\u001b[0;32m    472\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mconstruct\u001b[39m(\u001b[38;5;28mself\u001b[39m, x):\n\u001b[1;32m--> 473\u001b[0m     out \u001b[38;5;241m=\u001b[39m F\u001b[38;5;241m.\u001b[39minterpolate(x, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msize, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mscale_factor, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmode,\n\u001b[0;32m    474\u001b[0m                         \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39malign_corners, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mrecompute_scale_factor)\n\u001b[0;32m    475\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m out\n",
+      "File \u001b[1;32m~\\anaconda3\\envs\\tutorial\\Lib\\site-packages\\mindspore\\ops\\function\\nn_func.py:2613\u001b[0m, in \u001b[0;36minterpolate\u001b[1;34m(input, size, scale_factor, mode, align_corners, recompute_scale_factor)\u001b[0m\n\u001b[0;32m   2611\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m   2612\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m dim_unknown \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mFalse\u001b[39;00m:\n\u001b[1;32m-> 2613\u001b[0m         _interpolate_scale_factor_check(scale_factor, mode, rank,\n\u001b[0;32m   2614\u001b[0m                                         supported_dict)\n\u001b[0;32m   2616\u001b[0m \u001b[38;5;66;03m# align_corners\u001b[39;00m\n\u001b[0;32m   2617\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m align_corners \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n",
+      "File \u001b[1;32m~\\anaconda3\\envs\\tutorial\\Lib\\site-packages\\mindspore\\ops\\function\\nn_func.py:2293\u001b[0m, in \u001b[0;36m_interpolate_scale_factor_check\u001b[1;34m(scale_factor, mode, rank, supported_dict)\u001b[0m\n\u001b[0;32m   2288\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m   2289\u001b[0m \u001b[38;5;124;03mscale_factor check\u001b[39;00m\n\u001b[0;32m   2290\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m   2291\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m scale_factor \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mscale_factor\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m supported_dict\u001b[38;5;241m.\u001b[39mget(\n\u001b[0;32m   2292\u001b[0m         mode, {})\u001b[38;5;241m.\u001b[39mget(rank):\n\u001b[1;32m-> 2293\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[0;32m   2294\u001b[0m         \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mFor \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124minterpolate\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m, \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mscale_factor\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m option cannot currently be set with the \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m   2295\u001b[0m         \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmode = \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mmode\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m and dim = \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mrank\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124mD.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
+      "\u001b[1;31mValueError\u001b[0m: For 'interpolate', 'scale_factor' option cannot currently be set with the mode = nearest and dim = 4D."
+     ]
+    }
+   ],
+   "source": [
+    "import mindspore as ms\n",
+    "from mindspore import nn\n",
+    "from mindspore import Tensor\n",
+    "\n",
+    "m = nn.Upsample(scale_factor=2.0, mode='nearest')\n",
+    "print(m(Tensor(input)))\n",
+    "\n",
+    "m = nn.Upsample(scale_factor=2.0, mode='bilinear')  # align_corners=False\n",
+    "print(m(Tensor(input)))\n",
+    "\n",
+    "m = nn.Upsample(scale_factor=2.0, mode='bilinear', align_corners=True)\n",
+    "print(m(Tensor(input)))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "- jax没有upsample接口\n",
+    "- MindSpore的scale_factor参数必须为float\n",
+    "- MindSpore部分输入不支持：mode='nearest'且输入为4D时不能设置scale_factor（抛出ValueError）"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.11"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/api-examples/nn/nn.bceloss.ipynb b/api-examples/nn/nn.bceloss.ipynb
new file mode 100644
index 0000000..aea0394
--- /dev/null
+++ b/api-examples/nn/nn.bceloss.ipynb
@@ -0,0 +1,93 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import torch\n",
+    "from torch import Tensor\n",
+    "from torch import nn"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "tensor(0.8536, grad_fn=<BinaryCrossEntropyBackward0>)\n"
+     ]
+    }
+   ],
+   "source": [
+    "# BCELoss\n",
+    "m = nn.Sigmoid()\n",
+    "input_np = np.random.rand(3,2)\n",
+    "target_np = np.random.rand(3,2)\n",
+    "output = nn.BCELoss()(m(Tensor(input_np)), Tensor(target_np))\n",
+    "print(output)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "0.71798706\n"
+     ]
+    }
+   ],
+   "source": [
+    "import mindspore as ms\n",
+    "from mindspore import nn\n",
+    "from mindspore import Tensor\n",
+    "\n",
+    "m = nn.Sigmoid()\n",
+    "loss = nn.BCELoss(weight=None, reduction='mean')\n",
+    "input = ms.Tensor(input_np, ms.float32)\n",
+    "target = ms.Tensor(target_np, ms.float32)\n",
+    "output = loss(m(input), target)\n",
+    "print(output)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "- 默认reduction=mean下输出与PyTorch似乎不一致（该结论需在两个框架使用完全相同的输入时重新验证）\n",
+    "- mint.nn.BCELoss待验证，CPU上不支持"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.11"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/api-examples/nn/nn.dense.ipynb b/api-examples/nn/nn.dense.ipynb
new file mode 100644
index 0000000..e4bb35f
--- /dev/null
+++ b/api-examples/nn/nn.dense.ipynb
@@ -0,0 +1,113 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import torch\n",
+    "from torch import Tensor\n",
+    "from torch import nn"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "torch.Size([128, 30])\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Linear\n",
+    "input = np.random.randn(128, 20)\n",
+    "output = nn.Linear(20,30)(Tensor(input))\n",
+    "print(output.size())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 32,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "(128, 30)\n"
+     ]
+    }
+   ],
+   "source": [
+    "import jax\n",
+    "import flax.linen as nn\n",
+    "from jax import numpy as jnp\n",
+    "from jax import random\n",
+    "\n",
+    "input = jnp.ones((128,20))\n",
+    "dense = nn.Dense(features=30)\n",
+    "variables = dense.init(random.key(42), input)\n",
+    "output = dense.apply(variables, input)\n",
+    "print(output.shape)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "(128, 30)\n"
+     ]
+    }
+   ],
+   "source": [
+    "import mindspore as ms\n",
+    "from mindspore import nn\n",
+    "from mindspore import Tensor\n",
+    "output = nn.Dense(20,30)(Tensor(input, dtype=ms.float32))\n",
+    "print(output.shape)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**MindSpore Dense用法与输出与PyTorch一致**\n",
+    "- MindSpore的命名与jax保持一致（Dense）\n",
+    "- jax需要额外定义参数"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.11"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/api-examples/nn/nn.embedding.ipynb b/api-examples/nn/nn.embedding.ipynb
new file mode 100644
index 0000000..69871f1
--- /dev/null
+++ b/api-examples/nn/nn.embedding.ipynb
@@ -0,0 +1,151 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import torch\n",
+    "from torch import Tensor\n",
+    "from torch import nn\n",
+    "import time"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "tensor([[[ 1.1268, -0.9513,  0.6823],\n",
+      "         [-0.3245, -0.8497, -0.7579],\n",
+      "         [-0.6075,  2.7415, -2.0689],\n",
+      "         [-1.4612, -1.0289, -0.4242]],\n",
+      "\n",
+      "        [[-0.6075,  2.7415, -2.0689],\n",
+      "         [ 2.1071,  0.1938,  0.0255],\n",
+      "         [-0.3245, -0.8497, -0.7579],\n",
+      "         [-0.4831, -1.7133,  0.5985]]], grad_fn=<EmbeddingBackward0>)\n",
+      "tensor([[[ 0.0000,  0.0000,  0.0000],\n",
+      "         [-1.0849,  1.1815,  1.0977],\n",
+      "         [ 0.9290,  1.8014, -0.3725],\n",
+      "         [ 0.0787,  1.9454,  1.1112]],\n",
+      "\n",
+      "        [[ 1.7580, -0.3959,  0.9574],\n",
+      "         [ 1.7580, -0.3959,  0.9574],\n",
+      "         [-1.0849,  1.1815,  1.0977],\n",
+      "         [ 0.3496,  1.7060, -0.4346]]], grad_fn=<EmbeddingBackward0>)\n",
+      "0.004005908966064453\n"
+     ]
+    }
+   ],
+   "source": [
+    "start = time.time()\n",
+    "# an Embedding module containing 10 tensors of size 3\n",
+    "embedding = nn.Embedding(10, 3)\n",
+    "# a batch of 2 samples of 4 indices each\n",
+    "input = torch.LongTensor([[1, 2, 4, 5], [4, 3, 2, 9]])\n",
+    "print(embedding(input))\n",
+    "\n",
+    "# example with padding_idx\n",
+    "embedding = nn.Embedding(10, 3, padding_idx=0)\n",
+    "input = torch.LongTensor([[0, 2, 4, 5], [3, 3, 2, 9]])\n",
+    "print(embedding(input))\n",
+    "end = time.time()\n",
+    "print(end-start)\n",
+    "\n",
+    "# # example of changing `pad` vector\n",
+    "# padding_idx = 0\n",
+    "# embedding = nn.Embedding(3, 3, padding_idx=padding_idx)\n",
+    "# print(embedding.weight)\n",
+    "# with torch.no_grad():\n",
+    "#     embedding.weight[padding_idx] = torch.ones(3)\n",
+    "# print(embedding.weight)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[[[-0.0065871   0.01535338  0.01434076]\n",
+      "  [ 0.00331824  0.00791553 -0.02024699]\n",
+      "  [ 0.01239492  0.00385856  0.00478753]\n",
+      "  [-0.01227701  0.00011452  0.0142579 ]]\n",
+      "\n",
+      " [[ 0.01239492  0.00385856  0.00478753]\n",
+      "  [-0.00323425  0.00501913 -0.00874054]\n",
+      "  [ 0.00331824  0.00791553 -0.02024699]\n",
+      "  [ 0.01642694 -0.0065531   0.00681009]]]\n",
+      "[[[ 0.          0.          0.        ]\n",
+      "  [ 0.00261477  0.01124485  0.00828796]\n",
+      "  [-0.00046545  0.00450285  0.00737254]\n",
+      "  [-0.01147073  0.00254146  0.00321731]]\n",
+      "\n",
+      " [[-0.00656487 -0.00497597 -0.00669393]\n",
+      "  [-0.00656487 -0.00497597 -0.00669393]\n",
+      "  [ 0.00261477  0.01124485  0.00828796]\n",
+      "  [ 0.00989333  0.02307425  0.01142865]]]\n",
+      "0.00896143913269043\n"
+     ]
+    }
+   ],
+   "source": [
+    "import mindspore as ms\n",
+    "from mindspore import nn\n",
+    "from mindspore import Tensor\n",
+    "\n",
+    "start = time.time()\n",
+    "embedding = nn.Embedding(10, 3)\n",
+    "# a batch of 2 samples of 4 indices each\n",
+    "input = Tensor([[1, 2, 4, 5], [4, 3, 2, 9]])\n",
+    "print(embedding(input))\n",
+    "\n",
+    "# example with padding_idx\n",
+    "embedding = nn.Embedding(10, 3, padding_idx=0)\n",
+    "input = Tensor([[0, 2, 4, 5], [3, 3, 2, 9]])\n",
+    "print(embedding(input))\n",
+    "end = time.time()\n",
+    "print(end-start)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**MindSpore Embedding用法与输出与PyTorch一致**\n",
+    "- mint.nn.embedding待验证，CPU不支持"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.11"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/api-examples/nn/nn.identity.ipynb b/api-examples/nn/nn.identity.ipynb
new file mode 100644
index 0000000..699b21e
--- /dev/null
+++ b/api-examples/nn/nn.identity.ipynb
@@ -0,0 +1,87 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import torch\n",
+    "from torch import Tensor\n",
+    "from torch import nn"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "torch.Size([128, 20])\n"
+     ]
+    }
+   ],
+   "source": [
+    "input = np.random.rand(128, 20)\n",
+    "\n",
+    "m = nn.Identity(54, unused_argument1=0.1, unused_argument2=False)\n",
+    "output = nn.Identity(54, unused_argument1=0.1, unused_argument2=False)(Tensor(input))\n",
+    "print(output.size())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "(128, 20)\n"
+     ]
+    }
+   ],
+   "source": [
+    "import mindspore as ms\n",
+    "from mindspore import nn\n",
+    "from mindspore import Tensor\n",
+    "output = nn.Identity()(Tensor(input, dtype=ms.float32))\n",
+    "print(output.shape)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**MindSpore identity用法与输出与PyTorch一致**\n",
+    "- MindSpore的size/shape打印比PyTorch简洁一点"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.11"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/api-examples/nn/nn.multiheadattention.ipynb b/api-examples/nn/nn.multiheadattention.ipynb
new file mode 100644
index 0000000..5ab3e11
--- /dev/null
+++ b/api-examples/nn/nn.multiheadattention.ipynb
@@ -0,0 +1,160 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import torch\n",
+    "from torch import Tensor\n",
+    "from torch import nn"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "tensor([[[ 0.0109,  0.3142,  0.3060, -0.0961, -0.2895,  0.2136, -0.1708,\n",
+      "          -0.0997,  0.0912, -0.0031, -0.1519,  0.0259,  0.0421,  0.1709,\n",
+      "           0.2150, -0.1000,  0.3393,  0.2651, -0.0085, -0.0135,  0.0730,\n",
+      "           0.0116,  0.1105, -0.1352, -0.0823,  0.3899,  0.1814,  0.1009,\n",
+      "          -0.2097, -0.3307,  0.1143,  0.2149]],\n",
+      "\n",
+      "        [[ 0.0072,  0.3131,  0.3063, -0.0994, -0.2912,  0.2116, -0.1724,\n",
+      "          -0.1033,  0.0965,  0.0024, -0.1512,  0.0233,  0.0406,  0.1782,\n",
+      "           0.2066, -0.0972,  0.3377,  0.2652, -0.0051, -0.0142,  0.0742,\n",
+      "           0.0048,  0.1180, -0.1363, -0.0826,  0.3924,  0.1792,  0.0996,\n",
+      "          -0.2099, -0.3327,  0.1132,  0.2122]]], grad_fn=<ViewBackward0>)\n",
+      "torch.Size([2, 1, 32])\n"
+     ]
+    }
+   ],
+   "source": [
+    "multihead_attn = nn.MultiheadAttention(32, 8)\n",
+    "query = np.random.rand(2, 1, 32)\n",
+    "key = np.random.rand(2, 1, 32)\n",
+    "value = np.random.rand(2, 1, 32)\n",
+    "attn_output, attn_output_weights = multihead_attn(Tensor(query), Tensor(key), Tensor(value))\n",
+    "print(attn_output)\n",
+    "print(attn_output.shape)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "(2, 1, 32)\n",
+      "[[[ 0.21025246 -1.3815589   1.1856667   0.5420509  -0.03878744\n",
+      "    0.36193654 -0.30533046 -1.1700163  -0.57054275  0.46141142\n",
+      "   -0.30795845  0.39367586 -0.54798263  0.19380882 -0.5910839\n",
+      "   -0.4666907  -0.97481775  0.45212376 -0.10193141 -0.6575114\n",
+      "    0.25120044  0.122086   -1.1403954  -0.42054623 -0.61952204\n",
+      "   -0.6566897   0.73287785 -0.35822588 -0.5958375  -0.32572877\n",
+      "    1.2399867  -0.22128716]]\n",
+      "\n",
+      " [[ 0.03517252 -0.6995417   0.97694     0.07682744  0.24728103\n",
+      "    0.08959051  0.01169628 -0.8235637  -0.2518189   0.59149474\n",
+      "   -0.40409014 -0.5977189   0.09025317  0.08469346 -0.29417813\n",
+      "   -0.34428942 -0.61110383  0.7962756   0.02414452 -0.67949474\n",
+      "    0.6065721  -0.24253327 -0.3226804   0.27078897 -1.3271445\n",
+      "    0.25307137  0.3808884   0.18227924 -0.38555723 -0.42521483\n",
+      "    1.2446275  -0.62895083]]]\n"
+     ]
+    }
+   ],
+   "source": [
+    "import jax\n",
+    "import flax.linen as nn\n",
+    "from jax import numpy as jnp\n",
+    "from jax import random\n",
+    "\n",
+    "input = jnp.ones((2,1,32))\n",
+    "layer = nn.MultiHeadAttention(num_heads=8, qkv_features=32)\n",
+    "variables = layer.init(random.key(42), input)\n",
+    "output = layer.apply(variables, jnp.asarray(query), jnp.asarray(key), jnp.asarray(value))\n",
+    "print(output.shape)\n",
+    "print(output)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[[[ 0.00069493 -0.03911137 -0.17373352 -0.08817849  0.00585917\n",
+      "    0.0038011   0.13423112  0.12640306 -0.13031635 -0.10004204\n",
+      "   -0.22439755 -0.07857274 -0.31675625 -0.07869545  0.18808702\n",
+      "    0.18783928  0.21366805  0.16640219 -0.2740562   0.02806476\n",
+      "   -0.16295458  0.29520345  0.17730789  0.13442674  0.35357815\n",
+      "    0.15538093  0.08111625 -0.09462467 -0.03054206  0.1470299\n",
+      "    0.03899852  0.20147526]]\n",
+      "\n",
+      " [[-0.00058219 -0.03654152 -0.17798103 -0.08727801  0.00280233\n",
+      "    0.00116073  0.1349529   0.12789461 -0.13350655 -0.10066106\n",
+      "   -0.2256635  -0.07729645 -0.31817535 -0.0761395   0.19332808\n",
+      "    0.19098276  0.21913132  0.16640013 -0.27903354  0.03269039\n",
+      "   -0.16899411  0.2947319   0.18080507  0.13609359  0.34764683\n",
+      "    0.16034965  0.08051767 -0.09571798 -0.03570132  0.15065208\n",
+      "    0.03331698  0.19594805]]]\n",
+      "(2, 1, 32)\n"
+     ]
+    }
+   ],
+   "source": [
+    "import mindspore as ms\n",
+    "from mindspore import nn\n",
+    "from mindspore import Tensor\n",
+    "\n",
+    "multihead_attn = nn.MultiheadAttention(32, 8)\n",
+    "attn_output, attn_output_weights = multihead_attn(Tensor(query, ms.float32), Tensor(key, ms.float32), Tensor(value, ms.float32))\n",
+    "print(attn_output)\n",
+    "print(attn_output.shape)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**MindSpore MHA用法与输出与PyTorch一致**\n",
+    "- MindSpore的Tensor()没有像torch.Tensor()一样内置类型转换，传入float64会报错"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.11"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/api-examples/nn/nn.relu.ipynb b/api-examples/nn/nn.relu.ipynb
new file mode 100644
index 0000000..c112926
--- /dev/null
+++ b/api-examples/nn/nn.relu.ipynb
@@ -0,0 +1,109 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import torch\n",
+    "from torch import Tensor\n",
+    "from torch import nn"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "tensor([0.7546, 0.1698, 0.7326, 0.5446, 0.9181, 0.1017, 0.3542, 0.0074, 0.8615,\n",
+      "        0.0965])\n"
+     ]
+    }
+   ],
+   "source": [
+    "# ReLU\n",
+    "input_np = np.random.rand(10)\n",
+    "output = nn.ReLU()(Tensor(input_np))\n",
+    "print(output)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[0.75460404 0.16979711 0.7325533  0.544558   0.9180574  0.1017127\n",
+      " 0.35418442 0.00744265 0.86148655 0.09653632]\n"
+     ]
+    }
+   ],
+   "source": [
+    "import jax\n",
+    "output = jax.nn.relu(input_np)\n",
+    "print(output)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[0.75460406 0.16979711 0.7325533  0.544558   0.91805736 0.1017127\n",
+      " 0.35418441 0.00744265 0.86148653 0.09653632]\n"
+     ]
+    }
+   ],
+   "source": [
+    "import mindspore as ms\n",
+    "from mindspore import nn\n",
+    "from mindspore import Tensor\n",
+    "output = nn.ReLU()(Tensor(input_np))\n",
+    "print(output)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**MindSpore ReLU用法与输出与PyTorch一致**\n",
+    "- MindSpore的输出打印为numpy数组风格（元素以空格分隔），与jax一致；PyTorch打印了tensor类信息，元素之间有逗号分隔\n",
+    "- jax可以直接输入numpy向量"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.11"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/api-examples/nn/nn.transformer.ipynb b/api-examples/nn/nn.transformer.ipynb
new file mode 100644
index 0000000..3ec48af
--- /dev/null
+++ b/api-examples/nn/nn.transformer.ipynb
@@ -0,0 +1,108 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import torch\n",
+    "from torch import Tensor\n",
+    "from torch import nn\n",
+    "\n",
+    "import time"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "C:\\Users\\wangzeyangyi\\anaconda3\\envs\\tutorial\\Lib\\site-packages\\torch\\nn\\modules\\transformer.py:379: UserWarning: enable_nested_tensor is True, but self.use_nested_tensor is False because encoder_layer.self_attn.batch_first was not True(use batch_first for better inference performance)\n",
+      "  warnings.warn(\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "torch.Size([20, 32, 512])\n",
+      "2.4219019412994385\n"
+     ]
+    }
+   ],
+   "source": [
+    "start = time.time()\n",
+    "transformer_model = nn.Transformer(nhead=16, num_encoder_layers=12)\n",
+    "src = np.random.rand(10, 32, 512)\n",
+    "tgt = np.random.rand(20, 32, 512)\n",
+    "out = transformer_model(Tensor(src), Tensor(tgt))\n",
+    "print(out.size())\n",
+    "end = time.time()\n",
+    "print(end-start)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "(20, 32, 512)\n",
+      "13.059895038604736\n"
+     ]
+    }
+   ],
+   "source": [
+    "import mindspore as ms\n",
+    "from mindspore import nn\n",
+    "from mindspore import Tensor\n",
+    "\n",
+    "start = time.time()\n",
+    "transformer_model = nn.Transformer(nhead=16, num_encoder_layers=12)\n",
+    "out = transformer_model(Tensor(src, ms.float32), Tensor(tgt, ms.float32))\n",
+    "print(out.shape)\n",
+    "end = time.time()\n",
+    "print(end-start)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**MindSpore transformer的用法和输出与PyTorch一致**\n",
+    "- MindSpore transformer在CPU上的性能待优化（本次实测耗时约为PyTorch的5.4倍：13.06s vs 2.42s，计时包含模型构建）\n",
+    "- jax没有官方transformer API"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.11"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/api-examples/nn/nn.upsample.ipynb b/api-examples/nn/nn.upsample.ipynb
new file mode 100644
index 0000000..794c7b5
--- /dev/null
+++ b/api-examples/nn/nn.upsample.ipynb
@@ -0,0 +1,136 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import torch\n",
+    "from torch import Tensor\n",
+    "from torch import nn"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[[[[1 1]\n",
+      "   [1 0]]]]\n",
+      "tensor([[[[1., 1., 1., 1.],\n",
+      "          [1., 1., 1., 1.],\n",
+      "          [1., 1., 0., 0.],\n",
+      "          [1., 1., 0., 0.]]]])\n",
+      "tensor([[[[1.0000, 1.0000, 1.0000, 1.0000],\n",
+      "          [1.0000, 0.9375, 0.8125, 0.7500],\n",
+      "          [1.0000, 0.8125, 0.4375, 0.2500],\n",
+      "          [1.0000, 0.7500, 0.2500, 0.0000]]]])\n",
+      "tensor([[[[1.0000, 1.0000, 1.0000, 1.0000],\n",
+      "          [1.0000, 0.8889, 0.7778, 0.6667],\n",
+      "          [1.0000, 0.7778, 0.5556, 0.3333],\n",
+      "          [1.0000, 0.6667, 0.3333, 0.0000]]]])\n"
+     ]
+    }
+   ],
+   "source": [
+    "# input = torch.arange(1, 5, dtype=torch.float32).view(1, 1, 2, 2)\n",
+    "input = np.random.randint(2, size=(1,1,2,2))\n",
+    "print(input)\n",
+    "\n",
+    "m = nn.Upsample(scale_factor=2, mode='nearest')\n",
+    "print(m(Tensor(input)))\n",
+    "\n",
+    "m = nn.Upsample(scale_factor=2, mode='bilinear')  # align_corners=False\n",
+    "print(m(Tensor(input)))\n",
+    "\n",
+    "m = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)\n",
+    "print(m(Tensor(input)))\n",
+    "\n",
+    "# # Try scaling the same data in a larger tensor\n",
+    "# input_3x3 = torch.zeros(3, 3).view(1, 1, 3, 3)\n",
+    "# input_3x3[:, :, :2, :2].copy_(input)\n",
+    "# input_3x3\n",
+    "\n",
+    "# m = nn.Upsample(scale_factor=2, mode='bilinear')  # align_corners=False\n",
+    "# # Notice that values in top left corner are the same with the small input (except at boundary)\n",
+    "# m(input_3x3)\n",
+    "\n",
+    "# m = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)\n",
+    "# # Notice that values in top left corner are now changed\n",
+    "# m(input_3x3)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "ValueError",
+     "evalue": "For 'interpolate', 'scale_factor' option cannot currently be set with the mode = nearest and dim = 4D.",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[1;31mValueError\u001b[0m                                Traceback (most recent call last)",
+      "Cell \u001b[1;32mIn[6], line 6\u001b[0m\n\u001b[0;32m      3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mmindspore\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Tensor\n\u001b[0;32m      5\u001b[0m m \u001b[38;5;241m=\u001b[39m nn\u001b[38;5;241m.\u001b[39mUpsample(scale_factor\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m2.0\u001b[39m, mode\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mnearest\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m----> 6\u001b[0m \u001b[38;5;28mprint\u001b[39m(m(Tensor(\u001b[38;5;28minput\u001b[39m)))\n\u001b[0;32m      8\u001b[0m m \u001b[38;5;241m=\u001b[39m nn\u001b[38;5;241m.\u001b[39mUpsample(scale_factor\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m2.0\u001b[39m, mode\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mbilinear\u001b[39m\u001b[38;5;124m'\u001b[39m)  \u001b[38;5;66;03m# align_corners=False\u001b[39;00m\n\u001b[0;32m      9\u001b[0m \u001b[38;5;28mprint\u001b[39m(m(Tensor(\u001b[38;5;28minput\u001b[39m)))\n",
+      "File \u001b[1;32m~\\anaconda3\\envs\\tutorial\\Lib\\site-packages\\mindspore\\nn\\cell.py:733\u001b[0m, in \u001b[0;36mCell.__call__\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m    730\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mrequires_grad \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_dynamic_shape_inputs \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmixed_precision_type):\n\u001b[0;32m    731\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hook \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hook \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hook \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hook \u001b[38;5;129;01mor\u001b[39;00m\n\u001b[0;32m    732\u001b[0m             \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_shard_fn \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_recompute_cell \u001b[38;5;129;01mor\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhas_bprop \u001b[38;5;129;01mand\u001b[39;00m _pynative_executor\u001b[38;5;241m.\u001b[39mrequires_grad())):\n\u001b[1;32m--> 733\u001b[0m         \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mconstruct(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m    735\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_run_construct(\u001b[38;5;241m*\u001b[39margs, 
\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m    737\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_complex_call(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n",
+      "File \u001b[1;32m~\\anaconda3\\envs\\tutorial\\Lib\\site-packages\\mindspore\\nn\\layer\\basic.py:473\u001b[0m, in \u001b[0;36mUpsample.construct\u001b[1;34m(self, x)\u001b[0m\n\u001b[0;32m    472\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mconstruct\u001b[39m(\u001b[38;5;28mself\u001b[39m, x):\n\u001b[1;32m--> 473\u001b[0m     out \u001b[38;5;241m=\u001b[39m F\u001b[38;5;241m.\u001b[39minterpolate(x, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msize, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mscale_factor, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmode,\n\u001b[0;32m    474\u001b[0m                         \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39malign_corners, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mrecompute_scale_factor)\n\u001b[0;32m    475\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m out\n",
+      "File \u001b[1;32m~\\anaconda3\\envs\\tutorial\\Lib\\site-packages\\mindspore\\ops\\function\\nn_func.py:2613\u001b[0m, in \u001b[0;36minterpolate\u001b[1;34m(input, size, scale_factor, mode, align_corners, recompute_scale_factor)\u001b[0m\n\u001b[0;32m   2611\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m   2612\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m dim_unknown \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mFalse\u001b[39;00m:\n\u001b[1;32m-> 2613\u001b[0m         _interpolate_scale_factor_check(scale_factor, mode, rank,\n\u001b[0;32m   2614\u001b[0m                                         supported_dict)\n\u001b[0;32m   2616\u001b[0m \u001b[38;5;66;03m# align_corners\u001b[39;00m\n\u001b[0;32m   2617\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m align_corners \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n",
+      "File \u001b[1;32m~\\anaconda3\\envs\\tutorial\\Lib\\site-packages\\mindspore\\ops\\function\\nn_func.py:2293\u001b[0m, in \u001b[0;36m_interpolate_scale_factor_check\u001b[1;34m(scale_factor, mode, rank, supported_dict)\u001b[0m\n\u001b[0;32m   2288\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m   2289\u001b[0m \u001b[38;5;124;03mscale_factor check\u001b[39;00m\n\u001b[0;32m   2290\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m   2291\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m scale_factor \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mscale_factor\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m supported_dict\u001b[38;5;241m.\u001b[39mget(\n\u001b[0;32m   2292\u001b[0m         mode, {})\u001b[38;5;241m.\u001b[39mget(rank):\n\u001b[1;32m-> 2293\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[0;32m   2294\u001b[0m         \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mFor \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124minterpolate\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m, \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mscale_factor\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m option cannot currently be set with the \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m   2295\u001b[0m         \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmode = \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mmode\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m and dim = \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mrank\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124mD.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
+      "\u001b[1;31mValueError\u001b[0m: For 'interpolate', 'scale_factor' option cannot currently be set with the mode = nearest and dim = 4D."
+     ]
+    }
+   ],
+   "source": [
+    "import mindspore as ms\n",
+    "from mindspore import nn\n",
+    "from mindspore import Tensor\n",
+    "\n",
+    "m = nn.Upsample(scale_factor=2.0, mode='nearest')\n",
+    "print(m(Tensor(input)))\n",
+    "\n",
+    "m = nn.Upsample(scale_factor=2.0, mode='bilinear')  # align_corners=False\n",
+    "print(m(Tensor(input)))\n",
+    "\n",
+    "m = nn.Upsample(scale_factor=2.0, mode='bilinear', align_corners=True)\n",
+    "print(m(Tensor(input)))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "- jax没有upsample接口\n",
+    "- MindSpore的scale_factor参数必须为float\n",
+    "- MindSpore部分参数组合不支持：mode='nearest'且输入为4D时无法设置scale_factor（抛出ValueError）"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.11"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}

From d3a7506021d7010bbd2683570b7c08350daef783 Mon Sep 17 00:00:00 2001
From: tomzwang11 <tomzwang11@gmail.com>
Date: Thu, 9 Jan 2025 21:28:05 +0800
Subject: [PATCH 4/4] remove checkpoints

---
 .../nn.bceloss-checkpoint.ipynb               |  93 ----------
 .../nn.celldict-checkpoint.ipynb              | 131 --------------
 .../nn.dense-checkpoint.ipynb                 | 113 -------------
 .../nn.embedding-checkpoint.ipynb             | 151 -----------------
 .../nn.identity-checkpoint.ipynb              |  87 ----------
 .../nn.multiheadattention-checkpoint.ipynb    | 160 ------------------
 .../nn.relu-checkpoint.ipynb                  | 131 --------------
 .../nn.transformer-checkpoint.ipynb           | 108 ------------
 .../nn.upsample-checkpoint.ipynb              | 136 ---------------
 9 files changed, 1110 deletions(-)
 delete mode 100644 api-examples/nn/.ipynb_checkpoints/nn.bceloss-checkpoint.ipynb
 delete mode 100644 api-examples/nn/.ipynb_checkpoints/nn.celldict-checkpoint.ipynb
 delete mode 100644 api-examples/nn/.ipynb_checkpoints/nn.dense-checkpoint.ipynb
 delete mode 100644 api-examples/nn/.ipynb_checkpoints/nn.embedding-checkpoint.ipynb
 delete mode 100644 api-examples/nn/.ipynb_checkpoints/nn.identity-checkpoint.ipynb
 delete mode 100644 api-examples/nn/.ipynb_checkpoints/nn.multiheadattention-checkpoint.ipynb
 delete mode 100644 api-examples/nn/.ipynb_checkpoints/nn.relu-checkpoint.ipynb
 delete mode 100644 api-examples/nn/.ipynb_checkpoints/nn.transformer-checkpoint.ipynb
 delete mode 100644 api-examples/nn/.ipynb_checkpoints/nn.upsample-checkpoint.ipynb

diff --git a/api-examples/nn/.ipynb_checkpoints/nn.bceloss-checkpoint.ipynb b/api-examples/nn/.ipynb_checkpoints/nn.bceloss-checkpoint.ipynb
deleted file mode 100644
index aea0394..0000000
--- a/api-examples/nn/.ipynb_checkpoints/nn.bceloss-checkpoint.ipynb
+++ /dev/null
@@ -1,93 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import numpy as np\n",
-    "import torch\n",
-    "from torch import Tensor\n",
-    "from torch import nn"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 13,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "tensor(0.8536, grad_fn=<BinaryCrossEntropyBackward0>)\n"
-     ]
-    }
-   ],
-   "source": [
-    "# BCELoss\n",
-    "m = nn.Sigmoid()\n",
-    "input_np = np.random.rand(3,2)\n",
-    "target_np = np.random.rand(3,2)\n",
-    "output = nn.BCELoss()(m(Tensor(input)), Tensor(target))\n",
-    "print(output)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 14,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "0.71798706\n"
-     ]
-    }
-   ],
-   "source": [
-    "import mindspore as ms\n",
-    "from mindspore import nn\n",
-    "from mindspore import Tensor\n",
-    "\n",
-    "m = nn.Sigmoid()\n",
-    "loss = nn.BCELoss(weight=None, reduction='mean')\n",
-    "input = ms.Tensor(input_np, ms.float32)\n",
-    "target = ms.Tensor(target_np, ms.float32)\n",
-    "output = loss(m(input), target)\n",
-    "print(output)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "- 默认reduction=mean下输出与PyTorch似乎不一致\n",
-    "- mint.nn.BCELoss待验证，CPU上不支持"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.11.11"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 4
-}
diff --git a/api-examples/nn/.ipynb_checkpoints/nn.celldict-checkpoint.ipynb b/api-examples/nn/.ipynb_checkpoints/nn.celldict-checkpoint.ipynb
deleted file mode 100644
index 26cfffd..0000000
--- a/api-examples/nn/.ipynb_checkpoints/nn.celldict-checkpoint.ipynb
+++ /dev/null
@@ -1,131 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import numpy as np\n",
-    "import torch\n",
-    "from torch import Tensor\n",
-    "from torch import nn"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "ModuleDict(\n",
-      "  (cond2d): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1))\n",
-      ")\n",
-      "ModuleDict(\n",
-      "  (relu): ReLU()\n",
-      "  (dict1): ModuleDict(\n",
-      "    (cond2d): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1))\n",
-      "  )\n",
-      ")\n"
-     ]
-    }
-   ],
-   "source": [
-    "# moduledict\n",
-    "moduledict1 = nn.ModuleDict({'cond2d': nn.Conv2d(10,10,3)})\n",
-    "moduledict2 = nn.ModuleDict({'relu': nn.ReLU(), 'dict1': moduledict1})\n",
-    "print(moduledict1)\n",
-    "print(moduledict2)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "CellDict<\n",
-      "  (cond2d): Conv2d<input_channels=10, output_channels=10, kernel_size=(3, 3), stride=(1, 1), pad_mode=same, padding=0, dilation=(1, 1), group=1, has_bias=False, weight_init=<mindspore.common.initializer.HeUniform object at 0x000001D5E2CC5A10>, bias_init=None, format=NCHW>\n",
-      "  >\n"
-     ]
-    }
-   ],
-   "source": [
-    "import mindspore as ms\n",
-    "from mindspore import nn\n",
-    "celldict1 = nn.CellDict({'cond2d': nn.Conv2d(10,10,3)})\n",
-    "print(celldict1)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "**MindSpore支持CellDict(Cell)**\n",
-    "- 打印出的结构没有缩进；weight_init过于详细；\n",
-    "- 建议默认参数不打印，提供选择开关（print_all）"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {},
-   "outputs": [
-    {
-     "ename": "TypeError",
-     "evalue": "For 'CellDict', the type of cell can not be CellDict, CellList or SequentialCell, but got CellDict.",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[1;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
-      "Cell \u001b[1;32mIn[7], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m celldict2 \u001b[38;5;241m=\u001b[39m nn\u001b[38;5;241m.\u001b[39mCellDict({\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mrelu\u001b[39m\u001b[38;5;124m'\u001b[39m: nn\u001b[38;5;241m.\u001b[39mReLU(), \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdict1\u001b[39m\u001b[38;5;124m'\u001b[39m: celldict1})\n\u001b[0;32m      2\u001b[0m \u001b[38;5;28mprint\u001b[39m(celldict2)\n",
-      "File \u001b[1;32m~\\anaconda3\\envs\\tutorial\\Lib\\site-packages\\mindspore\\nn\\layer\\container.py:583\u001b[0m, in \u001b[0;36mCellDict.__init__\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m    581\u001b[0m Cell\u001b[38;5;241m.\u001b[39m\u001b[38;5;21m__init__\u001b[39m(\u001b[38;5;28mself\u001b[39m, auto_prefix)\n\u001b[0;32m    582\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(args) \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m1\u001b[39m:\n\u001b[1;32m--> 583\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mupdate(args[\u001b[38;5;241m0\u001b[39m])\n",
-      "File \u001b[1;32m~\\anaconda3\\envs\\tutorial\\Lib\\site-packages\\mindspore\\nn\\layer\\container.py:720\u001b[0m, in \u001b[0;36mCellDict.update\u001b[1;34m(self, cells)\u001b[0m\n\u001b[0;32m    718\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(cells, (OrderedDict, CellDict, abc\u001b[38;5;241m.\u001b[39mMapping)):\n\u001b[0;32m    719\u001b[0m     \u001b[38;5;28;01mfor\u001b[39;00m key, cell \u001b[38;5;129;01min\u001b[39;00m cells\u001b[38;5;241m.\u001b[39mitems():\n\u001b[1;32m--> 720\u001b[0m         \u001b[38;5;28mself\u001b[39m[key] \u001b[38;5;241m=\u001b[39m cell\n\u001b[0;32m    721\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m    722\u001b[0m     \u001b[38;5;28;01mfor\u001b[39;00m \u001b[38;5;28mid\u001b[39m, k_v \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28menumerate\u001b[39m(cells):\n",
-      "File \u001b[1;32m~\\anaconda3\\envs\\tutorial\\Lib\\site-packages\\mindspore\\nn\\layer\\container.py:590\u001b[0m, in \u001b[0;36mCellDict.__setitem__\u001b[1;34m(self, key, cell)\u001b[0m\n\u001b[0;32m    588\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__setitem__\u001b[39m(\u001b[38;5;28mself\u001b[39m, key, cell):\n\u001b[0;32m    589\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_validate_key(key)\n\u001b[1;32m--> 590\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_validate_cell_type(cell)\n\u001b[0;32m    591\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_update_cell_para_name(key, cell)\n\u001b[0;32m    592\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_cells[key] \u001b[38;5;241m=\u001b[39m cell\n",
-      "File \u001b[1;32m~\\anaconda3\\envs\\tutorial\\Lib\\site-packages\\mindspore\\nn\\layer\\container.py:631\u001b[0m, in \u001b[0;36mCellDict._validate_cell_type\u001b[1;34m(self, cell)\u001b[0m\n\u001b[0;32m    628\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mFor \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mcls_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m, the type of cell should be Cell, \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m    629\u001b[0m                     \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbut got \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mtype\u001b[39m(cell)\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m    630\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(cell, (CellDict, CellList, SequentialCell)):\n\u001b[1;32m--> 631\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mFor \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mcls_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m, the type of cell can not be CellDict, CellList or SequentialCell, \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m    632\u001b[0m                     \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbut got \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mtype\u001b[39m(cell)\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
-      "\u001b[1;31mTypeError\u001b[0m: For 'CellDict', the type of cell can not be CellDict, CellList or SequentialCell, but got CellDict."
-     ]
-    }
-   ],
-   "source": [
-    "celldict2 = nn.CellDict({'relu': nn.ReLU(), 'dict1': celldict1})\n",
-    "print(celldict2)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "**MindSpore不支持CellDict(CellDict)**\n",
-    "- PyTorch支持CellDict/SequentialCell/CellList嵌套"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.11.11"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 4
-}
diff --git a/api-examples/nn/.ipynb_checkpoints/nn.dense-checkpoint.ipynb b/api-examples/nn/.ipynb_checkpoints/nn.dense-checkpoint.ipynb
deleted file mode 100644
index e4bb35f..0000000
--- a/api-examples/nn/.ipynb_checkpoints/nn.dense-checkpoint.ipynb
+++ /dev/null
@@ -1,113 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 18,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import numpy as np\n",
-    "import torch\n",
-    "from torch import Tensor\n",
-    "from torch import nn"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 20,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "torch.Size([128, 30])\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Linear\n",
-    "input = np.random.randn(128, 20)\n",
-    "output = nn.Linear(20,30)(Tensor(input))\n",
-    "print(output.size())"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 32,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "(128, 30)\n"
-     ]
-    }
-   ],
-   "source": [
-    "import jax\n",
-    "import flax.linen as nn\n",
-    "from jax import numpy as jnp\n",
-    "from jax import random\n",
-    "\n",
-    "input = jnp.ones((128,20))\n",
-    "dense = nn.Dense(features=30)\n",
-    "variables = dense.init(random.key(42), input)\n",
-    "output = dense.apply(variables, input)\n",
-    "print(output.shape)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 24,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "(128, 30)\n"
-     ]
-    }
-   ],
-   "source": [
-    "import mindspore as ms\n",
-    "from mindspore import nn\n",
-    "from mindspore import Tensor\n",
-    "output = nn.Dense(20,30)(Tensor(input, dtype=ms.float32))\n",
-    "print(output.shape)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "**MindSpore Dense用法与输出与PyTorch一致**\n",
-    "- MindSpore的命名与jax保持一致（Dense）\n",
-    "- jax需要额外定义参数"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.11.11"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 4
-}
diff --git a/api-examples/nn/.ipynb_checkpoints/nn.embedding-checkpoint.ipynb b/api-examples/nn/.ipynb_checkpoints/nn.embedding-checkpoint.ipynb
deleted file mode 100644
index 69871f1..0000000
--- a/api-examples/nn/.ipynb_checkpoints/nn.embedding-checkpoint.ipynb
+++ /dev/null
@@ -1,151 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 12,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import numpy as np\n",
-    "import torch\n",
-    "from torch import Tensor\n",
-    "from torch import nn\n",
-    "import time"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 13,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "tensor([[[ 1.1268, -0.9513,  0.6823],\n",
-      "         [-0.3245, -0.8497, -0.7579],\n",
-      "         [-0.6075,  2.7415, -2.0689],\n",
-      "         [-1.4612, -1.0289, -0.4242]],\n",
-      "\n",
-      "        [[-0.6075,  2.7415, -2.0689],\n",
-      "         [ 2.1071,  0.1938,  0.0255],\n",
-      "         [-0.3245, -0.8497, -0.7579],\n",
-      "         [-0.4831, -1.7133,  0.5985]]], grad_fn=<EmbeddingBackward0>)\n",
-      "tensor([[[ 0.0000,  0.0000,  0.0000],\n",
-      "         [-1.0849,  1.1815,  1.0977],\n",
-      "         [ 0.9290,  1.8014, -0.3725],\n",
-      "         [ 0.0787,  1.9454,  1.1112]],\n",
-      "\n",
-      "        [[ 1.7580, -0.3959,  0.9574],\n",
-      "         [ 1.7580, -0.3959,  0.9574],\n",
-      "         [-1.0849,  1.1815,  1.0977],\n",
-      "         [ 0.3496,  1.7060, -0.4346]]], grad_fn=<EmbeddingBackward0>)\n",
-      "0.004005908966064453\n"
-     ]
-    }
-   ],
-   "source": [
-    "start = time.time()\n",
-    "# an Embedding module containing 10 tensors of size 3\n",
-    "embedding = nn.Embedding(10, 3)\n",
-    "# a batch of 2 samples of 4 indices each\n",
-    "input = torch.LongTensor([[1, 2, 4, 5], [4, 3, 2, 9]])\n",
-    "print(embedding(input))\n",
-    "\n",
-    "# example with padding_idx\n",
-    "embedding = nn.Embedding(10, 3, padding_idx=0)\n",
-    "input = torch.LongTensor([[0, 2, 4, 5], [3, 3, 2, 9]])\n",
-    "print(embedding(input))\n",
-    "end = time.time()\n",
-    "print(end-start)\n",
-    "\n",
-    "# # example of changing `pad` vector\n",
-    "# padding_idx = 0\n",
-    "# embedding = nn.Embedding(3, 3, padding_idx=padding_idx)\n",
-    "# print(embedding.weight)\n",
-    "# with torch.no_grad():\n",
-    "#     embedding.weight[padding_idx] = torch.ones(3)\n",
-    "# print(embedding.weight)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 15,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[[[-0.0065871   0.01535338  0.01434076]\n",
-      "  [ 0.00331824  0.00791553 -0.02024699]\n",
-      "  [ 0.01239492  0.00385856  0.00478753]\n",
-      "  [-0.01227701  0.00011452  0.0142579 ]]\n",
-      "\n",
-      " [[ 0.01239492  0.00385856  0.00478753]\n",
-      "  [-0.00323425  0.00501913 -0.00874054]\n",
-      "  [ 0.00331824  0.00791553 -0.02024699]\n",
-      "  [ 0.01642694 -0.0065531   0.00681009]]]\n",
-      "[[[ 0.          0.          0.        ]\n",
-      "  [ 0.00261477  0.01124485  0.00828796]\n",
-      "  [-0.00046545  0.00450285  0.00737254]\n",
-      "  [-0.01147073  0.00254146  0.00321731]]\n",
-      "\n",
-      " [[-0.00656487 -0.00497597 -0.00669393]\n",
-      "  [-0.00656487 -0.00497597 -0.00669393]\n",
-      "  [ 0.00261477  0.01124485  0.00828796]\n",
-      "  [ 0.00989333  0.02307425  0.01142865]]]\n",
-      "0.00896143913269043\n"
-     ]
-    }
-   ],
-   "source": [
-    "import mindspore as ms\n",
-    "from mindspore import nn\n",
-    "from mindspore import Tensor\n",
-    "\n",
-    "start = time.time()\n",
-    "embedding = nn.Embedding(10, 3)\n",
-    "# a batch of 2 samples of 4 indices each\n",
-    "input = Tensor([[1, 2, 4, 5], [4, 3, 2, 9]])\n",
-    "print(embedding(input))\n",
-    "\n",
-    "# example with padding_idx\n",
-    "embedding = nn.Embedding(10, 3, padding_idx=0)\n",
-    "input = Tensor([[0, 2, 4, 5], [3, 3, 2, 9]])\n",
-    "print(embedding(input))\n",
-    "end = time.time()\n",
-    "print(end-start)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "**MindSpore Embedding用法与输出与PyTorch一致**\n",
-    "- mint.nn.embedding待验证，CPU不支持"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.11.11"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 4
-}
diff --git a/api-examples/nn/.ipynb_checkpoints/nn.identity-checkpoint.ipynb b/api-examples/nn/.ipynb_checkpoints/nn.identity-checkpoint.ipynb
deleted file mode 100644
index 699b21e..0000000
--- a/api-examples/nn/.ipynb_checkpoints/nn.identity-checkpoint.ipynb
+++ /dev/null
@@ -1,87 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import numpy as np\n",
-    "import torch\n",
-    "from torch import Tensor\n",
-    "from torch import nn"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "torch.Size([128, 20])\n"
-     ]
-    }
-   ],
-   "source": [
-    "input = np.random.rand(128, 20)\n",
-    "\n",
-    "m = nn.Identity(54, unused_argument1=0.1, unused_argument2=False)\n",
-    "output = nn.Identity(54, unused_argument1=0.1, unused_argument2=False)(Tensor(input))\n",
-    "print(output.size())"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "(128, 20)\n"
-     ]
-    }
-   ],
-   "source": [
-    "import mindspore as ms\n",
-    "from mindspore import nn\n",
-    "from mindspore import Tensor\n",
-    "output = nn.Identity()(Tensor(input, dtype=ms.float32))\n",
-    "print(output.shape)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "**MindSpore identity用法与输出与PyTorch一致**\n",
-    "- MindSpore的size/shape打印比PyTorch简洁一点"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.11.11"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 4
-}
diff --git a/api-examples/nn/.ipynb_checkpoints/nn.multiheadattention-checkpoint.ipynb b/api-examples/nn/.ipynb_checkpoints/nn.multiheadattention-checkpoint.ipynb
deleted file mode 100644
index 5ab3e11..0000000
--- a/api-examples/nn/.ipynb_checkpoints/nn.multiheadattention-checkpoint.ipynb
+++ /dev/null
@@ -1,160 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import numpy as np\n",
-    "import torch\n",
-    "from torch import Tensor\n",
-    "from torch import nn"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 11,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "tensor([[[ 0.0109,  0.3142,  0.3060, -0.0961, -0.2895,  0.2136, -0.1708,\n",
-      "          -0.0997,  0.0912, -0.0031, -0.1519,  0.0259,  0.0421,  0.1709,\n",
-      "           0.2150, -0.1000,  0.3393,  0.2651, -0.0085, -0.0135,  0.0730,\n",
-      "           0.0116,  0.1105, -0.1352, -0.0823,  0.3899,  0.1814,  0.1009,\n",
-      "          -0.2097, -0.3307,  0.1143,  0.2149]],\n",
-      "\n",
-      "        [[ 0.0072,  0.3131,  0.3063, -0.0994, -0.2912,  0.2116, -0.1724,\n",
-      "          -0.1033,  0.0965,  0.0024, -0.1512,  0.0233,  0.0406,  0.1782,\n",
-      "           0.2066, -0.0972,  0.3377,  0.2652, -0.0051, -0.0142,  0.0742,\n",
-      "           0.0048,  0.1180, -0.1363, -0.0826,  0.3924,  0.1792,  0.0996,\n",
-      "          -0.2099, -0.3327,  0.1132,  0.2122]]], grad_fn=<ViewBackward0>)\n",
-      "torch.Size([2, 1, 32])\n"
-     ]
-    }
-   ],
-   "source": [
-    "multihead_attn = nn.MultiheadAttention(32, 8)\n",
-    "query = np.random.rand(2, 1, 32)\n",
-    "key = np.random.rand(2, 1, 32)\n",
-    "value = np.random.rand(2, 1, 32)\n",
-    "attn_output, attn_output_weights = multihead_attn(Tensor(query), Tensor(key), Tensor(value))\n",
-    "print(attn_output)\n",
-    "print(attn_output.shape)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 12,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "(2, 1, 32)\n",
-      "[[[ 0.21025246 -1.3815589   1.1856667   0.5420509  -0.03878744\n",
-      "    0.36193654 -0.30533046 -1.1700163  -0.57054275  0.46141142\n",
-      "   -0.30795845  0.39367586 -0.54798263  0.19380882 -0.5910839\n",
-      "   -0.4666907  -0.97481775  0.45212376 -0.10193141 -0.6575114\n",
-      "    0.25120044  0.122086   -1.1403954  -0.42054623 -0.61952204\n",
-      "   -0.6566897   0.73287785 -0.35822588 -0.5958375  -0.32572877\n",
-      "    1.2399867  -0.22128716]]\n",
-      "\n",
-      " [[ 0.03517252 -0.6995417   0.97694     0.07682744  0.24728103\n",
-      "    0.08959051  0.01169628 -0.8235637  -0.2518189   0.59149474\n",
-      "   -0.40409014 -0.5977189   0.09025317  0.08469346 -0.29417813\n",
-      "   -0.34428942 -0.61110383  0.7962756   0.02414452 -0.67949474\n",
-      "    0.6065721  -0.24253327 -0.3226804   0.27078897 -1.3271445\n",
-      "    0.25307137  0.3808884   0.18227924 -0.38555723 -0.42521483\n",
-      "    1.2446275  -0.62895083]]]\n"
-     ]
-    }
-   ],
-   "source": [
-    "import jax\n",
-    "import flax.linen as nn\n",
-    "from jax import numpy as jnp\n",
-    "from jax import random\n",
-    "\n",
-    "input = jnp.ones((2,1,32))\n",
-    "layer = nn.MultiHeadAttention(num_heads=8, qkv_features=32)\n",
-    "variables = layer.init(random.key(42), input)\n",
-    "output = layer.apply(variables, Tensor(query), Tensor(key), Tensor(value))\n",
-    "print(output.shape)\n",
-    "print(output)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 13,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[[[ 0.00069493 -0.03911137 -0.17373352 -0.08817849  0.00585917\n",
-      "    0.0038011   0.13423112  0.12640306 -0.13031635 -0.10004204\n",
-      "   -0.22439755 -0.07857274 -0.31675625 -0.07869545  0.18808702\n",
-      "    0.18783928  0.21366805  0.16640219 -0.2740562   0.02806476\n",
-      "   -0.16295458  0.29520345  0.17730789  0.13442674  0.35357815\n",
-      "    0.15538093  0.08111625 -0.09462467 -0.03054206  0.1470299\n",
-      "    0.03899852  0.20147526]]\n",
-      "\n",
-      " [[-0.00058219 -0.03654152 -0.17798103 -0.08727801  0.00280233\n",
-      "    0.00116073  0.1349529   0.12789461 -0.13350655 -0.10066106\n",
-      "   -0.2256635  -0.07729645 -0.31817535 -0.0761395   0.19332808\n",
-      "    0.19098276  0.21913132  0.16640013 -0.27903354  0.03269039\n",
-      "   -0.16899411  0.2947319   0.18080507  0.13609359  0.34764683\n",
-      "    0.16034965  0.08051767 -0.09571798 -0.03570132  0.15065208\n",
-      "    0.03331698  0.19594805]]]\n",
-      "(2, 1, 32)\n"
-     ]
-    }
-   ],
-   "source": [
-    "import mindspore as ms\n",
-    "from mindspore import nn\n",
-    "from mindspore import Tensor\n",
-    "\n",
-    "multihead_attn = nn.MultiheadAttention(32, 8)\n",
-    "attn_output, attn_output_weights = multihead_attn(Tensor(query, ms.float32), Tensor(key, ms.float32), Tensor(value, ms.float32))\n",
-    "print(attn_output)\n",
-    "print(attn_output.shape)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "**MindSpore MHA用法与输出与PyTorch一致**\n",
-    "- MindSpore的Tensor()没有像torch.Tensor()一样内置类型转换，传入float64会报错"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.11.11"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 4
-}
diff --git a/api-examples/nn/.ipynb_checkpoints/nn.relu-checkpoint.ipynb b/api-examples/nn/.ipynb_checkpoints/nn.relu-checkpoint.ipynb
deleted file mode 100644
index 26cfffd..0000000
--- a/api-examples/nn/.ipynb_checkpoints/nn.relu-checkpoint.ipynb
+++ /dev/null
@@ -1,131 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import numpy as np\n",
-    "import torch\n",
-    "from torch import Tensor\n",
-    "from torch import nn"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "ModuleDict(\n",
-      "  (cond2d): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1))\n",
-      ")\n",
-      "ModuleDict(\n",
-      "  (relu): ReLU()\n",
-      "  (dict1): ModuleDict(\n",
-      "    (cond2d): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1))\n",
-      "  )\n",
-      ")\n"
-     ]
-    }
-   ],
-   "source": [
-    "# moduledict\n",
-    "moduledict1 = nn.ModuleDict({'cond2d': nn.Conv2d(10,10,3)})\n",
-    "moduledict2 = nn.ModuleDict({'relu': nn.ReLU(), 'dict1': moduledict1})\n",
-    "print(moduledict1)\n",
-    "print(moduledict2)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "CellDict<\n",
-      "  (cond2d): Conv2d<input_channels=10, output_channels=10, kernel_size=(3, 3), stride=(1, 1), pad_mode=same, padding=0, dilation=(1, 1), group=1, has_bias=False, weight_init=<mindspore.common.initializer.HeUniform object at 0x000001D5E2CC5A10>, bias_init=None, format=NCHW>\n",
-      "  >\n"
-     ]
-    }
-   ],
-   "source": [
-    "import mindspore as ms\n",
-    "from mindspore import nn\n",
-    "celldict1 = nn.CellDict({'cond2d': nn.Conv2d(10,10,3)})\n",
-    "print(celldict1)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "**MindSpore支持CellDict(Cell)**\n",
-    "- 打印出的结构没有缩进；weight_init过于详细；\n",
-    "- 建议默认参数不打印，提供选择开关（print_all）"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {},
-   "outputs": [
-    {
-     "ename": "TypeError",
-     "evalue": "For 'CellDict', the type of cell can not be CellDict, CellList or SequentialCell, but got CellDict.",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[1;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
-      "Cell \u001b[1;32mIn[7], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m celldict2 \u001b[38;5;241m=\u001b[39m nn\u001b[38;5;241m.\u001b[39mCellDict({\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mrelu\u001b[39m\u001b[38;5;124m'\u001b[39m: nn\u001b[38;5;241m.\u001b[39mReLU(), \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdict1\u001b[39m\u001b[38;5;124m'\u001b[39m: celldict1})\n\u001b[0;32m      2\u001b[0m \u001b[38;5;28mprint\u001b[39m(celldict2)\n",
-      "File \u001b[1;32m~\\anaconda3\\envs\\tutorial\\Lib\\site-packages\\mindspore\\nn\\layer\\container.py:583\u001b[0m, in \u001b[0;36mCellDict.__init__\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m    581\u001b[0m Cell\u001b[38;5;241m.\u001b[39m\u001b[38;5;21m__init__\u001b[39m(\u001b[38;5;28mself\u001b[39m, auto_prefix)\n\u001b[0;32m    582\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(args) \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m1\u001b[39m:\n\u001b[1;32m--> 583\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mupdate(args[\u001b[38;5;241m0\u001b[39m])\n",
-      "File \u001b[1;32m~\\anaconda3\\envs\\tutorial\\Lib\\site-packages\\mindspore\\nn\\layer\\container.py:720\u001b[0m, in \u001b[0;36mCellDict.update\u001b[1;34m(self, cells)\u001b[0m\n\u001b[0;32m    718\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(cells, (OrderedDict, CellDict, abc\u001b[38;5;241m.\u001b[39mMapping)):\n\u001b[0;32m    719\u001b[0m     \u001b[38;5;28;01mfor\u001b[39;00m key, cell \u001b[38;5;129;01min\u001b[39;00m cells\u001b[38;5;241m.\u001b[39mitems():\n\u001b[1;32m--> 720\u001b[0m         \u001b[38;5;28mself\u001b[39m[key] \u001b[38;5;241m=\u001b[39m cell\n\u001b[0;32m    721\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m    722\u001b[0m     \u001b[38;5;28;01mfor\u001b[39;00m \u001b[38;5;28mid\u001b[39m, k_v \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28menumerate\u001b[39m(cells):\n",
-      "File \u001b[1;32m~\\anaconda3\\envs\\tutorial\\Lib\\site-packages\\mindspore\\nn\\layer\\container.py:590\u001b[0m, in \u001b[0;36mCellDict.__setitem__\u001b[1;34m(self, key, cell)\u001b[0m\n\u001b[0;32m    588\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__setitem__\u001b[39m(\u001b[38;5;28mself\u001b[39m, key, cell):\n\u001b[0;32m    589\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_validate_key(key)\n\u001b[1;32m--> 590\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_validate_cell_type(cell)\n\u001b[0;32m    591\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_update_cell_para_name(key, cell)\n\u001b[0;32m    592\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_cells[key] \u001b[38;5;241m=\u001b[39m cell\n",
-      "File \u001b[1;32m~\\anaconda3\\envs\\tutorial\\Lib\\site-packages\\mindspore\\nn\\layer\\container.py:631\u001b[0m, in \u001b[0;36mCellDict._validate_cell_type\u001b[1;34m(self, cell)\u001b[0m\n\u001b[0;32m    628\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mFor \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mcls_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m, the type of cell should be Cell, \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m    629\u001b[0m                     \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbut got \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mtype\u001b[39m(cell)\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m    630\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(cell, (CellDict, CellList, SequentialCell)):\n\u001b[1;32m--> 631\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mFor \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mcls_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m, the type of cell can not be CellDict, CellList or SequentialCell, \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m    632\u001b[0m                     \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbut got \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mtype\u001b[39m(cell)\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
-      "\u001b[1;31mTypeError\u001b[0m: For 'CellDict', the type of cell can not be CellDict, CellList or SequentialCell, but got CellDict."
-     ]
-    }
-   ],
-   "source": [
-    "celldict2 = nn.CellDict({'relu': nn.ReLU(), 'dict1': celldict1})\n",
-    "print(celldict2)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "**MindSpore不支持CellDict(CellDict)**\n",
-    "- PyTorch支持CellDict/SequentialCell/CellList嵌套"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.11.11"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 4
-}
diff --git a/api-examples/nn/.ipynb_checkpoints/nn.transformer-checkpoint.ipynb b/api-examples/nn/.ipynb_checkpoints/nn.transformer-checkpoint.ipynb
deleted file mode 100644
index 3ec48af..0000000
--- a/api-examples/nn/.ipynb_checkpoints/nn.transformer-checkpoint.ipynb
+++ /dev/null
@@ -1,108 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import numpy as np\n",
-    "import torch\n",
-    "from torch import Tensor\n",
-    "from torch import nn\n",
-    "\n",
-    "import time"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "C:\\Users\\wangzeyangyi\\anaconda3\\envs\\tutorial\\Lib\\site-packages\\torch\\nn\\modules\\transformer.py:379: UserWarning: enable_nested_tensor is True, but self.use_nested_tensor is False because encoder_layer.self_attn.batch_first was not True(use batch_first for better inference performance)\n",
-      "  warnings.warn(\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "torch.Size([20, 32, 512])\n",
-      "2.4219019412994385\n"
-     ]
-    }
-   ],
-   "source": [
-    "start = time.time()\n",
-    "transformer_model = nn.Transformer(nhead=16, num_encoder_layers=12)\n",
-    "src = np.random.rand(10, 32, 512)\n",
-    "tgt = np.random.rand(20, 32, 512)\n",
-    "out = transformer_model(Tensor(src), Tensor(tgt))\n",
-    "print(out.size())\n",
-    "end = time.time()\n",
-    "print(end-start)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "(20, 32, 512)\n",
-      "13.059895038604736\n"
-     ]
-    }
-   ],
-   "source": [
-    "import mindspore as ms\n",
-    "from mindspore import nn\n",
-    "from mindspore import Tensor\n",
-    "\n",
-    "start = time.time()\n",
-    "transformer_model = nn.Transformer(nhead=16, num_encoder_layers=12)\n",
-    "out = transformer_model(Tensor(src, ms.float32), Tensor(tgt, ms.float32))\n",
-    "print(out.shape)\n",
-    "end = time.time()\n",
-    "print(end-start)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "**MindSpore transformer用法与输出与PyTorch一致**\n",
-    "- MindSpore transformer在CPU上的性能待优化（~6x PyTorch）\n",
-    "- jax没有官方transformer API"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.11.11"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 4
-}
diff --git a/api-examples/nn/.ipynb_checkpoints/nn.upsample-checkpoint.ipynb b/api-examples/nn/.ipynb_checkpoints/nn.upsample-checkpoint.ipynb
deleted file mode 100644
index 794c7b5..0000000
--- a/api-examples/nn/.ipynb_checkpoints/nn.upsample-checkpoint.ipynb
+++ /dev/null
@@ -1,136 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import numpy as np\n",
-    "import torch\n",
-    "from torch import Tensor\n",
-    "from torch import nn"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[[[[1 1]\n",
-      "   [1 0]]]]\n",
-      "tensor([[[[1., 1., 1., 1.],\n",
-      "          [1., 1., 1., 1.],\n",
-      "          [1., 1., 0., 0.],\n",
-      "          [1., 1., 0., 0.]]]])\n",
-      "tensor([[[[1.0000, 1.0000, 1.0000, 1.0000],\n",
-      "          [1.0000, 0.9375, 0.8125, 0.7500],\n",
-      "          [1.0000, 0.8125, 0.4375, 0.2500],\n",
-      "          [1.0000, 0.7500, 0.2500, 0.0000]]]])\n",
-      "tensor([[[[1.0000, 1.0000, 1.0000, 1.0000],\n",
-      "          [1.0000, 0.8889, 0.7778, 0.6667],\n",
-      "          [1.0000, 0.7778, 0.5556, 0.3333],\n",
-      "          [1.0000, 0.6667, 0.3333, 0.0000]]]])\n"
-     ]
-    }
-   ],
-   "source": [
-    "# input = torch.arange(1, 5, dtype=torch.float32).view(1, 1, 2, 2)\n",
-    "input = np.random.randint(2, size=(1,1,2,2))\n",
-    "print(input)\n",
-    "\n",
-    "m = nn.Upsample(scale_factor=2, mode='nearest')\n",
-    "print(m(Tensor(input)))\n",
-    "\n",
-    "m = nn.Upsample(scale_factor=2, mode='bilinear')  # align_corners=False\n",
-    "print(m(Tensor(input)))\n",
-    "\n",
-    "m = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)\n",
-    "print(m(Tensor(input)))\n",
-    "\n",
-    "# # Try scaling the same data in a larger tensor\n",
-    "# input_3x3 = torch.zeros(3, 3).view(1, 1, 3, 3)\n",
-    "# input_3x3[:, :, :2, :2].copy_(input)\n",
-    "# input_3x3\n",
-    "\n",
-    "# m = nn.Upsample(scale_factor=2, mode='bilinear')  # align_corners=False\n",
-    "# # Notice that values in top left corner are the same with the small input (except at boundary)\n",
-    "# m(input_3x3)\n",
-    "\n",
-    "# m = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)\n",
-    "# # Notice that values in top left corner are now changed\n",
-    "# m(input_3x3)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {},
-   "outputs": [
-    {
-     "ename": "ValueError",
-     "evalue": "For 'interpolate', 'scale_factor' option cannot currently be set with the mode = nearest and dim = 4D.",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[1;31mValueError\u001b[0m                                Traceback (most recent call last)",
-      "Cell \u001b[1;32mIn[6], line 6\u001b[0m\n\u001b[0;32m      3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mmindspore\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Tensor\n\u001b[0;32m      5\u001b[0m m \u001b[38;5;241m=\u001b[39m nn\u001b[38;5;241m.\u001b[39mUpsample(scale_factor\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m2.0\u001b[39m, mode\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mnearest\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m----> 6\u001b[0m \u001b[38;5;28mprint\u001b[39m(m(Tensor(\u001b[38;5;28minput\u001b[39m)))\n\u001b[0;32m      8\u001b[0m m \u001b[38;5;241m=\u001b[39m nn\u001b[38;5;241m.\u001b[39mUpsample(scale_factor\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m2.0\u001b[39m, mode\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mbilinear\u001b[39m\u001b[38;5;124m'\u001b[39m)  \u001b[38;5;66;03m# align_corners=False\u001b[39;00m\n\u001b[0;32m      9\u001b[0m \u001b[38;5;28mprint\u001b[39m(m(Tensor(\u001b[38;5;28minput\u001b[39m)))\n",
-      "File \u001b[1;32m~\\anaconda3\\envs\\tutorial\\Lib\\site-packages\\mindspore\\nn\\cell.py:733\u001b[0m, in \u001b[0;36mCell.__call__\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m    730\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mrequires_grad \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_dynamic_shape_inputs \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmixed_precision_type):\n\u001b[0;32m    731\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hook \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hook \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hook \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hook \u001b[38;5;129;01mor\u001b[39;00m\n\u001b[0;32m    732\u001b[0m             \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_shard_fn \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_recompute_cell \u001b[38;5;129;01mor\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhas_bprop \u001b[38;5;129;01mand\u001b[39;00m _pynative_executor\u001b[38;5;241m.\u001b[39mrequires_grad())):\n\u001b[1;32m--> 733\u001b[0m         \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mconstruct(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m    735\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_run_construct(\u001b[38;5;241m*\u001b[39margs, 
\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m    737\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_complex_call(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n",
-      "File \u001b[1;32m~\\anaconda3\\envs\\tutorial\\Lib\\site-packages\\mindspore\\nn\\layer\\basic.py:473\u001b[0m, in \u001b[0;36mUpsample.construct\u001b[1;34m(self, x)\u001b[0m\n\u001b[0;32m    472\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mconstruct\u001b[39m(\u001b[38;5;28mself\u001b[39m, x):\n\u001b[1;32m--> 473\u001b[0m     out \u001b[38;5;241m=\u001b[39m F\u001b[38;5;241m.\u001b[39minterpolate(x, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msize, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mscale_factor, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmode,\n\u001b[0;32m    474\u001b[0m                         \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39malign_corners, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mrecompute_scale_factor)\n\u001b[0;32m    475\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m out\n",
-      "File \u001b[1;32m~\\anaconda3\\envs\\tutorial\\Lib\\site-packages\\mindspore\\ops\\function\\nn_func.py:2613\u001b[0m, in \u001b[0;36minterpolate\u001b[1;34m(input, size, scale_factor, mode, align_corners, recompute_scale_factor)\u001b[0m\n\u001b[0;32m   2611\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m   2612\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m dim_unknown \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mFalse\u001b[39;00m:\n\u001b[1;32m-> 2613\u001b[0m         _interpolate_scale_factor_check(scale_factor, mode, rank,\n\u001b[0;32m   2614\u001b[0m                                         supported_dict)\n\u001b[0;32m   2616\u001b[0m \u001b[38;5;66;03m# align_corners\u001b[39;00m\n\u001b[0;32m   2617\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m align_corners \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n",
-      "File \u001b[1;32m~\\anaconda3\\envs\\tutorial\\Lib\\site-packages\\mindspore\\ops\\function\\nn_func.py:2293\u001b[0m, in \u001b[0;36m_interpolate_scale_factor_check\u001b[1;34m(scale_factor, mode, rank, supported_dict)\u001b[0m\n\u001b[0;32m   2288\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m   2289\u001b[0m \u001b[38;5;124;03mscale_factor check\u001b[39;00m\n\u001b[0;32m   2290\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m   2291\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m scale_factor \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mscale_factor\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m supported_dict\u001b[38;5;241m.\u001b[39mget(\n\u001b[0;32m   2292\u001b[0m         mode, {})\u001b[38;5;241m.\u001b[39mget(rank):\n\u001b[1;32m-> 2293\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[0;32m   2294\u001b[0m         \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mFor \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124minterpolate\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m, \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mscale_factor\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m option cannot currently be set with the \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m   2295\u001b[0m         \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmode = \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mmode\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m and dim = \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mrank\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124mD.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
-      "\u001b[1;31mValueError\u001b[0m: For 'interpolate', 'scale_factor' option cannot currently be set with the mode = nearest and dim = 4D."
-     ]
-    }
-   ],
-   "source": [
-    "import mindspore as ms\n",
-    "from mindspore import nn\n",
-    "from mindspore import Tensor\n",
-    "\n",
-    "m = nn.Upsample(scale_factor=2.0, mode='nearest')\n",
-    "print(m(Tensor(input)))\n",
-    "\n",
-    "m = nn.Upsample(scale_factor=2.0, mode='bilinear')  # align_corners=False\n",
-    "print(m(Tensor(input)))\n",
-    "\n",
-    "m = nn.Upsample(scale_factor=2.0, mode='bilinear', align_corners=True)\n",
-    "print(m(Tensor(input)))"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "- jax没有upsample接口\n",
-    "- MindSpore的scale_factor参数必须为float\n",
-    "- MindSpore部分输入不支持"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.11.11"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 4
-}