
Commit 0fe6649

add bench

Signed-off-by: yiliu30 <[email protected]>
1 parent 0ea9563

11 files changed: +124 -10 lines

.devcontainer/Dockerfile (+1 -1)

@@ -7,6 +7,6 @@ ENV FLIT_ROOT_INSTALL=1
 
 COPY pyproject.toml .
 RUN touch README.md \
-    && mkdir -p src/python_package \
+    && mkdir -p src/torchutils \
     && python -m flit install --only-deps --deps develop \
     && rm -r pyproject.toml README.md src

README.md (+1 -1)

@@ -84,7 +84,7 @@ You can also use a Dockerfile to automate dev container creation. In your Docker
 #### Setup
 This project includes three files in the .devcontainer and .vscode directories that enable you to use GitHub Codespaces or Docker and VSCode locally to set up an environment that includes all the necessary extensions and tools for Python development.
 
-The Dockerfile specifies the base image and dependencies needed for the development container. The Dockerfile installs the necessary dependencies for the development container, including Python 3 and flit, a tool used to build and publish Python packages. It sets an environment variable to indicate that flit should be installed globally. It then copies the pyproject.toml file into the container and creates an empty README.md file. It creates a directory src/python_package and installs only the development dependencies using flit. Finally, it removes unnecessary files, including the pyproject.toml, README.md, and src directory.
+The Dockerfile specifies the base image and dependencies needed for the development container. The Dockerfile installs the necessary dependencies for the development container, including Python 3 and flit, a tool used to build and publish Python packages. It sets an environment variable to indicate that flit should be installed globally. It then copies the pyproject.toml file into the container and creates an empty README.md file. It creates a directory src/torchutils and installs only the development dependencies using flit. Finally, it removes unnecessary files, including the pyproject.toml, README.md, and src directory.
 
 The devcontainer.json file is a configuration file that defines the development container's settings, including the Docker image to use, any additional VSCode extensions to install, and whether or not to mount the project directory into the container. It uses the python-3-miniconda container as its base, which is provided by Microsoft, and also includes customizations for VSCode, such as recommended extensions for Python development and specific settings for those extensions. In addition to the above, the settings.json file also contains a handy command that can automatically install pre-commit hooks. These hooks can help ensure the quality of the code before it's committed to the repository, improving the overall codebase and making collaboration easier.
 

docs/modules.rst (+1 -1)

@@ -4,4 +4,4 @@ src
 .. toctree::
    :maxdepth: 4
 
-   python_package
+   torchutils

docs/python_package.hello_world.rst (+2 -2)

@@ -7,15 +7,15 @@ Submodules
 python\_package.hello\_world.hello\_world module
 ------------------------------------------------
 
-.. automodule:: python_package.hello_world.hello_world
+.. automodule:: torchutils.hello_world.hello_world
    :members:
    :undoc-members:
    :show-inheritance:
 
 Module contents
 ---------------
 
-.. automodule:: python_package.hello_world
+.. automodule:: torchutils.hello_world
    :members:
    :undoc-members:
    :show-inheritance:

docs/python_package.rst (+3 -3)

@@ -7,23 +7,23 @@ Subpackages
 .. toctree::
    :maxdepth: 4
 
-   python_package.hello_world
+   torchutils.hello_world
 
 Submodules
 ----------
 
 python\_package.setup module
 ----------------------------
 
-.. automodule:: python_package.setup
+.. automodule:: torchutils.setup
    :members:
    :undoc-members:
    :show-inheritance:
 
 Module contents
 ---------------
 
-.. automodule:: python_package
+.. automodule:: torchutils
    :members:
    :undoc-members:
    :show-inheritance:

pyproject.toml (+1 -1)

@@ -52,7 +52,7 @@ Source = "https://github.com/microsoft/python-package-template"
 Tracker = "https://github.com/microsoft/python-package-template/issues"
 
 [tool.flit.module]
-name = "python_package"
+name = "torchutils"
 
 [tool.bandit]
 exclude_dirs = ["build","dist","tests","scripts"]

src/python_package/__init__.py → src/torchutils/__init__.py (renamed, +3)

@@ -6,3 +6,6 @@
 from __future__ import annotations
 
 __version__ = "0.0.2"
+
+
+from torchutils.bench import bench_module, bench_more, inspect_tensor, see_memory_usage
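
With these re-exports, the helpers are importable from the package root. A minimal sketch of the intended usage (assumes a CUDA device and the package installed; the shapes are illustrative):

    import torch
    from torchutils import bench_module

    x = torch.randn(4096, 4096, device="cuda")
    ms = bench_module(lambda: x @ x)  # mean latency in milliseconds
    print(f"matmul: {ms:.3f} ms")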

src/torchutils/bench.py (new file, +101)

import gc
import os
import random
import time

import numpy as np
import torch
from triton.testing import do_bench

DEBUG = os.environ.get("DEBUG", "0") == "1"

seed = 0


def freeze_seed(seed):
    """Seed Python, NumPy, and torch (CPU and CUDA) RNGs for reproducible runs."""
    random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    np.random.seed(seed)


freeze_seed(seed)


def bench_module(func, warmup=25, rep=200):
    """Wall-clock benchmark of `func`; returns the mean latency in milliseconds."""
    torch.cuda.synchronize()
    for _ in range(warmup):
        func()
    torch.cuda.synchronize()
    start = time.perf_counter()
    for _ in range(rep):
        func()
    torch.cuda.synchronize()
    end = time.perf_counter()
    return (end - start) / rep * 1000


@torch.no_grad()
def bench_more(func, warmup=25, rep=200, kernel=True, profile=True, msg="", export_trace=False):
    """Benchmark `func` end to end, optionally with Triton's do_bench and the torch profiler."""
    module_bench_time = bench_module(func, warmup, rep)
    kernel_bench_time = do_bench(func, warmup, rep) if kernel else None
    if profile:
        print(f"----{msg}----")
        # Alias the import so it does not shadow the `profile` parameter.
        from torch.profiler import ProfilerActivity, profile as torch_profile

        activities = [ProfilerActivity.CPU, ProfilerActivity.CUDA]
        with torch_profile(activities=activities, with_stack=True) as prof:
            for _ in range(rep):
                func()
        if export_trace or os.environ.get("EXPORT_TRACE", "0") == "1":
            prof.export_chrome_trace(f"{msg}.json")
            print(f"Exported trace to {msg}.json")
        print("----" * 10, "CPU time", "----" * 10)
        print(prof.key_averages().table(sort_by="self_cpu_time_total", row_limit=20))
        print("----" * 10, "CUDA time", "----" * 10)
        print(prof.key_averages().table(sort_by="self_cuda_time_total", row_limit=20))
    return module_bench_time, kernel_bench_time


def inspect_tensor(x, msg="", force=False):
    """Print a tensor's metadata; also print its values when DEBUG or `force` is set."""
    print(
        f"{msg}: shape={x.shape}, dtype={x.dtype}, device={x.device}, layout={x.layout}, "
        f"strides={x.stride()}, is_contiguous={x.is_contiguous()}"
    )
    if DEBUG or force:
        print(x)


def see_memory_usage(message, force=True):
    """Report current and peak CUDA memory usage. Modified from DeepSpeed."""
    if not force:
        return
    # if dist.is_initialized() and not dist.get_rank() == 0:
    #     return

    # Python doesn't do real-time garbage collection, so collect explicitly
    # to get accurate memory reports.
    gc.collect()

    print(message)
    print(
        f"AllocatedMem {round(torch.cuda.memory_allocated() / (1024 ** 3), 2)} GB "
        f"MaxAllocatedMem {round(torch.cuda.max_memory_allocated() / (1024 ** 3), 2)} GB "
        f"ReservedMem {round(torch.cuda.memory_reserved() / (1024 ** 3), 2)} GB "
        f"MaxReservedMem {round(torch.cuda.max_memory_reserved() / (1024 ** 3), 2)} GB"
    )

    # Peak counters accumulate, so reset them for the next call.
    torch.cuda.reset_peak_memory_stats()
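
A hedged usage sketch for the new module (assumes a CUDA GPU and triton installed; the shapes and the `msg` label are illustrative, not part of the commit):

    import torch
    from torchutils.bench import bench_more, inspect_tensor, see_memory_usage

    a = torch.randn(2048, 2048, device="cuda", dtype=torch.float16)
    b = torch.randn(2048, 2048, device="cuda", dtype=torch.float16)

    inspect_tensor(a, msg="lhs")
    # module time is wall clock around the call; kernel time comes from do_bench
    module_ms, kernel_ms = bench_more(lambda: a @ b, msg="fp16_matmul", profile=False)
    print(f"module: {module_ms:.3f} ms, kernel: {kernel_ms:.3f} ms")
    see_memory_usage("after matmul")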

src/torchutils/config.py (new file, +10)

import dataclasses
import os


@dataclasses.dataclass
class Config:
    debug: bool = False


config = Config(debug=os.environ.get("DEBUG", "0") == "1")
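
The module-level `config` instance gives one switch for debug-only behavior across the package. A sketch (assumes `DEBUG=1` is exported before the process starts, since the flag is read at import time):

    from torchutils.config import config

    if config.debug:
        print("debug mode: extra checks enabled")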
File renamed without changes.

tests/test_methods.py (+1 -1)

@@ -5,7 +5,7 @@
 """This is a sample python file for testing functions from the source code."""
 from __future__ import annotations
 
-from python_package.hello_world import hello_world
+from torchutils.hello_world import hello_world
 
 
 def hello_test():
