diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..612c6fb --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,205 @@ +cmake_minimum_required(VERSION 3.0.2) +project(gsmini) + +## Compile as C++11, supported in ROS Kinetic and newer +# add_compile_options(-std=c++11) + +## Find catkin macros and libraries +## if COMPONENTS list like find_package(catkin REQUIRED COMPONENTS xyz) +## is used, also find other catkin packages +find_package(catkin REQUIRED + std_msgs + message_generation + message_runtime +) + +## System dependencies are found with CMake's conventions +# find_package(Boost REQUIRED COMPONENTS system) + + +## Uncomment this if the package has a setup.py. This macro ensures +## modules and global scripts declared therein get installed +## See http://ros.org/doc/api/catkin/html/user_guide/setup_dot_py.html +catkin_python_setup() + +################################################ +## Declare ROS messages, services and actions ## +################################################ + +## To declare and build messages, services or actions from within this +## package, follow these steps: +## * Let MSG_DEP_SET be the set of packages whose message types you use in +## your messages/services/actions (e.g. std_msgs, actionlib_msgs, ...). +## * In the file package.xml: +## * add a build_depend tag for "message_generation" +## * add a build_depend and a exec_depend tag for each package in MSG_DEP_SET +## * If MSG_DEP_SET isn't empty the following dependency has been pulled in +## but can be declared for certainty nonetheless: +## * add a exec_depend tag for "message_runtime" +## * In this file (CMakeLists.txt): +## * add "message_generation" and every package in MSG_DEP_SET to +## find_package(catkin REQUIRED COMPONENTS ...) +## * add "message_runtime" and every package in MSG_DEP_SET to +## catkin_package(CATKIN_DEPENDS ...) +## * uncomment the add_*_files sections below as needed +## and list every .msg/.srv/.action file to be processed +## * uncomment the generate_messages entry below +## * add every package in MSG_DEP_SET to generate_messages(DEPENDENCIES ...) + +## Generate messages in the 'msg' folder +add_message_files( + FILES + Shear.msg +) + +## Generate services in the 'srv' folder +# add_service_files( +# FILES +# Service1.srv +# Service2.srv +# ) + +## Generate actions in the 'action' folder +# add_action_files( +# FILES +# Action1.action +# Action2.action +# ) + +## Generate added messages and services with any dependencies listed here +generate_messages( + DEPENDENCIES + std_msgs # Or other packages containing msgs +) + +################################################ +## Declare ROS dynamic reconfigure parameters ## +################################################ + +## To declare and build dynamic reconfigure parameters within this +## package, follow these steps: +## * In the file package.xml: +## * add a build_depend and a exec_depend tag for "dynamic_reconfigure" +## * In this file (CMakeLists.txt): +## * add "dynamic_reconfigure" to +## find_package(catkin REQUIRED COMPONENTS ...) 
+## * uncomment the "generate_dynamic_reconfigure_options" section below +## and list every .cfg file to be processed + +## Generate dynamic reconfigure parameters in the 'cfg' folder +# generate_dynamic_reconfigure_options( +# cfg/DynReconf1.cfg +# cfg/DynReconf2.cfg +# ) + +################################### +## catkin specific configuration ## +################################### +## The catkin_package macro generates cmake config files for your package +## Declare things to be passed to dependent projects +## INCLUDE_DIRS: uncomment this if your package contains header files +## LIBRARIES: libraries you create in this project that dependent projects also need +## CATKIN_DEPENDS: catkin_packages dependent projects also need +## DEPENDS: system dependencies of this project that dependent projects also need +catkin_package( +# INCLUDE_DIRS include +# LIBRARIES gsmini +# CATKIN_DEPENDS other_catkin_pkg +# DEPENDS system_lib +) + +########### +## Build ## +########### + +## Specify additional locations of header files +## Your package locations should be listed before other locations +include_directories( +# include +# ${catkin_INCLUDE_DIRS} +) + +## Declare a C++ library +# add_library(${PROJECT_NAME} +# src/${PROJECT_NAME}/gsmini.cpp +# ) + +## Add cmake target dependencies of the library +## as an example, code may need to be generated before libraries +## either from message generation or dynamic reconfigure +# add_dependencies(${PROJECT_NAME} ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS}) + +## Declare a C++ executable +## With catkin_make all packages are built within a single CMake context +## The recommended prefix ensures that target names across packages don't collide +# add_executable(${PROJECT_NAME}_node src/gsmini_node.cpp) + +## Rename C++ executable without prefix +## The above recommended prefix causes long target names, the following renames the +## target back to the shorter version for ease of user use +## e.g. "rosrun someones_pkg node" instead of "rosrun someones_pkg someones_pkg_node" +# set_target_properties(${PROJECT_NAME}_node PROPERTIES OUTPUT_NAME node PREFIX "") + +## Add cmake target dependencies of the executable +## same as for the library above +# add_dependencies(${PROJECT_NAME}_node ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS}) + +## Specify libraries to link a library or executable target against +# target_link_libraries(${PROJECT_NAME}_node +# ${catkin_LIBRARIES} +# ) + +############# +## Install ## +############# + +# all install targets should use catkin DESTINATION variables +# See http://ros.org/doc/api/catkin/html/adv_user_guide/variables.html + +## Mark executable scripts (Python etc.) 
for installation +## in contrast to setup.py, you can choose the destination +# catkin_install_python(PROGRAMS +# scripts/my_python_script +# DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION} +# ) + +## Mark executables for installation +## See http://docs.ros.org/melodic/api/catkin/html/howto/format1/building_executables.html +# install(TARGETS ${PROJECT_NAME}_node +# RUNTIME DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION} +# ) + +## Mark libraries for installation +## See http://docs.ros.org/melodic/api/catkin/html/howto/format1/building_libraries.html +# install(TARGETS ${PROJECT_NAME} +# ARCHIVE DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION} +# LIBRARY DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION} +# RUNTIME DESTINATION ${CATKIN_GLOBAL_BIN_DESTINATION} +# ) + +## Mark cpp header files for installation +# install(DIRECTORY include/${PROJECT_NAME}/ +# DESTINATION ${CATKIN_PACKAGE_INCLUDE_DESTINATION} +# FILES_MATCHING PATTERN "*.h" +# PATTERN ".svn" EXCLUDE +# ) + +## Mark other files for installation (e.g. launch and bag files, etc.) +# install(FILES +# # myfile1 +# # myfile2 +# DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION} +# ) + +############# +## Testing ## +############# + +## Add gtest based cpp test target and link libraries +# catkin_add_gtest(${PROJECT_NAME}-test test/test_gsmini.cpp) +# if(TARGET ${PROJECT_NAME}-test) +# target_link_libraries(${PROJECT_NAME}-test ${PROJECT_NAME}) +# endif() + +## Add folders to be run by python nosetests +# catkin_add_nosetests(test) diff --git a/examples/gsmini_node.py b/examples/gsmini_node.py new file mode 100755 index 0000000..fb13565 --- /dev/null +++ b/examples/gsmini_node.py @@ -0,0 +1,80 @@ +#!/usr/bin/env python3 + +import cv2 +import numpy as np +import rospy +from cv_bridge import CvBridge +from sensor_msgs.msg import Image + +from gsmini import gs3drecon, gsdevice +from gsmini.msg import Shear + + +def main(): + rospy.init_node("gsminiros") + + # Set flags + DEVICE = rospy.get_param("~device", "cuda") # robot radius in grid units + CAMERA_DEVICE = rospy.get_param("~camera_device") + MASK_MARKERS_FLAG = rospy.get_param("~mask_markers", True) + CALCULATE_SHEAR_FLAG = rospy.get_param("~calculate_shear", True) + CALCULATE_DEPTH_FLAG = rospy.get_param("~calculate_depth", False) + SHOW_NOW = rospy.get_param("~show_now", False) + + dev = gsdevice.Camera( + CAMERA_DEVICE, + calcDepth=CALCULATE_DEPTH_FLAG, + calcShear=CALCULATE_SHEAR_FLAG, + device=DEVICE, + maskMarkersFlag=MASK_MARKERS_FLAG, + ) + + cvbridge = CvBridge() + image_pub = rospy.Publisher("gsmini_image", Image, queue_size=1) + + if CALCULATE_DEPTH_FLAG: + depth_pub = rospy.Publisher("gsmini_depth", Image, queue_size=1) + + """ use this to plot just the 3d """ + if SHOW_NOW: + vis3d = gs3drecon.Visualize3D(dev.imgw, dev.imgh, "", dev.mmpp) + + if CALCULATE_SHEAR_FLAG: + shear_pub = rospy.Publisher("gsmini_shear", Shear, queue_size=1) + + rate = rospy.Rate(14) + while not rospy.is_shutdown(): + rate.sleep() + + f1 = dev.get_image() + dev.process_image(f1) + + """ publish images """ + image_pub.publish(cvbridge.cv2_to_imgmsg(f1, encoding="bgr8")) + + if CALCULATE_DEPTH_FLAG: + dm = dev.get_depth() + depth_pub.publish(cvbridge.cv2_to_imgmsg(dm, encoding="32FC1")) + + if CALCULATE_SHEAR_FLAG: + shear_np = dev.get_shear() + shear_msg = Shear() + shear_msg.header.stamp = rospy.Time.now() + shear_msg.n = dev.marker_shape[0] + shear_msg.m = dev.marker_shape[1] + shear_msg.header.frame_id = rospy.get_name() + shear_msg.initial = shear_np[:, 0, :].flatten() + shear_msg.markers = shear_np[:, 1, 
:].flatten() + shear_pub.publish(shear_msg) + """ Display the results """ + if SHOW_NOW: + cv2.imshow("Image", f1) + if CALCULATE_DEPTH_FLAG: + vis3d.update(dm) + if cv2.waitKey(1) & 0xFF == ord("q"): + break + dev.disconnect() + + +if __name__ == "__main__": + main() diff --git a/examples/ros-node.py b/examples/ros-node.py deleted file mode 100644 index 1241bb1..0000000 --- a/examples/ros-node.py +++ /dev/null @@ -1,75 +0,0 @@ -import cv2 -import numpy as np -import rospy -from cv_bridge import CvBridge -from rospy.numpy_msg import numpy_msg -from rospy_tutorials.msg import Floats -from sensor_msgs.msg import Image - -from gsmini import gs3drecon, gsdevice - - -def main(): - rospy.init_node("gsminiros", anonymous=True) - - # Set flags - DEVICE = "cuda" - MASK_MARKERS_FLAG = True - CALCULATE_DEPTH_FLAG = True - CALCULATE_SHEAR_FLAG = True - SHOW_NOW = False - - # the device ID can change after unplugging and changing the usb ports. - # on linux run, v4l2-ctl --list-devices, in the terminal to get the device ID for camera - cam_id = gsdevice.get_camera_id("GelSight Mini") - dev = gsdevice.Camera( - cam_id, - calcDepth=CALCULATE_DEPTH_FLAG, - calcShear=CALCULATE_SHEAR_FLAG, - device=DEVICE, - maskMarkersFlag=MASK_MARKERS_FLAG, - ) - - cvbridge = CvBridge() - image_pub = rospy.Publisher("/gsmini_image", Image, queue_size=1) - - if CALCULATE_DEPTH_FLAG: - depth_pub = rospy.Publisher("/gsmini_depth", numpy_msg(Floats), queue_size=1) - - """ use this to plot just the 3d """ - if SHOW_NOW: - vis3d = gs3drecon.Visualize3D(dev.imgw, dev.imgh, "", dev.mmpp) - - if CALCULATE_SHEAR_FLAG: - shear_pub = rospy.Publisher("/gsmini_shear", numpy_msg(Floats), queue_size=1) - - rate = rospy.Rate(14) - while not rospy.is_shutdown(): - rate.sleep() - - f1 = dev.get_image() - dev.process_image(f1) - - """ publish images """ - image_pub.publish(cvbridge.cv2_to_imgmsg(f1, encoding="passthrough")) - - if CALCULATE_DEPTH_FLAG: - dm = dev.get_depth() - depth_pub.publish(dm.flatten()) - - if CALCULATE_SHEAR_FLAG: - markers = dev.get_markers() - shear = np.stack((dev.get_initial_markers(), markers), axis=1) - shear_pub.publish(shear.flatten()) - """ Display the results """ - if SHOW_NOW: - cv2.imshow("Image", f1) - if CALCULATE_DEPTH_FLAG: - vis3d.update(dm) - if cv2.waitKey(1) & 0xFF == ord("q"): - break - dev.disconnect() - - -if __name__ == "__main__": - main() diff --git a/gsmini/gs3drecon.py b/gsmini/gs3drecon.py index ec0480e..1ce392d 100644 --- a/gsmini/gs3drecon.py +++ b/gsmini/gs3drecon.py @@ -317,7 +317,7 @@ def get_depthmap(self, frame, mask_markers, cm=None): # gy = (b-a) * ((gy - gy.min()) / (gy.max() - gy.min())) + a """OPTION#2 calculate gx, gy from nx, ny. """ # normalize normals to get gradients for poisson - nz = np.sqrt(1 - nx**2 - ny**2) + nz = np.sqrt(1 - nx ** 2 - ny ** 2) if np.isnan(nz).any(): print("nan found") nz[np.where(np.isnan(nz))] = np.nanmean(nz) diff --git a/gsmini/gsdevice.py b/gsmini/gsdevice.py index a7e0669..6bb8f64 100644 --- a/gsmini/gsdevice.py +++ b/gsmini/gsdevice.py @@ -1,34 +1,55 @@ import os import re from threading import Thread - +import time import cv2 import numpy as np from . 
import gs3drecon -from .markertracker import MarkerTracker +import subprocess def get_camera_id(camera_name): - cam_num = None - - for file in os.listdir("/sys/class/video4linux"): - real_file = os.path.realpath("/sys/class/video4linux/" + file + "/name") - with open(real_file, "rt") as name_file: - name = name_file.read().rstrip() - if camera_name in name: - cam_num = int(re.search("\d+$", file).group(0)) - found = "FOUND " + str(cam_num) + " !" - else: - found = " " - print("{} {} -> {}".format(found, file, name)) + # Execute the command and capture its output + output = subprocess.check_output("v4l2-ctl --list-devices", shell=True, text=True) + + # Split the output into lines + lines = output.split("\n") + + # Initialize variables + camera_found = False + video_devices = [] + + # Iterate over the lines + for line in lines: + # Check if the current line contains the camera name + if camera_name in line: + camera_found = True + continue + + # If camera is found, look for video devices + if camera_found: + match = re.search(r"/dev/video(\d+)", line) + if match: + video_devices.append(int(match.group(1))) + else: + # No more video devices for the camera, break the loop + break - return cam_num - 1 + # Filter for even video devices + even_video_devices = [dev for dev in video_devices if dev % 2 == 0] + + assert ( + len(even_video_devices) == 1 + ), f"Video device not found or multiple devices found. Only one video device should be connected.{even_video_devices}" + + return even_video_devices[0] def resize_crop_mini(img, imgw, imgh): - border_size_x, border_size_y = int(img.shape[0] * (1 / 7)), int( - np.floor(img.shape[1] * (1 / 7)) + border_size_x, border_size_y = ( + int(img.shape[0] * (1 / 7)), + int(np.floor(img.shape[1] * (1 / 7))), ) # remove 1/7th of border from each size img = img[ border_size_x : img.shape[0] - border_size_x, @@ -57,11 +78,7 @@ def __init__( self.imgh = 240 self.mmpp = mmpp self._dm = None - self._Ox = None - self._Oy = None - self._p0 = None self._dm_dirty = False - self._shear_dirty = False self.dev_id = dev_id self.cam = None self.enableDepth = calcDepth @@ -73,61 +90,40 @@ def __init__( self.connect() def connect(self): - if ( - type(self.dev_id) == str - ): # if dev_id is a string, then it is a path used to initialize the depth and marker - import glob - - paths = glob.glob(os.path.join(self.dev_id, "*.jpg")) - for path in paths: - self._img = cv2.imread(path) - if self.enableDepth: + # if dev_id is a string, then it is a path used to initialize the depth and marker + self.cam = cv2.VideoCapture(self.dev_id) + if self.cam is None or not self.cam.isOpened(): + print("Warning: unable to open video source: ", self.dev_id) + self._img = self.get_raw_image() + if self.enableDepth: + ret, self._img = self.cam.read() + while self._img is None: + print("Warning: unable to read image from camera") + ret, self._img = self.cam.read() + time.sleep(0.5) + + while self.nn.dm_zero_counter < 50: + self._img = resize_crop_mini(self._img, self.imgw, self.imgh) + if ret: self._dm = self.nn.get_depthmap(self._img, self.maskMarkersFlag) - else: - self.cam = cv2.VideoCapture(self.dev_id) - if self.cam is None or not self.cam.isOpened(): - print("Warning: unable to open video source: ", self.dev_id) - self._img = self.get_raw_image() - if self.enableDepth: - while self.nn.dm_zero_counter < 50: - ret, self._img = self.cam.read() - self._img = resize_crop_mini(self._img, self.imgw, self.imgh) - if ret: - self._dm = self.nn.get_depthmap(self._img, self.maskMarkersFlag) + ret, self._img 
= self.cam.read() if self.enableShear: - self._old_gray = cv2.cvtColor(self._img, cv2.COLOR_BGR2GRAY) - mtracker = MarkerTracker(np.float32(self._img) / 255.0) - self._Ox = mtracker.initial_marker_center[:, 1] - self._Oy = mtracker.initial_marker_center[:, 0] - self._initial_markers = np.array( - (self._Ox, self._Oy), np.float32 - ).T.reshape((-1, 2)) - self._nct = len(mtracker.initial_marker_center) - self._lk_params = dict( - winSize=(15, 15), - maxLevel=2, - criteria=( - cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, - 10, - 0.03, - ), - ) - self._p0 = np.array((self._Ox, self._Oy), np.float32).T.reshape((-1, 1, 2)) - # finished initializing p0 + from gsmini.marker_tracker.optical_flow import MarkerTracking + + self._marker_tracker = MarkerTracking(self._img) if self.cam is not None: self._stop = False Thread(target=self._update_image).start() return def get_raw_image(self): + ret, f0 = self.cam.read() for _ in range(10): # flush out fist 10 frames to remove black frames ret, f0 = self.cam.read() - ret, f0 = self.cam.read() if ret: f0 = resize_crop_mini(f0, self.imgw, self.imgh) else: print("ERROR! reading image from camera") - return f0 def get_image(self): @@ -139,17 +135,11 @@ def get_depth(self): return None return self._dm.copy() - def get_markers(self): - if not self.enableShear: - print("ERROR! shear is not enabled") - return None - return np.squeeze(self._p0.copy()) - - def get_initial_markers(self): + def get_shear(self): if not self.enableShear: print("ERROR! shear is not enabled") return None - return self._initial_markers.copy() + return self._shear.copy() def process_image(self, img): if self.enableDepth: @@ -159,7 +149,10 @@ def process_image(self, img): def _update_image(self): while not self._stop: - ret, f0 = self.cam.read() + try: + ret, f0 = self.cam.read() + except Exception: + ret = False if ret: f0 = resize_crop_mini(f0, self.imgw, self.imgh) self._img = f0 @@ -168,15 +161,13 @@ def _update_depth(self, img): self._dm = self.nn.get_depthmap(img, self.maskMarkersFlag) def _update_shear(self, img): - new_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) - p1, st, _ = cv2.calcOpticalFlowPyrLK( - self._old_gray, new_gray, self._p0, None, **self._lk_params - ) - if np.sum(st) < self._nct: - print("all pts did not converge") - else: - self._p0 = p1.reshape(-1, 1, 2) - self._old_gray = new_gray + # shape: (N,M,2,2) + # ([init_markers,curr_markers],N,M,[x,y]) + self._shear = self._marker_tracker.get_flow(img) def disconnect(self): self._stop = True + + @property + def marker_shape(self): + return self._marker_tracker.marker_shape diff --git a/gsmini/marker_tracker/__init__.py b/gsmini/marker_tracker/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/gsmini/marker_tracker/find_marker.cpp b/gsmini/marker_tracker/find_marker.cpp new file mode 100644 index 0000000..f568ee1 --- /dev/null +++ b/gsmini/marker_tracker/find_marker.cpp @@ -0,0 +1,267 @@ +// File Name: tracking.cpp +// Author: Shaoxiong Wang +// Create Time: 2018/12/20 20:35 + +#include "opencv2/opencv.hpp" +#include +#include +#include +#include +#include "tracking_class.h" + +using namespace cv; +using namespace std; + +#define CAMERA_WIDTH 720 +#define CAMERA_HEIGHT 576 + +#define SCALE_DOWN 1 + +Point_t centers[MAXNM]; +Matching matcher; + +void example(Mat gray){ + + for(int y=0;y(Point(x,y)); + + // cout<(Point(x,y)) = color; + } + cout< threshold_gray; + + split(I, planes); + mask1 = (planes[0] > threshold_gray) | (planes[1] > threshold_gray) | (planes[2] > threshold_gray); + mask2 = (planes[0] > 
threshold_gray / 3) & (planes[1] > threshold_gray / 3) & (planes[2] > threshold_gray / 3); + // cout< > contours; + vector hierarchy; + + findContours(mask, contours, hierarchy, CV_RETR_TREE, CV_CHAIN_APPROX_SIMPLE, Point(0, 0)); + // cout<(Point(x,y)); + Vec3b color_mask = mask.at(Point(x,y)); + // Vec3b color(0, 0, 0); + // color[0] = color[0] - color_mask[0] * 0.2; + + // cout<(Point(x,y)) = color; + } + // cout< mu(contours.size() ); + + /// Get the mass centers: + vector mc( contours.size() ); + + for( int i = 0; i< contours.size(); i++) { + int a = contourArea(contours[i]); + // cout< marker_size_max) continue; + + // cout<<"+++"; + + mu[i] = moments( contours[i], false ); + mc[i] = Point2f( mu[i].m10/mu[i].m00 , mu[i].m01/mu[i].m00 ); + + // cout << mc[i].x <<" " << mc[i].y << endl; + // MyFilledCircle( frame, Point( mc[i].x, mc[i].y) ); + centers[count].x = mc[i].x; + centers[count].y = mc[i].y; + centers[count].id = count; + + count++; + + } + cout<<"COUNT "<> frame; + if (cnt_frame < 25) continue; + + // if (cnt_frame > 1 && cnt_frame < 485) continue; + // if (cnt_frame > 380) break; + if(frame.empty()) break; + + frame = init_HSR(frame); + cv::resize(frame, frame, cv::Size(), SCALE_DOWN, SCALE_DOWN); + + printf("loading: %.6lf seconds\t", ((double)clock() - t)/CLOCKS_PER_SEC); + + t = clock(); + + mask = find_marker(frame); + + printf("centers: %.6lf seconds\t", ((double)clock() - t)/CLOCKS_PER_SEC); + + + t = clock(); + + marker_center(mask, frame); + + printf("matching: %.6lf seconds\n", ((double)clock() - t)/CLOCKS_PER_SEC); + + + cout << frame.size; + cout< np.ndim(image) + or len( + [i for i in range(np.ndim(template)) if template.shape[i] > image.shape[i]] + ) + > 0 + ): + print("normxcorr2: TEMPLATE larger than IMG. Arguments may be swapped.") + template = template - np.mean(template) + image = image - np.mean(image) + a1 = np.ones(template.shape) + # Faster to flip up down and left right then use fftconvolve instead of scipy's correlate + ar = np.flipud(np.fliplr(template)) + out = fftconvolve(image, ar.conj(), mode=mode) + image = fftconvolve(np.square(image), a1, mode=mode) - np.square( + fftconvolve(image, a1, mode=mode) + ) / (np.prod(template.shape)) + # Remove small machine precision errors after subtraction + image[np.where(image < 0)] = 0 + template = np.sum(np.square(template)) + out = out / np.sqrt(image * template) + # Remove any divisions by 0 or very close to 0 + out[np.where(np.logical_not(np.isfinite(out)))] = 0 + return out + + +def init(frame): + RESCALE = 1.0 + return cv2.resize(frame, (0, 0), fx=1.0 / RESCALE, fy=1.0 / RESCALE) + + +def preprocessimg(img): + """ + Pre-processing image to remove noise + """ + dotspacepx = 36 + ### speckle noise denoising + # dst = cv2.fastNlMeansDenoising(img_gray, None, 9, 15, 30) + ### adaptive histogram equalizer + # clahe = cv2.createCLAHE(clipLimit=15.0, tileGridSize=(10, 10)) + # equalized = clahe.apply(img_gray) + ### Gaussian blur + # gsz = 2 * round(3 * mindiameterpx / 2) + 1 + gsz = 2 * round(0.75 * dotspacepx / 2) + 1 + blur = cv2.GaussianBlur(img, (51, 51), gsz / 6) + #### my linear varying filter + x = np.linspace(3, 1.5, img.shape[1]) + y = np.linspace(3, 1.5, img.shape[0]) + xx, yy = np.meshgrid(x, y) + mult = blur * yy + ### adjust contrast + res = cv2.convertScaleAbs(blur, alpha=2, beta=0) + return res + + +def find_marker(frame): + ##### masking techinique for dots on mini + # gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) + # mask = cv2.inRange(gray, 5, 55) + + gray = frame[:, :, 1] ### use only the 
green channel + im_blur_3 = cv2.GaussianBlur(gray, (3, 3), 5) + im_blur_8 = cv2.GaussianBlur(gray, (15, 15), 5) + im_blur_sub = im_blur_8 - im_blur_3 + 128 + mask = cv2.inRange(im_blur_sub, 140, 255) + + # ''' normalized cross correlation ''' + template = gkern(l=20, sig=3) + nrmcrimg = normxcorr2(template, mask) + # '''''''''''''''''''''''''''''''''''' + a = nrmcrimg + mask = np.asarray(a > 0.1) + mask = (mask).astype("uint8") + + return mask + + +def marker_center(mask, frame): + img3 = mask + neighborhood_size = 10 + # threshold = 40 # for r1.5 + threshold = 0 # for mini + data_max = maximum_filter(img3, neighborhood_size) + maxima = img3 == data_max + data_min = minimum_filter(img3, neighborhood_size) + diff = (data_max - data_min) > threshold + maxima[diff == 0] = 0 + labeled, num_objects = ndimage.label(maxima) + MarkerCenter = np.array( + ndimage.center_of_mass(img3, labeled, range(1, num_objects + 1)) + ) + MarkerCenter[:, [0, 1]] = MarkerCenter[:, [1, 0]] + for i in range(MarkerCenter.shape[0]): + x0, y0 = int(MarkerCenter[i][0]), int(MarkerCenter[i][1]) + cv2.circle(mask, (x0, y0), color=(0, 0, 0), radius=1, thickness=1) + return MarkerCenter + + +def draw_flow(frame, flow): + Ox, Oy, Cx, Cy, Occupied = flow + + dx = np.mean(np.abs(np.asarray(Ox) - np.asarray(Cx))) + dy = np.mean(np.abs(np.asarray(Oy) - np.asarray(Cy))) + dnet = np.sqrt(dx ** 2 + dy ** 2) + print(dnet * 0.075, "\n") + + K = 1 + for i in range(len(Ox)): + for j in range(len(Ox[i])): + pt1 = (int(Ox[i][j]), int(Oy[i][j])) + pt2 = ( + int(Cx[i][j] + K * (Cx[i][j] - Ox[i][j])), + int(Cy[i][j] + K * (Cy[i][j] - Oy[i][j])), + ) + color = (0, 0, 255) + if Occupied[i][j] <= -1: + color = (127, 127, 255) + cv2.arrowedLine(frame, pt1, pt2, color, 2, tipLength=0.25) + + +def warp_perspective(img): + + TOPLEFT = (175, 230) + TOPRIGHT = (380, 225) + BOTTOMLEFT = (10, 410) + BOTTOMRIGHT = (530, 400) + + WARP_W = 215 + WARP_H = 215 + + points1 = np.float32([TOPLEFT, TOPRIGHT, BOTTOMLEFT, BOTTOMRIGHT]) + points2 = np.float32([[0, 0], [WARP_W, 0], [0, WARP_H], [WARP_W, WARP_H]]) + + matrix = cv2.getPerspectiveTransform(points1, points2) + + result = cv2.warpPerspective(img, matrix, (WARP_W, WARP_H)) + + return result + + +def init_HSR(img): + DIM = (640, 480) + img = cv2.resize(img, DIM) + + K = np.array( + [ + [225.57469247811056, 0.0, 280.0069549918857], + [0.0, 221.40607131318117, 294.82435570493794], + [0.0, 0.0, 1.0], + ] + ) + D = np.array( + [ + [0.7302503082668154], + [-0.18910060205317372], + [-0.23997727800712282], + [0.13938490908400802], + ] + ) + h, w = img.shape[:2] + map1, map2 = cv2.fisheye.initUndistortRectifyMap( + K, D, np.eye(3), K, DIM, cv2.CV_16SC2 + ) + undistorted_img = cv2.remap( + img, map1, map2, interpolation=cv2.INTER_LINEAR, borderMode=cv2.BORDER_CONSTANT + ) + + return warp_perspective(undistorted_img) diff --git a/gsmini/marker_tracker/optical_flow.py b/gsmini/marker_tracker/optical_flow.py new file mode 100644 index 0000000..abde544 --- /dev/null +++ b/gsmini/marker_tracker/optical_flow.py @@ -0,0 +1,153 @@ +import cv2 +import numpy as np +from scipy import ndimage +from scipy.ndimage.filters import maximum_filter, minimum_filter +from scipy.signal import fftconvolve + + +def gkern(l=5, sig=1.0): + """ creates gaussian kernel with side length l and a sigma of sig """ + ax = np.linspace(-(l - 1) / 2.0, (l - 1) / 2.0, l) + xx, yy = np.meshgrid(ax, ax) + kernel = np.exp(-0.5 * (np.square(xx) + np.square(yy)) / np.square(sig)) + return kernel / np.sum(kernel) + + +def normxcorr2(template, image, 
mode="same"): + """ + Input arrays should be floating point numbers. + :param template: N-D array, of template or filter you are using for cross-correlation. + Must be less or equal dimensions to image. + Length of each dimension must be less than length of image. + :param image: N-D array """ + # If this happens, it is probably a mistake + if ( + np.ndim(template) > np.ndim(image) + or len( + [i for i in range(np.ndim(template)) if template.shape[i] > image.shape[i]] + ) + > 0 + ): + print("normxcorr2: TEMPLATE larger than IMG. Arguments may be swapped.") + template = template - np.mean(template) + image = image - np.mean(image) + a1 = np.ones(template.shape) + # Faster to flip up down and left right then use fftconvolve instead of scipy's correlate + ar = np.flipud(np.fliplr(template)) + out = fftconvolve(image, ar.conj(), mode=mode) + image = fftconvolve(np.square(image), a1, mode=mode) - np.square( + fftconvolve(image, a1, mode=mode) + ) / (np.prod(template.shape)) + # Remove small machine precision errors after subtraction + image[np.where(image < 0)] = 0 + template = np.sum(np.square(template)) + + out = out / np.sqrt(image * template) + # Remove any divisions by 0 or very close to 0 + out[np.where(np.logical_not(np.isfinite(out)))] = 0 + return out + + +def find_marker(gray): + adjusted = cv2.convertScaleAbs(gray, alpha=3, beta=0) + mask = cv2.inRange(adjusted, 255, 255) + """ normalized cross correlation """ + # template = gkern(l=20, sig=5) + template = gkern(l=20, sig=5) + + nrmcrimg = normxcorr2(template, mask) + """""" """""" """""" """""" """""" """""" + a = nrmcrimg + b = 2 * ((a - a.min()) / (a.max() - a.min())) - 1 + b = (b - b.min()) / (b.max() - b.min()) + mask = np.asarray(b < 0.5) # 0.5 + return (mask * 255).astype("uint8") + + +def find2dpeaks(res): + """ + Create masks for all the dots. find 2D peaks. + Takes an image and detect the peaks using the local maximum filter. + Returns a boolean mask of the peaks (i.e. 
1 when + the pixel's value is the neighborhood maximum, 0 otherwise) + """ + img3 = res + neighborhood_size = 20 + threshold = 0 + data_max = maximum_filter(img3, neighborhood_size) + maxima = img3 == data_max + data_min = minimum_filter(img3, neighborhood_size) + diff = (data_max - data_min) > threshold + maxima[diff == 0] = 0 + labeled, num_objects = ndimage.label(maxima) + xy = np.array(ndimage.center_of_mass(img3, labeled, range(1, num_objects + 1))) + return xy + + +class MarkerTracking: + def __init__(self, init_frame, N=7, M=9, params=None): + self.N = N + self.M = M + self.NUM_MKS = N * M + # Parameters for Lucas Kanade optical flow + self.init_frame = init_frame + self.lk_params = dict( + winSize=(100, 100), + maxLevel=2, + criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 20, 2), + flags=0, + ) + + init_framegray = cv2.cvtColor(init_frame, cv2.COLOR_BGR2GRAY) + init_framegray = cv2.adaptiveThreshold( + init_framegray, + 255, + cv2.ADAPTIVE_THRESH_GAUSSIAN_C, + cv2.THRESH_BINARY, + 41, + 10, + ) + + # find mask + mask = find_marker(init_framegray) + cv2.imwrite("mask.jpg", mask) + # find marker centers + self.mc0 = find2dpeaks(mask) + self.mc0[:, [0, 1]] = self.mc0[:, [1, 0]] # Mx2 + + self.MK_SCALE = 10 + self.original = np.zeros((self.NUM_MKS, 2, 2)) + # p2 = M x [x,y] + # self.mc0 = M x [x,y] + # self.mc0 + # xy1 = MK_SCALE * (p2 - self.mc0) + self.mc0 (M x [x,y]) + + def get_flow(self, frame, reset=False, debug_img=False): + p2, st2, err2 = cv2.calcOpticalFlowPyrLK( + self.init_frame, frame, self.mc0.astype("float32"), None, **self.lk_params + ) + curr = np.zeros((self.NUM_MKS, 2, 2)) + curr[:, 0, :] = self.mc0 + curr[:, 1, :] = self.MK_SCALE * (p2 - self.mc0) + self.mc0 + curr[:, 1, :] = curr[:, 1, :] - self.original[:, 1, :] + self.original[:, 0, :] + if reset: + self.original[:, 0, :] = self.mc0 + self.original[:, 1, :] = self.MK_SCALE * (p2 - self.mc0) + self.mc0 + + if debug_img: + for mk in range(curr.shape[0]): + curr_i = curr.copy().astype(np.int32) + cv2.arrowedLine( + frame, + (curr_i[mk, 0, 0], curr_i[mk, 0, 1]), + (curr_i[mk, 1, 0], curr_i[mk, 1, 1]), + (0, 255, 255), + thickness=1, + tipLength=0.2, + ) + cv2.imwrite("debug_img.jpg", frame) + return curr + + @property + def marker_shape(self): + return (self.N, self.M) diff --git a/gsmini/marker_tracker/tracking.py b/gsmini/marker_tracker/tracking.py new file mode 100644 index 0000000..a4da970 --- /dev/null +++ b/gsmini/marker_tracker/tracking.py @@ -0,0 +1,75 @@ +import numpy as np +import gsmini.cpptracker.marker_detection as marker_detection +import gsmini.cpptracker.find_marker as find_marker + + +def marker_calibration(frame, N, M): + """ + Args: + frame: the frame that contains the marker + N, M: the row and column of the marker array + Returns: + x0_, y0_: the coordinate of upper-left marker + dx_, dy_: the horizontal and vertical interval between adjacent markers + """ + mask = marker_detection.find_marker(frame) + mc = marker_detection.marker_center(mask, frame) + mc_sorted1 = mc[mc[:, 0].argsort()] + mc1 = mc_sorted1[:N] + mc1 = mc1[mc1[:, 1].argsort()] + + mc_sorted2 = mc[mc[:, 1].argsort()] + mc2 = mc_sorted2[:M] + mc2 = mc2[mc2[:, 0].argsort()] + + x0_ = np.round(mc1[0][0]) + y0_ = np.round(mc1[0][1]) + dx_ = mc2[1, 0] - mc2[0, 0] + dy_ = mc1[1, 1] - mc1[0, 1] + + return x0_, y0_, dx_, dy_ + + +class MarkerTracking: + def __init__(self, frame, N=7, M=9, FPS=25): + self.N_ = N + self.M_ = M + self.FPS_ = FPS + self.x0_, self.y0_, self.dx_, self.dy_ = marker_calibration( + frame, self.N_, 
self.M_ + ) + self.matcher = find_marker.Matching( + self.N_, self.M_, self.FPS_, self.x0_, self.y0_, self.dx_, self.dy_ + ) + + def get_flow(self, frame): + mask = marker_detection.find_marker(frame) + mc = marker_detection.marker_center(mask, frame) + self.matcher.init(mc) + self.matcher.run() + flow = self.matcher.get_flow() + print(flow) + """ + flow: (Ox, Oy, Cx, Cy, Occupied) + Ox, Oy: N*M matrix, the x and y coordinate of each marker at frame 0 + Cx, Cy: N*M matrix, the x and y coordinate of each marker at current frame + Occupied: N*M matrix, the index of the marker at each position, -1 means inferred. + e.g. Occupied[i][j] = k, meaning the marker mc[k] lies in row i, column j. + """ + + Ox = np.array(flow[0]).reshape(self.N_, self.M_) + Oy = np.array(flow[1]).reshape(self.N_, self.M_) + + Cx = np.array(flow[2]).reshape(self.N_, self.M_) + Cy = np.array(flow[3]).reshape(self.N_, self.M_) + + output = np.zeros((2, self.N_, self.M_, 2)) + output[0, :, :, 0] = Ox + output[0, :, :, 1] = Oy + output[1, :, :, 0] = Cx + output[1, :, :, 1] = Cy + return output + + @property + def marker_shape(self): + return (self.N_, self.M_) diff --git a/gsmini/marker_tracker/tracking_class.cpp b/gsmini/marker_tracker/tracking_class.cpp new file mode 100644 index 0000000..9baa101 --- /dev/null +++ b/gsmini/marker_tracker/tracking_class.cpp @@ -0,0 +1,384 @@ +// File Name: tracking.cpp +// Author: Shaoxiong Wang +// Create Time: 2018/12/20 10:11 + +#include "tracking_class.h" +#include +#include + +double dist_sqr(Point_t a, Point_t b){ + return (a.x - b.x) * (a.x - b.x) + (a.y - b.y) * (a.y - b.y); +} + + +double sum(Point_t a){ + return (a.x*a.x + a.y*a.y); +} + +int Matching::precessor(int i, int j) { + return (degree[i][j] <= theta + && degree[i][j] >= -theta + && Dist[i][j] <= dmax + && Dist[i][j] >= dmin); +} + +Matching::Matching(int N_, int M_, int fps_, double x0_, double y0_, double dx_, double dy_){ + + N = N_; + M = M_; + NM = N * M; + + fps = fps_; + + x_0 = x0_; y_0 = y0_; dx = dx_; dy = dy_; + + int i, j; + for (i = 0; i < N; i++) { + for (j = 0; j < M; j++) { + O[i][j].x = x_0 + j * dx; + O[i][j].y = y_0 + i * dy; + } + } + flag_record = 1; + + dmin = (dx * 0.5) * (dx * 0.5); + dmax = (dx * 1.8) * (dx * 1.8); + theta = 70; + moving_max = dx * 2; + flow_difference_threshold = dx * 0.8; + cost_threshold = 15000 * (dx / 21)* (dx / 21); +} + +// void Matching::init(Point_t *centers, int count){ +void Matching::init(std::vector> centers) { + int i, j; + + // read points from centers + n = centers.size(); + + for (i = 0; i < n; i++){ + C[i].x = centers[i][0]; + C[i].y = centers[i][1]; + C[i].id = i; + // std::cout< Matching::get_flow() { + vvd Ox(N), Oy(N), Cx(N), Cy(N), Occupied(N); + + int i, j; + for(i = 0; i < N; i++){ + Ox[i] = vd(M); Oy[i] = vd(M); Cx[i] = vd(M); Cy[i] = vd(M); Occupied[i] = vd(M); + for(j = 0; j < M; j++){ + Ox[i][j] = O[i][j].x; + Oy[i][j] = O[i][j].y; + Cx[i][j] = MinD[i][j].x; + Cy[i][j] = MinD[i][j].y; + Occupied[i][j] = MinOccupied[i][j]; + // Point a(matcher.O[i][j].x, matcher.O[i][j].y), b(matcher.MinD[i][j].x + 2 * (matcher.MinD[i][j].x - matcher.O[i][j].x), matcher.MinD[i][j].y + 2 * (matcher.MinD[i][j].y - matcher.O[i][j].y)); + } + } + + return std::make_tuple(Ox, Oy, Cx, Cy, Occupied); +} + +double Matching::calc_cost(int i){ + double c = 0, cost = 0; + int left, up, down; + Point_t flow1, flow2; + + cost = cost + K1 * sum(C[i] - O[Row[i]][Col[i]]); + flow1 = C[i] - O[Row[i]][Col[i]]; + + if(Col[i] > 0){ + left = occupied[Row[i]][Col[i]-1]; + if (left > -1){ + 
flow2 = C[left] - O[Row[i]][Col[i]-1]; + c = sum(flow2 - flow1); + if (sqrt(c) >= flow_difference_threshold) c = 1e8; + cost += K2 * c; + } + } + if(Row[i] > 0){ + up = occupied[Row[i]-1][Col[i]]; + if (up > -1){ + flow2 = C[up] - O[Row[i]-1][Col[i]]; + c = sum(flow2 - flow1); + if (sqrt(c) >= flow_difference_threshold) c = 1e8; + cost += K2 * c; + } + } + if(Row[i] < N - 1){ + down = occupied[Row[i] + 1][Col[i]]; + if (down > -1){ + flow2 = C[down] - O[Row[i]+1][Col[i]]; + c = sum(flow2 - flow1); + if (sqrt(c) >= flow_difference_threshold) c = 1e8; + cost += K2 * c; + } + } + return cost; +} + +double Matching::infer(){ + double cost = 0; + int boarder_nb = 0; + int i, j, k, x, y, d=1, cnt, nx, ny, nnx, nny; + + int dir[4][2] = {{0, -1}, {-1, 0}, {0, 1}, {1, 0}}; + Point_t flow1, flow2; + + Point_t moving; + + for(i = 0; i < N; i++){ + for(j = 0;j < M; j++){ + if(occupied[i][j] <= -1){ + moving.x = 0; + moving.y = 0; + cnt = 0; + for (k=0;k<4;k++){ + nx = i + dir[k][0]; + ny = j + dir[k][1]; + nnx = nx + dir[k][0]; + nny = ny + dir[k][1]; + if (nnx < 0 || nnx >= N || nny < 0 || nny >= M) continue; + if (occupied[nx][ny] <= -1 || occupied[nnx][nny] <= -1) continue; + moving = moving + (C[occupied[nx][ny]] - O[nx][ny] + (C[occupied[nx][ny]] - O[nx][ny] - C[occupied[nnx][nny]] + O[nnx][nny])); + cnt += 1; + } + if(cnt == 0){ + for(x=i-d;x<=i+d;x++){ + for(y=j-d;y<=j+d;y++){ + if (x < 0 || x >= N || y < 0 || y >= M) continue; + if (occupied[x][y] <= -1) continue; + moving = moving + (C[occupied[x][y]] - O[x][y]); + cnt += 1; + } + } + } + if(cnt == 0){ + for(x=i-d-1;x<=i+d+1;x++){ + for(y=j-d-1;y<=j+d+1;y++){ + if (x < 0 || x >= N || y < 0 || y >= M) continue; + if (occupied[x][y] <= -1) continue; + moving = moving + (C[occupied[x][y]] - O[x][y]); + cnt += 1; + } + } + } + D[i][j] = O[i][j] + moving / (cnt + 1e-6); + if (j == 0 && D[i][j].y >= O[i][j].y - dy / 2.0) boarder_nb++; + if (j == N-1 && D[i][j].y <= O[i][j].y + dy / 2.0) boarder_nb++; + cost = cost + K1 * sum(D[i][j] - O[i][j]); + } + } + } + + if(boarder_nb >= N -1 ) cost += 1e7; + + for(i = 0; i < N; i++){ + for(j = 0;j < M; j++){ + if(occupied[i][j] <= -1){ + flow1 = D[i][j] - O[i][j]; + for (k = 0; k < 4; k++){ + nx = i + dir[k][0]; + ny = j + dir[k][1]; + if (nx < 0 || nx > N - 1 || ny < 0 || ny > M -1) continue; + if (occupied[nx][ny] > -1){ + flow2 = (C[occupied[nx][ny]] - O[nx][ny]); + cost += K2 * sum(flow2 - flow1); + } + else if (k < 2 && occupied[nx][ny] <= -1){ + flow2 = (D[nx][ny] - O[nx][ny]); + cost += K2 * sum(flow2 - flow1); + } + } + } + } + } + return cost; +} + +void Matching::dfs(int i, double cost, int missing, int spare){ + // if(occupied[6][0] <= -1 && occupied[7][0] <= -1) + // std::cout<= 1.0 / fps) return; + if(cost >= minf && minf != -1) return; + if(cost >= cost_threshold) return; + int j, k, count = 0, flag, m, same_col; + double c; + if (i >= n) { + cost += infer(); + // printf("\nCOST: %lf\n", cost); + // for (j=0;j= M) continue; + if (occupied[Row[i]][Col[i]] > -1) continue; + if (Row[i] > 0 && occupied[Row[i]-1][Col[i]] > -1 && C[i].y <= C[occupied[Row[i]-1][Col[i]]].y) continue; + if (Row[i] < N - 1 && occupied[Row[i]+1][Col[i]] > -1 && C[i].y >= C[occupied[Row[i]+1][Col[i]]].y) continue; + int vflag = 0; + for (k=0;k -1 && ((k < Row[i] && C[same_col].y > C[i].y) || (k > Row[i] && C[same_col].y < C[i].y))){ + vflag = 1; + break; + } + } + if (vflag == 1) continue; + occupied[Row[i]][Col[i]] = i; + + c = calc_cost(i); + dfs(i+1, cost+c, missing, spare); + occupied[Row[i]][Col[i]] = -1; + } + } + + 
+ // if (count == 0) { + for (j=0;j C[i].y) || (k > j && first[k] < C[i].y)) + ){ + flag = 1; + break; + } + } + if (flag == 1) continue; + done[j] = 1; + first[j] = C[i].y; + Row[i] = j; + Col[i] = 0; + + occupied[Row[i]][Col[i]] = i; + c = calc_cost(i); + + dfs(i+1, cost+c, missing, spare); + done[j] = 0; + occupied[Row[i]][Col[i]] = -1; + } + } + // } + + // considering missing points + // if (C[i].y > dy && C[i].y < O[0][M-1].y - dy / 2) return; + for(m=1;m<=missing;m++){ + for (j=0;j= 1 && j < N - 1) continue; + if(fabs(C[i].y - O[j][0].y) > moving_max) continue; + for(k=M-1;k>=0;k--) if(occupied[j][k]>-1) break; + if(k+m+1>=M) continue; + if (sqrt(sum(C[i] - O[j][k+m+1])) > moving_max) continue; + for(int t=1;t<=m;t++) occupied[j][k+t] = -2; + Row[i] = j; + Col[i] = k + m + 1; + c = calc_cost(i); + occupied[Row[i]][Col[i]] = i; + dfs(i+1, cost+c, missing - m, spare); + + for(int t=1;t<=m+1;t++) occupied[j][k+t] = -1; + } + } + + if (spare > 0){ + Row[i] = -1; + Col[i] = -1; + dfs(i+1, cost, missing, spare-1); + } +} + + + +std::tuple Matching::test() { + return std::make_tuple(dx, dy); +} + +PYBIND11_MODULE(find_marker, m) { + py::class_(m, "Matching") + .def(py::init(), py::arg("N_") = 8, py::arg("M_") = 8, py::arg("fps_") = 30, py::arg("x0_") = 80., py::arg("y0_") = 15., py::arg("dx_") = 21., py::arg("dy_") = 21.) + .def("run", &Matching::run) + .def("test", &Matching::test) + .def("init", &Matching::init) + .def("get_flow", &Matching::get_flow); +} \ No newline at end of file diff --git a/gsmini/marker_tracker/tracking_class.h b/gsmini/marker_tracker/tracking_class.h new file mode 100644 index 0000000..7b09db1 --- /dev/null +++ b/gsmini/marker_tracker/tracking_class.h @@ -0,0 +1,97 @@ + #include +#include +#include +#include +#include +#include +#include +#include +#include +namespace py = pybind11; + +typedef std::vector vd; +typedef std::vector> vvd; + +#define MAXN 30 +#define MAXM 30 +#define MAXNM 900 +#define PI 3.14159265 + + +struct Point_t +{ + double x, y; + int id; + + Point_t(){ + } + + Point_t(double x, double y, double id=0){ + this->x = x; + this->y = y; + this->id = id; + } + bool operator<(Point_t other) const + { + return (x < other.x || (x == other.x && y < other.y)); + } + + Point_t operator-(Point_t other) const + { + Point_t ret(x-other.x, y-other.y); + return ret; + } + + Point_t operator+(Point_t other) const + { + Point_t ret(x+other.x, y+other.y); + return ret; + } + + Point_t operator/(double other) const + { + Point_t ret(x/other, y/other); + return ret; + } +}; + +class Matching{ +private: + // double x_0 = 160, y_0 = 30, dx = 43.0, dy = 43.0; //GelSight Hanjun x1 + double x_0, y_0, dx, dy; //GelSight Hanjun x0.5 + // double x_0 = 34, y_0 = 37, dx = 27.0, dy = 27.0; //34 - 223, 37 - 200 GelSight_SX + // double x_0 = 6, y_0 = 16, dx = 31.0, dy = 31.0; //6 - 130, 16 - 138 HSR x0.5 + // double x_0 = 12, y_0 = 32, dx = 62.0, dy = 62.0; //6 - 130, 16 - 138 HSR x1 + // double x_0 = 15, y_0 = 15, dx = 23.0, dy = 23.0; //15-195 15-202 HSR blue + + int Row[MAXNM], Col[MAXNM]; + int Dist[MAXNM][MAXNM], done[MAXN], occupied[MAXN][MAXM], first[MAXN]; + int fps; + double degree[MAXNM][MAXNM]; + double dmin, dmax, theta; + double moving_max; + double cost_threshold, flow_difference_threshold; + clock_t time_st; + +public: + int n; + int N, M, NM; + int flag_record = 1; + + int MinRow[MAXNM], MinCol[MAXNM], MinOccupied[MAXN][MAXM]; + double minf = -1; + + Point_t O[MAXN][MAXM], D[MAXN][MAXM], C[MAXNM], MinD[MAXN][MAXM]; + double K1 = 0.1, K2 = 1; + + Matching(int 
N_=8, int M_=8, int fps_=30, double x_0 = 80., double y_0 = 15., double dx = 21.0, double dy = 21.0); + // void init(Point_t *centers, int count); + void init(std::vector> input); + int precessor(int i, int j); + double calc_cost(int i); + void dfs(int i, double cost, int missing, int spare); + void run(); + std::tuple get_flow(); + std::tuple test(); + double infer(); +}; diff --git a/gsmini/markertracker.py b/gsmini/markertracker.py deleted file mode 100644 index c8b6e37..0000000 --- a/gsmini/markertracker.py +++ /dev/null @@ -1,327 +0,0 @@ -import copy -import math -from enum import Enum - -import cv2 -import numpy as np -from scipy.ndimage import convolve -from skimage import measure -from skimage.morphology import closing, disk - -from .fit_grid import fit_grid -from .normxcorr2 import normxcorr2 - - -class GridStyle(Enum): - NOBORDER = 0 - ALL = 1 - - -class MarkerTracker: - def __init__(self, marker_image, params=None): - if params is None: - self.params = {"GridStyle": GridStyle.NOBORDER, "DoPlot": False} - else: - self.params = params - - img = marker_image - - filtsig = 20 - bwthresh = -0.05 - - mxim = np.max(img.astype(float), axis=2) - ydim, xdim = mxim.shape - - # High-pass filter - # fsg = filtsig - # fsz = 2 * round(3 * fsg) + 1 - # gf = gaussian(fsz, fsg) - - gf = gauss_signal(filtsig) - filtim = gauss_filt(mxim, gf) - # filtim = cv2.filter2D(mxim, -1, gf, borderType=cv2.BORDER_REPLICATE) - - hpim = mxim - filtim - - se = disk(3) - bw = hpim < bwthresh - # lb = regionprops_label(closing(bw, selem=se)) - label_image = measure.label(closing(bw, footprint=se)) - - # Set minimum size threshold - min_size = 20 - max_size = 500 - - # Remove connected components smaller than the threshold - clean_labels = np.zeros_like(label_image) - for i in range(1, label_image.max() + 1): - blobsize = (label_image == i).sum() - if blobsize >= min_size and blobsize <= max_size: - clean_labels[label_image == i] = i - - marker_mask = np.where(clean_labels != 0, 1, 0) - # cv2.imshow('original_mask', marker_mask.astyp(float)) - - all_props = measure.regionprops(clean_labels, mxim) - areas = np.array([prop.area for prop in all_props]) - centers = np.array([prop.centroid for prop in all_props]) - intensities = np.array([prop.intensity_mean for prop in all_props]) - - # Sort the centers and get corresponding indices - # sorted_indices = np.lexsort((centers[:, 1], centers[:, 0])) - # sorted_centers = centers[sorted_indices] - sorted_indices = self.sort_centers(centers) - sorted_centers = centers[sorted_indices] - sorted_areas = areas[sorted_indices] - sorted_intensities = intensities[sorted_indices] - - # Find the grid spacing - gsp = self.estimate_grid_spacing(sorted_centers) - - new_centers = sorted_centers - new_areas = sorted_areas - new_intensities = sorted_intensities - - # NOBORDER will restrict to uniform grid - if self.params["GridStyle"] == GridStyle.ALL: - ( - num_rows, - num_cols, - row_coordinates, - col_coordinates, - ) = self.assign_coordinates(sorted_centers) - else: - # Point needs to be minsp away from other points and minpd away from image boundary - minpd = gsp / 3 - minsp = gsp / 2 - good_centers = [] - good_indices = np.empty((0,), dtype=int) - for c in range(sorted_centers.shape[0]): - pt = sorted_centers[c, :] - if (pt[1] > minpd and pt[1] < xdim - minpd) and ( - pt[0] > minpd and pt[0] < xdim - minpd - ): - if len(good_centers) > 0: - lastpts = np.asarray(good_centers) - dsts = np.sqrt( - np.square(pt[1] - lastpts[:, 1]) - + np.square(pt[0] - lastpts[:, 0]) - ) - if not 
np.any(dsts < minsp): - good_centers.append(pt) - good_indices = np.append(good_indices, c) - else: - good_centers.append(pt) - good_indices = np.append(good_indices, c) - - new_centers = np.asarray(good_centers) - new_areas = sorted_areas[good_indices] - new_intensities = sorted_intensities[good_indices] - - # fit centers to a grid - gridpts, gridw = fit_grid(new_centers, gsp) - - # get the grid coordinates - gridct = gridw / gsp - gridct[:, 1] = gridct[:, 1] - np.min(gridct[:, 1]) - gridct[:, 0] = gridct[:, 0] - np.min(gridct[:, 0]) - num_cols = int(max(gridct[:, 0]) + 1) - num_rows = int(max(gridct[:, 1]) + 1) - row_coordinates = (np.round(gridct[:, 1])).astype("int") - col_coordinates = (np.round(gridct[:, 0])).astype("int") - - print(f"Number of rows, cols: {num_rows}, {num_cols}") - # print("Row coordinates:", row_coordinates) - # print("Column coordinates:", col_coordinates) - - nct = new_centers.shape[0] - # Estimate dot radius - radii = np.zeros(nct) - for i in range(nct): - p = new_centers[i, :] - radii[i] = np.sqrt(new_areas[i] / np.pi) - - # Save marker data in struct - self.xdim = mxim.shape[1] - self.ydim = mxim.shape[0] - self.gridsz = [num_cols, num_rows] - self.gridsp = gsp - self.marker_mask = marker_mask - self.initial_marker_coord = [col_coordinates, row_coordinates] - self.initial_marker_center = new_centers - self.marker_radius = radii - self.marker_blackpt = new_intensities - self.marker_center = new_centers - self.marker_lastpos = new_centers - self.marker_currentpos = new_centers - - def create_markermask(self, img, centers, radius): - markermask = np.zeros((img.shape[0], img.shape[1])) - - for c in range(len(centers)): - cv2.circle( - markermask, - (int(centers[c, 1]), int(centers[c, 0])), - int(radius[c]), - color=(255, 255, 255), - thickness=-1, - ) - cv2.imshow("mask", markermask) - - return markermask - - def sort_centers(self, dot_centers): - sorted_indices = np.lexsort((dot_centers[:, 1], dot_centers[:, 0])) - sorted_dot_centers = dot_centers[sorted_indices] - - # Extract the x and y coordinates of the sorted dot centers - x_coords = [dot[0] for dot in sorted_dot_centers] - y_coords = [dot[1] for dot in sorted_dot_centers] - - # Calculate the number of rows and columns - lenx = len(x_coords) - - # Loop through each dot center and assign grid coordinates - lx = 0 - new_sorted_indices = np.empty((0,), dtype=int) - - while lx < lenx - 1: - xvals = [] - yvals = [] - old_sorted_indices = np.empty((0,), dtype=int) - x0 = x_coords[lx] - y0 = y_coords[lx] - xvals.append(x0) - yvals.append(y0) - old_sorted_indices = np.append(old_sorted_indices, sorted_indices[lx]) - lx = lx + 1 - x1 = x_coords[lx] - y1 = y_coords[lx] - while (x1 - x0) < 10 and lx < lenx - 1: - old_sorted_indices = np.append(old_sorted_indices, sorted_indices[lx]) - xvals.append(x1) - yvals.append(y1) - lx = lx + 1 - x0 = x1 - x1 = x_coords[lx] - y1 = y_coords[lx] - if lx == lenx - 1: - old_sorted_indices = np.append(old_sorted_indices, sorted_indices[lx]) - xvals.append(x_coords[lx]) - yvals.append(y_coords[lx]) - if len(yvals) > 5: - sorted_yindx = np.argsort(yvals) - old_sorted = old_sorted_indices[sorted_yindx] - new_sorted_indices = np.append(new_sorted_indices, old_sorted) - - return new_sorted_indices - - def assign_coordinates(self, dot_centers): - # Sort the dot centers by their x and y coordinates - # sorted_dot_centers = sorted(dot_centers, key=lambda x: (x[0], x[1])) - x_coords = [dot[0] for dot in dot_centers] - y_coords = [dot[1] for dot in dot_centers] - - # Calculate the number of 
rows and columns - ncoords = len(y_coords) - - # Create dictionaries to store the mapping from dot center to grid coordinates - row_coordinates = np.zeros(ncoords) - col_coordinates = np.zeros(ncoords) - - idx = 0 - idy = 0 - n = 0 - num_rows = 0 - num_cols = 0 - x0 = x_coords[idx] - col_coordinates[n] = idx - row_coordinates[n] = idy - while n < ncoords - 1: - col_coordinates[n] = idx - row_coordinates[n] = idy - x1 = x_coords[n + 1] - while (x1 - x0) < 15 and n < ncoords - 2: - n = n + 1 - idy = idy + 1 - col_coordinates[n] = idx - row_coordinates[n] = idy - x0 = x1 - x1 = x_coords[n + 1] - # if lx < lenx - 2: - # x1 = x_coords[lx+1] - n = n + 1 - if idx == 0: - num_rows = n - yind = idy + 1 - idy = 0 - idx = idx + 1 - num_cols = idx - x0 = x1 - - col_coordinates[n] = int(idx - 1) - row_coordinates[n] = int(yind) - - return num_cols, num_rows, col_coordinates, row_coordinates - - def estimate_grid_spacing(self, centers): - N = centers.shape[0] - dsts = np.zeros(4 * N) - for i in range(N): - p = centers[i, :] - d = np.sqrt((centers[:, 1] - p[1]) ** 2 + (centers[:, 0] - p[0]) ** 2) - srtd = np.sort(d) - dsts[4 * i] = srtd[1] - dsts[4 * i + 1] = srtd[2] - dsts[4 * i + 2] = srtd[3] - dsts[4 * i + 3] = srtd[4] - gsp = np.median(dsts) - return gsp - - -def gauss_signal(sigma): - # Define the size and sigma of the Gaussian filter - fsg = sigma - fsz = 2 * round(3 * fsg) + 1 - - # Create a 1D Gaussian kernel using numpy - k = np.exp(-np.arange(-(fsz - 1) / 2, (fsz - 1) / 2 + 1) ** 2 / (2 * fsg**2)) - - # Normalize the kernel - k = k / np.sum(k) - - # Convert the 1D kernel to a 2D filter by taking the outer product - gf = np.outer(k, k) - - # Normalize the filter - gf = gf / np.sum(gf) - - return gf - - -def gauss_filt(input_img, gf_img): - # Convert the input image to grayscale - # gray_img = np.mean(input_img, axis=2) - - # Apply Gaussian smoothing with the gaussian_image as filter - smoothed_image = convolve(input_img, gf_img) - - return smoothed_image - - -if __name__ == "__main__": - cp = cv2.VideoCapture("data/mini_example.avi") - ret, img = cp.read() - for i in range(10): - ret, img = cp.read() - - # cv2.imwrite('f0.png', img) - # cv2.imshow('img', img) - # cv2.waitKey() - - params = {"GridStyle": GridStyle.NOBORDER, "DoPlot": True} - - img = np.float32(img) / 255.0 - mtrack = MarkerTracker(img, params) - - print(mtrack) diff --git a/gsmini/msg/_Shear.py b/gsmini/msg/_Shear.py new file mode 100644 index 0000000..020ace6 --- /dev/null +++ b/gsmini/msg/_Shear.py @@ -0,0 +1,248 @@ +# This Python file uses the following encoding: utf-8 +"""autogenerated by genpy from gsmini/Shear.msg. Do not edit.""" +import codecs +import sys +python3 = True if sys.hexversion > 0x03000000 else False +import genpy +import struct + +import std_msgs.msg + +class Shear(genpy.Message): + _md5sum = "92608befc2d559ba6aede10d33b8ba51" + _type = "gsmini/Shear" + _has_header = True # flag to mark the presence of a Header object + _full_text = """Header header +uint8 n +uint8 m +float32[] initial +float32[] markers + +================================================================================ +MSG: std_msgs/Header +# Standard metadata for higher-level stamped data types. +# This is generally used to communicate timestamped data +# in a particular coordinate frame. 
+# +# sequence ID: consecutively increasing ID +uint32 seq +#Two-integer timestamp that is expressed as: +# * stamp.sec: seconds (stamp_secs) since epoch (in Python the variable is called 'secs') +# * stamp.nsec: nanoseconds since stamp_secs (in Python the variable is called 'nsecs') +# time-handling sugar is provided by the client library +time stamp +#Frame this data is associated with +string frame_id +""" + __slots__ = ['header','n','m','initial','markers'] + _slot_types = ['std_msgs/Header','uint8','uint8','float32[]','float32[]'] + + def __init__(self, *args, **kwds): + """ + Constructor. Any message fields that are implicitly/explicitly + set to None will be assigned a default value. The recommend + use is keyword arguments as this is more robust to future message + changes. You cannot mix in-order arguments and keyword arguments. + + The available fields are: + header,n,m,initial,markers + + :param args: complete set of field values, in .msg order + :param kwds: use keyword arguments corresponding to message field names + to set specific fields. + """ + if args or kwds: + super(Shear, self).__init__(*args, **kwds) + # message fields cannot be None, assign default values for those that are + if self.header is None: + self.header = std_msgs.msg.Header() + if self.n is None: + self.n = 0 + if self.m is None: + self.m = 0 + if self.initial is None: + self.initial = [] + if self.markers is None: + self.markers = [] + else: + self.header = std_msgs.msg.Header() + self.n = 0 + self.m = 0 + self.initial = [] + self.markers = [] + + def _get_types(self): + """ + internal API method + """ + return self._slot_types + + def serialize(self, buff): + """ + serialize message into buffer + :param buff: buffer, ``StringIO`` + """ + try: + _x = self + buff.write(_get_struct_3I().pack(_x.header.seq, _x.header.stamp.secs, _x.header.stamp.nsecs)) + _x = self.header.frame_id + length = len(_x) + if python3 or type(_x) == unicode: + _x = _x.encode('utf-8') + length = len(_x) + buff.write(struct.Struct(' + + gsmini + 0.0.2 + The gsmini package + + + + + aloha + + + + + + TODO + + + + + + + + + + + + + + + + + + + + + + + + message_generation + + message_generation + + + + message_runtime --> + + + + + catkin + + + + + + + + diff --git a/scripts/generate_python_rosmsg.py b/scripts/generate_python_rosmsg.py new file mode 100755 index 0000000..93ead55 --- /dev/null +++ b/scripts/generate_python_rosmsg.py @@ -0,0 +1,37 @@ +import os +import shutil +from pathlib import Path +import subprocess + +REPO_PATH = Path(__file__).absolute().parent.parent +PKG_PATH = REPO_PATH / "gsmini" +ROSMSG_DIR = REPO_PATH / "msg" +ros_ws_path = [parent for parent in PKG_PATH.parents if "_ws" in parent.name] +assert len(ros_ws_path) == 1 +ros_ws_path = ros_ws_path[0] +TARGET_PYMSG_DIR = PKG_PATH / "msg" + +print("ROSMSG_DIR:", ROSMSG_DIR) +print("ROS_WORKSPACE:", ros_ws_path) +print("TARGET_PYMSG_DIR:", TARGET_PYMSG_DIR) + +# Scan the msg directory for message files +msg_files = [f for f in os.listdir(ROSMSG_DIR) if f.endswith(".msg")] + +# Process each message file +for msg_file in msg_files: + msg_name = msg_file.split(".")[0] + + # Find the compiled Python file for the message + find_command = f"find {str(ros_ws_path)}/devel/lib -name _{msg_name}.py" + msg_file = subprocess.check_output(find_command, shell=True, text=True) + find_result = os.popen(find_command).read().strip() + if find_result: + print(find_result) + # Copy the .py file to the python_package/msg folder + shutil.copy(find_result, TARGET_PYMSG_DIR / 
f"_{msg_name}.py") + # Update the __init__.py file to import the message + with open(os.path.join(TARGET_PYMSG_DIR, "__init__.py"), "a") as init_file: + init_file.write(f"from ._{msg_name} import *\n") + +print("Process completed.") diff --git a/setup.py b/setup.py index 812c03a..ed40ef1 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ setup( name="gsmini", - version="2.0", + version="0.0.2", python_requires=">=3.8", packages=find_packages(), license="",