diff --git a/detectron/core/config.py b/detectron/core/config.py
index 1f836e0e8..c06f7ccf3 100644
--- a/detectron/core/config.py
+++ b/detectron/core/config.py
@@ -1007,6 +1007,7 @@
         'TRAIN.DROPOUT',
         'USE_GPU_NMS',
         'TEST.NUM_TEST_IMAGES',
+        '--device_id'
     }
 )
 
diff --git a/detectron/core/test_engine.py b/detectron/core/test_engine.py
index 78ecb3de7..52bd58793 100644
--- a/detectron/core/test_engine.py
+++ b/detectron/core/test_engine.py
@@ -105,7 +105,8 @@ def result_getter():
                     dataset_name,
                     proposal_file,
                     output_dir,
-                    multi_gpu=multi_gpu_testing
+                    multi_gpu=multi_gpu_testing,
+                    gpu_id=gpu_id
                 )
                 all_results.update(results)
 
diff --git a/detectron/roi_data/loader.py b/detectron/roi_data/loader.py
index d2e2591d7..e52d7e78c 100644
--- a/detectron/roi_data/loader.py
+++ b/detectron/roi_data/loader.py
@@ -182,8 +182,12 @@ def enqueue_blobs(self, gpu_id, blob_names, blobs):
         assert len(blob_names) == len(blobs)
         t = time.time()
         dev = c2_utils.CudaDevice(gpu_id)
-        queue_name = 'gpu_{}/{}'.format(gpu_id, self._blobs_queue_name)
-        blob_names = ['gpu_{}/{}'.format(gpu_id, b) for b in blob_names]
+        if gpu_id < 0:
+            # Negative ids (CPU/IDEEP) have no `gpu_N/` name scope prefix.
+            queue_name = self._blobs_queue_name
+        else:
+            queue_name = 'gpu_{}/{}'.format(gpu_id, self._blobs_queue_name)
+            blob_names = ['gpu_{}/{}'.format(gpu_id, b) for b in blob_names]
         for (blob_name, blob) in zip(blob_names, blobs):
             workspace.FeedBlob(blob_name, blob, device_option=dev)
         logger.debug(
@@ -258,6 +262,14 @@ def create_blobs_queues(self):
                         capacity=self._blobs_queue_capacity
                     )
                 )
+        if self._num_gpus == 0:
+            workspace.RunOperatorOnce(
+                core.CreateOperator(
+                    'CreateBlobsQueue', [], [self._blobs_queue_name],
+                    num_blobs=len(self.get_output_names()),
+                    capacity=self._blobs_queue_capacity
+                )
+            )
         return self.create_enqueue_blobs()
 
     def close_blobs_queues(self):
@@ -269,6 +281,12 @@ def close_blobs_queues(self):
                         'CloseBlobsQueue', [self._blobs_queue_name], []
                     )
                 )
+        if self._num_gpus == 0:
+            workspace.RunOperatorOnce(
+                core.CreateOperator(
+                    'CloseBlobsQueue', [self._blobs_queue_name], []
+                )
+            )
 
     def create_enqueue_blobs(self):
         blob_names = self.get_output_names()
@@ -279,6 +297,9 @@ def create_enqueue_blobs(self):
             with c2_utils.NamedCudaScope(gpu_id):
                 for blob in enqueue_blob_names:
                     workspace.CreateBlob(core.ScopedName(blob))
+        if self._num_gpus == 0:
+            for blob in enqueue_blob_names:
+                workspace.CreateBlob(core.ScopedName(blob))
         return enqueue_blob_names
 
     def register_sigint_handler(self):
diff --git a/detectron/utils/c2.py b/detectron/utils/c2.py
index 5a085718f..66cca5ec4 100644
--- a/detectron/utils/c2.py
+++ b/detectron/utils/c2.py
@@ -30,6 +30,8 @@
 
 import detectron.utils.env as envu
 
+DEVICE_ID_CPU = -1
+DEVICE_ID_IDEEP = -2
 
 def import_contrib_ops():
     """Import contrib ops needed by Detectron."""
@@ -104,37 +106,70 @@ def UnscopeName(possibly_scoped_name):
 def NamedCudaScope(gpu_id):
-    """Creates a GPU name scope and CUDA device scope. This function is provided
-    to reduce `with ...` nesting levels."""
+    """Creates a name scope and a device scope for device `gpu_id`. This
+    function is provided to reduce `with ...` nesting levels.
+
+    For `gpu_id >= 0` this is the `gpu_<id>` name scope plus the CUDA device
+    scope. For `DEVICE_ID_CPU` / `DEVICE_ID_IDEEP` no name scope is added and
+    the CPU / IDEEP device scope is entered instead. That dispatch lives in
+    `GpuNameScope` and `CudaScope`, so it is deliberately not repeated
+    here.
+    """
     with GpuNameScope(gpu_id):
         with CudaScope(gpu_id):
             yield
 
 
 @contextlib.contextmanager
 def GpuNameScope(gpu_id):
-    """Create a name scope for GPU device `gpu_id`."""
-    with core.NameScope('gpu_{:d}'.format(gpu_id)):
+    """Create a name scope for GPU device `gpu_id` (none if negative)."""
+    if gpu_id < 0:
         yield
+    else:
+        with core.NameScope('gpu_{:d}'.format(gpu_id)):
+            yield
 
 
 @contextlib.contextmanager
 def CudaScope(gpu_id):
-    """Create a CUDA device scope for GPU device `gpu_id`."""
-    gpu_dev = CudaDevice(gpu_id)
-    with core.DeviceScope(gpu_dev):
-        yield
+    """Create a device scope for `gpu_id` (CUDA, CPU or IDEEP)."""
+    if gpu_id == DEVICE_ID_CPU:
+        with CpuScope():
+            yield
+    elif gpu_id == DEVICE_ID_IDEEP:
+        with IdeepScope():
+            yield
+    else:
+        gpu_dev = CudaDevice(gpu_id)
+        with core.DeviceScope(gpu_dev):
+            yield
 
 
 @contextlib.contextmanager
 def CpuScope():
     """Create a CPU device scope."""
-    cpu_dev = core.DeviceOption(caffe2_pb2.CPU)
-    with core.DeviceScope(cpu_dev):
+    with core.DeviceScope(CpuDevice()):
         yield
 
+def CpuDevice():
+    """Create a CPU device."""
+    return core.DeviceOption(caffe2_pb2.CPU)
+
+@contextlib.contextmanager
+def IdeepScope():
+    """Create an IDEEP device scope."""
+    with core.DeviceScope(IdeepDevice()):
+        yield
+
+def IdeepDevice():
+    """Create an IDEEP device."""
+    return core.DeviceOption(caffe2_pb2.IDEEP)
 
 def CudaDevice(gpu_id):
-    """Create a Cuda device."""
+    """Create the device for `gpu_id`: CUDA, or CPU/IDEEP for sentinel ids."""
+    if gpu_id == DEVICE_ID_CPU:
+        return CpuDevice()
+    if gpu_id == DEVICE_ID_IDEEP:
+        return IdeepDevice()
     return core.DeviceOption(caffe2_pb2.CUDA, gpu_id)
 
 
 def gauss_fill(std):
diff --git a/detectron/utils/env.py b/detectron/utils/env.py
index cd2004932..4b1dc6795 100644
--- a/detectron/utils/env.py
+++ b/detectron/utils/env.py
@@ -67,6 +67,11 @@ def get_detectron_ops_lib():
             # TODO(ilijar): Switch to using a logger
             print('Found Detectron ops lib: {}'.format(ops_path))
             break
+        ops_path = os.path.join(prefix, 'lib/libcaffe2_detectron_ops.so')
+        if os.path.exists(ops_path):
+            # TODO(ilijar): Switch to using a logger
+            print('Found Detectron ops lib: {}'.format(ops_path))
+            break
     assert os.path.exists(ops_path), \
         ('Detectron ops lib not found; make sure that your Caffe2 '
          'version includes Detectron module')
diff --git a/tools/infer_simple.py b/tools/infer_simple.py
index 721d1c868..d98c48b72 100644
--- a/tools/infer_simple.py
+++ b/tools/infer_simple.py
@@ -99,6 +99,12 @@ def parse_args():
         default='pdf',
         type=str
     )
+    parser.add_argument(
+        '--device_id',
+        dest='device_id',
+        default=0,
+        type=int
+    )
     if len(sys.argv) == 1:
         parser.print_help()
         sys.exit(1)
@@ -118,7 +124,7 @@ def main(args):
     assert not cfg.TEST.PRECOMPUTED_PROPOSALS, \
         'Models that require precomputed proposals are not supported'
 
-    model = infer_engine.initialize_model_from_cfg(args.weights)
+    model = infer_engine.initialize_model_from_cfg(args.weights, gpu_id = args.device_id)
     dummy_coco_dataset = dummy_datasets.get_coco_dataset()
 
     if os.path.isdir(args.im_or_folder):
@@ -134,7 +140,7 @@ def main(args):
         im = cv2.imread(im_name)
         timers = defaultdict(Timer)
         t = time.time()
-        with c2_utils.NamedCudaScope(0):
+        with c2_utils.NamedCudaScope(args.device_id):
             cls_boxes, cls_segms, cls_keyps = infer_engine.im_detect_all(
                 model, im, None, timers=timers
             )
diff --git a/tools/test_net.py b/tools/test_net.py
index a0be84e55..8a6a3d24a 100755
--- a/tools/test_net.py
+++ b/tools/test_net.py
@@ -79,6 +79,12 @@ def parse_args():
         type=int,
         nargs=2
     )
+    parser.add_argument(
+        '--device_id',
+        dest='device_id',
+        default=0,
+        type=int
+    )
     parser.add_argument(
         'opts',
         help='See detectron/core/config.py for all options',
@@ -113,5 +119,6 @@ def parse_args():
         cfg.TEST.WEIGHTS,
         ind_range=args.range,
         multi_gpu_testing=args.multi_gpu_testing,
+        gpu_id=args.device_id,
         check_expected_results=True,
     )