eliminated some dead code

karypis · karypis · commit afb03377873d · 2018-03-20T07:27:55.000-05:00
diff --git a/Makefile b/Makefile
@@ -51,7 +51,7 @@ ifneq ($(cc), not-set)
     CONFIG_FLAGS += -DCMAKE_C_COMPILER=$(cc)
 endif
 
-PKGNAME=gktc
+CONFIG_FLAGS += -DPKGNAME=gktc
 
 define run-config
 mkdir -p $(BUILDDIR)
@@ -77,7 +77,4 @@ distclean:
 remake:
 	find . -name CMakeLists.txt -exec touch {} ';'
 
-dist:
-	utils/mkdist.sh $(PKGNAME)
-
-.PHONY: config distclean all clean install uninstall remake dist
+.PHONY: config distclean all clean install uninstall remake
diff --git a/README.md b/README.md
@@ -6,7 +6,7 @@ OpenMP-based parallel program for counting the number of triangles in a sparse g
  - CMake 2.8, found at http://www.cmake.org/, as well as GNU make. 
  - Download, build, and install [GKlib](https://github.com/KarypisLab/GKlib).
 
-Assumming that the above are available, two commands should suffice to 
+Assuming that the above are available, two commands should suffice to 
 build the software:
 ```
 make config 
@@ -26,6 +26,7 @@ Configuration options are:
 cc=[compiler]     - The C compiler to use [default: gcc]
 prefix=[PATH]     - Set the installation prefix [default: ~/local]
 gklib_path=[PATH] - Where GKlib was installed [default: ~/local]
+openmp=not-set    - Disable OpenMP to build a serial version
 ```
 
 
@@ -41,6 +42,29 @@ For usage information just type
 ```
 gktc -help
 ```
+and here is a sample run:
+```
+gktc -nthreads=4 test/p2p-Gnutella31.metis
+Reading graph test/p2p-Gnutella31.metis...
+
+-----------------
+  infile: test/p2p-Gnutella31.metis
+  #nvtxs: 62586
+ #nedges: 295784
+nthreads: 4
+
+& compatible maxhmsize: 255, startv: 23
+
+Results...
+    #probes:       209251; rate:     249.98 MP/sec
+
+Timings...
+     preprocessing:     0.002s
+ triangle counting:     0.001s
+    total (/x i/o):     0.003s
+-----------------
+```
+
 
 ## Other make commands
     make uninstall 
@@ -54,54 +78,61 @@ gktc -help
 
 
 ## Performance 
-The following shows a sample of gktc's performance on Intel's KnightsLanding
+The following shows a sample of gktc's performance on Intel's Knights Landing
 processor:
 
 ```
+  total: total time excluding I/O
+    ppt: pre-processing time
+    tct: triangle counting time
+speedup: relative to p=1
+
 rmat scale25 
 ----------------------------------------
-#p     total    ppt      tct   speedup 
-1      767.4  141.9    625.5   
-5      154.4   29.1    125.2     5.0x 
-10      77.2   14.5     62.6     9.9x
-20      38.6    7.3     31.2    19.9x
-40      19.6    3.7     16.1    39.2x
-68      12.2    2.2      9.9    62.9x
-136      9.3    1.7      7.6    82.5x
-272     10.1    1.5      8.6    76.0x
+#p     total     ppt       tct   speedup 
+1      767.4s  141.9s    625.5s   
+5      154.4s   29.1s    125.2s     5.0x 
+10      77.2s   14.5s     62.6s     9.9x
+20      38.6s    7.3s     31.2s    19.9x
+40      19.6s    3.7s     16.1s    39.2x
+68      12.2s    2.2s      9.9s    62.9x
+136      9.3s    1.7s      7.6s    82.5x
+272     10.1s    1.5s      8.6s    76.0x
 
 twitter 
 ----------------------------------------
-#p     total    ppt      tct   speedup    
-1     1422.6  307.7   1114.9  
-5      285.7   62.3    223.2     5.0x 
-10     143.4   31.2    112.1     9.9x 
-20      71.4   15.6     55.8    19.9x 
-40      37.1    7.9     29.1    38.4x 
-68      23.1    4.8     18.3    61.6x 
-136     17.1    3.4     13.6    83.2x 
-272     19.3    3.2     16.0    73.7x 
+#p     total     ppt       tct   speedup    
+1     1422.6s  307.7s   1114.9s  
+5      285.7s   62.3s    223.2s     5.0x 
+10     143.4s   31.2s    112.1s     9.9x 
+20      71.4s   15.6s     55.8s    19.9x 
+40      37.1s    7.9s     29.1s    38.4x 
+68      23.1s    4.8s     18.3s    61.6x 
+136     17.1s    3.4s     13.6s    83.2x 
+272     19.3s    3.2s     16.0s    73.7x 
 
 friendster 
 ----------------------------------------
-#p     total    ppt      tct   speedup  
-1     1618.5  421.3   1196.7   
-5      316.8   84.8    231.5     5.1x  
-10     159.0   42.5    116.1    10.2x 
-20      79.5   21.3     57.8    20.4x
-40      40.6   10.7     29.4    39.9x 
-68      25.5    6.5     18.4    63.5x 
-136     16.9    4.5     11.7    95.8x 
-272     13.5    3.6      8.9   119.9x 
+#p     total     ppt       tct   speedup  
+1     1618.5s  421.3s   1196.7s   
+5      316.8s   84.8s    231.5s     5.1x  
+10     159.0s   42.5s    116.1s    10.2x 
+20      79.5s   21.3s     57.8s    20.4x
+40      40.6s   10.7s     29.4s    39.9x 
+68      25.5s    6.5s     18.4s    63.5x 
+136     16.9s    4.5s     11.7s    95.8x 
+272     13.5s    3.6s      8.9s   119.9x 
 ```
 
 ## Citing 
 The parallel algorithm implemented is based on the one described in
 
-["Exploring Optimizations on Shared-memory Platforms for Parallel Triangle Counting
-Algorithms". Ancy Sarah Tom, Narayanan Sundaram, Nesreen K. Ahmed, Shaden Smith, 
+[__"Exploring Optimizations on Shared-memory Platforms for Parallel Triangle Counting
+Algorithms."__ Ancy Sarah Tom, Narayanan Sundaram, Nesreen K. Ahmed, Shaden Smith, 
 Stijn Eyerman, Midhunchandra Kodiyath, Ibrahim Hur, Fabrizio Petrini, and George
 Karypis. IEEE High Performance Extreme Computing Conference (HPEC),
-2017](http://glaros.dtc.umn.edu/gkhome/node/1214), which was one of the finalists for
-the [GraphChallenge 2017 competition](http://graphchallenge.mit.edu/).
+2017](http://glaros.dtc.umn.edu/gkhome/node/1214)
+
+This paper was one of the finalists for the [GraphChallenge 2017
+competition](http://graphchallenge.mit.edu/).
 
diff --git a/main.c b/main.c
@@ -45,16 +45,12 @@ int main(int argc, char *argv[])
   gk_stopwctimer(vault->timer_global);
 
   printf("\nResults...\n");
-  /*
-  printf(" #triangles: %12"PRId64"; rate: %10.2lf MT/sec\n", ntriangles, 
-      ((double)ntriangles)/((double)1e6*gk_getwctimer(vault->tctimer))); 
-  */
   printf("    #probes: %12"PRIu64"; rate: %10.2lf MP/sec\n", vault->nprobes, 
-      ((double)vault->nprobes)/((double)1e6*gk_getwctimer(vault->tctimer))); 
+      ((double)vault->nprobes)/((double)1e6*gk_getwctimer(vault->timer_tc))); 
 
   printf("\nTimings...\n");
-  printf("     preprocessing: %9.3lfs\n", gk_getwctimer(vault->pptimer));
-  printf(" triangle counting: %9.3lfs\n", gk_getwctimer(vault->tctimer));
+  printf("     preprocessing: %9.3lfs\n", gk_getwctimer(vault->timer_pp));
+  printf(" triangle counting: %9.3lfs\n", gk_getwctimer(vault->timer_tc));
   printf("    total (/x i/o): %9.3lfs\n", gk_getwctimer(vault->timer_global));
   printf("-----------------\n");
 
diff --git a/ptc.c b/ptc.c
@@ -1,6 +1,6 @@
 /*!
 \file
-\brief The various OpenMP triangle counting routines
+\brief The OpenMP triangle counting routine
 \date Started 1/14/2018
 \author George
 \version\verbatim $Id: cmdline.c 20946 2017-05-10 23:12:48Z karypis $ \endverbatim
@@ -52,8 +52,6 @@ gk_graph_t *ptc_Preprocess(params_t *params, vault_t *vault)
   csrange = 16*((csrange+15)/16); /* get the per thread arrays to be alligned 
                                      at the start of the cache line */
 
-  gk_startwctimer(vault->timer_6);
-
   gcounts = gk_i32malloc(nthreads*csrange, "gcounts");
   psums   = gk_zmalloc(nthreads, "psums");
 
@@ -65,7 +63,11 @@ gk_graph_t *ptc_Preprocess(params_t *params, vault_t *vault)
     int32_t ti, di, ci, dstart, dend;
     int32_t *counts, *buffer;
     ssize_t ej, ejend, psum, chunksize;
+#if defined(_OPENMP)
     int mytid = omp_get_thread_num();
+#else
+    int mytid = 0;
+#endif
 
     vistart = mytid*((nvtxs+nthreads-1)/nthreads);
     viend   = gk_min(nvtxs, (mytid+1)*((nvtxs+nthreads-1)/nthreads));
@@ -154,17 +156,13 @@ gk_graph_t *ptc_Preprocess(params_t *params, vault_t *vault)
       }
     }
     psums[mytid] = nchunks+1;
-    //printf("mytid: %d, nchunks: %d [%zd]\n", mytid, nchunks+1, (nxadj[viend]-nxadj[vistart])/chunksize);
 
     #pragma omp barrier
     #pragma omp single
     for (ti=1; ti<nthreads; ti++)
       psums[ti] += psums[ti-1];
     #pragma omp barrier
 
-    //#pragma omp single
-    //printf("bsize: %d, total #chunks: %zd, chunksize: %zd\n", bsize, psums[nthreads-1], chunksize);
-
     #pragma omp single
     chunkptr = gk_i32malloc(psums[nthreads-1]+1, "chunkptr");
     #pragma omp barrier
@@ -193,12 +191,7 @@ gk_graph_t *ptc_Preprocess(params_t *params, vault_t *vault)
     #pragma omp barrier
     */
 
-    /* create the reordered/sorted graph */
-    #pragma omp single
-    gk_startwctimer(vault->timer_7);
-
-    
-    /* process the chunks in parallel */
+    /* create the reordered/sorted graph by processing the chunks in parallel */
     #pragma omp for schedule(dynamic, 1) nowait
     for (ci=nchunks-1; ci>=0; ci--) {
       for (vi=chunkptr[ci]; vi<chunkptr[ci+1]; vi++) {
@@ -213,11 +206,7 @@ gk_graph_t *ptc_Preprocess(params_t *params, vault_t *vault)
       }
     }
 
-    #pragma omp single
-    gk_stopwctimer(vault->timer_7);
-
   }
-  gk_stopwctimer(vault->timer_6);
 
   gk_free((void **)&perm, &iperm, &gcounts, &psums, &chunkptr, LTERM);
 
@@ -295,7 +284,11 @@ int64_t ptc_MapJIK(params_t *params, vault_t *vault)
     ssize_t ei, eiend, eistart, ej, ejend, ejstart;
     int32_t l, nc;
     int32_t l2=1, hmsize=(1<<(l2+4))-1, *hmap;
-    int32_t mytid = omp_get_thread_num();
+#if defined(_OPENMP)
+    int mytid = omp_get_thread_num();
+#else
+    int mytid = 0;
+#endif
 
     hmap = gk_i32smalloc(maxhmsize+1, 0, "hmap");
 
diff --git a/struct.h b/struct.h
@@ -20,8 +20,8 @@ typedef struct {
 
   /* timers */
   double timer_global;
-  double pptimer;
-  double tctimer;
+  double timer_pp;
+  double timer_tc;
 } vault_t; 
 
 
diff --git a/test/p2p-Gnutella31.metis b/test/p2p-Gnutella31.metis