Skip to content

Commit 2e987e7

Browse files
committed
Added new data generation/split scripts
1 parent 68bab73 commit 2e987e7

File tree

4 files changed

+107
-87
lines changed

4 files changed

+107
-87
lines changed

README.md

+24-3
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ source activate gcn-tsp-env
5555

5656
# Install all dependencies and Jupyter Lab (for using notebooks).
5757
conda install pytorch=0.4.1 cuda90 -c pytorch
58-
conda install numpy==1.15.4 scipy==1.1.0 matplotlib==3.0.2 seaborn==0.9.0 pandas==0.24.2 networkx==2.2 scikit-learn==0.20.2 tensorflow-gpu==1.12.0 tensorboard==1.12.0
58+
conda install numpy==1.15.4 scipy==1.1.0 matplotlib==3.0.2 seaborn==0.9.0 pandas==0.24.2 networkx==2.2 scikit-learn==0.20.2 tensorflow-gpu==1.12.0 tensorboard==1.12.0 Cython
5959
pip3 install tensorboardx==1.5 fastprogress==0.1.18
6060
conda install -c conda-forge jupyterlab
6161
```
@@ -77,6 +77,27 @@ python main.py --config <path-to-config.json>
7777
```
7878

7979
#### Splitting datasets into Training and Validation sets
80-
For TSP10, TSP20 and TSP30 datasets, everything is good to go.
80+
For TSP10, TSP20 and TSP30 datasets, everything is good to go once you download and extract the files.
8181
For TSP50 and TSP100, the 1M training set needs to be split into 10K validation samples and 999K training samples.
82-
Use the `split_train_val.ipynb` notebook to do this through Jupyter Lab.
82+
Use the `split_train_val.py` script to do so.
83+
For consistency, the script uses the first 10K samples in the 1M file as the validation set and the remaining 999K as the training set.
84+
85+
```sh
86+
cd data
87+
python split_train_val.py --num_nodes <num-nodes>
88+
```
89+
90+
### Generating new data
91+
New TSP data can be generated using the [Concorde solver](https://github.com/jvkersch/pyconcorde).
92+
93+
```sh
94+
# Install the pyConcorde library in the /data directory
95+
cd data
96+
git clone https://github.com/jvkersch/pyconcorde
97+
cd pyconcorde
98+
pip install -e .
99+
cd ..
100+
101+
# Run the data generation script
102+
python generate_tsp_concorde.py --num_samples <num-samples> --num_nodes <num-nodes>
103+
```

data/generate_tsp_concorde.py

+40
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
import time
2+
import argparse
3+
import pprint as pp
4+
import os
5+
6+
import pandas as pd
7+
import numpy as np
8+
from concorde.tsp import TSPSolver
9+
10+
11+
if __name__ == "__main__":
12+
parser = argparse.ArgumentParser()
13+
parser.add_argument("--num_samples", type=int, default=10000)
14+
parser.add_argument("--num_nodes", type=int, default=20)
15+
parser.add_argument("--node_dim", type=int, default=2)
16+
parser.add_argument("--filename", type=str, default=None)
17+
opts = parser.parse_args()
18+
19+
if opts.filename is None:
20+
opts.filename = f"tsp{opts.num_nodes}_concorde.txt"
21+
22+
# Pretty print the run args
23+
pp.pprint(vars(opts))
24+
25+
set_nodes_coord = np.random.random([opts.num_samples, opts.num_nodes, opts.node_dim])
26+
with open(opts.filename, "w") as f:
27+
start_time = time.time()
28+
for nodes_coord in set_nodes_coord:
29+
solver = TSPSolver.from_data(nodes_coord[:,0], nodes_coord[:,1], norm="GEO")
30+
solution = solver.solve()
31+
f.write( " ".join( str(x)+str(" ")+str(y) for x,y in nodes_coord) )
32+
f.write( str(" ") + str('output') + str(" ") )
33+
f.write( str(" ").join( str(node_idx+1) for node_idx in solution.tour) )
34+
f.write( str(" ") + str(solution.tour[0]+1) + str(" ") )
35+
f.write( "\n" )
36+
end_time = time.time() - start_time
37+
38+
print(f"Completed generation of {opts.num_samples} samples of TSP{opts.num_nodes}.")
39+
print(f"Total time: {end_time/3600:.1f}h")
40+
print(f"Average time: {(end_time/3600)/opts.num_samples:.1f}h")

data/split_train_val.py

+43
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
import time
import argparse
import pprint as pp
import os


if __name__ == "__main__":
    # Split a Concorde-generated TSP dataset file into validation (first
    # `val_size` lines) and training (remainder) files, preserving line order.
    parser = argparse.ArgumentParser()
    parser.add_argument("--num_nodes", type=int, default=20)
    parser.add_argument("--val_size", type=int, default=10000)
    # node_dim is unused here; kept so the CLI stays compatible with the
    # generation script's flags.
    parser.add_argument("--node_dim", type=int, default=2)
    parser.add_argument("--filename", type=str, default=None)
    opts = parser.parse_args()

    if opts.filename is None:
        opts.filename = f"tsp{opts.num_nodes}_concorde.txt"

    # Pretty print the run args
    pp.pprint(vars(opts))

    start_time = time.time()

    # Read with an explicit encoding (matching the utf-8 writes below) and
    # close the handle promptly — the original leaked an open file object.
    with open(opts.filename, "r", encoding="utf-8") as f:
        filedata = f.readlines()
    print("Total samples: ", len(filedata))
    val_data = filedata[:opts.val_size]
    print("Validation samples: ", len(val_data))
    train_data = filedata[opts.val_size:]
    print("Training samples: ", len(train_data))

    # Create separate validation data file
    with open(f"tsp{opts.num_nodes}_val_concorde.txt", "w", encoding="utf-8") as f:
        f.writelines(val_data)

    # Create separate train data file
    with open(f"tsp{opts.num_nodes}_train_concorde.txt", "w", encoding="utf-8") as f:
        f.writelines(train_data)

    end_time = time.time() - start_time

    # Report elapsed seconds with a unit: the original printed hours with .1f
    # and no unit, which shows 0.0 for any realistic runtime.
    print(f"Total time: {end_time:.1f}s")
43+

split_train_val.ipynb

-84
This file was deleted.

0 commit comments

Comments
 (0)