Skip to content

Commit 1935cc7

Browse files
inbalpazipaz
authored and committed
Removed the set_seed option (when a seed is set, which is the default, it is always 1).
Updated README.md and manual.
1 parent b30215e commit 1935cc7

File tree

5 files changed

+24
-32
lines changed

5 files changed

+24
-32
lines changed

README.md

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -77,40 +77,52 @@ python syntracker.py -out SynTracker_output/ -mode continue
7777
### A description of all SynTracker's possible command line arguments:
7878

7979
```
80-
python syntracker.py [-h] [-target target_directory_path] [-ref ref_directory_path] [-out output_directory_path]
81-
[-metadata metadata_file] [-mode 'new'/'continue'] [-cores number_of_cores]
82-
[-length region_length][--identity blast_identity] [--coverage blast_coverage]
83-
[--save_intermediate] [--set_seed integer_for_seed]
80+
python syntracker.py [-h] [-target target_directory_path] [-ref ref_directory_path]
81+
[-out output_directory_path] [-metadata metadata_file]
82+
[-mode 'new'/'continue'] [-cores number_of_cores] [-length region_length]
83+
[--identity blast_identity] [--coverage blast_coverage]
84+
[--save_intermediate] [--no_seed]
8485
8586
options:
8687
-h, --help show this help message and exit
88+
8789
-target [target_directory_path]
8890
Path of the target directory which contains metagenome assemblies or genomes
91+
8992
-ref [ref_directory_path]
9093
Path of the references folder containing the reference genomes
94+
9195
-out [output_directory_path]
9296
The path to the output directory. When running in 'new' mode (the default), this argument is optional. By
9397
default a folder named 'Syntracker_output/' will be created under the current directory (if the given path
9498
already exists, it will be written over). When running in 'continue' mode, it is mandatory to provide the
9599
path to the output directory of the run that is requested to be continued.
100+
96101
-metadata [metadata_file]
97102
Path to a metadata file (optional). The file should be in CSV format and must include the sample ID.
103+
98104
-mode ['new'/'continue']
99105
The running mode: 'new' or 'continue' (default='new') (Start a new run or continue a previous run that has been terminated).
106+
100107
-cores [number_of_cores]
101108
The number of cores to use for the multi-processed stages of the calculation.
102109
(Optional, by default SynTracker uses the maximal number of available cores).
110+
103111
-length [region_length]
104112
The length of the compared region. (Optional, default=5000)
113+
105114
--identity [blast_identity]
106115
Minimal blast identity (optional, default=97)
116+
107117
--coverage [blast_coverage]
108118
Minimal blast coverage (optional, default=70)
119+
109120
--save_intermediate
110121
Saves R intermediate data structures for debugging purposes (by default, they are not saved).
111-
--set_seed [integer_for_seed]
112-
An integer number to set the seed for subsampling of n regions per pairwise (by default, the seed is 1).
113-
--no_seed Set no seed for the subsampling of n regions per pairwise (by default, seed=1 is set).
122+
123+
--no_seed Set no seed for the subsampling of n regions per pairwise (optional).
124+
This means that the average synteny scores may change between SynTracker runs due to the subsampling.
125+
By default, seed=1 is set to enable reproducibility between different runs.
114126
```
115127

116128
## Output

SynTracker_Manual.docx

408 Bytes
Binary file not shown.

SynTracker_Manual.pdf

-2.24 KB
Binary file not shown.

syntracker.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -68,9 +68,7 @@ def main():
6868
out_param.write("Minimal identity: " + str(config.minimal_identity) + "\n")
6969
if config.save_intermediate:
7070
out_param.write("\nSave intermediate: " + str(config.save_intermediate) + "\n")
71-
if config.is_set_seed:
72-
out_param.write("Seed: " + str(config.seed_num) + "\n")
73-
else:
71+
if config.is_set_seed is False:
7472
out_param.write("No seed\n")
7573

7674
############################################

syntracker_first_stage/parser.py

Lines changed: 4 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -46,11 +46,9 @@ def parse_arguments():
4646
type=int, default=config.minimal_coverage)
4747
parser.add_argument("--save_intermediate", help="Saves R intermediate data structures for debugging purposes",
4848
action='store_true', default=False)
49-
parser.add_argument("--set_seed", metavar="integer_for_seed",
50-
help="An integer number to set the seed for subsampling of n regions per pairwise "
51-
"(by default, the seed is 1).", type=int)
52-
parser.add_argument("--no_seed", help="Set no seed for the subsampling of n regions per pairwise "
53-
"(by default, seed=1 is set).",
49+
parser.add_argument("--no_seed", help="Set no seed for the subsampling of n regions per pairwise. This means that "
50+
"the average synteny scores may change between SynTracker runs. This is an optional parameter. "
51+
"By default, a seed=1 is set to enable reproducibility between different runs.",
5452
action='store_true', default=False)
5553

5654
# Parse the given arguments
@@ -162,14 +160,6 @@ def parse_arguments():
162160
config.is_set_seed = False
163161
config.seed_num = 0
164162

165-
elif args.set_seed is not None:
166-
if args.set_seed > 0:
167-
config.seed_num = args.set_seed
168-
else:
169-
error = "Error: if you use the '--set_seed' option, it must be followed by an integer to set the seed " \
170-
"with.\n"
171-
return error
172-
173163
return error
174164

175165

@@ -229,14 +219,9 @@ def read_conf_file():
229219
elif re.search("^Save intermediate", line):
230220
config.save_intermediate = True
231221

232-
elif re.search("^Seed", line):
233-
m = re.search("^Seed:\s(\d+)\n", line)
234-
if m:
235-
config.is_set_seed = True
236-
seed = m.group(1)
237-
238222
elif re.search("^No seed", line):
239223
config.is_set_seed = False
224+
config.seed_num = 0
240225

241226
elif re.search("^Reference genomes:", line):
242227
in_ref_genomes_list = 1
@@ -309,9 +294,6 @@ def read_conf_file():
309294
error = "The minimal identity is not written in the config file."
310295
return error
311296

312-
if config.is_set_seed:
313-
config.seed_num = int(seed)
314-
315297
# Verify that there is at least one reference genome and that input files exist
316298
genomes_counter = 0
317299
for genome in config.genomes_dict:

0 commit comments

Comments
 (0)