Skip to content

Commit 342540c

Browse files
committed
Convert segments to families function
1 parent 398d60a commit 342540c

File tree

10 files changed

+869
-1
lines changed

10 files changed

+869
-1
lines changed

pom.xml

+2-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
<groupId>com.antigenomics</groupId>
66
<artifactId>vdjtools</artifactId>
7-
<version>1.1.9</version>
7+
<version>1.1.10-SNAPSHOT</version>
88
<packaging>jar</packaging>
99

1010
<name>vdjtools</name>
@@ -21,6 +21,7 @@
2121
<includes>
2222
<include>rscripts/*</include>
2323
<include>profile/*</include>
24+
<include>vj_families.txt</include>
2425
</includes>
2526
</resource>
2627
</resources>

src/main/groovy/com/antigenomics/vdjtools/VdjTools.groovy

+3
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ package com.antigenomics.vdjtools
3333
import com.antigenomics.vdjtools.annotate.Annotate
3434
import com.antigenomics.vdjtools.annotate.CalcCdrAaStats
3535
import com.antigenomics.vdjtools.annotate.CalcDegreeStats
36+
import com.antigenomics.vdjtools.annotate.SegmentsToFamilies
3637
import com.antigenomics.vdjtools.basic.*
3738
import com.antigenomics.vdjtools.diversity.CalcDiversityStats
3839
import com.antigenomics.vdjtools.diversity.PlotQuantileStats
@@ -171,6 +172,8 @@ def getScript = { String scriptName ->
171172
return new CalcDegreeStats()
172173
case "ANNOTATE":
173174
return new Annotate()
175+
case "SEGMENTSTOFAMILIES":
176+
return new SegmentsToFamilies()
174177
case "SCANDATABASE":
175178
println "Moved to VDJdb since 1.0.5, please visit vdjdb.cdr3.net"
176179
System.exit(0)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
package com.antigenomics.vdjtools.annotate
2+
3+
import com.antigenomics.vdjtools.io.SampleWriter
4+
import com.antigenomics.vdjtools.misc.CommonUtil
5+
import com.antigenomics.vdjtools.sample.Sample
6+
import com.antigenomics.vdjtools.sample.SampleCollection
7+
import com.antigenomics.vdjtools.sample.SegmentConverter
8+
9+
//
// SegmentsToFamilies: rewrites V/J segment names of every clonotype in the input
// samples using the species-specific mapping stored in the "vj_families.txt" resource,
// then writes the converted samples and the metadata table under the output prefix.
//

// Command line definition
def cli = new CliBuilder(usage: "SegmentsToFamilies [options] " +
        "[sample1 sample2 ... if not -m] output_prefix")
cli.h("display help message")
cli.m(longOpt: "metadata", argName: "filename", args: 1,
        "Metadata file. First and second columns should contain file name and sample id. " +
                "Header is mandatory and will be used to assign column names for metadata.")
cli.s(longOpt: "species", argName: "name", args: 1,
        "Dataset species, 'human' or 'mouse'.", required: true)
cli.c(longOpt: "compress", "Compress output sample files.")

def opt = cli.parse(args)

// Parsing failed
if (opt == null) {
    System.exit(2)
}

// Help requested
if (opt.h) {
    cli.usage()
    System.exit(2)
}

// Check if metadata is provided: with -m only the output prefix is expected,
// otherwise at least one sample file must precede the output prefix

def metadataFileName = opt.m
def positionalArgs = opt.arguments()

if (metadataFileName) {
    if (positionalArgs.size() != 1) {
        println "Only output prefix should be provided in case of -m"
        cli.usage()
        System.exit(2)
    }
} else if (positionalArgs.size() < 2) {
    println "At least 1 sample files should be provided if not using -m"
    cli.usage()
    System.exit(2)
}

// The output prefix is always the last positional argument
def outputFilePrefix = positionalArgs[-1]
def species = (String) opt.s
def compress = (boolean) opt.c

if (!"human".equalsIgnoreCase(species) && !"mouse".equalsIgnoreCase(species)) {
    println "Should specify either human or mouse as species."
    System.exit(2)
}

// Simple (unqualified) name of this script class, used as a log prefix
def classNameParts = getClass().canonicalName.split("\\.")
def scriptName = classNameParts[classNameParts.length - 1]

//
// Batch load samples
//

println "[${new Date()} $scriptName] Reading samples"

def sampleCollection
if (metadataFileName) {
    sampleCollection = new SampleCollection((String) metadataFileName)
} else {
    // everything except the trailing output prefix is a sample file
    sampleCollection = new SampleCollection(positionalArgs[0..-2])
}

println "[${new Date()} $scriptName] ${sampleCollection.size()} samples prepared"

//
// Load segment conversions
//
// Each tab-separated row is used as follows: column 0 is matched against the species,
// column 2 equal to "v" (case-insensitive) selects the V map, any other value selects
// the J map, and columns 3 and 4 become the map key and value respectively.
//
def vSegmentMap = new HashMap<String, String>()
def jSegmentMap = new HashMap<String, String>()

CommonUtil.resourceStreamReader("vj_families.txt").splitEachLine("\t") { fields ->
    if (!fields[0].equalsIgnoreCase(species)) {
        return // row belongs to another species
    }
    def targetMap = fields[2].equalsIgnoreCase("v") ? vSegmentMap : jSegmentMap
    targetMap.put(fields[3], fields[4])
}

def converter = new SegmentConverter(vSegmentMap, jSegmentMap)

//
// Iterate over samples and change V segments
//
def sw = new SampleWriter(compress)

for (sample in sampleCollection) {
    def sampleId = sample.sampleMetadata.sampleId
    println "[${new Date()} $scriptName] Changing segments for $sampleId.."

    // print output
    def convertedSample = new Sample(sample, converter)
    sw.writeConventional(convertedSample, outputFilePrefix)
}

// Store the metadata table alongside the converted samples
sampleCollection.metadataTable.storeWithOutput(outputFilePrefix, compress,
        "segm2fam")

println "[${new Date()} $scriptName] Finished"

src/main/java/com/antigenomics/vdjtools/sample/Clonotype.java

+9
Original file line numberDiff line numberDiff line change
@@ -497,6 +497,15 @@ public int hashCode() {
497497
return result;
498498
}
499499

500+
Clonotype withSegments(Segment v, Segment j) {
501+
return new Clonotype(parent, count, freq,
502+
segmPoints, v, d, j,
503+
cdr3nt, cdr3aa,
504+
inFrame, noStop, isComplete,
505+
annotation
506+
);
507+
}
508+
500509
@Override
501510
public Clonotype getClonotype() {
502511
return this;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
package com.antigenomics.vdjtools.sample;
2+
3+
4+
public interface ClonotypeConverter {
5+
Clonotype convert(Clonotype clonotype);
6+
}

src/main/java/com/antigenomics/vdjtools/sample/Sample.java

+9
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,15 @@ public Sample(Sample other, Map<Clonotype, Integer> samplerMap) {
8888
Collections.sort(clonotypes);
8989
}
9090

91+
/**
 * Creates a new sample by passing every clonotype of the specified sample through a converter.
 * The annotation header and sample metadata references are taken from the specified sample.
 *
 * @param other              sample to take clonotypes, metadata and annotation header from.
 * @param clonotypeConverter converter applied to each clonotype before it is added to this sample.
 */
public Sample(Sample other, ClonotypeConverter clonotypeConverter) {
    this.annotationHeader = other.annotationHeader;
    this.sampleMetadata = other.sampleMetadata;

    for (Clonotype source : other.clonotypes) {
        Clonotype converted = clonotypeConverter.convert(source);
        this.addClonotype(converted);
    }
}
99+
91100
/**
92101
* Creates a new sample by filtering and selecting top N clonotypes from the specified sample.
93102
*
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
package com.antigenomics.vdjtools.sample;
2+
3+
import com.antigenomics.vdjtools.misc.Segment;
4+
import com.antigenomics.vdjtools.misc.SegmentFactory;
5+
6+
import java.util.HashMap;
7+
import java.util.Map;
8+
9+
public class SegmentConverter implements ClonotypeConverter {
10+
private final Map<String, Segment> vSegmentMap = new HashMap<>(),
11+
jSegmentMap = new HashMap<>();
12+
13+
public SegmentConverter(Map<String, String> vSegmentMap, Map<String, String> jSegmentMap) {
14+
for (Map.Entry<String, String> conv : vSegmentMap.entrySet()) {
15+
this.vSegmentMap.put(conv.getKey(),
16+
SegmentFactory.INSTANCE.create(conv.getValue()));
17+
}
18+
19+
for (Map.Entry<String, String> conv : jSegmentMap.entrySet()) {
20+
this.jSegmentMap.put(conv.getKey(),
21+
SegmentFactory.INSTANCE.create(conv.getValue()));
22+
}
23+
}
24+
25+
@Override
26+
public Clonotype convert(Clonotype clonotype) {
27+
return clonotype.withSegments(
28+
vSegmentMap.getOrDefault(clonotype.getV(), clonotype.getVBinary()),
29+
jSegmentMap.getOrDefault(clonotype.getJ(), clonotype.getJBinary()));
30+
}
31+
}

0 commit comments

Comments
 (0)