Skip to content
Merged

Jdk8 #151

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 2 additions & 34 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ This library contains the SEER Java implementations of the Multiple Primary and

The implementation was partially based on the KCR Multiple Primary Rules Library developed by the Kentucky Cancer Registry.

This library requires Java 8.

## Download

The library is available on [Maven Central](http://search.maven.org/#search%7Cga%7C1%7Cg%3A%22com.imsweb%22%20AND%20a%3A%22mph%22).
Expand Down Expand Up @@ -87,40 +89,6 @@ If histology is not in the range 9590-9993, one of the following solid tumors se
If DX year is 2006 or earlier and the case is not Benign Brain (C700-C729, C751-C753 with behavior 0/1), the "2006 and earlier Solid Malignant" rules will be used.<br/><br/>
If DX year is 2006 or earlier and the case is Benign Brain (C700-C729, C751-C753 with behavior 0/1), the "2006 and earlier Benign Brain" rules will be used.

## Testing Data

The project contains [a lab class](https://github.com/imsweb/mph/blob/master/src/test/java/lab/TestingDataCreation.java) that can be used to generate CSV files
that contain fake data along with the library result.

A sample testing file is available in the project: [mph-testing-2000-2022.csv.gz](https://github.com/imsweb/mph/blob/master/src/test/resources/mph-testing-2000-2022.csv.gz)

To create a larger file, clone the project and execute the main method of that class locally.

The class allows the following parameters (defined at the top of the main method):
- numTests: the number of rows for the generated CSV file
- minDxYear: the minimum DX year to use
- maxDxYear: the maximum DX year to use

The CSV files will contain the following columns:
- year1
- month1
- day1
- site1
- hist1
- beh1
- lat1
- year2
- month2
- day2
- site2
- hist2
- beh2
- lat2
- result
- reason

The lab class uses the [Data Generator](https://github.com/imsweb/data-generator) library to create the fake data.

## About SEER

This library was developed through the [SEER](http://seer.cancer.gov/) program.
Expand Down
19 changes: 10 additions & 9 deletions build.gradle
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import com.vanniktech.maven.publish.*
import com.vanniktech.maven.publish.JavaLibrary
import com.vanniktech.maven.publish.JavadocJar
import com.vanniktech.maven.publish.SonatypeHost

plugins {
id 'java-library'
id 'jacoco'
id 'com.vanniktech.maven.publish' version '0.34.0' // publish to Maven Central
id 'com.vanniktech.maven.publish' version '0.31.0' // publish to Maven Central
id 'com.github.ben-manes.versions' version '0.52.0' // check for out-of-date dependencies (run 'dependencyUpdates' manually)
id 'com.github.spotbugs' version '6.4.2' // spotbugs code analysis
id 'org.sonarqube' version '6.3.1.5724' // sonarQube analysis
Expand All @@ -20,12 +22,8 @@ repositories {
}

dependencies {
implementation 'org.apache.commons:commons-lang3:3.18.0'
implementation 'de.siegmar:fastcsv:3.7.0'

testImplementation 'junit:junit:4.13.2'
testImplementation 'com.imsweb:seerapi-client-java:5.9'
testImplementation 'com.imsweb:data-generator:2.2'
}

// enforce UTF-8, display the compilation warnings
Expand All @@ -35,8 +33,8 @@ tasks.withType(JavaCompile).configureEach {
}

java {
sourceCompatibility = JavaVersion.VERSION_11
targetCompatibility = JavaVersion.VERSION_11
sourceCompatibility = JavaVersion.VERSION_1_8
targetCompatibility = JavaVersion.VERSION_1_8
}

tasks.withType(JavaCompile).configureEach {
Expand All @@ -63,6 +61,9 @@ jar {
'Automatic-Module-Name': 'com.imsweb.mph'
)
}
from('VERSION') {
rename { fileName -> "mph-library-version.txt" }
}
}

// jacoco plugin settings
Expand Down Expand Up @@ -110,7 +111,7 @@ tasks.register('hematoDataTest', Test) {
mavenPublishing {
configure(new JavaLibrary(new JavadocJar.Javadoc(), true))

publishToMavenCentral(true)
publishToMavenCentral(SonatypeHost.CENTRAL_PORTAL, true) // to upgrade the plugin, just remove the first parameter...
signAllPublications()

pom {
Expand Down
88 changes: 4 additions & 84 deletions src/main/java/com/imsweb/mph/DefaultHematoDataProvider.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,24 +5,15 @@

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.StandardCharsets;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;

import org.apache.commons.lang3.StringUtils;

import de.siegmar.fastcsv.reader.CsvReader;
import de.siegmar.fastcsv.reader.NamedCsvRecord;

import com.imsweb.mph.internal.CsvUtils;
import com.imsweb.mph.internal.HematoDTO;

/**
Expand All @@ -36,79 +27,9 @@ public class DefaultHematoDataProvider implements HematoDataProvider {
private final Map<String, List<HematoDTO>> _transformFromDto;

public DefaultHematoDataProvider() {

_samePrimaryDto = new HashMap<>();
try (InputStream is = Thread.currentThread().getContextClassLoader().getResourceAsStream("Hematopoietic2010SamePrimaryPairs.csv")) {
if (is == null)
throw new IllegalStateException("Unable to get Hematopoietic2010SamePrimaryPairs.csv");
try (Reader reader = new InputStreamReader(is, StandardCharsets.US_ASCII); CsvReader<NamedCsvRecord> csvReader = CsvReader.builder().ofNamedCsvRecord(reader)) {
csvReader.stream().forEach(line -> {
Short validStartYear = StringUtils.isNotBlank(line.getField(1)) ? Short.valueOf(line.getField(1)) : null;
Short validEndYear = StringUtils.isNotBlank(line.getField(2)) ? Short.valueOf(line.getField(2)) : null;
Short startYear = StringUtils.isNotBlank(line.getField(3)) ? Short.valueOf(line.getField(3)) : null;
Short endYear = StringUtils.isNotBlank(line.getField(4)) ? Short.valueOf(line.getField(4)) : null;
if (_samePrimaryDto.containsKey(line.getField(0)))
_samePrimaryDto.get(line.getField(0)).add(new HematoDTO(validStartYear, validEndYear, startYear, endYear, line.getField(5)));
else {
List<HematoDTO> list = new ArrayList<>();
list.add(new HematoDTO(validStartYear, validEndYear, startYear, endYear, line.getField(5)));
_samePrimaryDto.put(line.getField(0), list);
}
});
}
}
catch (IOException e) {
throw new IllegalStateException(e);
}

_transformToDto = new HashMap<>();
try (InputStream is = Thread.currentThread().getContextClassLoader().getResourceAsStream("Hematopoietic2010TransformToPairs.csv")) {
if (is == null)
throw new IllegalStateException("Unable to get Hematopoietic2010TransformToPairs.csv");
try (Reader reader = new InputStreamReader(is, StandardCharsets.US_ASCII); CsvReader<NamedCsvRecord> csvReader = CsvReader.builder().ofNamedCsvRecord(reader)) {
csvReader.stream().forEach(line -> {
Short validStartYear = StringUtils.isNotBlank(line.getField(1)) ? Short.valueOf(line.getField(1)) : null;
Short validEndYear = StringUtils.isNotBlank(line.getField(2)) ? Short.valueOf(line.getField(2)) : null;
Short startYear = StringUtils.isNotBlank(line.getField(3)) ? Short.valueOf(line.getField(3)) : null;
Short endYear = StringUtils.isNotBlank(line.getField(4)) ? Short.valueOf(line.getField(4)) : null;
if (_transformToDto.containsKey(line.getField(0)))
_transformToDto.get(line.getField(0)).add(new HematoDTO(validStartYear, validEndYear, startYear, endYear, line.getField(5)));
else {
List<HematoDTO> list = new ArrayList<>();
list.add(new HematoDTO(validStartYear, validEndYear, startYear, endYear, line.getField(5)));
_transformToDto.put(line.getField(0), list);
}
});
}
}
catch (IOException e) {
throw new IllegalStateException(e);
}

_transformFromDto = new HashMap<>();
try (InputStream is = Thread.currentThread().getContextClassLoader().getResourceAsStream("Hematopoietic2010TransformFromPairs.csv")) {
if (is == null)
throw new IllegalStateException("Unable to get Hematopoietic2010TransformFromPairs.csv");
try (Reader reader = new InputStreamReader(is, StandardCharsets.US_ASCII); CsvReader<NamedCsvRecord> csvReader = CsvReader.builder().ofNamedCsvRecord(reader)) {
csvReader.stream().forEach(line -> {
Short validStartYear = StringUtils.isNotBlank(line.getField(1)) ? Short.valueOf(line.getField(1)) : null;
Short validEndYear = StringUtils.isNotBlank(line.getField(2)) ? Short.valueOf(line.getField(2)) : null;
Short startYear = StringUtils.isNotBlank(line.getField(3)) ? Short.valueOf(line.getField(3)) : null;
Short endYear = StringUtils.isNotBlank(line.getField(4)) ? Short.valueOf(line.getField(4)) : null;
if (_transformFromDto.containsKey(line.getField(0)))
_transformFromDto.get(line.getField(0)).add(new HematoDTO(validStartYear, validEndYear, startYear, endYear, line.getField(5)));
else {
List<HematoDTO> list = new ArrayList<>();
list.add(new HematoDTO(validStartYear, validEndYear, startYear, endYear, line.getField(5)));
_transformFromDto.put(line.getField(0), list);
}
});
}
}
catch (IOException e) {
throw new IllegalStateException(e);
}

_samePrimaryDto = CsvUtils.parseHematoCsvFile("Hematopoietic2010SamePrimaryPairs.csv");
_transformToDto = CsvUtils.parseHematoCsvFile("Hematopoietic2010TransformToPairs.csv");
_transformFromDto = CsvUtils.parseHematoCsvFile("Hematopoietic2010TransformFromPairs.csv");
}

@Override
Expand All @@ -128,7 +49,6 @@ public List<HematoDTO> getTransformFrom(String morphology) {

@Override
public Date getDataLastUpdated() {

try (InputStream is = Thread.currentThread().getContextClassLoader().getResourceAsStream("hemato_data_info.properties")) {
if (is == null)
throw new IllegalStateException("Unable to get info properties");
Expand Down
16 changes: 8 additions & 8 deletions src/main/java/com/imsweb/mph/MphGroup.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.lang3.Range;

import com.imsweb.mph.internal.Range;
import com.imsweb.mph.mpgroups.GroupUtility;

@SuppressWarnings("unused")
public abstract class MphGroup {

protected String _id;
Expand All @@ -30,17 +30,17 @@ public abstract class MphGroup {

protected List<MphRule> _rules;

private List<Range<Integer>> _siteIncRanges;
private final List<Range> _siteIncRanges;

private List<Range<Integer>> _siteExcRanges;
private final List<Range> _siteExcRanges;

private List<Range<Integer>> _histIncRanges;
private final List<Range> _histIncRanges;

private List<Range<Integer>> _histExcRanges;
private final List<Range> _histExcRanges;

private List<Range<Integer>> _behavIncRanges;
private final List<Range> _behavIncRanges;

private List<Range<Integer>> _yearIncRanges;
private final List<Range> _yearIncRanges;

protected MphGroup(String id, String name, String siteInclusions, String siteExclusions, String histInclusions, String histExclusions, String behavInclusions, String yearInclusions) {
_id = id;
Expand Down
8 changes: 2 additions & 6 deletions src/main/java/com/imsweb/mph/MphInput.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@
*/
package com.imsweb.mph;

import org.apache.commons.lang3.math.NumberUtils;

import com.imsweb.mph.mpgroups.GroupUtility;

/**
Expand Down Expand Up @@ -43,7 +41,6 @@ public void setPrimarySite(String primarySite) {
_primarySite = primarySite;
}


/**
 * Stores the provided value in the ICD-O-3 histology field of this input.
 * @param histologyIcdO3 the value to store; it is kept as-is, without any validation
 */
public void setHistologyIcdO3(String histologyIcdO3) {
    this._histologyIcdO3 = histologyIcdO3;
}
Expand All @@ -61,12 +58,12 @@ public void setBehaviorIcdO2(String behaviorIcdO2) {
}

public String getHistology() {
int year = NumberUtils.isDigits(_dateOfDiagnosisYear) ? Integer.parseInt(_dateOfDiagnosisYear) : 9999;
int year = MphUtils.isDigits(_dateOfDiagnosisYear) ? Integer.parseInt(_dateOfDiagnosisYear) : 9999;
return year < 2001 && GroupUtility.validateHistology(_histologyIcdO2) ? _histologyIcdO2 : _histologyIcdO3;
}

public String getBehavior() {
int year = NumberUtils.isDigits(_dateOfDiagnosisYear) ? Integer.parseInt(_dateOfDiagnosisYear) : 9999;
int year = MphUtils.isDigits(_dateOfDiagnosisYear) ? Integer.parseInt(_dateOfDiagnosisYear) : 9999;
return year < 2001 && GroupUtility.validateBehavior(_behaviorIcdO2) ? _behaviorIcdO2 : _behaviorIcdO3;
}

Expand Down Expand Up @@ -102,7 +99,6 @@ public void setDateOfDiagnosisDay(String dateOfDiagnosisDay) {
_dateOfDiagnosisDay = dateOfDiagnosisDay;
}


/**
 * Builds the ICD code of this input by combining its histology and its behavior.
 * @return the value of getHistology(), a forward slash, and the value of getBehavior()
 */
public String getIcdCode() {
    String histology = getHistology();
    String behavior = getBehavior();
    return histology + "/" + behavior;
}
Expand Down
46 changes: 38 additions & 8 deletions src/main/java/com/imsweb/mph/MphUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@
*/
package com.imsweb.mph;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
Expand All @@ -11,9 +16,6 @@
import java.util.Map;
import java.util.regex.Pattern;

import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.math.NumberUtils;

import com.imsweb.mph.internal.TempRuleResult;
import com.imsweb.mph.mpgroups.GroupUtility;
import com.imsweb.mph.mpgroups.Mp1998HematopoieticGroup;
Expand Down Expand Up @@ -60,7 +62,9 @@
*/
public final class MphUtils {

private final Pattern _morphology = Pattern.compile("^(\\d{4}/\\d)");
private final static Pattern _MORPHOLOGY_PATTERN = Pattern.compile("^(\\d{4}/\\d)");

private final static Pattern _DIGITS_PATTERN = Pattern.compile("\\d+");

/**
* The possible result of determining if two tumors are single or multiple primaries.
Expand Down Expand Up @@ -117,8 +121,34 @@ public static synchronized MphUtils getInstance() {
return _INSTANCE;
}

/**
 * Checks whether the provided value is made up exclusively of digits.
 * <br/>
 * A null or empty value is never considered to be digits.
 * @param value the value to check, may be null
 * @return true if the value is not null and contains one or more digits and nothing else, false otherwise
 */
public static boolean isDigits(String value) {
    if (value == null)
        return false;
    return _DIGITS_PATTERN.matcher(value).matches();
}

/**
 * Returns the version of this library, read from the first line of the "mph-library-version.txt" resource
 * (looked up through the context class loader of the current thread).
 * <br/>
 * The lookup is best-effort: if the resource cannot be found, cannot be read, or its first line is missing or blank, "?" is returned.
 * @return the trimmed first line of the version resource, or "?" if it is not available; never null
 */
public static String getLibraryVersion() {
    String result = "?";

    try (InputStream is = Thread.currentThread().getContextClassLoader().getResourceAsStream("mph-library-version.txt")) {
        if (is != null) {
            try (BufferedReader reader = new BufferedReader(new InputStreamReader(is, StandardCharsets.US_ASCII))) {
                // readLine() returns null when the resource is empty; keep the "?" default instead of returning null
                String line = reader.readLine();
                if (line != null && !line.trim().isEmpty())
                    result = line.trim();
            }
        }
    }
    catch (IOException | RuntimeException ignored) {
        // deliberately ignored: failing to resolve the version must never break the caller, the "?" default is returned instead
    }

    return result;
}

/**
* Constructor
* <br/>
* This will use the default hemato db provider
*/
public MphUtils() {
Expand Down Expand Up @@ -201,8 +231,8 @@ public MphUtils(HematoDataProvider provider) {
public MphOutput computePrimaries(MphInput input1, MphInput input2) {
MphOutput output = new MphOutput();

int year1 = NumberUtils.isDigits(input1.getDateOfDiagnosisYear()) ? Integer.parseInt(input1.getDateOfDiagnosisYear()) : -1;
int year2 = NumberUtils.isDigits(input2.getDateOfDiagnosisYear()) ? Integer.parseInt(input2.getDateOfDiagnosisYear()) : -1;
int year1 = isDigits(input1.getDateOfDiagnosisYear()) ? Integer.parseInt(input1.getDateOfDiagnosisYear()) : -1;
int year2 = isDigits(input2.getDateOfDiagnosisYear()) ? Integer.parseInt(input2.getDateOfDiagnosisYear()) : -1;
String site1 = input1.getPrimarySite();
String site2 = input2.getPrimarySite();
String hist1 = input1.getHistology();
Expand Down Expand Up @@ -273,7 +303,7 @@ else if (result.getFinalResult() != null) {
if (potentialResult == null || potentialResult.getPotentialResult().equals(result.getFinalResult())) {
output.setResult(result.getFinalResult());
output.setStep(rule.getStep());
output.setReason(StringUtils.isNotBlank(result.getMessage()) ? result.getMessage() : rule.getReason());
output.setReason(result.getMessage() != null && !result.getMessage().trim().isEmpty() ? result.getMessage() : rule.getReason());
if (potentialResult != null && potentialResult.getPotentialResult().equals(result.getFinalResult()))
output.getAppliedRules().addAll(rulesAppliedAfterQuestionable);
}
Expand Down Expand Up @@ -332,7 +362,7 @@ public Date getHematoDataLastUpdated() {
* @return true if two diseases are same primary and false otherwise.
*/
public boolean isHematoSamePrimary(String morph1, String morph2, int year1, int year2) {
if (morph1 == null || morph2 == null || !_morphology.matcher(morph1).matches() || !_morphology.matcher(morph2).matches())
if (morph1 == null || morph2 == null || !_MORPHOLOGY_PATTERN.matcher(morph1).matches() || !_MORPHOLOGY_PATTERN.matcher(morph2).matches())
return false;
if (morph1.equals(morph2))
return true;
Expand Down
Loading