diff --git a/README.md b/README.md
index 480f7cb..0938846 100644
--- a/README.md
+++ b/README.md
@@ -8,6 +8,8 @@ This library contains the SEER Java implementations of the Multiple Primary and
The implementation was partially based on the KCR Multiple Primary Rules Library developed by the Kentucky Cancer Registry.
+This library requires Java 8.
+
## Download
The library is available on [Maven Central](http://search.maven.org/#search%7Cga%7C1%7Cg%3A%22com.imsweb%22%20AND%20a%3A%mph%22).
@@ -87,40 +89,6 @@ If histology is not in the range 9590-9993, one of the following solid tumors se
If DX year is 2006 or earlier and the case is not Benign Brain (C700-C729, C751-C753 with behavior 0/1), the "2006 and earlier Solid Malignant" rules will be used.
If DX year is 2006 or earlier and the case is Benign Brain (C700-C729, C751-C753 with behavior 0/1), the "2006 and earlier Benign Brain" rules will be used.
-## Testing Data
-
-The project contains [a lab class](https://github.com/imsweb/mph/blob/master/src/test/java/lab/TestingDataCreation.java) that can be used to generate CSV files
-that contains fake data along with the library result.
-
-A sample testing file is available in the project: [mph-testing-2000-2022.csv.gz](https://github.com/imsweb/mph/blob/master/src/test/resources/mph-testing-2000-2022.csv.gz)
-
-To create larger file, clone the project and execute the main method of that class locally.
-
-The class allows the following parameters (defined in the top of the main method):
- - numTests: the number of rows for the generated CSV file
- - minDxYear: the minimum DX year to use
- - maxDxYear: the maximum DX year to use
-
-The CSV files will contain the following columns:
- - year1
- - month1
- - day1
- - site1
- - hist1
- - beh1
- - lat1
- - year2
- - month2
- - day2
- - site2
- - hist2
- - beh2
- - lat2
- - result
- - reason
-
-The lab class uses the [Data Generator](https://github.com/imsweb/data-generator) library to create the fake data.
-
## About SEER
This library was developed through the [SEER](http://seer.cancer.gov/) program.
diff --git a/build.gradle b/build.gradle
index 3747b25..7a29672 100644
--- a/build.gradle
+++ b/build.gradle
@@ -1,9 +1,11 @@
-import com.vanniktech.maven.publish.*
+import com.vanniktech.maven.publish.JavaLibrary
+import com.vanniktech.maven.publish.JavadocJar
+import com.vanniktech.maven.publish.SonatypeHost
plugins {
id 'java-library'
id 'jacoco'
- id 'com.vanniktech.maven.publish' version '0.34.0' // publish to Maven Central
+ id 'com.vanniktech.maven.publish' version '0.31.0' // publish to Maven Central
id 'com.github.ben-manes.versions' version '0.52.0' // check for out-of-date dependencies (run 'dependencyUpdates' manually)
id 'com.github.spotbugs' version '6.4.2' // spotbugs code analysis
id 'org.sonarqube' version '6.3.1.5724' // sonarQube analysis
@@ -20,12 +22,8 @@ repositories {
}
dependencies {
- implementation 'org.apache.commons:commons-lang3:3.18.0'
- implementation 'de.siegmar:fastcsv:3.7.0'
-
testImplementation 'junit:junit:4.13.2'
testImplementation 'com.imsweb:seerapi-client-java:5.9'
- testImplementation 'com.imsweb:data-generator:2.2'
}
// enforce UTF-8, display the compilation warnings
@@ -35,8 +33,8 @@ tasks.withType(JavaCompile).configureEach {
}
java {
- sourceCompatibility = JavaVersion.VERSION_11
- targetCompatibility = JavaVersion.VERSION_11
+ sourceCompatibility = JavaVersion.VERSION_1_8
+ targetCompatibility = JavaVersion.VERSION_1_8
}
tasks.withType(JavaCompile).configureEach {
@@ -63,6 +61,9 @@ jar {
'Automatic-Module-Name': 'com.imsweb.mph'
)
}
+ from('VERSION') {
+ rename { fileName -> "mph-library-version.txt" }
+ }
}
// jacoco plugin settings
@@ -110,7 +111,7 @@ tasks.register('hematoDataTest', Test) {
mavenPublishing {
configure(new JavaLibrary(new JavadocJar.Javadoc(), true))
- publishToMavenCentral(true)
+ publishToMavenCentral(SonatypeHost.CENTRAL_PORTAL, true) // to upgrade the plugin, just remove the first parameter...
signAllPublications()
pom {
diff --git a/src/main/java/com/imsweb/mph/DefaultHematoDataProvider.java b/src/main/java/com/imsweb/mph/DefaultHematoDataProvider.java
index 004c12e..c3f96cd 100644
--- a/src/main/java/com/imsweb/mph/DefaultHematoDataProvider.java
+++ b/src/main/java/com/imsweb/mph/DefaultHematoDataProvider.java
@@ -5,24 +5,15 @@
import java.io.IOException;
import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.io.Reader;
-import java.nio.charset.StandardCharsets;
import java.text.ParseException;
import java.text.SimpleDateFormat;
-import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
-import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
-import org.apache.commons.lang3.StringUtils;
-
-import de.siegmar.fastcsv.reader.CsvReader;
-import de.siegmar.fastcsv.reader.NamedCsvRecord;
-
+import com.imsweb.mph.internal.CsvUtils;
import com.imsweb.mph.internal.HematoDTO;
/**
@@ -36,79 +27,9 @@ public class DefaultHematoDataProvider implements HematoDataProvider {
private final Map> _transformFromDto;
public DefaultHematoDataProvider() {
-
- _samePrimaryDto = new HashMap<>();
- try (InputStream is = Thread.currentThread().getContextClassLoader().getResourceAsStream("Hematopoietic2010SamePrimaryPairs.csv")) {
- if (is == null)
- throw new IllegalStateException("Unable to get Hematopoietic2010SamePrimaryPairs.csv");
- try (Reader reader = new InputStreamReader(is, StandardCharsets.US_ASCII); CsvReader csvReader = CsvReader.builder().ofNamedCsvRecord(reader)) {
- csvReader.stream().forEach(line -> {
- Short validStartYear = StringUtils.isNotBlank(line.getField(1)) ? Short.valueOf(line.getField(1)) : null;
- Short validEndYear = StringUtils.isNotBlank(line.getField(2)) ? Short.valueOf(line.getField(2)) : null;
- Short startYear = StringUtils.isNotBlank(line.getField(3)) ? Short.valueOf(line.getField(3)) : null;
- Short endYear = StringUtils.isNotBlank(line.getField(4)) ? Short.valueOf(line.getField(4)) : null;
- if (_samePrimaryDto.containsKey(line.getField(0)))
- _samePrimaryDto.get(line.getField(0)).add(new HematoDTO(validStartYear, validEndYear, startYear, endYear, line.getField(5)));
- else {
- List list = new ArrayList<>();
- list.add(new HematoDTO(validStartYear, validEndYear, startYear, endYear, line.getField(5)));
- _samePrimaryDto.put(line.getField(0), list);
- }
- });
- }
- }
- catch (IOException e) {
- throw new IllegalStateException(e);
- }
-
- _transformToDto = new HashMap<>();
- try (InputStream is = Thread.currentThread().getContextClassLoader().getResourceAsStream("Hematopoietic2010TransformToPairs.csv")) {
- if (is == null)
- throw new IllegalStateException("Unable to get Hematopoietic2010TransformToPairs.csv");
- try (Reader reader = new InputStreamReader(is, StandardCharsets.US_ASCII); CsvReader csvReader = CsvReader.builder().ofNamedCsvRecord(reader)) {
- csvReader.stream().forEach(line -> {
- Short validStartYear = StringUtils.isNotBlank(line.getField(1)) ? Short.valueOf(line.getField(1)) : null;
- Short validEndYear = StringUtils.isNotBlank(line.getField(2)) ? Short.valueOf(line.getField(2)) : null;
- Short startYear = StringUtils.isNotBlank(line.getField(3)) ? Short.valueOf(line.getField(3)) : null;
- Short endYear = StringUtils.isNotBlank(line.getField(4)) ? Short.valueOf(line.getField(4)) : null;
- if (_transformToDto.containsKey(line.getField(0)))
- _transformToDto.get(line.getField(0)).add(new HematoDTO(validStartYear, validEndYear, startYear, endYear, line.getField(5)));
- else {
- List list = new ArrayList<>();
- list.add(new HematoDTO(validStartYear, validEndYear, startYear, endYear, line.getField(5)));
- _transformToDto.put(line.getField(0), list);
- }
- });
- }
- }
- catch (IOException e) {
- throw new IllegalStateException(e);
- }
-
- _transformFromDto = new HashMap<>();
- try (InputStream is = Thread.currentThread().getContextClassLoader().getResourceAsStream("Hematopoietic2010TransformFromPairs.csv")) {
- if (is == null)
- throw new IllegalStateException("Unable to get Hematopoietic2010TransformFromPairs.csv");
- try (Reader reader = new InputStreamReader(is, StandardCharsets.US_ASCII); CsvReader csvReader = CsvReader.builder().ofNamedCsvRecord(reader)) {
- csvReader.stream().forEach(line -> {
- Short validStartYear = StringUtils.isNotBlank(line.getField(1)) ? Short.valueOf(line.getField(1)) : null;
- Short validEndYear = StringUtils.isNotBlank(line.getField(2)) ? Short.valueOf(line.getField(2)) : null;
- Short startYear = StringUtils.isNotBlank(line.getField(3)) ? Short.valueOf(line.getField(3)) : null;
- Short endYear = StringUtils.isNotBlank(line.getField(4)) ? Short.valueOf(line.getField(4)) : null;
- if (_transformFromDto.containsKey(line.getField(0)))
- _transformFromDto.get(line.getField(0)).add(new HematoDTO(validStartYear, validEndYear, startYear, endYear, line.getField(5)));
- else {
- List list = new ArrayList<>();
- list.add(new HematoDTO(validStartYear, validEndYear, startYear, endYear, line.getField(5)));
- _transformFromDto.put(line.getField(0), list);
- }
- });
- }
- }
- catch (IOException e) {
- throw new IllegalStateException(e);
- }
-
+ _samePrimaryDto = CsvUtils.parseHematoCsvFile("Hematopoietic2010SamePrimaryPairs.csv");
+ _transformToDto = CsvUtils.parseHematoCsvFile("Hematopoietic2010TransformToPairs.csv");
+ _transformFromDto = CsvUtils.parseHematoCsvFile("Hematopoietic2010TransformFromPairs.csv");
}
@Override
@@ -128,7 +49,6 @@ public List getTransformFrom(String morphology) {
@Override
public Date getDataLastUpdated() {
-
try (InputStream is = Thread.currentThread().getContextClassLoader().getResourceAsStream("hemato_data_info.properties")) {
if (is == null)
throw new IllegalStateException("Unable to get info properties");
diff --git a/src/main/java/com/imsweb/mph/MphGroup.java b/src/main/java/com/imsweb/mph/MphGroup.java
index 5ffdf31..5def4b3 100644
--- a/src/main/java/com/imsweb/mph/MphGroup.java
+++ b/src/main/java/com/imsweb/mph/MphGroup.java
@@ -6,10 +6,10 @@
import java.util.ArrayList;
import java.util.List;
-import org.apache.commons.lang3.Range;
-
+import com.imsweb.mph.internal.Range;
import com.imsweb.mph.mpgroups.GroupUtility;
+@SuppressWarnings("unused")
public abstract class MphGroup {
protected String _id;
@@ -30,17 +30,17 @@ public abstract class MphGroup {
protected List _rules;
- private List> _siteIncRanges;
+ private final List _siteIncRanges;
- private List> _siteExcRanges;
+ private final List _siteExcRanges;
- private List> _histIncRanges;
+ private final List _histIncRanges;
- private List> _histExcRanges;
+ private final List _histExcRanges;
- private List> _behavIncRanges;
+ private final List _behavIncRanges;
- private List> _yearIncRanges;
+ private final List _yearIncRanges;
protected MphGroup(String id, String name, String siteInclusions, String siteExclusions, String histInclusions, String histExclusions, String behavInclusions, String yearInclusions) {
_id = id;
diff --git a/src/main/java/com/imsweb/mph/MphInput.java b/src/main/java/com/imsweb/mph/MphInput.java
index 5fd44ab..0d11647 100644
--- a/src/main/java/com/imsweb/mph/MphInput.java
+++ b/src/main/java/com/imsweb/mph/MphInput.java
@@ -3,8 +3,6 @@
*/
package com.imsweb.mph;
-import org.apache.commons.lang3.math.NumberUtils;
-
import com.imsweb.mph.mpgroups.GroupUtility;
/**
@@ -43,7 +41,6 @@ public void setPrimarySite(String primarySite) {
_primarySite = primarySite;
}
-
public void setHistologyIcdO3(String histologyIcdO3) {
_histologyIcdO3 = histologyIcdO3;
}
@@ -61,12 +58,12 @@ public void setBehaviorIcdO2(String behaviorIcdO2) {
}
public String getHistology() {
- int year = NumberUtils.isDigits(_dateOfDiagnosisYear) ? Integer.parseInt(_dateOfDiagnosisYear) : 9999;
+ int year = MphUtils.isDigits(_dateOfDiagnosisYear) ? Integer.parseInt(_dateOfDiagnosisYear) : 9999;
return year < 2001 && GroupUtility.validateHistology(_histologyIcdO2) ? _histologyIcdO2 : _histologyIcdO3;
}
public String getBehavior() {
- int year = NumberUtils.isDigits(_dateOfDiagnosisYear) ? Integer.parseInt(_dateOfDiagnosisYear) : 9999;
+ int year = MphUtils.isDigits(_dateOfDiagnosisYear) ? Integer.parseInt(_dateOfDiagnosisYear) : 9999;
return year < 2001 && GroupUtility.validateBehavior(_behaviorIcdO2) ? _behaviorIcdO2 : _behaviorIcdO3;
}
@@ -102,7 +99,6 @@ public void setDateOfDiagnosisDay(String dateOfDiagnosisDay) {
_dateOfDiagnosisDay = dateOfDiagnosisDay;
}
-
public String getIcdCode() {
return getHistology() + "/" + getBehavior();
}
diff --git a/src/main/java/com/imsweb/mph/MphUtils.java b/src/main/java/com/imsweb/mph/MphUtils.java
index 9cabb07..d6b8724 100644
--- a/src/main/java/com/imsweb/mph/MphUtils.java
+++ b/src/main/java/com/imsweb/mph/MphUtils.java
@@ -3,6 +3,11 @@
*/
package com.imsweb.mph;
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
@@ -11,9 +16,6 @@
import java.util.Map;
import java.util.regex.Pattern;
-import org.apache.commons.lang3.StringUtils;
-import org.apache.commons.lang3.math.NumberUtils;
-
import com.imsweb.mph.internal.TempRuleResult;
import com.imsweb.mph.mpgroups.GroupUtility;
import com.imsweb.mph.mpgroups.Mp1998HematopoieticGroup;
@@ -60,7 +62,9 @@
*/
public final class MphUtils {
- private final Pattern _morphology = Pattern.compile("^(\\d{4}/\\d)");
+    // morphology code: four digits, a slash and one digit; used with matches(), so the whole value must match
+    private static final Pattern _MORPHOLOGY_PATTERN = Pattern.compile("^(\\d{4}/\\d)");
+
+    // one or more digits; used with matches(), so the whole value must match
+    private static final Pattern _DIGITS_PATTERN = Pattern.compile("\\d+");
/**
* The possible result of determining if two tumors are single or multiple primaries.
@@ -117,8 +121,34 @@ public static synchronized MphUtils getInstance() {
return _INSTANCE;
}
+    /**
+     * Checks that the provided value is made of digits only (at least one digit is required).
+     * @param value value to check, can be null
+     * @return true if the value is not null and contains nothing but digits
+     */
+    public static boolean isDigits(String value) {
+        if (value == null)
+            return false;
+        return _DIGITS_PATTERN.matcher(value).matches();
+    }
+
+    /**
+     * Returns the version of this library.
+     * <p>
+     * The version is read from the "mph-library-version.txt" resource; this is a best-effort lookup that never fails.
+     * @return the library version, "?" if it cannot be determined
+     */
+    public static String getLibraryVersion() {
+        String result = "?";
+
+        try (InputStream is = Thread.currentThread().getContextClassLoader().getResourceAsStream("mph-library-version.txt")) {
+            if (is != null) {
+                try (BufferedReader reader = new BufferedReader(new InputStreamReader(is, StandardCharsets.US_ASCII))) {
+                    // readLine returns null when the resource is empty; keep the default value in that case
+                    String line = reader.readLine();
+                    if (line != null && !line.trim().isEmpty())
+                        result = line.trim();
+                }
+            }
+        }
+        catch (IOException | RuntimeException e) {
+            // ignored on purpose: the default value is returned when the version cannot be read
+        }
+
+        return result;
+    }
+
/**
* Constructor
+ *
* This will use the default hemato db provider
*/
public MphUtils() {
@@ -201,8 +231,8 @@ public MphUtils(HematoDataProvider provider) {
public MphOutput computePrimaries(MphInput input1, MphInput input2) {
MphOutput output = new MphOutput();
- int year1 = NumberUtils.isDigits(input1.getDateOfDiagnosisYear()) ? Integer.parseInt(input1.getDateOfDiagnosisYear()) : -1;
- int year2 = NumberUtils.isDigits(input2.getDateOfDiagnosisYear()) ? Integer.parseInt(input2.getDateOfDiagnosisYear()) : -1;
+ int year1 = isDigits(input1.getDateOfDiagnosisYear()) ? Integer.parseInt(input1.getDateOfDiagnosisYear()) : -1;
+ int year2 = isDigits(input2.getDateOfDiagnosisYear()) ? Integer.parseInt(input2.getDateOfDiagnosisYear()) : -1;
String site1 = input1.getPrimarySite();
String site2 = input2.getPrimarySite();
String hist1 = input1.getHistology();
@@ -273,7 +303,7 @@ else if (result.getFinalResult() != null) {
if (potentialResult == null || potentialResult.getPotentialResult().equals(result.getFinalResult())) {
output.setResult(result.getFinalResult());
output.setStep(rule.getStep());
- output.setReason(StringUtils.isNotBlank(result.getMessage()) ? result.getMessage() : rule.getReason());
+ output.setReason(result.getMessage() != null && !result.getMessage().trim().isEmpty() ? result.getMessage() : rule.getReason());
if (potentialResult != null && potentialResult.getPotentialResult().equals(result.getFinalResult()))
output.getAppliedRules().addAll(rulesAppliedAfterQuestionable);
}
@@ -332,7 +362,7 @@ public Date getHematoDataLastUpdated() {
* @return true if two diseases are same primary and false otherwise.
*/
public boolean isHematoSamePrimary(String morph1, String morph2, int year1, int year2) {
- if (morph1 == null || morph2 == null || !_morphology.matcher(morph1).matches() || !_morphology.matcher(morph2).matches())
+ if (morph1 == null || morph2 == null || !_MORPHOLOGY_PATTERN.matcher(morph1).matches() || !_MORPHOLOGY_PATTERN.matcher(morph2).matches())
return false;
if (morph1.equals(morph2))
return true;
diff --git a/src/main/java/com/imsweb/mph/internal/CsvUtils.java b/src/main/java/com/imsweb/mph/internal/CsvUtils.java
new file mode 100644
index 0000000..b5a091a
--- /dev/null
+++ b/src/main/java/com/imsweb/mph/internal/CsvUtils.java
@@ -0,0 +1,200 @@
+/*
+ * Copyright (C) 2025 Information Management Services, Inc.
+ */
+package com.imsweb.mph.internal;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.LineNumberReader;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Minimal CSV reader/writer used for the data files bundled with the library.
+ * <p>
+ * Values are separated by commas; a value can be enclosed in double quotes, in which case a quote
+ * inside the value is written as two quotes. A record cannot span several lines.
+ */
+public final class CsvUtils {
+
+    private static final char _QUOTE = '"';
+
+    private static final char _DELIMITER = ',';
+
+    private CsvUtils() {
+        // static utility class
+    }
+
+    /**
+     * Formats the provided values as a single CSV line.
+     * <p>
+     * Null values are written as empty values. A value containing a delimiter or a quote is enclosed in
+     * quotes (and its quotes are doubled) so that {@link #parseCsvLine(int, String)} can read it back.
+     * @param values values to write, cannot be null
+     * @return the CSV line, without line terminator
+     */
+    public static String writeCsvValues(String[] values) {
+        StringBuilder buf = new StringBuilder();
+        for (int i = 0; i < values.length; i++) {
+            // the delimiter depends on the position only, otherwise leading empty values would be dropped
+            if (i > 0)
+                buf.append(_DELIMITER);
+            String value = values[i];
+            if (value == null)
+                continue;
+            if (value.indexOf(_DELIMITER) >= 0 || value.indexOf(_QUOTE) >= 0)
+                buf.append(_QUOTE).append(value.replace("\"", "\"\"")).append(_QUOTE);
+            else
+                buf.append(value);
+        }
+        return buf.toString();
+    }
+
+    /**
+     * Reads a hematopoietic pairs data file from the classpath.
+     * <p>
+     * The first line is a header. For the other lines, the first column is the key of the returned map, columns
+     * 2 to 5 are converted to Short (a blank value becomes null) and column 6 is passed as-is to the {@link HematoDTO}.
+     * @param filename name of the resource to read
+     * @return the parsed rows grouped by their first column, in file order
+     * @throws IllegalStateException if the resource cannot be found, cannot be read or is not a valid CSV file
+     */
+    public static Map<String, List<HematoDTO>> parseHematoCsvFile(String filename) {
+        Map<String, List<HematoDTO>> result = new LinkedHashMap<>();
+        for (List<String> fields : readRecords(filename)) {
+            HematoDTO dto = new HematoDTO(toYear(fields.get(1)), toYear(fields.get(2)), toYear(fields.get(3)), toYear(fields.get(4)), fields.get(5));
+            result.computeIfAbsent(fields.get(0), key -> new ArrayList<>()).add(dto);
+        }
+        return result;
+    }
+
+    /**
+     * Reads a group data file from the classpath.
+     * @param filename name of the resource to read
+     * @return one array of values per line, in file order; the header line is not included
+     * @throws IllegalStateException if the resource cannot be found, cannot be read or is not a valid CSV file
+     */
+    public static List<String[]> parseGroupCsvFile(String filename) {
+        List<String[]> result = new ArrayList<>();
+        for (List<String> fields : readRecords(filename))
+            result.add(fields.toArray(new String[0]));
+        return result;
+    }
+
+    /**
+     * Splits a CSV line into its values.
+     * <p>
+     * A line ending with a delimiter has an empty last value; an empty line has no value at all.
+     * @param lineNumber line number, only used in the error messages
+     * @param line line to parse, cannot be null
+     * @return the values of the line, without their enclosing quotes
+     * @throws IOException if a quote is not closed, if a closing quote is not followed by a delimiter or if an unquoted value contains a quote
+     */
+    public static List<String> parseCsvLine(int lineNumber, String line) throws IOException {
+        List<String> result = new ArrayList<>();
+
+        int curIndex = 0;
+        while (curIndex < line.length()) {
+            if (line.charAt(curIndex) == _QUOTE) {
+                int closingQuote = getNextSingleQuote(line, curIndex);
+                if (closingQuote < 0)
+                    throw new IOException("Line " + lineNumber + ": found an unmatched quote");
+                result.add(line.substring(curIndex + 1, closingQuote).replace("\"\"", "\""));
+
+                // the closing quote must be followed by a delimiter or by the end of the line
+                curIndex = closingQuote + 1;
+                if (curIndex < line.length()) {
+                    if (line.charAt(curIndex) != _DELIMITER)
+                        throw new IOException("Line " + lineNumber + ": expected a delimiter after the quote");
+                    curIndex++;
+                    // handle case where last value is empty
+                    if (curIndex == line.length())
+                        result.add("");
+                }
+            }
+            else {
+                int nextDelimiter = getNextDelimiter(line, curIndex);
+                String value = line.substring(curIndex, nextDelimiter);
+                // unquoted values should not contain any quotes
+                if (value.indexOf(_QUOTE) >= 0)
+                    throw new IOException("Line " + lineNumber + ": value contains some quotes but does not start with a quote");
+                result.add(value);
+                curIndex = nextDelimiter + 1;
+                // handle case where last value is empty
+                if (curIndex == line.length())
+                    result.add("");
+            }
+        }
+
+        return result;
+    }
+
+    /**
+     * Reads the records of a CSV resource, skipping the header line and checking that every record has as many values as the header.
+     */
+    private static List<List<String>> readRecords(String filename) {
+        List<List<String>> records = new ArrayList<>();
+
+        try (InputStream is = Thread.currentThread().getContextClassLoader().getResourceAsStream(filename)) {
+            if (is == null)
+                throw new IllegalStateException("Unable to read " + filename + "; unable to find data file");
+            try (LineNumberReader reader = new LineNumberReader(new InputStreamReader(is, StandardCharsets.UTF_8))) {
+                int expectedColumns = -1;
+                for (String line = reader.readLine(); line != null; line = reader.readLine()) {
+                    List<String> fields = parseCsvLine(reader.getLineNumber(), line);
+                    if (reader.getLineNumber() == 1)
+                        expectedColumns = fields.size();
+                    else if (fields.size() != expectedColumns)
+                        throw new IOException("Line " + reader.getLineNumber() + ": expected " + expectedColumns + " columns, but found " + fields.size() + " columns");
+                    else
+                        records.add(fields);
+                }
+            }
+        }
+        catch (IOException e) {
+            throw new IllegalStateException(e);
+        }
+
+        return records;
+    }
+
+    /**
+     * Converts a year read from a data file; a null or blank value means that there is no year.
+     */
+    private static Short toYear(String value) {
+        if (value == null || value.trim().isEmpty())
+            return null;
+        return Short.valueOf(value.trim());
+    }
+
+    /**
+     * Returns the index of the quote closing the value that starts at the provided index, -1 if there is none.
+     */
+    private static int getNextSingleQuote(String line, int from) {
+        int index = from + 1;
+        while (index < line.length()) {
+            if (line.charAt(index) == _QUOTE) {
+                // a doubled quote is an escaped quote, not the closing one
+                if (index + 1 < line.length() && line.charAt(index + 1) == _QUOTE)
+                    index++;
+                else
+                    return index;
+            }
+            index++;
+        }
+        return -1;
+    }
+
+    /**
+     * Returns the index of the next delimiter at or after the provided index, the length of the line if there is none.
+     */
+    private static int getNextDelimiter(String line, int from) {
+        int index = line.indexOf(_DELIMITER, from);
+        return index < 0 ? line.length() : index;
+    }
+
+}
diff --git a/src/main/java/com/imsweb/mph/internal/Range.java b/src/main/java/com/imsweb/mph/internal/Range.java
new file mode 100644
index 0000000..ab1ae05
--- /dev/null
+++ b/src/main/java/com/imsweb/mph/internal/Range.java
@@ -0,0 +1,53 @@
+/*
+ * Copyright (C) 2025 Information Management Services, Inc.
+ */
+package com.imsweb.mph.internal;
+
+import java.util.Objects;
+
+/**
+ * Immutable range of integers, both bounds are inclusive.
+ */
+public class Range {
+
+    private final int _minimum;
+
+    private final int _maximum;
+
+    /**
+     * Creates a range from two bounds; they can be provided in any order.
+     * @param minimum first bound (inclusive), cannot be null
+     * @param maximum second bound (inclusive), cannot be null
+     * @throws NullPointerException if one of the bounds is null
+     */
+    public Range(Integer minimum, Integer maximum) {
+        Objects.requireNonNull(minimum, "minimum");
+        Objects.requireNonNull(maximum, "maximum");
+        // bounds provided in the wrong order are swapped instead of creating a range that contains nothing
+        _minimum = Math.min(minimum, maximum);
+        _maximum = Math.max(minimum, maximum);
+    }
+
+    /**
+     * Creates a range from two inclusive bounds.
+     */
+    public static Range of(final Integer fromInclusive, final Integer toInclusive) {
+        return new Range(fromInclusive, toInclusive);
+    }
+
+    /**
+     * Creates a range containing a single value.
+     */
+    public static Range is(final Integer value) {
+        return new Range(value, value);
+    }
+
+    /**
+     * Checks whether the provided value is within the range.
+     * @param element value to check, can be null
+     * @return true if the value is not null and between the two bounds (inclusive)
+     */
+    public boolean contains(final Integer element) {
+        return element != null && element >= _minimum && element <= _maximum;
+    }
+}
diff --git a/src/main/java/com/imsweb/mph/mpgroups/GroupUtility.java b/src/main/java/com/imsweb/mph/mpgroups/GroupUtility.java
index 9d9fd5c..303aed0 100644
--- a/src/main/java/com/imsweb/mph/mpgroups/GroupUtility.java
+++ b/src/main/java/com/imsweb/mph/mpgroups/GroupUtility.java
@@ -11,17 +11,20 @@
import java.util.Collections;
import java.util.List;
import java.util.Objects;
-
-import org.apache.commons.lang3.Range;
-import org.apache.commons.lang3.StringUtils;
-import org.apache.commons.lang3.math.NumberUtils;
+import java.util.regex.Pattern;
import com.imsweb.mph.MphConstants;
import com.imsweb.mph.MphInput;
+import com.imsweb.mph.MphUtils;
+import com.imsweb.mph.internal.Range;
public final class GroupUtility {
+ private static final Pattern _SPLIT_COMMA = Pattern.compile(",");
+ private static final Pattern _SPLIT_DASH = Pattern.compile("-");
+
private GroupUtility() {
+ // static utility class
}
/**
@@ -35,14 +38,14 @@ public static boolean validateProperties(String primarySite, String histology, S
* Validates primary site
*/
public static boolean validateSite(String site) {
- return site != null && site.length() == 4 && site.startsWith("C") && NumberUtils.isDigits(site.substring(1)) && !"C809".equalsIgnoreCase(site);
+ return site != null && site.length() == 4 && site.startsWith("C") && MphUtils.isDigits(site.substring(1)) && !"C809".equalsIgnoreCase(site);
}
/**
* Validates histology
*/
public static boolean validateHistology(String hist) {
- return NumberUtils.isDigits(hist) && Integer.parseInt(hist) >= 8000 && Integer.parseInt(hist) <= 9999;
+ return MphUtils.isDigits(hist) && Integer.parseInt(hist) >= 8000 && Integer.parseInt(hist) <= 9999;
}
/**
@@ -88,15 +91,15 @@ public static boolean sameKnownDateParts(MphInput i1, MphInput i2) {
DateFieldParts date = new DateFieldParts(i1, i2);
return date.getYear1() != null && date.getYear1().equals(date.getYear2()) &&
(date.getMonth1() == null || date.getMonth2() == null || (date.getMonth1().equals(date.getMonth2()) &&
- (date.getDay1() == null || date.getDay2() == null || date.getDay1().equals(date.getDay2()))));
+ (date.getDay1() == null || date.getDay2() == null || date.getDay1().equals(date.getDay2()))));
}
/**
* Checks if integer value is in a list of ranges
*/
- public static boolean isContained(List> list, Integer value) {
+ public static boolean isContained(List list, Integer value) {
if (list != null && !list.isEmpty())
- for (Range range : list)
+ for (Range range : list)
if (range.contains(value))
return true;
return false;
@@ -105,14 +108,16 @@ public static boolean isContained(List> list, Integer value) {
/**
* computes list of range values from string
*/
- public static List> computeRange(String rawValue, boolean isSite) {
+ public static List computeRange(String rawValue, boolean isSite) {
if (rawValue == null)
return Collections.emptyList();
- List> result = new ArrayList<>();
+ List result = new ArrayList<>();
- for (String item : StringUtils.split(rawValue, ',')) {
- String[] parts = StringUtils.split(item.trim(), '-');
+ for (String item : _SPLIT_COMMA.split(rawValue)) {
+ if (item.trim().isEmpty())
+ continue;
+ String[] parts = _SPLIT_DASH.split(item.trim());
if (parts.length == 1) {
if (isSite)
result.add(Range.is(Integer.parseInt(parts[0].trim().substring(1))));
@@ -138,9 +143,11 @@ public static List expandList(List list) {
if (list == null || list.isEmpty())
return list;
for (String item : list) {
- String[] ranges = StringUtils.split(item.trim(), ',');
+ String[] ranges = _SPLIT_COMMA.split(item.trim());
for (String range : ranges) {
- String[] parts = StringUtils.split(range.trim(), '-');
+ if (range.trim().isEmpty())
+ continue;
+ String[] parts = _SPLIT_DASH.split(range.trim());
if (parts.length <= 1)
result.add(range);
else {
@@ -365,9 +372,9 @@ else if (endMon != null) {
* Returns the site, hist/beh information of the input
*/
public static String getSiteHistInfo(String site, String hist, String beh, int year) {
- return (StringUtils.isBlank(site) ? "Unknown Site" : site) + ", "
- + (StringUtils.isBlank(hist) ? "Unknown Histology" : hist) + "/"
- + (StringUtils.isBlank(beh) ? "Unknown Behavior" : beh) + " "
+ return (site == null || site.trim().isEmpty() ? "Unknown Site" : site) + ", "
+ + (hist == null || hist.trim().isEmpty() ? "Unknown Histology" : hist) + "/"
+ + (beh == null || beh.trim().isEmpty() ? "Unknown Behavior" : beh) + " "
+ (validateYear(year) ? ("with year of diagnosis " + year) : "with unknown year of diagnosis");
}
@@ -381,18 +388,18 @@ static class DateFieldParts {
private Integer _day2;
public DateFieldParts(MphInput input1, MphInput input2) {
- _year1 = NumberUtils.isDigits(input1.getDateOfDiagnosisYear()) ? Integer.parseInt(input1.getDateOfDiagnosisYear()) : null;
- _year2 = NumberUtils.isDigits(input2.getDateOfDiagnosisYear()) ? Integer.parseInt(input2.getDateOfDiagnosisYear()) : null;
- _month1 = NumberUtils.isDigits(input1.getDateOfDiagnosisMonth()) ? Integer.parseInt(input1.getDateOfDiagnosisMonth()) : null;
+ _year1 = MphUtils.isDigits(input1.getDateOfDiagnosisYear()) ? Integer.parseInt(input1.getDateOfDiagnosisYear()) : null;
+ _year2 = MphUtils.isDigits(input2.getDateOfDiagnosisYear()) ? Integer.parseInt(input2.getDateOfDiagnosisYear()) : null;
+ _month1 = MphUtils.isDigits(input1.getDateOfDiagnosisMonth()) ? Integer.parseInt(input1.getDateOfDiagnosisMonth()) : null;
if (_month1 != null && (_month1 < 1 || _month1 > 12))
_month1 = null;
- _month2 = NumberUtils.isDigits(input2.getDateOfDiagnosisMonth()) ? Integer.parseInt(input2.getDateOfDiagnosisMonth()) : null;
+ _month2 = MphUtils.isDigits(input2.getDateOfDiagnosisMonth()) ? Integer.parseInt(input2.getDateOfDiagnosisMonth()) : null;
if (_month2 != null && (_month2 < 1 || _month2 > 12))
_month2 = null;
- _day1 = _month1 != null && NumberUtils.isDigits(input1.getDateOfDiagnosisDay()) ? Integer.parseInt(input1.getDateOfDiagnosisDay()) : null;
+ _day1 = _month1 != null && MphUtils.isDigits(input1.getDateOfDiagnosisDay()) ? Integer.parseInt(input1.getDateOfDiagnosisDay()) : null;
if (_year1 != null && _day1 != null && (_day1 < 1 || _day1 > LocalDate.of(_year1, _month1, 1).lengthOfMonth()))
_day1 = null;
- _day2 = _month2 != null && NumberUtils.isDigits(input2.getDateOfDiagnosisDay()) ? Integer.parseInt(input2.getDateOfDiagnosisDay()) : null;
+ _day2 = _month2 != null && MphUtils.isDigits(input2.getDateOfDiagnosisDay()) ? Integer.parseInt(input2.getDateOfDiagnosisDay()) : null;
if (_year2 != null && _day2 != null && (_day2 < 1 || _day2 > LocalDate.of(_year2, _month2, 1).lengthOfMonth()))
_day2 = null;
}
diff --git a/src/main/java/com/imsweb/mph/mpgroups/Mp1998HematopoieticGroup.java b/src/main/java/com/imsweb/mph/mpgroups/Mp1998HematopoieticGroup.java
index 414c0b7..a9c08bc 100644
--- a/src/main/java/com/imsweb/mph/mpgroups/Mp1998HematopoieticGroup.java
+++ b/src/main/java/com/imsweb/mph/mpgroups/Mp1998HematopoieticGroup.java
@@ -3,23 +3,16 @@
*/
package com.imsweb.mph.mpgroups;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.io.Reader;
-import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
-import de.siegmar.fastcsv.reader.CsvReader;
-import de.siegmar.fastcsv.reader.NamedCsvRecord;
-
import com.imsweb.mph.MphConstants;
import com.imsweb.mph.MphGroup;
import com.imsweb.mph.MphInput;
import com.imsweb.mph.MphRule;
import com.imsweb.mph.MphUtils;
import com.imsweb.mph.RuleExecutionContext;
+import com.imsweb.mph.internal.CsvUtils;
import com.imsweb.mph.internal.TempRuleResult;
public class Mp1998HematopoieticGroup extends MphGroup {
@@ -50,8 +43,8 @@ public TempRuleResult apply(MphInput i1, MphInput i2, RuleExecutionContext conte
String secondDx = MphConstants.COMPARE_DX_FIRST_LATEST == laterDx ? i1.getHistology() : i2.getHistology();
for (String[] row : _HEMATOPOIETIC_1998)
if ((firstDx.compareTo(row[0]) >= 0 && firstDx.compareTo(row[1]) <= 0 && secondDx.compareTo(row[2]) >= 0 && secondDx.compareTo(row[3]) <= 0) ||
- (MphConstants.COMPARE_DX_EQUAL == laterDx && (secondDx.compareTo(row[0]) >= 0 && secondDx.compareTo(row[1]) <= 0 && firstDx.compareTo(row[2]) >= 0 && firstDx.compareTo(
- row[3]) <= 0))) {
+ (MphConstants.COMPARE_DX_EQUAL == laterDx && (secondDx.compareTo(row[0]) >= 0 && secondDx.compareTo(row[1]) <= 0 && firstDx.compareTo(row[2]) >= 0 && firstDx.compareTo(
+ row[3]) <= 0))) {
result.setFinalResult(MphUtils.MpResult.SINGLE_PRIMARY);
result.setMessage("Single primary based on SEER 1998 multiple primary rules for hematopoietic cancer.");
return result;
@@ -67,18 +60,7 @@ public TempRuleResult apply(MphInput i1, MphInput i2, RuleExecutionContext conte
}
private static synchronized void initializeLookup() {
- if (_HEMATOPOIETIC_1998.isEmpty()) {
- try (InputStream is = Thread.currentThread().getContextClassLoader().getResourceAsStream("Hematopoietic1998HistologyPairs.csv")) {
- if (is == null)
- throw new IllegalStateException("Unable to read Hematopoietic1998HistologyPairs.csv");
- try (Reader reader = new InputStreamReader(is, StandardCharsets.US_ASCII); CsvReader csvReader = CsvReader.builder().ofNamedCsvRecord(reader)) {
- csvReader.stream().forEach(line -> _HEMATOPOIETIC_1998.add(line.getFields().toArray(new String[0])));
- }
- }
- catch (IOException e) {
- throw new IllegalStateException(e);
- }
- }
+ if (_HEMATOPOIETIC_1998.isEmpty())
+ _HEMATOPOIETIC_1998.addAll(CsvUtils.parseGroupCsvFile("Hematopoietic1998HistologyPairs.csv"));
}
-
}
diff --git a/src/main/java/com/imsweb/mph/mpgroups/Mp2001HematopoieticGroup.java b/src/main/java/com/imsweb/mph/mpgroups/Mp2001HematopoieticGroup.java
index 33b6d42..26751a9 100644
--- a/src/main/java/com/imsweb/mph/mpgroups/Mp2001HematopoieticGroup.java
+++ b/src/main/java/com/imsweb/mph/mpgroups/Mp2001HematopoieticGroup.java
@@ -3,23 +3,16 @@
*/
package com.imsweb.mph.mpgroups;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.io.Reader;
-import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
-import de.siegmar.fastcsv.reader.CsvReader;
-import de.siegmar.fastcsv.reader.NamedCsvRecord;
-
import com.imsweb.mph.MphConstants;
import com.imsweb.mph.MphGroup;
import com.imsweb.mph.MphInput;
import com.imsweb.mph.MphRule;
import com.imsweb.mph.MphUtils;
import com.imsweb.mph.RuleExecutionContext;
+import com.imsweb.mph.internal.CsvUtils;
import com.imsweb.mph.internal.TempRuleResult;
public class Mp2001HematopoieticGroup extends MphGroup {
@@ -79,27 +72,9 @@ public TempRuleResult apply(MphInput i1, MphInput i2, RuleExecutionContext conte
}
private static synchronized void initializeLookups() {
- if (_2001_HEMATOPOIETIC_GROUPS.isEmpty() || _2001_HEMATOPOIETIC_GROUP_PAIRS.isEmpty()) {
- try (InputStream is = Thread.currentThread().getContextClassLoader().getResourceAsStream("Hematopoietic2001HistologyGroups.csv")) {
- if (is == null)
- throw new IllegalStateException("Unable to read Hematopoietic2001HistologyGroups.csv");
- try (Reader reader = new InputStreamReader(is, StandardCharsets.US_ASCII); CsvReader csvReader = CsvReader.builder().ofNamedCsvRecord(reader)) {
- csvReader.stream().forEach(line -> _2001_HEMATOPOIETIC_GROUPS.add(line.getFields().toArray(new String[0])));
- }
- }
- catch (IOException e) {
- throw new IllegalStateException(e);
- }
- try (InputStream is = Thread.currentThread().getContextClassLoader().getResourceAsStream("Hematopoietic2001HistologyGroupPairs.csv")) {
- if (is == null)
- throw new IllegalStateException("Unable to read Hematopoietic2001HistologyGroupPairs.csv");
- try (Reader reader = new InputStreamReader(is, StandardCharsets.US_ASCII); CsvReader csvReader = CsvReader.builder().ofNamedCsvRecord(reader)) {
- csvReader.stream().forEach(line -> _2001_HEMATOPOIETIC_GROUP_PAIRS.add(line.getFields().toArray(new String[0])));
- }
- }
- catch (IOException e) {
- throw new IllegalStateException(e);
- }
- }
+ if (_2001_HEMATOPOIETIC_GROUPS.isEmpty())
+ _2001_HEMATOPOIETIC_GROUPS.addAll(CsvUtils.parseGroupCsvFile("Hematopoietic2001HistologyGroups.csv"));
+ if (_2001_HEMATOPOIETIC_GROUP_PAIRS.isEmpty())
+ _2001_HEMATOPOIETIC_GROUP_PAIRS.addAll(CsvUtils.parseGroupCsvFile("Hematopoietic2001HistologyGroupPairs.csv"));
}
}
diff --git a/src/main/java/com/imsweb/mph/mpgroups/Mp2021CutaneousMelanomaGroup.java b/src/main/java/com/imsweb/mph/mpgroups/Mp2021CutaneousMelanomaGroup.java
index 4906a5b..fe2d3a5 100644
--- a/src/main/java/com/imsweb/mph/mpgroups/Mp2021CutaneousMelanomaGroup.java
+++ b/src/main/java/com/imsweb/mph/mpgroups/Mp2021CutaneousMelanomaGroup.java
@@ -6,8 +6,6 @@
import java.util.Arrays;
import java.util.List;
-import org.apache.commons.lang3.StringUtils;
-
import com.imsweb.mph.MphConstants;
import com.imsweb.mph.MphGroup;
import com.imsweb.mph.MphInput;
@@ -50,7 +48,7 @@ public TempRuleResult apply(MphInput i1, MphInput i2, RuleExecutionContext conte
TempRuleResult result = new TempRuleResult();
List lateralityNotRequiredSites = Arrays.asList("C440", "C448", "C449");
if (lateralityNotRequiredSites.contains(i1.getPrimarySite()) || MphConstants.PAIRED_NO_INFORMATION.equals(i1.getLaterality()) || MphConstants.PAIRED_NO_INFORMATION.equals(
- i2.getLaterality()) || StringUtils.isEmpty(i1.getLaterality()) || StringUtils.isEmpty(i2.getLaterality()))
+ i2.getLaterality()) || i1.getLaterality() == null || i1.getLaterality().trim().isEmpty() || i2.getLaterality() == null || i2.getLaterality().trim().isEmpty())
return result;
// mid-line (5) is considered (look the example)
if (!Arrays.asList(MphConstants.RIGHT, MphConstants.LEFT, MphConstants.MID_LINE).containsAll(Arrays.asList(i1.getLaterality(), i2.getLaterality()))) {
diff --git a/src/main/resources/hemato_data_info.properties b/src/main/resources/hemato_data_info.properties
index 1e381d0..51ebd97 100644
--- a/src/main/resources/hemato_data_info.properties
+++ b/src/main/resources/hemato_data_info.properties
@@ -1,2 +1,2 @@
-#Thu Aug 14 14:22:49 EDT 2025
-last_updated=202508141422
+#Sun Oct 26 09:53:21 EDT 2025
+last_updated=202510260953
diff --git a/src/test/java/com/imsweb/mph/MphUtilsTest.java b/src/test/java/com/imsweb/mph/MphUtilsTest.java
index 1c2e5e3..8b4a9a6 100644
--- a/src/test/java/com/imsweb/mph/MphUtilsTest.java
+++ b/src/test/java/com/imsweb/mph/MphUtilsTest.java
@@ -39,6 +39,11 @@ public class MphUtilsTest {
private final MphUtils _utils = MphUtils.getInstance();
+ @Test
+ public void testGetLibraryVersion() {
+ Assert.assertEquals("1.0-UNIT-TESTS", MphUtils.getLibraryVersion()); // value matches src/test/resources/mph-library-version.txt; the real version file is copied (and renamed) during the build process...
+ }
+
@Test
public void testIsHematoSamePrimary() {
@@ -1933,7 +1938,8 @@ public void test2010Hematopoietic() {
MphInput i1 = new MphInput(), i2 = new MphInput();
MphOutput output;
- //M1 TODO
+ //M1
+ // FD - this case was never written; not sure why...
//M2
i1.setPrimarySite("C779");
@@ -2069,9 +2075,11 @@ public void test2010Hematopoietic() {
//confirms that the NOS and the more specific histology are the same primary.
//This is Skipped on the automated process
- //M8 TODO
+ //M8
+ // FD - this case was never written; not sure why...
- //M9 TODO
+ //M9
+ // FD - this case was never written; not sure why...
//M10 Abstract as multiple primaries** when a neoplasm is originally diagnosed as a chronic neoplasm AND there is a second diagnosis of an acute
//neoplasm more than 21 days after the chronic diagnosis.
@@ -2108,7 +2116,8 @@ public void test2010Hematopoietic() {
Assert.assertEquals(10, output.getAppliedRules().size());
Assert.assertEquals(MpResult.QUESTIONABLE, output.getResult());
- //M11 TODO
+ //M11
+ // FD - this case was never written; not sure why...
//M12 Abstract a single primary* when a neoplasm is originally diagnosed as acute AND reverts to a chronic neoplasm AND there is no confirmation
//available that the patient has been treated for the acute neoplasm.
@@ -2340,9 +2349,11 @@ public void test2004SolidMalignant() {
MphInput i1 = new MphInput(), i2 = new MphInput();
MphOutput output;
- //Rule 1 TODO
+ //Rule 1
+ // FD - this case was never written; not sure why...
- //Rule 2 TODO
+ //Rule 2
+ // FD - this case was never written; not sure why...
//Rule 3: Simultaneous multiple lesions of the same histologic type within the same site (i.e., multifocal tumors in a single organ or site) are a single primary.
//If a new cancer of the same histology as an earlier one is diagnosed in the same site within two months, this is a single primary cancer
diff --git a/src/test/java/com/imsweb/mph/internal/CsvUtilsTest.java b/src/test/java/com/imsweb/mph/internal/CsvUtilsTest.java
new file mode 100644
index 0000000..2302752
--- /dev/null
+++ b/src/test/java/com/imsweb/mph/internal/CsvUtilsTest.java
@@ -0,0 +1,94 @@
+/*
+ * Copyright (C) 2025 Information Management Services, Inc.
+ */
+package com.imsweb.mph.internal;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+public class CsvUtilsTest {
+
+ @Test
+ public void testWriteCsvValues() {
+ Assert.assertEquals("1", CsvUtils.writeCsvValues(new String[] {"1"}));
+ Assert.assertEquals("1,2,3", CsvUtils.writeCsvValues(new String[] {"1", "2", "3"}));
+ Assert.assertEquals("1,,3", CsvUtils.writeCsvValues(new String[] {"1", null, "3"})); // a null value is written as an empty field
+ Assert.assertEquals("\"1,2,3\"", CsvUtils.writeCsvValues(new String[] {"1,2,3"})); // a value containing a comma is wrapped in double quotes
+ Assert.assertEquals("\"1,\"\"2\"\",3\"", CsvUtils.writeCsvValues(new String[] {"1,\"2\",3"})); // embedded double quotes are doubled inside the quoted value
+ }
+
+ @Test
+ public void testParseHematoCsvFile() {
+ Map> result = CsvUtils.parseHematoCsvFile("hematopoietic-pairs-test.csv");
+ Assert.assertEquals(2, result.size()); // the three data rows of that file use two distinct morphology codes
+ Assert.assertEquals(1, result.get("9742/3").size());
+ Assert.assertEquals(2, result.get("9741/3").size());
+
+ try {
+ CsvUtils.parseHematoCsvFile("hematopoietic-pairs-test-invalid.csv");
+ Assert.fail("Should have been an exception!");
+ }
+ catch (RuntimeException e) {
+ // expected: the last row of that file has only 2 of the 6 columns
+ }
+
+ try {
+ CsvUtils.parseHematoCsvFile("UNKNOWN");
+ Assert.fail("Should have been an exception!");
+ }
+ catch (RuntimeException e) {
+ // expected: "UNKNOWN" is presumably not a resource on the test classpath
+ }
+ }
+
+ @Test
+ public void testParseGroupCsvFile() {
+ List result = CsvUtils.parseGroupCsvFile("hematopoietic-groups-test.csv");
+ Assert.assertEquals(2, result.size()); // the file has a header line plus two data rows; only the data rows are counted
+
+ try {
+ CsvUtils.parseGroupCsvFile("hematopoietic-groups-test-invalid.csv");
+ Assert.fail("Should have been an exception!");
+ }
+ catch (RuntimeException e) {
+ // expected: the last row of that file leaves a group name containing commas unquoted
+ }
+
+ try {
+ CsvUtils.parseGroupCsvFile("UNKNOWN");
+ Assert.fail("Should have been an exception!");
+ }
+ catch (RuntimeException e) {
+ // expected: "UNKNOWN" is presumably not a resource on the test classpath
+ }
+ }
+
+ @Test
+ public void testParseCsvLine() throws IOException {
+ Assert.assertEquals(Collections.singletonList("1"), CsvUtils.parseCsvLine(1, "1"));
+ Assert.assertEquals(Arrays.asList("1", "2", "3"), CsvUtils.parseCsvLine(1, "1,2,3"));
+ Assert.assertEquals(Arrays.asList("1", "", "3"), CsvUtils.parseCsvLine(1, "1,,3")); // an empty field is returned as an empty string
+ Assert.assertEquals(Collections.singletonList("1,2,3"), CsvUtils.parseCsvLine(1, "\"1,2,3\"")); // commas inside a quoted field do not split it
+ Assert.assertEquals(Collections.singletonList("1,\"2\",3"), CsvUtils.parseCsvLine(1, "\"1,\"\"2\"\",3\"")); // a doubled quote inside a quoted field is one literal quote
+
+ assertFailedParsing(1, "\"1,2,3"); // opening quote is never closed
+ assertFailedParsing(2, "1,2,3\""); // quote at the end of an unquoted field
+ assertFailedParsing(3, "1,\"2\"x,3"); // extra character right after a closing quote
+ }
+
+ private void assertFailedParsing(int lineNumber, String line) {
+ try {
+ CsvUtils.parseCsvLine(lineNumber, line);
+ Assert.fail("Should have been an exception!");
+ }
+ catch (IOException e) {
+ Assert.assertTrue(e.getMessage().contains("Line " + lineNumber)); // the error message must identify the offending line number
+ }
+ }
+}
diff --git a/src/test/java/com/imsweb/mph/internal/RangeTest.java b/src/test/java/com/imsweb/mph/internal/RangeTest.java
new file mode 100644
index 0000000..98df9e4
--- /dev/null
+++ b/src/test/java/com/imsweb/mph/internal/RangeTest.java
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2025 Information Management Services, Inc.
+ */
+package com.imsweb.mph.internal;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+public class RangeTest {
+
+ @Test
+ public void testRange() {
+ Range range = Range.of(1, 3);
+ Assert.assertFalse(range.contains(0));
+ Assert.assertTrue(range.contains(1)); // lower bound is inclusive
+ Assert.assertTrue(range.contains(2));
+ Assert.assertTrue(range.contains(3)); // upper bound is inclusive
+ Assert.assertFalse(range.contains(5));
+
+ Assert.assertFalse(range.contains(null)); // null is reported as not contained rather than throwing
+
+ range = Range.is(2);
+ Assert.assertFalse(range.contains(1));
+ Assert.assertTrue(range.contains(2)); // Range.is(2) contains only the value 2
+ Assert.assertFalse(range.contains(3));
+ }
+
+}
diff --git a/src/test/java/lab/HematoDataLab.java b/src/test/java/lab/HematoDataLab.java
index 6307e81..85ee65d 100644
--- a/src/test/java/lab/HematoDataLab.java
+++ b/src/test/java/lab/HematoDataLab.java
@@ -3,6 +3,7 @@
*/
package lab;
+import java.io.BufferedWriter;
import java.io.File;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
@@ -16,8 +17,7 @@
import java.util.Map;
import java.util.Properties;
-import de.siegmar.fastcsv.writer.CsvWriter;
-
+import com.imsweb.mph.internal.CsvUtils;
import com.imsweb.seerapi.client.NotFoundException;
import com.imsweb.seerapi.client.SeerApi;
import com.imsweb.seerapi.client.disease.Disease;
@@ -42,9 +42,9 @@ public static void main(String[] args) throws Exception {
File transformFromFile = new File(dir, "Hematopoietic2010TransformFromPairs.csv");
try (OutputStream hematoDataInfoOutput = Files.newOutputStream(hematoDataInfoFile.toPath());
- CsvWriter samePrimaryWriter = CsvWriter.builder().build(new OutputStreamWriter(Files.newOutputStream(samePrimaryFile.toPath()), StandardCharsets.UTF_8));
- CsvWriter transformToWriter = CsvWriter.builder().build(new OutputStreamWriter(Files.newOutputStream(transformToFile.toPath()), StandardCharsets.UTF_8));
- CsvWriter transformFromWriter = CsvWriter.builder().build(new OutputStreamWriter(Files.newOutputStream(transformFromFile.toPath()), StandardCharsets.UTF_8))) {
+ BufferedWriter samePrimaryWriter = new BufferedWriter(new OutputStreamWriter(Files.newOutputStream(samePrimaryFile.toPath()), StandardCharsets.UTF_8));
+ BufferedWriter transformToWriter = new BufferedWriter(new OutputStreamWriter(Files.newOutputStream(transformToFile.toPath()), StandardCharsets.UTF_8));
+ BufferedWriter transformFromWriter = new BufferedWriter(new OutputStreamWriter(Files.newOutputStream(transformFromFile.toPath()), StandardCharsets.UTF_8))) {
SeerApi api = new SeerApi.Builder().connect();
List allDiseases = new ArrayList<>();
@@ -137,9 +137,20 @@ else if (previousTotal != total) {
transformFrom.add(new String[] {morphology, validStartYear, validEndYear, startYear, endYear, transformFromMorphology.getIcdO3Morphology()});
}
}
- samePrimaryPairs.forEach(samePrimaryWriter::writeRecord);
- transformTo.forEach(transformToWriter::writeRecord);
- transformFrom.forEach(transformFromWriter::writeRecord);
+
+ for (String[] line : samePrimaryPairs) {
+ samePrimaryWriter.write(CsvUtils.writeCsvValues(line));
+ samePrimaryWriter.write("\r\n");
+ }
+ for (String[] line : transformTo) {
+ transformToWriter.write(CsvUtils.writeCsvValues(line));
+ transformToWriter.write("\r\n");
+ }
+ for (String[] line : transformFrom) {
+ transformFromWriter.write(CsvUtils.writeCsvValues(line));
+ transformFromWriter.write("\r\n");
+ }
+
Properties prop = new Properties();
prop.setProperty("last_updated", LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyyMMddkkmm")));
prop.store(hematoDataInfoOutput, null);
diff --git a/src/test/java/lab/TestingDataCreation.java b/src/test/java/lab/TestingDataCreation.java
deleted file mode 100644
index 520daaf..0000000
--- a/src/test/java/lab/TestingDataCreation.java
+++ /dev/null
@@ -1,114 +0,0 @@
-/*
- * Copyright (C) 2023 Information Management Services, Inc.
- */
-package lab;
-
-import java.io.BufferedWriter;
-import java.io.File;
-import java.io.IOException;
-import java.io.OutputStreamWriter;
-import java.nio.charset.StandardCharsets;
-import java.nio.file.Files;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.zip.GZIPOutputStream;
-
-import com.imsweb.datagenerator.naaccr.NaaccrDataGeneratorOptions;
-import com.imsweb.datagenerator.naaccr.NaaccrXmlDataGenerator;
-import com.imsweb.layout.LayoutFactory;
-import com.imsweb.mph.MphInput;
-import com.imsweb.mph.MphOutput;
-import com.imsweb.mph.MphUtils;
-import com.imsweb.mph.MphUtils.MpResult;
-import com.imsweb.naaccrxml.entity.Tumor;
-
-/**
- * This class can be used to generate CSV files with fake data, along with the result of the library.
- *
- * The class uses another library (data-generator) to create the data. The simplest way to run it
- * is to clone the project from GitHub and run it within your preferred IDE.
- */
-public class TestingDataCreation {
-
- public static void main(String[] args) throws IOException {
-
- // global parameters
- int numTests = 10000;
- int minDxYear = 2000;
- int maxDxYear = 2022;
-
- // the fake data generator
- NaaccrXmlDataGenerator generator = new NaaccrXmlDataGenerator(LayoutFactory.getNaaccrXmlLayout(LayoutFactory.LAYOUT_ID_NAACCR_XML_23));
-
- // the options for the generator
- NaaccrDataGeneratorOptions options = new NaaccrDataGeneratorOptions();
- options.setMinDxYear(minDxYear);
- options.setMaxDxYear(maxDxYear);
-
- // by default, the testing file will be created in the "build" folder of the project
- File targetFolder = new File(System.getProperty("user.dir") + "/build");
- if (!targetFolder.exists() && !targetFolder.mkdir())
- throw new IOException("Unable to create target folder");
- File targetFile = new File(targetFolder, "mph-testing-" + minDxYear + "-" + maxDxYear + ".csv.gz");
-
- // execute the run
- int numTestsCrated = 0;
- try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(new GZIPOutputStream(Files.newOutputStream(targetFile.toPath())), StandardCharsets.US_ASCII))) {
- writer.write("year1,month1,day1,site1,hist1,beh1,lat1,year2,month2,day2,site2,hist2,beh2,lat2,result,reason\n");
- while (numTestsCrated < numTests) {
- Tumor tumor1 = generator.generatePatient(1).getTumor(0);
- Tumor tumor2 = generator.generatePatient(1).getTumor(0);
-
- MphInput input1 = new MphInput();
- String dxDate1 = tumor1.getItemValue("dateOfDiagnosis");
- input1.setDateOfDiagnosisYear(dxDate1.substring(0, 4));
- input1.setDateOfDiagnosisMonth(dxDate1.substring(4, 6));
- input1.setDateOfDiagnosisDay(dxDate1.substring(6, 8));
- input1.setPrimarySite(tumor1.getItemValue("primarySite"));
- input1.setHistologyIcdO3(tumor1.getItemValue("histologicTypeIcdO3"));
- input1.setBehaviorIcdO3(tumor1.getItemValue("behaviorCodeIcdO3"));
- input1.setLaterality(tumor1.getItemValue("laterality"));
-
- MphInput input2 = new MphInput();
- String dxDate2 = tumor2.getItemValue("dateOfDiagnosis");
- input2.setDateOfDiagnosisYear(dxDate2.substring(0, 4));
- input2.setDateOfDiagnosisMonth(dxDate2.substring(4, 6));
- input2.setDateOfDiagnosisDay(dxDate2.substring(6, 8));
- input2.setPrimarySite(tumor2.getItemValue("primarySite"));
- input2.setHistologyIcdO3(String.valueOf(tumor2.getItemValue("histologicTypeIcdO3")));
- input2.setBehaviorIcdO3(String.valueOf(tumor2.getItemValue("behaviorCodeIcdO3")));
- input2.setLaterality(String.valueOf(tumor2.getItemValue("laterality")));
-
- MphOutput output = MphUtils.getInstance().computePrimaries(input1, input2);
-
- // this is a bit tricky, but since we use fake data, most of the inputs will return a multiple-primaries result with a reason
- // that the two tumors are in different groups; those cases are not very interesting to test, and so we filter them out...
- if (output.getResult() != MpResult.INVALID_INPUT && !output.getReason().equals("The two sets of parameters belong to two different cancer groups.")) {
-
- List row = new ArrayList<>();
- row.add(input1.getDateOfDiagnosisYear());
- row.add(input1.getDateOfDiagnosisMonth());
- row.add(input1.getDateOfDiagnosisDay());
- row.add(input1.getPrimarySite());
- row.add(input1.getHistology());
- row.add(input1.getBehavior());
- row.add(input1.getLaterality());
- row.add(input2.getDateOfDiagnosisYear());
- row.add(input2.getDateOfDiagnosisMonth());
- row.add(input2.getDateOfDiagnosisDay());
- row.add(input2.getPrimarySite());
- row.add(input2.getHistology());
- row.add(input2.getBehavior());
- row.add(input2.getLaterality());
- row.add(output.getResult().toString());
- row.add(output.getReason().replace("\n", "\\n"));
-
- writer.write(String.join(",", row));
- writer.write("\n");
-
- numTestsCrated++;
- }
- }
- }
- }
-}
diff --git a/src/test/resources/hematopoietic-groups-test-invalid.csv b/src/test/resources/hematopoietic-groups-test-invalid.csv
new file mode 100644
index 0000000..6c46825
--- /dev/null
+++ b/src/test/resources/hematopoietic-groups-test-invalid.csv
@@ -0,0 +1,3 @@
+HistologyGroupNumber,HistologyLow,HistologyHigh,HistologyGroupName
+1,9590,9590,"Malignant lymphoma, NOS"
+2,9591,9591,Malignant lymphoma, non-Hodgkin, NOS
\ No newline at end of file
diff --git a/src/test/resources/hematopoietic-groups-test.csv b/src/test/resources/hematopoietic-groups-test.csv
new file mode 100644
index 0000000..b45f615
--- /dev/null
+++ b/src/test/resources/hematopoietic-groups-test.csv
@@ -0,0 +1,3 @@
+HistologyGroupNumber,HistologyLow,HistologyHigh,HistologyGroupName
+1,9590,9590,"Malignant lymphoma, NOS"
+2,9591,9591,"Malignant lymphoma, non-Hodgkin, NOS"
\ No newline at end of file
diff --git a/src/test/resources/hematopoietic-pairs-test-invalid.csv b/src/test/resources/hematopoietic-pairs-test-invalid.csv
new file mode 100644
index 0000000..90368b6
--- /dev/null
+++ b/src/test/resources/hematopoietic-pairs-test-invalid.csv
@@ -0,0 +1,4 @@
+morphology,valid start year,valid end year,start year,end year,same primary
+9742/3,2001,,2001,2009,9590/3
+9741/3,2001,,2001,2009,9590/3
+9741/3,2001
\ No newline at end of file
diff --git a/src/test/resources/hematopoietic-pairs-test.csv b/src/test/resources/hematopoietic-pairs-test.csv
new file mode 100644
index 0000000..062af5c
--- /dev/null
+++ b/src/test/resources/hematopoietic-pairs-test.csv
@@ -0,0 +1,4 @@
+morphology,valid start year,valid end year,start year,end year,same primary
+9742/3,2001,,2001,2009,9590/3
+9741/3,2001,,2001,2009,9590/3
+9741/3,2001,,2001,2009,9740/3
\ No newline at end of file
diff --git a/src/test/resources/mph-library-version.txt b/src/test/resources/mph-library-version.txt
new file mode 100644
index 0000000..d7992d9
--- /dev/null
+++ b/src/test/resources/mph-library-version.txt
@@ -0,0 +1 @@
+1.0-UNIT-TESTS
\ No newline at end of file