From 954309e4037ca79a06f8dfee178aa2773db497b2 Mon Sep 17 00:00:00 2001 From: depryf Date: Sun, 26 Oct 2025 10:24:19 -0400 Subject: [PATCH 1/3] Made code compatible with Java 8 again --- README.md | 34 ---- build.gradle | 16 +- .../imsweb/mph/DefaultHematoDataProvider.java | 87 +------- src/main/java/com/imsweb/mph/MphGroup.java | 16 +- src/main/java/com/imsweb/mph/MphInput.java | 8 +- src/main/java/com/imsweb/mph/MphUtils.java | 19 +- .../com/imsweb/mph/internal/CsvUtils.java | 190 ++++++++++++++++++ .../java/com/imsweb/mph/internal/Range.java | 31 +++ .../com/imsweb/mph/mpgroups/GroupUtility.java | 55 ++--- .../mpgroups/Mp1998HematopoieticGroup.java | 28 +-- .../mpgroups/Mp2001HematopoieticGroup.java | 35 +--- .../Mp2021CutaneousMelanomaGroup.java | 4 +- .../resources/hemato_data_info.properties | 4 +- src/test/java/lab/HematoDataLab.java | 27 ++- src/test/java/lab/TestingDataCreation.java | 114 ----------- 15 files changed, 316 insertions(+), 352 deletions(-) create mode 100644 src/main/java/com/imsweb/mph/internal/CsvUtils.java create mode 100644 src/main/java/com/imsweb/mph/internal/Range.java delete mode 100644 src/test/java/lab/TestingDataCreation.java diff --git a/README.md b/README.md index 480f7cb..1507bb9 100644 --- a/README.md +++ b/README.md @@ -87,40 +87,6 @@ If histology is not in the range 9590-9993, one of the following solid tumors se If DX year is 2006 or earlier and the case is not Benign Brain (C700-C729, C751-C753 with behavior 0/1), the "2006 and earlier Solid Malignant" rules will be used.

If DX year is 2006 or earlier and the case is Benign Brain (C700-C729, C751-C753 with behavior 0/1), the "2006 and earlier Benign Brain" rules will be used. -## Testing Data - -The project contains [a lab class](https://github.com/imsweb/mph/blob/master/src/test/java/lab/TestingDataCreation.java) that can be used to generate CSV files -that contains fake data along with the library result. - -A sample testing file is available in the project: [mph-testing-2000-2022.csv.gz](https://github.com/imsweb/mph/blob/master/src/test/resources/mph-testing-2000-2022.csv.gz) - -To create larger file, clone the project and execute the main method of that class locally. - -The class allows the following parameters (defined in the top of the main method): - - numTests: the number of rows for the generated CSV file - - minDxYear: the minimum DX year to use - - maxDxYear: the maximum DX year to use - -The CSV files will contain the following columns: - - year1 - - month1 - - day1 - - site1 - - hist1 - - beh1 - - lat1 - - year2 - - month2 - - day2 - - site2 - - hist2 - - beh2 - - lat2 - - result - - reason - -The lab class uses the [Data Generator](https://github.com/imsweb/data-generator) library to create the fake data. - ## About SEER This library was developed through the [SEER](http://seer.cancer.gov/) program. 
diff --git a/build.gradle b/build.gradle index 3747b25..9fb9b69 100644 --- a/build.gradle +++ b/build.gradle @@ -1,9 +1,11 @@ -import com.vanniktech.maven.publish.* +import com.vanniktech.maven.publish.JavaLibrary +import com.vanniktech.maven.publish.JavadocJar +import com.vanniktech.maven.publish.SonatypeHost plugins { id 'java-library' id 'jacoco' - id 'com.vanniktech.maven.publish' version '0.34.0' // publish to Maven Central + id 'com.vanniktech.maven.publish' version '0.31.0' // publish to Maven Central id 'com.github.ben-manes.versions' version '0.52.0' // check for out-of-date dependencies (run 'dependencyUpdates' manually) id 'com.github.spotbugs' version '6.4.2' // spotbugs code analysis id 'org.sonarqube' version '6.3.1.5724' // sonarQube analysis @@ -20,12 +22,8 @@ repositories { } dependencies { - implementation 'org.apache.commons:commons-lang3:3.18.0' - implementation 'de.siegmar:fastcsv:3.7.0' - testImplementation 'junit:junit:4.13.2' testImplementation 'com.imsweb:seerapi-client-java:5.9' - testImplementation 'com.imsweb:data-generator:2.2' } // enforce UTF-8, display the compilation warnings @@ -35,8 +33,8 @@ tasks.withType(JavaCompile).configureEach { } java { - sourceCompatibility = JavaVersion.VERSION_11 - targetCompatibility = JavaVersion.VERSION_11 + sourceCompatibility = JavaVersion.VERSION_1_8 + targetCompatibility = JavaVersion.VERSION_1_8 } tasks.withType(JavaCompile).configureEach { @@ -110,7 +108,7 @@ tasks.register('hematoDataTest', Test) { mavenPublishing { configure(new JavaLibrary(new JavadocJar.Javadoc(), true)) - publishToMavenCentral(true) + publishToMavenCentral(SonatypeHost.CENTRAL_PORTAL, true) // to upgrade the plugin, just remove the first parameter...
signAllPublications() pom { diff --git a/src/main/java/com/imsweb/mph/DefaultHematoDataProvider.java b/src/main/java/com/imsweb/mph/DefaultHematoDataProvider.java index 004c12e..326400b 100644 --- a/src/main/java/com/imsweb/mph/DefaultHematoDataProvider.java +++ b/src/main/java/com/imsweb/mph/DefaultHematoDataProvider.java @@ -5,24 +5,15 @@ import java.io.IOException; import java.io.InputStream; -import java.io.InputStreamReader; -import java.io.Reader; -import java.nio.charset.StandardCharsets; import java.text.ParseException; import java.text.SimpleDateFormat; -import java.util.ArrayList; import java.util.Collections; import java.util.Date; -import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Properties; -import org.apache.commons.lang3.StringUtils; - -import de.siegmar.fastcsv.reader.CsvReader; -import de.siegmar.fastcsv.reader.NamedCsvRecord; - +import com.imsweb.mph.internal.CsvUtils; import com.imsweb.mph.internal.HematoDTO; /** @@ -36,79 +27,9 @@ public class DefaultHematoDataProvider implements HematoDataProvider { private final Map> _transformFromDto; public DefaultHematoDataProvider() { - - _samePrimaryDto = new HashMap<>(); - try (InputStream is = Thread.currentThread().getContextClassLoader().getResourceAsStream("Hematopoietic2010SamePrimaryPairs.csv")) { - if (is == null) - throw new IllegalStateException("Unable to get Hematopoietic2010SamePrimaryPairs.csv"); - try (Reader reader = new InputStreamReader(is, StandardCharsets.US_ASCII); CsvReader csvReader = CsvReader.builder().ofNamedCsvRecord(reader)) { - csvReader.stream().forEach(line -> { - Short validStartYear = StringUtils.isNotBlank(line.getField(1)) ? Short.valueOf(line.getField(1)) : null; - Short validEndYear = StringUtils.isNotBlank(line.getField(2)) ? Short.valueOf(line.getField(2)) : null; - Short startYear = StringUtils.isNotBlank(line.getField(3)) ? Short.valueOf(line.getField(3)) : null; - Short endYear = StringUtils.isNotBlank(line.getField(4)) ? 
Short.valueOf(line.getField(4)) : null; - if (_samePrimaryDto.containsKey(line.getField(0))) - _samePrimaryDto.get(line.getField(0)).add(new HematoDTO(validStartYear, validEndYear, startYear, endYear, line.getField(5))); - else { - List list = new ArrayList<>(); - list.add(new HematoDTO(validStartYear, validEndYear, startYear, endYear, line.getField(5))); - _samePrimaryDto.put(line.getField(0), list); - } - }); - } - } - catch (IOException e) { - throw new IllegalStateException(e); - } - - _transformToDto = new HashMap<>(); - try (InputStream is = Thread.currentThread().getContextClassLoader().getResourceAsStream("Hematopoietic2010TransformToPairs.csv")) { - if (is == null) - throw new IllegalStateException("Unable to get Hematopoietic2010TransformToPairs.csv"); - try (Reader reader = new InputStreamReader(is, StandardCharsets.US_ASCII); CsvReader csvReader = CsvReader.builder().ofNamedCsvRecord(reader)) { - csvReader.stream().forEach(line -> { - Short validStartYear = StringUtils.isNotBlank(line.getField(1)) ? Short.valueOf(line.getField(1)) : null; - Short validEndYear = StringUtils.isNotBlank(line.getField(2)) ? Short.valueOf(line.getField(2)) : null; - Short startYear = StringUtils.isNotBlank(line.getField(3)) ? Short.valueOf(line.getField(3)) : null; - Short endYear = StringUtils.isNotBlank(line.getField(4)) ? 
Short.valueOf(line.getField(4)) : null; - if (_transformToDto.containsKey(line.getField(0))) - _transformToDto.get(line.getField(0)).add(new HematoDTO(validStartYear, validEndYear, startYear, endYear, line.getField(5))); - else { - List list = new ArrayList<>(); - list.add(new HematoDTO(validStartYear, validEndYear, startYear, endYear, line.getField(5))); - _transformToDto.put(line.getField(0), list); - } - }); - } - } - catch (IOException e) { - throw new IllegalStateException(e); - } - - _transformFromDto = new HashMap<>(); - try (InputStream is = Thread.currentThread().getContextClassLoader().getResourceAsStream("Hematopoietic2010TransformFromPairs.csv")) { - if (is == null) - throw new IllegalStateException("Unable to get Hematopoietic2010TransformFromPairs.csv"); - try (Reader reader = new InputStreamReader(is, StandardCharsets.US_ASCII); CsvReader csvReader = CsvReader.builder().ofNamedCsvRecord(reader)) { - csvReader.stream().forEach(line -> { - Short validStartYear = StringUtils.isNotBlank(line.getField(1)) ? Short.valueOf(line.getField(1)) : null; - Short validEndYear = StringUtils.isNotBlank(line.getField(2)) ? Short.valueOf(line.getField(2)) : null; - Short startYear = StringUtils.isNotBlank(line.getField(3)) ? Short.valueOf(line.getField(3)) : null; - Short endYear = StringUtils.isNotBlank(line.getField(4)) ? 
Short.valueOf(line.getField(4)) : null; - if (_transformFromDto.containsKey(line.getField(0))) - _transformFromDto.get(line.getField(0)).add(new HematoDTO(validStartYear, validEndYear, startYear, endYear, line.getField(5))); - else { - List list = new ArrayList<>(); - list.add(new HematoDTO(validStartYear, validEndYear, startYear, endYear, line.getField(5))); - _transformFromDto.put(line.getField(0), list); - } - }); - } - } - catch (IOException e) { - throw new IllegalStateException(e); - } - + _samePrimaryDto = CsvUtils.parseHematoCsvFile("Hematopoietic2010SamePrimaryPairs.csv"); + _transformToDto = CsvUtils.parseHematoCsvFile("Hematopoietic2010TransformToPairs.csv"); + _transformFromDto = CsvUtils.parseHematoCsvFile("Hematopoietic2010TransformFromPairs.csv"); } @Override diff --git a/src/main/java/com/imsweb/mph/MphGroup.java b/src/main/java/com/imsweb/mph/MphGroup.java index 5ffdf31..5def4b3 100644 --- a/src/main/java/com/imsweb/mph/MphGroup.java +++ b/src/main/java/com/imsweb/mph/MphGroup.java @@ -6,10 +6,10 @@ import java.util.ArrayList; import java.util.List; -import org.apache.commons.lang3.Range; - +import com.imsweb.mph.internal.Range; import com.imsweb.mph.mpgroups.GroupUtility; +@SuppressWarnings("unused") public abstract class MphGroup { protected String _id; @@ -30,17 +30,17 @@ public abstract class MphGroup { protected List _rules; - private List> _siteIncRanges; + private final List _siteIncRanges; - private List> _siteExcRanges; + private final List _siteExcRanges; - private List> _histIncRanges; + private final List _histIncRanges; - private List> _histExcRanges; + private final List _histExcRanges; - private List> _behavIncRanges; + private final List _behavIncRanges; - private List> _yearIncRanges; + private final List _yearIncRanges; protected MphGroup(String id, String name, String siteInclusions, String siteExclusions, String histInclusions, String histExclusions, String behavInclusions, String yearInclusions) { _id = id; diff --git 
a/src/main/java/com/imsweb/mph/MphInput.java b/src/main/java/com/imsweb/mph/MphInput.java index 5fd44ab..0d11647 100644 --- a/src/main/java/com/imsweb/mph/MphInput.java +++ b/src/main/java/com/imsweb/mph/MphInput.java @@ -3,8 +3,6 @@ */ package com.imsweb.mph; -import org.apache.commons.lang3.math.NumberUtils; - import com.imsweb.mph.mpgroups.GroupUtility; /** @@ -43,7 +41,6 @@ public void setPrimarySite(String primarySite) { _primarySite = primarySite; } - public void setHistologyIcdO3(String histologyIcdO3) { _histologyIcdO3 = histologyIcdO3; } @@ -61,12 +58,12 @@ public void setBehaviorIcdO2(String behaviorIcdO2) { } public String getHistology() { - int year = NumberUtils.isDigits(_dateOfDiagnosisYear) ? Integer.parseInt(_dateOfDiagnosisYear) : 9999; + int year = MphUtils.isDigits(_dateOfDiagnosisYear) ? Integer.parseInt(_dateOfDiagnosisYear) : 9999; return year < 2001 && GroupUtility.validateHistology(_histologyIcdO2) ? _histologyIcdO2 : _histologyIcdO3; } public String getBehavior() { - int year = NumberUtils.isDigits(_dateOfDiagnosisYear) ? Integer.parseInt(_dateOfDiagnosisYear) : 9999; + int year = MphUtils.isDigits(_dateOfDiagnosisYear) ? Integer.parseInt(_dateOfDiagnosisYear) : 9999; return year < 2001 && GroupUtility.validateBehavior(_behaviorIcdO2) ? 
_behaviorIcdO2 : _behaviorIcdO3; } @@ -102,7 +99,6 @@ public void setDateOfDiagnosisDay(String dateOfDiagnosisDay) { _dateOfDiagnosisDay = dateOfDiagnosisDay; } - public String getIcdCode() { return getHistology() + "/" + getBehavior(); } diff --git a/src/main/java/com/imsweb/mph/MphUtils.java b/src/main/java/com/imsweb/mph/MphUtils.java index 9cabb07..a97af1f 100644 --- a/src/main/java/com/imsweb/mph/MphUtils.java +++ b/src/main/java/com/imsweb/mph/MphUtils.java @@ -11,9 +11,6 @@ import java.util.Map; import java.util.regex.Pattern; -import org.apache.commons.lang3.StringUtils; -import org.apache.commons.lang3.math.NumberUtils; - import com.imsweb.mph.internal.TempRuleResult; import com.imsweb.mph.mpgroups.GroupUtility; import com.imsweb.mph.mpgroups.Mp1998HematopoieticGroup; @@ -60,7 +57,9 @@ */ public final class MphUtils { - private final Pattern _morphology = Pattern.compile("^(\\d{4}/\\d)"); + private final static Pattern _MORPHOLOGY_PATTERN = Pattern.compile("^(\\d{4}/\\d)"); + + private final static Pattern _DIGITS_PATTERN = Pattern.compile("\\d+"); /** * The possible result of determining if two tumors are single or multiple primaries. @@ -117,6 +116,10 @@ public static synchronized MphUtils getInstance() { return _INSTANCE; } + public static boolean isDigits(String value) { + return value != null && _DIGITS_PATTERN.matcher(value).matches(); + } + /** * Constructor * This will use the default hemato db provider @@ -201,8 +204,8 @@ public MphUtils(HematoDataProvider provider) { public MphOutput computePrimaries(MphInput input1, MphInput input2) { MphOutput output = new MphOutput(); - int year1 = NumberUtils.isDigits(input1.getDateOfDiagnosisYear()) ? Integer.parseInt(input1.getDateOfDiagnosisYear()) : -1; - int year2 = NumberUtils.isDigits(input2.getDateOfDiagnosisYear()) ? Integer.parseInt(input2.getDateOfDiagnosisYear()) : -1; + int year1 = isDigits(input1.getDateOfDiagnosisYear()) ? 
Integer.parseInt(input1.getDateOfDiagnosisYear()) : -1; + int year2 = isDigits(input2.getDateOfDiagnosisYear()) ? Integer.parseInt(input2.getDateOfDiagnosisYear()) : -1; String site1 = input1.getPrimarySite(); String site2 = input2.getPrimarySite(); String hist1 = input1.getHistology(); @@ -273,7 +276,7 @@ else if (result.getFinalResult() != null) { if (potentialResult == null || potentialResult.getPotentialResult().equals(result.getFinalResult())) { output.setResult(result.getFinalResult()); output.setStep(rule.getStep()); - output.setReason(StringUtils.isNotBlank(result.getMessage()) ? result.getMessage() : rule.getReason()); + output.setReason(result.getMessage() != null && !result.getMessage().trim().isEmpty() ? result.getMessage() : rule.getReason()); if (potentialResult != null && potentialResult.getPotentialResult().equals(result.getFinalResult())) output.getAppliedRules().addAll(rulesAppliedAfterQuestionable); } @@ -332,7 +335,7 @@ public Date getHematoDataLastUpdated() { * @return true if two diseases are same primary and false otherwise. */ public boolean isHematoSamePrimary(String morph1, String morph2, int year1, int year2) { - if (morph1 == null || morph2 == null || !_morphology.matcher(morph1).matches() || !_morphology.matcher(morph2).matches()) + if (morph1 == null || morph2 == null || !_MORPHOLOGY_PATTERN.matcher(morph1).matches() || !_MORPHOLOGY_PATTERN.matcher(morph2).matches()) return false; if (morph1.equals(morph2)) return true; diff --git a/src/main/java/com/imsweb/mph/internal/CsvUtils.java b/src/main/java/com/imsweb/mph/internal/CsvUtils.java new file mode 100644 index 0000000..88e4f46 --- /dev/null +++ b/src/main/java/com/imsweb/mph/internal/CsvUtils.java @@ -0,0 +1,190 @@ +/* + * Copyright (C) 2025 Information Management Services, Inc. 
+ */ +package com.imsweb.mph.internal; + +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.LineNumberReader; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; + +public final class CsvUtils { + + private CsvUtils() { + // static utility class + } + + public static String writeCsvValues(String[] values) { + StringBuilder buf = new StringBuilder(); + for (String value : values) { + if (buf.length() > 0) + buf.append(','); + if (value != null && (value.contains(",") || value.contains("\""))) + buf.append("\"").append(value.replace("\"", "\"\"")).append("\""); + else if (value != null) + buf.append(value); + } + return buf.toString(); + } + + public static Map> parseHematoCsvFile(String filename) { + Map> result = new LinkedHashMap<>(); + + try (InputStream is = Thread.currentThread().getContextClassLoader().getResourceAsStream(filename)) { + if (is == null) + throw new IllegalStateException("Unable to read " + filename + "; unable to find data file"); + try (LineNumberReader reader = new LineNumberReader(new InputStreamReader(is, StandardCharsets.UTF_8))) { + String line = reader.readLine(); + while (line != null) { + if (reader.getLineNumber() != 1) { + List fields = CsvUtils.parseCsvLine(reader.getLineNumber(), line); + + Short validStartYear = fields.get(1) != null && !fields.get(1).trim().isEmpty() ? Short.valueOf(fields.get(1)) : null; + Short validEndYear = fields.get(2) != null && !fields.get(2).trim().isEmpty() ? Short.valueOf(fields.get(2)) : null; + Short startYear = fields.get(3) != null && !fields.get(3).trim().isEmpty() ? Short.valueOf(fields.get(3)) : null; + Short endYear = fields.get(4) != null && !fields.get(4).trim().isEmpty() ?
Short.valueOf(fields.get(4)) : null; + if (result.containsKey(fields.get(0))) + result.get(fields.get(0)).add(new HematoDTO(validStartYear, validEndYear, startYear, endYear, fields.get(5))); + else { + List list = new ArrayList<>(); + list.add(new HematoDTO(validStartYear, validEndYear, startYear, endYear, fields.get(5))); + result.put(fields.get(0), list); + } + + } + line = reader.readLine(); + } + } + } + catch (IOException e) { + throw new IllegalStateException(e); + } + + return result; + } + + public static List parseGroupCsvFile(String filename) { + List result = new ArrayList<>(); + + try (InputStream is = Thread.currentThread().getContextClassLoader().getResourceAsStream(filename)) { + if (is == null) + throw new IllegalStateException("Unable to read " + filename + "; unable to find data file"); + try (LineNumberReader reader = new LineNumberReader(new InputStreamReader(is, StandardCharsets.UTF_8))) { + String line = reader.readLine(); + while (line != null) { + if (reader.getLineNumber() != 1) + result.add(CsvUtils.parseCsvLine(reader.getLineNumber(), line).toArray(new String[0])); + line = reader.readLine(); + } + } + } + catch (IOException e) { + throw new IllegalStateException(e); + } + + return result; + } + + public static List parseCsvLine(int lineNumber, String line) throws IOException { + List result = new ArrayList<>(); + + char cQuote = '"'; + char cDelimiter = ','; + int curIndex = 0; + int nextQuote; + int nextDelimiter; + + StringBuilder buf = new StringBuilder(); + buf.append(cQuote); + String singleQuotes = buf.toString(); + buf.append(cQuote); + String doubleQuotes = buf.toString(); + + String value; + while (curIndex < line.length()) { + if (line.charAt(curIndex) == cQuote) { + // handle quoted value + nextQuote = getNextSingleQuote(line, cQuote, curIndex); + if (nextQuote < 0) + throw new IOException("Line " + lineNumber + ": found an unmatched quote"); + else { + result.add(line.substring(curIndex + 1, nextQuote).replace(doubleQuotes, 
singleQuotes)); + // update the current index to be after delimiter, after the ending quote + curIndex = nextQuote; + if (curIndex + 1 < line.length()) { + // if there is a next value, set current index to be after delimiter + if (line.charAt(curIndex + 1) == cDelimiter) { + curIndex += 2; + // handle case where last value is empty + if (curIndex == line.length()) + result.add(""); + } + // else character after ending quote is not EOL and not delimiter, stop parsing + else + throw new IOException("Line " + lineNumber + ": expected a delimiter after the quote"); + } + else + // end of line is after ending quote, stop parsing + curIndex++; + } + } + else { + // handle unquoted value + nextDelimiter = getNextDelimiter(line, cDelimiter, curIndex); + value = line.substring(curIndex, nextDelimiter).replace(doubleQuotes, singleQuotes); + // unquoted values should not contain any quotes + if (value.contains(singleQuotes)) + throw new IOException("Line " + lineNumber + ": value contains some quotes but does not start with a quote"); + else { + result.add(value); + curIndex = nextDelimiter + 1; + // handle case where last value is empty + if (curIndex == line.length()) + result.add(""); + } + } + } + + return result; + } + + private static int getNextSingleQuote(String line, char quote, int from) { + if (from >= line.length()) + return -1; + + int index = from + 1; + boolean found = false; + while ((index < line.length()) && !found) { + if (line.charAt(index) != quote) + index++; + else { + if ((index + 1 == line.length()) || (line.charAt(index + 1) != quote)) + found = true; + else + index += 2; + } + + } + + index = (index == line.length()) ? 
-1 : index; + + return index; + } + + private static int getNextDelimiter(String line, char delimiter, int from) { + if (from >= line.length()) + return line.length(); + + int index = from; + while ((index < line.length()) && (line.charAt(index) != delimiter)) + index++; + + return index; + } + +} diff --git a/src/main/java/com/imsweb/mph/internal/Range.java b/src/main/java/com/imsweb/mph/internal/Range.java new file mode 100644 index 0000000..ab1ae05 --- /dev/null +++ b/src/main/java/com/imsweb/mph/internal/Range.java @@ -0,0 +1,31 @@ +/* + * Copyright (C) 2025 Information Management Services, Inc. + */ +package com.imsweb.mph.internal; + +public class Range { + + private final Integer minimum; + + private final Integer maximum; + + public Range(Integer minimum, Integer maximum) { + this.minimum = minimum; + this.maximum = maximum; + } + + public static Range of(final Integer fromInclusive, final Integer toInclusive) { + return new Range(fromInclusive, toInclusive); + } + + public static Range is(final Integer value) { + return new Range(value, value); + } + + public boolean contains(final Integer element) { + if (element == null) + return false; + + return Integer.compare(element, minimum) > -1 && Integer.compare(element, maximum) < 1; + } +} diff --git a/src/main/java/com/imsweb/mph/mpgroups/GroupUtility.java b/src/main/java/com/imsweb/mph/mpgroups/GroupUtility.java index 9d9fd5c..303aed0 100644 --- a/src/main/java/com/imsweb/mph/mpgroups/GroupUtility.java +++ b/src/main/java/com/imsweb/mph/mpgroups/GroupUtility.java @@ -11,17 +11,20 @@ import java.util.Collections; import java.util.List; import java.util.Objects; - -import org.apache.commons.lang3.Range; -import org.apache.commons.lang3.StringUtils; -import org.apache.commons.lang3.math.NumberUtils; +import java.util.regex.Pattern; import com.imsweb.mph.MphConstants; import com.imsweb.mph.MphInput; +import com.imsweb.mph.MphUtils; +import com.imsweb.mph.internal.Range; public final class GroupUtility { + 
private static final Pattern _SPLIT_COMMA = Pattern.compile(","); + private static final Pattern _SPLIT_DASH = Pattern.compile("-"); + private GroupUtility() { + // static utility class } /** @@ -35,14 +38,14 @@ public static boolean validateProperties(String primarySite, String histology, S * Validates primary site */ public static boolean validateSite(String site) { - return site != null && site.length() == 4 && site.startsWith("C") && NumberUtils.isDigits(site.substring(1)) && !"C809".equalsIgnoreCase(site); + return site != null && site.length() == 4 && site.startsWith("C") && MphUtils.isDigits(site.substring(1)) && !"C809".equalsIgnoreCase(site); } /** * Validates histology */ public static boolean validateHistology(String hist) { - return NumberUtils.isDigits(hist) && Integer.parseInt(hist) >= 8000 && Integer.parseInt(hist) <= 9999; + return MphUtils.isDigits(hist) && Integer.parseInt(hist) >= 8000 && Integer.parseInt(hist) <= 9999; } /** @@ -88,15 +91,15 @@ public static boolean sameKnownDateParts(MphInput i1, MphInput i2) { DateFieldParts date = new DateFieldParts(i1, i2); return date.getYear1() != null && date.getYear1().equals(date.getYear2()) && (date.getMonth1() == null || date.getMonth2() == null || (date.getMonth1().equals(date.getMonth2()) && - (date.getDay1() == null || date.getDay2() == null || date.getDay1().equals(date.getDay2())))); + (date.getDay1() == null || date.getDay2() == null || date.getDay1().equals(date.getDay2())))); } /** * Checks if integer value is in a list of ranges */ - public static boolean isContained(List> list, Integer value) { + public static boolean isContained(List list, Integer value) { if (list != null && !list.isEmpty()) - for (Range range : list) + for (Range range : list) if (range.contains(value)) return true; return false; @@ -105,14 +108,16 @@ public static boolean isContained(List> list, Integer value) { /** * computes list of range values from string */ - public static List> computeRange(String rawValue, boolean 
isSite) { + public static List computeRange(String rawValue, boolean isSite) { if (rawValue == null) return Collections.emptyList(); - List> result = new ArrayList<>(); + List result = new ArrayList<>(); - for (String item : StringUtils.split(rawValue, ',')) { - String[] parts = StringUtils.split(item.trim(), '-'); + for (String item : _SPLIT_COMMA.split(rawValue)) { + if (item.trim().isEmpty()) + continue; + String[] parts = _SPLIT_DASH.split(item.trim()); if (parts.length == 1) { if (isSite) result.add(Range.is(Integer.parseInt(parts[0].trim().substring(1)))); @@ -138,9 +143,11 @@ public static List expandList(List list) { if (list == null || list.isEmpty()) return list; for (String item : list) { - String[] ranges = StringUtils.split(item.trim(), ','); + String[] ranges = _SPLIT_COMMA.split(item.trim()); for (String range : ranges) { - String[] parts = StringUtils.split(range.trim(), '-'); + if (range.trim().isEmpty()) + continue; + String[] parts = _SPLIT_DASH.split(range.trim()); if (parts.length <= 1) result.add(range); else { @@ -365,9 +372,9 @@ else if (endMon != null) { * Returns the site, hist/beh information of the input */ public static String getSiteHistInfo(String site, String hist, String beh, int year) { - return (StringUtils.isBlank(site) ? "Unknown Site" : site) + ", " - + (StringUtils.isBlank(hist) ? "Unknown Histology" : hist) + "/" - + (StringUtils.isBlank(beh) ? "Unknown Behavior" : beh) + " " + return (site == null || site.trim().isEmpty() ? "Unknown Site" : site) + ", " + + (hist == null || hist.trim().isEmpty() ? "Unknown Histology" : hist) + "/" + + (beh == null || beh.trim().isEmpty() ? "Unknown Behavior" : beh) + " " + (validateYear(year) ? ("with year of diagnosis " + year) : "with unknown year of diagnosis"); } @@ -381,18 +388,18 @@ static class DateFieldParts { private Integer _day2; public DateFieldParts(MphInput input1, MphInput input2) { - _year1 = NumberUtils.isDigits(input1.getDateOfDiagnosisYear()) ? 
Integer.parseInt(input1.getDateOfDiagnosisYear()) : null; - _year2 = NumberUtils.isDigits(input2.getDateOfDiagnosisYear()) ? Integer.parseInt(input2.getDateOfDiagnosisYear()) : null; - _month1 = NumberUtils.isDigits(input1.getDateOfDiagnosisMonth()) ? Integer.parseInt(input1.getDateOfDiagnosisMonth()) : null; + _year1 = MphUtils.isDigits(input1.getDateOfDiagnosisYear()) ? Integer.parseInt(input1.getDateOfDiagnosisYear()) : null; + _year2 = MphUtils.isDigits(input2.getDateOfDiagnosisYear()) ? Integer.parseInt(input2.getDateOfDiagnosisYear()) : null; + _month1 = MphUtils.isDigits(input1.getDateOfDiagnosisMonth()) ? Integer.parseInt(input1.getDateOfDiagnosisMonth()) : null; if (_month1 != null && (_month1 < 1 || _month1 > 12)) _month1 = null; - _month2 = NumberUtils.isDigits(input2.getDateOfDiagnosisMonth()) ? Integer.parseInt(input2.getDateOfDiagnosisMonth()) : null; + _month2 = MphUtils.isDigits(input2.getDateOfDiagnosisMonth()) ? Integer.parseInt(input2.getDateOfDiagnosisMonth()) : null; if (_month2 != null && (_month2 < 1 || _month2 > 12)) _month2 = null; - _day1 = _month1 != null && NumberUtils.isDigits(input1.getDateOfDiagnosisDay()) ? Integer.parseInt(input1.getDateOfDiagnosisDay()) : null; + _day1 = _month1 != null && MphUtils.isDigits(input1.getDateOfDiagnosisDay()) ? Integer.parseInt(input1.getDateOfDiagnosisDay()) : null; if (_year1 != null && _day1 != null && (_day1 < 1 || _day1 > LocalDate.of(_year1, _month1, 1).lengthOfMonth())) _day1 = null; - _day2 = _month2 != null && NumberUtils.isDigits(input2.getDateOfDiagnosisDay()) ? Integer.parseInt(input2.getDateOfDiagnosisDay()) : null; + _day2 = _month2 != null && MphUtils.isDigits(input2.getDateOfDiagnosisDay()) ? 
Integer.parseInt(input2.getDateOfDiagnosisDay()) : null; if (_year2 != null && _day2 != null && (_day2 < 1 || _day2 > LocalDate.of(_year2, _month2, 1).lengthOfMonth())) _day2 = null; } diff --git a/src/main/java/com/imsweb/mph/mpgroups/Mp1998HematopoieticGroup.java b/src/main/java/com/imsweb/mph/mpgroups/Mp1998HematopoieticGroup.java index 414c0b7..a9c08bc 100644 --- a/src/main/java/com/imsweb/mph/mpgroups/Mp1998HematopoieticGroup.java +++ b/src/main/java/com/imsweb/mph/mpgroups/Mp1998HematopoieticGroup.java @@ -3,23 +3,16 @@ */ package com.imsweb.mph.mpgroups; -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.io.Reader; -import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.List; -import de.siegmar.fastcsv.reader.CsvReader; -import de.siegmar.fastcsv.reader.NamedCsvRecord; - import com.imsweb.mph.MphConstants; import com.imsweb.mph.MphGroup; import com.imsweb.mph.MphInput; import com.imsweb.mph.MphRule; import com.imsweb.mph.MphUtils; import com.imsweb.mph.RuleExecutionContext; +import com.imsweb.mph.internal.CsvUtils; import com.imsweb.mph.internal.TempRuleResult; public class Mp1998HematopoieticGroup extends MphGroup { @@ -50,8 +43,8 @@ public TempRuleResult apply(MphInput i1, MphInput i2, RuleExecutionContext conte String secondDx = MphConstants.COMPARE_DX_FIRST_LATEST == laterDx ? 
i1.getHistology() : i2.getHistology(); for (String[] row : _HEMATOPOIETIC_1998) if ((firstDx.compareTo(row[0]) >= 0 && firstDx.compareTo(row[1]) <= 0 && secondDx.compareTo(row[2]) >= 0 && secondDx.compareTo(row[3]) <= 0) || - (MphConstants.COMPARE_DX_EQUAL == laterDx && (secondDx.compareTo(row[0]) >= 0 && secondDx.compareTo(row[1]) <= 0 && firstDx.compareTo(row[2]) >= 0 && firstDx.compareTo( - row[3]) <= 0))) { + (MphConstants.COMPARE_DX_EQUAL == laterDx && (secondDx.compareTo(row[0]) >= 0 && secondDx.compareTo(row[1]) <= 0 && firstDx.compareTo(row[2]) >= 0 && firstDx.compareTo( + row[3]) <= 0))) { result.setFinalResult(MphUtils.MpResult.SINGLE_PRIMARY); result.setMessage("Single primary based on SEER 1998 multiple primary rules for hematopoietic cancer."); return result; @@ -67,18 +60,7 @@ public TempRuleResult apply(MphInput i1, MphInput i2, RuleExecutionContext conte } private static synchronized void initializeLookup() { - if (_HEMATOPOIETIC_1998.isEmpty()) { - try (InputStream is = Thread.currentThread().getContextClassLoader().getResourceAsStream("Hematopoietic1998HistologyPairs.csv")) { - if (is == null) - throw new IllegalStateException("Unable to read Hematopoietic1998HistologyPairs.csv"); - try (Reader reader = new InputStreamReader(is, StandardCharsets.US_ASCII); CsvReader csvReader = CsvReader.builder().ofNamedCsvRecord(reader)) { - csvReader.stream().forEach(line -> _HEMATOPOIETIC_1998.add(line.getFields().toArray(new String[0]))); - } - } - catch (IOException e) { - throw new IllegalStateException(e); - } - } + if (_HEMATOPOIETIC_1998.isEmpty()) + _HEMATOPOIETIC_1998.addAll(CsvUtils.parseGroupCsvFile("Hematopoietic1998HistologyPairs.csv")); } - } diff --git a/src/main/java/com/imsweb/mph/mpgroups/Mp2001HematopoieticGroup.java b/src/main/java/com/imsweb/mph/mpgroups/Mp2001HematopoieticGroup.java index 33b6d42..26751a9 100644 --- a/src/main/java/com/imsweb/mph/mpgroups/Mp2001HematopoieticGroup.java +++ 
b/src/main/java/com/imsweb/mph/mpgroups/Mp2001HematopoieticGroup.java @@ -3,23 +3,16 @@ */ package com.imsweb.mph.mpgroups; -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.io.Reader; -import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.List; -import de.siegmar.fastcsv.reader.CsvReader; -import de.siegmar.fastcsv.reader.NamedCsvRecord; - import com.imsweb.mph.MphConstants; import com.imsweb.mph.MphGroup; import com.imsweb.mph.MphInput; import com.imsweb.mph.MphRule; import com.imsweb.mph.MphUtils; import com.imsweb.mph.RuleExecutionContext; +import com.imsweb.mph.internal.CsvUtils; import com.imsweb.mph.internal.TempRuleResult; public class Mp2001HematopoieticGroup extends MphGroup { @@ -79,27 +72,9 @@ public TempRuleResult apply(MphInput i1, MphInput i2, RuleExecutionContext conte } private static synchronized void initializeLookups() { - if (_2001_HEMATOPOIETIC_GROUPS.isEmpty() || _2001_HEMATOPOIETIC_GROUP_PAIRS.isEmpty()) { - try (InputStream is = Thread.currentThread().getContextClassLoader().getResourceAsStream("Hematopoietic2001HistologyGroups.csv")) { - if (is == null) - throw new IllegalStateException("Unable to read Hematopoietic2001HistologyGroups.csv"); - try (Reader reader = new InputStreamReader(is, StandardCharsets.US_ASCII); CsvReader csvReader = CsvReader.builder().ofNamedCsvRecord(reader)) { - csvReader.stream().forEach(line -> _2001_HEMATOPOIETIC_GROUPS.add(line.getFields().toArray(new String[0]))); - } - } - catch (IOException e) { - throw new IllegalStateException(e); - } - try (InputStream is = Thread.currentThread().getContextClassLoader().getResourceAsStream("Hematopoietic2001HistologyGroupPairs.csv")) { - if (is == null) - throw new IllegalStateException("Unable to read Hematopoietic2001HistologyGroupPairs.csv"); - try (Reader reader = new InputStreamReader(is, StandardCharsets.US_ASCII); CsvReader csvReader = 
CsvReader.builder().ofNamedCsvRecord(reader)) { - csvReader.stream().forEach(line -> _2001_HEMATOPOIETIC_GROUP_PAIRS.add(line.getFields().toArray(new String[0]))); - } - } - catch (IOException e) { - throw new IllegalStateException(e); - } - } + if (_2001_HEMATOPOIETIC_GROUPS.isEmpty()) + _2001_HEMATOPOIETIC_GROUPS.addAll(CsvUtils.parseGroupCsvFile("Hematopoietic2001HistologyGroups.csv")); + if (_2001_HEMATOPOIETIC_GROUP_PAIRS.isEmpty()) + _2001_HEMATOPOIETIC_GROUP_PAIRS.addAll(CsvUtils.parseGroupCsvFile("Hematopoietic2001HistologyGroupPairs.csv")); } } diff --git a/src/main/java/com/imsweb/mph/mpgroups/Mp2021CutaneousMelanomaGroup.java b/src/main/java/com/imsweb/mph/mpgroups/Mp2021CutaneousMelanomaGroup.java index 4906a5b..fe2d3a5 100644 --- a/src/main/java/com/imsweb/mph/mpgroups/Mp2021CutaneousMelanomaGroup.java +++ b/src/main/java/com/imsweb/mph/mpgroups/Mp2021CutaneousMelanomaGroup.java @@ -6,8 +6,6 @@ import java.util.Arrays; import java.util.List; -import org.apache.commons.lang3.StringUtils; - import com.imsweb.mph.MphConstants; import com.imsweb.mph.MphGroup; import com.imsweb.mph.MphInput; @@ -50,7 +48,7 @@ public TempRuleResult apply(MphInput i1, MphInput i2, RuleExecutionContext conte TempRuleResult result = new TempRuleResult(); List lateralityNotRequiredSites = Arrays.asList("C440", "C448", "C449"); if (lateralityNotRequiredSites.contains(i1.getPrimarySite()) || MphConstants.PAIRED_NO_INFORMATION.equals(i1.getLaterality()) || MphConstants.PAIRED_NO_INFORMATION.equals( - i2.getLaterality()) || StringUtils.isEmpty(i1.getLaterality()) || StringUtils.isEmpty(i2.getLaterality())) + i2.getLaterality()) || i1.getLaterality() == null || i1.getLaterality().trim().isEmpty() || i2.getLaterality() == null || i2.getLaterality().trim().isEmpty()) return result; // mid-line (5) is considered (look the example) if (!Arrays.asList(MphConstants.RIGHT, MphConstants.LEFT, MphConstants.MID_LINE).containsAll(Arrays.asList(i1.getLaterality(), i2.getLaterality()))) { diff 
--git a/src/main/resources/hemato_data_info.properties b/src/main/resources/hemato_data_info.properties index 1e381d0..51ebd97 100644 --- a/src/main/resources/hemato_data_info.properties +++ b/src/main/resources/hemato_data_info.properties @@ -1,2 +1,2 @@ -#Thu Aug 14 14:22:49 EDT 2025 -last_updated=202508141422 +#Sun Oct 26 09:53:21 EDT 2025 +last_updated=202510260953 diff --git a/src/test/java/lab/HematoDataLab.java b/src/test/java/lab/HematoDataLab.java index 6307e81..85ee65d 100644 --- a/src/test/java/lab/HematoDataLab.java +++ b/src/test/java/lab/HematoDataLab.java @@ -3,6 +3,7 @@ */ package lab; +import java.io.BufferedWriter; import java.io.File; import java.io.OutputStream; import java.io.OutputStreamWriter; @@ -16,8 +17,7 @@ import java.util.Map; import java.util.Properties; -import de.siegmar.fastcsv.writer.CsvWriter; - +import com.imsweb.mph.internal.CsvUtils; import com.imsweb.seerapi.client.NotFoundException; import com.imsweb.seerapi.client.SeerApi; import com.imsweb.seerapi.client.disease.Disease; @@ -42,9 +42,9 @@ public static void main(String[] args) throws Exception { File transformFromFile = new File(dir, "Hematopoietic2010TransformFromPairs.csv"); try (OutputStream hematoDataInfoOutput = Files.newOutputStream(hematoDataInfoFile.toPath()); - CsvWriter samePrimaryWriter = CsvWriter.builder().build(new OutputStreamWriter(Files.newOutputStream(samePrimaryFile.toPath()), StandardCharsets.UTF_8)); - CsvWriter transformToWriter = CsvWriter.builder().build(new OutputStreamWriter(Files.newOutputStream(transformToFile.toPath()), StandardCharsets.UTF_8)); - CsvWriter transformFromWriter = CsvWriter.builder().build(new OutputStreamWriter(Files.newOutputStream(transformFromFile.toPath()), StandardCharsets.UTF_8))) { + BufferedWriter samePrimaryWriter = new BufferedWriter(new OutputStreamWriter(Files.newOutputStream(samePrimaryFile.toPath()), StandardCharsets.UTF_8)); + BufferedWriter transformToWriter = new BufferedWriter(new 
OutputStreamWriter(Files.newOutputStream(transformToFile.toPath()), StandardCharsets.UTF_8)); + BufferedWriter transformFromWriter = new BufferedWriter(new OutputStreamWriter(Files.newOutputStream(transformFromFile.toPath()), StandardCharsets.UTF_8))) { SeerApi api = new SeerApi.Builder().connect(); List allDiseases = new ArrayList<>(); @@ -137,9 +137,20 @@ else if (previousTotal != total) { transformFrom.add(new String[] {morphology, validStartYear, validEndYear, startYear, endYear, transformFromMorphology.getIcdO3Morphology()}); } } - samePrimaryPairs.forEach(samePrimaryWriter::writeRecord); - transformTo.forEach(transformToWriter::writeRecord); - transformFrom.forEach(transformFromWriter::writeRecord); + + for (String[] line : samePrimaryPairs) { + samePrimaryWriter.write(CsvUtils.writeCsvValues(line)); + samePrimaryWriter.write("\r\n"); + } + for (String[] line : transformTo) { + transformToWriter.write(CsvUtils.writeCsvValues(line)); + transformToWriter.write("\r\n"); + } + for (String[] line : transformFrom) { + transformFromWriter.write(CsvUtils.writeCsvValues(line)); + transformFromWriter.write("\r\n"); + } + Properties prop = new Properties(); prop.setProperty("last_updated", LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyyMMddkkmm"))); prop.store(hematoDataInfoOutput, null); diff --git a/src/test/java/lab/TestingDataCreation.java b/src/test/java/lab/TestingDataCreation.java deleted file mode 100644 index 520daaf..0000000 --- a/src/test/java/lab/TestingDataCreation.java +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Copyright (C) 2023 Information Management Services, Inc. 
- */ -package lab; - -import java.io.BufferedWriter; -import java.io.File; -import java.io.IOException; -import java.io.OutputStreamWriter; -import java.nio.charset.StandardCharsets; -import java.nio.file.Files; -import java.util.ArrayList; -import java.util.List; -import java.util.zip.GZIPOutputStream; - -import com.imsweb.datagenerator.naaccr.NaaccrDataGeneratorOptions; -import com.imsweb.datagenerator.naaccr.NaaccrXmlDataGenerator; -import com.imsweb.layout.LayoutFactory; -import com.imsweb.mph.MphInput; -import com.imsweb.mph.MphOutput; -import com.imsweb.mph.MphUtils; -import com.imsweb.mph.MphUtils.MpResult; -import com.imsweb.naaccrxml.entity.Tumor; - -/** - * This class can be used to generate CSV files with fake data, along with the result of the library. - *

- * The class uses another library (data-generator) to create the data. The simplest way to run it - * is to clone the project from GitHub and run it within your preferred IDE. - */ -public class TestingDataCreation { - - public static void main(String[] args) throws IOException { - - // global parameters - int numTests = 10000; - int minDxYear = 2000; - int maxDxYear = 2022; - - // the fake data generator - NaaccrXmlDataGenerator generator = new NaaccrXmlDataGenerator(LayoutFactory.getNaaccrXmlLayout(LayoutFactory.LAYOUT_ID_NAACCR_XML_23)); - - // the options for the generator - NaaccrDataGeneratorOptions options = new NaaccrDataGeneratorOptions(); - options.setMinDxYear(minDxYear); - options.setMaxDxYear(maxDxYear); - - // by default, the testing file will be created in the "build" folder of the project - File targetFolder = new File(System.getProperty("user.dir") + "/build"); - if (!targetFolder.exists() && !targetFolder.mkdir()) - throw new IOException("Unable to create target folder"); - File targetFile = new File(targetFolder, "mph-testing-" + minDxYear + "-" + maxDxYear + ".csv.gz"); - - // execute the run - int numTestsCrated = 0; - try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(new GZIPOutputStream(Files.newOutputStream(targetFile.toPath())), StandardCharsets.US_ASCII))) { - writer.write("year1,month1,day1,site1,hist1,beh1,lat1,year2,month2,day2,site2,hist2,beh2,lat2,result,reason\n"); - while (numTestsCrated < numTests) { - Tumor tumor1 = generator.generatePatient(1).getTumor(0); - Tumor tumor2 = generator.generatePatient(1).getTumor(0); - - MphInput input1 = new MphInput(); - String dxDate1 = tumor1.getItemValue("dateOfDiagnosis"); - input1.setDateOfDiagnosisYear(dxDate1.substring(0, 4)); - input1.setDateOfDiagnosisMonth(dxDate1.substring(4, 6)); - input1.setDateOfDiagnosisDay(dxDate1.substring(6, 8)); - input1.setPrimarySite(tumor1.getItemValue("primarySite")); - 
input1.setHistologyIcdO3(tumor1.getItemValue("histologicTypeIcdO3")); - input1.setBehaviorIcdO3(tumor1.getItemValue("behaviorCodeIcdO3")); - input1.setLaterality(tumor1.getItemValue("laterality")); - - MphInput input2 = new MphInput(); - String dxDate2 = tumor2.getItemValue("dateOfDiagnosis"); - input2.setDateOfDiagnosisYear(dxDate2.substring(0, 4)); - input2.setDateOfDiagnosisMonth(dxDate2.substring(4, 6)); - input2.setDateOfDiagnosisDay(dxDate2.substring(6, 8)); - input2.setPrimarySite(tumor2.getItemValue("primarySite")); - input2.setHistologyIcdO3(String.valueOf(tumor2.getItemValue("histologicTypeIcdO3"))); - input2.setBehaviorIcdO3(String.valueOf(tumor2.getItemValue("behaviorCodeIcdO3"))); - input2.setLaterality(String.valueOf(tumor2.getItemValue("laterality"))); - - MphOutput output = MphUtils.getInstance().computePrimaries(input1, input2); - - // this is a bit tricky, but since we use fake data, most of the inputs will return a multiple-primaries result with a reason - // that the two tumors are in different groups; those cases are not very interesting to test, and so we filter them out... 
- if (output.getResult() != MpResult.INVALID_INPUT && !output.getReason().equals("The two sets of parameters belong to two different cancer groups.")) { - - List row = new ArrayList<>(); - row.add(input1.getDateOfDiagnosisYear()); - row.add(input1.getDateOfDiagnosisMonth()); - row.add(input1.getDateOfDiagnosisDay()); - row.add(input1.getPrimarySite()); - row.add(input1.getHistology()); - row.add(input1.getBehavior()); - row.add(input1.getLaterality()); - row.add(input2.getDateOfDiagnosisYear()); - row.add(input2.getDateOfDiagnosisMonth()); - row.add(input2.getDateOfDiagnosisDay()); - row.add(input2.getPrimarySite()); - row.add(input2.getHistology()); - row.add(input2.getBehavior()); - row.add(input2.getLaterality()); - row.add(output.getResult().toString()); - row.add(output.getReason().replace("\n", "\\n")); - - writer.write(String.join(",", row)); - writer.write("\n"); - - numTestsCrated++; - } - } - } - } -} From d5e7a9e6a42264958c581c0f8c65b98e759dae5b Mon Sep 17 00:00:00 2001 From: depryf Date: Mon, 27 Oct 2025 09:37:35 -0400 Subject: [PATCH 2/3] Added proper unit tests --- build.gradle | 3 + .../imsweb/mph/DefaultHematoDataProvider.java | 1 - src/main/java/com/imsweb/mph/MphUtils.java | 27 ++++++ .../com/imsweb/mph/internal/CsvUtils.java | 19 +++- .../java/com/imsweb/mph/MphUtilsTest.java | 23 +++-- .../com/imsweb/mph/internal/CsvUtilsTest.java | 94 +++++++++++++++++++ .../com/imsweb/mph/internal/RangeTest.java | 28 ++++++ .../hematopoietic-groups-test-invalid.csv | 3 + .../resources/hematopoietic-groups-test.csv | 3 + .../hematopoietic-pairs-test-invalid.csv | 4 + .../resources/hematopoietic-pairs-test.csv | 4 + src/test/resources/mph-library-version.txt | 1 + 12 files changed, 200 insertions(+), 10 deletions(-) create mode 100644 src/test/java/com/imsweb/mph/internal/CsvUtilsTest.java create mode 100644 src/test/java/com/imsweb/mph/internal/RangeTest.java create mode 100644 src/test/resources/hematopoietic-groups-test-invalid.csv create mode 100644 
src/test/resources/hematopoietic-groups-test.csv create mode 100644 src/test/resources/hematopoietic-pairs-test-invalid.csv create mode 100644 src/test/resources/hematopoietic-pairs-test.csv create mode 100644 src/test/resources/mph-library-version.txt diff --git a/build.gradle b/build.gradle index 9fb9b69..7a29672 100644 --- a/build.gradle +++ b/build.gradle @@ -61,6 +61,9 @@ jar { 'Automatic-Module-Name': 'com.imsweb.mph' ) } + from('VERSION') { + rename { fileName -> "mph-library-version.txt" } + } } // jacoco plugin settings diff --git a/src/main/java/com/imsweb/mph/DefaultHematoDataProvider.java b/src/main/java/com/imsweb/mph/DefaultHematoDataProvider.java index 326400b..c3f96cd 100644 --- a/src/main/java/com/imsweb/mph/DefaultHematoDataProvider.java +++ b/src/main/java/com/imsweb/mph/DefaultHematoDataProvider.java @@ -49,7 +49,6 @@ public List getTransformFrom(String morphology) { @Override public Date getDataLastUpdated() { - try (InputStream is = Thread.currentThread().getContextClassLoader().getResourceAsStream("hemato_data_info.properties")) { if (is == null) throw new IllegalStateException("Unable to get info properties"); diff --git a/src/main/java/com/imsweb/mph/MphUtils.java b/src/main/java/com/imsweb/mph/MphUtils.java index a97af1f..d6b8724 100644 --- a/src/main/java/com/imsweb/mph/MphUtils.java +++ b/src/main/java/com/imsweb/mph/MphUtils.java @@ -3,6 +3,11 @@ */ package com.imsweb.mph; +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Collections; import java.util.Date; @@ -116,12 +121,34 @@ public static synchronized MphUtils getInstance() { return _INSTANCE; } + /** + * Returns true if the provided values only contains digits (and at least one of them) + */ public static boolean isDigits(String value) { return value != null && _DIGITS_PATTERN.matcher(value).matches(); } + public static 
String getLibraryVersion() { + String result = "?"; + + try (InputStream is = Thread.currentThread().getContextClassLoader().getResourceAsStream("mph-library-version.txt")) { + if (is != null) { + try (BufferedReader reader = new BufferedReader(new InputStreamReader(is, StandardCharsets.US_ASCII))) { + result = reader.readLine(); + } + } + + } + catch (IOException | RuntimeException e) { + // ignored + } + + return result; + } + /** * Constructor + *
* This will use the default hemato db provider */ public MphUtils() { diff --git a/src/main/java/com/imsweb/mph/internal/CsvUtils.java b/src/main/java/com/imsweb/mph/internal/CsvUtils.java index 88e4f46..b5a091a 100644 --- a/src/main/java/com/imsweb/mph/internal/CsvUtils.java +++ b/src/main/java/com/imsweb/mph/internal/CsvUtils.java @@ -38,12 +38,18 @@ public static Map> parseHematoCsvFile(String filename) { try (InputStream is = Thread.currentThread().getContextClassLoader().getResourceAsStream(filename)) { if (is == null) throw new IllegalStateException("Unable to read " + filename + "; unable to find data file"); + int expectedColumns = -1; try (LineNumberReader reader = new LineNumberReader(new InputStreamReader(is, StandardCharsets.UTF_8))) { String line = reader.readLine(); while (line != null) { - if (reader.getLineNumber() != 1) { + if (reader.getLineNumber() == 1) + expectedColumns = CsvUtils.parseCsvLine(reader.getLineNumber(), line).size(); + else { List fields = CsvUtils.parseCsvLine(reader.getLineNumber(), line); + if (expectedColumns != -1 && fields.size() != expectedColumns) + throw new IOException("Line " + reader.getLineNumber() + ": expected " + expectedColumns + " columns, but found " + fields.size() + " columns"); + Short validStartYear = fields.get(1) != null && !fields.get(1).trim().isEmpty() ? Short.valueOf(fields.get(1)) : null; Short validEndYear = fields.get(1) != null && !fields.get(2).trim().isEmpty() ? Short.valueOf(fields.get(2)) : null; Short startYear = fields.get(1) != null && !fields.get(3).trim().isEmpty() ? 
Short.valueOf(fields.get(3)) : null; @@ -74,11 +80,18 @@ public static List parseGroupCsvFile(String filename) { try (InputStream is = Thread.currentThread().getContextClassLoader().getResourceAsStream(filename)) { if (is == null) throw new IllegalStateException("Unable to read " + filename + "; unable to find data file"); + int expectedColumns = -1; try (LineNumberReader reader = new LineNumberReader(new InputStreamReader(is, StandardCharsets.UTF_8))) { String line = reader.readLine(); while (line != null) { - if (reader.getLineNumber() != 1) - result.add(CsvUtils.parseCsvLine(reader.getLineNumber(), line).toArray(new String[0])); + if (reader.getLineNumber() == 1) + expectedColumns = CsvUtils.parseCsvLine(reader.getLineNumber(), line).size(); + else { + List fields = CsvUtils.parseCsvLine(reader.getLineNumber(), line); + if (expectedColumns != -1 && fields.size() != expectedColumns) + throw new IOException("Line " + reader.getLineNumber() + ": expected " + expectedColumns + " columns, but found " + fields.size() + " columns"); + result.add(fields.toArray(new String[0])); + } line = reader.readLine(); } } diff --git a/src/test/java/com/imsweb/mph/MphUtilsTest.java b/src/test/java/com/imsweb/mph/MphUtilsTest.java index 1c2e5e3..8b4a9a6 100644 --- a/src/test/java/com/imsweb/mph/MphUtilsTest.java +++ b/src/test/java/com/imsweb/mph/MphUtilsTest.java @@ -39,6 +39,11 @@ public class MphUtilsTest { private final MphUtils _utils = MphUtils.getInstance(); + @Test + public void testGetLibraryVersion() { + Assert.assertEquals("1.0-UNIT-TESTS", MphUtils.getLibraryVersion()); // real version file is copied (and renamed) during the build process... + } + @Test public void testIsHematoSamePrimary() { @@ -1933,7 +1938,8 @@ public void test2010Hematopoietic() { MphInput i1 = new MphInput(), i2 = new MphInput(); MphOutput output; - //M1 TODO + //M1 + // FD - this case was never written; not sure why... 
//M2 i1.setPrimarySite("C779"); @@ -2069,9 +2075,11 @@ public void test2010Hematopoietic() { //confirms that the NOS and the more specific histology are the same primary. //This is Skipped on the automated process - //M8 TODO + //M8 + // FD - this case was never written; not sure why... - //M9 TODO + //M9 + // FD - this case was never written; not sure why... //M10 Abstract as multiple primaries** when a neoplasm is originally diagnosed as a chronic neoplasm AND there is a second diagnosis of an acute //neoplasm more than 21 days after the chronic diagnosis. @@ -2108,7 +2116,8 @@ public void test2010Hematopoietic() { Assert.assertEquals(10, output.getAppliedRules().size()); Assert.assertEquals(MpResult.QUESTIONABLE, output.getResult()); - //M11 TODO + //M11 + // FD - this case was never written; not sure why... //M12 Abstract a single primary* when a neoplasm is originally diagnosed as acute AND reverts to a chronic neoplasm AND there is no confirmation //available that the patient has been treated for the acute neoplasm. @@ -2340,9 +2349,11 @@ public void test2004SolidMalignant() { MphInput i1 = new MphInput(), i2 = new MphInput(); MphOutput output; - //Rule 1 TODO + //Rule 1 + // FD - this case was never written; not sure why... - //Rule 2 TODO + //Rule 2 + // FD - this case was never written; not sure why... //Rule 3: Simultaneous multiple lesions of the same histologic type within the same site (i.e., multifocal tumors in a single organ or site) are a single primary. //If a new cancer of the same histology as an earlier one is diagnosed in the same site within two months, this is a single primary cancer diff --git a/src/test/java/com/imsweb/mph/internal/CsvUtilsTest.java b/src/test/java/com/imsweb/mph/internal/CsvUtilsTest.java new file mode 100644 index 0000000..2302752 --- /dev/null +++ b/src/test/java/com/imsweb/mph/internal/CsvUtilsTest.java @@ -0,0 +1,94 @@ +/* + * Copyright (C) 2025 Information Management Services, Inc. 
+ */ +package com.imsweb.mph.internal; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Map; + +import org.junit.Assert; +import org.junit.Test; + +public class CsvUtilsTest { + + @Test + public void textWriteCsvValues() { + Assert.assertEquals("1", CsvUtils.writeCsvValues(new String[] {"1"})); + Assert.assertEquals("1,2,3", CsvUtils.writeCsvValues(new String[] {"1", "2", "3"})); + Assert.assertEquals("1,,3", CsvUtils.writeCsvValues(new String[] {"1", null, "3"})); + Assert.assertEquals("\"1,2,3\"", CsvUtils.writeCsvValues(new String[] {"1,2,3"})); + Assert.assertEquals("\"1,\"\"2\"\",3\"", CsvUtils.writeCsvValues(new String[] {"1,\"2\",3"})); + } + + @Test + public void testParseHematoCsvFile() { + Map> result = CsvUtils.parseHematoCsvFile("hematopoietic-pairs-test.csv"); + Assert.assertEquals(2, result.size()); + Assert.assertEquals(1, result.get("9742/3").size()); + Assert.assertEquals(2, result.get("9741/3").size()); + + try { + CsvUtils.parseHematoCsvFile("hematopoietic-pairs-test-invalid.csv"); + Assert.fail("Should have been an exception!"); + } + catch (RuntimeException e) { + // expected + } + + try { + CsvUtils.parseHematoCsvFile("UNKNOWN"); + Assert.fail("Should have been an exception!"); + } + catch (RuntimeException e) { + // expected + } + } + + @Test + public void testParseGroupCsvFile() { + List result = CsvUtils.parseGroupCsvFile("hematopoietic-groups-test.csv"); + Assert.assertEquals(2, result.size()); + + try { + CsvUtils.parseGroupCsvFile("hematopoietic-groups-test-invalid.csv"); + Assert.fail("Should have been an exception!"); + } + catch (RuntimeException e) { + // expected + } + + try { + CsvUtils.parseGroupCsvFile("UNKNOWN"); + Assert.fail("Should have been an exception!"); + } + catch (RuntimeException e) { + // expected + } + } + + @Test + public void testParseCsvLine() throws IOException { + Assert.assertEquals(Collections.singletonList("1"), 
CsvUtils.parseCsvLine(1, "1")); + Assert.assertEquals(Arrays.asList("1", "2", "3"), CsvUtils.parseCsvLine(1, "1,2,3")); + Assert.assertEquals(Arrays.asList("1", "", "3"), CsvUtils.parseCsvLine(1, "1,,3")); + Assert.assertEquals(Collections.singletonList("1,2,3"), CsvUtils.parseCsvLine(1, "\"1,2,3\"")); + Assert.assertEquals(Collections.singletonList("1,\"2\",3"), CsvUtils.parseCsvLine(1, "\"1,\"\"2\"\",3\"")); + + assertFailedParsing(1, "\"1,2,3"); + assertFailedParsing(2, "1,2,3\""); + assertFailedParsing(3, "1,\"2\"x,3"); + } + + private void assertFailedParsing(int lineNumber, String line) { + try { + CsvUtils.parseCsvLine(lineNumber, line); + Assert.fail("Should have been an exception!"); + } + catch (IOException e) { + Assert.assertTrue(e.getMessage().contains("Line " + lineNumber)); + } + } +} diff --git a/src/test/java/com/imsweb/mph/internal/RangeTest.java b/src/test/java/com/imsweb/mph/internal/RangeTest.java new file mode 100644 index 0000000..98df9e4 --- /dev/null +++ b/src/test/java/com/imsweb/mph/internal/RangeTest.java @@ -0,0 +1,28 @@ +/* + * Copyright (C) 2025 Information Management Services, Inc. 
+ */ +package com.imsweb.mph.internal; + +import org.junit.Assert; +import org.junit.Test; + +public class RangeTest { + + @Test + public void testRange() { + Range range = Range.of(1, 3); + Assert.assertFalse(range.contains(0)); + Assert.assertTrue(range.contains(1)); + Assert.assertTrue(range.contains(2)); + Assert.assertTrue(range.contains(3)); + Assert.assertFalse(range.contains(5)); + + Assert.assertFalse(range.contains(null)); + + range = Range.is(2); + Assert.assertFalse(range.contains(1)); + Assert.assertTrue(range.contains(2)); + Assert.assertFalse(range.contains(3)); + } + +} diff --git a/src/test/resources/hematopoietic-groups-test-invalid.csv b/src/test/resources/hematopoietic-groups-test-invalid.csv new file mode 100644 index 0000000..6c46825 --- /dev/null +++ b/src/test/resources/hematopoietic-groups-test-invalid.csv @@ -0,0 +1,3 @@ +HistologyGroupNumber,HistologyLow,HistologyHigh,HistologyGroupName +1,9590,9590,"Malignant lymphoma, NOS" +2,9591,9591,Malignant lymphoma, non-Hodgkin, NOS \ No newline at end of file diff --git a/src/test/resources/hematopoietic-groups-test.csv b/src/test/resources/hematopoietic-groups-test.csv new file mode 100644 index 0000000..b45f615 --- /dev/null +++ b/src/test/resources/hematopoietic-groups-test.csv @@ -0,0 +1,3 @@ +HistologyGroupNumber,HistologyLow,HistologyHigh,HistologyGroupName +1,9590,9590,"Malignant lymphoma, NOS" +2,9591,9591,"Malignant lymphoma, non-Hodgkin, NOS" \ No newline at end of file diff --git a/src/test/resources/hematopoietic-pairs-test-invalid.csv b/src/test/resources/hematopoietic-pairs-test-invalid.csv new file mode 100644 index 0000000..90368b6 --- /dev/null +++ b/src/test/resources/hematopoietic-pairs-test-invalid.csv @@ -0,0 +1,4 @@ +morphology,valid start year,valid end year,start year,end year,same primary +9742/3,2001,,2001,2009,9590/3 +9741/3,2001,,2001,2009,9590/3 +9741/3,2001 \ No newline at end of file diff --git a/src/test/resources/hematopoietic-pairs-test.csv 
b/src/test/resources/hematopoietic-pairs-test.csv new file mode 100644 index 0000000..062af5c --- /dev/null +++ b/src/test/resources/hematopoietic-pairs-test.csv @@ -0,0 +1,4 @@ +morphology,valid start year,valid end year,start year,end year,same primary +9742/3,2001,,2001,2009,9590/3 +9741/3,2001,,2001,2009,9590/3 +9741/3,2001,,2001,2009,9740/3 \ No newline at end of file diff --git a/src/test/resources/mph-library-version.txt b/src/test/resources/mph-library-version.txt new file mode 100644 index 0000000..d7992d9 --- /dev/null +++ b/src/test/resources/mph-library-version.txt @@ -0,0 +1 @@ +1.0-UNIT-TESTS \ No newline at end of file From b09f62e5b854cd6400878e5811813987ee41d0d2 Mon Sep 17 00:00:00 2001 From: depryf Date: Mon, 27 Oct 2025 09:38:52 -0400 Subject: [PATCH 3/3] Added proper unit tests --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 1507bb9..0938846 100644 --- a/README.md +++ b/README.md @@ -8,6 +8,8 @@ This library contains the SEER Java implementations of the Multiple Primary and The implementation was partially based on the KCR Multiple Primary Rules Library developed by the Kentucky Cancer Registry. +This library requires Java 8. + ## Download The library is available on [Maven Central](http://search.maven.org/#search%7Cga%7C1%7Cg%3A%22com.imsweb%22%20AND%20a%3A%mph%22).