Skip to content

Commit

Permalink
Add min/max value stats to summary report
Browse files Browse the repository at this point in the history
  • Loading branch information
vish-cs committed Dec 9, 2024
1 parent 490368a commit 03d3af1
Show file tree
Hide file tree
Showing 13 changed files with 124 additions and 54 deletions.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

8 changes: 7 additions & 1 deletion util/src/main/java/org/datacommons/util/CSVReportWriter.java
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,17 @@
import org.apache.commons.csv.CSVPrinter;
import org.datacommons.util.SummaryReportGenerator.StatVarSummary;

/*
* This class generates a CSV summary report of stats related to a dataset import.
*/
class CSVReportWriter {

enum ReportHeaders {
StatVar,
NumPlaces,
NumObservations,
MinValue,
MaxValue,
NumObservationsDates,
MinDate,
MaxDate,
Expand All @@ -35,6 +40,8 @@ public static void writeRecords(Map<String, StatVarSummary> records, Writer sw)
sv,
summary.places.size(),
summary.numObservations,
summary.minValue,
summary.maxValue,
summary.dates.size(),
!summary.dates.isEmpty()
? ((TreeSet<String>) (summary.getUniqueDates())).first()
Expand All @@ -53,4 +60,3 @@ public static void writeRecords(Map<String, StatVarSummary> records, Writer sw)
}
}
}

7 changes: 7 additions & 0 deletions util/src/main/java/org/datacommons/util/StatChecker.java
Original file line number Diff line number Diff line change
Expand Up @@ -655,6 +655,13 @@ private synchronized void extractStatVarInfoFromNode(McfGraph.PropertyValues nod
if (svDcid.isEmpty()) return;
StatVarSummary svMap = svSummaryMap.computeIfAbsent(svDcid, k -> new StatVarSummary());
svMap.numObservations++;
try {
double value = Double.parseDouble(McfUtil.getPropVal(node, Vocabulary.VALUE));
svMap.minValue = Math.min(svMap.minValue, value);
svMap.maxValue = Math.max(svMap.maxValue, value);
} catch (NumberFormatException e) {
// Ignore if the value is not a number.
}
svMap.dates.add(McfUtil.getPropVal(node, Vocabulary.OBSERVATION_DATE));
svMap.places.add(McfUtil.getPropVal(node, Vocabulary.OBSERVATION_ABOUT));
svMap.mMethods.add(McfUtil.getPropVal(node, Vocabulary.MEASUREMENT_METHOD));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ public class SummaryReportGenerator {

public static boolean TEST_mode = false;
public static final String SUMMARY_REPORT_HTML = "summary_report.html";
public static final String SUMMARY_REPORT_CSV = "summary_report.csv";
public static final String SUMMARY_REPORT_CSV = "summary_report.csv";

// An object to save the information about a stat var. This contains all the necessary getters to
// access the information in this object from SummaryReport.ftl
Expand All @@ -48,6 +48,8 @@ public static final class StatVarSummary {
Set<String> observationPeriods = new HashSet<>();
// This set will not be populated when StatVarSummary is generated by PlaceSeriesSummary
Set<String> dates = new HashSet<>();
// Running minimum and maximum of the numeric observation values recorded for this stat var.
// Each starts at the opposite extreme of the finite double range so that the first value
// folded in with Math.min / Math.max replaces it.
double minValue = Double.MAX_VALUE;
// Double.MIN_VALUE is the smallest *positive* double (about 4.9e-324), not the most negative
// one, so it cannot seed a maximum: data that is all negative or zero would never replace it.
double maxValue = -Double.MAX_VALUE;

// The following two lists will only be populated for StatVarSummary generated by
// PlaceSeriesSummary and the value at index n of seriesValues will correspond to the date at
Expand All @@ -59,6 +61,14 @@ public int getNumObservations() {
return this.numObservations;
}

/**
 * Returns the smallest numeric observation value recorded in this summary.
 *
 * @return the current value of the {@code minValue} field; this is still the field's initial
 *     sentinel when no numeric value has been recorded
 */
public double getMinValue() {
  return minValue;
}

/**
 * Returns the largest numeric observation value recorded in this summary.
 *
 * @return the current value of the {@code maxValue} field; this is still the field's initial
 *     sentinel when no numeric value has been recorded
 */
public double getMaxValue() {
  return maxValue;
}

/**
 * Returns the places recorded in this summary as a new sorted set.
 *
 * @return a fresh {@link TreeSet} copy of {@code places}; changes to the returned set do not
 *     affect this summary
 */
public Set<String> getPlaces() {
  Set<String> sortedPlaces = new TreeSet<>(places);
  return sortedPlaces;
}
Expand Down Expand Up @@ -191,7 +201,7 @@ public static void generateReportSummary(
data.put("placeSeriesSummaryMap", placeSeriesSummaryMap);
Writer file = new FileWriter(Paths.get(outputDir.toString(), SUMMARY_REPORT_HTML).toString());
template.process(data, file);
Writer csvFile = new FileWriter(Paths.get(outputDir.toString(), SUMMARY_REPORT_CSV).toString());
Writer csvFile = new FileWriter(Paths.get(outputDir.toString(), SUMMARY_REPORT_CSV).toString());
CSVReportWriter.writeRecords(svSummaryMap, csvFile);
}

Expand Down
4 changes: 4 additions & 0 deletions util/src/main/resources/SummaryReport.ftl
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,8 @@
<th>StatVar</th>
<th>Num Places</th>
<th>Num Observations</th>
<th>Min Value</th>
<th>Max Value</th>
<th>Num Observation Dates</th>
<th>Min Date</th>
<th>Max Date</th>
Expand All @@ -204,6 +206,8 @@
<td><a name="statvars--${sv}" href="#statvars--${sv}">${sv}</a></td>
<td>${svSummary.getPlaces()?size}</td>
<td>${svSummary.getNumObservations()}</td>
<td>${svSummary.getMinValue()}</td>
<td>${svSummary.getMaxValue()}</td>
<td>${svSummary.getUniqueDates()?size}</td>
<td>${svSummary.getUniqueDates()?first!""}</td>
<td>${svSummary.getUniqueDates()?last!""}</td>
Expand Down
11 changes: 7 additions & 4 deletions util/src/test/java/org/datacommons/util/CSVReportWriterTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,20 +16,24 @@ public class CSVReportWriterTest {

public static final String[] HEADERS = {"author", "title"};
public static final String EXPECTED_FILESTREAM =
"StatVar,NumPlaces,NumObservations,NumObservationsDates,MinDate,MaxDate,MeasurementMethods,Units,ScalingFactors,observationPeriods\r\n"
+ "Var1,0,6,0,,,[CensusACS5YrSurvey],[],[],[]\r\n"
+ "Var2,0,2,2,2020,2025,[CensusACS5YrSurvey],[],[],[]";
"StatVar,NumPlaces,NumObservations,MinValue,MaxValue,NumObservationsDates,MinDate,MaxDate,MeasurementMethods,Units,ScalingFactors,observationPeriods\r\n"
+ "Var1,0,6,10.0,100.0,0,,,[CensusACS5YrSurvey],[],[],[]\r\n"
+ "Var2,0,2,5.0,60.0,2,2020,2025,[CensusACS5YrSurvey],[],[],[]";
Map<String, StatVarSummary> records;

@Before
public void setUp() {
StatVarSummary countPersonExpectedSummary = new StatVarSummary();
countPersonExpectedSummary.numObservations = 6;
countPersonExpectedSummary.mMethods = Set.of("CensusACS5YrSurvey");
countPersonExpectedSummary.minValue = 10;
countPersonExpectedSummary.maxValue = 100;
StatVarSummary countFemaleExpectedSummary = new StatVarSummary();
countFemaleExpectedSummary.numObservations = 2;
countFemaleExpectedSummary.mMethods = Set.of("CensusACS5YrSurvey");
countFemaleExpectedSummary.dates = Set.of("2020", "2025");
countFemaleExpectedSummary.minValue = 5;
countFemaleExpectedSummary.maxValue = 60;

records =
Collections.unmodifiableMap(
Expand All @@ -48,4 +52,3 @@ public void csvWriterSuccess() throws IOException {
assertEquals(EXPECTED_FILESTREAM, sw.toString().trim());
}
}

0 comments on commit 03d3af1

Please sign in to comment.