Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
131 changes: 56 additions & 75 deletions bin/newyear.pl
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ BEGIN
use Data::Dumper;

use CRMS;
use CRMS::NewYear;
use CRMS::RightsPredictor;

binmode(STDOUT, ':encoding(UTF-8)');
Expand Down Expand Up @@ -83,6 +84,7 @@ END
verbose => $verbose,
instance => $instance
);
my $new_year = CRMS::NewYear->new;

$verbose = 0 unless defined $verbose;
print "Verbosity $verbose\n" if $verbose;
Expand All @@ -104,7 +106,7 @@ END
if ($tsv) {
open $tsv_fh, '>:encoding(UTF-8)', $tsv or die "failed to open TSV file $tsv: $!";
my @cols = ('ID', 'Project', 'Author', 'Title', 'Pub Date', 'Country', 'Current Rights',
'Extracted Data', 'Predictions', 'New Rights', 'Message');
'Extracted Data', 'Predictions', 'New Rights');
printf $tsv_fh "%s\n", join("\t", @cols);
}

Expand Down Expand Up @@ -153,9 +155,13 @@ sub ProcessCommonwealthProject {
next;
}
my ($acurr, $rcurr, $src, $usr, $timecurr, $note) = @{$rq->[0]};
next if $acurr eq 'pd' or $acurr =~ m/^cc/;
next if !$new_year->are_rights_in_scope($acurr, $rcurr);
my $record = $crms->GetMetadata($id);
next unless defined $record;
if (!defined $record) {
$crms->ClearErrors;
print RED "Unable to get metadata for $id\n" if $verbose;
next;
}
my $rp = CRMS::RightsPredictor->new(record => $record);
my $gid = $row->[1];
my $time = $row->[2];
Expand All @@ -166,10 +172,10 @@ sub ProcessCommonwealthProject {
my $ref2 = $crms->SelectAll($sql, $gid);
next if scalar @$ref2 == 0;
my %alldates;
my %predictions;
my $predictions = {};
foreach my $row2 (@{$ref2}) {
my $note = $row2->[0] || '';
my $user = $row2->[1];
my $user = $row2->[1]; # FIXME: not used
my $data = $row2->[2];
$data = $jsonxs->decode($data);
my $date = $data->{'date'};
Expand All @@ -188,35 +194,18 @@ sub ProcessCommonwealthProject {
reference_year => $year);
my $prediction = $rp->rights;
if (defined $prediction) {
$predictions{$prediction} = 1;
$predictions->{$prediction} = 1;
}
}
$alldates{$_->[0]} = 1 for @$dates;
}
my ($ic, $icus, $pd, $pdus);
foreach my $pred (keys %predictions) {
$ic = $pred if $pred =~ m/^ic\//;
$icus = $pred if $pred =~ m/^icus/;
$pd = $pred if $pred =~ m/^pd\//;
$pdus = $pred if $pred =~ m/^pdus/;
}
if (scalar keys %predictions && !defined $ic && ($icus || $pd || $pdus)) {
my $new_rights;
if (defined $pd) {
$new_rights = ($acurr eq 'pd')? undef:$pd;
}
if (defined $pdus) {
$new_rights = ($acurr =~ m/^pd/)? undef:$pdus;
}
if (defined $icus) {
$new_rights = ($acurr =~ m/^pd/ || $acurr =~ m/^icus/)? undef:$icus;
}
if (defined $new_rights) {
my ($a, $r) = split m/\//, $new_rights;
SubmitNewYearReview($id, $a, $r, 'Commonwealth', $record,
join(';', sort keys %alldates),
join(';', sort keys %predictions));
}
my $new_rights = $new_year->choose_rights_prediction($acurr, $predictions);
if ($new_rights) {
my ($a, $r) = split m/\//, $new_rights;
SubmitNewYearReview($id, $a, $r, 'Commonwealth', $record,
join(';', sort keys %alldates),
join(';', sort keys %$predictions),
"$acurr/$rcurr");
}
}
}
Expand Down Expand Up @@ -259,6 +248,7 @@ sub ProcessPubDateProject
$date =~ s/^\s+|\s+$//g;
$dates{$date} = $date if defined $date;
}
# FIXME: $date_str is unused
my $date_str = join ', ', keys %dates;
if (scalar keys %dates == 1) {
my $rq = $crms->RightsQuery($id, 1);
Expand All @@ -267,10 +257,11 @@ sub ProcessPubDateProject
next;
}
my ($acurr, $rcurr, $src, $usr, $timecurr, $note) = @{$rq->[0]};
next if $acurr eq 'pd' or $acurr =~ m/^cc/;
next if !$new_year->are_rights_in_scope($acurr, $rcurr);
my $record = $crms->GetMetadata($id);
if (!defined $record) {
#print RED "Unable to get metadata for $id\n";
$crms->ClearErrors;
print RED "Unable to get metadata for $id\n" if $verbose;
next;
}
my $date = (keys %dates)[0];
Expand All @@ -286,7 +277,7 @@ sub ProcessPubDateProject
}
if (defined $attr && $attr ne $acurr) {
SubmitNewYearReview($id, $attr, 'cdpp', 'Publication Date', $record,
join(';', sort keys %extracted_data), '');
join(';', sort keys %extracted_data), '', "$acurr/$rcurr");
}
}
}
Expand Down Expand Up @@ -318,9 +309,13 @@ sub ProcessCrownCopyrightProject {
next;
}
my ($acurr, $rcurr, $src, $usr, $timecurr, $note) = @{$rq->[0]};
next if $acurr eq 'pd' or $acurr =~ m/^cc/;
next if !$new_year->are_rights_in_scope($acurr, $rcurr);
my $record = $crms->GetMetadata($id);
next unless defined $record;
if (!defined $record) {
$crms->ClearErrors;
print RED "Unable to get metadata for $id\n" if $verbose;
next;
}
my $gid = $row->[1];
my $time = $row->[2];
$seen{$id} = 1;
Expand All @@ -330,11 +325,11 @@ sub ProcessCrownCopyrightProject {
my $ref2 = $crms->SelectAll($sql, $gid);
next unless scalar @$ref2 > 0;
my %alldates;
my %predictions = ();
my $predictions = {};
my %dates = ();
foreach my $row2 (@{$ref2}) {
my $note = $row2->[0] || '';
my $user = $row2->[1];
my $user = $row2->[1]; # FIXME: not used
my $data = $row2->[2];
$data = $jsonxs->decode($data);
my $date = $data->{'date'};
Expand All @@ -348,35 +343,18 @@ sub ProcessCrownCopyrightProject {
is_corporate => 1, is_crown => 1, reference_year => $year);
my $prediction = $rp->rights;
if (defined $prediction) {
$predictions{$prediction} = 1;
$predictions->{$prediction} = 1;
}
}
$alldates{$_} = 1 for keys %dates;
}
my ($ic, $icus, $pd, $pdus);
foreach my $pred (keys %predictions) {
$ic = $pred if $pred =~ m/^ic\//;
$icus = $pred if $pred =~ m/^icus/;
$pd = $pred if $pred =~ m/^pd\//;
$pdus = $pred if $pred =~ m/^pdus/;
}
if (scalar keys %predictions && !defined $ic && ($icus || $pd || $pdus)) {
my $new_rights;
if (defined $pd) {
$new_rights = ($acurr eq 'pd')? undef:$pd;
}
if (defined $pdus) {
$new_rights = ($acurr =~ m/^pd/)? undef:$pdus;
}
if (defined $icus) {
$new_rights = ($acurr =~ m/^pd/ || $acurr =~ m/^icus/)? undef:$icus;
}
if (defined $new_rights) {
my ($a, $r) = split m/\//, $new_rights;
SubmitNewYearReview($id, $a, $r, 'Crown Copyright', $record,
join(';', sort keys %alldates),
join(';', sort keys %predictions));
}
my $new_rights = $new_year->choose_rights_prediction($acurr, $predictions);
if ($new_rights) {
my ($a, $r) = split m/\//, $new_rights;
SubmitNewYearReview($id, $a, $r, 'Crown Copyright', $record,
join(';', sort keys %alldates),
join(';', sort keys %$predictions),
"$acurr/$rcurr");
}
}
}
Expand All @@ -391,9 +369,12 @@ sub ProcessKeioICUS {
my $current_rights = $crms->GetCurrentRights($id);
next if $current_rights !~ m/^icus/;
my $record = $crms->GetMetadata($id);
print BOLD RED "Can't get metadata for $id\n" unless defined $record;
next unless defined $record;
SubmitNewYearReview($id, 'pd', 'add', 'Keio', $record, '', '');
if (!defined $record) {
$crms->ClearErrors;
print RED "Unable to get metadata for $id\n" if $verbose;
next;
}
SubmitNewYearReview($id, 'pd', 'add', 'Keio', $record, '', '', $current_rights);
}
}

Expand All @@ -406,7 +387,17 @@ sub SubmitNewYearReview
my $record = shift;
my $extracted_data = shift;
my $predictions = shift;
my $current_rights = shift;

if ($tsv) {
my @values = ($id, $project_name, $record->author || '',
$record->title || '', $record->publication_date->text || '',
$record->country || '', $current_rights, $extracted_data,
$predictions, "$new_attr/$new_reason");
@values = map { local $_ = $_; $_ =~ s/\s+/ /g; $_; } @values;
printf $tsv_fh "%s\n", join("\t", @values);
}
return if $noop;
$crms->UpdateMetadata($id, 1, $record);
my $res = $crms->AddItemToQueueOrSetItemActive($id, 0, 1, 'newyear', undef, $record, $nyp);
my $code = $res->{'status'};
Expand All @@ -431,16 +422,6 @@ sub SubmitNewYearReview
if ($result) {
print RED "SubmitReview() for $id: $result\n";
}
if ($tsv) {
my $rq = $crms->RightsQuery($id, 1);
my ($acurr, $rcurr, $src, $usr, $timecurr, $note) = @{$rq->[0]};
my @values = ($id, $project_name, $record->author || '',
$record->title || '', $record->publication_date->text || '',
$record->country || '', "$acurr/$rcurr", $extracted_data,
$predictions, "$new_attr/$new_reason", $msg);
@values = map { local $_ = $_; $_ =~ s/\s+/ /g; $_; } @values;
printf $tsv_fh "%s\n", join("\t", @values);
}
}

close($tsv_fh) if $tsv;
Expand Down
120 changes: 120 additions & 0 deletions lib/CRMS/NewYear.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
package CRMS::NewYear;

# A utility package containing logic that relates to the new year Public Domain Day
# rollover of rights.

use strict;
use warnings;
use utf8;

use Data::Dumper;
use List::Util qw();

sub new {
my $class = shift;

my $self = { @_ };
bless($self, $class);
return $self;
}

# Are the passed-in current rights "attribute" and "reason" names in scope for
# PDD rollover? This excludes pd and CC items, because there's nothing more to be done.
# It also excludes */{con, del, man, pvt, supp} items, because CRMS can't override them
# and there is no point in reporting them.
sub are_rights_in_scope {
my $self = shift;
my $attribute = shift;
my $reason = shift;

if (
$attribute eq 'pd' ||
$attribute =~ m/^cc/ ||
$reason eq 'con' ||
$reason eq 'del' ||
$reason eq 'man' ||
$reason eq 'pvt' ||
$reason eq 'supp'
) {
return;
}
return 1;
}

# Given a set of rights predictions that might reasonably be made based on reviewer data,
# in the form of a hashref of { "pd/add" => 1, "ic/add" => 1, ... } and the current attribute
# name, calculate the most restrictive attribute among the predictions, provided that attribute
# would be a "step up" from the current one (in terms of permissiveness).
# For example, if there is a "pdus" and an "icus" prediction, only consider "icus".
# And then only return it as the new rights if it would make the current rights less restrictive.
#
# Scenario where no prediction is allowed:
# |-----------|
# | 3 pd | <----- PREDICTION (cannot use, it is not the most restrictive)
# |-----------|
# | 2 pdus | <----- CURRENT RIGHTS
# |-----------|
# | 1 icus | <----- PREDICTION (cannot use, would be a downgrade)
# |-----------|
# | 0 ic |
# |-----------|
#
# Scenario where we do want a new prediction:
# |-----------|
# | 3 pd | <----- PREDICTION (cannot use, it is not the most restrictive)
# |-----------|
# | 2 pdus | <----- PREDICTION (USE THIS -- it is the most restrictive prediction, and better than icus)
# |-----------|
# | 1 icus | <----- CURRENT RIGHTS
# |-----------|
# | 0 ic |
# |-----------|
#
# In a nutshell: choose minimum prediction and return it if it is greater than current rights
#
# Note: there are rare but attested cases where we have legit pd/add and pd/exp predictions
# and the unordered hash implementation will, if pd is chosen, choose one at random.
# The old implementation did the same thing. KH has signed off on this as "don't care."
sub choose_rights_prediction {
my $self = shift;
my $attribute = shift; # current rights attr string
my $predictions = shift;

# This map allows us to grade the current rights and the predictions into 0-3 as above.
my $attr_values = {'pd' => 3, 'pdus' => 2, 'icus' => 1, 'ic' => 0};

# Get the value for current rights.
# It should not be terribly unusual to find current rights outside the pd/pdus/icus/ic contionuum
# although we will filter many of these out with `are_rights_in_scope` above.
# However, that does allow through rights like und/nfi (at least historically, we no longer export
# these to the rights DB by default).
# We can bail out if we get something outside the expected main four attributes.
my $current_value = $attr_values->{$attribute};
return unless defined $current_value;

# Expand predictions into array of { prediction => "attr/reason", value => value }
my @values = map {
my ($a, $r) = split('/', $_);
{ prediction => $_, value => $attr_values->{$a} };
} keys %$predictions;

# Extract out the "minimum" prediction, conflating undefs (unknown prediction) with ic,
# neither of which we can return as a viable choice.
my $min = List::Util::reduce {
($a->{value} || 0) < ($b->{value} || 0) ? $a : $b;
} @values;

# Bail out if there were no predictions, or if the best we can do is ic or "anything else"
if (!$min || !$min->{value}) {
return;
}

# Compare the values. If the predicted is greater than current, return that.
# This will be the benefit of the new year rollover in terms of less restrictive rights.
if ($min->{value} > $current_value) {
return $min->{prediction};
}
return;
}

1;
Loading