Skip to content

Commit

Permalink
Closes #1444: Integrate the Irish Research Council (IRC) projects mining
Browse files Browse the repository at this point in the history
Integrating unidentified IRC project mining.
Supplementing the integration test suite with the IRC case.
  • Loading branch information
marekhorst committed Jan 12, 2024
1 parent 7b20299 commit f88411f
Show file tree
Hide file tree
Showing 5 changed files with 25 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ hidden var 'sshrc_unidentified' from (select id from grants where fundingclass1=
-- Per-funder "unidentified" project ids.
-- Each statement looks up, in `grants`, the row whose grantid is "unidentified" for the given
-- fundingclass1 and stores its id in a variable named '<funder>_unidentified' (at most one id, due to `limit 1`).
-- NOTE(review): when `grants` holds no such row the variable is presumably left empty/unset, which the
-- `where var('<funder>_unidentified')` guards further down appear to rely on -- confirm.
hidden var 'nrc_unidentified' from (select id from grants where fundingclass1="NRC" and grantid="unidentified" limit 1);
hidden var 'inca_unidentified' from (select id from grants where fundingclass1="INCa" and grantid="unidentified" limit 1);
hidden var 'hfri_unidentified' from (select id from grants where fundingclass1="HFRI" and grantid="unidentified" limit 1);
hidden var 'irc_unidentified' from (select id from grants where fundingclass1="IRC" and grantid="unidentified" limit 1);

-- Input publications read from stdinput(): jsonpath pulls the '$.id' and '$.text' fields out of each
-- input record and setschema names the two resulting columns c1 and c2 (so c1 = id, c2 = text).
create temp table pubs as setschema 'c1,c2' select jsonpath(c1, '$.id', '$.text') from stdinput();

Expand Down Expand Up @@ -55,6 +56,9 @@ create temp table matched_undefined_miur_only as select distinct docid, var('miu
select c1 as docid, textwindow2s(c2,10,1,10, '\b(?:RBSI\d{2}\w{4})\b') from (setschema 'c1,c2' select * from pubs where c2 is not null))
where var('miur_unidentified') and (regexprmatches('\b(?:RBSI\d{2}\w{4})\b', middle));

-- Irish Research Council (IRC) mentions, attributed to the IRC "unidentified" project.
-- For every publication with non-null text (c2), the text is normalised before matching:
-- regexpr("\n",c2," ") is applied first (presumably replacing newlines with spaces -- confirm), then
-- lower(), comprspaces() and keywords(). textwindow2s is then run with the literal pattern
-- 'irish research council' and the arguments 10,3,10
-- (presumably 10 words of preceding context, a 3-word middle window and 10 words of following context -- confirm),
-- and its output is exposed as the columns prev, middle and next.
-- Every distinct (docid, prev, middle, next) row is tagged with the id held in var('irc_unidentified').
-- The trailing `where var('irc_unidentified')` guard keeps rows only when that variable evaluates as true,
-- i.e. presumably only when an IRC "unidentified" grant was found -- confirm.
create temp table matched_undefined_irc_only as select distinct docid, var('irc_unidentified') as id, prev,middle,next from (setschema 'docid,prev,middle,next'
select c1 as docid, textwindow2s(keywords(comprspaces(lower(regexpr("\n",c2," ")))),10,3,10, 'irish research council') from (setschema 'c1,c2' select * from pubs where c2 is not null))
where var('irc_unidentified');



Expand Down Expand Up @@ -372,4 +376,6 @@ select jdict('documentId', docid, 'projectId', id, 'confidenceLevel', 0.8, 'text
union all
select jdict('documentId', docid, 'projectId', id, 'confidenceLevel', 0.8, 'textsnippet', prev||" "||middle||" "||next) from matched_undefined_gsri
union all
select jdict('documentId', docid, 'projectId', id, 'confidenceLevel', 0.8, 'textsnippet', prev||" << "||middle||" >> "||next) from (select * from hfri_unidentified_only group by docid);
select jdict('documentId', docid, 'projectId', id, 'confidenceLevel', 0.8, 'textsnippet', prev||" "||middle||" "||next) from matched_undefined_irc_only
union all
select jdict('documentId', docid, 'projectId', id, 'confidenceLevel', 0.8, 'textsnippet', prev||" << "||middle||" >> "||next) from (select * from hfri_unidentified_only group by docid);
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,5 @@
{"text":"This material is based upon work supported by the National Science Foundation under Grant No. ATM-0513463.", "id":"50|od_______212::0f31511cdbd148bf5446b52f49ba8544"}
{"text":"Acknowledgements This work was partially supported by Science Foundation Ireland Grant 04/IN1/I478 and Science Foundation Ireland Grant 03/RPT1/I382.", "id":"50|doi_________::9c4ddd5d830294ab76d7e7919a379f3b"}
{"text":"Acknowledgments: This work was supported by the Hellenic Foundation for Research and Innovation (HFRI - Project No: 789)", "id":"50|arXiv_______::54002047659adf031293eabfbfe9938b"}
{"text":"This work was initially funded by a Government of Ireland Postgraduate Grant from the Irish Research Council, and subsequently supported by EPSRC Hubs for Robotics and AI in Hazardous Environments", "id":"50|06cdd3ff4700::d5035b3bb468e4ea7b4d82073634d138"}

Original file line number Diff line number Diff line change
Expand Up @@ -386,7 +386,7 @@
"documentId": "50|core_ac_uk__::2fea53c390909c2640b2d1a94d53c0a7",
"projectId": "40|inca________::1e5e62235d094afd01cd56e65112fc63",
"confidenceLevel": 0.8,
"textsnippet": "This work was supported by INCA (plbio 2010-216 and INCa-DGOS-Inserm 6046) "
"textsnippet": "This work was supported by INCA (plbio 2010-216 and INCa-DGOS-Inserm 6046)"
}
{
"documentId": "50|arXiv_______::a343cdcd534d696dd93c7ee9d78b9be7",
Expand Down Expand Up @@ -454,4 +454,10 @@
"projectId": "40|hfri________::644d89adeca811786cf72d7967ec9813",
"confidenceLevel": 0.8,
"textsnippet": "acknowledgments work supported hellenic foundation research innovation hfri project 789"
}
}
{
"documentId": "50|06cdd3ff4700::d5035b3bb468e4ea7b4d82073634d138",
"projectId": "40|501100002081::1e5e62235d094afd01cd56e65112fc63",
"confidenceLevel": 0.8,
"textsnippet": "funded by a government of ireland postgraduate grant from the irish research council and subsequently supported by epsrc hubs for robotics and ai"
}
Original file line number Diff line number Diff line change
Expand Up @@ -73,4 +73,5 @@
{"id": "40|irb_hr______::37ca9ece55928656726557c7c0a36a1a", "projectGrantId": "IP-2013-11-1021", "projectAcronym": null, "fundingClass": "HRZZ::", "jsonextrainfo": "{}"}
{"id": "40|nhmrc_______::019492919738381cbee98a17ae1dae25", "projectGrantId": "1056888", "projectAcronym": null, "fundingClass": "NHMRC::NHMRC Partnerships", "jsonextrainfo": "{}"}
{"id": "40|hfri________::644d89adeca811786cf72d7967ec9813", "projectGrantId": "789", "projectAcronym": null, "fundingClass": "HFRI::", "jsonextrainfo": "{}"}
{"id": "40|hfri________::cb5d92ce46b051859d1d9655e0ae7b46", "projectGrantId": "unidentified", "projectAcronym": null, "fundingClass": "HFRI::", "jsonextrainfo": "{}"}
{"id": "40|hfri________::cb5d92ce46b051859d1d9655e0ae7b46", "projectGrantId": "unidentified", "projectAcronym": null, "fundingClass": "HFRI::", "jsonextrainfo": "{}"}
{"id": "40|501100002081::1e5e62235d094afd01cd56e65112fc63", "projectGrantId": "unidentified", "projectAcronym": null, "fundingClass": "IRC::", "jsonextrainfo": "{}"}
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,11 @@
"type": "COUNTER",
"value": "1"
}
{
"key": "processing.referenceExtraction.project.references.byfunder.irc",
"type": "COUNTER",
"value": "1"
}
{
"key": "processing.referenceExtraction.project.references.byfunder.mestd",
"type": "COUNTER",
Expand Down Expand Up @@ -176,5 +181,5 @@
{
"key": "processing.referenceExtraction.project.references.total",
"type": "COUNTER",
"value": "76"
"value": "77"
}

0 comments on commit f88411f

Please sign in to comment.