Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Enhance slr feature #559

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ public ManageStudyDefinitionViewModel(Study study, Path studyDirectory, ImportFo
title.setValue(study.getTitle());
authors.addAll(study.getAuthors());
researchQuestions.addAll(study.getResearchQuestions());
queries.addAll(study.getQueries().stream().map(StudyQuery::getQuery).collect(Collectors.toList()));
queries.addAll(study.getQueries().stream().map(StudyQuery::getBaseQuery).collect(Collectors.toList()));
databases.addAll(study.getDatabases()
.stream()
.map(studyDatabase -> new StudyDatabaseItem(studyDatabase.getName(), studyDatabase.isEnabled()))
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/org/jabref/logic/crawler/Crawler.java
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ public class Crawler {
public Crawler(Path studyRepositoryRoot, SlrGitHandler gitHandler, ImportFormatPreferences importFormatPreferences, SavePreferences savePreferences, TimestampPreferences timestampPreferences, BibEntryTypesManager bibEntryTypesManager, FileUpdateMonitor fileUpdateMonitor) throws IllegalArgumentException, IOException, ParseException {
studyRepository = new StudyRepository(studyRepositoryRoot, gitHandler, importFormatPreferences, fileUpdateMonitor, savePreferences, bibEntryTypesManager);
StudyDatabaseToFetcherConverter studyDatabaseToFetcherConverter = new StudyDatabaseToFetcherConverter(studyRepository.getActiveLibraryEntries(), importFormatPreferences);
this.studyFetcher = new StudyFetcher(studyDatabaseToFetcherConverter.getActiveFetchers(), studyRepository.getSearchQueryStrings());
this.studyFetcher = new StudyFetcher(studyDatabaseToFetcherConverter.getActiveFetchers(), studyRepository.getSearchQueries());
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ private SearchBasedFetcher createFetcherFromLibraryEntry(StudyDatabase studyData
Set<SearchBasedFetcher> searchBasedFetchers = WebFetchers.getSearchBasedFetchers(importFormatPreferences);
String libraryNameFromFetcher = studyDatabase.getName();
return searchBasedFetchers.stream()
.filter(searchBasedFetcher -> searchBasedFetcher.getName().toLowerCase().equals(libraryNameFromFetcher.toLowerCase()))
.filter(searchBasedFetcher -> searchBasedFetcher.getName().equalsIgnoreCase(libraryNameFromFetcher))
.findAny()
.orElse(null);
}
Expand Down
14 changes: 8 additions & 6 deletions src/main/java/org/jabref/logic/crawler/StudyFetcher.java
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import org.jabref.model.entry.BibEntry;
import org.jabref.model.study.FetchResult;
import org.jabref.model.study.QueryResult;
import org.jabref.model.study.StudyQuery;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Expand All @@ -25,9 +26,9 @@ class StudyFetcher {
private static final int MAX_AMOUNT_OF_RESULTS_PER_FETCHER = 100;

private final List<SearchBasedFetcher> activeFetchers;
private final List<String> searchQueries;
private final List<StudyQuery> searchQueries;

StudyFetcher(List<SearchBasedFetcher> activeFetchers, List<String> searchQueries) throws IllegalArgumentException {
/**
 * @param activeFetchers fetchers every query is executed against
 * @param searchQueries  study queries to crawl (one {@link QueryResult} is produced per query)
 */
StudyFetcher(List<SearchBasedFetcher> activeFetchers, List<StudyQuery> searchQueries) throws IllegalArgumentException {
// Both lists are stored as-is (no defensive copy) — callers must not mutate them afterwards
this.searchQueries = searchQueries;
this.activeFetchers = activeFetchers;
}
Expand All @@ -43,8 +44,9 @@ public List<QueryResult> crawl() {
.collect(Collectors.toList());
}

private QueryResult getQueryResult(String searchQuery) {
return new QueryResult(searchQuery, performSearchOnQuery(searchQuery));
/**
 * Runs the given study query on all active fetchers and bundles the hits into one result.
 *
 * @param searchQuery the query (base query plus optional library-specific refinements)
 * @return the aggregated per-fetcher results, keyed by the base query string
 */
private QueryResult getQueryResult(StudyQuery searchQuery) {
// Results for all query refinements are stored within the directory of the base query
return new QueryResult(searchQuery.getBaseQuery(), performSearchOnQuery(searchQuery));
}

/**
Expand All @@ -53,9 +55,9 @@ private QueryResult getQueryResult(String searchQuery) {
* @param searchQuery The query the search is performed for.
* @return Mapping of each fetcher by name and all their retrieved publications as a BibDatabase
*/
private List<FetchResult> performSearchOnQuery(String searchQuery) {
private List<FetchResult> performSearchOnQuery(StudyQuery searchQuery) {
return activeFetchers.parallelStream()
.map(fetcher -> performSearchOnQueryForFetcher(searchQuery, fetcher))
.map(fetcher -> performSearchOnQueryForFetcher(searchQuery.getLibrarySpecificQueryOrDefault(fetcher.getName()), fetcher))
.filter(Objects::nonNull)
.collect(Collectors.toList());
}
Expand Down
29 changes: 9 additions & 20 deletions src/main/java/org/jabref/logic/crawler/StudyRepository.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
import java.nio.charset.UnsupportedCharsetException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.time.temporal.ChronoUnit;
import java.util.List;
Expand All @@ -22,8 +21,6 @@
import org.jabref.logic.git.SlrGitHandler;
import org.jabref.logic.importer.ImportFormatPreferences;
import org.jabref.logic.importer.OpenDatabase;
import org.jabref.logic.importer.ParseException;
import org.jabref.logic.importer.SearchBasedFetcher;
import org.jabref.logic.l10n.Localization;
import org.jabref.model.database.BibDatabase;
import org.jabref.model.database.BibDatabaseContext;
Expand Down Expand Up @@ -53,7 +50,6 @@ class StudyRepository {
private static final Pattern MATCHCOLON = Pattern.compile(":");
private static final Pattern MATCHILLEGALCHARACTERS = Pattern.compile("[^A-Za-z0-9_.\\s=-]");
// Currently we make assumptions about the configuration: the remotes, work and search branch names
private static final String REMOTE = "origin";
private static final String WORK_BRANCH = "work";
private static final String SEARCH_BRANCH = "search";

Expand All @@ -75,14 +71,13 @@ class StudyRepository {
* contain the study definition file.
* @throws IOException Thrown if the given repository does not exists, or the study definition file
* does not exist
* @throws ParseException Problem parsing the study definition file.
*/
public StudyRepository(Path pathToRepository,
SlrGitHandler gitHandler,
ImportFormatPreferences importFormatPreferences,
FileUpdateMonitor fileUpdateMonitor,
SavePreferences savePreferences,
BibEntryTypesManager bibEntryTypesManager) throws IOException, ParseException {
BibEntryTypesManager bibEntryTypesManager) throws IOException {
this.repositoryPath = pathToRepository;
this.gitHandler = gitHandler;
this.importFormatPreferences = importFormatPreferences;
Expand Down Expand Up @@ -170,12 +165,11 @@ private Study parseStudyFile() throws IOException {
/**
* Returns all query strings of the study definition
*
* @return List of all queries as Strings.
* @return List of all queries.
*/
public List<String> getSearchQueryStrings() {
public List<StudyQuery> getSearchQueries() {
return study.getQueries()
.parallelStream()
.map(StudyQuery::getQuery)
.collect(Collectors.toList());
}

Expand Down Expand Up @@ -210,12 +204,10 @@ public Study getStudy() {
*/
public void persist(List<QueryResult> crawlResults) throws IOException, GitAPIException, SaveException {
updateWorkAndSearchBranch();
study.setLastSearchDate(LocalDate.now());
persistStudy();
gitHandler.createCommitOnCurrentBranch("Update search date", true);
gitHandler.createCommitOnCurrentBranch("Write config file", true);
gitHandler.checkoutBranch(SEARCH_BRANCH);
persistResults(crawlResults);
study.setLastSearchDate(LocalDate.now());
persistStudy();
try {
// First commit changes to search branch and update remote
Expand Down Expand Up @@ -269,12 +261,10 @@ private void persistStudy() throws IOException {
*/
private void setUpRepositoryStructure() throws IOException {
// Cannot use stream here since IOException has to be thrown
StudyDatabaseToFetcherConverter converter = new StudyDatabaseToFetcherConverter(this.getActiveLibraryEntries(), importFormatPreferences);
for (String query : this.getSearchQueryStrings()) {
createQueryResultFolder(query);
converter.getActiveFetchers()
.forEach(searchBasedFetcher -> createFetcherResultFile(query, searchBasedFetcher));
createQueryResultFile(query);
for (StudyQuery query : this.getSearchQueries()) {
createQueryResultFolder(query.getBaseQuery());
getActiveLibraryEntries().forEach(library -> createFetcherResultFile(query.getBaseQuery(), library.getName()));
createQueryResultFile(query.getBaseQuery());
}
createStudyResultFile();
}
Expand All @@ -296,8 +286,7 @@ private void createFolder(Path folder) throws IOException {
}
}

private void createFetcherResultFile(String query, SearchBasedFetcher searchBasedFetcher) {
String fetcherName = searchBasedFetcher.getName();
/**
 * Creates the per-fetcher result .bib file for the given query.
 *
 * @param query       the query whose result folder the file belongs to
 * @param fetcherName name of the fetcher/library the file collects results for
 */
private void createFetcherResultFile(String query, String fetcherName) {
// Path is derived from query + fetcher name; createBibFile presumably creates it only if absent — TODO confirm
Path fetcherResultFile = getPathToFetcherResultFile(query, fetcherName);
createBibFile(fetcherResultFile);
}
Expand Down
5 changes: 0 additions & 5 deletions src/main/java/org/jabref/logic/crawler/StudyYamlParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,8 @@
import org.jabref.model.study.Study;

import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.SerializationFeature;
import com.fasterxml.jackson.dataformat.yaml.YAMLFactory;
import com.fasterxml.jackson.dataformat.yaml.YAMLGenerator;
import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule;

public class StudyYamlParser {

Expand All @@ -20,7 +18,6 @@ public class StudyYamlParser {
*/
public Study parseStudyYamlFile(Path studyYamlFile) throws IOException {
ObjectMapper yamlMapper = new ObjectMapper(new YAMLFactory());
yamlMapper.registerModule(new JavaTimeModule());
try (InputStream fileInputStream = new FileInputStream(studyYamlFile.toFile())) {
return yamlMapper.readValue(fileInputStream, Study.class);
}
Expand All @@ -32,8 +29,6 @@ public Study parseStudyYamlFile(Path studyYamlFile) throws IOException {
public void writeStudyYamlFile(Study study, Path studyYamlFile) throws IOException {
ObjectMapper yamlMapper = new ObjectMapper(new YAMLFactory().disable(YAMLGenerator.Feature.WRITE_DOC_START_MARKER)
.enable(YAMLGenerator.Feature.MINIMIZE_QUOTES));
yamlMapper.registerModule(new JavaTimeModule());
yamlMapper.disable(SerializationFeature.WRITE_DATES_AS_TIMESTAMPS);
yamlMapper.writeValue(studyYamlFile.toFile(), study);
}
}
2 changes: 2 additions & 0 deletions src/main/java/org/jabref/logic/git/GitHandler.java
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,8 @@ public boolean createCommitOnCurrentBranch(String commitMessage, boolean amend)
.setMessage(commitMessage)
.call();
}
} catch (GitAPIException e) {
LOGGER.error("Could not create commit on branch", e);
}
return commitCreated;
}
Expand Down
97 changes: 97 additions & 0 deletions src/main/java/org/jabref/logic/importer/FetcherDelegator.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
package org.jabref.logic.importer;

import java.util.Collections;
import java.util.List;
import java.util.Locale;
import java.util.Optional;

import org.jabref.logic.importer.fetcher.transformers.AbstractQueryTransformer;
import org.jabref.logic.importer.fetcher.transformers.ArXivQueryTransformer;
import org.jabref.logic.importer.fetcher.transformers.CollectionOfComputerScienceBibliographiesQueryTransformer;
import org.jabref.logic.importer.fetcher.transformers.DBLPQueryTransformer;
import org.jabref.logic.importer.fetcher.transformers.DefaultQueryTransformer;
import org.jabref.logic.importer.fetcher.transformers.GVKQueryTransformer;
import org.jabref.logic.importer.fetcher.transformers.IEEEQueryTransformer;
import org.jabref.logic.importer.fetcher.transformers.JstorQueryTransformer;
import org.jabref.logic.importer.fetcher.transformers.ScholarQueryTransformer;
import org.jabref.logic.importer.fetcher.transformers.SpringerQueryTransformer;
import org.jabref.logic.importer.fetcher.transformers.ZbMathQueryTransformer;
import org.jabref.model.entry.BibEntry;

import org.apache.lucene.queryparser.flexible.core.QueryNodeParseException;
import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode;
import org.apache.lucene.queryparser.flexible.core.parser.SyntaxParser;
import org.apache.lucene.queryparser.flexible.standard.parser.StandardSyntaxParser;

import static org.jabref.logic.importer.fetcher.transformers.AbstractQueryTransformer.NO_EXPLICIT_FIELD;

/**
 * Manages how a query is executed (either raw or transformed into the
 * fetcher-specific query syntax via an {@link AbstractQueryTransformer}).
 */
public class FetcherDelegator {

    public FetcherDelegator() {
    }

    /**
     * Looks for hits which are matched by the given free-text query.
     * The query is parsed as a lucene query and transformed into the syntax
     * of the given fetcher before the search is executed.
     *
     * @param searchQuery query string that can be parsed into a lucene query
     * @param fetcher     the fetcher the transformed query is executed on
     * @return a list of {@link BibEntry}, which are matched by the query (may be empty)
     * @throws FetcherException if the query cannot be parsed or the fetcher fails
     */
    List<BibEntry> performSearch(String searchQuery, SearchBasedFetcher fetcher) throws FetcherException {
        if (searchQuery.isBlank()) {
            return Collections.emptyList();
        }
        // TODO: The delegator furthermore should detect whether the proposed query is a specialized query (e.g. doi and use the appropriate fetcher, instead of the provided one

        SyntaxParser parser = new StandardSyntaxParser();
        QueryNode queryNode;
        try {
            queryNode = parser.parse(searchQuery, NO_EXPLICIT_FIELD);
        } catch (QueryNodeParseException e) {
            // Preserve the parse exception as cause so callers can diagnose the malformed query
            throw new FetcherException("An error occurred when parsing the query", e);
        }

        AbstractQueryTransformer transformer = getTransformer(fetcher);
        // If the transformer produces no output, fall back to the raw query string
        Optional<String> transformedQuery = transformer.transformLuceneQuery(queryNode);
        return postFilterResult(fetcher.performSearch(transformedQuery.orElse(searchQuery)), transformer);
    }

    /**
     * Selects the query transformer matching the given fetcher by (case-normalized) name.
     * Unknown fetchers fall back to the {@link DefaultQueryTransformer}.
     */
    private AbstractQueryTransformer getTransformer(SearchBasedFetcher fetcher) {
        // Locale.ROOT avoids locale-sensitive case mapping (e.g. the Turkish dotless i)
        String lowerCaseFetcherName = fetcher.getName().toLowerCase(Locale.ROOT);
        return switch (lowerCaseFetcherName) {
            case "arxiv" -> new ArXivQueryTransformer();
            case "ieeexplore" -> new IEEEQueryTransformer();
            case "gvk" -> new GVKQueryTransformer();
            case "springer" -> new SpringerQueryTransformer();
            case "google scholar" -> new ScholarQueryTransformer();
            case "dblp" -> new DBLPQueryTransformer();
            case "collection of computer science bibliographies" -> new CollectionOfComputerScienceBibliographiesQueryTransformer();
            case "zbmath" -> new ZbMathQueryTransformer();
            case "jstor" -> new JstorQueryTransformer();
            default -> new DefaultQueryTransformer();
        };
    }

    /**
     * Depending on the type of query transformer the result has to be filtered
     *
     * @param result      the result
     * @param transformer transformer used for the query, depending on the type, post filtering is required
     * @return a filtered result list
     */
    private List<BibEntry> postFilterResult(List<BibEntry> result, AbstractQueryTransformer transformer) {
        // TODO: apply transformer-specific post-filtering; currently a pass-through
        return result;
    }

    /**
     * Looks for hits which are matched by the given free-text query.
     * The query is passed to the fetcher untransformed.
     *
     * @param searchQuery query string
     * @return a list of {@link BibEntry}, which are matched by the query (may be empty)
     */
    List<BibEntry> performSearchRaw(String searchQuery, SearchBasedFetcher fetcher) throws FetcherException {
        return fetcher.performSearch(searchQuery);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,37 +7,14 @@
import org.jabref.model.entry.BibEntry;
import org.jabref.model.paging.Page;

import org.apache.lucene.queryparser.flexible.core.QueryNodeParseException;
import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode;
import org.apache.lucene.queryparser.flexible.core.parser.SyntaxParser;
import org.apache.lucene.queryparser.flexible.standard.parser.StandardSyntaxParser;

public interface PagedSearchBasedFetcher extends SearchBasedFetcher {

/**
* @param luceneQuery the root node of the lucene query
* @param pageNumber requested site number indexed from 0
* @return Page with search results
*/
Page<BibEntry> performSearchPaged(QueryNode luceneQuery, int pageNumber) throws FetcherException;

/**
* @param searchQuery query string that can be parsed into a lucene query
* @param pageNumber requested site number indexed from 0
* @return Page with search results
*/
default Page<BibEntry> performSearchPaged(String searchQuery, int pageNumber) throws FetcherException {
if (searchQuery.isBlank()) {
return new Page<>(searchQuery, pageNumber, Collections.emptyList());
}
SyntaxParser parser = new StandardSyntaxParser();
final String NO_EXPLICIT_FIELD = "default";
try {
return this.performSearchPaged(parser.parse(searchQuery, NO_EXPLICIT_FIELD), pageNumber);
} catch (QueryNodeParseException e) {
throw new FetcherException("An error occurred during parsing of the query.");
}
}
Page<BibEntry> performSearchPaged(String searchQuery, int pageNumber) throws FetcherException;

/**
* @return default pageSize
Expand All @@ -49,11 +26,11 @@ default int getPageSize() {
/**
* This method is used to send complex queries using fielded search.
*
* @param luceneQuery the root node of the lucene query
* @param query the query string used to identify relevant documents
* @return a list of {@link BibEntry}, which are matched by the query (may be empty)
*/
default List<BibEntry> performSearch(QueryNode luceneQuery) throws FetcherException {
return new ArrayList<>(performSearchPaged(luceneQuery, 0).getContent());
default List<BibEntry> performSearch(String query) throws FetcherException {
return new ArrayList<>(performSearchPaged(query, 0).getContent());
}

}
Loading