@@ -248,7 +248,13 @@ etl <- function(res, repo, non_public) {
248248 subject_cleaned = gsub(" (wikidata)?\\ .org/entity/[qQ]([\\ d]+)?" , " " , subject_cleaned ) # remove wikidata classification
249249 subject_cleaned = gsub(" </keyword><keyword>" , " " , subject_cleaned ) # remove </keyword><keyword>
250250 subject_cleaned = gsub(" \\ [No keyword\\ ]" , " " , subject_cleaned )
251- subject_cleaned = gsub(" \\ [[^]]*\\ ]" , " " , subject_cleaned ) # remove any text inside square brackets
251+
252+ if (! is.null(params $ vis_type ) && params $ vis_type == " timeline" ) {
253+ subject_cleaned = remove_keywords_with_text_in_square_brackets(subject_cleaned )
254+ } else {
255+ subject_cleaned = remove_text_in_square_brackets_from_keywords(subject_cleaned )
256+ }
257+
252258 subject_cleaned = gsub(" \\ [[^\\ []+\\ ][^\\ ;]+(;|$)?" , " " , subject_cleaned ) # remove classification
253259 subject_cleaned = gsub(" [0-9]{2,} [A-Z]+[^;]*(;|$)?" , " " , subject_cleaned ) # remove classification
254260 subject_cleaned = gsub(" -- " , " ; " , subject_cleaned ) # replace inconsistent keyword separation
@@ -357,6 +363,18 @@ decode_dctypenorm <- function(dctypestring) {
357363 return (typecodes )
358364}
359365
remove_keywords_with_text_in_square_brackets <- function(x) {
  # Removes whole keywords that contain a non-empty tag in square brackets
  # from a ';'-separated keyword string.
  # Example: 'Climate [MeSH]' and 'Some keywords [Chemical]' are dropped
  # entirely; 'a; b [X]; c' becomes 'a; c'.
  #
  # x: character vector of ';'-separated keyword strings.
  # Returns a character vector of the same length as x.
  #
  # How the pattern works:
  #   [^;]*        keyword text before the bracket (including the whitespace
  #                that follows the previous ';')
  #   \[[^]]+\]    the bracketed tag; '+' means an empty '[]' does NOT
  #                trigger removal
  #   [^;]*;?      the rest of the keyword plus at most one trailing ';'
  # The ';' *preceding* a removed keyword is never consumed, so removing the
  # last keyword leaves that separator behind ('a; b [X]' -> 'a;').
  gsub("[^;]*\\[[^]]+\\][^;]*;?", "", x)
}
371+
remove_text_in_square_brackets_from_keywords <- function(x) {
  # Strips every '[...]' group (including an empty '[]') from the input while
  # keeping the rest of each keyword.
  # Example: 'Climate [MeSH]' -> 'Climate ', 'a [X]; b [Y]' -> 'a ; b '.
  #
  # x: character vector of keyword strings.
  # Returns a character vector of the same length as x.
  #
  # Only the brackets and their content are removed; whitespace around the
  # removed group is left untouched, so a trailing space can remain.
  gsub("\\[[^]]*\\]", "", x)
}
377+
360378dctypenorm_decoder <- list (
361379 " 4" = " Audio" ,
362380 " 11" = " Book" ,
0 commit comments