From 4d1a4439a2956e9401898f895f20e10263d2b216 Mon Sep 17 00:00:00 2001 From: Masatake YAMATO Date: Fri, 18 Mar 2022 06:41:52 +0900 Subject: [PATCH] FrontMatter: new subparser parser run from Markdown parser In this version, just extracting "foo" in "title: foo" written in Yaml. JSON(;;;) and TOML(+++) are not supported yet. Close #3032. Signed-off-by: Masatake YAMATO --- .../simple-rmarkdown.d/expected.tags | 23 ++- .../simple-rmarkdown.d/input.rmd | 3 + docs/news.rst | 1 + main/parsers_p.h | 4 +- parsers/frontmatter.c | 89 ++++++++ parsers/frontmatter.h | 19 ++ parsers/markdown.c | 11 + parsers/yamlfrontmatter.c | 194 ++++++++++++++++++ source.mak | 4 + 9 files changed, 336 insertions(+), 12 deletions(-) create mode 100644 parsers/frontmatter.c create mode 100644 parsers/frontmatter.h create mode 100644 parsers/yamlfrontmatter.c diff --git a/Units/parser-rmarkdown.r/simple-rmarkdown.d/expected.tags b/Units/parser-rmarkdown.r/simple-rmarkdown.d/expected.tags index 50d750120d..b8c9683e47 100644 --- a/Units/parser-rmarkdown.r/simple-rmarkdown.d/expected.tags +++ b/Units/parser-rmarkdown.r/simple-rmarkdown.d/expected.tags @@ -1,11 +1,12 @@ -S1 input.rmd /^# S1$/;" chapter line:1 language:Markdown end:14 -xyX input.rmd /^```{r xyX}$/;" chunklabel line:3 language:RMarkdown extras:subparser -S2 input.rmd /^# S2$/;" chapter line:15 language:Markdown end:25 -__anon4a45a9700100 input.rmd /^```{r, cache = TRUE, dependson = "xyX"}$/;" chunklabel line:17 language:RMarkdown extras:subparser,anonymous -__anon4a45a9700200 input.rmd /^```{python}$/;" chunklabel line:21 language:RMarkdown extras:subparser,anonymous -S3 input.rmd /^# S3$/;" chapter line:26 language:Markdown end:27 -x input.rmd /^x <- 1$/;" globalVar line:5 language:R extras:guest end:5 -foo input.rmd /^foo <- function () {$/;" function line:6 language:R extras:guest end:9 -y input.rmd /^ y <- 2$/;" functionVar line:7 language:R function:foo extras:guest end:7 -X input.rmd /^X <- func()$/;" globalVar line:11 
language:R extras:guest end:11 -f input.rmd /^def f():$/;" function line:22 language:Python extras:guest end:24 +S1 input.rmd /^# S1$/;" chapter line:4 language:Markdown end:17 +xyX input.rmd /^```{r xyX}$/;" chunklabel line:6 language:RMarkdown extras:subparser +S2 input.rmd /^# S2$/;" chapter line:18 language:Markdown end:28 +__anon4a45a9700100 input.rmd /^```{r, cache = TRUE, dependson = "xyX"}$/;" chunklabel line:20 language:RMarkdown extras:subparser,anonymous +__anon4a45a9700200 input.rmd /^```{python}$/;" chunklabel line:24 language:RMarkdown extras:subparser,anonymous +S3 input.rmd /^# S3$/;" chapter line:29 language:Markdown end:30 +x input.rmd /^x <- 1$/;" globalVar line:8 language:R extras:guest end:8 +foo input.rmd /^foo <- function () {$/;" function line:9 language:R extras:guest end:12 +y input.rmd /^ y <- 2$/;" functionVar line:10 language:R function:foo extras:guest end:10 +X input.rmd /^X <- func()$/;" globalVar line:14 language:R extras:guest end:14 +f input.rmd /^def f():$/;" function line:25 language:Python extras:guest end:27 +example for u-ctags input.rmd /^title: example for u-ctags$/;" title line:2 language:FrontMatter extras:subparser diff --git a/Units/parser-rmarkdown.r/simple-rmarkdown.d/input.rmd b/Units/parser-rmarkdown.r/simple-rmarkdown.d/input.rmd index b0cd0d025a..b0505ea5d6 100644 --- a/Units/parser-rmarkdown.r/simple-rmarkdown.d/input.rmd +++ b/Units/parser-rmarkdown.r/simple-rmarkdown.d/input.rmd @@ -1,3 +1,6 @@ +--- +title: example for u-ctags +--- # S1 ```{r xyX} diff --git a/docs/news.rst b/docs/news.rst index 3703ebaeec..94c0a7df04 100644 --- a/docs/news.rst +++ b/docs/news.rst @@ -412,6 +412,7 @@ The following parsers have been added: * Elixir *optlib* * Elm *optlib* * Falcon +* FrontMatter *only YAML syntax, running as a guest on R?Markdown* * FunctionParameters *perl based subparser* * Gdbinit script *optlib* * GemSpec *Ruby based subparser* diff --git a/main/parsers_p.h b/main/parsers_p.h index e6f25891a1..fb14b2b602 
100644
--- a/main/parsers_p.h
+++ b/main/parsers_p.h
@@ -30,7 +30,8 @@
 #define YAML_PARSER_LIST \
 	YamlParser, \
 	AnsiblePlaybookParser, \
-	OpenAPIParser
+	OpenAPIParser, \
+	YamlFrontMatter
 #else
 #define YAML_PARSER_LIST
 #endif
@@ -85,6 +86,7 @@
 	FalconParser, \
 	FlexParser, \
 	FortranParser, \
+	FrontMatterParser, \
 	FunctionParametersParser, \
 	FyppParser, \
 	GdbinitParser, \
diff --git a/parsers/frontmatter.c b/parsers/frontmatter.c
new file mode 100644
index 0000000000..59ee456ea1
--- /dev/null
+++ b/parsers/frontmatter.c
@@ -0,0 +1,107 @@
+/*
+*
+* Copyright (c) 2022, Masatake YAMATO
+* Copyright (c) 2022, Red Hat, K.K.
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License version 2 or (at your option) any later version.
+*
+* This module contains functions for extracting language objects in FrontMatter.
+*
+* https://gohugo.io/content-management/front-matter
+*/
+
+/*
+* INCLUDE FILES
+*/
+#include "general.h" /* must always come first */
+
+#include "frontmatter.h"
+
+#include "entry.h"
+#include "parse.h"
+#include "promise.h"
+#include "read.h"
+
+#include <string.h>
+
+/*
+* DATA DEFINITIONS
+*/
+static kindDefinition FrontMatterKinds [] = {
+	{ true, 't', "title", "titles", },
+};
+
+/*
+* FUNCTION DEFINITIONS
+*/
+
+/*
+ * Entry point of the FrontMatter parser.
+ *
+ * Returns at once when the input is empty.  When HAVE_LIBYAML is defined and
+ * the first line compares equal to "---", reads the input to its end and
+ * promises the area that follows the marker line to the YamlFrontMatter
+ * parser.  Other front matter syntaxes are not handled here.
+ */
+static void findFrontMatterTags (void)
+{
+	const unsigned char *line = readLineFromInputFile ();
+
+	if (line == NULL)
+		return;
+
+#ifdef HAVE_LIBYAML
+	if (strcmp("---", (const char *)line) == 0)
+	{
+		line = readLineFromInputFile ();
+		if (line)
+		{
+			/* Take the length only after the NULL check: the input may
+			 * end right after the "---" line. */
+			unsigned long endOffset = strlen((const char *)line);
+			long startLineNum = getInputLineNumber ();
+
+			/* Drain the input; endOffset ends up as the length of the
+			 * last line read. */
+			while ((line = readLineFromInputFile()))
+				endOffset = strlen((const char *)line);
+
+			long endLineNum = getInputLineNumber ();
+
+			makePromise ("YamlFrontMatter", startLineNum, 0,
+						 endLineNum, endOffset, startLineNum);
+		}
+		return;
+	}
+#endif
+}
+
+/*
+ * Define the FrontMatter parser: a single "title" kind, with
+ * findFrontMatterTags () as the entry point.
+ */
+extern parserDefinition* FrontMatterParser (void)
+{
+	parserDefinition* def = parserNew ("FrontMatter");
+	def->kindTable = FrontMatterKinds;
+	def->kindCount = ARRAY_SIZE (FrontMatterKinds);
+
+	def->parser = findFrontMatterTags;
+
+	/*
+	 * This setting (useMemoryStreamInput) is for running
+	 * Yaml parser from YamlFrontMatter as subparser.
+	 * YamlFrontMatter is run from FrontMatter as a guest parser.
+	 * FrontMatter is run from Markdown as a guest parser.
+	 * This stacked structure hits the limitation of the main
+	 * part: subparser's requirement for memory based input stream
+	 * is not propagated to the main part.
+	 *
+	 * TODO: instead of setting useMemoryStreamInput here, we
+	 * should remove the limitation.
+	 */
+	def->useMemoryStreamInput = true;
+
+	return def;
+}
diff --git a/parsers/frontmatter.h b/parsers/frontmatter.h
new file mode 100644
index 0000000000..4a90d100b6
--- /dev/null
+++ b/parsers/frontmatter.h
@@ -0,0 +1,19 @@
+/*
+ * Copyright (c) 2022, Masatake YAMATO
+ *
+ * This source code is released for free distribution under the terms of the
+ * GNU General Public License version 2 or (at your option) any later version.
+ *
+ * Frontmatter parser interface exported to the other parsers
+ */
+
+#ifndef CTAGS_FRONTMATTER_H
+#define CTAGS_FRONTMATTER_H
+
+#include "general.h"
+/* Kind index handed to initForeignTagEntry () by parsers that make FrontMatter tags. */
+typedef enum {
+	FRONTMATTER_TITLE_KIND,
+} frontmatterKind;
+
+#endif
diff --git a/parsers/markdown.c b/parsers/markdown.c
index b528accb26..6d5216a254 100644
--- a/parsers/markdown.c
+++ b/parsers/markdown.c
@@ -260,7 +260,18 @@ static void findMarkdownTags (void)
 			if (lineNum == 1 || inPreambule)
 			{
 				if (line[pos] == '-' && line[pos + 1] == '-' && line[pos + 2] == '-')
+				{
+					if (inPreambule) /* second "---": the preamble is closed on this line */
+					{
+						long endLineNumber = lineNum;
+						if (startLineNumber < endLineNumber)
+							makePromise ("FrontMatter", startLineNumber, 0,
+										 endLineNumber, 0, startSourceLineNumber);
+					}
+					else /* first "---": remember the line where the preamble opens */
+						startSourceLineNumber = startLineNumber = lineNum;
 					inPreambule = !inPreambule;
+				}
 			}
 
 			if (inPreambule)
diff --git a/parsers/yamlfrontmatter.c b/parsers/yamlfrontmatter.c
new file mode 100644
index 0000000000..35b40c507d
--- /dev/null
+++ b/parsers/yamlfrontmatter.c
@@ -0,0 +1,194 @@
+/*
+*
+* Copyright (c) 2022, Masatake YAMATO
+* Copyright (c) 2022, Red Hat, K.K.
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License version 2 or (at your option) any later version.
+*
+* This module contains functions for extracting language objects in FrontMatter
+* using Yaml.
+*
+* https://gohugo.io/content-management/front-matter
+*/
+
+/*
+* INCLUDE FILES
+*/
+#include "general.h" /* must always come first */
+
+#include "frontmatter.h"
+#include "yaml.h"
+
+#include "entry.h"
+#include "gcc-attr.h"
+#include "parse.h"
+#include "read.h"
+#include "subparser.h"
+#include "trace.h"
+
+
+/*
+* DATA DECLARATIONS
+*/
+enum yamlfrontmatterDetectingState {
+	DSTAT_LAST_KEY, /* the previous token was YAML_KEY_TOKEN */
+	DSTAT_LAST_VALUE, /* the previous token was YAML_VALUE_TOKEN */
+	DSTAT_INITIAL, /* any other token came last, or a scalar was just handled */
+};
+/* Subparser state: the Yaml subparser data comes first so a yamlSubparser pointer can be cast to this type. */
+struct sYamlFrontMatterSubparser {
+	yamlSubparser yaml;
+	enum yamlfrontmatterDetectingState detection_state;
+};
+
+
+/*
+* FUNCTION PROTOTYPES
+*/
+static bool yamlFrontmattterInitTagEntry (tagEntryInfo *e, char *name, void *data);
+
+
+/*
+* DATA DEFINITIONS
+*/
+/* Language of the FrontMatter parser; looked up in yamlFrontMatterInitialize () and used for the foreign tags. */
+static langType frontMatterLang;
+/* Ypath table: the "title" entry is paired with DSTAT_LAST_VALUE and fills its tag via yamlFrontmattterInitTagEntry (). */
+static tagYpathTable ypathTables [] = {
+	{
+		"title",
+		DSTAT_LAST_VALUE,
+		.initTagEntry = yamlFrontmattterInitTagEntry,
+	},
+};
+
+
+/*
+* FUNCTION DEFINITIONS
+*/
+/* Record whether the last token was a key or a value marker, and hand scalar tokens seen in either state to ypathHandleToken (). */
+static void yamlfrontmatterStateMachine (struct sYamlFrontMatterSubparser *yamlfrontmatter,
+						  yaml_token_t *token)
+{
+#ifdef DO_TRACING
+	ypathPrintTypeStack (YAML(yamlfrontmatter));
+#endif
+
+	switch (token->type)
+	{
+	case YAML_KEY_TOKEN:
+		yamlfrontmatter->detection_state = DSTAT_LAST_KEY;
+		break;
+	case YAML_SCALAR_TOKEN:
+		switch (yamlfrontmatter->detection_state)
+		{
+		case DSTAT_LAST_KEY:
+			ypathFillKeywordOfTokenMaybe (YAML(yamlfrontmatter), token, getInputLanguage ());
+			/* FALL THROUGH */
+		case DSTAT_LAST_VALUE:
+			TRACE_PRINT("token-callback: %s: %s",
+						(yamlfrontmatter->detection_state == DSTAT_LAST_KEY)? "key": "value",
+						(char*)token->data.scalar.value);
+			ypathHandleToken (YAML(yamlfrontmatter), token, yamlfrontmatter->detection_state,
+							  ypathTables, ARRAY_SIZE (ypathTables));
+			break;
+		default:
+			break;
+		}
+
+		yamlfrontmatter->detection_state = DSTAT_INITIAL; /* a scalar always ends the key/value context */
+
+		break;
+	case YAML_VALUE_TOKEN:
+		yamlfrontmatter->detection_state = DSTAT_LAST_VALUE;
+		break;
+
+	default:
+		yamlfrontmatter->detection_state = DSTAT_INITIAL;
+		break;
+	}
+}
+/* Token callback (installed as newTokenNotfify): push on block starts, run the state machine, pop on block/stream end. */
+static void newTokenCallback (yamlSubparser *s, yaml_token_t *token)
+{
+	if (token->type == YAML_BLOCK_SEQUENCE_START_TOKEN
+		|| token->type == YAML_BLOCK_MAPPING_START_TOKEN)
+		ypathPushType (s, token);
+
+	yamlfrontmatterStateMachine ((struct sYamlFrontMatterSubparser *)s, token);
+
+	if (token->type == YAML_BLOCK_END_TOKEN)
+		ypathPopType (s);
+	else if (token->type == YAML_STREAM_END_TOKEN)
+		ypathPopAllTypes (s);
+}
+/* initTagEntry callback: set up E as a foreign tag NAME of FRONTMATTER_TITLE_KIND in frontMatterLang; always returns true. */
+static bool yamlFrontmattterInitTagEntry (tagEntryInfo *e, char *name, void * data CTAGS_ATTR_UNUSED)
+{
+	initForeignTagEntry (e, name, frontMatterLang, FRONTMATTER_TITLE_KIND);
+	return true;
+}
+/* inputStart callback: reset the detection state and the ypath type stack pointer. */
+static void yamlFrontMatterInputStart(subparser *s)
+{
+	((struct sYamlFrontMatterSubparser*)s)->detection_state = DSTAT_INITIAL;
+	((yamlSubparser*)s)->ypathTypeStack = NULL;
+}
+/* inputEnd callback: asserts that the ypath type stack pointer is NULL again. */
+static void yamlFrontMatterInputEnd(subparser *s)
+{
+	Assert (((yamlSubparser*)s)->ypathTypeStack == NULL);
+}
+/* Parser entry point: only calls scheduleRunningBaseparser (0); presumably 0 selects the first dependency (Yaml) - confirm. */
+static void findYamlFrontMatterTags (void)
+{
+	scheduleRunningBaseparser (0);
+}
+/* initialize callback: compile ypathTables for LANGUAGE and look up the FrontMatter language. */
+static void yamlFrontMatterInitialize (langType language)
+{
+	ypathCompileTables (language, ypathTables, ARRAY_SIZE (ypathTables), 0);
+	frontMatterLang = getNamedLanguage ("FrontMatter", 0);
+}
+/* finalize callback: delete the compiled ypath code, but only when INITIALIZED is true. */
+static void yamlFrontMatterFinalize (langType language, bool initialized)
+{
+	if (initialized)
+		ypathCompiledCodeDelete (ypathTables, ARRAY_SIZE (ypathTables));
+}
+/* Define the YamlFrontMatter parser: a subparser of Yaml with a foreigner dependency on FrontMatter and no kinds of its own. */
+extern parserDefinition* YamlFrontMatter (void)
+{
+	static struct sYamlFrontMatterSubparser yamlfrontmatterSubparser = {
+		.yaml = {
+			.subparser = {
+				.direction = SUBPARSER_SUB_RUNS_BASE,
+				.inputStart = yamlFrontMatterInputStart,
+				.inputEnd = yamlFrontMatterInputEnd,
+			},
+			.newTokenNotfify = newTokenCallback
+		},
+	};
+	static parserDependency dependencies [] = {
+		{ DEPTYPE_SUBPARSER, "Yaml", &yamlfrontmatterSubparser },
+		{ DEPTYPE_FOREIGNER, "FrontMatter", NULL },
+	};
+
+	parserDefinition* const def = parserNew ("YamlFrontMatter");
+
+	def->dependencies = dependencies;
+	def->dependencyCount = ARRAY_SIZE (dependencies);
+
+	def->kindTable = NULL;
+	def->kindCount = 0;
+	def->parser = findYamlFrontMatterTags;
+	def->initialize = yamlFrontMatterInitialize;
+	def->finalize = yamlFrontMatterFinalize;
+
+	/* This parser runs ONLY as a part of FrontMatter parser.
+	 * User may not want to enable/disable this parser directly. */
+	def->invisible = true;
+
+	return def;
+}
diff --git a/source.mak b/source.mak
index 18b7459290..4ab083e74e 100644
--- a/source.mak
+++ b/source.mak
@@ -258,6 +258,7 @@ PARSER_HEADS = \
 	parsers/cxx/cxx_token.h \
 	parsers/cxx/cxx_token_chain.h \
 	\
+	parsers/frontmatter.h \
 	parsers/iniconf.h \
 	parsers/m4.h \
 	parsers/make.h \
@@ -319,6 +320,7 @@ PARSER_SRCS = \
 	parsers/falcon.c \
 	parsers/flex.c \
 	parsers/fortran.c \
+	parsers/frontmatter.c \
 	parsers/fypp.c \
 	parsers/gdscript.c \
 	parsers/gemspec.c \
@@ -408,6 +410,8 @@ YAML_SRCS = \
 	\
 	parsers/ansibleplaybook.c \
 	\
+	parsers/yamlfrontmatter.c \
+	\
 	$(NULL)
 
 PCRE2_HEADS =