Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
6c3e86b
Starting work on v2 schema per #195. Doing this as a chained customiz…
joeytakeda Nov 16, 2024
cd14903
Build file targets basically working; more user-information work to do.
martindholmes Jan 10, 2025
8dd3bcf
Merge branch 'dev' into iss195_config
joeytakeda Jan 17, 2025
dc9a2ee
Merge branch 'dev' into iss195_config
joeytakeda Jan 17, 2025
001c45a
Merge pull request #329 from projectEndings/issue-195-build-file
joeytakeda Jan 17, 2025
37e6d89
Merge branch 'dev' into iss195_config
joeytakeda Jan 17, 2025
6b41bf8
Merge branch 'dev' into iss195_config
joeytakeda Feb 9, 2025
31f5bd4
Significant progress on v2.0 schema changes (#iss195)
joeytakeda Feb 9, 2025
ed16797
Updates to documentation to cover CustomEvents.
martindholmes Feb 10, 2025
9df6fc7
Major work and updates for v2 (#195)
joeytakeda Feb 19, 2025
feb5a79
Merge branch 'iss195_config' of https://github.com/projectEndings/sta…
joeytakeda Feb 19, 2025
e38afbd
Updating schema to reflect new class
joeytakeda Feb 19, 2025
9802293
Small fix; need recursion
joeytakeda Feb 19, 2025
4ae8468
Rebuilding config for tests
joeytakeda Feb 19, 2025
674b616
Updating documentation configuration
joeytakeda Feb 19, 2025
50779a8
Adding defaults example codeblock and handling
joeytakeda Feb 19, 2025
edad44d
More updates to try and get sample config egXML rendering (somewhat) …
joeytakeda Feb 19, 2025
b33c8f8
Small tweak for style
joeytakeda Feb 19, 2025
0bdc49b
Attempt at making fillable form for configuration
joeytakeda Feb 19, 2025
a24aac9
Removing inadvertently committed temp files
joeytakeda Feb 19, 2025
01da0f2
Small adjustments
joeytakeda Feb 19, 2025
c42d132
Fixes #336
joeytakeda Feb 20, 2025
d2d35cb
Add underscore to version suffix, if needed
joeytakeda Feb 20, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -53,3 +53,5 @@ js/ssSearch-debug.js
js/ssSearch.js
js/*.js.map

# JetBrains IDE configuration
.idea
75 changes: 66 additions & 9 deletions build.xml
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@
</pathconvert>

<!--Load the configuration file as a property file-->
<xmlproperty file="${ssConfigFile}" keeproot="true"/>
<xmlproperty file="${ssConfigFile}" keeproot="true" collapseattributes="true"/>

<!--Set the verbose flag to false by default; set to true to see detailed messages-->
<property name="ssVerbose" value="false"/>
Expand All @@ -114,7 +114,7 @@
<property name="ssVerboseReport" value="false"/>

<!--The path to the search page, relative to the configuration directory-->
<property name="ssSearchFilePath" value="${configDir.converted}/${config.params.searchFile}"/>
<property name="ssSearchFilePath" value="${configDir.converted}/${config.params.searchPage.file}"/>

<!--The name of the collection dir, derived from the search file-->
<dirname property="ssCollectionDirName" file="${ssSearchFilePath}"/>
Expand All @@ -130,9 +130,12 @@

<!--Output folder, which is forked, depending on whether or not an output folder
is specified in the configuration file-->
<property name="ssOutputFolder" if:set="config.params.outputFolder"
value="${config.params.outputFolder}"/>
<property name="ssOutputFolder" unless:set="config.params.outputFolder" value="staticSearch"/>
<property name="ssOutputFolder"
if:set="config.params.output.dir"
value="${config.params.output.dir}"/>
<property name="ssOutputFolder"
unless:set="config.params.output.dir"
value="staticSearch"/>

<!--Stemmer folder, which is forked, depending on whether or not a stemmer folder
is specified in the configuration file-->
Expand Down Expand Up @@ -189,6 +192,7 @@

<!-- If we're building the test data, we need to generate the VERSION file
and also copy the .htm_ files to .html before starting. -->
<echo message="${ssConfigFileName}"/>
<if>
<equals arg1="configTest.xml" arg2="${ssConfigFileName}"/>
<then>
Expand Down Expand Up @@ -235,11 +239,54 @@
</then>
</if>
</target>

<target name="validate">
<target name="checkConfigVersion">
<description>
TARGET: validate
Task to validate that the source XHTML is well-formed XHTML. Note that this DOES
TARGET checkConfigVersion
Checks the supplied configuration file to see if it has
@version="2" on its root element. If not, it invokes the
conversion process.
</description>
<!-- Log which configuration file is about to be inspected. -->
<echo message="Checking version of config file ${ssBaseDir.converted}/${ssConfig}..."/>
<!-- Fileset that selects the config file only when its text matches a
config start tag carrying a version attribute whose value looks like 2.
NOTE(review): the configOnly id is not referenced anywhere else in this
target as shown; confirm whether it is used elsewhere or is left over. -->
<fileset id="configOnly" dir="${ssBaseDir.converted}" includes="${ssConfig}">
<containsregexp expression="&lt;config[^&gt;]+version\s*=\s*.2."/>
</fileset>
<!-- isV2 becomes "true" when the config.version property starts with "2",
and "false" otherwise. Presumably config.version is populated by the
xmlproperty load of the config file (keeproot and collapseattributes both
true); confirm against the property setup earlier in the build file. -->
<condition property="isV2" value="true" else="false">
<matches pattern="^2" string="${config.version}"/>
</condition>

<!-- Call the conversion target unless isV2 is true. -->
<antcall target="convertConfigFile" unless:true="${isV2}"/>
</target>

<target name="convertConfigFile">
<description>
TARGET convertConfigFile
This runs an identity transform on the supplied config
file to create from it a version 2 file, and then reports
the outcome to the user.
</description>
<!-- Ask the user whether the converted configuration should overwrite the
existing file ("o") or be written to a new file ("n"). The answer is stored
in the outputStrategy property; only "o" and "n" are accepted, and "n" is
the declared default value. -->
<input addproperty="outputStrategy"
message="Your config file is for version 1 of staticSearch.&#x0a;
It will be converted to version 2 format.&#x0a;
Would you like to overwrite the existing file or &#x0a;
create a new file? (o=overwrite, n=new)"
validargs="o,n"
defaultvalue="n"/>
<!-- Disabled debugging aid: echoes the chosen output strategy. -->
<!--<echo message="${outputStrategy}"/>-->
<!-- Run xsl/convert_v1_to_v2.xsl over the config file with Saxon in a forked
JVM. failonerror is true, so a failed transformation fails the build.
NOTE(review): the source document is addressed via ssBaseDir.converted plus
ssConfig, whereas other visible targets use the ssConfigFile property;
confirm that ssConfig is defined and resolves to the same file. -->
<java classpath="${ssSaxon}" classname="net.sf.saxon.Transform" failonerror="true" fork="true">
<arg value="-xsl:${ssBaseDir.converted}/xsl/convert_v1_to_v2.xsl"/>
<arg value="-s:${ssBaseDir.converted}/${ssConfig}"/>
<arg value="--suppressXsltNamespaceCheck:on"/>
<!-- Stylesheet parameters: base directory, verbosity flag, and the
overwrite/new choice collected by the input task in this target. -->
<arg value="ssBaseDir=${ssBaseDir.converted}"/>
<arg value="ssVerbose=${ssVerbose}"/>
<arg value="output=${outputStrategy}"/>
</java>
</target>

<target name="validate.html">
<description>
TARGET: validate.html
Target to check input files as well-formed XHTML. Note that this DOES
NOT check whether or not it is valid XHTML: only that it is well-formed. The config file
is checked separately by the validate.config target, which reports an invalid config but
does not fail the build, since we must assume that some people just don't care.
Expand All @@ -255,6 +302,9 @@
<exclude name="**/staticSearch_report.html"/>
</fileset>
</xmlvalidate>
</target>

<target name="validate.config" depends="checkConfigVersion">
<echo message="Validating ${ssConfigFile} against the staticSearch schema..."/>
<exec executable="java" failonerror="false">
<arg line="-jar ${ssBaseDir.converted}/lib/jing.jar"/>
Expand All @@ -263,6 +313,13 @@
</exec>
</target>

<!-- Aggregate target: it contains no tasks of its own. All work is done by
the targets listed in depends, validate.config first and then validate.html. -->
<target name="validate" depends="validate.config, validate.html">
<description>
TARGET: validate
Task to validate various input files, including the XHTML and the configuration file.
</description>
</target>



<target name="config">
Expand Down
13 changes: 11 additions & 2 deletions buildSchema.xml
Original file line number Diff line number Diff line change
Expand Up @@ -111,8 +111,9 @@
****************************************************************-->

<target name="patchOddSrc">
<description>Patches the staticSearch ODD to use RNG interleave
to work around bugs in TEI's regular processing.</description>
<description>Preprocesses the staticSearch ODD to expand any constructs
or modify features that are not handled by the TEI stylesheets
(e.g. interleave).</description>
<echo message="${echo.separator}"/>
<echo message="Patching the staticSearch ODD ..."/>
<echo message="${echo.separator}"/>
Expand Down Expand Up @@ -240,6 +241,14 @@
and documentation creation tasks.</description>
</target>

<!-- Aggregate target: it contains no tasks of its own. All work is done by
the targets listed in depends (patchOddSrc, odd2processedOdd, odd2Doc), in
that order. odd2processedOdd and odd2Doc are defined elsewhere in this build
file and are not shown in this view. -->
<target name="justDocs" depends="patchOddSrc, odd2processedOdd, odd2Doc">
<description>
TARGET: justDocs

This target just creates the documentation; useful for quick proofing and rapid development.
</description>
</target>



</project>
47 changes: 19 additions & 28 deletions configTest.xml
Original file line number Diff line number Diff line change
@@ -1,36 +1,30 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--Configuration file file:/Users/takeda/projects/staticSearch/configTest.xml converted to version 2 2025-02-08.-->
<config xmlns="http://hcmc.uvic.ca/ns/staticSearch" version="2">
<params>
<searchFile>test/search.html</searchFile>
<versionFile>test/VERSION</versionFile>
<recurse>true<!--Need to set recurse to true for the test--></recurse>
<phrasalSearch>true</phrasalSearch>
<wildcardSearch>true</wildcardSearch>
<createContexts>true</createContexts>
<resultsPerPage>5</resultsPerPage>
<minWordLength>2</minWordLength>
<!--NOTE: If phrasalSearch is set to TRUE, then
maxContexts prop will be ignored-->
<maxKwicsToHarvest>5</maxKwicsToHarvest>
<maxKwicsToShow>5</maxKwicsToShow>
<totalKwicLength>15</totalKwicLength>
<kwicTruncateString>...</kwicTruncateString>
<stopwordsFile>test/test_stopwords.txt</stopwordsFile>
<dictionaryFile>xsl/english_words.txt</dictionaryFile>
<outputFolder>ssTest</outputFolder>
<searchPage file="test/search.html"/>
<index recurse="true"/>
<stopwords file="test/test_stopwords.txt"/>
<dictionary file="xsl/english_words.txt"/>
<scoringAlgorithm name="raw"/>
<stemmer dir="stemmers/en/"/>
<tokenizer minWordLength="2"/>
<createContexts create="true"
phrasalSearch="true"
wildcardSearch="true"
maxKwicLength="15"
kwicTruncateString="..."/>
<results resultsPerPage="5" maxKwicsToShow="5" maxResults="1000"/>
<version file="test/VERSION"/>
<output dir="ssTest"/>
</params>

<rules>
<rule weight="2"
match="h1 | h2"/>
<rule weight="0"
match="span[@class='lineNum']"/>
<rule weight="0"
match="script | style"/>
<rule weight="2" match="h1 | h2"/>
<rule weight="0" match="span[@class='lineNum']"/>
<rule weight="0" match="script | style"/>
<rule weight="10" match="span[@class='weighty']"/>
<rule weight="5" match="td/span"/>
</rules>

<contexts>
<context match="blockquote" label="Quotations"/>
<context match="div[@class='l']"/>
Expand All @@ -39,12 +33,10 @@
<context match="cite" label="Citations"/>
<context match="p[contains-token(@class,'citation')]" label="Citations"/>
</contexts>

<excludes>
<exclude type="index" match="html[@id='excluded']"/>
<exclude match="meta[contains-token(@class,'excludedMeta')]" type="filter"/>
</excludes>

<filters>
<filter filterName="Worth reading">
<span xmlns="http://www.w3.org/1999/xhtml">
Expand All @@ -56,6 +48,5 @@
<span style="font-variant: small-caps;">Random Piffle</span>
</span>
</filter>

</filters>
</config>
61 changes: 61 additions & 0 deletions configTest_v1.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
<?xml version="1.0" encoding="UTF-8"?>
<config xmlns="http://hcmc.uvic.ca/ns/staticSearch" version="1">
<params>
<searchFile>test/search.html</searchFile>
<versionFile>test/VERSION</versionFile>
<recurse>true<!--Need to set recurse to true for the test--></recurse>
<phrasalSearch>true</phrasalSearch>
<wildcardSearch>true</wildcardSearch>
<createContexts>true</createContexts>
<resultsPerPage>5</resultsPerPage>
<minWordLength>2</minWordLength>
<!--NOTE: If phrasalSearch is set to true, then
the maxKwicsToHarvest setting will be ignored-->
<maxKwicsToHarvest>5</maxKwicsToHarvest>
<maxKwicsToShow>5</maxKwicsToShow>
<totalKwicLength>15</totalKwicLength>
<kwicTruncateString>...</kwicTruncateString>
<stopwordsFile>test/test_stopwords.txt</stopwordsFile>
<dictionaryFile>xsl/english_words.txt</dictionaryFile>
<outputFolder>ssTest</outputFolder>
</params>

<rules>
<rule weight="2"
match="h1 | h2"/>
<rule weight="0"
match="span[@class='lineNum']"/>
<rule weight="0"
match="script | style"/>
<rule weight="10" match="span[@class='weighty']"/>
<rule weight="5" match="td/span"/>
</rules>

<contexts>
<context match="blockquote" label="Quotations"/>
<context match="div[@class='l']"/>
<context match="span[@class='note'] | *[contains-token(@class,'sidenotes')]"
label="Notes"/>
<context match="cite" label="Citations"/>
<context match="p[contains-token(@class,'citation')]" label="Citations"/>
</contexts>

<excludes>
<exclude type="index" match="html[@id='excluded']"/>
<exclude match="meta[contains-token(@class,'excludedMeta')]" type="filter"/>
</excludes>

<filters>
<filter filterName="Worth reading">
<span xmlns="http://www.w3.org/1999/xhtml">
<i>Really</i> worth reading:
</span>
</filter>
<filter filterName="Random nonsense">
<span xmlns="http://www.w3.org/1999/xhtml" lang="en">
<span style="font-variant: small-caps;">Random Piffle</span>
</span>
</filter>

</filters>
</config>
64 changes: 29 additions & 35 deletions config_documentation.xml
Original file line number Diff line number Diff line change
@@ -1,39 +1,33 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--Configuration file file:/Users/takeda/projects/staticSearch/config_documentation.xml converted to version 2 2025-02-19.-->
<config xmlns="http://hcmc.uvic.ca/ns/staticSearch" version="2">
<params>
<searchFile>docs/index.html</searchFile>
<versionFile>test/VERSION</versionFile>
<stemmerFolder>en</stemmerFolder>
<recurse>false</recurse>
<phrasalSearch>true</phrasalSearch>
<wildcardSearch>true</wildcardSearch>
<createContexts>true</createContexts>
<!--NOTE: If phrasalSearch is set to TRUE, then
maxContexts prop will be ignored-->
<maxKwicsToHarvest>3</maxKwicsToHarvest>
<maxKwicsToShow>10</maxKwicsToShow>
<totalKwicLength>15</totalKwicLength>
<kwicTruncateString>...</kwicTruncateString>
<stopwordsFile>xsl/english_stopwords.txt</stopwordsFile>
<dictionaryFile>xsl/english_words.txt</dictionaryFile>
<outputFolder>search</outputFolder>
</params>

<rules>
<rule weight="2"
match="h1 | h2"/>
<!--ignore the TOC-->
<rule weight="0"
match="div[contains-token(@class,'tei_front')]"/>

<!--Don't index codeblocks in table cells-->
<rule weight="0" match="td/pre"/>

</rules>

<excludes>
<params>
<searchPage file="docs/index.html"/>
<index recurse="false"/>
<stopwords file="xsl/english_stopwords.txt"/>
<dictionary file="xsl/english_words.txt"/>
<scoringAlgorithm name="raw"/>
<stemmer dir="stemmers/en"/>
<tokenizer minWordLength="2"/>
<createContexts create="true"
phrasalSearch="true"
wildcardSearch="true"
maxKwicLength="15"
kwicTruncateString="..."/>
<results resultsPerPage="0" maxKwicsToShow="10" maxResults="2000"/>
<version file="test/VERSION"/>
<output dir="search"/>
</params>
<rules>
<rule weight="2" match="h1 | h2"/>
<!--ignore the TOC-->
<rule weight="0" match="div[contains-token(@class,'tei_front')]"/>
<!--Don't index codeblocks in table cells-->
<rule weight="0" match="td/pre"/>
</rules>
<excludes>
<!--Ignore the master SS doc from the search-->
<exclude type="index" match="html[matches(document-uri(/),'staticSearch.html$')]"/>
</excludes>

<exclude type="index"
match="html[matches(document-uri(/),'staticSearch.html$')]"/>
</excludes>
</config>
Loading