Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/intersystems/iknow
Browse files Browse the repository at this point in the history
  • Loading branch information
JosDenysGitHub committed Mar 4, 2022
2 parents 78cffdb + 782cc2e commit 7b58dce
Show file tree
Hide file tree
Showing 46 changed files with 110,540 additions and 107,331 deletions.
18 changes: 9 additions & 9 deletions .github/workflows/CI.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ env:
JSON_INCLUDE: ${{ github.workspace }}/thirdparty/json/single_include

jobs:
manylinux2010_x86_64:
manylinux2014_x86_64:
runs-on: ubuntu-20.04
outputs:
REF_TESTING_PASSED: ${{ steps.tests.outputs.REF_TESTING_PASSED }}
Expand Down Expand Up @@ -57,7 +57,7 @@ jobs:
~/ccache
~/pipcache
- name: build and run C++ unit tests
run: docker run --rm -e CCACHE_DIR=/ccache -e PIP_CACHE_DIR=/pipcache -e CCACHE_MAXSIZE=500M -e ICU_URL -e JSON_URL -e CYTHON_VERSION -v ~/ccache:/ccache -v ~/pipcache:/pipcache -v $GITHUB_WORKSPACE:/iknow quay.io/pypa/manylinux2010_x86_64:$MANYLINUX2010_X86_64_TAG /iknow/actions/build_manylinux.sh
run: docker container run --rm -e CCACHE_DIR=/ccache -e PIP_CACHE_DIR=/pipcache -e CCACHE_MAXSIZE=500M -e ICU_URL -e JSON_URL -e CYTHON_VERSION -v ~/ccache:/ccache -v ~/pipcache:/pipcache -v $GITHUB_WORKSPACE:/iknow quay.io/pypa/manylinux2014_x86_64:$MANYLINUX2014_X86_64_TAG /iknow/actions/build_manylinux.sh
- name: upload wheel artifact
uses: actions/upload-artifact@v2
with:
Expand Down Expand Up @@ -107,7 +107,7 @@ jobs:
sudo apt-get update
sudo apt-get install -y qemu-user-static binfmt-support
- name: build and run C++ unit tests
run: docker run --rm -e CCACHE_DIR=/ccache -e PIP_CACHE_DIR=/pipcache -e CCACHE_MAXSIZE=500M -e ICU_URL -e JSON_URL -e CYTHON_VERSION -v ~/ccache:/ccache -v ~/pipcache:/pipcache -v $GITHUB_WORKSPACE:/iknow quay.io/pypa/manylinux2014_aarch64:$MANYLINUX2014_AARCH64_TAG /iknow/actions/build_manylinux.sh
run: docker container run --rm -e CCACHE_DIR=/ccache -e PIP_CACHE_DIR=/pipcache -e CCACHE_MAXSIZE=500M -e ICU_URL -e JSON_URL -e CYTHON_VERSION -v ~/ccache:/ccache -v ~/pipcache:/pipcache -v $GITHUB_WORKSPACE:/iknow quay.io/pypa/manylinux2014_aarch64:$MANYLINUX2014_AARCH64_TAG /iknow/actions/build_manylinux.sh
- name: upload wheel artifact
uses: actions/upload-artifact@v2
with:
Expand Down Expand Up @@ -143,7 +143,7 @@ jobs:
sudo apt-get update
sudo apt-get install -y qemu-user-static binfmt-support
- name: build and run C++ unit tests
run: docker run --rm -e CCACHE_DIR=/ccache -e PIP_CACHE_DIR=/pipcache -e CCACHE_MAXSIZE=500M -e ICU_URL -e JSON_URL -e CYTHON_VERSION -v ~/ccache:/ccache -v ~/pipcache:/pipcache -v $GITHUB_WORKSPACE:/iknow quay.io/pypa/manylinux2014_ppc64le:$MANYLINUX2014_PPC64LE_TAG /iknow/actions/build_manylinux.sh
run: docker container run --rm -e CCACHE_DIR=/ccache -e PIP_CACHE_DIR=/pipcache -e CCACHE_MAXSIZE=500M -e ICU_URL -e JSON_URL -e CYTHON_VERSION -v ~/ccache:/ccache -v ~/pipcache:/pipcache -v $GITHUB_WORKSPACE:/iknow quay.io/pypa/manylinux2014_ppc64le:$MANYLINUX2014_PPC64LE_TAG /iknow/actions/build_manylinux.sh
- name: upload wheel artifact
uses: actions/upload-artifact@v2
with:
Expand Down Expand Up @@ -284,9 +284,9 @@ jobs:
${{ github.workspace }}/reference_materials/reports
test_result:
needs: [manylinux2010_x86_64, manylinux2014_aarch64, manylinux2014_ppc64le, macosx_10_9_x86_64, windows_x86_64]
needs: [manylinux2014_x86_64, manylinux2014_aarch64, manylinux2014_ppc64le, macosx_10_9_x86_64, windows_x86_64]
runs-on: ubuntu-20.04
if: needs.manylinux2010_x86_64.outputs.REF_TESTING_PASSED == '0' || needs.macosx_10_9_x86_64.outputs.REF_TESTING_PASSED == '0' || needs.windows_x86_64.outputs.REF_TESTING_PASSED == '0'
if: needs.manylinux2014_x86_64.outputs.REF_TESTING_PASSED == '0' || needs.macosx_10_9_x86_64.outputs.REF_TESTING_PASSED == '0' || needs.windows_x86_64.outputs.REF_TESTING_PASSED == '0'
steps:
- name: create comment
uses: peter-evans/commit-comment@v1
Expand All @@ -298,7 +298,7 @@ jobs:
deploy:
if: (github.event_name == 'push' || github.event_name == 'workflow_dispatch') && github.ref == 'refs/heads/master'
needs: [manylinux2010_x86_64, manylinux2014_aarch64, manylinux2014_ppc64le, macosx_10_9_x86_64, windows_x86_64]
needs: [manylinux2014_x86_64, manylinux2014_aarch64, manylinux2014_ppc64le, macosx_10_9_x86_64, windows_x86_64]
runs-on: ubuntu-20.04
env:
PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}
Expand All @@ -316,10 +316,10 @@ jobs:
key: ${{ github.job }}-run-${{ github.run_number }}
restore-keys: ${{ github.job }}-run-
path: ~/.cache/pip
- name: download manylinux2010_x86_64-wheel
- name: download manylinux2014_x86_64-wheel
uses: actions/download-artifact@v2
with:
name: manylinux2010_x86_64-wheel
name: manylinux2014_x86_64-wheel
path: ~/wheels
- name: download manylinux2014_aarch64-wheel
uses: actions/download-artifact@v2
Expand Down
9 changes: 3 additions & 6 deletions actions/build_manylinux.sh
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ if [ "$PROCESSOR" = aarch64 ] || [ "$PROCESSOR" = ppc64le ]; then
yum install -y epel-release
# this mirror is often slow, so disable it
echo "exclude=csc.mcs.sdsmt.edu" >> /etc/yum/pluginconf.d/fastestmirror.conf
elif [ "$PROCESSOR" = x86_64 ]; then
echo "exclude=mirror.es.its.nyu.edu" >> /etc/yum/pluginconf.d/fastestmirror.conf
fi
yum install -y dos2unix ccache
mkdir -p /opt/ccache
Expand All @@ -36,11 +38,6 @@ ln -s /usr/bin/ccache /opt/ccache/c++
ln -s /usr/bin/ccache /opt/ccache/gcc
ln -s /usr/bin/ccache /opt/ccache/g++
export PATH="/opt/ccache:$PATH"
if [ "$PROCESSOR" = x86_64 ]; then
# On manylinux2010_x86_64, the version of ccache is too old to recognize the
# CCACHE_MAXSIZE environment variable, so set the max cache size manually.
ccache --max-size "$CCACHE_MAXSIZE"
fi


##### Build ICU if it's not cached #####
Expand All @@ -53,7 +50,7 @@ if ! [ -f "$ICUDIR/iknow_icu_url.txt" ] || [ $(cat "$ICUDIR/iknow_icu_url.txt")

dos2unix -f *.m4 config.* configure* *.in install-sh mkinstalldirs runConfigureICU
export CXXFLAGS="-std=c++11"
PYTHON=/opt/python/cp39-cp39/bin/python ./runConfigureICU Linux --prefix="$ICUDIR"
PYTHON=/opt/python/cp310-cp310/bin/python ./runConfigureICU Linux --prefix="$ICUDIR"
gmake -j $(nproc)
gmake install
echo "$ICU_URL" > "$ICUDIR/iknow_icu_url.txt"
Expand Down
8 changes: 4 additions & 4 deletions actions/dependencies.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@ PYVERSIONS_OSX="3.6.15 3.7.12 3.8.12 3.9.10 3.10.2"
BUILDCACHE_NAME="Release v0.27.6"
BUILDCACHE_URL_WIN=https://github.com/mbitsnbites/buildcache/releases/download/v0.27.6/buildcache-windows.zip
CYTHON_VERSION=0.29.28
MANYLINUX2010_X86_64_TAG=2022-02-13-594988e
MANYLINUX2014_AARCH64_TAG=2022-02-13-594988e
MANYLINUX2014_PPC64LE_TAG=2022-02-13-594988e
MANYLINUX2014_X86_64_TAG=2022-02-27-769bdbd
MANYLINUX2014_AARCH64_TAG=2022-02-27-769bdbd
MANYLINUX2014_PPC64LE_TAG=2022-02-27-769bdbd
# END DEPENDENCY-AUTOUPDATE SECTION


Expand Down Expand Up @@ -57,6 +57,6 @@ fi

# set variables that will be needed in later steps
echo "CYTHON_VERSION=$CYTHON_VERSION" >> $GITHUB_ENV
echo "MANYLINUX2010_X86_64_TAG=$MANYLINUX2010_X86_64_TAG" >> $GITHUB_ENV
echo "MANYLINUX2014_X86_64_TAG=$MANYLINUX2014_X86_64_TAG" >> $GITHUB_ENV
echo "MANYLINUX2014_AARCH64_TAG=$MANYLINUX2014_AARCH64_TAG" >> $GITHUB_ENV
echo "MANYLINUX2014_PPC64LE_TAG=$MANYLINUX2014_PPC64LE_TAG" >> $GITHUB_ENV
10 changes: 5 additions & 5 deletions actions/update_manylinux.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,13 @@


vars = updatelib.get_vars()
manylinux2010_x86_64_tag = vars['MANYLINUX2010_X86_64_TAG']
manylinux2014_x86_64_tag = vars['MANYLINUX2014_X86_64_TAG']
manylinux2014_aarch64_tag = vars['MANYLINUX2014_AARCH64_TAG']
manylinux2014_ppc64le_tag = vars['MANYLINUX2014_PPC64LE_TAG']

# send Quay API requests and parse responses
tags = []
for repo in ('manylinux2010_x86_64', 'manylinux2014_aarch64', 'manylinux2014_ppc64le'):
for repo in ('manylinux2014_x86_64', 'manylinux2014_aarch64', 'manylinux2014_ppc64le'):
r = requests.get(f'https://quay.io/api/v1/repository/pypa/{repo}/image')
json_data = r.json()
for image in reversed(json_data['images']):
Expand All @@ -30,15 +30,15 @@
tags.append(tag)

# set variables to the latest manylinux image tags
vars['MANYLINUX2010_X86_64_TAG'] = tags[0]
vars['MANYLINUX2014_X86_64_TAG'] = tags[0]
vars['MANYLINUX2014_AARCH64_TAG'] = tags[1]
vars['MANYLINUX2014_PPC64LE_TAG'] = tags[2]
updatelib.set_vars(vars)

# set environment variables for next GitHub actions step
message = []
if manylinux2010_x86_64_tag != tags[0]:
message.append(['manylinux2010_x86_64', manylinux2010_x86_64_tag, tags[0]])
if manylinux2014_x86_64_tag != tags[0]:
message.append(['manylinux2014_x86_64', manylinux2014_x86_64_tag, tags[0]])
if manylinux2014_aarch64_tag != tags[1]:
message.append(['manylinux2014_aarch64', manylinux2014_aarch64_tag, tags[1]])
if manylinux2014_ppc64le_tag != tags[2]:
Expand Down
2 changes: 1 addition & 1 deletion actions/updatelib.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ def setenv(name, value):
'BUILDCACHE_NAME',
'BUILDCACHE_URL_WIN',
'CYTHON_VERSION',
'MANYLINUX2010_X86_64_TAG',
'MANYLINUX2014_X86_64_TAG',
'MANYLINUX2014_AARCH64_TAG',
'MANYLINUX2014_PPC64LE_TAG'
}
Expand Down
Empty file added language_development/en
Empty file.
153 changes: 153 additions & 0 deletions language_development/find_examples_for_rule_with_udct.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
# -*- coding: utf-8 -*-
"""
Created on Mar 1 2022
@author: sdebergh
# This Python file uses the following encoding: utf-8
find_examples_for_rule_with_udct.py is a tool to find sentences in which a given rule is applied, with a user dictionary loaded into the engine.
Usage: "python find_examples_for_rule_with_udct.py <text files directory> <output file> <language> <rule number> <user dictionary>"
Example (on Windows): "python find_examples_for_rule_with_udct.py C:\\TextCorpus\\English\\Financial\\ C:\\output\\examples.txt en 531 C:\\repos\\iKnow\\reference_materials\\udct_test_dictionaries\\en_udct.txt"
-> find examples for rule 531 of the English language model
"""

import sys, os

# do "pip install iknowpy" if iknowpy is not installed
import iknowpy

# read command line
# All five positional arguments are required. Exit with a usage message
# instead of failing with a bare IndexError when one of them is missing.
if len(sys.argv) < 6:
    sys.exit('Usage: python find_examples_for_rule_with_udct.py '
             '<text files directory> <output file> <language> <rule number> <user dictionary>')
in_path_par = sys.argv[1]   # directory containing the input text files
out_path_par = sys.argv[2]  # output file
language_par = sys.argv[3]  # language code of the language model
rule_number = sys.argv[4]   # rule number, kept as a string
user_dct_par = sys.argv[5]  # user dictionary file

# functions
# add a line in the output file
def write_ln(file_, text_):
    """Write one line of text to a binary file object.

    Parameters:
        file_: object with a ``write`` method that accepts bytes.
        text_: text of the line, without line terminator.

    The text is terminated with CR LF and encoded as UTF-8 before writing.
    """
    terminated = text_ + "\r\n"
    payload = terminated.encode('utf8')
    file_.write(payload)

# create a mapping table for rule numbers, based on xx_compiler_report.log
def create_mapping_table(mapping_file, table=None):
    """Fill a mapping table for rule numbers from a compiler report file.

    Only the first whitespace-separated token of each line is inspected.
    When that token contains a colon, the text before the first colon becomes
    the key and the text between the first and the second colon (or the end
    of the token) becomes the value. All other lines are ignored.

    Parameters:
        mapping_file: path of the report file; it is read as UTF-8.
        table: dictionary to fill. Defaults to the module-level
            ``mapping_table`` so that existing callers keep working.

    Blank and whitespace-only lines are skipped, and the file is closed when
    reading is finished or fails.
    """
    if table is None:
        table = mapping_table
    with open(mapping_file, encoding='utf-8') as read_mapping_file:
        for line in read_mapping_file:
            fields = line.split()
            if not fields:
                # nothing but whitespace on this line
                continue
            mapping = fields[0]
            if ':' in mapping:
                parts = mapping.split(':')
                table[parts[0]] = parts[1]

# find the matching number in the mapping table
def extract_rule_id(rule_order):
    """Return the entry of the module-level ``mapping_table`` for ``rule_order``.

    Raises KeyError when ``rule_order`` is not present in the table.
    """
    return mapping_table[rule_order]

# read user dictionary
def read_udct_file(file_,udct_):
    """Read a user dictionary file and register its entries on ``udct_``.

    Parameters:
        file_: path of the user dictionary file; it is read as UTF-8.
        udct_: object providing ``add_label``, ``add_certainty_level`` and
            ``add_sent_end_condition`` (the caller in this file passes an
            ``iknowpy.UserDictionary``).

    Only lines that contain a comma and do not start with ``/*`` are used.
    Each such line is split on commas into a lexrep, an action and optional
    further fields:

    * lexrep starting with ``@``: the text after the ``@`` is the literal.
      For the action "UDCertainty" the third character of the third field is
      converted to an integer and passed to ``add_certainty_level``; any
      other action is passed to ``add_label`` as a label, and a message is
      printed when ``add_label`` returns -2.
    * any other lexrep: the action "\\end" or "\\noend" registers a sentence
      end condition (True or False respectively); any other action only
      prints a message.

    The file is closed when reading is finished or fails. A line with an
    empty lexrep (a line starting with a comma) is handled by the second
    branch instead of raising IndexError.
    """
    with open(file_,"r",True,"utf8") as f_udct:
        for txt_line in f_udct:
            txt_line = txt_line.rstrip()

            # skip lines without a comma and lines starting with '/*'
            if ',' not in txt_line or txt_line[0:2] == '/*':
                continue

            txt_list = txt_line.split(',')
            lexrep, action = txt_list[0], txt_list[1]
            # startswith() is also safe when lexrep is empty
            if lexrep.startswith('@'):
                literal = lexrep[1:]
                if action == "UDCertainty":
                    level = txt_list[2]
                    udct_.add_certainty_level(literal,int(level[2]))
                else:
                    ret = udct_.add_label(literal,action)
                    if (ret == -2):
                        print('label ' + action + ' not valid !')
            elif action == "\\end":
                udct_.add_sent_end_condition(lexrep, True)
            elif action == "\\noend":
                udct_.add_sent_end_condition(lexrep, False)
            else:
                print('action ' + action + ' not valid !')



# initiate variables
mapping_file = language_par + "_compiler_report.log"  # xx_compiler_report.log, selected by language code
mapping_table = {}
f_rec = []
engine = iknowpy.iKnowEngine()

# read the user dictionary file and load the result into the engine
user_dictionary = iknowpy.UserDictionary()
read_udct_file(user_dct_par, user_dictionary)
ret = engine.load_user_dictionary(user_dictionary)

# common tail of the console message and of the header line of the output file
run_description = 'rule ' + rule_number + ' of the ' + language_par + ' language model in ' + in_path_par
print('Looking for examples for ' + run_description)

# recursive list of input files, .txt only (case-insensitive extension check)
# based on https://stackoverflow.com/questions/18394147/recursive-sub-folder-search-and-return-files-in-a-list-python
f_rec = [
    os.path.join(folder, file_name)
    for folder, _subfolders, file_names in os.walk(in_path_par)
    for file_name in file_names
    if os.path.splitext(file_name)[1].lower() == '.txt'
]

# create mapping table for rule numbers
create_mapping_table(mapping_file)

# start with a fresh output file: remove an existing one, then write the
# UTF-8 BOM and a header line describing the content of the file
if os.path.exists(out_path_par):
    os.remove(out_path_par)
f_output = open(out_path_par, "ab")
f_output.write(b'\xef\xbb\xbf')  # Utf8 BOM
write_ln(f_output, 'Examples for ' + run_description + '\n')

# read input files one by one
# The try/finally makes sure the output file is closed (and its content
# flushed) even when reading or indexing one of the input files fails.
try:
    for text_file in f_rec:
        print('processing ' + text_file)
        # the context manager closes the input file even if reading fails
        with open(text_file, "rb") as f_text:
            header = f_text.read(3)
            if (header == b'\xef\xbb\xbf'):  # check for Utf8 BOM
                header = b''  # remove BOM
            text = header + f_text.read()  # read text, must be utf8 encoded
        text = text.decode('utf8')  # decode text to Unicode

        # index input file
        engine.index(text, language_par, traces=True)

        # read trace output
        for trace in engine.m_traces:
            # print(trace)
            trace_parts = trace.split(':', 1)
            key, value = trace_parts[0], trace_parts[1]

            # store the sentence
            if (key == "SentenceFound"):
                quote_parts = value.split('"')
                Sentence = quote_parts[7]
                if len(quote_parts) > 9:  # i.e. if the sentence contains quotes (")
                    # NOTE(review): the pieces are concatenated without
                    # re-inserting the quote characters - confirm this is intended
                    Sentence = Sentence + ''.join(quote_parts[8:-1])

            # check if the demanded rule is applied to process the sentence
            elif (key == "RuleApplication"):
                # rule_id in trace refers actually to rule order -> retrieve rule order value
                rule_order = value.split(';')[0].split('=')[1]
                # extract the number that corresponds to the rule id in rules.csv from compiler_report.log
                rule_id = extract_rule_id(rule_order)
                # if the rule id corresponds to the demanded rule number, look for the concerned lexreps
                if rule_id == rule_number:
                    lexreps = str(value.split(';')[3:])
                    lexreps_indexes = ''
                    while 'index=' in lexreps:
                        index_start = lexreps.find('index=\"') + 7
                        labels_pos = lexreps.find('labels=')
                        lexreps_indexes = lexreps_indexes + ' ' + lexreps[index_start:labels_pos - 2]
                        # cut off left part of lexreps information in order to jump to the next lexrep
                        lexreps = lexreps[labels_pos + 7:]
                    # add the concerned lexrep(s) and the sentence to the output
                    # NOTE(review): assumes a "SentenceFound" trace was seen before
                    # the first "RuleApplication" trace - TODO confirm
                    #print(lexreps_indexes.lstrip())
                    write_ln(f_output, lexreps_indexes.lstrip() + ';' + Sentence)
finally:
    f_output.close()
1 change: 1 addition & 0 deletions language_development/genTrace-with-udct.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,7 @@ def read_udct_file(file_,udct_):
write_ln(f_trace, key + ':' + updated_value)
# pass
elif (key == "RuleApplicationResult"): # use this only when the code for 'RuleApplication' is activated too
updated_value = 'rule_id=' + rule_id + value[first_semicolon:]
write_ln(f_trace, key + ':' + updated_value)
# pass
elif (key == "JoinResult"):
Expand Down
8 changes: 4 additions & 4 deletions language_models/en/labels.csv
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,10 @@
;1,75,$;ENDuration;typeAttribute;attribute for duration indications;0;;Entity(Duration)
;1,$;ENTemp;typeAttribute;attribute for vague time indications;0;;Entity(DateTime)
;1,55,75,$;ENTime;typeAttribute;attribute for concrete time indications;0;;Entity(DateTime)
;1,75,$;ENDateTimeBegin;typeAttribute;added for enabling path expansion;0;;Path(Begin,DateTime)
;1,75,$;ENDateTimeStop;typeAttribute;added for enabling path expansion;0;;Path(End,DateTime)
;1,75,$;ENMeasurementBegin;typeAttribute;added for enabling path expansion;0;;Path(Begin,Measurement)
;1,75,$;ENMeasurementStop;typeAttribute;added for enabling path expansion;0;;Path(End,Measurement)

;1,62,65,70,71,75,$;ENDummy;typeAttribute;dummy attribute to temporarily mark lexreps;0;;
;40,55,65,70,71,77,78,79;ENDummy2;typeAttribute;extra dummy attribute;0;;
Expand Down Expand Up @@ -295,10 +299,6 @@
;75,$;ENNoMeasurement;typeAttribute;mark numbers that are not measurements;0;;
;75;ENInMeasspan;typeAttribute;marker for lexreps in a measurement span;0;;

;1,75,$;ENDateTimeBegin;typeAttribute;added for enabling path expansion;0;;Path(Begin,DateTime)
;1,75,$;ENDateTimeStop;typeAttribute;added for enabling path expansion;0;;Path(End,DateTime)
;1,75,$;ENMeasurementBegin;typeAttribute;added for enabling path expansion;0;;Path(Begin,Measurement)
;1,75,$;ENMeasurementStop;typeAttribute;added for enabling path expansion;0;;Path(End,Measurement)

;75,$;ENNumberPlusUnit2;typeAttribute;copy of NumberPlusUnit, not deleted in join rules;0;;

Expand Down
Loading

0 comments on commit 7b58dce

Please sign in to comment.