diff --git a/.Rbuildignore b/.Rbuildignore new file mode 100644 index 00000000..39e774db --- /dev/null +++ b/.Rbuildignore @@ -0,0 +1,5 @@ +^CRAN-RELEASE$ +^.*\.Rproj$ +^\.Rproj\.user$ +^cran-comments\.md$ +^CRAN-SUBMISSION$ diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..83bfa010 --- /dev/null +++ b/.gitignore @@ -0,0 +1,16 @@ +.Rproj.user +.Rhistory +.RData + +vignettes/pics/db_structure.aux +vignettes/pics/db_structure.log +vignettes/pics/db_structure.pdf +*.log +*.gz +*.fdb_latexmk + +*.o + +*.so + +.DS_Store \ No newline at end of file diff --git a/CRAN-SUBMISSION b/CRAN-SUBMISSION new file mode 100644 index 00000000..12d62a83 --- /dev/null +++ b/CRAN-SUBMISSION @@ -0,0 +1,3 @@ +Version: 2.5.0 +Date: 2024-03-15 22:08:04 UTC +SHA: 4abe4e308dd1c22ee94547b64970dd53751dcbdf diff --git a/DESCRIPTION b/DESCRIPTION index 5d6ecd3a..b3e2379c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,16 +1,62 @@ Package: emuR -Type: Package -Title: Interface to the Emu Speech Database System -Version: 2.0 -Date: 2014-06-19 -Author: Jonathan Harrington, Tina John and others, IPS LMU Muenchen & IPDS CAU Kiel -Maintainer: Jonathan Harrington -Description: Provides many special purpose functions for display and analysis of speech data. +Version: 2.5.1 +Date: 2025-08-06 +Title: Main Package of the EMU Speech Database Management System +Authors@R: c( + person("Markus", "Jochim", , "markusjochim@phonetik.uni-muenchen.de", c("aut", "cre"), comment = c(ORCID = "0000-0002-5638-4870")), + person("Raphael", "Winkelmann", , "raphael@phonetik.uni-muenchen.de", c("aut")), + person("Klaus", "Jaensch", , "klausj@phonetik.uni-muenchen.de", c("aut", "ctb")), + person("Steve", "Cassidy", , "steve.cassidy@mq.edu.au", c("aut", "ctb")), + person("Jonathan", "Harrington", , "jmh@phonetik.uni-muenchen.de", c("aut", "ctb"))) +Description: Provide the EMU Speech Database Management System (EMU-SDMS) with + database management, data extraction, data preparation and data + visualization facilities. See + for more details. License: GPL (>= 2) -Depends: R (>= 2.13), stats, methods, MASS -Suggests: emudata +Depends: + R (>= 3.5.0) +Imports: + tools, + utils, + graphics, + methods, + rlang, + stringr (>= 1.4.0), + uuid, + base64enc, + shiny, + wrassp (>= 0.1.4), + jsonlite (>= 1.6.1), + RSQLite (>= 2.0.0), + DBI (>= 0.3.1), + httpuv (>= 1.3.2), + dplyr (>= 0.7.8), + readr (>= 1.1.1), + tibble (>= 1.4.2), + purrr (>= 0.2.4), + tidyr(>= 0.8.2), + mime(>= 0.6), + rstudioapi(>= 0.10), + httr(>= 1.4.1), + V8(>= 3.4.0), + cli(>= 2.5.0), + fs, + tidyselect +Suggests: + stats, + grDevices, + MASS, + ggplot2(>= 2.1.0), + testthat(>= 0.7.1.99), + compare(>= 0.2.4), + knitr(>= 1.7), + rmarkdown(>= 0.9.2), + matlabr, + R.matlab +Encoding: UTF-8 LazyLoad: yes LazyData: yes ZipData: no -URL: http://emu.sourceforge.net/ - +URL: https://github.com/IPS-LMU/emuR, https://ips-lmu.github.io/The-EMU-SDMS-Manual/ +BugReports: https://github.com/IPS-LMU/emuR/issues +RoxygenNote: 7.3.2 diff --git a/Help/Slope.test b/Help/Slope.test deleted file mode 100644 index 26cccd3a..00000000 --- a/Help/Slope.test +++ /dev/null @@ -1,42 +0,0 @@ -.BG -.FN slope.test -.DN -tests whether the difference between two or more regression lines -is significant -.CS -slope.test(...) -.RA -.AG ... -this function takes any number of two column matrices. -The first column is the y-data (in the case of locus equations, this -is the vowel onset) and the second column is the x-data (in the case of -locus equations, vowel target). -.RT -The return value consists of the following componenets: -.RC separate -slope, intercept, r-squared, F-ratio, "d(egrees of) f(reedom)" -and "prob(ability that) line fits data" for the separate data matrices entered. -.RC combined -F-ratio, "d(egrees of) f(reedom)", and "Probability of -them being DIFFERENT" for the slope and for the intercept of the combined -data. -.RC x -the combined x-data for all the matrices. -.RC y -the combined y-data for all the matrices. -.RC mat -the category vectors for the combined data -(consists of 1, 0 and -1). -.RC numrows -the number of rows in each matrix. -.RC numcats -the sum number of matrices entered. - -.SH REFERENCES -see E. Pedhazur, Multiple Regression in Behavioral Research -p.436-450, 496-507. -.SA -lm(), summary.lm(), pf() -.KW ~keyword -.WR - diff --git a/Help/dextract.lab b/Help/dextract.lab deleted file mode 100644 index 706eef16..00000000 --- a/Help/dextract.lab +++ /dev/null @@ -1,33 +0,0 @@ -\" -*-nroff-*- -.BG -.FN dextract.lab -.TL -Extract data from a trackdata -.CS -dextract.lab(trackdata, labs, labtype=unique(labs)) -.RA -.AG trackdata -A trackdata object returned from `track'. -.AG labs -A vector of labels parallel to `trackdata$index', i.e. one for each -segment in the trackdata. -.AG labtype -A vector of labels for which data is to be extracted. -.RT -A trackdata object which is a subset of `trackdata' containing only the -data for those labels in `labtype'. The result has the same components -as the input `trackdata': -.RC data -A vector or matrix of numerical data. -.RC index -A two column matrix giving the start and end indeces into the data -vector for each segment. -.RC ftime -A two column matrix giving the start and end times for each segment. -.SA -track, dextract, get.time.element, frames.time -.EX -# extract only the data for i: and I vowels -i.data _ dextract.lab(voweldata, vowellabs, c("i:", "I")) -.KW mu -.WR diff --git a/Help/discrim b/Help/discrim deleted file mode 100644 index d281881a..00000000 --- a/Help/discrim +++ /dev/null @@ -1,72 +0,0 @@ -\" -*-nroff-*- -.BG -.FN discrim -.TL -Canonical Discriminant Analysis -.CS -discrim(data, labs, dims=length(unique(labs)) - 1, transform=NULL, verbose=F) -.RA -.AG data -A multidimensional data matrix. -.AG labs -A vector of labels, parallel to `data'. -.OA -.AG dims -The number of transformed dimensions to be returned. The maximum is one -less than the number of dimensions in `data'. -.AG transform -The result of a previous call to discrim. This can be used to transform -a second data set according to the eigenvector/eigenvalues determined -from a training data set. -.AG verbose -If TRUE, a commentary is printed. -.RT -If `transform' is supplied the data is transformed according to the -eigenvector/eigenvalues in that tranform and a matrix of data is -returned. If no transform is supplied, the result has the following -components: -.RC data -The transformed data, a matrix with the same number of rows as `data' -and `dims' columns. -.RC variates -The eigenvectors of the transformation. -.RC values -The eigenvalues of the transformation -.DT -(Taken from the unix man page for cda): - -Canonical discriminant analysis takes a set of grouped points in a -high dimesional space and determines the components such that points -within a group form tight clusters. These points are called the -canonical variates and are labeled 0-(n-1) where n is the -dimensionality of the space (i.e. the number of hidden units). The -original points can be projected on to these vectors. The result is a -low dimensional plot which has clustered the points belonging to each -group. - -The method for finding the canonical variates is divided into 3 -steps: - -(1) finding the within-sum-of-squares and the between-sum-of-squares -matrices - -(2) finding the eigenvectors (x) [the canonical variates] and -eigenvalues (@) [the canonical values] which satisfy the equation: -.Cs - Bx = @Wx -.Ce -where B is the between-sum-of-squares matrix and W is the -within-sum-of-squares matrix. The canonical variate with the greatest -canonical value corresponds to the direction in which the ratio of the -between-group distance to the within-group distance is maximised. - -(3) projecting each of the initial input points onto the canonical -variates. - -.SH REFERENCES -The unix man page for `cda'. -.SA -`prcomp' -.EX -.KW mu -.WR diff --git a/Help/dplot b/Help/dplot deleted file mode 100644 index fcc259b1..00000000 --- a/Help/dplot +++ /dev/null @@ -1,68 +0,0 @@ -\" -*-nroff-*- -.BG -.FN dplot -.TL -Plot MU+ trackdata. -.CS -dplot(dataset, labs=NULL, offset=0, ref.time=NULL, average=F, - main="", xlab="time (ms)", ylab="", xlim=NULL, ylim=NULL, - cex=0.5, linetype=F, normalise=F, colour=T, legn="tl", - axes=T) -.RA -.AG dataset -A MU+ trackdata object. -.OA -.AG labs -A label vector parallel to `dataset'. -.AG offset -A number between 0 and 1, segments are alligned on the plot with this -proportion of their duration corresponing to zero. -.AG ref.time -A vector of one time per segment, an alternative to `offset', segments -will be lined up on the plot with these times at zero. -.AG average -If T, the data for each unique label in `labs' is averaged. -.AG normalise -If T, the data for each segment will be time normalised so that all -segments appear to have the same length on the plot. -.AG main -A main title for the plot. -.AG xlab -A label for the x-axis. -.AG ylab -A label for the y-axis. -.AG xlim -A vector of the minimum and maximum limits of the x-axis. -.AG ylim -A vector of the minimum and maximum limits of the y-axis. -.AG cex -The `cex' parameter passed to `par' to control text size on the plot. -.AG linetype -.AG colour -colour and linetype can be either T, F or a vector. If they are F then -the plot will use a single colour or linetype. If they are T then the -plot will use a different colour/linetype for each line. If they is a -vector then it should consist of one label for each element to be -plotted, the colour/linetype will be selected according to this label. -.AG legn -The position to draw the legend, one of `"tl"' (top-left), `"tr"' -(top-right), `"bl"' (bottom-left) or `"br"' (bottom-right). -.AG axes -If T, axes will be drawn on the plot. -.RT -None -.SE -A plot of the track data is generated on the current graphics device. -.DT -This function is a general purpose plotting function for track data -generated by the `track' function. Data for each segment is lined up -at some point along its length (eg. at the start, midpoint or end) and -possibly normalised or averaged before being plotted. Normalisation -consists of stretching each segment out to have the same number of -samples as the longest segment; the stretching is done with the MU+ -function `linear'. Averaging takes account of the different lengths of -the segments in computing the average for each time point. -.SA -track, linear -.KW mu -.WR diff --git a/Help/dsmooth b/Help/dsmooth deleted file mode 100644 index f3420540..00000000 --- a/Help/dsmooth +++ /dev/null @@ -1,20 +0,0 @@ -\" -*-nroff-*- -.BG -.FN dsmooth -.TL -Smooth the data in a trackdata object. -.CS -dsmooth(dataset) -.RA -.AG dataset -A trackdata object as returned from `track'. -.RT -The result of applying the `smooth' function to each column of the data -for each segment in the trackdata object. -.DT -This function uses the `dapply' function to apply `smooth' to the data -for each segment. -.SA -smooth, dapply -.KW ~keyword -.WR diff --git a/Help/ellipse b/Help/ellipse deleted file mode 100644 index 3db5da2f..00000000 --- a/Help/ellipse +++ /dev/null @@ -1,26 +0,0 @@ -.BG -.FN ellipse -.TL -Calculate ellipse coordinates -.CS -ellipse(x, y, rx, ry, orient, incr=360/100) -.RA -.AG x -X coordinate of center -.AG y -y coordinate of center -.AG rx -Radius in the x direction -.AG ry -Radius in the y direction -.AG orient -Orientation, in radians. The angle of the major axis to the x axis. -.OA -.AG incr -The increment between points, in degrees. -.RT -A matrix of x and y coordinates for the ellipse. -.SA -eplot() -.KW mu -.WR diff --git a/Help/eplot b/Help/eplot deleted file mode 100644 index f3250825..00000000 --- a/Help/eplot +++ /dev/null @@ -1,72 +0,0 @@ -\" -*-nroff-*- -.BG -.FN eplot -.TL -Ellipse plot -.CS -function(data, labs, chars, formant = F, scaling = "linear", - nsdev = 2.447747, dopoints = F, classify = F, - doellipse=F, centroid = F, main="", xlab="", ylab="", axes = T, - xlim, ylim, colour, linetype, ...) -.RA -.AG data -A two-columned matrix of data -.OA -.AG labs -A vector of labels, parallel to `data'. -.AG chars -A vector of labels, parallel to `data'. If this argument is specified -these labels will be plotted rather than the labels in `labs'. -.AG formant -If TRUE, then the data is negated so that, for formant data, the plot -is made in the -F2/-F1 plane. This gives a similar layout to the -common vowel quadrilateral. -.AG scaling -Either "mel" or "bark" for mel or bark scaling of the data. -.AG nsdev -Defines the length of the major and minor axes of the ellipses in terms -of the standard deviation of the data. -.AG dopoints -If TRUE, character labels (from `labs' or `chars') are plotted for each -data point. -.AG doellipse -If T, ellipses are drawn on the plot. If F, no ellipses are drawn and, if -'dopoints' is also F, 'centroids' is set to T. -.AG centroid -If T, a single character label is plotted at the center of each ellipse. -.AG classify -If classify is TRUE, a gaussian (mahalanobis distance) classification -is carried out on the data and the confusion matrix is returned. -.AG main -A main title for the plot. -.AG xlab, ylab -Titles for the x and y axes. -.AG axes -If TRUE axes are drawn on the plot. -.AG xlim, ylim -The bounds of the plot on the x and y axes, each should be a two -element vector giving the minimum and maximum values to plot. -.AG colour -If colour is TRUE, the ellipses and labels will be plotted in different -colours. -.AG linetype -If linetype is TRUE, the ellipses will be plotted with different -linetypes. This is useful for plots that will be printed. -.PP -This function will also accept any of the standard graphical parameters -which can be passed to `par()', for example `cex' which specifies the -size of characters to be used. - -.RT -A confusion matrix, if the classify argument is TRUE, otherwise NULL. -.SE -A plot is generated consisting of an ellipse defining the distribution -of each class within the given labels. The mean and covariance of each -class is estimated from the data and plotted. Labels may be plotted for -each data point or at the class mean for each class. -.SA -train(), mahal() -.EX -eplot(formantdata, vowellabs, centroids=T) -.KW mu -.WR diff --git a/Help/euclidean b/Help/euclidean deleted file mode 100644 index f6d12f1b..00000000 --- a/Help/euclidean +++ /dev/null @@ -1,25 +0,0 @@ -\" -*-nroff-*- -.BG -.FN euclidean -.TL -Find the inter-euclidean distance for a data matrix -.CS -euclidean(data, m=1, n=ncol(data)) -.RA -.AG data -A vector or matrix of numerical data. -.OA -.AG m -The first column of data to be used in the distance calculation. -.AG n -The last column of data to be used in the distance calculation. -.RT -Calculates the euclidean distance between successive rows of the matrix -based on columns m:n. -.SA -steady -.EX -> euclidean(cbind(c(1,2,3,4), c(2,3,2,2))) -[1] 1.414214 1.414214 1.000000 -.KW mu -.WR diff --git a/Help/expand.labels b/Help/expand.labels deleted file mode 100644 index 363abc29..00000000 --- a/Help/expand.labels +++ /dev/null @@ -1,18 +0,0 @@ -.BG -.FN expand.labels -.TL -Label each data sample -.CS -expand.labels(indvals, labs) -.RA -.AG indvals -Index component of a trackdata object as returned by `frames', or `track'. -.AG labs -A label vector parallel to `indvals'. -.RT -Returns a vector of labels, one for each row in the data matrix that -corresponds to `indvals'. -.SA -frames, track -.KW mu -.WR diff --git a/Help/fcalc b/Help/fcalc deleted file mode 100644 index 1365852e..00000000 --- a/Help/fcalc +++ /dev/null @@ -1,42 +0,0 @@ -.BG -.FN fcalc -.TL -Arithmetic calculation on spectral data -.CS -fcalc(fftdata, byrow=T, samfreq=20000, nyq=samfreq/2, - low=0, high=nyq, fun=sum, ...) -.RA -.AG fftdata -FFT spectral values as returned from `muspec' with `dbspec=T' (the default) or `muslice'. -.AG byrow -If TRUE (default), the arithmetic calculation will be done on each row. -If FALSE, they will be done on each column. -.AG samfreq -Sampling frequency of the data (Default assumes 20 kHz). -.AG nyq -The upper limit of the spectrum (the Nyquist frequency or half the -sampling frequency). -.AG low -Starting frequency in Hz over which the function in `fun' is to be applied. -.AG high -Ending frequency in Hz over which the function is to be applied. -.AG fun -Any Splus arithmetic function to be applied on `fftdata'. (Default function is -`sum'). -.OA -.AG ... -Extra arguments, which must be unique to `fcalc', that are needed for -the `fun' function. -.RT -`fcalc' returns the results of the calulation in `$vals'. -.DT -The spectral values (in dB's) are first converted back into energy -values, the function `fun' is then applied to the data and the result -is converted back into dB's. -.SA -muslice, muspec -.EX -Calculate the variance of each spectrum in the 1-4 kHz range -> fcalc(fftdata, low=1000, high=4000, fun=var) -.KW mu -.WR diff --git a/Help/fplot b/Help/fplot deleted file mode 100644 index c44340d0..00000000 --- a/Help/fplot +++ /dev/null @@ -1,72 +0,0 @@ -.BG -.FN fplot -.TL -A function for plotting spectra -.CS -fplot(fftdata, labs=NULL, which=NULL, colour=T, linetype=F, - samfreq=20000, nyq=samfreq/2, xlab="Frequency (Hz)", - ylab="Intensity (dB)", low=0, high=nyq, dbrange=NULL, - axes=T, main="", average=F, smoothing=F, points=20, - coeff=F, type="l", super=F, legn="tl", cex=1) -.RA -.AG fftdata -FFT spectral values as returned from `muspec' with `dbspec=T' (the default) or `muslice'. -.OA -.AG labs -A vector of labels parallel to `fftdata'. -.AG which -Select one of the label-types for plotting. -.AG colour -If colour is TRUE, the spectra and labels will be plotted in different colours. -.AG linetype -If linetype is TRUE, the spectra will be plotted with different linetypes. This -is useful for plots that will be printed. -.AG samfreq -Determines the x-axis range, nyquest frequency and the default high frequency range. -.AG nyq -The upper limit of the spectrum (the nyquist frequency). -.AG xlab, ylab -Titles for the x and y axes. -.AG low, high -To specify a low and high frequncy range. (Default range is 0-10000 Hz) -.AG dbrange -Specify a range for the y-axis in db. -.AG axes -If TRUE axes are drawn on the plot. -.AG main -A main axis title for the plot. -.AG average -If TRUE, will compute the average spectra for each different label-type. -.AG smoothing -If TRUE, will compute cepstrally smoothed spectra (default to 20 cepstral -coefficients), then plot the corresponding smoothed spectra. -.AG points -Value of the cepstral coefficient, roughly 20 for a sampling rate of 20kHz. -.AG coeff -If TRUE, the values of the cepstral coefficients will be returned. -.AG type -The way the points/lines are represented. See Splus `plot'. Default is `"l"', a -line plot. -.AG super -Superimpose FFTs that occur in successive rows of fftdata on the same plot. -.AG legn -The position to draw the legend, one of `"tl"' (top-left), `"tr"' -(top-right), `"bl"' (bottom-left), `"br"' (bottom-right) or `"loc"' -(for locator) which requires you to position the lengend with the mouse. -.AG cex -The `cex' parameter passed to `par' to control text size on the plot. -.RT -If `coeff=T', values of cepstral coefficients will be returned. -.SE -Spectral plots are generated. -.SA -muslice, muspec -.EX -# e.g. Extract and plot the cepstrally smoothed average spectral values of the -# [T] and [S] segments centered at the midpoint from the demo.utts database. -> segs _ phon(demo.utts, "Phonetic='T/S'", find="Phonetic") -> mvals _ muspec(segs, centering=T, offset=0.5) -> labs _ label(segs) -> fplot(mvals$spec, labs, average=T, smoothing=T) -.KW mu -.WR diff --git a/Help/frames b/Help/frames deleted file mode 100644 index 2a4c1ff1..00000000 --- a/Help/frames +++ /dev/null @@ -1,40 +0,0 @@ -\" -*-nroff-*- -.BG -.FN frames -.TL -Extract data from the speech database -.CS -function(segs, trackname, justindex = F, justftime = F, - justdata = F, extn = options("extensions")) -.RA -.AG segs -A mu+ segment list. -.AG trackname -A valid parameter track name for the current segment list (see `tracks'). -.OA -.AG justindex -Only return the indx component of the result -.AG justftime -Only return the ftime component of the result -.RT -Unless one of `justdata', `justindex' or `justftime' are specified, the -result is an object of class `trackdata' and has the following components: -.RC data -A matrix of data corresponding to the specified parameter track for the -segment list. The number of dimensions (columns) in the matrix depends -on the track specified, for example, track `"fm[1..3]"' (formants 1 to 3) -will have three columns. The data for all segments is concatenated in -rows, the data corresponding to a particular segment can be extracted -using the `index' component. -.RC index -A matrix with two columns giving the start and end indexes of each segment -within the `data' matrix. -.RC ftime -A matrix with two columns giving the start and end times of each -segment copied from the segment list. Both the `index' and the `ftime' -components will have the same number of rows as the original segment -list. -.SA -track, tracks -.KW mu -.WR diff --git a/Help/frames.record b/Help/frames.record deleted file mode 100644 index f18951b6..00000000 --- a/Help/frames.record +++ /dev/null @@ -1,21 +0,0 @@ -\" -*-nroff-*- -.BG -.FN frames.record -.TL -Find a record in a frames data object. -.CS -frames.record(trackdata, segnum, time) -.RA -.AG trackdata -A trackdata object resulting from a call to `track'. -.AG segnum -The segment number we are interested in. -.AG time -The time we are interested in. -.RT -The index into the `data' component of `trackdata' which contains the -data for this segment at this time. -.SA -track, get.time.element, dextract, dextract.lab -.KW mu -.WR diff --git a/Help/frames.time b/Help/frames.time deleted file mode 100644 index 56110c13..00000000 --- a/Help/frames.time +++ /dev/null @@ -1,24 +0,0 @@ -\" -*-nroff-*- -.BG -.FN frames.time -.TL -Find the time and position of a data element. -.CS -frames.time(dataset, datanum) -.RA -.AG dataset -A dataset returned by `track' or `frames'. -.AG datanum -An integer, an index into the `data' component of `dataset'. -.RT -The segment number which contains the element `datanum' of `dataset$data'. -.DT -The dataset returned from `track' or `frames' consists of a matrix of -data (the `data' component) and two index components (`index' and -`ftime'). The data for all segments is concatenated together in -`$data'. This function can be used to find out which segment a -particular row of `$data' corresponds to. -.SA -track, frames -.KW mu -.WR diff --git a/Help/get.time.element b/Help/get.time.element deleted file mode 100644 index 013ab895..00000000 --- a/Help/get.time.element +++ /dev/null @@ -1,37 +0,0 @@ -\" -*-nroff-*- -.BG -.FN get.time.element -.TL -Get data for a given time -.CS -get.time.element(time, trackdata) -.RA -.AG time -A time in milliseconds -.AG trackdata -A trackdata object as returned by `track'. -.RT -The element number of `trackdata$data' corresponding to `time' -.SA -track, frames -.EX -# Given a segment list: -> segs - start end utt -E "12489.4" "12586.9" "/home/dbase/dbase1/msadb001" -@ "12916.9" "12982.2" "/home/dbase/dbase1/msadb001" -E "13155.2" "13240.4" "/home/dbase/dbase1/msadb001" - -# We extract some formant data: -> data _ track(segs, "fm[1..3]") - -# We can now find which element of data$data corresponds to the start of -# the first "E" segment: -> get.time.element( 12489.4, data ) -[1] 1 - -# and to the midpoint of the last "E" segment: -> get.time.element(13197.8, data ) -[1] 47 -.KW mu -.WR diff --git a/Help/hamming b/Help/hamming deleted file mode 100644 index d2021367..00000000 --- a/Help/hamming +++ /dev/null @@ -1,17 +0,0 @@ -\" -*-nroff-*- -.BG -.FN hamming -.TL -Generate a Hamming window -.CS -hamming(pts=512) -.OA -.AG pts -The width of the window. -.RT -A vector which represents a Hamming window of width `pts', with -elements between 0 and 1. A signal vector should be multiplied by this -vector to apply the hamming window. -.EX -.KW mu -.WR diff --git a/Help/hplot b/Help/hplot deleted file mode 100644 index 354ef473..00000000 --- a/Help/hplot +++ /dev/null @@ -1,41 +0,0 @@ -\" -*-nroff-*- -.BG -.FN hplot -.TL -Plot superimposed histograms -.CS -hplot(vec, labs, xlab = "", ylab = "", main = "", - colour = T, xlim = NULL, axes = T, legn="tl") -.RA -.AG vec -A vector of numerical data. -.OA -.AG labs -A vector of labels parallel to `vec'. -.AG xlab -An x-axis label. -.AG ylab -A y-axis label. -.AG main -A main title for the plot. -.AG colour -If colour is TRUE, different colours will be used for each unique -label. If it is FALSE, different shading patterns will be used. -.AG xlim -The range of the x-axis. -.AG axes -If FALSE, no axes will be plotted. -.AG legn -Where to put the legend. If `FALSE' no legend will be drawn, otherwise -this can be one of `"tl"', `"br"', `"bl"' or `"tr"' (for top-left, bottom-right -etc.) or `"loc"' (for locator) which requres you to position the legend -with the mouse. -.RT -None. -.SE -Generates multiple overlaid histograms, one for each unique label in -`labs'. -.SA -nplot, eplot -.KW mu -.WR diff --git a/Help/is.segs b/Help/is.segs deleted file mode 100644 index 9b6beccd..00000000 --- a/Help/is.segs +++ /dev/null @@ -1,16 +0,0 @@ -.BG -.FN is.segs -.TL -Test whether an object is a segment list -.CS -is.segs(x) -.RA -.AG x -Any Splus object -.RT -TRUE if `x' is a mu+ segment list. -.SA -as.segs -.EX -.KW mu -.WR diff --git a/Help/label b/Help/label deleted file mode 100644 index c23ec3dc..00000000 --- a/Help/label +++ /dev/null @@ -1,39 +0,0 @@ -\" -*-nroff-*- -.BG -.FN label -.TL -Label a segment list. -.CS -label(segs, attribute="Phoneme", - extn=options()$labextn, dirc=options()$templatedir) -.RA -.AG segs -A MU+ segment list. -.OA -.AG attribute -A level name from the current MU+ database. -.AG extn -The extension on the label files in the database. -.AG dirc -The directory to look for the `labed_template' file. -.RT -A vector of labels. -.DT -The label function can be used in two ways. Firstly it can retrieve the -labels associated with a segment list at the time the segment list was -made. This is done by leaving out the `attribute' argument. Secondly -it can retrieve the labels that dominate or are dominated by the -segments in the segment list. For example, to retrieve the labels of -the Words that dominate strong Syllable segments I could use the -following commands. -.Cs -> segs <- phon(utts, "Syllable='S'", find="Syllable") -> wordlabs <- label(segs, "Word") -.Ce -If the `attribute' level is lower than the level of the segment list, -the labels of the segments that are dominated by each segment in the -list are concatenated together in the result. -.SA -phon -.KW mu -.WR diff --git a/Help/label.convert b/Help/label.convert deleted file mode 100644 index bc7c70de..00000000 --- a/Help/label.convert +++ /dev/null @@ -1,21 +0,0 @@ -.BG -.FN label.convert -.TL -Translate labels in a label vector -.CS -label.convert(labels, l1, l2) -.RA -.AG labels -A vector of labels. a -.AG l1 -The label to be replaced. -.AG l2 -The substitute label. -.RT -The `labels' vector with all instances of `l1' replaced by `l2'. -.EX -> label.convert(c("A", "E", "I", "A"), "A", "V") -[1] "V" "E" "I" "V" - -.KW mu -.WR diff --git a/Help/label.num b/Help/label.num deleted file mode 100644 index 21c5fc90..00000000 --- a/Help/label.num +++ /dev/null @@ -1,17 +0,0 @@ -.BG -.FN label.num -.TL -Convert labels to integers -.CS -label.num(labs) -.RA -.AG labs -A vector of labels. -.RT -Each label is replaced by a number. -.EX - > label.num(c("A", "E", "I", "A, "I", "E" )) - [1] 1 2 3 1 3 2 - -.KW mu -.WR diff --git a/Help/linear b/Help/linear deleted file mode 100644 index 1cba6f70..00000000 --- a/Help/linear +++ /dev/null @@ -1,22 +0,0 @@ -\" -*-nroff-*- -.BG -.FN linear -.TL -Perform linear time normalisation on trackdata. -.CS -linear(dataset, n=20) -.RA -.AG dataset -A trackdata object as returned by `track'. -.OA -.AG n -The number of points (samples) required for each segment. -.RT -A new trackdata object where the data for each segment has the same -number (`n') of samples. -.DT -The data for each segment is normaised using the `approx' function. -.SA -approx -.KW mu -.WR diff --git a/Help/mahal b/Help/mahal deleted file mode 100644 index 5464d675..00000000 --- a/Help/mahal +++ /dev/null @@ -1,39 +0,0 @@ -\" -*-nroff-*- -.BG -.FN mahal -.TL -Classify using Mahalanobis distance -.CS -mahal(data, model) -.RA -.AG data -A vector or matrix of data -.AG model -A Gaussian model generated by `train'. -.RT -A label vector with one element per row of `data' -.DT -The `model' argument contains the mean and inverse covariance matrix -(or standard deviation if the data is one-dimensional) for each class -in the training set as well as the class labels. This function -calculates the Mahalanobis distance of each row of `data' from each -class mean and assigns the label of the closest mean to that row. The -result is a vector of labels corresponding to the rows of `data'. - -The Mahalanobis distance between a data point and a class is the -Euclidean distance between the point and the class mean devided by the -covariance matrix for the class. This means that classes with large -covariances will -.I -attract -data points from a larger area than those with small covariances. -.SH REFERENCES -O'Shaughnessy, D. -.I -Speech Communication -(Addison-Wesley: Reading, MA. 1987) -.SA -train -.EX -.KW mu -.WR diff --git a/Help/mahal.dist b/Help/mahal.dist deleted file mode 100644 index 105cbc38..00000000 --- a/Help/mahal.dist +++ /dev/null @@ -1,26 +0,0 @@ -.BG -.FN mahal.dist -.TL -Calculate mahalanobis distances -.CS -mahal.dist(data, model) -.RA -.AG data -A matrix of numerical data points. -.AG model -A gaussian model as returned by the `train' function. -.RT -A matrix of distances with one column for every class (label) in the -gaussian model. -.DT -The `train' function finds the centroids and covariance matrices for a -set of data and corresponding labels: one per unique label. This -function can be used to find the mahalanobis distance of every data -point in a dataset to each of the class centroids. The columns of the -resulting matrix are marked with the label of the centroid to which -they refer. The function `mahal' should be used if you want to find -the closest centroid to each data point. -.SA -train, mahal, bayes.lab, bayes.dist -.KW mu -.WR diff --git a/Help/matscan b/Help/matscan deleted file mode 100644 index f8d7797f..00000000 --- a/Help/matscan +++ /dev/null @@ -1,30 +0,0 @@ -\" -*-nroff-*- -.BG -.FN matscan -.TL -Read matrix data from a file -.CS -matscan(file, num.cols=count.fields(file)[1], what=0, sk=0) -.RA -.AG file -A filename. -.OA -.AG num.cols -The number of columns of data in the file. -.AG what -A template for the data elements in the file, it should be a number for -numeric data (the default) or a string for string data. Note that an -Splus matrix can only hold one type of data (string or numeric), for -mixed types use data tables and the `read.table' function. -.AG sk -The number of leading lines of the file to skip. -.RT -A matrix corresponding to the data in `file'. -.DT -This function has been partially superceeded by the introduction of -data frames and the read.table function. It is still useful however -for reading data into Splus matrix objects. -.SA -read.table -.KW mu -.WR diff --git a/Help/mel b/Help/mel deleted file mode 100644 index fa83153b..00000000 --- a/Help/mel +++ /dev/null @@ -1,24 +0,0 @@ -.BG -.FN mel -.TL -Convert Hz to the mel scale -.CS -mel(x) -.RA -.AG x -The frequency in Hz. -.RT -The corresponding frequency on the mel scale. -.DT -The mel scale frequency is calculated according to the formula: - -.IP -mel = 1/log(2) * (log(1 + (Hz/1000))) * 1000 -.PP -where Hz is the frequency in Hz. -.SH REFERENCES - -.SA -bark -.KW mu -.WR diff --git a/Help/mkdb b/Help/mkdb deleted file mode 100644 index a84afc62..00000000 --- a/Help/mkdb +++ /dev/null @@ -1,36 +0,0 @@ -\" -*-nroff-*- -.BG -.FN mkdb -.TL -Generate a utterance list -.CS -mkdb(directory=options()$dbdefault, extn=options()$labextn, pattern="*") -.RA -.AG directory -The directory which contains the label files which will form the -database. Note that you should not include a trailing `/' on the path -name. -.AG extn -The file extension for label files. -.AG pattern -A pattern to match the desired utterances (use the same format as you -would on the unix command line, i.e. `?' matches any one character, `*' -matches any number of characters). -.RT -A mu+ utterance vector. -.DT -This function is used to generate an utterance vector from a given -directory. The utterance vector specifies the database to be searched -by `phon' and other functions. Each element of the vector is the -name of a label file with the file extension removed. This then refers -to a family of files containing the information about the utterance -(label file, sample data file etc.). -.SA -phon -.EX -# Create an utterance list containing all utterances in directory `"dbase"': -utts _ mkdb("dbase") -# restrict the utterance list to those files begining with msa: -msa.utts _ mkdb("dbase", pattern="msa*") -.KW mu -.WR diff --git a/Help/moment b/Help/moment deleted file mode 100644 index ed95055c..00000000 --- a/Help/moment +++ /dev/null @@ -1,33 +0,0 @@ -.BG -.FN moment -.TL -Calculate the spectral moment. -.CS -moment(specvals, least=T, nyq=10000, low=0, high=nyq) -.RA -.AG specvals -A data matrix representing spectral values, as returned by `muspec'. -.OA -.AG least -If TRUE, normalise each spectrum so that its minimum is 0dB. -.AG nyq -The maximum frequency of the spectrum (the nyquist frequency: half the -sampling frequency). -.AG low -Set the low end of the spectral range to consider (Hz). -.AG high -Set the high end of the spectral range to consider (Hz). -.RT -A list with two components: -.RC first -The first spectral moment (spectral centre of gravity) for each row of -the input matrix. -.RC second -The second spectral moment (spectral variance or moment of inertia) for -each row of the input matrix. -.DT - -.SA -muspec -.KW mu -.WR diff --git a/Help/mu.extensions b/Help/mu.extensions deleted file mode 100644 index 8fc831a6..00000000 --- a/Help/mu.extensions +++ /dev/null @@ -1,23 +0,0 @@ -\" -*-nroff-*- -.BG -.FN mu.extensions -.TL -Change the database file extensions -.CS -mu.extensions(option) -.RA -.AG option -One of `"ESPS"', `"SSFF"' or `"ACCOR"'. -.RT -None -.SE -Sets the `"extensions"' option (see `options("extensions")' for current -setting) which determines which file extensions mu+ uses to search for -signal tracks in the database. -.SA -mu.options -.EX -## Use ESPS file extensions: -mu.extensions("ESPS") -.KW mu -.WR diff --git a/Help/mu.options b/Help/mu.options deleted file mode 100644 index 3cbc8a6c..00000000 --- a/Help/mu.options +++ /dev/null @@ -1,36 +0,0 @@ -\" -*-nroff-*- -.BG -.FN mu.options -.TL -Set up the mu+ options -.CS -mu.options(default=F, ...) -.OA -.AG default -If T, the default values of all options will be set if they are not -explicitly specified. -.AG muhome -A pathname. Records the place where mu+ is installed. -.AG dbdefault -A pathname. The default database directory for `mkdb'. -.AG templatedir -A pathname. The location of the `labed_template' and associated files. -.AG internalspeaker -If T, the internal speaker will be used by default for speech playback. -If F, the external speaker or headphones will be used. -.RT -None. -.DT -This function can be used to set up the default values for these -options or set new values. These options are stored using the Splus -`options' function, consequently their values may be queried or set -using `options'. -.SA -options, mu.extensions, .First.mu -.EX -# set the default database directory -mu.options(dbdefault="/dbase/english") -# find the value of the muhome option -options("muhome") -.KW mu -.WR diff --git a/Help/muclass b/Help/muclass deleted file mode 100644 index 7773c6b0..00000000 --- a/Help/muclass +++ /dev/null @@ -1,22 +0,0 @@ -\" -*-nroff-*- -.BG -.FN muclass -.TL -Find common elements in vectors -.CS -muclass(labels, class) -.RA -.AG labels -A vector of labels. -.AG class -A label or vector of labels. -.RT -A logical vector which is T for each element in `labels' which matches -`class' or an element of `class'. -.SA -match -.EX -muclass(c("a", "b", "c"), c("a", "c")) -[1] T F T -.KW mu -.WR diff --git a/Help/mucolnames b/Help/mucolnames deleted file mode 100644 index 981f0f59..00000000 --- a/Help/mucolnames +++ /dev/null @@ -1,23 +0,0 @@ -\" -*-nroff-*- -.BG -.FN mucolnames -.TL -Find database column names. (Unix only) -.CS -mucolnames(template="labed_template", dir=options()$templatedir) -.RA -.OA -.AG template -The name of the template file to look at. -.AG dir -The directory in which to look for the template file. -.RT -A vector of the column names defined by the template file. -.DT -This function examines the template file to find the columns defined in -the database described by it. These column names define the allowed -queries in the `phon' and `label' functions. -.SA -phon, label -.KW mu -.WR diff --git a/Help/mudur b/Help/mudur deleted file mode 100644 index e62a199e..00000000 --- a/Help/mudur +++ /dev/null @@ -1,20 +0,0 @@ -.BG -.FN mudur -.TL -Extract times and durations from a MU+ segment list -.CS -mustart(segs) -muend(segs) -mudur(segs) -.RA -.AG segs -A MU+ segment list. -.RT -A vector containing one element per segment in the segment list. -`mustart()' and `muend()' extract the start and end times of each -segment in a segment list. `mudur()' extracts the duration of each segment. - -.SA -phon, mkdb -.KW mu -.WR diff --git a/Help/muend b/Help/muend deleted file mode 100644 index 365e44d9..00000000 --- a/Help/muend +++ /dev/null @@ -1,20 +0,0 @@ -.BG -.FN muend -.TL -Extract times and durations from a MU+ segment list -.CS -mustart(segs) -muend(segs) -mudur(segs) -.RA -.AG segs -A MU+ segment list. -.RT -A vector containing one element per segment in the segment list. -`mustart()' and `muend()' extract the start and end times of each -segment in a segment list. `mudur()' extracts the duration of each segment. - -.SA -phon, mkdb -.KW mu -.WR diff --git a/Help/museg.write b/Help/museg.write deleted file mode 100644 index 2ce41d68..00000000 --- a/Help/museg.write +++ /dev/null @@ -1,21 +0,0 @@ -\" -*-nroff-*- -.BG -.FN museg.write -.TL -Write a MU+ segment list to a file. -.CS -museg.write(SegList, File) -.RA -.AG SegList -A MU+ segment list, as returned by `phon'. -.AG File -The name of a file to write the segment list into. -.RT -None. -.SE -The segment list is written to a file in a format suitable for input to -the `muspec' program. -.SA -phon, muspec -.KW mu -.WR diff --git a/Help/mustart b/Help/mustart deleted file mode 100644 index aa49d0a2..00000000 --- a/Help/mustart +++ /dev/null @@ -1,20 +0,0 @@ -.BG -.FN mustart -.TL -Extract times and durations from a MU+ segment list -.CS -mustart(segs) -muend(segs) -mudur(segs) -.RA -.AG segs -A MU+ segment list. -.RT -A vector containing one element per segment in the segment list. -`mustart()' and `muend()' extract the start and end times of each -segment in a segment list. `mudur()' extracts the duration of each segment. - -.SA -phon, mkdb -.KW mu -.WR diff --git a/Help/nframes b/Help/nframes deleted file mode 100644 index a0131677..00000000 --- a/Help/nframes +++ /dev/null @@ -1,19 +0,0 @@ -\" -*-nroff-*- -.BG -.FN nframes -.TL -Compute the number of frames of data for a segment list -.CS -nframes(segments, param) -.RA -.AG segments -A mu+ segment list. -.AG param -One of the valid parameters (see the `track' function). -.RT -The number of frames of data which would be returned in a call to -`track' or `frames'. -.SA -track, frames -.KW mu -.WR diff --git a/Help/norm b/Help/norm deleted file mode 100644 index 94760123..00000000 --- a/Help/norm +++ /dev/null @@ -1,46 +0,0 @@ -\" -*-nroff-*- -.BG -.FN norm -.TL -Normalise speech data -.CS -norm(data, speakerlabs, type="gerst", rescale=F) -.RA -.AG data -A matrix of data. Can be either an n-columned matrix or a trackdata -object as returned by `track'. -.AG speakerlabs -A parallel vector of speaker labels. -.OA -.AG type -The type of extrinsic normalisation to be performed on data. type can -be `"nearey"', `"cen"', `"lob"', `"gerst"' (default), for normalisation -according to Nearey, centroid method, Lobanov, or Gerstman. -.AG rescale -Currently only works for Lobanov normalisation. The normalised values are -multiplied by the standard deviation and then the mean is added, where the -standard deviation and mean are across all original speakers' unnormalised -data. -.RT -Normalised values of data are retuned, having the same structure as data. -.DT -Types of normalisation: -.BL -.LI -`"nearey"', Nearey : Find the log of each data element and subtract -from each the mean of the logarithmic data. -.LI -`"cen"', centroid: Find the mean of the data column and subtract it from each -data element in that column. -.LI -`"lob"', Lobanov: Find the mean and standard deviation of the data. Subtract -the mean from each data element and devide each result by the standard -deviation. -.LI -"gerst", Gerstman: Subtract from the data the minimun formant value then devide -by the formant range. -.LE -.SA -track -.KW mu -.WR diff --git a/Help/nplot b/Help/nplot deleted file mode 100644 index 8a0cdb88..00000000 --- a/Help/nplot +++ /dev/null @@ -1,45 +0,0 @@ -\" -*-nroff-*- -.BG -.FN nplot -.TL -Plot normal data curve -.CS -nplot(values, labs, xlab="", ylab="probability density", - main="", colour=T, linetype=F, plot.lab=T, - xlim, logprob=F) -.RA -.AG values -A vector of numeric values. -.AG labs -A vector of label, parallel to `values'. -.OA -.AG xlab -An y-axis label -.AG ylab -A y-axis label -.AG main -A main title for the plot. -.AG colour -If colour is TRUE, the curves and labels will be plotted in different -colours. -.AG linetype -If linetype is TRUE, the curves will be plotted with different -linetypes. This is useful for plots that will be printed. -.AG plot.lab -If plot.lab is TRUE, labels will be plotted above each normal curve. -.AG xlim -A vector of the minimum and maximum values to plot on the x-axis. -.AG logprob -If TRUE, the log-normal curve will be plotted. -.SE -Generates a plot on the current device which shows the distribution of -the data in `values' for each unique label in `labs'. -.DT -The data is split according to the unique labels in `labs' and the mean -and variance of each data set is calculated. The probability density -for each data set is then plotted. -.SA -hplot, dnorm -.KW mu -.KW plot -.WR diff --git a/Help/percent b/Help/percent deleted file mode 100644 index 6967cd29..00000000 --- a/Help/percent +++ /dev/null @@ -1,16 +0,0 @@ -.BG -.FN percent -.TL -Converts a confusion matrix to a percentage matrix -.CS -percent(data) -.RA -.AG data -A confusion matrix. -.RT -Calculates the percentage of elements, from the total elements in a confusion -matrix row. -.SA -confusion -.KW mu -.WR diff --git a/Help/perform b/Help/perform deleted file mode 100644 index 460a5c1b..00000000 --- a/Help/perform +++ /dev/null @@ -1,16 +0,0 @@ -.BG -.FN perform -.TL -Performance (hit rate) of a confusion matrix -.CS -perform(data) -.RA -.AG data -A confusion matrix. -.RT -Caluculates the accuracy (total score) of the confusion matrix, returning -percentage of correct, and incorrect matches. -.SA -confusion -.KW mu -.WR diff --git a/Help/phon b/Help/phon deleted file mode 100644 index 9112aa96..00000000 --- a/Help/phon +++ /dev/null @@ -1,149 +0,0 @@ -\" -*-nroff-*- -.BG -.FN phon -.TL -Generate a segment list -.CS -phon(utterances, query=paste(find, "!=''"), find="Phoneme", - ret="?", extn=options()$labextn, - dirc = paste(getenv("SHOME"), "/library/mu/ttsfiles", sep="")) -.RA -.AG utterances -A mu+ utterance vector. -.OA -.AG query -The specification of segments to be found. See later for the details of -the syntax of this argument. -.AG find -The level in the utterance hierarchy at which the search will take -place. That is the level of the segments who's properties you are -specifying in `query'. -.AG ret -The type of segment you wish to be returned. This will normally be the -same as the level specified in `find' but may specify a higher level in -the utterance hierarchy. -.AG extn -The file extension of the label files in the database. -.AG dirc -The directory where the labed template file and other system files are -stored. Defaults to the directory `ttsfiles' in the Splus library -directory where Mu+ is installed. -.RT -A mu+ segment list (an object of class "segs") corresponding to those -segments matching the query. -.DT -This function is the central part of the mu+ system, it generates -segment lists where each segment matches the query specified as the -first argument. This documentation provides a brief description of the -syntax of `phon' queries and the other options to the function. Further -information, including a tutorial, can be found in other parts of the -mu+ documentation. -.SH "Query Syntax" -Queries in `phon' consist of a set of conditions which must hold for -the segments we are interested in. The conditions consist of a level -name (corresponding to a column heading in LABED), a comparator and a -value. For example: -.Cs -"Phonetic='A'" -.Ce -constrains the segments to have the Phonetic label 'A'. In addition, -it is possible to refer to labels to the left and right of the current -label using the syntax: -.Cs -"Phonetic[1]='A'" -.Ce -which would match segments which are followed by an 'A' Phonetic label, -and: -.Cs -"Phonetic[-1]='A'" -.Ce -which matches segments preceeded by an 'A' Phonetic label. These -patterns would normally be used in conjuction with other patterns as -shown below. -.PP -Legal comparators for boolean conditions are equal (=) and not equal -(!=). -.PP -The value part of the condition (the right hand side of the expression) -can be either a literal value or a disjuction. The pattern 'A/B/C' will -constrain the value to be either 'A' or 'B' or 'C'. Thus the query: -.Cs -"Phonetic='A/E/I/O/U/V'" -.Ce -might be used to generate a segment list of these six vowels. -.PP -Simple expressions like these may be joined with `and' or `or' into -more complex conditions. For example, to find the above six vowels only -in strong syllables we write: -.Cs -"Phonetic='A/E/I/O/U/V' and Syllable=S" -.Ce -If both `and' and `or' are used in a query, `and' binds more tightly -than `or'. So, for example, -.Cs -"Phonetic='A' or Phonetic='E' and Syllable=S" -.Ce -would be read as: -.Cs -"Phonetic='A' or (Phonetic='E' and Syllable=S)" -.Ce -To force the alternative interpretation, explicit brackets can be used: -.Cs -"(Phonetic='A' or Phonetic='E') and Syllable=S" -.Ce -.SH "Find and Return Types" - -When `phon' searches the database it examines objects at a particular -level of the utterance hierarchy specified by the `find' argument. The -condition specified in the `query' refers to objects at this level. -So, for example, if you are searching for all strong Syllable level -segments the `find' argument will be `"Syllable"' and the condition: -.Cs -"Syllable='S'" -.Ce -It is also valid to specify conditions on levels -.I -above -the find level, for example: -.Cs -"Syllable='S' and Word='C'" -.Ce -will find strong syllables in content words. Note that you may not -specify conditions below the find level. -.PP -For some searches you may wish to return an object higher in the -hierarchy than the find level, for example if you want all Word -segments containing strong Syllables. Since it is not possible to -specify `"Syllable='S'"' when the find level is `"Word"' you must make -the find level `"Syllable"' and the `return' level `"Word"' (using the -`return' argument). For example: -.Cs -phon("Syllable='S'", find="Syllable", return="Word") -.Ce -.SH Positional Conditions -You may also specify conditions involvind the position of segments in -the hierarchy using the `Start', `Medial' and `End' conditions. To -find all phonemes at the start of syllables use: -.Cs -phon("Start(Syllable,Phoneme)=T", find="Phoneme") -.Ce -Where `Start(x,y)' means that element `y' is at the start of an element -of type `x'. Similarly `End(x,y)' is used to find elements at the end -of other segments and `Medial(x,y)' to find elements which are neither -at the start or end of the other segment. So to find all non-Syllable-medial i: -phonemes we use the query: -.Cs -phon("Phoneme='i:' and Medial(Syllable,Phoneme)=F", find="Phoneme") -.Ce -.SH Numerical Conditions -The condition `Num(x,y)' can be used to find segments with a specific -number of sub-segments, such as three-syllable words or syllables with -two phonemes. For example, `Num(Word,Syllable)=2' will find all -`Word' level segments which dominate exactly two `Syllable' segments. -In this case the comparitors can be any of =, !=, >, >=, <, <=. -.PP -Further examples of the use of phon are given in the user manual. -.SA -mkdb, track, frames -.KW mu -.WR diff --git a/Help/play b/Help/play deleted file mode 100644 index bb2fd36a..00000000 --- a/Help/play +++ /dev/null @@ -1,35 +0,0 @@ -\" -*-nroff-*- -.BG -.FN play -.TL -Playback of any segment list -.CS -play(playdata, rate, internalspeaker=F, pausedata=NULL, pauselen=1) -.RA -.AG playdata -A vector of sampled data values, which is output from `track'. -.AG rate -The sampling rate at which playback takes place. -.OA -.AG internalspeaker -If TRUE play uses the internal speaker, -If FALSE (Default), the external speaker is used. -.AG pausedata -Specifies a vector specifying the intervals at which pauses are to be made (in -milliseconds). -.AG pauselen -Specifies the duration of the pauses in seconds. -.SE -The sampling rate at which you select for playback, could have an -effect on the naturality of the output. For natural sound, the playback -sampling rate will need to be the same as the recording sampling -rate. A higher playback rate will increase the speed of the output. A -lower rate will decrease it. -.SA -rate, track, talk, uttplot -.EX -This example would introduce a one second pause at 50, 100, 150 ms relative to -the start of the file created with track. -> play (vals$data, 20000, pausdata=c(50, 50, 50), pauselen=1) -.KW mu -.WR diff --git a/Help/radians b/Help/radians deleted file mode 100644 index 11b97b45..00000000 --- a/Help/radians +++ /dev/null @@ -1,15 +0,0 @@ -.BG -.FN radians -.TL -Converts degrees to radians -.CS -radians(degrees) -.RA -.AG degrees -Angular measurement for conversion. -.RT -Angular measurement in radians. -.DT -There are 360 degrees or 2 * PI radians in one full rotation. -.KW mu -.WR diff --git a/Help/randomise.segs b/Help/randomise.segs deleted file mode 100644 index da427202..00000000 --- a/Help/randomise.segs +++ /dev/null @@ -1,24 +0,0 @@ -.BG -.FN randomise.segs -.TL -Randomise or Reverse items in a segment list -.CS -randomise.segs(segs, bwd=F, rand=F) -.RA -.AG segs -A MU+ segment list. -.OA -.AG bwd -Reverse the order of the segment lists rows. -.AG rand -Randomise the order of the segment lists rows. -.RT -A segment list containing the original elements, but the elements now in a -different row. -.SE -If the segment list has only one element, the list will not differ from the -original. -.SA -phon -.KW mu -.WR diff --git a/Help/rate b/Help/rate deleted file mode 100644 index 2f36bed6..00000000 --- a/Help/rate +++ /dev/null @@ -1,20 +0,0 @@ -.BG -.FN rate -.TL -Find the sample rate of a track file. -.CS -rate(utterance, track="samples", extn=options("extensions")) -.RA -.AG utterance -The utterance you wish find the sample rate for. -.OA -.AG track -The parameter track you want to know about. -.AG extn -The file extensions to look for the track in. -.RT -Returns the sample rate of the specified track in Hz. -.SA -track, frames, play -.KW mu -.WR diff --git a/Help/segplot b/Help/segplot deleted file mode 100644 index 51fe8691..00000000 --- a/Help/segplot +++ /dev/null @@ -1,75 +0,0 @@ -\" -*-nroff-*- -.BG -.FN segplot -.TL -Segment plot -.CS -segplot(segs, param, labels, ylim, ylab = "", main = "", zeros = F, - pdat = 0.5, onset = 0, offset = 0, col = T, linetype = F - smoothing = F, mfrow, addlines, differ ) -.RA -.AG segs -A mu+ segment list. -.AG param -One of the valid parameter tracks for this segment list. Use tracks() -to find the available tracks. -.OA -.AG ylim -Limit the y-axis range for all plots. -.AG labels -A vector of labels to be used as the main title of each plot. If this -is not provided the segment labels will be used. -.AG ylab -A label for every y-axis. -.AG main -A main title for the plot. -.AG zeros -If the track being plotted is `"F0"', then if zeros is TRUE the regions -with probability of voicing less than `pdat' will not be plotted. -.AG pdat -The probability of voicing threshold for `zeros'. -.AG onset -The plot for each segment will start this much to the left of the start -of the segment. -.AG offset -The plot for each segment will end this much to the right of the end -of the segment. -.AG colour -If `TRUE', different colours will be used in the plot. -.AG linetype -If `TRUE' different linetypes will be used in the plot. Useful for -printing. -.AG smoothing -If smoothing is TRUE, a non-linear smoother (`smooth()') is applied to -the data. -.AG mfrow -Partitions the plot into sub-plots, eg if there are six segments in the -segment list, mfrow might be set to c(2,3). If the `mfrow' parameter is -not supplied, the program estimates a reasonable layout for the plots. -.AG addlines -A matrix or vector of time values. If `addlines' is a vector of time -values, then a vertical line is drawn on plot `j' at the time given in -`addlines[j]'. If `addlines' is a matrix, then a series of vertical lines -are drawn on plot `j', for the times given in `addlines[j,i]` (`i'=1, 2, -3...) -.AG differ -An integer, carries out nth order differentiation where n is the value -of `differ'. Defaults to no differentiation. -.SE -Generates multiple plots, one for each segment in the segment list, on -the specified parameter. If overlay is FALSE, one subplot is made per -segment and the subplots are arranged in rows, each labelled with the -segment label. If overlay is TRUE, one large plot is made and the data -for all segments is overlaid. In this case the x-axis is not labelled -since each segment will have a different start time and duration. -.SA -track, uttplot, dplot -.EX -> segs - start end utt -E "12489.4" "12586.9" "/home/dbase/dbase1/msadb001" -@ "12916.9" "12982.2" "/home/dbase/dbase1/msadb001" -E "13155.2" "13240.4" "/home/dbase/dbase1/msadb001" -> segplot(segs, "fm[1..3]") -.KW mu -.WR diff --git a/Help/slen b/Help/slen deleted file mode 100644 index 9d74b61e..00000000 --- a/Help/slen +++ /dev/null @@ -1,15 +0,0 @@ -.BG -.FN slen -.TL -Number of Segments -.CS -slen(x) -.RA -.AG x -A mu+ segment list. -.RT -The number of segments in that segment list. -.SA -phon -.KW mu -.WR diff --git a/Help/sortmatrix b/Help/sortmatrix deleted file mode 100644 index ca79f197..00000000 --- a/Help/sortmatrix +++ /dev/null @@ -1,17 +0,0 @@ -.BG -.FN sortmatrix -.TL -Sort matrix by label -.CS -sortmatrix(mat, labs=dimnames(mat)[[2]]) -.RA -.AG mat -A mu+ sement matrix. -.AG labs -A label vector which has the same number of columns as `mat'. -.RT -Returns a sorted matrix by label, created from `mat'. -.SA -label, phon -.KW mu -.WR diff --git a/Help/splitmat b/Help/splitmat deleted file mode 100644 index 63b28b26..00000000 --- a/Help/splitmat +++ /dev/null @@ -1,32 +0,0 @@ -.BG -.FN splitmat -.TL -Split data into objects of phonetic classes -.CS -splitmat(data, labs, filename="file", labelfile=T) -.RA -.AG data -A vector or matrix of data. -.AG labs -A label file parallel to `data'. -.AG filename -`file' is the object name you wish to save the phonetic classes under, and -is called "file.n", where n is an integer from 1 to n numnber of phonetic -classes. -.OA -.AG labelfile -If TRUE, a parallel label file (object) is created called "l.file.n". -.RT -Matrix of mu+ object files and the phonetic class that it belongs to. -.SA -label -.EX -# Create unique objects and labels for [S] and [T] from the demo.utts database. -> segs _ phon(demo.utts, "Phonetic='S/T'", find="Phonetic") -> labs _ label(segs) -> splitmat(segs, labs, filename="segs", labelfile=T) - filename label-file label-type -[1,] "segs.1" "l.segs.1" "S" -[2,] "segs.2" "l.segs.2" "T" -.KW mu -.WR diff --git a/Help/splitstring b/Help/splitstring deleted file mode 100644 index 33117fbc..00000000 --- a/Help/splitstring +++ /dev/null @@ -1,20 +0,0 @@ -\" -*-nroff-*- - -.FN splitstring -.TL -Split a string into words. -.CS -splitstring(str,char) -.RA -.AG str -A string. -.AG char -A character to split on -.RT -A vector of strings. The original `str' is split at ever occurrence of -`char' to generate a vector of strings. -.EX -> splitstring("/home/recog/steve/foo", "/") -[1] "home" "recog" "steve" "foo" -.KW mu -.WR diff --git a/Help/steady b/Help/steady deleted file mode 100644 index 744820c6..00000000 --- a/Help/steady +++ /dev/null @@ -1,86 +0,0 @@ -\" -*-nroff-*- -.BG -.FN steady -.TL -Find the steady-state portion of a parameter -.CS -steady(values, percent=25, mean=T, smoothing=F) -.RA -.AG values - A set of parameter tracks as returned by track(). The values would -normally correspond to formant data. -.OA -.AG percent -The size of the portion to search for. If percent is 25 (the default) -the returned values correspond to a portion of each segment of that -size (relative to the segment duration) which is the most steady state. -.AG mean -If mean is TRUE, the mean for each parameter (formant) accross the -steady state portion will be returned for each segment. -.AG smoothing -If smoothing is TRUE the data will be smoothed before the steady-state -portion is searched for. -.RT -The return value is a copy of the original `values' argument with the -additional components: -.RC steady -specifies the start and end indexes of the steady-state portion of each -segment, -.RC time -specifies the position of the midpoint of the steady state portion as a -proportion of the duration of each segment. -.RC mean -If the mean argument is `TRUE' this component specifies the means of -each parameter accross the steady state portion of each segment. - -.DT -The algorithm searches for the portion of the signal with the smallest -summed inter-euclidean distance (the euclidean distances between -successive points) for all segments of the parameter track of the -required size. Thus, if the signal consists of three formants, the three -dimensional euclidean distance is calculated between successive -samples. The distances are then summed over samples 1..n, 2..n+1 etc -where n is the required width. The smallest such sum is taken as the -most steady-state portion of the signal. - -.SA -track, segplot -.EX -> segs - start end utt -E "12489.4" "12586.9" "/home/dbase/dbase1/msadb001" -@ "12916.9" "12982.2" "/home/dbase/dbase1/msadb001" -E "13155.2" "13240.4" "/home/dbase/dbase1/msadb001" - -# First we extract the first three formants for our segment list: - -> data _ track(segs, "fm[1..3]") - -# Now we find the steady state portion: -> st _ steady(data, mean=T) - -# The $steady component shows the start and end indexes of the steady -# state portion for each segment: - -> st$steady - [,1] [,2] -vec 12 16 -vec 30 32 -vec 36 39 - -# The $time component shows the offset of the midpoint of the steady -# state portion, as a proportion of the total duration: -> st$time - vec vec vec - 0.6842105 0.8333333 0.21875 - -# The $mean component shows the mean of each parameter accross the steady -# state portion: -> st$mean - [,1] [,2] [,3] -meanvals 442.4348 1812.773 2565.309 -meanvals 355.2347 1973.285 2218.056 -meanvals 465.2812 1704.117 2396.117 - -.KW mu -.WR diff --git a/Help/sunplay b/Help/sunplay deleted file mode 100644 index ce6684bb..00000000 --- a/Help/sunplay +++ /dev/null @@ -1,15 +0,0 @@ -.BG -.FN sunplay -.TL -Play a SUN format .au audio file -.CS -sunplay(filename, internalspkr=F) -.RA -.AG filename -Audio file in SUN .au format. -.OA -.AG internalspkr -If TRUE play uses the internal speaker, -If FALSE (Default), the external speaker is used. -.KW mu -.WR diff --git a/Help/track b/Help/track deleted file mode 100644 index f71530b0..00000000 --- a/Help/track +++ /dev/null @@ -1,60 +0,0 @@ -\" -*-nroff-*- -.BG -.FN track -.TL -Extract track data for a segment list -.CS -track(segs, param, cut=NULL, index=T) -.RA -.AG segs -A mu+ segment list. -.AG param -One of the available parameter names (use `tracks()' to find which -parameters are available for the current database). -.OA -.AG cut -If this argument, a number between 0 and 1, is specified, each segment -is sampled at the corresponding point. For example, cut=0.5 will return -one sample per segment taken from the segment mid-point. -.AG index -If TRUE, an index to the returned data vector is constructed consisting -of start and end positions for each segment within the data vector and -the starting and ending times of each segment. -.RT -If `index' is `FALSE', a matrix of data is returned consisting of the -concatenation of the parameter tracks for each segment. The matrix may -have more than one column if the requested parameter is -multi-dimensional (for example the first three formants `"fm[1..3]"'). If -the `cut' argument was used, each row of the matrix will correspond to -one segment. If `index' is TRUE the return value has the following three -components: -.RC data -The concatenated matrix of data. -.RC index -A matrix with two columns giving the start and end indexes of each segment -within the `data' matrix. -.RC ftime -A matrix with two columns giving the start and end times of each -segment copied from the segment list. Both the `index' and the `ftime' -components will have the same number of rows as the original segment -list. -.SA -tracks, frames -.EX -# First we find out which tracks are available for the current Utterances: - -> tracks() -[1] "samples" "F0" "prob_voice" "rms" "ac_peak" -[6] "k1" "fm[1..4]" "bw[1..4]" - -# Now we extract the "samples" track (the raw sample speech data) for our -# segment list: - -my.samp <- track(my.segs, "samples") - -# Next we extract the first three formants at the mid point of each -# segment: - -my.form <- track(my.segs, "fm[1..3]", cut=0.5) -.KW mu -.WR diff --git a/Help/tracks b/Help/tracks deleted file mode 100644 index 00e603e3..00000000 --- a/Help/tracks +++ /dev/null @@ -1,25 +0,0 @@ -\" -*-nroff-*- -.BG -.FN tracks -.TL -Find the available tracks -.CS -tracks(utterance, duplicates=FALSE, extn=options("extensions")) -.OA -.AG utterance -An utterance name or an utterance vector. If a vector is provided, the -first element is taken as the utterance to be examined. -.AG duplicates -If duplicates is TRUE, duplicate entries will not be removed from the -result. The default behaviour is to remove duplicates. -.AG extn -A set of file extensions in which to look for parameter tracks. -.RT -A vector of parameter names corresponding to the parameters that may be -extracted from the current utterance. -.SA -track, frames -.EX -tracks("/mu/mu/MUDEMO/msajc001", extn=Extns.SSFF ) -.KW mu -.WR diff --git a/Help/train b/Help/train deleted file mode 100644 index 63c33561..00000000 --- a/Help/train +++ /dev/null @@ -1,38 +0,0 @@ -\" -*-nroff-*- -.BG -.FN train -.TL -Train a Gaussian Model -.CS -train(x, lab) -.RA -.AG x -A data vector or matrix. -.AG lab -A vector of labels parallel to `x'. -.RT -A structure with the following components: -.RC label -The unique labels in `lab'. -.RC means -The means for each dimension per unique label. -.RC cov -The combined covariance matrixes for each unique label. The -matrixes are joined with `rbind'. If the input data is -one-dimensional, this is just the standard deviation of the data. -.RC invcov -The combined inverse covariance matrixes for each unique label. The -matrixes are joined with `rbind'. If the input data is -one-dimensional, this is just the reciprocal of the standard deviation -of the data. -.DT -This function is used to train a gaussian model on a data set. The -result can be passed to either the `mahal' or `bayes.lab' functions to -classify either the training set (`x') or a test set with the same -number of dimensions. Train simply finds the mean and inverse -covariance matrix/standard deviation for the data corresponding to each -unique label in labs. -.SA -mahal, bayes.lab, mahalplot, bayes.plot -.KW mu -.WR diff --git a/Help/utt b/Help/utt deleted file mode 100644 index 33e62a77..00000000 --- a/Help/utt +++ /dev/null @@ -1,15 +0,0 @@ -.BG -.FN utt -.TL -Return the utterances portion of a segment list -.CS -utt(segs) -.RA -.AG segs -A mu+ segment list. -.RT -A vector of the utterance names for each segment. -.SA -mustart, muend, mudur, label -.KW mu -.WR diff --git a/Help/uttnum b/Help/uttnum deleted file mode 100644 index 8b5f5c5b..00000000 --- a/Help/uttnum +++ /dev/null @@ -1,22 +0,0 @@ -\" -*-nroff-*- -.BG -.FN uttnum -.TL -Find an utterance in Utterances -.CS -uttnum(utt, str) -.RA -.AG utt -A mu+ utterance vector. -.AG str -The full name of the utterance. -.RT -An integer, corresponding to the index of the utterance in the -Utterances vector. -.SA -mkdb -.EX - uttnum(my.utts, "/mu/mu/MUDEMO/msajc001") -.KW mu -.WR - diff --git a/Help/uttplot b/Help/uttplot deleted file mode 100644 index 6382bcdb..00000000 --- a/Help/uttplot +++ /dev/null @@ -1,94 +0,0 @@ -\" -*-nroff-*- -.BG -.FN uttplot -.TL -Plot a parameter for an utterance -.CS -function(segs, param, data, labtype, colour = T, linetype = F, xlim = NULL, - ylim = NULL, xlab = "time (ms)", ylab = "", main = "", zeros = F, pdat - = 0.5, boundplot = T, labels = NULL, labeltimes = NULL, labcentre - = F, differ = NULL, smoothing = F, flipped = F, axes = T, talk = F, - label = F, ispk = T, ...) -.RA -.AG segs -A mu+ segment list. -.AG param -One of the valid track parameters for the current segment list (see -`tracks'). -.OA -.AG data -If this is provided it should be a matrix of data corresponding to -`param' for the segment list `segs'. This data is plotted rather than -the data extracted from the database. -.AG labtype -The type of labels to use in the utterance plot. For example if -`labtype = "Syllable"' then the syllable boundaries will be drawn on -the graph and labelled accordingly. -.AG colour -If `TRUE' then different colours will be used for different parts of -the plot. If `FALSE' the plot will use only one colour (useful for prining). -.AG linetype -If `TRUE' then different line types will be used for the plot (useful -for printing). -.AG xlim -The range of the x-axis (time). -.AG ylim -The range of the y-axis. -.AG xlab -A label for the x-axis. -.AG ylab -A label for the y-axis. -.AG main -A main title for the plot. -.AG zeros -If TRUE, then the parts of the utterance where the probability of -voicing is less than `pdat' will not be plotted. This is only reliable -for the parameter track "F0". -.AG pdat -The threshold for probability of voicing used with `zeros'. -.AG boundplot -If TRUE, label boundaries will be drawn (for either the original -segments or the labels specified by `labtype'). -.AG labels -A vector of labels to be plotted instead of those corresponding to the -segment list or the labels specified by `labtype'. You must also -specify `labeltimes'. -.AG labeltimes -A vector of times at which to plot the labels in `labels'. -.AG labcentre -If TRUE, the labels in `labels' are drawn between the time values in -`labeltimes', rather than at these times. In this case `labels' must -contain one less element than `labeltimes'. -.AG differ -An integer greater than 1. If `differ' is 1 the first difference (first -derivitive, or gradient) is plotted and so on. -.AG smoothing -If TRUE, the data is smoothed before being plotted. -.AG flipped -If TRUE, the data is inverted. -.AG axes -If FALSE no axes are drawn on the plot. -.AG talk -If TRUE the sample data for the utterance will be retrieved and can be -played back interactively from the plot. -.AG label -If TRUE, labels can be added to the plot interactively. -.AG ispk -If TRUE, the internal speaker will be used for playback of speech (in -combination with the `talk' argument. If FALSE, the external speaker or -headphones will be used. -.PP -This function will also accept any of the standard graphical parameters -which can be passed to `par()', for example `cex' which specifies the -size of characters to be used. -.SE -A plot of the requested parameter track is generated. Segment -boundaries are drawn and labelled according to the options specified. -If `talk' is TRUE, parts of the utterance can be played back using the -left mouse button to define the boundaries of the section to be played. -The middle mouse button quits this interaction. -.SA -track, phon -.KW mu -.KW plot -.WR diff --git a/Help/words b/Help/words deleted file mode 100644 index 69b3ec07..00000000 --- a/Help/words +++ /dev/null @@ -1,24 +0,0 @@ -\" -*-nroff-*- -.BG -.FN words -.TL -Returns the words in an utterance -.CS -words(vec, utt) -.RA -.AG vec -Either an utterance name or an integer which is used as an index into -the utterance vector `utt'. -.OA utt -If `vec' is a number, this should be a mu+ utterance vector. -.RT -The words (text) from the utterance. -.SA -phon, label -.EX -# find the words in the first utterance of my.utts -words(1, my.utts) -# find the words in this utterance -words("/mu/mu/MUDEMO/msadb001") -.KW mu -.WR diff --git a/NAMESPACE b/NAMESPACE index 18ce6ffc..0ee8fb38 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,2 +1,234 @@ -# Export all names -exportPattern(".") +# Generated by roxygen2: do not edit by hand + +S3method("[",EPG) +S3method("[",spectral) +S3method("[",trackdata) +S3method(Math,trackdata) +S3method(Math2,trackdata) +S3method(Ops,trackdata) +S3method(Summary,trackdata) +S3method(as.emusegs,emuRsegs) +S3method(as.matrix,emusegs) +S3method(bark,default) +S3method(bark,spectral) +S3method(bark,trackdata) +S3method(bind,default) +S3method(bind,trackdata) +S3method(by,trackdata) +S3method(cbind,trackdata) +S3method(dim,trackdata) +S3method(dimnames,trackdata) +S3method(dur,emusegs) +S3method(dur,trackdata) +S3method(end,emusegs) +S3method(end,trackdata) +S3method(label,emusegs) +S3method(mel,default) +S3method(mel,spectral) +S3method(mel,trackdata) +S3method(plot,spectral) +S3method(plot,trackdata) +S3method(print,emuDBhandle) +S3method(print,emuRsegs) +S3method(print,emuRtrackdata) +S3method(print,emusegs) +S3method(print,trackdata) +S3method(rbind,trackdata) +S3method(sort,emuRsegs) +S3method(start,emusegs) +S3method(start,trackdata) +S3method(summary,emuDBhandle) +S3method(summary,emusegs) +S3method(summary,trackdata) +S3method(utt,emusegs) +export(Slope.test) +export(add_attrDefLabelGroup) +export(add_attributeDefinition) +export(add_files) +export(add_labelGroup) +export(add_levelDefinition) +export(add_linkDefinition) +export(add_perspective) +export(add_signalViaMatlab) +export(add_ssffTrackDefinition) +export(as.spectral) +export(as.trackdata) +export(autobuild_linkFromTimes) +export(bark) +export(bayes.dist) +export(bayes.lab) +export(bayesian.metric) +export(bayesplot) +export(bind) +export(buildtrack) +export(cen.sub) +export(classify) +export(classplot) +export(closest) +export(convert_BPFCollection) +export(convert_TextGridCollection) +export(convert_legacyEmuDB) +export(convert_txtCollection) +export(convert_wideToLong) +export(cr) +export(create_emuDB) +export(create_emuRdemoData) +export(create_emuRtrackdata) +export(create_itemsInLevel) +export(create_links) +export(create_spectrogram_image_as_raster) +export(crplot) +export(dapply) +export(dbnorm) +export(dbtopower) +export(dct) +export(dcut) +export(dcut.sub) +export(ddiff) +export(ddiff.sub) +export(delete_itemsInLevel) +export(dextract) +export(dextract.lab) +export(dextract.sub) +export(distance) +export(dplot) +export(dplot.norm) +export(dplot.time) +export(dsmooth) +export(dsmooth.sub) +export(dtime) +export(duplicate_level) +export(dur) +export(ellipse) +export(emusegs.database) +export(emusegs.query) +export(emusegs.type) +export(epgai) +export(epgci) +export(epgcog) +export(epgdi) +export(epggs) +export(epgplot) +export(epgsum) +export(eplot) +export(euclidean) +export(expand_labels) +export(export_BPFCollection) +export(export_TextGridCollection) +export(export_seglistToTxtCollection) +export(fapply) +export(frames) +export(frames.time) +export(freqtoint) +export(gerst.sub) +export(get.time.element) +export(get.trackkeywrd) +export(get_legalLabels) +export(get_levelCanvasesOrder) +export(get_signalCanvasesOrder) +export(get_trackdata) +export(import_mediaFiles) +export(is.seglist) +export(is.spectral) +export(is.trackdata) +export(label) +export(label_convert) +export(label_num) +export(linear) +export(linear.av) +export(list_attrDefLabelGroups) +export(list_attributeDefinitions) +export(list_bundles) +export(list_files) +export(list_labelGroups) +export(list_levelDefinitions) +export(list_linkDefinitions) +export(list_perspectives) +export(list_sampleRates) +export(list_sessions) +export(list_ssffTrackDefinitions) +export(load_emuDB) +export(lob.sub) +export(locus) +export(mahal) +export(mahal.dist) +export(mahalanobis.metric) +export(make.emuRsegs) +export(make.seglist) +export(makelab) +export(matscan) +export(mel) +export(modify.seglist) +export(moments) +export(mu.colour) +export(mu.colour.get) +export(mu.legend) +export(mu.linetype.get) +export(muclass) +export(nearey.sub) +export(norm) +export(normalize_length) +export(outliers) +export(palate) +export(perform) +export(plafit) +export(polygonplot) +export(query) +export(rad) +export(radians) +export(randomise.segs) +export(read.emusegs) +export(read_bundleList) +export(remove_attrDefLabelGroup) +export(remove_attributeDefinition) +export(remove_labelGroup) +export(remove_legalLabels) +export(remove_levelDefinition) +export(remove_linkDefinition) +export(remove_perspective) +export(remove_ssffTrackDefinition) +export(rename_attributeDefinition) +export(rename_bundles) +export(rename_emuDB) +export(replace_itemLabels) +export(requery_hier) +export(requery_seq) +export(resample_annots) +export(rescale.gerst) +export(rescale.lob) +export(rescale.nearey) +export(runBASwebservice_all) +export(runBASwebservice_chunker) +export(runBASwebservice_g2pForPronunciation) +export(runBASwebservice_g2pForTokenization) +export(runBASwebservice_maus) +export(runBASwebservice_minni) +export(runBASwebservice_pho2sylCanonical) +export(runBASwebservice_pho2sylSegmental) +export(serve) +export(set_legalLabels) +export(set_levelCanvasesOrder) +export(set_signalCanvasesOrder) +export(shift) +export(sortmatrix) +export(splitstring) +export(track.gradinfo) +export(track.gradinfo.sub) +export(trackfreq) +export(tracktimes) +export(train) +export(trapply) +export(update_itemsInLevel) +export(utt) +export(write.emusegs) +export(write_bundleList) +import(DBI) +import(jsonlite) +import(methods) +import(stats) +import(stringr) +import(tools) +import(uuid) +import(wrassp) +importFrom(grDevices,as.raster) +importFrom(rlang,.data) diff --git a/NEWS.md b/NEWS.md new file mode 100644 index 00000000..1950890f --- /dev/null +++ b/NEWS.md @@ -0,0 +1,449 @@ +# emuR 2.5.1 + +## new features / performance tweaks / improvements + +* Documentation: Include package anchors in links for CRAN’s new HTML reference manuals +* `add_signalVia...()`: Drafting a new family of functions that allows users to use signal procissing from outside Wrassp; starting with Matlab + +## bug fixes + +# emuR 2.5.0 + +## new features / performance tweaks / improvements + +* CRUD operations: `create_itemsInLevel()` can now be used in more ways than before +* CRUD operations: Improved documentation for `create/update/delete_itemsInLevel()`; this is ongoing work + +## bug fixes + +* remove one instance of is.R() because it is no longer allowed on CRAN +* CRUD operations: `delete_itemsInLevel()` was heavily flawed and is now usable + +# emuR 2.4.2 + +## new features / performance tweaks / improvements + +* experimental: signal tracks can be stored in Rda format as well as SSFF + +## bug fixes + +* passing a data frame or tibble as language option to BAS Web Services calls was broken (closes #272, thanks to @johannacronenberg) +* fix error in create_itemsInLevel where end of last segment in a level could be miscalculated + +# emuR 2.4.1 + +## new features / performance tweaks / improvements + +## bug fixes + +* worked around a problem in vroom_write_lines that could materialize on Windows in rare cases +* fix loading databases with empty sessions (thanks to @FredrikKarlssonSpeech via #268, #271) + +# emuR 2.4.0 + +## new features / performance tweaks / improvements + +* cache for onTheFly computations in `get_trackData()` (thanks to @samgregory via #254) +* improved a small number of error messages + +## bug fixes + +* `add_files` not correctly matching file extension (thanks to @samgregory via #254) +* fixed level attribute mixup in generation of seglist (closes #263) +* workaround for `wrassp::readDataObj()` crashing R session. Temporary fix for issue #261. +* generic functions Math2/round and Math2/signif did not accept digits parameter for legacy trackdata objects +* adaptations to Tidyverse 2.0.0 and other adapations to make unit tests and cran checks smooth again + +# emuR 2.3.0 + +## new features / performance tweaks / improvements + +- added explicit checks for all `dir.create()` (closes #49) +- checking dir parameter in `import_mediaFiles()` (closes #108) +- `add_files()` now works with strings like `_annot.json` as `fileExtension` (closes #246) +- now prechecking user defined options in `.onLoad()` (closes #240) +- added new `sessionPattern` and `bundlePattern` args to `list_bundles()` and `list_sessions()` (closes #201) +- better output for warning in `get_trackdata()` regarding heterogeneous sampling rates (closes #189) +- using `cli` package to improve output of `summary(emuDBhandle)` +- slight performance increase in EQL by avoiding full table scans due to REGEXP +- checking for repeating sequences in `normalize_length()` (closes #211) +- added explicit error message when bad column name is used in DBconfig (closes #212) +- removing linkDefinition on error if it was added be4 (closes #226) + +## bug fixes + +- fixed bug in requery workflows like: `sl = query(ae, "[Phonetic== V]"); requery_hier(ae, sl[1:2,], level = "Text")` +- added missing loading of DBconfig to `write_bundleList()` (fixes #248) +- fixed bad handeling of dir paths containgin RegExp chars in `convert_TextGridCollection()` (fixes #207) + +# emuR 2.2.0 + +## new features / performance tweaks / improvements + +- compete rewrite of hierarchical queries + requeries (now using CTEs) + clean-up of the EQL engine +- `serve()` -> save bundle performance bump due to SQLite transaction improvement + using prep. statements +- `serve()` function now downloads current release .zip instead of cloning repo to save time and bandwidth +- fixed a few more `readr::` file vs path parameter warnings +- using `readr::write_lines()` function in `export_TextGridCollection()` to better support UTF-8 symbols +- added `create_spectrogram_image_as_raster` function that used the EMU-webApp +JavaScript code to draw a spectrogram as a raster + +## bug fixes + +- fixed invalid TextGrid export on empty levels + +# emuR 2.1.1 + +## new features / performance tweaks / improvements + +- `load_emuDB()` performance bump due to SQLite transaction improvement + using prep. statements +- `create_itemsInLevel()` now supports items of type SEGMENT +- added deprecated/not recommended texts to \code{eplot()}, \code{dplot()} and \code{dur()} help pages (fixes \#234) + +## bug fixes + +- implemented first version of `delete_itemsInLevel()` +- `requery_seq()` now returns correct attribute label vs. the level attribute +- fixed `In rbind(names(params), unlist(params, recursive = F)): number of columns of result is not a multiple of vector length (arg 1)` warning in `runBASwebservice_maus()` with `httr::upload_file()` +- added missing `()` to `return` statement +- no emuDBcache needed for `rename_emuDB()` + + +# emuR 2.1.0 + +## new features / performance tweaks / improvements + +- implemented `write_bundleList()` and `read_bundleList()` functions +- new `bundleListName` `serve()` parameter implemented +- added warning to `serve()` when either bundleComments or bundleFinishedEditing is set to true and no bundleListName was set (closes \#268) +- implemented `onTheFlyFunction` parameter for `get_trackdata()` which allows users to implement their own functions (input: path to wav file; output tibble/data.frame that has a column called `"frame_time"`) +- converting factors into characters in `get_trackdata()` and `normalize_length()` (closes \#224 and \#223) +- `add_perspective()` now sets `"restrictions"` -> `"showPerspectivesSidebar"` to `true` to make the side bar visible +- `normalize_length()` now doesn't throw tibble 3.0.0 warnings any more +- `query()` RegEx operators `=~` now using SQLites native support (wasn't available in earlier versions) +- replaced RCurl with httr dep +- moved some dependencies to `Suggests` field in DESCRIPTION file +- removed vignette stubs + +## bug fixes + +- try catching rounding errors of sample rate and start time in `get_trackdata()` + + +# emuR 2.0.4 + +## new features / performance tweaks / improvements + +- `export_seglistToTxtCollection()` now zero pads file names (should fix #219) +- implemented `write_bundleList()` and `read_bundleList()` functions +- new `bundleListName` `serve()` parameter implemented + +## bug fixes + +- using `rstudioapi::translateLocalUrl()` for ws connection url to permit RStudio to connect +- fixed bug in duplicate levels (bad level name insert into items table) that was causing the items array in the JSON file to stay empty +- `serve()` working outside of RStudio again (overlooked that `rstudioapi::translateLocalUrl()` needs RStudio) +- improved error formatting in `load_emuDB()` and `export_TextGridCollection` +- fixed `rename_bundles()` issue with sub-string matching of names (fixes \#220) +- `requery_hier()` works with non-main attributes again (always returned levels main attribute labels) + +# emuR 2.0.2 + +## new features / performance tweaks / improvements + +* changed default `resultTypes` to `"tibble"` (`query()`, `requery_hier()`, `requery_seq()`, `get_trackdata()`) +* changed url in printed output of serve to https +* `serve()` doesn't block the R console any more +* suppressing requery differing length warnings in `runBASwebservice_*` functions +* `requery_hier()` now persists the input segment list length by inserting NA rows for missing segments +* depricated vignettes are now stubs only (== empty containing notice pointing to the EMU-SDMS manual) +* implemented `rename_bundles()` function +* better `times_norm` calculation for `normalize_length()` +* better error message when empty or non existing session is passed into `add_files()` +* `serve()` `useViewer` parameter now implemented and the default. +* `serve()` now uses a single server to host the EMU-webApp and provide the websocket server for the emuDB +* `serve()` now works within RStudio when it is run as a web application + +## bug fixes + +# emuR 1.1.2 + +## new features / performance tweaks / improvements + +* rewrite of `list_files()` that leads to massive speed bump +* implemented `convert_wideToLong()` function to convert trackdata tibble objects to their long form representation (useful for spectral analysis) +* `query()` sub-function now sets `perl = TRUE` arguments in `regexp` call when using `sessionPattern` and `bundlePattern` (useful for e.g. for negative look-around `bundlePattern = "^((?!msajc003).)*$"`) +* implemented `export_seglistToTxtCollection()` +* normalize length now also using `list()` with `do.call(rbind, res_list)` +* implemted new `runMINNI` boolean parameter in `runBASwebservice_all()` + +## bug fixes + +* https default on `serve()` +* fixed problems of `calcFreqs = T` in `convert_wideToLong()` (possibly caused by newer version of dplyr?) + + +# emuR 1.1.1 + +## new features / performance tweaks / improvements + +## bug fixes + +* fixed bad column init. in `normalize_length()` column +* removed detritus LaTex files (e.g. .log, .aux) from `vignette/pics` directory (as requested by CRAN maintainer) + +# emuR 1.1.0 + +## new features / performance tweaks / improvements + +* URL encoding of bundle and session names to allow for URL string reserved characters in bundle and session names +* error message of `get_trackdata()` now contains seglist row index if "Can not extract following" +* `normalize_length()` now allows for additional non-numeric columns +* changed `stop()` to `warning()` in `get_trackdata()` when samplerates are inconsistent (closes \#190) +* better error message when there is a naming mismatch of `_emuDB` dir `_DBconfig.json` +* using `sub()` instead of `tools::file_path_sans_ext()` to handle `_` in file extensions +* `query()`, `requery_seq()` and `requery_hier()` now supports the `resultType` `"tibble"` (and `get_trackdata()`, `requery_seq()`, `requery_hier()` and `serve()` support them as input) +* `serve()` function now uses `seglist$start` and `seglist$end` instead of `seglist$sample_start` and `seglist$sample_end` +* implemented first version of `update_itemsInLevel()` (only label updates for now) +* now setting the `sample_start` and `sample_end` values in query results when EVENT levels are queried (previously only `start` was set) +* implemented first versions of `create_links()` (currently not checking for anything), `create_itemsInLevel()` (only EVENTs and ITEMs) and `update_itemsInLevel()` (only labels) and `delete_itemsInLevel()` +* `list_bundles()` outside of loop for performance bump in `get_trackdata()` +* better error message in `add_files()` if no files are found +* `consistentOutputType` of `get_trackdata()` is not set to `TRUE` and is reset to `T` if `resultType` is `"emuRtrackdata"` or `"tibble"` (fixes \#203) +* avoiding negative `times_rel` and `times_norm` values in `create_emuRtrackdata()` by setting them to 0 (caused by string to numeric conversion precision errors) +* `requery_seq()` now inserts NA values for the out of bounds rows instead of dropping them. +* implemented `list_sampleRates()` function +* `get_trackdata()` is now iteratively appending to a `list()` instead of into a SQLite temp table. This is a fairly large performance boost and also fixes \#206. +* added deprecation warnings to vignettes (added links to manual chapters) + +## bug fixes + +* propper fix for "now ordering by `items_idx` not by `start_start_seq_idx` which led to bad label sequences (fixes \#140)" +* fixed bad indexing in `normalize_length()` when sl_rowIdx values are not a `c(1, 2, 3, 4, ...)` sequence +* fixed `staticContours` SSFF tracks not being sent to EMU-webApp (fixes \#195) +* fixed bug with completely empty levels that caused a bad resort of levels in `_annot.json`s in `rewrite_allAnnots()` +* fixed bug in `add_files()` that was using the wrong variable (fixes \#196) +* added error message when querying levels without time-bearing sub-levels (closes \#150) +* fixed bug in `create_emuRtrackdata()` with handling trackdata object of class `spectral` +* correct recalculation of ITEM IDs of missing levels in .hlb files (== only present in ESPS files) in `convert_legacyEmuDB()` +* fixed handling of completely empty levels in .hlb files +* `normalize_length()` now handles various additional column types (not just `"numeric"`) + +# emuR 1.0.0 + +## new features / performance tweaks / improvements + +* implemented new `consistentOutputType` parameter for `get_trackdata()` to always return a `trackdata` or `emuRtrackdata` object independent of what the `cut` and `npoints` arguments are set to +* now removing `levelCanvasOrder` entry in `remove_levelDefinition()` (fixes \#156) +* `serve()` method now uses GET to deliver media files to the EMU-webApp. This avoids the base64 conversion overhead and is a quite significant load time improvement +* explicit error message in `convert_legacyEmuDB()` when invalid redundant links are found +* better error message in BPF parser +* `convert_legacyEmuDB()` automatically converts `.ssd` media files to `.wav` and normalizes the annotations to start at 0 (only if attr(ssd,'startTime') is not 0). +* added `sort()` S3 method for `emuRsegs` objects +* checking for badly sorted `emuRsegs` in `requery_hier()` and `requery_seq()` functions +* `create_emuRtrackdata()` returns a simple `data.frame` object not a `data.table` object +* `emuRtrackdata` object now contains a `times_norm` (normalized time values between 0 and 1 for each segment) column by default +* added note to `print.emuRsegs()` to give the user a hint about missing columns +* implemented `print.emuRtrackdata()` to avoid overly verbose output +* implemented `normalize_length()` function as S3 function to normalize the length of each segment in an `emuRtrackdata` object +* added `absolute_file_path` column to output of `list_files()` +* query engine does not rely on label index in label array any more (updated `convert_queryResultToEmuRsegs()` to use `resultAttrDef` instead of `labelIdx`). Closes \#164. +* added `browser` argument to `serve()` function which is passed on to `utils::browseURL()` function +* `requery_seq()` now uses `start_item_seq_idx` and `end_item_seq_idx` of seglist instead of `start_item_id` and `start_item_id` to simplify function +* implemented `check_emuDBhandle()` function that is used on every exported function that takes a `emuDBhandle` as an argument to check if the handle is still valid (closes \#176) +* implemented `"tibble"` as `resultType` option in `get_trackdata()`. This will probably replace the `"emuRtrackdata"` option in future (it contains exactly the same data/columns). +* prechecking if attribute definition is already defined (closes \#182) +* `get_trackdata()` now uses temporary SQL tables to store the intermediate results (massive performance gains!). Removed `nrOfAllocationRows` parameter as this is no longer needed as no matrix is used to store the intermediate results. (also closes \#125) +* `convert_TextGridCollectio()` using `dir.exists()` instead of `file.exists()` to check dirs +* all read operations now use the readr package (avoids encoding problems like \#187) +* `list_attributeDefinitions()` now allows for a name vector to be passed in +* rewrite of `rewrite_allAnnots()` functions for faster rewrites of `_annot.json` files to disk +* improved cleanup in testing DBconfig functions +* now ordering by `items_idx` not by `start_start_seq_idx` which led to bad label sequences (fixes \#140) + +## bug fixes + +* fixed problem of updating cache. Didn't handle `data.frame` object that was thought to be a vector correctly! +* fixed a bug in the BPF export function, which meant that WAVE files were only copied into one session +* added missing `$` in pattern arguments in `list.files` call in `list_files` (fixes \#170) +* not adding ssffTrackDefinition to DBconfig if user input is no (closes \#171) +* fixed bad `seq_start_seq_idx` and `seq_seq_idx` returned by `requery_seq()` (fixes \#183) +* fixed bad `seq_start_seq_idx` returned by internal `query_databaseHier()` function +* fixed bad sorting of `requery_seq()/requery_hier()` when `calcTimes = F` (still sorted by `start_sample` instead of the correct `seq_idx`) +* added `readr::parse_character()` to data received in `serve()` as this is recoded in windows (fixes \#188). + +# emuR 0.2.3 + +## new features / performance tweaks / improvements + +* tweaked `runBASwebservice_maus()`; improved performance for presegmented bundles +* performance bump for `fapply()` by preallocating result matrix +* performance bump for `trapply()` by preallocating result matrix +* performance bump for `mel.spectral()` by preallocating result matrix +* performance bump for `bark.spectral()` by preallocating result matrix +* updated DBI calls to comply with the latest best practices (using `DBI::dbExecute()` instead of `DBI::dbGetQuery()` for non-`SELECT` queries) +* BPF collection exporter documented and now public + +## bug fixes + +* `export_TextGridCollection()` now handles partial includes of bundle and session names correctly (issue \#147) +* added missing check if `anagestConfig` is defined to `rename_attributeDefinition()` +* setting useBytes to T to avoid reencoding under windows +* fixed bug in `add_ssffTrackDefinition()` that was trying to access `fp` which was renamed in a refactor to `filesDf` +* fixed export to autodetect S3 methods (cbind & rbind for trackdata) + +# emuR 0.2.2 + +## new features / performance tweaks / improvements + +* some changes to the parameter names in the BAS webservice functions +* convert_txtCollection and convert_BPFCollection now name topmost item "bundle" +* added functions to set and get level descriptions in DBconfig +* BAS webservice functions now perform a cache update prior to departure +* added multiple perspectives to ae demo database +* choosing explicit paths with intersecting hierarchies now possible +* remove levelDef & linkDef now implement force parameters +* new function convert_txtCollection converts plain text collections into single-node emuDB +* new functions runBASwebservice_* that call various BAS webservices from inside emuR +* NULLing out empty DFs on list_level/linkDefs for more consistent API +* `newLinkDefType` argument implemented in `autobuild_linkFromTimes()` to generate linkDefinition if so desired +* automatically removing superlevel from `levelCanvasOrder` if `convertSuperlevel` is set to `TRUE` in `autobuild_linkFromTimes()` + +## bug fixes + +* wrapped `readChar`s in `enc2utf8` to avoid encodings issues on windows +* updating label table correctly on add_attributeDefinition() (#138) +* runBASwebservice_maus / minni / all now no longer ignore unlinked items (idx -1) but treat them as linkless segments +* commented out `cat()` in `train()` function be be less verbose +* BAS webservice calls now get their own temp directories (UUID based). This avoids race conditions when several scripts are running in parallel. +* convert_txtCollection now treats perspectives as array (as it should) + +# emuR 0.2.1 + +## new features / performance tweaks / improvements + +* added new `EMUwebAppConfig -> perspectives -> signalCanvases -> minMaxValLims` config option to emuDB vignette +* requery_hier + requery_seq now implement the same timeRefSegmentLevel parameter as query (#135) + +## bug fixes + +* fixed requery\_hier() bug of requery on same attribute definition +* fixed requery\_hier() bug of requery on same level but different attribute definition + +# emuR 0.2.0 + +## new features / performance tweaks / improvements + +* rewrite of query engine to not require links_ext table any more (== redundant links) +* calcTimes parameter added to query() / requery\_seq() / requery\_hier() to make calculating times optional (extreme performance boost if no times have to be calculated) +* rewrite of annotJSONcharToBundleAnnotDFs() for faster loads emuDBs containing large annotJSONs +* replaced tidyjson as annot.json parser with own solution at tidyjson didn't scale well on larger annotation files +* added verbose parameter to export\_TextGridCollection() +* improved pre-check of dir exists in export\_TextGridCollection() +* added new replace\_itemLabels function +* improved export\_TextGridCollection() doc +* improved replace_itemLables() speed +* implemented rename\_emuDB() (\#116) +* implemented duplicate\_level() (\#113) +* implemented linkDuplicates parameter in duplicate\_level() +* autobuild\_linkFromTimes() speed improvements +* FUNCQ queries (start(),end(), medial()) now additionally support TRUE & FALSE and T & F values (vs. 0 & 1) +* added attrDefNames column to list\_levelDefinitions() output +* can now deal with read only emuDBs by copying the cache to tempdir() and making it writable for the user +* added start\_item\_seq\_idx and end\_item\_seq\_idx to emuRsegs object +* added start\_item\_seq\_idx and end\_item\_seq\_idx type values to all intermediate result tables +* added optional function to reduce hierarchical query results to left and right most children only (large performance gain on calcTimes = T) +* rewriting annot.json files now updates MD5 sums as well (avoids unnecessary reload on next load\_emuDB) +* rewriting annot.json files now writes all (including empty / missing) attributeDef. labels + +## bug fixes + +* fixed bad DBconfig gen. on add_perspective +* fixed list\_linkDefinitions() returning strings as factors +* fixed bad error message when passing in ITEM levels to autobuild\_linkFromTimes() +* fixed incorrect handling of DBconfig when writeToFS was set to FALSE (writeToFS is now called rewriteAllAnnots) + +# emuR 0.1.9 + +## new features / performance tweaks / improvements + +* also allowing "time = " in TextTiers +* "levels of type 'EVENT' are not allowed to be super levels (== parents) in a domination relationship" constraint enforced in add_linkDefinition +* added "MEDIAFILE\_SAMPLES" as constant name to access audio samples to get\_trackdata() function +* improved error message to include tgPath in create_DBconfigFromTextGrid function +* no integer return value returned by create_emuRdemoData() any more! It was implicitly returned from wrassp function call... +* improved the slow overlap checking function in the BPF parser (is now O(n) instead of O(n^2)) +* fixed col naming problems for new (unreleased) RSQLite version +* added export_TextGridCollection() function +* improved doc for get_trackdata +* constant naming of EMU-SDMS vs EMU_SDMS in various files +* rewriting all annotation file on add\_levelDefinition, remove\_levelDefinition + +## bug fixes + +* fixed problem in conversion to JSON with empty items array (object '{}' vs array '[]') +* fixed problem of keywords "number" | "time" | "xmin" | ... in labels causing TextGrid parser to fail +* fixed problem with to lax RegEx in TextGrid parser +* fixed validation problem with missing levels regarding types + +# emuR 0.1.8 + +## new features / performance tweaks / improvements + +* get_trackdata with onTheFly calculation now reuses AsspDataObj if the current utterance is the same as the previous (large performance gain especially on long audio files) +* checking if DBconfig exists for better error message if 'name' field is not set correctly in DBconfig +* setting PRAGMA temp_store = 2; for SQLite connections +* not extracting tables to R if no RegEx needed to create filtered_tmp tables (performance gain when querying large emuDBs) +* convert_BPFCollection can now assigns the same label to more than one item when unifying tiers +* newline at the end of load_emuDB if no redundant links are built +* queries using dominates operator '^' don't use linksExt table anymore -> large performances benefits +* only using \_filtered\_tmp tables if RegEx patterns are used +* changed primary key on items table which leads to massive performance gains (deleting _emuDBcache.sqlite required) + +## bug fixes + +* fixed error handling of create_emuRtrackdata + added @export to roxygen doc +* invalid annotJSONs generated by import_mediaFiles fixed +* convert_TextGridCollection can now handle nested folders again +* invalid UUIDs in DBConfig produced by convert_BPFCollection. Also added additional unit test to detect this. +* list_bundles uses session argument again +* fixed "Expression tree is too large (maximum depth 1000)" error in get_trackdata with long emuRsegs lists + +# emuR 0.1.7 + +* R depends version bump to 3.2.0 (as requested by CRAN maintainer) +* updated testthat::expect\_less\_than to expect\_lt calls (due to deprecated warnings) +* Using new .keep_all = T parameter of dplyr +* removed legacy version of EQL vignette (overlooked as inst/doc was in .gitignore) + +# emuR 0.1.6 + +* skipping in-depth thorough tests on CRAN for query and autobuild SQL functions + +# emuR 0.1.5 + +* fixed problem of interm\_res\_tables already being present with queries that have multiple recursion depth on both sides +of either -> or ^ operand (e.g. query (ae , "[[[Phonetic = n -> Phonetic =z] -> Phonetic = S ] ^ [Text = friends -> Text = she]]")) +* fixed bad URL in README.md +* added CITATION file + +# emuR 0.1.3.9000 + +* renamed SQL tables & columns from camel case to underscore notation +* variable SQL backend implementation + +# emuR 0.1.2.9000 + +* multiple check fixes on various platforms + +# emuR 0.1.1.9000 + +* `serve` problem with internalVars bug fixed +* file locking problem that caused vignettes to fail under windows problem fixed + +# emuR 0.1.0.9000 + +* massive refactor of all functions that used to refer to an emuDB by + name and optionally by its UUID. They now use the new emuDBhandle object + that is now returned by the `load_emuDB()` function. +* `convert_XXX_to_emuDB()` functions renamed to `convert_XXX()` diff --git a/R/MethodEPG.R b/R/MethodEPG.R index ab97c9f1..fa1ea33f 100644 --- a/R/MethodEPG.R +++ b/R/MethodEPG.R @@ -1,8 +1,14 @@ -"[.EPG" <- -function (palates, i, j, k) +##' expand EPG +##' +##' see function +##' +##' +##' @aliases [.EPG +##' @keywords internal +##' @export +"[.EPG" <- function (palates, i, j, k) { - o <- NextMethod("[") -class(o) <- c("EPG") -o + o <- NextMethod("[") + class(o) <- c("EPG") + o } - diff --git a/R/Methodspectral.R b/R/Methodspectral.R index f123ed4f..8f9bbbb9 100644 --- a/R/Methodspectral.R +++ b/R/Methodspectral.R @@ -1,42 +1,48 @@ -"[.spectral" <- -function (dat, i, j, drop) +##' Expand spectral +##' +##' see function +##' +##' +##' @aliases [.spectral +##' @keywords internal +##' @export +"[.spectral" <- function (dat, i, j, drop) { - -if(!is.trackdata(dat)) -{ -if(is.matrix(dat)) -{ -if(missing(j)) -j <- freqtoint(dat, trackfreq(dat)) -else -{ -if(is.logical(j)) -j <- trackfreq(dat)[j] -j <- freqtoint(dat, j) - -} - o <- NextMethod("[") - class(o) <- c(class(o), "spectral") - -attr(o, "fs") <- attr(dat, "fs")[j] - return(o) - -} -else{ - -if(missing(i)) -i <- freqtoint(dat, trackfreq(dat)) -else -{ -if(is.logical(i)) -i <- trackfreq(dat)[i] -i <- freqtoint(dat, i) -} -o <- NextMethod("[") -class(o) <- c(class(o), "spectral") -attr(o, "fs") <- attr(dat, "fs")[i] - return(o) -} -} + + if(!is.trackdata(dat)) + { + if(is.matrix(dat)) + { + if(missing(j)) + j <- freqtoint(dat, trackfreq(dat)) + else + { + if(is.logical(j)) + j <- trackfreq(dat)[j] + j <- freqtoint(dat, j) + + } + o <- NextMethod("[") + class(o) <- c(class(o), "spectral") + + attr(o, "fs") <- attr(dat, "fs")[j] + return(o) + + } + else{ + + if(missing(i)) + i <- freqtoint(dat, trackfreq(dat)) + else + { + if(is.logical(i)) + i <- trackfreq(dat)[i] + i <- freqtoint(dat, i) + } + o <- NextMethod("[") + class(o) <- c(class(o), "spectral") + attr(o, "fs") <- attr(dat, "fs")[i] + return(o) + } + } } - diff --git a/R/apply.R b/R/apply.R index 0f4cc352..3d72453f 100644 --- a/R/apply.R +++ b/R/apply.R @@ -1,63 +1,54 @@ -############################################################################# -# # -# copyright : (C) 2000 SHLRC, Macquarie University # -# email : Steve.Cassidy@mq.edu.au # -# url : http://www.shlrc.mq.edu.au/emu # -# # -# This program is free software; you can redistribute it and/or modify # -# it under the terms of the GNU General Public License as published by # -# the Free Software Foundation; either version 2 of the License, or # -# (at your option) any later version. # -# # -############################################################################# - - -"dapply" <- -function (trackdata, fun, ...) -{ - if (version$major >= 5 && oldClass(trackdata) != "trackdata") { - stop("argument to dapply is not of class trackdata.") - } - else if (!is.trackdata(trackdata)) - stop("argument to dapply is not of class trackdata.") - if (!is.matrix(trackdata$index)) { - trackdata$ftime <- rbind(trackdata$ftime) - trackdata$index <- rbind(trackdata$index) - } - thisrow <- 1 - newindex <- trackdata$index - newdata <- NULL - newftime <- trackdata$ftime - for (j in 1:nrow(trackdata$index)) { - newindex[j, 1] <- thisrow - tmp <- fun(trackdata[j]$data, trackdata[j]$ftime, ...) - if (is.matrix(tmp$data)) { - newdata <- rbind(newdata, tmp$data) - } - else { - newdata <- c(newdata, tmp$data) - } - newftime[j, ] <- tmp$ftime - if (is.matrix(tmp$data)) - thisrow <- thisrow + nrow(tmp$data) - else thisrow <- thisrow + length(tmp$data) - newindex[j, 2] <- thisrow - 1 - } - x <- list(data = as.matrix(newdata), index = newindex, ftime = newftime) - if (version$major >= 5) { - oldClass(x) <- "trackdata" - } - else { - class(x) <- "trackdata" - } - return(x) -} - - -`fapply` <- - function (specdata, fun, ..., power = FALSE, powcoeffs = c(10, - 10)) +##' Function that applies a function to an EMU spectral object +##' +##' Applies a function to an EMU spectral object. +##' +##' fapply performs a similar operation to apply except that it is specifically +##' designed for handling EMU spectral objects. +##' +##' @param specdata A matrix or trackdata object of class spectral +##' @param fun A function to be applied. +##' @param \dots Optional arguments to fun +##' @param power A single element logical vector. If TRUE, convert specdata to +##' power values i.e. apply the function to a * specdata +##' \eqn{\mbox{\textasciicircum}}{^}b or a * specdata$data +##' \eqn{\mbox{\textasciicircum}}{^}b where a and b powcoeffs defined below. +##' @param powcoeffs A 2 element numeric vector for converting dB values to +##' power values. Defaults to a = 10 and b = 10. See \code{power}. +##' @return If the output has the same dimensions has the input, then an object +##' of the same dimensionality and class is returned. Otherwise it may be a +##' vector or matrix depending on the function that is applied. ... +##' @section Warning : The function can be very slow if applied to a large +##' trackdata object. In this case, it may be faster to use a for-loop with the +##' desired function around $data +##' @author Jonathan Harrington +##' @seealso \code{\link{apply}} \code{\link{by.trackdata}} +##' @keywords utilities +##' @examples +##' +##' # mean value per spectrum, input is a spectral matrix +##' m <- fapply(vowlax.dft.5, sapply, FUN=mean) +##' # as above but after converting dB to powers before +##' # applying the function +##' m <- fapply(vowlax.dft.5, sapply, FUN=mean, power=TRUE) +##' # spectral range +##' r <- fapply(vowlax.dft.5, range) +##' # spectral moments applied to a trackdata object +##' # m is a four-dimensional trackdata object +##' m <- fapply(fric.dft, moments) +##' # 1st 3 DCT coefficients calculated in a spectral matrix +##' # d is a 3-columned matrix +##' d <- fapply(vowlax.dft.5, dct, 3) +##' # dct-smooth with 10 coefficients. d2 is spectral matrix +##' d2 <- fapply(vowlax.dft.5, dct, 10, TRUE) +##' # dct-smooth a trackdata object with 10 coefficients +##' d3 <- fapply(fric.dft[1:4,], dct, 10, TRUE) +##' +##' +##' @export fapply +'fapply' <- function (specdata, fun, ..., power = FALSE, + powcoeffs = c(10, 10)) { + if (!is.spectral(specdata)) stop("object must be of class spectral") if (power) @@ -68,7 +59,10 @@ function (trackdata, fun, ...) class(specdata$data) <- "spectral" for (j in 1:nrow(specdata$data)) { vals <- fun(specdata$data[j, ], ...) - omat <- rbind(omat, vals) + if(j == 1){ # preallocate matrix + omat = matrix(nrow = nrow(specdata$data), ncol = length(vals)) + } + omat[j, ] <- vals } if (ncol(omat) == ncol(specdata)) { dimnames(omat) <- dnames @@ -84,14 +78,17 @@ function (trackdata, fun, ...) } else { if(!is.matrix(specdata)) - { - samfreq = max(trackfreq(specdata))*2 - specdata = as.spectral(rbind(specdata), samfreq) - } - + { + samfreq = max(trackfreq(specdata))*2 + specdata = as.spectral(rbind(specdata), samfreq) + } + for (j in 1:nrow(specdata)) { vals <- fun(specdata[j, ], ...) - omat <- rbind(omat, vals) + if(j == 1){ # preallocate matrix + omat = matrix(nrow = nrow(specdata), ncol = length(vals)) + } + omat[j, ] <- vals } if (ncol(omat) == ncol(specdata)) { dimnames(omat) <- dnames @@ -107,8 +104,3 @@ function (trackdata, fun, ...) return(omat) } } - -# Local Variables: -# mode:S -# S-temp-buffer-p:t -# End: diff --git a/R/bark.R b/R/bark.R index 7816ff9c..ad592032 100644 --- a/R/bark.R +++ b/R/bark.R @@ -1,26 +1,135 @@ -# Convert Bark to Hz and vice-versa -# Formulae in -# H. Traunm\"uller (1990) "Analytical expressions for the -# tonotopic sensory scale" J. Acoust. Soc. Am. 88: 97-100. -# f is a frequency in Hz unless inv=TRUE, -# in which case f is a frequency in Bark -# inv: if T, performs Bark to Hz conversion - - -"bark" <- -function (f, ...) { - UseMethod("bark") +##' Convert Hertz to Bark and Bark to Hertz +##' +##' The calculation is done using the formulae Traunmueller (1990) +##' +##' If 'data' is a spectral object, then +##' +##' the frequencies are changed so that they are proportional +##' +##' to the Bark scale and such that the Bark intervals +##' +##' between frequencies are con stant between the lowest +##' +##' and highest frequencies. More specifically, +##' +##' suppose that a spectral object has frequencies +##' +##' at 0, 1000, 2000, 3000, 4000 Hz. Then the corresponding +##' +##' frequencies extend in Bark between 0 and 17.46329 Bark +##' +##' in four equal intervals, and linear interpolation +##' +##' is used with the 'approx' function to obtain +##' +##' the dB values at those frequencies. Negative frequencies +##' +##' which are obtained for values of about less than 40 Hz +##' +##' are removed in the case of spectral objects. +##' +##' @aliases bark bark.trackdata bark.spectral bark.default +##' @param f A vector or matrix of data or a spectral object. +##' @param inv A single element logical vector. If FALSE, data are converted from +##' Hertz to Bark, if TRUE, data are converted from Bark to Hertz. (Does not apply +##' if 'data' is an oject of class 'spectral'. +##' @param \dots for generic only +##' @return +##' +##' A vector or matrix or spectral object of the same length and dimensions as +##' data. +##' @author Jonathan Harrington +##' @seealso +##' +##' \code{\link{mel}}, +##' +##' \code{\link{plot.spectral}} +##' @references Traunmueller, H. (1990) "Analytical expressions for the +##' tonotopic sensory scale" J. Acoust. Soc. Am. 88: 97-100. +##' @keywords math +##' @examples +##' +##' +##' +##' +##' # convert Hertz values to Bark +##' +##' vec <- c(500, 1500, 2500) +##' +##' vec +##' +##' bark(vec) +##' +##' +##' +##' +##' +##' # convert Hertz values to Bark and back to Hertz +##' +##' bark(bark(vec, inv=TRUE)) +##' +##' +##' +##' +##' +##' # convert the $data values in a trackdata object to Bark +##' +##' # create a new track data object +##' +##' t1 <- dip.fdat +##' +##' t1[1] +##' +##' +##' +##' +##' +##' # convert Hertz to Bark +##' +##' t1$data <- bark(t1$data) +##' +##' t1[1] +##' +##' +##' +##' # warp the frequency axis of a spectral object such +##' +##' # that it is proportional to the Bark scale. +##' +##' w = bark(e.dft) +##' +##' oldpar = par(mfrow=c(1,2)) +##' +##' plot(w, type="l") +##' +##' +##' +##' # The values of w are at equal Bark intervals. Compare +##' +##' # with +##' +##' plot(e.dft, freq=bark(trackfreq(e.dft))) +##' +##' # the latter has a greater concentration of values +##' +##' # in a higher frequency range. +##' +##' par(oldpar) +##' +##' +##' @export bark +"bark" <- function (f, inv = FALSE, ...) { + UseMethod("bark") } -"bark.default" <- -function (f, inv = FALSE, ...) +##' @export +"bark.default" <- function (f, inv = FALSE, ...) { - if (!inv) { - result = ((26.81 * f)/(1960 + f)) - 0.53 - } else { - result = (1960 * (f + 0.53))/(26.28 - f) - } - return(result) + if (!inv) { + result = ((26.81 * f)/(1960 + f)) - 0.53 + } else { + result = (1960 * (f + 0.53))/(26.28 - f) + } + return(result) } - diff --git a/R/bayesdist.R b/R/bayesdist.R index 865a6916..a7639b54 100644 --- a/R/bayesdist.R +++ b/R/bayesdist.R @@ -1,51 +1,131 @@ -############################################################################# -# # -# copyright : (C) 2000 SHLRC, Macquarie University # -# email : Steve.Cassidy@mq.edu.au # -# url : http://www.shlrc.mq.edu.au/emu # -# # -# This program is free software; you can redistribute it and/or modify # -# it under the terms of the GNU General Public License as published by # -# the Free Software Foundation; either version 2 of the License, or # -# (at your option) any later version. # -# # -############################################################################# - - +##' Classify using Mahalanobis distance +##' +##' Classifies using Mahalanobis distance +##' +##' The \code{model} argument contains the mean and inverse covariance matrix +##' (or standard deviation if the data is one-dimensional) for each class in +##' the training set as well as the class labels. This function calculates the +##' Mahalanobis distance of each row of \code{data} from each class mean and +##' assigns the label of the closest mean to that row. The result is a vector +##' of labels corresponding to the rows of \code{data}. +##' +##' The Mahalanobis distance between a data point and a class is the Euclidean +##' distance between the point and the class mean divided by the covariance +##' matrix for the class. This means that classes with large covariances will +##' attract data points from a larger area than those with small covariances. +##' +##' @param data A vector or matrix of data +##' @param train A Gaussian model generated by \code{train}. +##' @return A label vector with one element per row of \code{data} +##' @seealso train +##' @references O'Shaughnessy, D. Speech Communication (Addison-Wesley: +##' Reading, MA. 1987) +##' @keywords misc +##' @export mahal "mahal" <- function(data, train) { - if (emu.options("deprecated.warnings")) - cat("mahal is deprecated, use classify with metric=\"mahal\"\n") + # if (emu.options("deprecated.warnings")) # commented out because glabal vars don't exist any more + warning("mahal is deprecated, use classify with metric=\"mahal\"\n") classify( data, train, metric="mahal" ) } + + + + + + + + +##' bayes lab +##' +##' see function +##' +##' +##' @keywords internal +##' @export bayes.lab "bayes.lab" <- function(data, train) { - if (emu.options("deprecated.warnings")) - cat("bayes.lab is deprecated, use classify with metric=\"bayes\"\n") + #if (emu.options("deprecated.warnings")) # commented out because glabal vars don't exist any more + warning("bayes.lab is deprecated, use classify with metric=\"bayes\"\n") classify( data, train, metric="bayes" ) } -"bayes.dist"<- - function(data, train, labels=NULL) + + + + + + + + +##' bayes dist +##' +##' see function +##' +##' +##' @keywords internal +##' @export bayes.dist +"bayes.dist" <- function(data, train, labels=NULL) { - if (emu.options("deprecated.warnings")) - cat("bayes.dist is deprecated, use distance with metric=\"bayes\"\n") + #if (emu.options("deprecated.warnings")) # commented out because glabal vars don't exist any more + warning("bayes.dist is deprecated, use distance with metric=\"bayes\"\n") distance( data, train, labels, metric="bayes") } -"mahal.dist" <- - function( data, train, labels=NULL ) + + + + + + + + +##' Calculate mahalanobis distances +##' +##' Calculates mahalanobis distances +##' +##' The \code{train} function finds the centroids and covariance matrices for a +##' set of data and corresponding labels: one per unique label. This function +##' can be used to find the mahalanobis distance of every data point in a +##' dataset to each of the class centroids. The columns of the resulting +##' matrix are marked with the label of the centroid to which they refer. The +##' function \code{mahal} should be used if you want to find the closest +##' centroid to each data point. +##' +##' @param data A matrix of numerical data points. +##' @param labels A vector of labels.. +##' @param train A gaussian model as returned by the \code{train} function. +##' @return A matrix of distances with one column for every class (label) in +##' the gaussian model. +##' @seealso train, mahal, bayes.lab, bayes.dist +##' @keywords misc +##' @export mahal.dist +"mahal.dist" <- function( data, train, labels=NULL ) { - if (emu.options("deprecated.warnings")) - cat("mahal.dist is deprecated, use distance with metric=\"mahal\"\n") + #if (emu.options("deprecated.warnings")) # commented out because glabal vars don't exist any more + #cat("mahal.dist is deprecated, use distance with metric=\"mahal\"\n") # commented out because glabal vars don't exist any more distance( data, train, labels, metric="mahal") } ## generalise bayes.dist and mahal.dist -"distance" <- - function( data, train, labels=NULL, metric="bayes" ) + + + + + + + + +##' distance +##' +##' see function +##' +##' +##' @keywords internal +##' @export distance +"distance" <- function( data, train, labels=NULL, metric="bayes" ) { ## data is a set of data points ## train is the result of the train fn and contains @@ -57,11 +137,11 @@ ## metric - one of "bayes" or "mahal" for bayesian or mahalanobis distance if(!is.matrix(data)) data <- cbind(data) - + ncols <- length(train$label) ndims <- ncol(data) probs <- matrix(0, nrow = nrow(data), ncol = ncols) - + for(lab in 1:ncols) { tmp <- (lab - 1) * ndims + 1 tmp1 <- tmp + ndims - 1 @@ -81,12 +161,26 @@ "euclidean.metric" <- function( data, mean ) { - return + return() } -"bayesian.metric" <- - function( data, mean, cov, invcov ) + + + + + + + + +##' bayesian metric +##' +##' see function +##' +##' +##' @keywords internal +##' @export bayesian.metric +"bayesian.metric" <- function( data, mean, cov, invcov ) { # calcuate the gaussian classification metric for multivariate data # given mean vector and covariance matrix @@ -98,8 +192,22 @@ return( det - apply(pow * t(x.u), 1, sum) ) } -"mahalanobis.metric" <- - function(data, mean, invcov) + + + + + + + + +##' mahalanobis metric +##' +##' see function +##' +##' +##' @keywords internal +##' @export mahalanobis.metric +"mahalanobis.metric" <- function(data, mean, invcov) { x.u <- t(data) - mean pow <- t(x.u) %*% invcov @@ -109,10 +217,51 @@ } + + + + + + + + +##' classify +##' +##' classifies data +##' +##' +##' @param data data to classify +##' @param train training data +##' @param metric bayes or mahal +##' @return The classification matrix. +##' @author Jonathan Harrington +##' @keywords models +##' @examples +##' +##' +##' ## The function is currently defined as +##' function (data, train, metric = "bayes") +##' { +##' probs <- distance(data, train, metric = metric) +##' if (metric == "bayes") { +##' best <- apply(probs, 1, max) +##' } +##' else if (metric == "mahal") { +##' best <- apply(probs, 1, min) +##' } +##' result <- rep("", length(best)) +##' for (lab in 1:length(train$label)) { +##' tmp <- probs[, lab] == best +##' result[tmp] <- train$label[lab] +##' } +##' result +##' } +##' +##' @export classify "classify" <- function(data, train, metric="bayes") { probs <- distance(data, train, metric=metric ) - + ## what's best depends on the metric, bayes is a prob. so max is best ## mahal is a distance so min is best if( metric=="bayes" ) { @@ -130,8 +279,23 @@ } + + + + + + + + +##' bayesplot +##' +##' bayesplot +##' +##' +##' @keywords internal +##' @export bayesplot bayesplot <- function(data, train, N = 10, ellipse = FALSE, - labs = NULL, xlab="", ylab="", colour = TRUE, ...) + labs = NULL, xlab="", ylab="", colour = TRUE, ...) { ## data is the original data, used for scaling ## train is the stuff you get from train() @@ -144,21 +308,57 @@ bayesplot <- function(data, train, N = 10, ellipse = FALSE, points[, 2] <- points[, 2] * (ry[2] - ry[1]) + ry[1] ## now classify each point blabs <- classify(points, train, metric="bayes") - plot(points, type = "n", xlim = rx, ylim = ry, xlab=xlab, ylab=ylab) + graphics::plot(points, type = "n", xlim = rx, ylim = ry, xlab=xlab, ylab=ylab) ulabs <- unique(blabs) k <- 1 colours <- mu.colour( ulabs, colour, FALSE )$colour for(j in ulabs) { temp <- muclass(blabs, j) - text(points[temp, ], blabs[temp], col = colours[k]) + graphics::text(points[temp, ], blabs[temp], col = colours[k]) k <- k + 1 } if(ellipse && !is.null(labs) ) { - par(new = TRUE) + oldpar = graphics::par(new = TRUE) + on.exit(graphics::par(oldpar)) eplot(data, labs, xlim = rx, ylim = ry, colour=colour, ...) } } + + + + + + + + +##' Train a Gaussian Model +##' +##' Trains a Gaussian Model +##' +##' This function is used to train a gaussian model on a data set. The result +##' can be passed to either the \code{mahal} or \code{bayes.lab} functions to +##' classify either the training set (\code{x}) or a test set with the same +##' number of dimensions. Train simply finds the mean and inverse covariance +##' matrix/standard deviation for the data corresponding to each unique label +##' in labs. +##' +##' @param x A data vector or matrix. +##' @param lab A vector of labels parallel to \code{x}. If missing, all data is +##' assumed to be from the same class. +##' @return A structure with the following components: +##' +##' \item{label}{ The unique labels in \code{lab}. } \item{means}{ The means +##' for each dimension per unique label. } \item{cov}{ The combined covariance +##' matrixes for each unique label. The matrixes are joined with \code{rbind}. +##' If the input data is one-dimensional, this is just the standard deviation +##' of the data. } \item{invcov}{ The combined inverse covariance matrixes for +##' each unique label. The matrixes are joined with \code{rbind}. If the input +##' data is one-dimensional, this is just the reciprocal of the standard +##' deviation of the data. } +##' @seealso mahal, bayes.lab, mahalplot, bayes.plot +##' @keywords misc +##' @export train "train"<- function(x, lab=rep("x",nrow(x))) { mat <- NULL @@ -191,7 +391,7 @@ bayesplot <- function(data, train, N = 10, ellipse = FALSE, mat$means <- NULL mat$cov <- NULL for(j in unique(lab)) { - cat("data for ", j, " \n" ) + # cat("data for ", j, " \n" ) temp <- lab == j mat$means <- c(mat$means, mean(x[temp])) mat$cov <- c(mat$cov, sqrt(var(x[temp]))) @@ -201,9 +401,3 @@ bayesplot <- function(data, train, N = 10, ellipse = FALSE, } mat } - - -# Local Variables: -# mode:S -# S-temp-buffer-p:t -# End: diff --git a/R/bind.R b/R/bind.R index df98d1c1..90bc33a9 100644 --- a/R/bind.R +++ b/R/bind.R @@ -1,29 +1,53 @@ -############################################################################# -# # -# copyright : (C) 2000 SHLRC, Macquarie University # -# email : Steve.Cassidy@mq.edu.au # -# url : http://www.shlrc.mq.edu.au/emu # -# # -# This program is free software; you can redistribute it and/or modify # -# it under the terms of the GNU General Public License as published by # -# the Free Software Foundation; either version 2 of the License, or # -# (at your option) any later version. # -# # -############################################################################# - - -"bind"<- function(a,...) +##' class method bind data +##' +##' binds data +##' +##' +##' @keywords internal +##' @export bind +"bind" <- function(a,...) { UseMethod("bind") } - ## default is just to use rbind +## default is just to use rbind + + + + + + + + +##' data binding +##' +##' binds data +##' +##' +##' @keywords internal +##' @export "bind.default" <- function(...) { rbind(...) } -"bind.trackdata"<- function(...) + + + + + + + + +##' bind trackdata +##' +##' binds different trackdata objects together +##' +##' +##' @param \dots trackdata objects +##' @keywords methods +##' @export +"bind.trackdata" <- function(...) { ## function to combine datasets into one single datasets ## any number of datasets accepted e.g. dcombine(x, y, z) @@ -50,10 +74,3 @@ } mat } - - - -# Local Variables: -# mode:S -# S-temp-buffer-p:t -# End: diff --git a/R/buildtrack.R b/R/buildtrack.R index 4a28aa39..beff0de8 100644 --- a/R/buildtrack.R +++ b/R/buildtrack.R @@ -1,45 +1,77 @@ -"buildtrack" <- -function(mylist, ftime=NULL, trackname="") +##' Build trackdata objects from the output of by() +##' +##' buildtrack() converts a list that is the output of by.trackdata() into a +##' trackdata object if the list components are matrices whose rows are +##' successive values in time. +##' +##' The default of by.trackdata() is to return a list. If each element of the +##' list consists of a matrix whose rows are values occurring at the times +##' given by the row dimension names of the matrix, then buildtrack() can be +##' used to convert the list into a trackdata object. If the times are not +##' given in the row dimension names, then these can be supplied as an +##' additional argument to buildtrack() +##' +##' @param mylist a list that ist output from by() +##' @param ftime ftime +##' @param trackname name of track data object +##' @author Jonathan Harrington +##' @seealso \code{\link{by}} +##' @keywords manip +##' @examples +##' +##' #vowlax.fdat is a track data objects of formant of the vowlax segment list +##' #calculate the difference between adjacent formant values +##' p = by(vowlax.fdat[1,2],INDICES=NULL, diff) +##' +##' p +##' +##' +##' #now build a track data object out of these values +##' m = buildtrack(p) +##' +##' m +##' +##' @export buildtrack +"buildtrack" <- function(mylist, ftime=NULL, trackname="") { -# convert a list that is usually output from by() into -# a trackdata object - -# examples -# p = by(vowlax.fdat[1:3,2], diff) -# m = trackfromlist(p) -# p = by(vowlax.fdat[1:3,], apply, 2, diff) -# o = trackfromlist(p) -# r = by(vowlax.fdat[1:3,2], smooth) -# should break because no ftimes -# trackfromlist(r) -# e = trackfromlist(r, ftime=vowlax.fdat[1:3,]$ftime) -# m = by(vowlax.fdat[1:3,], apply, 2, smooth) -# e = trackfromlist(m, ftime=vowlax.fdat[1:3,]$ftime) - -res <- NULL -for(j in 1:length(mylist)){ -if(!is.matrix(mylist[[j]])) -mylist[[j]] <- cbind(mylist[[j]]) -N <- nrow(mylist[[j]]) -if(is.null(ftime)) -{ -if(is.null(dimnames(mylist[[j]])[[1]])) -stop("can't find any ftime values") -times <- as.numeric(dimnames(mylist[[j]])[[1]]) -times <- c(times[1], times[N]) -} -else -times <- ftime[j,] -res$data <- rbind(res$data, cbind(mylist[[j]])) -res$index <- c(res$index, N) -res$ftime <- rbind(res$ftime, times) -} -# build indices -n <- res$index -right <- cumsum(n) -left <- c(1, right+1) -left <- left[-length(left)] -res$index <- cbind(left, right) -as.trackdata(res$data, res$index, res$ftime, trackname) + # convert a list that is usually output from by() into + # a trackdata object + + # examples + # p = by(vowlax.fdat[1:3,2], diff) + # m = trackfromlist(p) + # p = by(vowlax.fdat[1:3,], apply, 2, diff) + # o = trackfromlist(p) + # r = by(vowlax.fdat[1:3,2], smooth) + # should break because no ftimes + # trackfromlist(r) + # e = trackfromlist(r, ftime=vowlax.fdat[1:3,]$ftime) + # m = by(vowlax.fdat[1:3,], apply, 2, smooth) + # e = trackfromlist(m, ftime=vowlax.fdat[1:3,]$ftime) + + res <- NULL + for(j in 1:length(mylist)){ + if(!is.matrix(mylist[[j]])) + mylist[[j]] <- cbind(mylist[[j]]) + N <- nrow(mylist[[j]]) + if(is.null(ftime)) + { + if(is.null(dimnames(mylist[[j]])[[1]])) + stop("can't find any ftime values") + times <- as.numeric(dimnames(mylist[[j]])[[1]]) + times <- c(times[1], times[N]) + } + else + times <- ftime[j,] + res$data <- rbind(res$data, cbind(mylist[[j]])) + res$index <- c(res$index, N) + res$ftime <- rbind(res$ftime, times) + } + # build indices + n <- res$index + right <- cumsum(n) + left <- c(1, right+1) + left <- left[-length(left)] + res$index <- cbind(left, right) + as.trackdata(res$data, res$index, res$ftime, trackname) } - diff --git a/R/by.trackdata.R b/R/by.trackdata.R index 3ef17308..0d62550c 100644 --- a/R/by.trackdata.R +++ b/R/by.trackdata.R @@ -1,58 +1,108 @@ -`by.trackdata` <- -function (data, INDICES = NULL, FUN, ..., simplify = FALSE) +##' A method of the generic function by for objects of class 'trackdata' +##' +##' A given function 'FUN' is applied to the data corresponding to each segment +##' of data. +##' +##' It is the same as trapply but with the extension to subsume calculation to +##' groups of segments. Note, if you do not want to apply the function fun to a +##' special group of segments, use \link{trapply} instead. +##' +##' @aliases by.trackdata by +##' @param data a track data object +##' @param INDICES a list of segment indices, like a label vector +##' @param FUN a function that is applied to each segment +##' @param \dots arguments of the function fun +##' @param simplify simplify = TRUE , output is a matrix; simplify = FALSE a +##' list is returned +##' @return list or vector +##' @author Jonathan Harrington +##' @seealso \code{\link{trapply}}, \code{\link{by}}, \code{\link{trackdata}} +##' \code{\link{dapply}} \code{\link{smooth}} \code{\link{apply}} +##' @keywords methods +##' @examples +##' +##' data(demo.vowels) +##' data(demo.vowels.fm) +##' +##' +##' #mean F1 subsumed for each vowel +##' ################################ +##' lab = label(demo.vowels) +##' by(demo.vowels.fm[,1], lab ,sapply,mean,simplify=FALSE) +##' +##' +##' #mean F1 subsumed for segment onsets mids and offsets +##' ############################################## +##' data = demo.vowels.fm +##' llabs = NULL +##' for (ind in 1:dim(data$ftime)[1]) { +##' seglabs = rep("mid",data$index[ind,2]-data$index[ind,1]+1) +##' seglabs[1] = "on" +##' seglabs[length(seglabs)] = "off" +##' llabs = as.vector(c(llabs , seglabs)) +##' } +##' +##' by(demo.vowels.fm[,1], llabs , sapply, mean , simplify=FALSE) +##' +##' #mean F1 subsumed for segment onsets mids and offsets subsumed for each vowel +##' ##################################################################### +##' by(demo.vowels.fm[,1], list(lab = lab, llabs = llabs) , sapply, mean , simplify=FALSE) +##' +##' +##' +##' @export +`by.trackdata` <- function (data, INDICES = NULL, FUN, ..., simplify = FALSE) { -#there might be a problem with data$data therefore data is replaced by abitrary datam -# data=m.fdat.int[,1]; FUN=targtime; simplify=TRUE;INDICES = NULL - -datam = data - - orgindices = INDICES - fun = FUN - arr.indices = function(orgindices, datam) { - indices = NULL - if (is.null(orgindices) || is.vector(orgindices)) { - n = 1:nrow(datam$index) - if (is.null(orgindices)) { - indices = rep(n, datam$index[, 2] - datam$index[, - 1] + 1) - } - if (length(orgindices) == dim(datam$ftime)[1]) { - indices = NULL - for (ind in 1:dim(datam$ftime)[1]) { - indices = c(indices, rep(orgindices[ind], datam$index[ind, - 2] - datam$index[ind, 1] + 1)) - } - } - if (length(orgindices) == dim(datam$data)[1]) { - indices = orgindices - } - return(indices) - } else { - warning("Can not arrange INDICES!") - return(orgindices) - } - } + #there might be a problem with data$data therefore data is replaced by abitrary datam + # data=m.fdat.int[,1]; FUN=targtime; simplify=TRUE;INDICES = NULL + + datam = data + + orgindices = INDICES + fun = FUN + arr.indices = function(orgindices, datam) { indices = NULL - if (is.list(orgindices)) { - for (var in 1:length(names(orgindices))) { - orgindices[[var]] = arr.indices(orgindices[[var]],datam) + if (is.null(orgindices) || is.vector(orgindices)) { + n = 1:nrow(datam$index) + if (is.null(orgindices)) { + indices = rep(n, datam$index[, 2] - datam$index[, + 1] + 1) + } + if (length(orgindices) == dim(datam$ftime)[1]) { + indices = NULL + for (ind in 1:dim(datam$ftime)[1]) { + indices = c(indices, rep(orgindices[ind], datam$index[ind, + 2] - datam$index[ind, 1] + 1)) } + } + if (length(orgindices) == dim(datam$data)[1]) { indices = orgindices + } + return(indices) } else { - indices = arr.indices(orgindices,datam) + warning("Can not arrange INDICES!") + return(orgindices) } - result <- o <- by(I(datam$data), indices, fun, ...) - if (simplify) { - if (is.null(attributes(summary(o))$dim)) - result <- c(unlist(o)) - else { - result <- NULL - for (j in 1:length(o)) { - result <- rbind(result, o[[j]]) - } - } + } + indices = NULL + if (is.list(orgindices)) { + for (var in 1:length(names(orgindices))) { + orgindices[[var]] = arr.indices(orgindices[[var]],datam) + } + indices = orgindices + } else { + indices = arr.indices(orgindices,datam) + } + result <- o <- by(I(datam$data), indices, fun, ...) + if (simplify) { + if (is.null(attributes(summary(o))$dim)) + result <- c(unlist(o)) + else { + result <- NULL + for (j in 1:length(o)) { + result <- rbind(result, o[[j]]) + } } - result + } + result } - - diff --git a/R/cbind.trackdata.R b/R/cbind.trackdata.R index 6af26ad4..af562c6f 100644 --- a/R/cbind.trackdata.R +++ b/R/cbind.trackdata.R @@ -1,37 +1,77 @@ -cbind.trackdata <- -function (...) +##' A method of the generic function cbind for objects of class 'trackdata' +##' +##' Different track data objects from one segment list are bound by combining +##' the $data columns of the track data object by columns. +##' +##' All track data objects have to be track data of the same segment list. +##' Thus $index and $ftime values have to be identically for all track data +##' objects. Track data objects are created by get_trackdata(). The number of +##' rows of the track data objects must match. +##' +##' @aliases cbind.trackdata cbind +##' @param \dots track data objects +##' @return A track data object with the same $index and ftime values of the +##' source track data objects and with $data that includes all columns of +##' $data of the source track data objects. +##' @author Jonathan Harrington +##' @seealso \code{\link{cbind}}, \code{\link{rbind.trackdata}} +##' \code{\link{trackdata}} \code{\link{get_trackdata}} +##' @keywords methods +##' @examples +##' +##' data(vowlax) +##' +##' #segment list vowlax - first segment only +##' vowlax[1,] +##' +##' #F0 track data object for vowlax - first segment only +##' vowlax.fund[1,] +##' +##' #rms track data object for vowlax - first segment only +##' vowlax.rms[1,] +##' +##' +##' #now combine both track data objects +##' fund.rms.lax = cbind(vowlax.fund, vowlax.rms) +##' +##' #the combined track data object - first segment only +##' #The first column keeps vowlax.fund data, the second keeps vowlax.rms data +##' fund.rms.lax[1,] +##' +##' +##' @export +cbind.trackdata <- function (...) { -mat <- NULL -k <- 1 - for (j in list(...)) { -if(k==1) -{ -inds <- mat$index <- j$index -mat$ftime <- j$ftime -} -else -{ -if( nrow(j$index) != nrow(inds) ) -stop("can't column bind trackdata from different segment lists") -lvec = (j$index[,1]==inds[,1]) & (j$index[,2]==inds[,2]) -if(any(!lvec)) -stop("can't column bind trackdata from different segment lists") -} -k = k+1 -} - - - - for (j in list(...)) { - mat$data <- cbind(mat$data, j$data) - } - - if (version$major >= 5) { - oldClass(mat) <- "trackdata" + mat <- NULL + k <- 1 + for (j in list(...)) { + if(k==1) + { + inds <- mat$index <- j$index + mat$ftime <- j$ftime } - else { - class(mat) <- "trackdata" + else + { + if( nrow(j$index) != nrow(inds) ) + stop("can't column bind trackdata from different segment lists") + lvec = (j$index[,1]==inds[,1]) & (j$index[,2]==inds[,2]) + if(any(!lvec)) + stop("can't column bind trackdata from different segment lists") } - mat + k = k+1 + } + + + + for (j in list(...)) { + mat$data <- cbind(mat$data, j$data) + } + + if (version$major >= 5) { + oldClass(mat) <- "trackdata" + } + else { + class(mat) <- "trackdata" + } + mat } - diff --git a/R/classplot.R b/R/classplot.R index 5a86d567..25ba67d5 100644 --- a/R/classplot.R +++ b/R/classplot.R @@ -1,37 +1,98 @@ -`classplot` <- -function(model, xlim, ylim, N = 100, pch=15, col=NULL, legend=TRUE, position="topright", bg="gray90", ...) +##' Produce a classification plot from discriminant or SVM modelling +##' +##' The function classifies all point specified within the ranges of xlim and +##' ylim based on the training model specified in model. It then produces a +##' two-dimensional plot colour-coded for classifications. +##' +##' +##' @param model A two-dimensional training model output from qda(), lda() of +##' MASS package , or svm() of e1071 package +##' @param xlim A vector of two numeric elements specifying the range on the +##' x-axis (parameter 1) over which classifications should be made +##' @param ylim A vector of two elements specifying the range on the y-axis +##' (parameter 2) over which classifications should be made +##' @param N A vector of one numeric element which specifies the density of +##' classification (greater N gives higher density). The default is 100. +##' @param pch A single element numeric vector specifying the plotting symbol +##' to be used in the classification plot. Defaults to 15. +##' @param col Either Null in which case the colours for the separate classes +##' are col = c(1, 2, ...n) where n is the number of classes; or else a vector +##' specifying the desired colours that is the same length as there are +##' classes. +##' @param legend A single element logical vector specifying whether a legend +##' should be drawn. Defaults to TRUE +##' @param position A single element vector specifying the position in the +##' figure where the legend should be drawn. Defaults to "topright" +##' @param bg A single element vector specifying the background colour on which +##' the legend should be drawn. +##' @param ... Further arguments to plot. +##' @author Jonathan Harrington +##' @seealso \code{\link[MASS]{qda}}, \code{\link[MASS]{lda}}, svm of e1071 +##' package. There is a function plot.svm which produces a prettier plot for +##' SVMs. +##' @examples +##' +##' library(MASS) +##' # Data from female speaker 68 +##' temp = vowlax.spkr=="68" +##' # Quadratic discriminant analysis +##' fm.qda = qda(vowlax.fdat.5[temp,1:2], vowlax.l[temp]) +##' # Linear discriminant analysis +##' fm.lda = lda(vowlax.fdat.5[temp,1:2], vowlax.l[temp]) +##' +##' xlim=c(0,1000) +##' ylim=c(0,3000) +##' +##' oldpar = par(mfrow=c(1,2)) +##' classplot(fm.qda, xlim=xlim, ylim=ylim, main="QDA") +##' classplot(fm.lda, xlim=xlim, ylim=ylim, main="LDA") +##' +##' par(oldpar) +##' +##' +##' # install.packages("e1071") +##' # library(e1071) +##' # Support vector machine +##' \dontrun{fm.svm = svm(vowlax.fdat.5[temp,1:2], factor(vowlax.l[temp]))} +##' \dontrun{xlim = range(vowlax.fdat.5[temp,1])} +##' \dontrun{ylim = range(vowlax.fdat.5[temp,2])} +##' \dontrun{classplot(fm.svm, xlim=xlim, ylim=ylim, xlab="F1", ylab="F2", main="SVM")} +##' +##' @export classplot +`classplot` <- function(model, xlim, ylim, N = 100, + pch=15, col=NULL, legend=TRUE, + position="topright", bg="gray90", ...) { -if(any(class(model) %in% "svm")) -{ -priorlabels <- model$levels -if(ncol(model$SV)!=2) -stop("data must be two-dimensional") -} -else if(any(class(model) %in% c("lda", "qda"))) -{ -priorlabels <- rownames(model$means) -if(ncol(model$means)!=2) -stop("data must be two-dimensional") + if(any(class(model) %in% "svm")) + { + priorlabels <- model$levels + if(ncol(model$SV)!=2) + stop("data must be two-dimensional") + } + else if(any(class(model) %in% c("lda", "qda"))) + { + priorlabels <- rownames(model$means) + if(ncol(model$means)!=2) + stop("data must be two-dimensional") + } + pnts <- cbind(sort(rep(1:N/N, N)), rep(1:N/N, N)) + pnts[, 1] <- pnts[, 1] * (xlim[2] - xlim[1]) + xlim[1] + pnts[, 2] <- pnts[, 2] * (ylim[2] - ylim[1]) + ylim[1] + if(any(class(model) %in% c("lda", "qda"))) + blabs <- as.character(predict(model, pnts)$class) + else if(any(class(model) %in% "svm")) + blabs <- as.character(predict(model, pnts)) + k <- 1 + if(is.null(col)) + colours <- mu.colour(priorlabels, TRUE, FALSE)$colour + else + colours <- col + graphics::plot(pnts, xlim=xlim, ylim=ylim, ...) + for (j in priorlabels) { + temp <- muclass(blabs, j) + graphics::points(pnts[temp, ],pch=pch, col = colours[k]) + k <- k + 1 + } + if(legend) + graphics::legend(position, legend=priorlabels, col=colours, fill=colours, bg=bg) } - pnts <- cbind(sort(rep(1:N/N, N)), rep(1:N/N, N)) - pnts[, 1] <- pnts[, 1] * (xlim[2] - xlim[1]) + xlim[1] - pnts[, 2] <- pnts[, 2] * (ylim[2] - ylim[1]) + ylim[1] -if(any(class(model) %in% c("lda", "qda"))) - blabs <- as.character(predict(model, pnts)$class) -else if(any(class(model) %in% "svm")) - blabs <- as.character(predict(model, pnts)) - k <- 1 -if(is.null(col)) - colours <- mu.colour(priorlabels, TRUE, FALSE)$colour -else -colours <- col -plot(pnts, xlim=xlim, ylim=ylim, ...) - for (j in priorlabels) { - temp <- muclass(blabs, j) - points(pnts[temp, ],pch=pch, col = colours[k]) - k <- k + 1 - } - if(legend) - legend(position, legend=priorlabels, col=colours, fill=colours, bg=bg) - } - diff --git a/R/cr.R b/R/cr.R index 38ebea18..6c955a7d 100644 --- a/R/cr.R +++ b/R/cr.R @@ -1,317 +1,465 @@ -"cr" <- -function(A = 1, k = 1, p = 0, N = 16, samfreq = NULL, duration = NULL, -const = NULL, expon = NULL, plotf = TRUE, ylim = NULL, xlim=NULL, values - = FALSE, xlab = "Time (number of points)", ylab = "Amplitude", -type = "b", bw = NULL, dopoints = FALSE, ...) +##' Plot digital sinuoids. +##' +##' The function plots and/or sums digital sinusoids for different parameter +##' settings. +##' +##' +##' @param A A vector of amplitude values. Defaults to A = 1 +##' @param k A vector of cycles (repetitions). Defaults to k = 1 +##' @param p A vector of phase values between -pi/2 and pi/2. Defaults to 0. +##' @param N The number of points in the signal. Defaults to 16. +##' @param samfreq If NULL, then a sinusoid is plotted with a frequency of k +##' cycles per N points. Otherwise, if samfreq is an numeric, then the argument +##' to k is interpreted as the frequency in Hz and the sinusoid at that +##' frequency is plotted for however many points are specified by N. For +##' example, if samfreq is 40 (Hz), and if N is 40 and k = 1, then 1 cycle of a +##' 1 Hz sinusoid will be plotted. +##' @param duration Specify the duration in ms. If NULL, the default, then the +##' duration of the sinusoid is in points (N), otherwise if a numeric value is +##' supplied, then in ms. For example, 1/2 second of a 1 cycle sinusoid at a +##' sampling frequency of 40 Hz: duration = 500, k = 1, samfreq=40. A ms value +##' can be supplied only if the sampling frequency is also specified. +##' @param const A single numeric vector for shifting the entire sinusoid up or +##' down the y-axis. For example, when const is 5, then 5 + s, where s is the +##' sinusoid is plotted. Defaults to 0 (zero). +##' @param expon A numeric vector. If supplied, then what is plotted is +##' expon[j]\eqn{\mbox{\textasciicircum}}{^}(c(0:(N - 1) * A cos (2 * pi * k/N +##' * (0:(N-1))). For example, a decaying sinusoid is produced with +##' cr(expon=-0.9). Defaults to NULL (i.e. to expon = 1). +##' @param plotf A single-valued logical vector. If TRUE (default), the sinusoid +##' is plotted. +##' @param ylim A two-valued numeric vector for specifying the y-axis range. +##' @param xlim A two-valued numeric vector for specifying the y-axis range. +##' @param values If TRUE, then the values of the sinusoid are listed. Defaults to +##' FALSE. +##' @param xlab A character vector for plotting the x-axis title. +##' @param ylab A character vector for plotting the y-axis title. +##' @param type A character vector for specifying the line type (see par) +##' @param bw A numeric vector for specifying the bandwidth, if the sampling +##' frequency is supplied. The bandwidth is converted to an exponential (see +##' expon using exp( - rad(bw/2, samfreq = samfreq). +##' @param dopoints this is now redundant. +##' @param \dots Option for supplying further graphical parameters - see par. +##' @author Jonathan Harrington +##' @seealso \code{\link{crplot}} +##' @keywords dplot +##' @examples +##' +##' # cosine wave +##' cr() +##' +##' # doubling the frequency, 1/3 amplitude, phase = pi/4, 50 points +##' cr(A=1/3, k=2, p=pi/4, N=50) +##' +##' # sum 3 sinusoids of different frequencies) +##' cr(k=c(1, 3, 4)) +##' +##' # sum 2 sinusoids of different parameters +##' cr(c(1, 2), c(2, 10), c(0, -pi/3), N=200, type="l") +##' +##' +##' # store the above to a vector and overlay with noise +##' v = cr(c(1, 2), c(2, 10), c(0, -pi/3), N=200, type="l", values=TRUE) +##' r = runif(200, -3, 3) +##' v = v+r +##' plot(0:199, v, type="l") +##' +##' +##' # 100 points of a 50 Hz sinusoid with a 4 Hz bandwidth +##' # at a sampling frequency of 200 Hz +##' cr(k=50, bw=4, samfreq=2000, N=100) +##' +##' # the same but shift the y-axis by +4 (d.c. offset=+4) +##' cr(const=4, k=50, bw=4, samfreq=2000, N=100) +##' +##' # sinusoid multiplied by a decaying exponential (same effect as bandwidth) +##' cr(expon=-0.95, N=200, type="l") +##' +##' +##' @export cr +"cr" <- function(A = 1, k = 1, p = 0, N = 16, + samfreq = NULL, duration = NULL, + const = NULL, expon = NULL, plotf = TRUE, + ylim = NULL, xlim=NULL, values = FALSE, + xlab = "Time (number of points)", ylab = "Amplitude", + type = "b", bw = NULL, dopoints = FALSE, ...) { -## A: amplitude (arbitrary units); defaults to 1 -## to shift by a quarter wave, p = 2 * pi * .25 -## k: Number of sine wave k; if k is n, then -## n cycles will be plotted -## samfreq: (sampling frequency in Hz) -## if NULL, then a sinusoid is plotted -## with a frequency of k cycles per N points. -## otherwise, if samfreq is an integer value, -## then the argument to k is interpreted -## as the frequency in Hz and the sinusoid at -## that frequency is plotted for however many -## points are specified by N -## duration: the desired duration in ms -## of the sinusoid. If not supplied, the -## duration is N, the number of points -## bw: supply a bandwidth in Hz; only -## works if a sampling frequency is also specified -## dopoints: plot superimposed large data points on a line -# -vec.n <- c(length(A), length(k), length(p)) -if(!all(diff(vec.n)==0)) -{ -which <- vec.n == max(vec.n) -if(!which[1]) -A <- rep(A, max(vec.n) ) -if(!which[2]) -k <- rep(k, max(vec.n) ) -if(!which[3]) -p <- rep(p, max(vec.n) ) + ## A: amplitude (arbitrary units); defaults to 1 + ## to shift by a quarter wave, p = 2 * pi * .25 + ## k: Number of sine wave k; if k is n, then + ## n cycles will be plotted + ## samfreq: (sampling frequency in Hz) + ## if NULL, then a sinusoid is plotted + ## with a frequency of k cycles per N points. + ## otherwise, if samfreq is an integer value, + ## then the argument to k is interpreted + ## as the frequency in Hz and the sinusoid at + ## that frequency is plotted for however many + ## points are specified by N + ## duration: the desired duration in ms + ## of the sinusoid. If not supplied, the + ## duration is N, the number of points + ## bw: supply a bandwidth in Hz; only + ## works if a sampling frequency is also specified + ## dopoints: plot superimposed large data points on a line + # + vec.n <- c(length(A), length(k), length(p)) + if(!all(diff(vec.n)==0)) + { + which <- vec.n == max(vec.n) + if(!which[1]) + A <- rep(A, max(vec.n) ) + if(!which[2]) + k <- rep(k, max(vec.n) ) + if(!which[3]) + p <- rep(p, max(vec.n) ) + } + + if(dopoints) type <- "l" + if(!is.null(duration)) { + if(is.null(samfreq)) { + stop("must supply a sampling frequency if duration supplied") + } + N <- round((samfreq * duration)/1000) + } + if(!is.null(bw)) { + if(is.null(samfreq)) { + stop("must supply a sampling frequency if bandwidth supplied") + } + expon <- exp( - rad(bw/2, samfreq = samfreq)) + } + if(!is.null(samfreq) & is.numeric(samfreq)) + k <- (k * N)/samfreq + if(max(k) > N) { + stop("number of k must be less than N") + } + if(any((p > pi) | (p < - pi))) { + stop("p must be within plus or minus pi") + } + + mat <- rep(0, N) + t <- 1:N + k <- k + 1 + if(is.null(const)) + const <- rep(0, length(A)) + for(j in 1:length(A)) { + if(is.null(expon)) + expon <- rep(1, N) + else expon <- expon[j]^(c(0:(N - 1))) + theta <- (2 * pi * (k[j] - 1) * (t - 1))/N + wavef <- const[j] + A[j] * expon * cos(theta + p[j]) + mat <- mat + wavef + } + if(plotf) { + if(is.null(ylim)) + ylim <- range(mat) + if(is.null(xlim)) + xlim <- c(0, (N-1)) + graphics::plot(c(0:(N - 1)), mat, xlab = xlab, ylab = ylab, type = type, + ylim = ylim, xlim = xlim, ...) + if(dopoints) + graphics::points(c(0:(N - 1)), mat, pch = 16, mkh = 0.05) + + } + if(values) + mat + else invisible() } -if(dopoints) type <- "l" -if(!is.null(duration)) { -if(is.null(samfreq)) { -print("must supply a sampling frequency if duration supplied" -) -stop() -} -N <- round((samfreq * duration)/1000) -} -if(!is.null(bw)) { -if(is.null(samfreq)) { -print("must supply a sampling frequency if bandwidth supplied" -) -stop() -} -expon <- exp( - rad(bw/2, samfreq = samfreq)) -} -if(!is.null(samfreq) & is.numeric(samfreq)) -k <- (k * N)/samfreq -if(max(k) > N) { -print("number of k must be less than N") -stop() -} -if(any((p > pi) | (p < - pi))) { -print("p must be within plus or minus pi") -stop() -} -mat <- rep(0, N) -t <- 1:N -k <- k + 1 -if(is.null(const)) -const <- rep(0, length(A)) -for(j in 1:length(A)) { -if(is.null(expon)) -expon <- rep(1, N) -else expon <- expon[j]^(c(0:(N - 1))) -theta <- (2 * pi * (k[j] - 1) * (t - 1))/N -wavef <- const[j] + A[j] * expon * cos(theta + p[j]) -mat <- mat + wavef -} -if(plotf) { -if(is.null(ylim)) -ylim <- range(mat) -if(is.null(xlim)) -xlim <- c(0, (N-1)) -plot(c(0:(N - 1)), mat, xlab = xlab, ylab = ylab, type = type, -ylim = ylim, xlim = xlim, ...) -if(dopoints) -points(c(0:(N - 1)), mat, pch = 16, mkh = 0.05) -} -if(values) -mat -else invisible() -} -"crplot" <- -function(A = 1, k = 1, p = 0, N = 16, const = NULL, figsize = 8, -npoints = 500, col = 1, cplot = TRUE, splot = TRUE, numplot = TRUE, axes = TRUE, -incircle = TRUE, arrow = TRUE, linetype = 1, textplot = NULL, lineplot = NULL, -ylab = "Amplitude", super = NULL, xaxlab = NULL, type = "b", xlab = -"Time (number of points)", fconst = 3.5/3.1, pointconst = 1.2) -{ -if(A < -2 | A > 2) -stop("choose A to be between plus or minus 2") -if(N > 50) -numplot = FALSE - -"cr.lines" <- -function(pivals, Ax = 1, Ay = 1, plotshift, col = 1, lty = 2) -{ -# called from within crplot() only - c.x <- - plotshift - c.y <- 0 # add lines to a circle plot extending from the centre of the -# circle, to the circumference at pivals, where pivals is in radians - for(j in 1:length(pivals)) { - x.th1 <- c(c.x, Ax * - sin(pivals[j]) - plotshift) - y.th1 <- c(c.y, Ay * cos(pivals[j])) - lines(x.th1, y.th1, col = col, lty = lty) - } -} -"cr.super" <- -function(first = 0, last = pi/3, Ax = 1, Ay = 1, k = 1, p = 0, const - = NULL, figsize = 8, npoints = 500, N = npoints, col = 1, axes = TRUE, - ylab = "amplitude", lwd = 2) -{ - first <- round(1 + ((first * (npoints - 1))/(2 * pi))) - last <- round(1 + ((last * (npoints - 1))/(2 * pi))) - A <- (A * figsize)/8 - pin <- c(figsize, figsize/2) - plotshift <- 0.5 * pin[1] * 0.625 - nums <- seq(0 + p, (2 * pi) + p, length = npoints) - cosv <- Ay * cos(nums) - sinv <- Ax * - sin(nums) - plot(sinv[first:last] - plotshift, cosv[first:last], type = "l", xlim - = c( - pin[1]/2, pin[1]/2), ylim = c( - pin[2]/2, pin[2]/2), - axes = FALSE, ylab = "", xlab = "", lwd = lwd) - if(k == 0) - theta <- rep(0 + p, N) - else theta <- seq(0 + p, k * 2 * pi + p, by = (k * 2 * - pi)/N)[1:N] - sinpoints <- Ax * - sin(theta) - plotshift - cospoints <- Ay * cos(theta) - xvals <- seq(0, figsize/2, length = N) - par(new = TRUE) - plot(xvals[first:last], cospoints[first:last], type = "l", xlim = c( - - pin[1]/2, pin[1]/2), ylim = c( - pin[2]/2, pin[2]/2), axes = FALSE, - ylab = "", xlab = "", col = col, lwd = lwd) -} -"cr.text" <- -function(Ax, Ay, radius, textin, pivals, plotshift, col = 1) -{ -## called by Cr(); plots text at specified points around -## the circle. See Cr() for documentation - for(j in 1:length(textin)) { - numsin <- radius[j] * Ax * - sin(pivals[j]) - plotshift - numcos <- radius[j] * Ay * cos(pivals[j]) - text(numsin, numcos, textin[j], col = col) - } -} -## draw a circle and the corresponding sinusoid -## A, amplitude i.e. the radius -## k: the frequency -## p: a p of zero corresponds to the top of the circle -## N: number of datapoints -## const: a constant corresponding to k + A*cos(2*pi*k+p) -## figsize in area. 8 corresponds to 4 (width) x 2 (height) inches -## npoints: no. of points used to plot the circle -## col: colour. defaults to 1 -## cplot: do you want to plot both the circle ? default is T -## numplot: plot the numbers on the circle? default is T -## splot: do you want to plot the sinusoid? default is T -## plot the axes? defaults to T -## incircle: plot the inner circle showing the angle between points? -## arrow: plot an arrow on the part inner circle? defaults to T -## linetype: linetype for plotting the sinusoid. Defaults to a solid line -## textplot: a list containing $radius, $textin, $pivals -## for plotting text at specified angles and radii on -## the circle. $radius: a vector of amplitudes of the radii at -## which the text is to be plotted; $textin: a vector -## of chacacter labels to be plotted; $pivals: the angle, in radians -## relative to zero radians (top of the circle) at which -## the text is to be plotted. Defaults to NULL -## lineplot: plot lines from the centre of the circle -## to the circumference. lineplot should be a vector specifying -## the angle in radians (zero corresponds to the top of the circle) -## super: superimpose a part solid circle and corresponding -## sinusoid. This needs to be a list containing $first and -## $last, which are values between 0 and 2*pi defining -## the beginning and ending of the part circle which is -## to be superimposed -## xaxlab: a character vector. Add -## your own axis labels -## the sinusoid plot. The characters are placed at equal -## intervals from the beginning to the end of the sinusoid. -## fconst: this is to get the aspect ratio correct for -## postscript using setps(h=4, w=4) -## pointconst: the radius of numbers around the circle -if(k > N) { -print("number of k must be less than N") -stop() -} -if(p > pi | p < - pi) { -print("p must be within plus or minus pi") -stop() -} -## different scale factors for x and y to frob aspect ratio -Ay <- (A * figsize)/8 -Ax <- fconst * Ay# par(pin = c(figsize, figsize/2)) -pin <- c(figsize, figsize/2) -plotshift <- 0.5 * pin[1] * 0.625## . -## plot the circle -nums <- seq(0 + p, (2 * pi) + p, length = npoints) -cosv <- Ay * cos(nums) -sinv <- Ax * - sin(nums) -if(cplot) { -plot(sinv - plotshift, cosv, type = "l", xlim = c( - pin[1]/2, -pin[1]/2), ylim = c( - pin[2]/2, pin[2]/2), axes = FALSE, -ylab = ylab, xlab = "", col = col, lty = linetype) -} -## plot the points on the circle -if(k == 0) -theta <- rep(0 + p, N) -else theta <- seq(0 + p, k * 2 * pi + p, by = (k * 2 * -pi)/N)[1:N] -sinpoints <- (Ax * - sin(theta)) - plotshift -cospoints <- Ay * cos(theta) -if(cplot) { -if(numplot) { -points(cbind(sinpoints, cospoints), pch = 16, mkh = -0.05) -## plot the numbers around the circle with an extended radius -numvals <- round(theta %% (2 * pi), 2) -for(j in unique(numvals)) { -temp <- numvals == j -numsin <- pointconst * Ax * - sin(theta[temp][ - 1]) - plotshift -numcos <- pointconst * Ay * cos(theta[temp][1]) -text(numsin, numcos, paste(c(0:(N - 1))[temp], - collapse = " "), cex = 1, col = col) -} -} -if(incircle) { -## show the angle, theta, as radii from the first to the second point -if(k != 0 & k != N) { -x.th1 <- c( - plotshift, Ax * - sin(theta[1]) - - plotshift) -y.th1 <- c(0, Ay * cos(theta[1])) -x.th2 <- c( - plotshift, A * - sin(theta[2]) - - plotshift) -y.th2 <- c(0, Ay * cos(theta[2])) -lines(x.th1, y.th1, col = col) -lines(x.th2, y.th2, col = col)## . -## draw a part circle between these lines -cir.thet <- seq(theta[1], theta[2], length = - round((npoints * (theta[2] - theta[1]))/(2 * - pi))) -par(new = TRUE) -cos.in <- Ay * cos(cir.thet) * 0.5 -sin.in <- Ax * - sin(cir.thet) * 0.5 -plot(sin.in - plotshift, cos.in, type = "l", - xlim = c( - pin[1]/2, pin[1]/2), ylim = c( - - pin[2]/2, pin[2]/2), axes = FALSE, ylab = "", - xlab = "", col = col)## . -len.in <- length(cos.in) -if(arrow) - arrows(sin.in[len.in - 1] - plotshift, cos.in[ - len.in - 1], sin.in[len.in] - plotshift, - cos.in[len.in], col = col, code=2, length=.1) -} -} -} -## . -## plot the cosine wave -if(splot) { -xvals <- seq(0, figsize/2, length = N) * fconst - 0.2 -par(new = TRUE) -plot(xvals, cospoints, type = "l", xlim = c( - pin[1]/2, pin[1]/ -2), ylim = c( - pin[2]/2, pin[2]/2), axes = FALSE, ylab = -"", xlab = "", col = col, lty = linetype) -points(xvals, cospoints, pch = 16, mkh = 0.05) -} -## add any text if specified -if(!is.null(textplot)) cr.text(Ax, Ay, textplot$radius, textplot$textin, -textplot$pivals, plotshift, col = col) -## add any lines from the centre to the circumference -if(!is.null(lineplot)) -cr.lines(lineplot, Ax, Ay, plotshift, col = col) -if(axes) { -if(is.null(xaxlab)) -axis(side = 1, line = - A * 1.1, at = xvals, labels = -c(0:(N - 1))) -else axis(side = 1, line = - A * 1.1, at = seq(0, figsize/2, -length = length(xaxlab)) - 0.2, labels = xaxlab -) -mtext(xlab, at = figsize/4, line = A * 2, side = 1) -if(is.null(const)) -axis(side = 2) -else axis(side = 2, at = seq( - figsize/4, figsize/4, length = -5), labels = seq( - figsize/4, figsize/4, -length = 5) + const) -abline(h = 0, lty = 2) -} -## superimpose a part circle -if(!is.null(super)) { -par(new = TRUE) -cr.super(first = super$first, last = super$last, Ax = Ax, Ay = -Ay, k = k, p = p, figsize = figsize, -const = const, npoints = npoints) -} +##' Function to plot a digital sinusoid and the circle from which it is +##' derived. +##' +##' A digital sinusoid is derived the movement of a point around a circle. The +##' function shows the relationship between the two for various parameter +##' settings. +##' +##' +##' @param A Amplitude of the circle/sinusoid. +##' @param k Frequency of the sinusoid +##' @param p Phase of the sinusoid +##' @param N Number of points per cycle or revolution. +##' @param const A constant corresponding to k + A*cos(2*pi*k+p) +##' @param figsize Set the figure size as pin <- c(figsize, figsize/2). +##' Defaults to figsize = 8. +##' @param npoints The number of points used in plotting the circle. Defaults +##' to 500 +##' @param col An integer for the color in plotting the sinusoid and points +##' around the circle +##' @param cplot Now redundant +##' @param splot Now redundant +##' @param numplot Logical. If TRUE (defaults), the digital points around the +##' circle are numbered +##' @param axes Logical. If TRUE, plot axes. +##' @param incircle Logical. If TRUE, plot an the angle between digital points in +##' the circle. +##' @param arrow Logical. If TRUE, plot an arrow on incircle showing the direction +##' of movement. +##' @param linetype Specify a linetype. Same function as lty in plot +##' @param textplot A list containing $radius, $textin, $pivals for plotting +##' text at specified angles and radii on the circle. $radius: a vector of +##' amplitudes of the radii at which the text is to be plotted; $textin: a +##' vector of character labels to be plotted; $pivals: the angle, in radians +##' relative to zero radians (top of the circle) at which the text is to be +##' plotted. Defaults to NULL +##' @param lineplot Plot lines from the centre of the circle to the +##' circumference. lineplot is a vector specifying the angle in radians (zero +##' corresponds to the top of the circle) +##' @param ylab Specify a y-axis label. +##' @param super Superimpose a part solid circle and corresponding sinusoid. +##' This needs to be a list containing $first and $last, which are values +##' between 0 and 2*pi defining the beginning and ending of the part circle +##' which is to be superimposed +##' @param xaxlab Now redundant +##' @param xlab Specify an x-axis label. +##' @param type Specify a type. +##' @param fconst A single element numeric vector for the aspect ratio in a +##' postscript plot. Defaults to 3.5/3.1 which is appropriate for a postscript +##' setting of setps(h=4, w=4) +##' @param pointconst The radius for plotting the numbers around the circle. +##' Defaults to 1.2 * A +##' @author Jonathan Harrington +##' @seealso \code{\link{cr}} +##' @references Harrington, J, & Cassidy, S. 1999. Techniques in Speech +##' Acoustics. Kluwer +##' @keywords dplot +##' @examples +##' +##' crplot() +##' # sine wave +##' crplot(p=-pi/2) +##' +##' crplot(k=3) +##' +##' # aliasing +##' crplot(k=15) +##' +##' @export crplot +"crplot" <- function(A = 1, k = 1, p = 0, N = 16, const = NULL, figsize = 8, + npoints = 500, col = 1, cplot = TRUE, splot = TRUE, numplot = TRUE, axes = TRUE, + incircle = TRUE, arrow = TRUE, linetype = 1, textplot = NULL, lineplot = NULL, + ylab = "Amplitude", super = NULL, xaxlab = NULL, type = "b", + xlab = "Time (number of points)", fconst = 3.5/3.1, pointconst = 1.2) +{ + oldpar = graphics::par(no.readonly=TRUE) + on.exit(graphics::par(oldpar)) + + if(A < -2 | A > 2) + stop("choose A to be between plus or minus 2") + if(N > 50) + numplot = FALSE + + "cr.lines" <- function(pivals, Ax = 1, Ay = 1, + plotshift, col = 1, lty = 2) + { + # called from within crplot() only + c.x <- - plotshift + c.y <- 0 # add lines to a circle plot extending from the centre of the + # circle, to the circumference at pivals, where pivals is in radians + for(j in 1:length(pivals)) { + x.th1 <- c(c.x, Ax * - sin(pivals[j]) - plotshift) + y.th1 <- c(c.y, Ay * cos(pivals[j])) + graphics::lines(x.th1, y.th1, col = col, lty = lty) + } + } + + "cr.super" <- function(first = 0, last = pi/3, Ax = 1, Ay = 1, + k = 1, p = 0, const = NULL, figsize = 8, + npoints = 500, N = npoints, col = 1, + axes = TRUE, ylab = "amplitude", lwd = 2) + { + first <- round(1 + ((first * (npoints - 1))/(2 * pi))) + last <- round(1 + ((last * (npoints - 1))/(2 * pi))) + A <- (A * figsize)/8 + pin <- c(figsize, figsize/2) + plotshift <- 0.5 * pin[1] * 0.625 + nums <- seq(0 + p, (2 * pi) + p, length = npoints) + cosv <- Ay * cos(nums) + sinv <- Ax * - sin(nums) + graphics::plot(sinv[first:last] - plotshift, cosv[first:last], type = "l", xlim + = c( - pin[1]/2, pin[1]/2), ylim = c( - pin[2]/2, pin[2]/2), + axes = FALSE, ylab = "", xlab = "", lwd = lwd) + if(k == 0) + theta <- rep(0 + p, N) + else theta <- seq(0 + p, k * 2 * pi + p, by = (k * 2 * + pi)/N)[1:N] + sinpoints <- Ax * - sin(theta) - plotshift + cospoints <- Ay * cos(theta) + xvals <- seq(0, figsize/2, length = N) + oldpar = graphics::par(new = TRUE) + on.exit(graphics::par(oldpar)) + graphics::plot(xvals[first:last], cospoints[first:last], type = "l", xlim = c( - + pin[1]/2, pin[1]/2), ylim = c( - pin[2]/2, pin[2]/2), axes = FALSE, + ylab = "", xlab = "", col = col, lwd = lwd) + } + + "cr.text" <- function(Ax, Ay, radius, textin, pivals, plotshift, col = 1) + { + ## called by Cr(); plots text at specified points around + ## the circle. See Cr() for documentation + for(j in 1:length(textin)) { + numsin <- radius[j] * Ax * - sin(pivals[j]) - plotshift + numcos <- radius[j] * Ay * cos(pivals[j]) + graphics::text(numsin, numcos, textin[j], col = col) + } + } + + + ## draw a circle and the corresponding sinusoid + ## A, amplitude i.e. the radius + ## k: the frequency + ## p: a p of zero corresponds to the top of the circle + ## N: number of datapoints + ## const: a constant corresponding to k + A*cos(2*pi*k+p) + ## figsize in area. 8 corresponds to 4 (width) x 2 (height) inches + ## npoints: no. of points used to plot the circle + ## col: colour. defaults to 1 + ## cplot: do you want to plot both the circle ? default is TRUE + ## numplot: plot the numbers on the circle? default is TRUE + ## splot: do you want to plot the sinusoid? default is TRUE + ## plot the axes? defaults to TRUE + ## incircle: plot the inner circle showing the angle between points? + ## arrow: plot an arrow on the part inner circle? defaults to TRUE + ## linetype: linetype for plotting the sinusoid. Defaults to a solid line + ## textplot: a list containing $radius, $textin, $pivals + ## for plotting text at specified angles and radii on + ## the circle. $radius: a vector of amplitudes of the radii at + ## which the text is to be plotted; $textin: a vector + ## of character labels to be plotted; $pivals: the angle, in radians + ## relative to zero radians (top of the circle) at which + ## the text is to be plotted. Defaults to NULL + ## lineplot: plot lines from the centre of the circle + ## to the circumference. lineplot should be a vector specifying + ## the angle in radians (zero corresponds to the top of the circle) + ## super: superimpose a part solid circle and corresponding + ## sinusoid. This needs to be a list containing $first and + ## $last, which are values between 0 and 2*pi defining + ## the beginning and ending of the part circle which is + ## to be superimposed + ## xaxlab: a character vector. Add + ## your own axis labels + ## the sinusoid plot. The characters are placed at equal + ## intervals from the beginning to the end of the sinusoid. + ## fconst: this is to get the aspect ratio correct for + ## postscript using setps(h=4, w=4) + ## pointconst: the radius of numbers around the circle + if(k > N) { + stop("number of k must be less than N") + } + if(p > pi | p < - pi) { + stop("p must be within plus or minus pi") + } + ## different scale factors for x and y to frob aspect ratio + Ay <- (A * figsize)/8 + Ax <- fconst * Ay# par(pin = c(figsize, figsize/2)) + pin <- c(figsize, figsize/2) + plotshift <- 0.5 * pin[1] * 0.625## . + ## plot the circle + nums <- seq(0 + p, (2 * pi) + p, length = npoints) + cosv <- Ay * cos(nums) + sinv <- Ax * - sin(nums) + if(cplot) { + graphics::plot(sinv - plotshift, cosv, type = "l", xlim = c( - pin[1]/2, + pin[1]/2), ylim = c( - pin[2]/2, pin[2]/2), axes = FALSE, + ylab = ylab, xlab = "", col = col, lty = linetype) + } + ## plot the points on the circle + if(k == 0) + theta <- rep(0 + p, N) + else theta <- seq(0 + p, k * 2 * pi + p, by = (k * 2 * + pi)/N)[1:N] + sinpoints <- (Ax * - sin(theta)) - plotshift + cospoints <- Ay * cos(theta) + if(cplot) { + if(numplot) { + graphics::points(cbind(sinpoints, cospoints), pch = 16, mkh = + 0.05) + ## plot the numbers around the circle with an extended radius + numvals <- round(theta %% (2 * pi), 2) + for(j in unique(numvals)) { + temp <- numvals == j + numsin <- pointconst * Ax * - sin(theta[temp][ + 1]) - plotshift + numcos <- pointconst * Ay * cos(theta[temp][1]) + graphics::text(numsin, numcos, paste(c(0:(N - 1))[temp], + collapse = " "), cex = 1, col = col) + } + } + if(incircle) { + ## show the angle, theta, as radii from the first to the second point + if(k != 0 & k != N) { + x.th1 <- c( - plotshift, Ax * - sin(theta[1]) - + plotshift) + y.th1 <- c(0, Ay * cos(theta[1])) + x.th2 <- c( - plotshift, A * - sin(theta[2]) - + plotshift) + y.th2 <- c(0, Ay * cos(theta[2])) + graphics::lines(x.th1, y.th1, col = col) + graphics::lines(x.th2, y.th2, col = col)## . + ## draw a part circle between these lines + cir.thet <- seq(theta[1], theta[2], length = + round((npoints * (theta[2] - theta[1]))/(2 * + pi))) + graphics::par(new = TRUE) + cos.in <- Ay * cos(cir.thet) * 0.5 + sin.in <- Ax * - sin(cir.thet) * 0.5 + graphics::plot(sin.in - plotshift, cos.in, type = "l", + xlim = c( - pin[1]/2, pin[1]/2), ylim = c( - + pin[2]/2, pin[2]/2), axes = FALSE, ylab = "", + xlab = "", col = col)## . + len.in <- length(cos.in) + if(arrow) + graphics::arrows(sin.in[len.in - 1] - plotshift, cos.in[ + len.in - 1], sin.in[len.in] - plotshift, + cos.in[len.in], col = col, code=2, length=.1) + } + } + } + ## . + ## plot the cosine wave + if(splot) { + xvals <- seq(0, figsize/2, length = N) * fconst - 0.2 + graphics::par(new = TRUE) + graphics::plot(xvals, cospoints, type = "l", xlim = c( - pin[1]/2, pin[1]/ + 2), ylim = c( - pin[2]/2, pin[2]/2), axes = FALSE, ylab = + "", xlab = "", col = col, lty = linetype) + graphics::points(xvals, cospoints, pch = 16, mkh = 0.05) + } + ## add any text if specified + if(!is.null(textplot)) cr.text(Ax, Ay, textplot$radius, textplot$textin, + textplot$pivals, plotshift, col = col) + ## add any lines from the centre to the circumference + if(!is.null(lineplot)) + cr.lines(lineplot, Ax, Ay, plotshift, col = col) + if(axes) { + if(is.null(xaxlab)) + graphics::axis(side = 1, line = - A * 1.1, at = xvals, labels = + c(0:(N - 1))) + else graphics::axis(side = 1, line = - A * 1.1, at = seq(0, figsize/2, + length = length(xaxlab)) - 0.2, labels = xaxlab + ) + graphics::mtext(xlab, at = figsize/4, line = A * 2, side = 1) + if(is.null(const)) + graphics::axis(side = 2) + else graphics::axis(side = 2, at = seq( - figsize/4, figsize/4, length = + 5), labels = seq( - figsize/4, figsize/4, + length = 5) + const) + graphics::abline(h = 0, lty = 2) + } + ## superimpose a part circle + if(!is.null(super)) { + graphics::par(new = TRUE) + cr.super(first = super$first, last = super$last, Ax = Ax, Ay = + Ay, k = k, p = p, figsize = figsize, + const = const, npoints = npoints) + } } - diff --git a/R/dapply.R b/R/dapply.R index 6aa0126d..2f9f0fde 100644 --- a/R/dapply.R +++ b/R/dapply.R @@ -1,17 +1,61 @@ -############################################################################# -# # -# copyright : (C) 2000 SHLRC, Macquarie University # -# email : Steve.Cassidy@mq.edu.au # -# url : http://www.shlrc.mq.edu.au/emu # -# # -# This program is free software; you can redistribute it and/or modify # -# it under the terms of the GNU General Public License as published by # -# the Free Software Foundation; either version 2 of the License, or # -# (at your option) any later version. # -# # -############################################################################# - - +##' apply a function to each part of a trackdata object +##' +##' Given an Emu trackdata object, \code{dapply} will apply a given function to +##' the data corresponding to each segment of data. The result is a new +##' trackdata object. +##' +##' \code{dapply} can be used to apply an arbitrary function to trackdata +##' extracted from an Emu database. It can be used for example to smooth the +##' data (see \code{\link{dsmooth}}) or differentiate it (see +##' \code{\link{ddiff}}). +##' +##' Trackdata is made up of three components: a matrix of data \code{$data}, a +##' matrix of indexes (\code{$index}) and a matrix of segment times +##' (\code{$ftime}). The indexes contain the start and end rows for each +##' segment in the trackdata, the time matrix contains the start and end times +##' of each data segment. +##' +##' The function \code{fun} supplied to \code{dapply} should take one matrix of +##' data (corresponding to one segment) and a vector of two times being the +##' start and end of the data. It should return a modified data matrix, which +##' can have any number of rows or columns, and a new pair of start and end +##' times. The new start and end times are necessary because the operation +##' applied might shorten or interpolate the data and hence change the times +##' corresponding to the first and last rows of data. +##' +##' @param trackdata An Emu trackdata object +##' @param fun A function taking a matrix of data and a vector of times and +##' returning a list with components \code{$data} and \code{$ftime}. +##' @param \dots Additional arguments to be passed to \code{fun} +##' @return An Emu trackdata object with components: \item{data}{A matrix of +##' data with all segments concatenated by row.} \item{index}{A two column +##' matrix of the start and end rows for each segment} \item{ftime}{A two +##' column matrix of the start and end times for each segment} +##' @seealso \code{\link{dsmooth}} \code{\link{ddiff}} +##' @keywords misc +##' @examples +##' +##' +##' data(dip) +##' ## formant data of the first segment in segment list dip +##' fm <- dip.fdat[1] +##' +##' testfun <- function(data, ftime, n) { +##' ## return only the first n rows of data +##' ## doesn't check to see if there really are n rows... +##' newdata <- data[1:n,] +##' ## calculate a new end time +##' interval <- (ftime[2]-ftime[1])/nrow(data) +##' ftime[2] <- ftime[1] + interval*n +##' ## now return the required list +##' return( list( data=newdata, ftime=ftime ) ) +##' } +##' +##' fm.first3 <- dapply( fm, testfun, 3 ) +##' fm.first10 <- dapply( fm, testfun, 10 ) +##' +##' +##' @export dapply "dapply"<- function(trackdata, fun, ...) { ## data is a list as returned by track(), a vector @@ -21,29 +65,29 @@ ## vector and return an object with components $data and $ftime ## dapply must ensure that the resulting data component is ## still a matrix, even if the function returns a vector. - + if( version$major >= 5 && oldClass(trackdata)!="trackdata") { - stop("argument to dapply is not of class trackdata.") - } else if(class(trackdata)!="trackdata") stop("argument to dapply is not of class trackdata.") - - + } else if(!inherits(trackdata, "trackdata")) + stop("argument to dapply is not of class trackdata.") + + if(!is.matrix(trackdata$index)){ trackdata$ftime <- rbind(trackdata$ftime) trackdata$index <- rbind(trackdata$index) } - + thisrow <- 1 newindex <- trackdata$index newdata <- NULL newftime <- trackdata$ftime - + for(j in 1:nrow(trackdata$index)) { newindex[j,1] <- thisrow - + tmp <- fun(trackdata[j]$data, trackdata[j]$ftime, ...) - + if(is.matrix(tmp$data)){ newdata <- rbind(newdata, tmp$data) } else { @@ -51,15 +95,15 @@ } newftime[j,] <- tmp$ftime - + if(is.matrix(tmp$data)) thisrow <- thisrow + nrow(tmp$data) else thisrow <- thisrow + length(tmp$data) newindex[j,2] <- thisrow - 1 - + } - + x <- list(data=as.matrix(newdata), index=newindex, ftime=newftime) if( version$major >= 5 ) { oldClass(x) <- "trackdata" @@ -68,10 +112,3 @@ } return(x) } - - - -# Local Variables: -# mode:S -# S-temp-buffer-p:t -# End: diff --git a/R/dct.R b/R/dct.R index e5cb3121..22bb9087 100644 --- a/R/dct.R +++ b/R/dct.R @@ -1,41 +1,105 @@ -"dct" <- -function(wav, fit=FALSE, k=NULL) +##' Discrete Cosine Transformation +##' +##' Obtain the coefficients of the discrete cosine transformation (DCTRUE). +##' +##' The function calculates the DCT coefficients for any vector or +##' single-columned matrix. The function can also be used to obtain a smoothed +##' trajectory of the input data by summing the cosine waves derived from the +##' first few DCT coefficients. +##' +##' The algorithm first reflects the input signal about the last data point, N. +##' Thus if the input signal vec if of length N, the algorithm creates a vector +##' c(vec, rev(vec[-c(1,N)])). and the R fft function is applied to this +##' reflected signal. The DCT coefficients are real part of what is returned by +##' fft i.e. the amplitudes of the cosine waves of frequencies k = 0, 1, 2, +##' ...2 *(N-1) radians per sample. The phase is zero in all cases. The +##' amplitudes are calculated in such a way such that if these cosine waves are +##' summed, the original (reflected) signal is reconstructed. What is returned +##' by dct() are the amplitudes of the cosine waves (DCT coefficients) up to a +##' frequency of N radians/sample, i.e. a vector of cosine wave amplitudes that +##' has the same length as the original signal and of frequencies k = 0, 1, 2, +##' ... (N-1). Alternatively, if fit=TRUE, a smoothed signal of the same length +##' as the original signal is obtained based on a summation of the lowest +##' ordered DCT coefficients. This dct() algorithm returns very similar values +##' to DCT() with inv=FALSE written by Catherine Watson and used in Watson & +##' Harrington (1999). +##' +##' @param data a vector or single column matrix of numeric values to which the +##' 2nd order polynomial is to be fitted. +##' @param fit if FALSE, return the DCT coefficients; if TRUE, the values of the +##' smoothed trajectory are returned based on summing the cosine waves of the k +##' lowest ordered DCT coefficients, where k is the argument given below. +##' @param m The number of DCT coefficients that are returned or on which the +##' smoothed trajectory is based. Defaults to NULL which returns coefficients +##' of frequencies k = 0, 1,2 .. N-1 where N is the length of the input signal, +##' wav. If fit = TRUE and k = NULL, then the the sum of all the cosine waves +##' whose amplitudes are the DCT coefficients are returned - which is equal to +##' the original signal. k must be between 2 and the length of the signal. +##' @author Jonathan Harrington +##' @seealso \code{\link{plafit}} \code{\link{by}} +##' @references Watson, C. & Harrington, J. (1999). Acoustic evidence for +##' dynamic formant trajectories in Australian English vowels. Journal of the +##' Acoustical Society of America, 106, 458-468. +##' +##' Zahorian, S., and Jagharghi, A. (1993). Spectral-shape features versus +##' formants as acoustic correlates for vowels, Journal of the Acoustical +##' Society of America, 94, 19661982. +##' @keywords math +##' @examples +##' +##' data(vowlax) +##' # obtain the first four DCT coefficients +##' # (frequencies k = 0, 1, 2, 3) for some +##' # first formant frequency data +##' vec <- vowlax.fdat[1,1]$data +##' dct(vec, m=4) +##' +##' # obtain the corresponding smoothed +##' # trajectory +##' dct(vec, m=4 , fit=TRUE) +##' +##' @export dct +"dct" <- function (data, m=NULL, fit=FALSE) { - -if(!is.vector(wav) & !is.matrix(wav) ) -stop("input signal must be a vector or a one-columned matrix") -if(is.matrix(wav) ) -{ -if(ncol(wav)!=1) -stop("input signal must be a vector or a one-columned matrix") -} -if(is.vector(wav)) -nz <- names(wav) -if(is.matrix(wav)) -nz <- dimnames(wav)[[1]] -N <- length(wav) -if(is.null(k)) -k <- N -if(k < 2 | k > N) -stop("k must be between 2 and the length of the input signal") - -# program begins here -wav <- c(wav, rev(wav[-c(1, N)])) -Nref <- length(wav) -coeff <- Re(fft(wav, inverse=TRUE))/Nref - -if(fit) -{ -p <- 1:k -r <- c(Nref: (Nref-k+2)) -p <- c(p, r) -coeff[-p] <- 0 -coeff <- Re(fft(coeff, inverse=TRUE)) -} -coeff <-coeff[1:N] -names(coeff) <- nz -if(!fit) -coeff <- coeff[1:k] -coeff + if (is.matrix(data)) + nz <- dimnames(data)[[1]] + else nz <- names(data) + ldat <- length(data) + if(!is.null(m)) + { + if ((m < 1) | (m > ldat-1)) + stop("m must be between 1 and length(data)-1") + } + transdat <- vector(length = ldat) + + transdat[1] <- (2/(ldat * sqrt(2))) * sum(data) + for (n in 1:(ldat - 1)) { + j <- 0:(ldat - 1) + transdat[n + 1] <- (2/ldat) * sum(data * cos((pi * + n * (2 * j + 1))/(2 * ldat))) + } + names(transdat) <- nz + if(!fit) + { + if(is.null(m)) + return(transdat) + else + return(transdat[1:(m+1)]) + } + else { + data <- transdat + transdat <- vector(length = ldat) + if(is.null(m)) + m <- 1:(ldat - 1) + else + m <- 1:m + for (n in 0:(ldat - 1)) { + transdat[n + 1] <- (1/sqrt(2)) * data[1] * cos((pi * + 0 * (2 * n + 1))/(2 * ldat)) + sum(data[m + 1] * + cos((pi * m * (2 * n + 1))/(2 * ldat))) + } + } + names(transdat) <- nz + transdat } diff --git a/R/dct2.R b/R/dct2.R deleted file mode 100644 index 275553f9..00000000 --- a/R/dct2.R +++ /dev/null @@ -1,47 +0,0 @@ - -"dct" <- -function (data, m=NULL, fit=FALSE) -{ -# written by Catherine Watson, modified by Jonathan Harrington - if (is.matrix(data)) - nz <- dimnames(data)[[1]] -else nz <- names(data) - ldat <- length(data) -if(!is.null(m)) -{ -if ((m < 1) | (m > ldat-1)) -stop("m must be between 1 and length(data)-1") -} - transdat <- vector(length = ldat) - - transdat[1] <- (2/(ldat * sqrt(2))) * sum(data) - for (n in 1:(ldat - 1)) { - j <- 0:(ldat - 1) - transdat[n + 1] <- (2/ldat) * sum(data * cos((pi * - n * (2 * j + 1))/(2 * ldat))) - } -names(transdat) <- nz -if(!fit) -{ -if(is.null(m)) -return(transdat) -else -return(transdat[1:(m+1)]) -} - else { -data <- transdat -transdat <- vector(length = ldat) -if(is.null(m)) - m <- 1:(ldat - 1) -else -m <- 1:m - for (n in 0:(ldat - 1)) { - transdat[n + 1] <- (1/sqrt(2)) * data[1] * cos((pi * - 0 * (2 * n + 1))/(2 * ldat)) + sum(data[m + 1] * - cos((pi * m * (2 * n + 1))/(2 * ldat))) - } - } -names(transdat) <- nz - transdat -} - diff --git a/R/ddiff.R b/R/ddiff.R index 3f2a37fc..e53546c6 100644 --- a/R/ddiff.R +++ b/R/ddiff.R @@ -1,43 +1,44 @@ -############################################################################# -# # -# copyright : (C) 2000 SHLRC, Macquarie University # -# email : Steve.Cassidy@mq.edu.au # -# url : http://www.shlrc.mq.edu.au/emu # -# # -# This program is free software; you can redistribute it and/or modify # -# it under the terms of the GNU General Public License as published by # -# the Free Software Foundation; either version 2 of the License, or # -# (at your option) any later version. # -# # -############################################################################# - - +##' Differentiation of tracks +##' +##' Differentiates a list, as returned by track, to the nth order, readjusting +##' the index and ftime values each time. +##' +##' +##' @aliases ddiff ddiff.sub +##' @param dataset track data object - a list as returned by track +##' @param n the order of differentiation +##' @param smoothing if TRUE track is smoothed +##' @author Jonathan Harrington +##' @keywords math +##' @export ddiff "ddiff"<- function(dataset, n = 1, smoothing = TRUE) { ## differentiates a list, as returned by track, to the nth ## order, readjusting the index and ftime values each time ## dataset: a list as returned by track ## n: the order of differentiation - + ## now we apply the function to the data using dapply outdat <- dapply(dataset, ddiff.sub, n = n) if(smoothing) dsmooth(outdat) - else outdat + else outdat } + +##' @export ddiff.sub <- function(data, ftime, n) { ## a function to be called by dapply ## data: a data matrix ## ftime: a start-end pair ## n: number of times to differentiate - ## smoothing: if T, apply smooth to data too + ## smoothing: if TRUE, apply smooth to data too ## returns: a list of $data values differentiated ## and $ftime values adjusted accordingly ## values in $data that are returned are per millisecond if(is.matrix(data)) lval <- nrow(data) else lval <- length(data - ) + ) if(lval < 1) stop("not enough data points in ddiff") ## compute the time between samples interval <- (ftime[2] - ftime[1])/lval @@ -45,7 +46,7 @@ ddiff.sub <- function(data, ftime, n) data <- diff(data, differences = n) if(is.matrix(data)) lval <- nrow(data) - else lval <- length(data) + else lval <- length(data) timefactor <- (n * interval)/2 ftime[1] <- ftime[1] + timefactor ftime[2] <- ftime[2] - timefactor @@ -54,12 +55,3 @@ ddiff.sub <- function(data, ftime, n) ## and return the data in the required format list(data = data, ftime = ftime) } - - - -# Local Variables: -# mode:S -# S-temp-buffer-p:t -# End: - - diff --git a/R/dextract.R b/R/dextract.R index e1d98cba..b79506d5 100644 --- a/R/dextract.R +++ b/R/dextract.R @@ -1,16 +1,42 @@ -############################################################################# -# # -# copyright : (C) 2000 SHLRC, Macquarie University # -# email : Steve.Cassidy@mq.edu.au # -# url : http://www.shlrc.mq.edu.au/emu # -# # -# This program is free software; you can redistribute it and/or modify # -# it under the terms of the GNU General Public License as published by # -# the Free Software Foundation; either version 2 of the License, or # -# (at your option) any later version. # -# # -############################################################################# - +##' Extract a subset of data from a trackdata object +##' +##' A function that cuts up trackdata either at a proportional time or +##' proportionally between two times. It is a subsidiary function of dplot() +##' +##' +##' @aliases dextract dextract.sub +##' @param dataset A trackdata object +##' @param start A single valued numeric vector corresponding to a proportional +##' time between zero (the onset of the trackdata) and one (the offset of the +##' trackdata). +##' @param end As start, but optional +##' @return If both start and end are specified, a trackdata object is +##' returned, otherwise a vector if the original trackdata is one-dimensional +##' and the end argument is not used, or a matrix if the original trackdata has +##' more than one dimension and the end argument is not used +##' @author Jonathan Harrington +##' @seealso \code{dcut} +##' @keywords datagen +##' @examples +##' +##' data(demo.vowels.f0) +##' data(demo.vowels.fm) +##' +##' form = demo.vowels.fm +##' # get the formants at the midpoint: f50 is a matrix +##' # same as dcut(form, .5, prop=TRUE) +##' f50 = dextract(form, 0.5) +##' # get the formants between the 25% and 75% time points +##' # fcut is a trackdata object +##' # same as dcut(form, .25, .75, prop=TRUE) +##' fcut = dextract(form, 0.25, 0.75) +##' # get F0 at the midpoint. fzero50 is a vector +##' # same as dcut(fzero, .5, prop=TRUE) +##' fzero = demo.vowels.f0 +##' fzero50 = dextract(fzero, 0.5) +##' +##' +##' @export dextract dextract <- function(dataset, start, end) { if((start < 0) | (start > 1)) { stop("proportional duration must be between 0 and 1") @@ -23,7 +49,7 @@ dextract <- function(dataset, start, end) { stop("proportional start must be less than proportional end") } } - + if(missing(end)) { leftin <- dataset$index[, 1] rightin <- dataset$index[, 2] @@ -37,23 +63,18 @@ dextract <- function(dataset, start, end) { } } + +##' @export +"dextract.sub" <- function (data, ftime, start, end) +{ # helper function for use via dapply, returns a new # trackdata element cut at start/end proportions -"dextract.sub" <- -function (data, ftime, start, end) -{ - len <- nrow(data) -start <- floor(start * (len - 1) + 1) -end <- ceiling(end * (len - 1) + 1) - - newdata <- data[start:end, ] - times <- seq(ftime[1], ftime[2], length = len) - newftime <- times[c(start, end)] - return(list(data = newdata, ftime = newftime)) + len <- nrow(data) + start <- floor(start * (len - 1) + 1) + end <- ceiling(end * (len - 1) + 1) + + newdata <- data[start:end, ] + times <- seq(ftime[1], ftime[2], length = len) + newftime <- times[c(start, end)] + return(list(data = newdata, ftime = newftime)) } - - -# Local Variables: -# mode:S -# S-temp-buffer-p:t -# End: diff --git a/R/dextract.lab.R b/R/dextract.lab.R index 9935a4b8..fc77f6bf 100644 --- a/R/dextract.lab.R +++ b/R/dextract.lab.R @@ -1,59 +1,60 @@ -############################################################################# -# # -# copyright : (C) 2000 SHLRC, Macquarie University # -# email : Steve.Cassidy@mq.edu.au # -# url : http://www.shlrc.mq.edu.au/emu # -# # -# This program is free software; you can redistribute it and/or modify # -# it under the terms of the GNU General Public License as published by # -# the Free Software Foundation; either version 2 of the License, or # -# (at your option) any later version. # -# # -############################################################################# - - +##' Extract a subset of data from a trackdata object +##' +##' Extract a subset of data from a trackdata object according to the label in +##' the parallel label vector. +##' +##' +##' @param dataset A trackdata object returned from \code{track}. +##' @param labs A vector of labels parallel to \code{trackdata$index}, i.e. one +##' for each segment in the trackdata. +##' @param labtype A vector of labels for which data is to be extracted. +##' @return A trackdata object which is a subset of \code{trackdata} containing +##' only the data for those labels in \code{labtype}. The result has the same +##' components as the input \code{trackdata}: +##' +##' \item{data}{ A vector or matrix of numerical data. } \item{index}{ A two +##' column matrix giving the start and end indices into the data vector for +##' each segment. } \item{ftime}{ A two column matrix giving the start and end +##' times for each segment. } +##' @seealso track, dextract, get.time.element, frames.time +##' @keywords internal +##' @export dextract.lab "dextract.lab"<- function(dataset, labs, labtype = unique(labs)) { -# extract data values from a dataset ($data, $index, $ftime) -# according to labtype (e.g. "i:", c("i:", "u:"). -# labs is parallel to dataset$index; labtype are -# the label types for which the values in dataset are -# to be extracted - mat <- NULL - lvals <- dataset$index[, 2] - dataset$index[, 1] + 1 - newlabs <- rep(labs, lvals) - temp <- muclass(newlabs, labtype) - if(is.matrix(dataset$data)) - vals <- dataset$data[temp, ] - else - vals <- dataset$data[temp] - - temp.lab <- muclass(labs, labtype) - - if(!is.null(dataset$ftime)) - ftimes <- dataset$ftime[temp.lab, ] - - finds <- dataset$index[temp.lab, ] - ## readjust the indeces - diffinds <- finds[, 2] - finds[, 1] + 1 - right <- cumsum(diffinds) - first.left <- diffinds - 1 - left <- right - first.left - finds <- cbind(left, right) - mat$data <- vals - mat$index <- finds - if(!is.null(dataset$ftime)) - mat$ftime <- ftimes - if( version$major >= 5 ) { - oldClass(mat) <- "trackdata" - } else { - class(mat) <- "trackdata" - } - mat + # extract data values from a dataset ($data, $index, $ftime) + # according to labtype (e.g. "i:", c("i:", "u:"). + # labs is parallel to dataset$index; labtype are + # the label types for which the values in dataset are + # to be extracted + mat <- NULL + lvals <- dataset$index[, 2] - dataset$index[, 1] + 1 + newlabs <- rep(labs, lvals) + temp <- muclass(newlabs, labtype) + if(is.matrix(dataset$data)) + vals <- dataset$data[temp, ] + else + vals <- dataset$data[temp] + + temp.lab <- muclass(labs, labtype) + + if(!is.null(dataset$ftime)) + ftimes <- dataset$ftime[temp.lab, ] + + finds <- dataset$index[temp.lab, ] + ## readjust the indices + diffinds <- finds[, 2] - finds[, 1] + 1 + right <- cumsum(diffinds) + first.left <- diffinds - 1 + left <- right - first.left + finds <- cbind(left, right) + mat$data <- vals + mat$index <- finds + if(!is.null(dataset$ftime)) + mat$ftime <- ftimes + if( version$major >= 5 ) { + oldClass(mat) <- "trackdata" + } else { + class(mat) <- "trackdata" + } + mat } - - -# Local Variables: -# mode:S -# S-temp-buffer-p:t -# End: diff --git a/R/dfuns.R b/R/dfuns.R index f096c2d7..97857cf5 100644 --- a/R/dfuns.R +++ b/R/dfuns.R @@ -1,19 +1,21 @@ -############################################################################# -# # -# copyright : (C) 2000 SHLRC, Macquarie University # -# email : Steve.Cassidy@mq.edu.au # -# url : http://www.shlrc.mq.edu.au/emu # -# # -# This program is free software; you can redistribute it and/or modify # -# it under the terms of the GNU General Public License as published by # -# the Free Software Foundation; either version 2 of the License, or # -# (at your option) any later version. # -# # -############################################################################# - - - - +##' Find the time and position of a data element. +##' +##' Finds the time and position of a data element. +##' +##' The dataset returned from \code{track} or \code{frames} consists of a +##' matrix of data (the \code{data} component) and two index components +##' (\code{index} and \code{ftime}). The data for all segments is concatenated +##' together in \code{$data}. This function can be used to find out which +##' segment a particular row of \code{$data} corresponds to. +##' +##' @param dataset A dataset returned by \code{track} or \code{frames}. +##' @param datanum An integer, an index into the \code{data} component of +##' \code{dataset}. +##' @return The segment number which contains the element \code{datanum} of +##' \code{dataset$data}. +##' @seealso track, frames +##' @keywords misc +##' @export frames.time "frames.time" <- function(dataset, datanum) { ## return the time and the number of the segment element @@ -33,6 +35,26 @@ } + + + + + + + + +##' Get data for a given time +##' +##' Gets data for a given time +##' +##' +##' @param timeval A time in milliseconds +##' @param dataset A trackdata object as returned by \code{track}. +##' @return The element number of \code{trackdata$data} corresponding to +##' \code{time} +##' @seealso track, frames +##' @keywords misc +##' @export get.time.element "get.time.element"<- function(timeval, dataset) { ## timeval: a time in milliseconds @@ -45,10 +67,3 @@ right.i <- dataset$index[numrows, 2] round(((timeval - left)/(right - left)) * (right.i - left.i)) + 1 } - - - -# Local Variables: -# mode:S -# S-temp-buffer-p:t -# End: diff --git a/R/dimtrackdata.R b/R/dimtrackdata.R index 26225b6f..224acd65 100644 --- a/R/dimtrackdata.R +++ b/R/dimtrackdata.R @@ -1,16 +1,49 @@ -dim.trackdata <- -function(x) -{ -# function returns the dimension attributes of -# a trackdata object as the number of segments x number of tracks -c(nrow(x$index), ncol(x$data)) -} -"dimnames.trackdata" <- -function(x) +##' A method of the generic function dim for objects of class 'trackdata' +##' +##' The function returns the dimension attributes of a track data object. +##' +##' The function returns the dimension attributes of a track data object as the +##' number of segments x number of tracks. c(nrow(x$index), ncol(x$data)) +##' +##' @aliases dim.trackdata dim +##' @param x a track data object +##' @author Jonathan Harrington +##' @keywords methods +##' @examples +##' +##' #isol.fdat is the formant track of the segment list isol +##' +##' #write out the dimension of the track data object +##' dim(isol.fdat) +##' +##' #because there are 13 segments +##' isol.fdat$ftime +##' +##' #and there are 4 rows for each segment (see here for the first segment) +##' isol.fdat$data[1,] +##' +##' @export +dim.trackdata <- function(x) { -trackdata = x -dimnames(trackdata$data) + # function returns the dimension attributes of + # a trackdata object as the number of segments x number of tracks + c(nrow(x$index), ncol(x$data)) } + + +##' Dimnames of trackdata object +##' +##' returns dimension names of trackdata objects +##' +##' +##' @param x trackdata object +##' @keywords methods +##' @export +"dimnames.trackdata" <- function(x) +{ + trackdata = x + dimnames(trackdata$data) +} diff --git a/R/dplot.R b/R/dplot.R index dceacbeb..d1013d25 100644 --- a/R/dplot.R +++ b/R/dplot.R @@ -1,338 +1,434 @@ -############################################################################# -# # -# copyright : (C) 2000 SHLRC, Macquarie University # -# email : Steve.Cassidy@mq.edu.au # -# url : http://www.shlrc.mq.edu.au/emu # -# # -# This program is free software; you can redistribute it and/or modify # -# it under the terms of the GNU General Public License as published by # -# the Free Software Foundation; either version 2 of the License, or # -# (at your option) any later version. # -# # -############################################################################# - - - -`dplot` <- -function (x, labs = NULL, offset = 0, prop=TRUE, - average = FALSE, - xlim = NULL, ylim = NULL, lty = FALSE, normalise = FALSE, - colour = TRUE, lwd = NULL, pch=NULL, legend = "topright", axes = TRUE, type="l", - n = 20, ...) -{ - - - -if(prop) +##' A function to plot one or more columns of EMU-trackdata as a function of +##' time (DEPRECATED see below) +##' +##' A general purpose routine for plotting EMU-trackdata on a single plot. +##' Tracks can be aligned at an arbitrary position, length normalised or +##' averaged. The plots can be colour-coded for different category types. +##' DEPRECATED as this function does not play well with with the new +##' resultType = "tibble" of \code{get_trackdata()}. See +##' \url{https://ips-lmu.github.io/The-EMU-SDMS-Manual/recipe-plottingSnippets.html} +##' for an alternative plotting routines using ggplot2. +##' +##' @aliases dplot dplot.norm dplot.time +##' @param x An EMU-trackdata object +##' @param labs A label vector with one element for each row in 'dataset' +##' @param offset Either: A single numeric vector between 0 and 1. 0 and 1 +##' denote synchronize the trackdata at their temporal onsets and offsets +##' respectively; 0.5 denotes synchronization at the temporal midpoint, etc. Or +##' a numeric vector of the same length as x specifying the synchronisation +##' point per segment +##' @param prop A single element character vector specifying whether the tracks +##' should be aligned proportionally or relative to millisecond times. Defaults +##' to proportional alignment +##' @param average If TRUE, the data for each unique label in 'labs' is +##' averaged +##' @param xlim A vector of two numeric values specifying the x-axis range +##' @param ylim A vector of two numeric values specifying the y-axis range +##' @param lty A single element logical vector. Defaults to FALSE. If TRUE, plot +##' each label type in a different linetype +##' @param normalise If TRUE, the data for each segment is linearly time +##' normalised so that all observations have the same length. The number of +##' points used in the linear time normalisation is control by the argument n. +##' @param colour A single element logical vector. Defaults to TRUE to plot each +##' label type in a different colour +##' @param lwd A code passed to the lwd argument in plotting functions. 'lwd' +##' can be either a single element numeric vector, or its length must be equal +##' to the number of unique types in labs. For example, if lwd=3 and if labs = +##' c("a", "b", "a", "c"), then the output is c(3, 3, 3, 3). Alternatively, if +##' lwd = c(2,3,1), then the output is c(2, 3, 2, 1) for the same example. The +##' default is NULL in which case all lines are drawn with lwd=1 +##' @param pch A code passed to the pch argument in plotting functions. +##' Functions in the same way as lwd above +##' @param legend Either a character vector to plot the legend. Possible values +##' are: "bottomright"', '"bottom"', '"bottomleft"', '"left"', '"topleft"', +##' '"top"', '"topright"', '"right"' and '"center"'. This places the legend on +##' the inside of the plot frame at the given location. Partial argument +##' matching is used. Or a logical vector: legend = FALSE suppresses legend +##' plotting. legend = TRUE plots it at the default, legend = "topright" +##' @param axes A single element logical vector. Defaults to TRUE to plot the axes +##' @param type The default line type. Default to "l" for a line plot +##' @param n A single element numeric vector. Only used if normalise=TRUE. The +##' number of data points used to linearly time normalise each track +##' @param ... graphical options \link{par} +##' @return NULL +##' @author Jonathan Harrington +##' @seealso \code{\link{dcut}} \code{\link{get_trackdata}} +##' @keywords dplot +##' @examples +##' +##' +##' # Plot of column 1 (which happens to be the 1st formant) of an EMU-trackdata object +##' dplot(dip.fdat[,1]) +##' +##' +##' # As above but only observations 1 to 5 +##' dplot(dip.fdat[1:5,1]) +##' +##' +##' # column 2 (which happens to be of the second formant) and colour-coded +##' # for each label-type +##' dplot(dip.fdat[,2], dip.l) +##' +##' +##' # put the legend bottom left +##' dplot(dip.fdat[,2], dip.l, legend="bottomleft") +##' +##' +##' # as above with no legend and averaged per category +##' dplot(dip.fdat[,2], dip.l, legend=FALSE, average=TRUE) +##' +##' +##' # both formants averaged +##' dplot(dip.fdat[,1:2], dip.l, average=TRUE) +##' +##' +##' # F2 only with linear-time normalisation +##' dplot(dip.fdat[,2], dip.l, norm=TRUE) +##' +##' +##' # linear time-normalisation, both formants and averaged +##' dplot(dip.fdat[,1:2], dip.l, norm=TRUE, average=TRUE) +##' +##' +##' # synchronise at the temporal midpoint before averaging, F2 only +##' dplot(dip.fdat[,2], dip.l, offset=0.5, average=TRUE) +##' +##' +##' # synchronise 60 ms before the diphthong offset +##' dplot(dip.fdat[,2], dip.l, offset=dip.fdat$ftime[,2]-60, prop=FALSE) +##' +##' +##' # as above averaged, no colour with linetype, +##' # different plot symbols double line thickness in the range between +- 20 ms +##' dplot(dip.fdat[,2], dip.l, offset=dip.fdat$ftime[,2]-60, prop=FALSE, +##' average=TRUE, colour=FALSE, lty=TRUE, pch=1:3, lwd =2, type="b", xlim=c(-20, 20)) +##' +##' +##' +##' +##' @export dplot +`dplot` <- function (x, labs = NULL, offset = 0, prop=TRUE, + average = FALSE, xlim = NULL, ylim = NULL, + lty = FALSE, normalise = FALSE, colour = TRUE, + lwd = NULL, pch=NULL, legend = "topright", + axes = TRUE, type="l", n = 20, ...) { -if(length(offset) != 1) -stop("Specify only one offset time when prop=T") -else if ((offset < 0) | (offset > 1)) - stop("offset must be between 0 and 1 when prop=T") - - -} - -else -if(nrow(x) != length(offset)) -stop("nrow(x) and length(offset) must be the same when prop=F") - - pout <- NULL - if (is.matrix(x$data)) { - pout <- as.list(NULL) - pout$data <- as.list(NULL) - mat <- NULL - if (is.null(ylim)) - ylim <- range(x$data) - numcols <- ncol(x$data) - - for (j in 1:ncol(x$data)) { - mat <- x - mat$data <- mat$data[, j] - if (!normalise) - vals <- dplot.time(mat, labs = labs, offset = offset, - prop=prop, average = average, xlim = xlim, - ylim = ylim, lty = lty, - colour = colour, legend = legend, lwd = lwd, pch=pch, type=type) - else vals <- dplot.norm(mat, labs = labs, average = average, - xlim = xlim, ylim = ylim, lty = lty, - colour = colour, legend = legend, lwd = lwd, pch=pch, type=type, n = n) - par(new = TRUE) - pout$data[[j]] <- vals$data - if (j == ncol(x$data)) { - pout$time <- vals$time - pout$labs <- vals$labs - } - } - } - else { - if (!normalise) - pout <- dplot.time(x, labs = labs, offset = offset, - prop=prop, average = average, xlim = xlim, ylim = ylim, - lty = lty, colour = colour, - lwd = lwd, pch=pch, type=type, legend = legend) - else pout <- dplot.norm(x, labs = labs, average = average, xlim = xlim, - ylim = ylim, lty = lty, colour = colour, - lwd = lwd, pch=pch, type=type, legend = legend, n = n) - } - - par(new = FALSE) - - invisible(pout) - title(...) - box(...) - if (axes) { - axis(side = 1) - axis(side = 2) + oldpar = graphics::par(no.readonly=TRUE) + on.exit(graphics::par(oldpar)) + + if(prop) + { + if(length(offset) != 1) + stop("Specify only one offset time when prop=TRUE") + else if ((offset < 0) | (offset > 1)) + stop("offset must be between 0 and 1 when prop=TRUE") + + + } + + else + if(nrow(x) != length(offset)) + stop("nrow(x) and length(offset) must be the same when prop=FALSE") + + pout <- NULL + if (is.matrix(x$data)) { + pout <- as.list(NULL) + pout$data <- as.list(NULL) + mat <- NULL + if (is.null(ylim)) + ylim <- range(x$data) + numcols <- ncol(x$data) + + for (j in 1:ncol(x$data)) { + mat <- x + mat$data <- mat$data[, j] + if (!normalise) + vals <- dplot.time(mat, labs = labs, offset = offset, + prop=prop, average = average, xlim = xlim, + ylim = ylim, lty = lty, + colour = colour, legend = legend, lwd = lwd, pch=pch, type=type) + else vals <- dplot.norm(mat, labs = labs, average = average, + xlim = xlim, ylim = ylim, lty = lty, + colour = colour, legend = legend, lwd = lwd, pch=pch, type=type, n = n) + graphics::par(new = TRUE) + pout$data[[j]] <- vals$data + if (j == ncol(x$data)) { + pout$time <- vals$time + pout$labs <- vals$labs + } } + } + else { + if (!normalise) + pout <- dplot.time(x, labs = labs, offset = offset, + prop=prop, average = average, xlim = xlim, ylim = ylim, + lty = lty, colour = colour, + lwd = lwd, pch=pch, type=type, legend = legend) + else pout <- dplot.norm(x, labs = labs, average = average, xlim = xlim, + ylim = ylim, lty = lty, colour = colour, + lwd = lwd, pch=pch, type=type, legend = legend, n = n) + } + + graphics::par(new = FALSE) + + invisible(pout) + graphics::title(...) + graphics::box(...) + if (axes) { + graphics::axis(side = 1) + graphics::axis(side = 2) + } } - -`dplot.time` <- -function (x, labs = NULL, offset = 0, prop=TRUE, - average = FALSE, - xlim = NULL, ylim = NULL, lty = FALSE, colour = TRUE, - lwd = NULL, pch=NULL, legend = "topright", type="l", ...) +##' @export +`dplot.time` <- function (x, labs = NULL, offset = 0, prop=TRUE, + average = FALSE, xlim = NULL, ylim = NULL, + lty = FALSE, colour = TRUE, lwd = NULL, + pch=NULL, legend = "topright", type="l", ...) { - ovec <- as.list(NULL) - samrate <- 1000/((x$ftime[1, 2] - x$ftime[1, - 1])/(x$index[1, 2] - x$index[1, 1])) - if (is.null(labs)) - labs <- rep(1, nrow(x$index)) - col.lty <- mu.colour(labs, colour, lty, lwd, pch) - colour <- col.lty$colour - lty <- col.lty$linetype - lwd <- col.lty$lwd - pch <- col.lty$pch - if (prop) - ref.time <- x$ftime[, 1] + ((x$ftime[, 2] - - x$ftime[, 1]) * offset) - else ref.time <- offset - maxlen <- 2 * (max(x$index[, 2] - x$index[, 1] + - 1)) - pointval <- round(maxlen/2) - mat.na <- matrix(NA, nrow(x$index), maxlen) - for (j in 1:nrow(x$index)) { - left <- x$index[j, 1] - right <- x$index[j, 2] - length.index <- right - left + 1 - times <- x$ftime[j, ] - refn <- ref.time[j] - inval <- closest(seq(times[1], times[2], length = length.index), - refn) - inval <- inval[1] - left.na <- pointval - inval + 1 - right.na <- left.na + length.index - 1 - mat.na[j, left.na:right.na] <- x$data[left:right] - } - z <- apply(mat.na, 2, mean, na.rm = TRUE) - natemp <- is.na(z) - nums <- c(1:length(natemp)) - nonums <- nums[!natemp] - interval <- 1000/samrate - if (is.null(xlim)) - xlim <- c(nonums[1], nonums[length(nonums)]) - else xlim <- c(pointval + xlim[1]/interval, pointval + xlim[2]/interval) - time1 <- (1 - pointval) * interval - time2 <- (ncol(mat.na) - pointval) * interval - xtime <- seq(time1, time2, length = ncol(mat.na)) - xtimelim <- (xlim - pointval) * interval - if (is.null(ylim)) - ylim <- range(mat.na, na.rm = TRUE) - if (!average) { - for (j in 1:nrow(mat.na)) { - plot(xtime, mat.na[j, ], xlim = xtimelim, ylim = ylim, - xlab = "", ylab = "", axes = FALSE, type = type, - col = colour[j], lty = as.numeric(lty[j]), bty="n", - lwd = as.numeric(lwd[j]), pch=as.numeric(pch[j])) - par(new = TRUE) - } - ovec$data <- mat.na - ovec$time <- xtime - ovec$labs <- labs + oldpar = graphics::par(no.readonly=TRUE) + on.exit(graphics::par(oldpar)) + + ovec <- as.list(NULL) + samrate <- 1000/((x$ftime[1, 2] - x$ftime[1, + 1])/(x$index[1, 2] - x$index[1, 1])) + if (is.null(labs)) + labs <- rep(1, nrow(x$index)) + col.lty <- mu.colour(labs, colour, lty, lwd, pch) + colour <- col.lty$colour + lty <- col.lty$linetype + lwd <- col.lty$lwd + pch <- col.lty$pch + if (prop) + ref.time <- x$ftime[, 1] + ((x$ftime[, 2] - + x$ftime[, 1]) * offset) + else ref.time <- offset + maxlen <- 2 * (max(x$index[, 2] - x$index[, 1] + + 1)) + pointval <- round(maxlen/2) + mat.na <- matrix(NA, nrow(x$index), maxlen) + for (j in 1:nrow(x$index)) { + left <- x$index[j, 1] + right <- x$index[j, 2] + length.index <- right - left + 1 + times <- x$ftime[j, ] + refn <- ref.time[j] + inval <- closest(seq(times[1], times[2], length = length.index), + refn) + inval <- inval[1] + left.na <- pointval - inval + 1 + right.na <- left.na + length.index - 1 + mat.na[j, left.na:right.na] <- x$data[left:right] + } + z <- apply(mat.na, 2, mean, na.rm = TRUE) + natemp <- is.na(z) + nums <- c(1:length(natemp)) + nonums <- nums[!natemp] + interval <- 1000/samrate + if (is.null(xlim)) + xlim <- c(nonums[1], nonums[length(nonums)]) + else xlim <- c(pointval + xlim[1]/interval, pointval + xlim[2]/interval) + time1 <- (1 - pointval) * interval + time2 <- (ncol(mat.na) - pointval) * interval + xtime <- seq(time1, time2, length = ncol(mat.na)) + xtimelim <- (xlim - pointval) * interval + if (is.null(ylim)) + ylim <- range(mat.na, na.rm = TRUE) + if (!average) { + for (j in 1:nrow(mat.na)) { + graphics::plot(xtime, mat.na[j, ], xlim = xtimelim, ylim = ylim, + xlab = "", ylab = "", axes = FALSE, type = type, + col = colour[j], lty = as.numeric(lty[j]), bty="n", + lwd = as.numeric(lwd[j]), pch=as.numeric(pch[j])) + graphics::par(new = TRUE) } - else { - if (!is.null(labs)) { - outmat <- NULL - outlabs <- NULL - for (j in unique(labs)) { - temp <- labs == j - vals <- mat.na[temp, ] - if (is.matrix(vals)) { - mvals <- apply(vals, 2, mean, na.rm = TRUE) - } - else { - mvals <- vals - } - outmat <- rbind(outmat, mvals) - outlabs <- c(outlabs, j) - } + ovec$data <- mat.na + ovec$time <- xtime + ovec$labs <- labs + } + else { + if (!is.null(labs)) { + outmat <- NULL + outlabs <- NULL + for (j in unique(labs)) { + temp <- labs == j + vals <- mat.na[temp, ] + if (is.matrix(vals)) { + mvals <- apply(vals, 2, mean, na.rm = TRUE) } else { - outmat <- apply(mat.na, 2, mean, na.rm = TRUE) - outmat <- rbind(outmat) - outlabs <- 1 - } - col.code <- match(col.lty$legend$lab, unique(labs)) - colour <- col.lty$legend$col - lty <- col.lty$legend$lty - lwd <- col.lty$legend$lwd - pch <- col.lty$legend$pch - for (j in 1:nrow(outmat)) { - plot(xtime, outmat[j, ], xlim = xtimelim, ylim = ylim, - xlab = "", ylab = "", axes = FALSE, type = type, bty="n", - col = colour[col.code[j]], lty = as.numeric(lty[col.code[j]]), - lwd = as.numeric(lwd[col.code[j]]), pch = as.numeric(pch[col.code[j]])) - par(new = TRUE) + mvals <- vals } - ovec$data <- outmat - ovec$time <- xtime - ovec$labs <- outlabs + outmat <- rbind(outmat, mvals) + outlabs <- c(outlabs, j) + } + } + else { + outmat <- apply(mat.na, 2, mean, na.rm = TRUE) + outmat <- rbind(outmat) + outlabs <- 1 + } + col.code <- match(col.lty$legend$lab, unique(labs)) + colour <- col.lty$legend$col + lty <- col.lty$legend$lty + lwd <- col.lty$legend$lwd + pch <- col.lty$legend$pch + for (j in 1:nrow(outmat)) { + graphics::plot(xtime, outmat[j, ], xlim = xtimelim, ylim = ylim, + xlab = "", ylab = "", axes = FALSE, type = type, bty="n", + col = colour[col.code[j]], lty = as.numeric(lty[col.code[j]]), + lwd = as.numeric(lwd[col.code[j]]), pch = as.numeric(pch[col.code[j]])) + graphics::par(new = TRUE) } - - if (is.logical(legend)) { - if (legend) - { - legend <- "topright" - if ((type=="l") | is.null(pch)) - legend(legend, NULL, col.lty$legend$lab, col = col.lty$legend$col, - lty = as.numeric(col.lty$legend$lty), lwd = as.numeric(col.lty$legend$lwd)) - else if(type=="p") - legend(legend, NULL, col.lty$legend$lab, col = col.lty$legend$col, - pch = as.numeric(col.lty$legend$pch) ) - else + ovec$data <- outmat + ovec$time <- xtime + ovec$labs <- outlabs + } + + if (is.logical(legend)) { + if (legend) + { + legend <- "topright" + if ((type=="l") | is.null(pch)) legend(legend, NULL, col.lty$legend$lab, col = col.lty$legend$col, - lty = as.numeric(col.lty$legend$lty), lwd = as.numeric(col.lty$legend$lwd),pch = as.numeric(col.lty$legend$pch) ) - } + lty = as.numeric(col.lty$legend$lty), lwd = as.numeric(col.lty$legend$lwd)) + else if(type=="p") + legend(legend, NULL, col.lty$legend$lab, col = col.lty$legend$col, + pch = as.numeric(col.lty$legend$pch) ) + else + legend(legend, NULL, col.lty$legend$lab, col = col.lty$legend$col, + lty = as.numeric(col.lty$legend$lty), lwd = as.numeric(col.lty$legend$lwd),pch = as.numeric(col.lty$legend$pch) ) } - else - { - if ((type=="l") | is.null(pch)) - legend(legend, NULL, col.lty$legend$lab, col = col.lty$legend$col, - lty = as.numeric(col.lty$legend$lty), lwd = as.numeric(col.lty$legend$lwd)) - else if(type=="p") - legend(legend, NULL, col.lty$legend$lab, col = col.lty$legend$col, - pch = as.numeric(col.lty$legend$pch) ) - else - legend(legend, NULL, col.lty$legend$lab, col = col.lty$legend$col, - lty = as.numeric(col.lty$legend$lty), lwd = as.numeric(col.lty$legend$lwd),pch = as.numeric(col.lty$legend$pch) ) - - } - - invisible(ovec) - + } + else + { + if ((type=="l") | is.null(pch)) + legend(legend, NULL, col.lty$legend$lab, col = col.lty$legend$col, + lty = as.numeric(col.lty$legend$lty), lwd = as.numeric(col.lty$legend$lwd)) + else if(type=="p") + legend(legend, NULL, col.lty$legend$lab, col = col.lty$legend$col, + pch = as.numeric(col.lty$legend$pch) ) + else + legend(legend, NULL, col.lty$legend$lab, col = col.lty$legend$col, + lty = as.numeric(col.lty$legend$lty), lwd = as.numeric(col.lty$legend$lwd),pch = as.numeric(col.lty$legend$pch) ) + + } + + invisible(ovec) + } - -`dplot.norm` <- -function (x, labs = NULL, average = FALSE, - xlim = NULL, ylim = NULL, lty = FALSE, type="l", - colour = TRUE, lwd = NULL, pch=NULL, legend = "topright", - n = 20) +##' @export +`dplot.norm` <- function (x, labs = NULL, average = FALSE, xlim = NULL, + ylim = NULL, lty = FALSE, type = "l", colour = TRUE, + lwd = NULL, pch = NULL, legend = "topright", n = 20) { - ovec <- NULL - if (is.null(ylim)) - ylim <- range(x$data) - if (is.null(xlim)) - xlim <- c(0, 1) - if (is.null(labs)) { - labs <- rep(1, nrow(x$index)) - } - col.lty <- mu.colour(labs, colour, lty, lwd, pch) - colour <- col.lty$colour - lty <- col.lty$linetype - lwd <- col.lty$lwd - pch <- col.lty$pch - mat.na <- linear(x, n) - mat.na$ftime <- x$ftime - class(mat.na) <- "trackdata" - xvec <- seq(0, 1, length = n) - lval <- nrow(x$index) - if (!average) { - for (j in 1:lval) { - plot(xvec, mat.na[j]$data, xlim = xlim, ylim = ylim, - xlab = "", ylab = "", axes = FALSE, type = type, bty="n", - col = colour[j], lty = as.numeric(lty[j]), - lwd = as.numeric(lwd[j]), pch = as.numeric(pch[j])) - par(new = TRUE) - } - ovec$data <- mat.na - ovec$time <- xvec - ovec$labs <- labs + oldpar = graphics::par(no.readonly=TRUE) + on.exit(graphics::par(oldpar)) + + ovec <- NULL + if (is.null(ylim)) + ylim <- range(x$data) + if (is.null(xlim)) + xlim <- c(0, 1) + if (is.null(labs)) { + labs <- rep(1, nrow(x$index)) + } + col.lty <- mu.colour(labs, colour, lty, lwd, pch) + colour <- col.lty$colour + lty <- col.lty$linetype + lwd <- col.lty$lwd + pch <- col.lty$pch + mat.na <- linear(x, n) + mat.na$ftime <- x$ftime + class(mat.na) <- "trackdata" + xvec <- seq(0, 1, length = n) + lval <- nrow(x$index) + if (!average) { + for (j in 1:lval) { + graphics::plot(xvec, mat.na[j]$data, xlim = xlim, ylim = ylim, + xlab = "", ylab = "", axes = FALSE, type = type, bty="n", + col = colour[j], lty = as.numeric(lty[j]), + lwd = as.numeric(lwd[j]), pch = as.numeric(pch[j])) + graphics::par(new = TRUE) } - else { - if (!is.null(labs)) { - outmat <- NULL - outlabs <- NULL - for (j in unique(labs)) { - temp <- labs == j - vals <- mat.na[temp]$data - vals <- matrix(vals, ncol = n, byrow = TRUE) - if (is.matrix(vals)) { - mvals <- apply(vals, 2, mean) - } - else { - mvals <- vals - } - outmat <- rbind(outmat, mvals) - outlabs <- c(outlabs, j) - } + ovec$data <- mat.na + ovec$time <- xvec + ovec$labs <- labs + } + else { + if (!is.null(labs)) { + outmat <- NULL + outlabs <- NULL + for (j in unique(labs)) { + temp <- labs == j + vals <- mat.na[temp]$data + vals <- matrix(vals, ncol = n, byrow = TRUE) + if (is.matrix(vals)) { + mvals <- apply(vals, 2, mean) } else { - outmat <- apply(matrix(mat.na, ncol = 20, byrow = TRUE), - 2, mean) - outmat <- rbind(outmat) - outlabs <- 1 - } - col.code <- match(col.lty$legend$lab, unique(labs)) - colour <- col.lty$legend$col - lty <- col.lty$legend$lty - lwd <- col.lty$legend$lwd - pch <- col.lty$legend$pch - for (j in 1:nrow(outmat)) { - plot(xvec, outmat[j, ], xlim = xlim, ylim = ylim, - xlab = "", ylab = "", axes = FALSE, type = type, bty="n", - col = colour[col.code[j]], lty = as.numeric(lty[col.code[j]]), - lwd = as.numeric(lwd[col.code[j]]), pch = as.numeric(pch[col.code[j]])) - par(new = TRUE) + mvals <- vals } - ovec$data <- outmat - ovec$time <- xvec - ovec$labs <- labs + outmat <- rbind(outmat, mvals) + outlabs <- c(outlabs, j) + } } - - if (is.logical(legend)) { - if (legend) - { - legend <- "topright" - if ((type=="l") | is.null(pch)) - legend(legend, NULL, col.lty$legend$lab, col = col.lty$legend$col, - lty = as.numeric(col.lty$legend$lty), lwd = as.numeric(col.lty$legend$lwd)) - else if(type=="p") - legend(legend, NULL, col.lty$legend$lab, col = col.lty$legend$col, - pch = as.numeric(col.lty$legend$pch) ) - else + else { + outmat <- apply(matrix(mat.na, ncol = 20, byrow = TRUE), + 2, mean) + outmat <- rbind(outmat) + outlabs <- 1 + } + col.code <- match(col.lty$legend$lab, unique(labs)) + colour <- col.lty$legend$col + lty <- col.lty$legend$lty + lwd <- col.lty$legend$lwd + pch <- col.lty$legend$pch + for (j in 1:nrow(outmat)) { + graphics::plot(xvec, outmat[j, ], xlim = xlim, ylim = ylim, + xlab = "", ylab = "", axes = FALSE, type = type, bty="n", + col = colour[col.code[j]], lty = as.numeric(lty[col.code[j]]), + lwd = as.numeric(lwd[col.code[j]]), pch = as.numeric(pch[col.code[j]])) + graphics::par(new = TRUE) + } + ovec$data <- outmat + ovec$time <- xvec + ovec$labs <- labs + } + + if (is.logical(legend)) { + if (legend) + { + legend <- "topright" + if ((type=="l") | is.null(pch)) legend(legend, NULL, col.lty$legend$lab, col = col.lty$legend$col, - lty = as.numeric(col.lty$legend$lty), lwd = as.numeric(col.lty$legend$lwd),pch = as.numeric(col.lty$legend$pch) ) - } + lty = as.numeric(col.lty$legend$lty), lwd = as.numeric(col.lty$legend$lwd)) + else if(type=="p") + legend(legend, NULL, col.lty$legend$lab, col = col.lty$legend$col, + pch = as.numeric(col.lty$legend$pch) ) + else + legend(legend, NULL, col.lty$legend$lab, col = col.lty$legend$col, + lty = as.numeric(col.lty$legend$lty), lwd = as.numeric(col.lty$legend$lwd),pch = as.numeric(col.lty$legend$pch) ) } - else - { - if ((type=="l") | is.null(pch)) - legend(legend, NULL, col.lty$legend$lab, col = col.lty$legend$col, - lty = as.numeric(col.lty$legend$lty), lwd = as.numeric(col.lty$legend$lwd)) - else if(type=="p") - legend(legend, NULL, col.lty$legend$lab, col = col.lty$legend$col, - pch = as.numeric(col.lty$legend$pch) ) - else - legend(legend, NULL, col.lty$legend$lab, col = col.lty$legend$col, - lty = as.numeric(col.lty$legend$lty), lwd = as.numeric(col.lty$legend$lwd),pch = as.numeric(col.lty$legend$pch) ) - - } - invisible(ovec) + } + else + { + if ((type=="l") | is.null(pch)) + legend(legend, NULL, col.lty$legend$lab, col = col.lty$legend$col, + lty = as.numeric(col.lty$legend$lty), lwd = as.numeric(col.lty$legend$lwd)) + else if(type=="p") + legend(legend, NULL, col.lty$legend$lab, col = col.lty$legend$col, + pch = as.numeric(col.lty$legend$pch) ) + else + legend(legend, NULL, col.lty$legend$lab, col = col.lty$legend$col, + lty = as.numeric(col.lty$legend$lty), lwd = as.numeric(col.lty$legend$lwd),pch = as.numeric(col.lty$legend$pch) ) + + } + invisible(ovec) } diff --git a/R/dsmooth.R b/R/dsmooth.R index 4cb53872..a3c6e2d5 100644 --- a/R/dsmooth.R +++ b/R/dsmooth.R @@ -1,17 +1,17 @@ -############################################################################# -# # -# copyright : (C) 2000 SHLRC, Macquarie University # -# email : Steve.Cassidy@mq.edu.au # -# url : http://www.shlrc.mq.edu.au/emu # -# # -# This program is free software; you can redistribute it and/or modify # -# it under the terms of the GNU General Public License as published by # -# the Free Software Foundation; either version 2 of the License, or # -# (at your option) any later version. # -# # -############################################################################# - - +##' Smooth the data in a trackdata object. +##' +##' Smooths each dataset in a trackdata object using a running mean smoother. +##' +##' This function uses the \code{dapply} function to apply \code{smooth} to the +##' data for each segment. +##' +##' @aliases dsmooth dsmooth.sub +##' @param dataset A trackdata object as returned from \code{track}. +##' @return The result of applying the \code{smooth} function to each column of +##' the data for each segment in the trackdata object. +##' @seealso smooth, dapply +##' @keywords misc +##' @export dsmooth "dsmooth"<- function(dataset) { ## dataset: a list, as returned by track @@ -20,6 +20,8 @@ } + +##' @export "dsmooth.sub" <- function(data, ftime) { if(is.matrix(data)){ @@ -31,9 +33,3 @@ } return( list(data=data, ftime=ftime) ) } - - -# Local Variables: -# mode:S -# S-temp-buffer-p:t -# End: diff --git a/R/dtime.R b/R/dtime.R index d3070663..0a60192c 100644 --- a/R/dtime.R +++ b/R/dtime.R @@ -1,28 +1,21 @@ -############################################################################# -# # -# copyright : (C) 2000 SHLRC, Macquarie University # -# email : Steve.Cassidy@mq.edu.au # -# url : http://www.shlrc.mq.edu.au/emu # -# # -# This program is free software; you can redistribute it and/or modify # -# it under the terms of the GNU General Public License as published by # -# the Free Software Foundation; either version 2 of the License, or # -# (at your option) any later version. # -# # -############################################################################# - - +##' time signal times +##' +##' see function +##' +##' +##' @keywords internal +##' @export dtime dtime <- function(dataset, times, single = TRUE, average = TRUE) { if(!is.matrix(dataset$data)) dataset$data <- cbind(dataset$data) - + if(!is.matrix(dataset$index)) { dataset$index <- rbind(dataset$index) dataset$ftime <- rbind(dataset$ftime) } - + mat <- NULL - + for(j in 1:length(times)) { left <- dataset$index[j, 1] right <- dataset$index[j, 2] @@ -46,7 +39,7 @@ dtime <- function(dataset, times, single = TRUE, average = TRUE) { } mat <- rbind(mat, dat[cval, ]) } - + if(ncol(mat) == 1) { c(mat) } @@ -60,70 +53,155 @@ dtime <- function(dataset, times, single = TRUE, average = TRUE) { -"dcut" <- -function (trackdata, left.time, right.time, single = TRUE, average = TRUE, - prop = FALSE) + + + + + + + + +##' Function to extract a vector or matrix from EMU-Trackdata at a single time +##' point of to create another EMU-trackdata object between two times. +##' +##' A general purpose tool for extracting data from track objects either at a +##' particular time, or between two times. The times can be values in +##' milliseconds or proportional times between zero (the onset) and one (the +##' offset). +##' +##' This function extracts data from each segment of a trackdata object. +##' +##' If 'prop=FALSE' the time arguments ('left.time' and 'right.time') are +##' interpreted as millisecond times and each should be a vector with the same +##' length as the number of segments in 'trackdata'. If 'prop=TRUE' the time +##' arguments should be single values between zero (the onset of the segment) +##' and one (the offset). +##' +##' If 'right.time' is omitted then a single data point corresponding to +##' 'left.time' for each segment is returned. +##' +##' @aliases dcut dcut.sub +##' @param trackdata An Emu trackdata object. +##' @param left.time Either: a numeric vector of the same length as there are +##' observations in trackdata. Or: a single value between 0 and 1. In the first +##' case, the left time boundary of trackdata[n,] is cut at left.time[n], in +##' the second case, and if prop=TRUE, it is cut at that proportional time. +##' @param right.time Either: a numeric vector of the same length as there are +##' observations in trackdata. Or: a single value between 0 and 1. In the first +##' case, the right time boundary of trackdata[n,] is cut at right.time[n], in +##' the second case, and if prop=TRUE, it is cut at that proportional time. +##' @param single If TRUE, one value is returned per segment. This applies when +##' the requested time falls between two track frames. When single=TRUE, the +##' preceding value is returned, unless average=TRUE (see below), in which case +##' the average value of the two frames is returned. when the right.time +##' argument is omitted +##' @param average A single element logical vector - see single above. Applies +##' only when the right.times argument is omitted and when single = TRUE +##' @param prop If TRUE left.time and right.time are interpreted as +##' proportions, if FALSE, they are interpreted as millisecond times +##' @return A trackdata object if both 'left.time' and 'right.time' are +##' specified, otherwise a matrix if 'right.time' is unspecified and the +##' trackdata object has multiple columns of data or a vector if right.time' is +##' unspecified and the trackdata object has a single column of data. +##' @author Jonathan Harrington +##' @seealso \code{\link{get_trackdata}}, \code{\link{dplot}}, \code{\link{eplot}} +##' @keywords datagen +##' @examples +##' +##' # the data values of the trackdata object at the temporal midpoint +##' # (midvals is matrix of F1 and F2 data) +##' dip.fdat[1:10] +##' midvals <- dcut(dip.fdat, 0.5, prop=TRUE) +##' midvals[1:10,] +##' +##' +##' # the data values of the trackdata object between +##' # extending from 20 +##' # (bet is a trackdata object of F1 and F2 values) +##' bet <- dcut(dip.fdat, 0.2, 0.8, prop=TRUE) +##' bet[1] +##' +##' +##' # the data values of the trackdata object at 30 ms after +##' # the start time of the trackdata object +##' # (time30 is a matrix of F1 and F2 data +##' times <- dip.fdat$ftime[,1]+30 +##' times[1:10] +##' time30 <- dcut(dip.fdat, times) +##' time30[1:10] +##' +##' +##' # the data values of the trackdata object +##' # between the start time and 30 ms after the start time +##' # (int is a trackdata object of F1 and F2 values extending +##' # from the start of the diphthongs up to 30 ms after the diphthongs) +##' int <- dcut(dip.fdat, dip.fdat$ftime[,1], times) +##' int[1] +##' +##' @export dcut +"dcut" <- function (trackdata, left.time, right.time, + single = TRUE, average = TRUE, prop = FALSE) { - if (prop) { - if (missing(right.time)) - omat <- dextract(trackdata, left.time) - else omat <- dextract(trackdata, left.time, right.time) - } + if (prop) { + if (missing(right.time)) + omat <- dextract(trackdata, left.time) + else omat <- dextract(trackdata, left.time, right.time) + } + else { + if (missing(right.time)) + omat <- dtime(trackdata, left.time, single = single, + average = average) else { - if (missing(right.time)) - omat <- dtime(trackdata, left.time, single = single, - average = average) - else { - if (length(left.time) != nrow(trackdata$ftime)) { - stop("different number of elements in left.time and $ftime") - } - if (length(right.time) != nrow(trackdata$ftime)) { - stop("different number of elements in right.time and $ftime") - } - if (any(left.time < trackdata$ftime[, 1])) - stop("some $ftime[,1] values are less than left.time ") - if (any(right.time > trackdata$ftime[, 2])) - stop("some $ftime[,2] values are greater than right.time ") - if (any(right.time <= left.time)) - stop("some right.time values are before the corresponding left.time") - lval <- nrow(trackdata$index) - for (j in 1:lval) { - tdat <- dcut.sub(trackdata[j], left.time[j], - right.time[j]) - if (j == 1) - omat <- tdat - else omat <- bind(omat, tdat) - } - } + if (length(left.time) != nrow(trackdata$ftime)) { + stop("different number of elements in left.time and $ftime") + } + if (length(right.time) != nrow(trackdata$ftime)) { + stop("different number of elements in right.time and $ftime") + } + if (any(left.time < trackdata$ftime[, 1])) + stop("some $ftime[,1] values are less than left.time ") + if (any(right.time > trackdata$ftime[, 2])) + stop("some $ftime[,2] values are greater than right.time ") + if (any(right.time <= left.time)) + stop("some right.time values are before the corresponding left.time") + lval <- nrow(trackdata$index) + for (j in 1:lval) { + tdat <- dcut.sub(trackdata[j], left.time[j], + right.time[j]) + if (j == 1) + omat <- tdat + else omat <- bind(omat, tdat) + } } -if(is.spectral(trackdata$data)) -{ -if(is.trackdata(omat)) -{ -attr(omat$data, "fs") <- attr(trackdata$data, "fs") -if(!is.spectral(omat$data)) -class(omat$data) <- c(class(omat$data), "spectral") -} -else -{ -attr(omat, "fs") <- attr(trackdata$data, "fs") -if(!is.spectral(omat)) -class(omat) <- c(class(omat), "spectral") -} - -} - return(omat) + } + if(is.spectral(trackdata$data)) + { + if(is.trackdata(omat)) + { + attr(omat$data, "fs") <- attr(trackdata$data, "fs") + if(!is.spectral(omat$data)) + class(omat$data) <- c(class(omat$data), "spectral") + } + else + { + attr(omat, "fs") <- attr(trackdata$data, "fs") + if(!is.spectral(omat)) + class(omat) <- c(class(omat), "spectral") + } + + } + return(omat) } - +##' @export "dcut.sub" <- function(trackdata, left.time, right.time) { vals <- trackdata$data left <- trackdata$ftime[1] right <- trackdata$ftime[2] - + if(is.matrix(vals)) N <- nrow(vals) else @@ -131,28 +209,22 @@ class(omat) <- c(class(omat), "spectral") times <- seq(left, right, length = N) first <- closest(times, left.time) - + if(length(first) > 1) first <- round(mean(first)) - + second <- closest(times, right.time) - + if(length(second) > 1) second <- round(mean(second)) - + if(is.matrix(vals)) trackdata$data <- vals[first:second, ] else trackdata$data <- cbind(vals[first:second] - ) + ) trackdata$ftime <- cbind(times[first], times[second]) trackdata$index <- cbind(1, length(first:second)) as.trackdata(trackdata$data, trackdata$index, trackdata$ftime) } - - -# Local Variables: -# mode:S -# S-temp-buffer-p:t -# End: diff --git a/R/emuR-add_signal.R b/R/emuR-add_signal.R new file mode 100644 index 00000000..65ca92aa --- /dev/null +++ b/R/emuR-add_signal.R @@ -0,0 +1,399 @@ +#' add_signalViaMatlab +#' @md +#' +#' @description Use a Matlab function to derive an extra signal file for each +#' bundle of the Emu database. A new track definition will be added to the +#' database automatically. +#' +#' @details This function enables EMU-SDMS users you take advantage of tool boxes +#' and signal processing functions written in Matlab. The Matlab function must meet +#' certain requirements as detailed below, and it will always be run against the +#' entire database (either one bundle at a time or the whole database at a time, +#' but never only a part of the database). +#' +#' The Matlab function must: +#' +#' * Be defined in a file of its own. +#' * Accept named parameters. +#' * Accept at least the parameters `inputFilename` and `outputFilename`, both +#' strings. +#' * Use the file at `inputFilename` and produce a new file `outputFilename`; +#' the new file must be a `.mat` file containing the variables `data`, +#' `sampleRate`, `startTime`, `units`, and `comment`. +#' +#' You can find examples of Matlab functions that meet these requirements by running +#' [create_emuRdemoData()] and then looking at the subdirectory `add_signal_files/matlab/`. +#' +#' The Matlab function can accept more parameters to influence the signal +#' processing. These parameters need not be the same values for the entire +#' database. They can be used, for example, to modify the signal processing +#' algorithms in a speaker-specific way. +#' +#' If `oneMatlabFunctionCallPerFile` is `TRUE`, the function will be called once +#' for every bundle of the database; in that case, all parameters +#' to the Matlab function will be 1x1 matrices. If `oneMatlabFunctionCallPerFile` +#' is `FALSE`, the Matlab function will only be called once for the entire database; +#' in that case, all parameters will be 1xN matrices with N equal to the number +#' of bundles in the database. `add_signalViaMatlab` will create a temporary `.m` +#' script. That script may, for example, contain code like this: +#' +#' ```matlab +#' demoSignalScalerForOneFile(inputFilename="msajc003.wav", outputFilename="/tmp/RtmpRwjz5Q/add_signalViaMatlab/0fc618dc-8980-414d-8c7a-144a649ce199/0000_ses/msajc003.mat", scalingFactor=1); +#' demoSignalScalerForOneFile(inputFilename="msajc010.wav", outputFilename="/tmp/RtmpRwjz5Q/add_signalViaMatlab/0fc618dc-8980-414d-8c7a-144a649ce199/0000_ses/msajc010.mat", scalingFactor=4); +#' ``` +#' +#' Or like this: +#' +#' ```matlab +#' demoSignalScalerForManyFiles(inputFilename=["msajc003.wav", "msajc010.wav",], outputFilename=["/tmp/RtmpRwjz5Q/add_signalViaMatlab/0fc618dc-8980-414d-8c7a-144a649ce199/0000_ses/msajc003.mat", "/tmp/RtmpRwjz5Q/add_signalViaMatlab/0fc618dc-8980-414d-8c7a-144a649ce199/0000_ses/msajc010.mat], scalingFactor=[1, 4]); +#' ``` +#' +#' In both cases, `scalingFactor` is a parameter that `demoSignalScalerForOneFile` +#' and `demoSignalScalerForManyFiles` happen to accept. These are the demo functions +#' you can find by running [create_emuRdemoData]. +#' +#' The input file will typically be the media file of the bundle, but can be one +#' of the other files stored in the bundle. If you need that, use the `inputFileExtension` +#' parameter. +#' +#' The output `.mat` files that need to be written by the Matlab function will +#' be converted – by `emuR` – to `.Rda` files and saved in each bundle folder with +#' the file extension `outputFileExtension`. +#' +#' The working directory of the Matlab function will be the same as that of the +#' current R session, see [base::getwd()]. +#' +#' You need a working and licensed Matlab instance on your computer. It will be +#' called via [matlabr::run_matlab_code()]. +#' +#' Matlab is a trademark of The MathWorks, Inc. +#' +#' @examples +#' +#' \dontrun{ +#' ########################### +#' # Setting up some demo data +#' +#' library(dplyr) +#' library(ggplot2) +#' library(emuR) +#' base_dir = tempdir() +#' emuR::create_emuRdemoData(base_dir) +#' emuDBhandle = emuR::load_emuDB(file.path(base_dir, +#' "emuR_demoData", +#' "ae_emuDB")) +#' segmentList = query(emuDBhandle, "Phonetic == ei") +#' +#' ######################################################### +#' # Calling a Matlab function without additional parameters +#' +#' add_signalViaMatlab(emuDBhandle = emuDBhandle, +#' matlabFunctionName = "demoSignalScalerForOneFile", +#' outputFileExtension = "sound", +#' trackName = "unchangedSound", +#' trackColumn = "data[,1]", +#' paths_to_add = c(file.path(base_dir, +#' "emuR_demoData", +#' "add_signal_scripts", +#' "matlab"))) +#' +#' # paths_to_add tells Matlab where to find the demoSignalScalerForOneFile function. +#' # This will create a new track definition called unchangedSound. The track’s +#' # file format will be Rda. All files for this track will have the extension +#' # .sound and will contain the new signal within the variable data[,1]. +#' +#' list_ssffTrackDefinitions(emuDBhandle) +#' +#' # The "new" signal will just be a copy of the sound signal, because we have not +#' # included a scalingFactor parameter. Therefore, demoSignalScalerForOneFile will +#' # read the wav files and output them mostly unchanged (the values may be on a +#' # different scale). You can check it like this: +#' +#' td_media = get_trackdata(emuDBhandle, segmentList, "MEDIAFILE_SAMPLES") +#' td_new = get_trackdata(emuDBhandle, segmentList, "unchangedSound") +#' +#' ggplot(td_media) + +#' aes(x = times_rel, y = T1) + +#' facet_grid(vars(paste(session, bundle))) + +#' geom_line() + +#' ggtitle("Three sound signals, original") +#' ggplot(td_new) + +#' aes(x = times_rel, y = T1) + +#' facet_grid(vars(paste(session, bundle))) + +#' geom_line() + +#' ggtitle("Three sound signals, output by Matlab at new scale") +#' +#' # Observe that the two graphs look the same except for the scale. +#' +#' ########################################### +#' # Calling a Matlab function with parameters +#' +#' bundleList = +#' emuR::list_bundles(emuDBhandle = emuDBhandle) %>% +#' dplyr::rename(bundle = name) +#' parameterList = +#' bundleList %>% +#' mutate(scalingFactor = case_match(bundle, +#' "msajc022" ~ 4, +#' "msajc023" ~ 2, +#' .default = 1)) +#' add_signalViaMatlab(emuDBhandle = emuDBhandle, +#' matlabFunctionName = "demoSignalScalerForOneFile", +#' outputFileExtension = "sound2", +#' trackName = "scaledSound", +#' trackColumn = "data[,1]", +#' matlabFunctionParameters = parameterList, +#' paths_to_add = c(file.path(base_dir, +#' "emuR_demoData", +#' "add_signal_scripts", +#' "matlab"))) +#' +#' # This will create a new track definition called scaledSound: +#' +#' list_ssffTrackDefinitions(emuDBhandle) +#' +#' # The "new" signal will be a copy of the original sound signals, but two bundles +#' # will be scaled up (multiplied by a given factor). The scaling factor was determined +#' # through the parameterList data frame, which contained a column scalingFactor. +#' # If the Matlab function expected other parameters, the data frame would have to +#' # contain columns accordingly. You can see that two of the bundles have changed +#' # their scale, but the shape is still the same: +#' +#' td_media = get_trackdata(emuDBhandle, segmentList, "MEDIAFILE_SAMPLES") +#' td_scaled = get_trackdata(emuDBhandle, segmentList, "scaledSound") +#' +#' ggplot(td_media) + +#' aes(x = times_rel, y = T1) + +#' facet_grid(vars(paste(session, bundle))) + +#' geom_line() + +#' ggtitle("Three sound signals, original") +#' ggplot(td_scaled) + +#' aes(x = times_rel, y = T1) + +#' facet_grid(vars(paste(session, bundle))) + +#' geom_line() + +#' ggtitle("Three sound signals, with different scaling factors applied") +#' } +#' +#' +#' @param emuDBhandle The Emu database to work on. +#' @param matlabFunctionName Name of a Matlab function to use for signal processing. +#' Must be available on Matlab’s search path; see `paths_to_add`. +#' @param outputFileExtension The file extension for the new derived signal file +#' to be created within each bundle. +#' @param trackName The name of the new track that will be created automatically. +#' @param trackColumn The column of data to be used from the result files generated +#' by Matlab. Should usually start with `data[` or `data$`. +#' @param oneMatlabFunctionCallPerFile Whether to call `matlabFunctionName` once +#' per file (TRUE) or once for the entire database (FALSE). `FALSE` will +#' be necessary if you want Matlab to process bundles in parallel. +#' @param inputFileExtension The file extension of the files to operate on. Defaults +#' to the standard media file extension of the current Emu database. +#' @param matlabFunctionParameters Data frame with parameters for `matlabFunctionName`. +#' Needs to contain the columns `session` and `bundle` plus one column for +#' each function parameter. The column names will be used as parameter names. +#' Must contain *one row for every bundle, without exception*. +#' @param paths_to_add List of paths where Matlab will look for functions. This +#' is usually handled by [matlabr::run_matlab_code], but it adds the paths +#' *after* the code, so we need to handle it in `emuR`. +#' @param ... Other parameters are passed on to [matlabr::run_matlab_code]. +#' +#' @export +add_signalViaMatlab = function(emuDBhandle, + matlabFunctionName, + outputFileExtension, + trackName, + trackColumn, + oneMatlabFunctionCallPerFile = TRUE, + inputFileExtension = NULL, + matlabFunctionParameters = NULL, + paths_to_add = NULL, + ...) { + # By "using" these variables, we make them required arguments in terms of R UI: + emuDBhandle + matlabFunctionName + outputFileExtension + trackName + trackColumn + + if (is.null(inputFileExtension)) { + DBconfig = load_DBconfig(emuDBhandle) + inputFileExtension = DBconfig$mediafileExtension + } + + listOfFiles = + listOfFilesForExternalSignalProcessing("add_signalViaMatlab", + emuDBhandle, + inputFileExtension, + outputFileExtension) + + filenameParameters = + listOfFiles %>% + dplyr::select(.data$session, .data$bundle, .data$inputFilename, outputFilename = .data$intermediateFilename) %>% + encodeStringsAsMatlabLiterals() + + if (is.null(matlabFunctionParameters)) { + matlabFunctionParameters = tibble::tibble(session = character(), bundle = character()) + } else { + matlabFunctionParameters = + matlabFunctionParameters %>% + encodeStringsAsMatlabLiterals() + } + + matlabCommands = + dplyr::left_join(filenameParameters, + matlabFunctionParameters, + by = c("session", "bundle")) %>% + dplyr::select(-.data$session, -.data$bundle) + + if (oneMatlabFunctionCallPerFile) { + matlabCommands = + matlabCommands %>% + makeKeyValuePairs() %>% + tidyr::unite("allParameters", + tidyselect::everything(), + sep = ", ") %>% + dplyr::mutate(command = paste0(matlabFunctionName, + "(", + .data$allParameters, + ")")) + } else { + matlabCommands = + matlabCommands %>% + dplyr::summarise(dplyr::across(tidyselect::everything(), + ~ paste0(.x, collapse = ", ")), + dplyr::across(tidyselect::everything(), + ~ paste0("[", .x, "]"))) %>% + makeKeyValuePairs() %>% + tidyr::unite("allParameters", + tidyselect::everything(), + sep = ", ") %>% + dplyr::mutate(command = paste0(matlabFunctionName, + "(", + .data$allParameters, + ")")) + } + + # This is a workaround: I could just pass paths_to_add on to run_matlab_code, + # but it adds the paths *after* the code, which cannot work. + code = matlabCommands$command + if (!is.null(paths_to_add)) { + paths_to_add = matlabr::add_path(paths_to_add) + code = c(paths_to_add, code) + } + + matlabr::run_matlab_code(code, + endlines = TRUE, + ...) + + convertMatlabIntermediateFilesToRda(listOfFiles) + + add_ssffTrackDefinition(emuDBhandle = emuDBhandle, + name = trackName, + columnName = trackColumn, + fileExtension = outputFileExtension, + fileFormat = "Rda") +} + + +listOfFilesForExternalSignalProcessing = function(functionName, + emuDBhandle, + inputFileExtension, + outputFileExtension) { + listOfFiles = + list_files(emuDBhandle, inputFileExtension) %>% + dplyr::mutate(inputFilename = .data$absolute_file_path, + outputFilename = file.path(emuDBhandle$basePath, + paste0(.data$session, session.suffix), + paste0(.data$bundle, bundle.dir.suffix), + paste0(.data$bundle, ".", outputFileExtension)), + intermediateDir = file.path(tempdir(), + functionName, + emuDBhandle$UUID, + paste0(.data$session, session.suffix)), + intermediateFilename = file.path(.data$intermediateDir, + paste0(.data$bundle, ".mat"))) + + fs::dir_create(path = listOfFiles$intermediateDir, recurse = TRUE) + + return(listOfFiles) +} + + +# Converts .mat files as stored by external Matlab code to .Rda files. +# +# The argument files must be a data frame with the columns `intermediateFilename` +# and `outputFilename`. The `.mat` file must exist at the intermediate path, and +# the `.Rda` file will be created at the output path. +# +convertMatlabIntermediateFilesToRda = function(files) { + for (i in rownames(files)) { + currentFile = files[i,] + + rawData = R.matlab::readMat(currentFile$intermediateFilename) + + data = rawData$data + units = rawData$units + sampleRate = rawData$sampleRate[1,1] + startTime = rawData$startTime[1,1] + comment = rawData$comment[1,1] + + save(data, + units, + sampleRate, + startTime, + comment, + file = currentFile$outputFilename) + } +} + + +# Expects a data frame and transforms all character-typed columns except those +# named "session" or "bundle". +# +# The transformation includes: +# - escaping all double quotes (") in the values as two double quotes ("") +# - adding one double quote to the beginning and end of each value +# +encodeStringsAsMatlabLiterals = function(matlabFunctionParameters) { + for (column in colnames(matlabFunctionParameters)) { + if (column == "session" || column == "bundle") { + next + } + + columnType = typeof(dplyr::pull(matlabFunctionParameters, column)) + + if (columnType == "character") { + matlabFunctionParameters[column] = + stringr::str_replace_all(dplyr::pull(matlabFunctionParameters, column), + '"', + '""') + + matlabFunctionParameters[column] = + paste0('"', + dplyr::pull(matlabFunctionParameters, column), + '"') + } + } + + return(matlabFunctionParameters) +} + + +# Expects a data frame and transforms all columns except those named "session" +# or "bundle". +# +# The transformation consists of prepending each value with "columnName=". +# +makeKeyValuePairs = function(data) { + for (column in colnames(data)) { + if (column == "session" || column == "bundle") { + next + } + + data[column] = paste0(column, + "=", + dplyr::pull(data, column)) + } + + return(data) +} diff --git a/R/emuR-annotations_crud.R b/R/emuR-annotations_crud.R new file mode 100644 index 00000000..05de39ae --- /dev/null +++ b/R/emuR-annotations_crud.R @@ -0,0 +1,993 @@ +##' Create new items programmatically +##' +##' @description Create annotation items programmatically on a single level. +##' You have to pass in a data frame, called `itemsToCreate`, describing +##' the new items. The required columns depend on the type of the level (ITEM, +##' EVENT, or SEGMENT). +##' +##' This function belongs to emuR’s CRUD family of functions, which let the user +##' manipulate items programmatically: +##' +##' * Create items ([create_itemsInLevel]) +##' * Read items ([query]) +##' * Update items ([update_itemsInLevel]) +##' * Delete items ([delete_itemsInLevel])) +##' +##' @details +##' This function creates new annotation items on an existing level, in existing +##' bundles. +##' +##' Regardless of the type of level you are creating items on, your input data +##' frame `itemsToCreate` must describe your new items by specifying the columns +##' `session`, `bundle`, `level`, `attribute` and `labels`. `level` must have the +##' same value for all rows, as we can only create items on one level at a time. +##' +##' `attribute` must also have the same value for all rows, and it must be an +##' existing attribute that belongs to the `level`. +##' +##' A major use case for this function is to obtain a segment list using [query], +##' modify the segment list and feed it to this function. That is why the column +##' `labels` has a plural name: segment lists also have a column `labels` and +##' not `label`. The same is true for the sequence index columns introduced below. +##' +##' Creating new items works differently depending on the level type. The three +##' types are explained in the following sections. +##' +##' ## Levels of type ITEM +##' +##' In addition to the columns that are always required, ITEM-typed levels require +##' a column with a sequence index to be present in the `itemsToCreate` data +##' frame. Its name must be `start_item_seq_idx`. This name was chosen instead +##' of `sequence_index` because it is present as a column name in segment lists +##' obtained with [query]. That makes it easer to use a segment list as input to +##' [create_itemsInLevel()]. +##' +##' Along the time axis, there can be multiple annotation items on every level. +##' Their order within the level is given by their sequence index. All *existing* +##' items have a natural-valued sequence index and there are no gaps in the +##' sequences (i.e. if a level contains N annotation items, they are indexed 1..N). +##' +##' Any newly created item must be given a sequence index. The sequence index may +##' be real-valued (it will automatically be replaced with a natural value). To +##' prepend the new item to the existing ones, pass a value lower than one. To +##' append it to the existing items, you can either pass `NA` or any value that +##' you know is greater than N (the number of existing items in that level). It +##' does not need to be exactly N+1. To place the new item between two existing +##' ones, use any real value between the sequence indexes of the existing neighbors. +##' +##' If you are appending multiple items at the same time, every sequence index +##' (including `NA`) can only be used once per session/bundle/level combination +##' (because session/bundle/level/sequence index are the unique identifier of an +##' item). +##' +##' After creating the items, all sequence indexes (which may now be real-valued, +##' natural-valued or NA) are sorted in ascending order and then replaced with +##' the values 1..N, where N is the number of items on that level. While sorting, +##' `NA` values are placed at the end. +##' +##' ## Levels of type EVENT +##' +##' In addition to the columns that are always required, EVENT-typed levels require +##' a column with the time of the event to be present in the `itemsToCreate` data +##' frame. Its name must be `start`. This name was chosen because it is present +##' as a column name in segment lists obtained with [query]. That makes it easer +##' to use a segment list as input to [create_itemsInLevel()]. The `end` column +##' in segment lists is 0 for EVENT-typed levels. +##' +##' The `start` column must be given in milliseconds. +##' +##' You cannot create an EVENT item at a point on the time axis where another +##' item already exists on the same level. If you specify such an event, the +##' entire function will fail. +##' +##' ## Levels of type SEGMENT +##' +##' You can only create SEGMENT-typed items in bundles where the respective level +##' is empty. +##' +##' In addition to the columns that are always required, SEGMENT-typed levels +##' require the column `start` to be present in the `itemsToCreate` data frame, +##' representing the start time of the segment. It must be given in milliseconds. +##' +##' Segments also need to have an end, and there are two strategies to determine +##' the end. Either, you explicitly provide an `end` column in the `itemsToCreate` +##' data frame. It must be given in milliseconds. If you do that, you have to +##' specify the `calculateEndTimeForSegments` parameter as `FALSE`. +##' +##' Alternatively, you can leave `calculateEndTimeForSegments` at `TRUE` (which +##' is the default) and provide your `itemsToCreate` data frame without an `end` +##' column. In that case, the end time will be aligned to the next neighbor’s +##' start time. The end time of the last segment will be aligned with the end of +##' the annotated media file. +##' +##' +##' @param emuDBhandle emuDB handle as returned by [load_emuDB] +##' @param itemsToCreate A data frame with the columns: +##' * `session` (character) +##' * `bundle` (character) +##' * `level` (character) +##' * `attribute` (character) +##' * `labels` (character) +##' * `start_item_seq_idx` (numeric; only when `level` refers to a ITEM-typed +##' level) +##' * `start` (numeric, milliseconds; only when `level` refers to an EVENT-typed +##' or SEGMENT-typed level) +##' * `end` (numeric, milliseconds; only when `level` refers to a SEGMENT-typed +##' level and `calculateEndTimeForSegments` is `FALSE`) +##' @param calculateEndTimeForSegments *Only applicable if the level type is SEGMENT.* +##' If set to `TRUE`, then each segment’s end time is automatically aligned +##' with the start time of the following segment. In that case, user-provided +##' end times are ignored. The last segment’s end time is the end time of the +##' annotated media file. If set to `FALSE`, then the user has to provide +##' an end time for each segment. +##' @param allowGapsAndOverlaps *Only applicable if the level type is SEGMENT +##' and `calculateEndTimeForSegments` is `FALSE`.* +##' If set to `FALSE`, this function fails when `itemsToCreate` contains +##' gaps or overlaps between segments. The offending segments are returned invisibly. +##' You can inspect them by assigning the return value to a variable. The return +##' value will include a new column `gap_samples` that indicates the size +##' of the gap (positive values) or overlap (negative values) with the previous +##' segment, respectively. It is measured in audio samples, not in milliseconds. +##' Setting this to `TRUE` allows the function to complete even with gaps +##' and/or overlaps, but this is **not recommended as it can cause bugs in +##' the EMU-webApp**. +##' @param rewriteAllAnnots should changes be written to file system (_annot.json +##' files) (intended for expert use only) +##' @param verbose if set to `TRUE`, more status messages are printed +##' +##' @export +##' @importFrom rlang .data +##' @md + create_itemsInLevel = function(emuDBhandle, + itemsToCreate, + calculateEndTimeForSegments = TRUE, + allowGapsAndOverlaps = FALSE, + rewriteAllAnnots = TRUE, + verbose = TRUE) { + # check that only one level is provided + levelName = unique(itemsToCreate$level) + if(length(levelName) > 1 || length(levelName) < 1) { + stop("'itemsToCreate' contains multiple levels or none at all! The created ITEMs have to be on the same level!") + } + + # check that only one attribute is provided (currenlty only single attributes allowed) + attributeName = unique(itemsToCreate$attribute) + if(length(levelName) > 1){ + stop("'itemsToCreate' contains multiple attributes! The created ITEMs have to be on the same attribute!") + } + # @FIXME make sure the provided attribute belongs to the provided level + + ## check the level exists and has a known type (other types can only exist when the Emu system is changed fundamentally) + levelDefinition = get_levelDefinition(emuDBhandle, levelName) + if(is.null(levelDefinition)) stop("level '", levelName, "' doesn't exist!") + if (!(levelDefinition$type %in% c("ITEM", "EVENT", "SEGMENT"))) { + stop(paste0("The level:", levelName, " provided is not of type ITEM, EVENT or SEGMENT")) + } + + ## check that all required columns are available + required_colnames = c("session", "bundle", "level", "attribute", "labels") + if(levelDefinition$type == "ITEM"){ + required_colnames = c(required_colnames, "start_item_seq_idx") + }else if(levelDefinition$type == "EVENT"){ + required_colnames = c(required_colnames, "start") + }else if(levelDefinition$type == "SEGMENT"){ + if (calculateEndTimeForSegments) { + required_colnames = c(required_colnames, "start") + if ("end" %in% names(itemsToCreate)) { + stop("itemsToCreate contains an 'end' column, but calculateEndTimeForSegments is TRUE; please decide how end times should be determined.") + } + } else { + required_colnames = c(required_colnames, "start", "end") + } + } + + if(!all(required_colnames %in% names(itemsToCreate))){ + stop(paste0("Not all required columns are available in itemsToCreate data.frame! ", + "The required columns are: ", paste(required_colnames, collapse = "; "))) + } + + ## check types of columns + if(!is.character(itemsToCreate$session) | + !is.character(itemsToCreate$bundle) | + !is.character(itemsToCreate$level) | + !is.character(itemsToCreate$attribute) | + !is.character(itemsToCreate$labels) + ){ + stop(paste0("Not all columns match the required type!")) + } + if(levelDefinition$type == "ITEM"){ + if(!is.numeric(itemsToCreate$start_item_seq_idx)) stop(paste0("Not all columns match the required type!")) + }else if(levelDefinition$type == "EVENT"){ + if(!is.numeric(itemsToCreate$start)) stop(paste0("Not all columns match the required type!")) + }else if(levelDefinition$type == "SEGMENT"){ + if(!is.numeric(itemsToCreate$start)) stop(paste0("Not all columns match the required type!")) + if(!calculateEndTimeForSegments) { + if(!is.numeric(itemsToCreate$end)) stop(paste0("Not all columns match the required type!")) + } + } + # rename labels column to label to match labels SQL table column name + colnames(itemsToCreate)[colnames(itemsToCreate)=="labels"] <- "label" + + ## check that every session/bundle combination exists + bundleList = list_bundles(emuDBhandle) + + invalidItems = dplyr::anti_join(x = itemsToCreate, + y = bundleList, + by = c("session", + "bundle" = "name")) + + if (nrow(invalidItems) != 0) { + stop("Some of the session/bundle combinations provided are invalid: ", invalidItems) + } + + # extract all items + items_all = DBI::dbReadTable(emuDBhandle$connection, "items") + + itemsToUpdate = NULL + + ## for ITEM levels + if(levelDefinition$type == "ITEM"){ + + ## Make sure all sequence indexes are unique within their respective level/attribute pair. + itemsToCreate %>% + dplyr::group_by(.data$session, .data$bundle, .data$level, .data$attribute) %>% + dplyr::do(ensureSequenceIndexesAreUnique(.data)) + + ## check for conflicting seq index + in_both = dplyr::inner_join(itemsToCreate, + items_all, + by = c("session", + "bundle", + "level", + "start_item_seq_idx" = "seq_idx")) + if(nrow(in_both) > 0){ + stop("Found existing items with same 'session', 'bundle', 'level', 'start_item_seq_idx'!") + } + } + ## for EVENT + if(levelDefinition$type == "EVENT"){ + + # get sample_rate vector + bundles_df = DBI::dbReadTable(emuDBhandle$connection, "bundle") + sample_rate = dplyr::left_join(itemsToCreate, + bundles_df, + by = c("session", "bundle" = "name")) %>% + dplyr::select(dplyr::starts_with("sample_rate")) + + sample_rate = dplyr::as_tibble(sample_rate) # incase it isn't already + sample_rate = dplyr::pull(sample_rate[,ncol(sample_rate)]) # relevant when multiple sample_rate.x sample_rate.y cols are present + + + # calc. sample_point + itemsToCreate$sample_point = round((itemsToCreate$start / 1000) * sample_rate) + + # check that times don't exist + in_both = dplyr::inner_join(itemsToCreate, + items_all, + by = c("session", + "bundle", + "level", + "sample_point")) + + if(nrow(in_both) > 0){ + stop("Found existing items with same 'session', 'bundle', 'level', 'sample_point'!") + } + + # calculate correct start_item_seq_idx + + # find existing items on same levels that are in itemsToCreate + items_exist_in_levels = dplyr::left_join(items_all, + itemsToCreate, + by = c("session", + "bundle", + "level"), + relationship = "many-to-many") + + items_exist_in_levels = items_exist_in_levels[!is.na(items_exist_in_levels$db_uuid.y),] + + # create data.frame object that contains them both + # that can be used to sort by sample_point to calcualte start_item_seq_idx + items_to_sort = rbind( + dplyr::tibble(session = items_exist_in_levels$session, + bundle = items_exist_in_levels$bundle, + level = items_exist_in_levels$level, + attribute = attributeName, + label = "XXX", + item_id = items_exist_in_levels$item_id, + sample_point = items_exist_in_levels$sample_point.x, + item_from = "items_exist_in_levels" + ), + dplyr::tibble(session = itemsToCreate$session, + bundle = itemsToCreate$bundle, + level = itemsToCreate$level, + attribute = itemsToCreate$attribute, + label = itemsToCreate$label, + item_id = -1, + sample_point = itemsToCreate$sample_point, + item_from = "itemsToCreate" + ) + ) + + items_sorted = items_to_sort %>% + dplyr::group_by(.data$session, .data$bundle, .data$level) %>% + dplyr::arrange(.data$sample_point, .by_group = TRUE) %>% + dplyr::mutate(start_item_seq_idx = dplyr::row_number()) + + itemsToCreate = items_sorted %>% + dplyr::filter(.data$item_from == "itemsToCreate") %>% + dplyr::ungroup() + + itemsToUpdate = items_sorted %>% + dplyr::filter(.data$item_from == "items_exist_in_levels") %>% + dplyr::ungroup() + + + + } + ## for SEGMENT + if(levelDefinition$type == "SEGMENT"){ + + # get sample_rate vector + bundles_df = DBI::dbReadTable(emuDBhandle$connection, "bundle") + sample_rate = dplyr::left_join(itemsToCreate, + bundles_df, + by = c("session", "bundle" = "name")) %>% + dplyr::select(dplyr::starts_with("sample_rate")) + + sample_rate = dplyr::as_tibble(sample_rate) # in case it isn't already + sample_rate = dplyr::pull(sample_rate[,ncol(sample_rate)]) # relevant when multiple sample_rate.x sample_rate.y cols are present + + + # calculate sample_start based on user input + itemsToCreate$sample_start = round((itemsToCreate$start / 1000 + 0.5 / sample_rate) * sample_rate) + + if (calculateEndTimeForSegments == TRUE) { + # Ignore user values and calculate each segment’s sample end based on the + # following segment’s sample start. For the last segment, use the annotated + # media file’s duration as sample_end. + + itemsToCreate %>% + dplyr::group_by(.data$session, .data$bundle, .data$level) %>% + dplyr::arrange(.data$sample_start, .by_group = TRUE) %>% + dplyr::mutate(sample_end = dplyr::lead(.data$sample_start) - 1) -> itemsToCreate + + # test if no duplicate sample_start values exists + itemsToCreate %>% + dplyr::select("session", "bundle", "level", "sample_start") %>% + dplyr::distinct() -> distinct_sample_start_rows + + if(nrow(distinct_sample_start_rows) != nrow(itemsToCreate)){ + stop("Found duplicate sample_start values on same level") + } + + # fix end times of last segments (set to length of wavs) + # -> currently NA due to dplyr::lead not having a next value 4 them + last_items = itemsToCreate[is.na(itemsToCreate$sample_end),] + + wav_paths = file.path(emuDBhandle$basePath, + paste0(last_items$session, session.suffix), + paste0(last_items$bundle, bundle.dir.suffix), + paste0(last_items$bundle, ".wav")) + + itemsToCreate[is.na(itemsToCreate$sample_end),]$sample_end = sapply(wav_paths, FUN = function(wav_path){attr(wrassp::read.AsspDataObj(wav_path), "endRecord")}) + } else { + # Use user-provided itemsToCreate$end to determine each segment’s sample_end + itemsToCreate$sample_end = round((itemsToCreate$end / 1000 - 0.5 / sample_rate) * sample_rate) + + + # Make sure no negative-duration items and no zero-length items and no + # items shorter than one sample exist. + # + # The straightforward way would be to filter for: + # end <= start + # But rounding and float precision make it more intricate. + segments_with_duration_below_one_sample = itemsToCreate %>% + dplyr::filter( (.data$end - .data$start)/1000 < 1/sample_rate # This rule focuses on user input (measured in milliseconds). + | .data$sample_end < .data$sample_start) # This rule focuses on the derived values that will actually be used (measured in samples). + # + # Generally, the second rule should follow from the first (but not the first from the second). + # + # But I recently learned that R’s round() does round-half-to-even and not + # round-half-up, and I have been trying to grok the consequences w.r.t. + # to the +/- 0.5 terms in emuR’s conversion rules between seconds and samples. + # + # And I can no longer wrap my head around whether there are consequences + # for the relationship between these two filter rules. So I’ll just leave + # them both, since that won’t hurt. + + # Note: sample_end LESS THAN sample_start is not allowed, but + # sample_end EQUALS sample_start would be allowed. On the other hand, + # end EQUALS start (measured in seconds or milliseconds) would *not* be + # allowed. That’s due to a subtlety of the _annot.json format, which + # specifies sampleStart and sampleDur(ation), but not sampleEnd. + # + # Per definition: + # sample_start == sample_end <=> sampleDur == 0 + # + # Also per definition: sampleDur==0 means that the segment only spans its + # starting sample. Converted to seconds, this is 1/sampleRate and NOT 0. + # Therefore, sampleDur==0 is not actually zero-length, and is therefore + # allowed. And so is sample_start == sample_end. But not start==end. + # + # Reference: EMU-SDMS Manual, Chapter File Formats, description of sampleDur. + + if (nrow(segments_with_duration_below_one_sample) != 0) { + warning(paste("itemsToCreate contains", + nrow(segments_with_duration_below_one_sample), + "segments with duration below one sample or even negative. This is never allowed.", + "Inspect this function's return value to see them. Exiting.")) + return(invisible(segments_with_duration_below_one_sample)) + } + + if (!allowGapsAndOverlaps) { + segments_with_gaps_or_overlap = itemsToCreate %>% + dplyr::group_by(.data$session, .data$bundle) %>% + dplyr::arrange(.data$sample_start, .by_group = TRUE) %>% + dplyr::mutate(gap_samples = .data$sample_start - dplyr::lag(.data$sample_end) - 1) %>% + dplyr::filter(!is.na(.data$gap_samples) & .data$gap_samples != 0) + + if (nrow(segments_with_gaps_or_overlap) != 0) { + warning(paste("itemsToCreate contains", + nrow(segments_with_gaps_or_overlap), + "segments with gaps between them or an overlap with their predecessor.", + "Inspect this function's return value to see them, and check", + "the documentation of the parameter 'allowGapsAndOverlaps'", + "for details. Exiting.")) + return(invisible(segments_with_gaps_or_overlap)) + } + } + } + + # check if there are already any segments in bundles + + sl = query(emuDBhandle, + query = paste0(unique(itemsToCreate$level), " =~ .* "), + sessionPattern = paste0(unique(itemsToCreate$session), collapse = "|"), + bundlePattern = paste0(unique(itemsToCreate$bundle), collapse = "|")) + + if(nrow(sl) != 0){ + stop("SEGMENT items already exist on the specified bundles & level. This is not permitted!") + } + + itemsToCreate %>% + dplyr::group_by(.data$session, .data$bundle, .data$level) %>% + dplyr::arrange(.data$sample_start, .by_group = TRUE) %>% + dplyr::mutate(start_item_seq_idx = 1:(dplyr::n())) -> itemsToCreate + } + + ## + ## Get the label index for each attribute (the label index marks the order of attributes within their level) + ## + itemsToCreate$labelIndex = get_labelIndex(emuDBhandle = emuDBhandle, + levelName = itemsToCreate$level, + attributeName = itemsToCreate$attribute) + + + # remove old tmp tables if they exist + remove_annotCrudTmpTables(emuDBhandle) + + ## + ## Copy items data into temporary table + ## + create_annotCrudTmpTables(emuDBhandle) + + # update start_item_seq_idx in items_annot_crud_tmp table with itemsToUpdate + if(!is.null(itemsToUpdate)){ + if(nrow(itemsToUpdate) >= 1){ + statement = DBI::dbSendStatement( + emuDBhandle$connection, + paste0("UPDATE items_annot_crud_tmp ", + "SET seq_idx = ? ", + "WHERE db_uuid = ? ", + " AND session = ? ", + " AND bundle = ? ", + " AND item_id = ?")) + + DBI::dbBind( + statement, + list( + itemsToUpdate$start_item_seq_idx, + rep(emuDBhandle$UUID,length(itemsToUpdate$start_item_seq_idx)), + itemsToUpdate$session, + itemsToUpdate$bundle, + itemsToUpdate$item_id + ) + ) + + DBI::dbClearResult(statement) + + } + } + + ## + ## Split the item list into individual items (identified by the session/bundle/level/sequenceIndex tuple), + ## and proceed separately for each of them + ## + itemsToCreate %>% + dplyr::group_by(.data$session, + .data$bundle, + .data$level, + .data$start_item_seq_idx) %>% + dplyr::do(insertItemIntoDatabase(emuDBhandle, + .data, + levelDefinition$type)) + + + ## + ## Rewrite sequence indexes + ## + rewrite_allSequenceIndexes(emuDBhandle) + ## @todo fail if the user is trying to add sequence indexes that are already + ## there - or accept it silently, producing undefined order? + + ## + ## Move data from temporary items table back to normal table + ## + moveback_annotCrudTmpTables(emuDBhandle) + + if (rewriteAllAnnots) { + rewrite_annots(emuDBhandle, + verbose = verbose) + } + + invisible(NULL) +} + +## A function called read_itemsInLevel will not exist - query() is the function that does the job. +# read_itemsInLevel = function (...) + + +##' Update items programmatically +##' +##' @description Update annotation items programmatically. You have to pass in a +##' data frame, called `itemsToUpdate`, describing the new state of the items. +##' The required columns are described below. +##' +##' This function belongs to emuR’s CRUD family of functions, which let the user +##' manipulate items programmatically: +##' +##' * Create items ([create_itemsInLevel]) +##' * Read items ([query]) +##' * Update items ([update_itemsInLevel]) +##' * Delete items ([delete_itemsInLevel])) +##' +##' @param emuDBhandle emuDB handle as returned by [load_emuDB] +##' @param itemsToUpdate A data frame with the columns: +##' * `session` (character) +##' * `bundle` (character) +##' * `level` (character) +##' * `start_item_seq_idx` (character) +##' * `attribute` (character) +##' * `labels` (character) +##' @param rewriteAllAnnots should changes be written to file system (_annot.json +##' files) (intended for expert use only) +##' @param verbose if set to `TRUE`, more status messages are printed +##' +##' @export +##' @md +update_itemsInLevel = function (emuDBhandle, + itemsToUpdate, + rewriteAllAnnots = TRUE, + verbose = TRUE) { + + + ## + ## Find the index of each attribute definition on its respective level + ## + itemsToUpdate$label_index = get_labelIndex(emuDBhandle = emuDBhandle, + levelName = itemsToUpdate$level, + attributeName = itemsToUpdate$attribute) + + + ## + ## First thing, make sure all the items whose labels are gonna be changed do exist + ## + statement = DBI::dbSendStatement( + emuDBhandle$connection, + paste0("SELECT count(*) ", + "FROM items ", + "WHERE db_uuid = ? ", + " AND session = ? ", + " AND bundle = ? ", + " AND level = ? ", + " AND seq_idx = ?")) + DBI::dbBind( + statement, + list( + rep(emuDBhandle$UUID, nrow(itemsToUpdate)), + itemsToUpdate$session, + itemsToUpdate$bundle, + itemsToUpdate$level, + itemsToUpdate$start_item_seq_idx + ) + ) + + existenceMatrix = DBI::dbFetch(statement) + DBI::dbClearResult(statement) + + if (!all(existenceMatrix)) { + total = nrow(itemsToUpdate) + notFound = length(existenceMatrix[existenceMatrix == 0, ]) + + warning (paste( + "Error:", + notFound, + "of the", + total, + "specified items do/does not exist" + )) + + if (verbose) { + warning(itemsToUpdate[existenceMatrix == 0, ]) + } else { + warning ("Set verbose to TRUE to see them listed.") + } + + return (invisible(NULL)) + } + + # get item_ids of matching entries + statement = DBI::dbSendStatement( + emuDBhandle$connection, + paste0("SELECT item_id ", + "FROM items ", + "WHERE db_uuid = ? ", + " AND session = ? ", + " AND bundle = ? ", + " AND level = ? ", + " AND seq_idx = ?")) + + DBI::dbBind( + statement, + list( + rep(emuDBhandle$UUID, nrow(itemsToUpdate)), + itemsToUpdate$session, + itemsToUpdate$bundle, + itemsToUpdate$level, + itemsToUpdate$start_item_seq_idx + ) + ) + + item_id_list = DBI::dbFetch(statement) + DBI::dbClearResult(statement) + + statement = DBI::dbSendStatement( + emuDBhandle$connection, + paste0("INSERT OR REPLACE INTO labels (", + " db_uuid, ", + " session, ", + " bundle, ", + " item_id, ", + " label_idx, ", + " name, ", + " label ", + ") VALUES (?, ?, ?, ?, ?, ?, ?)")) + # rename labels column to label to match labels SQL table column name + colnames(itemsToUpdate)[colnames(itemsToUpdate) == "labels"] <- "label" + + DBI::dbBind( + statement, + list( + rep(emuDBhandle$UUID, nrow(itemsToUpdate)), + itemsToUpdate$session, + itemsToUpdate$bundle, + item_id_list$item_id, + itemsToUpdate$label_index, + itemsToUpdate$attribute, + itemsToUpdate$label + ) + ) + rowsAffected = DBI::dbGetRowsAffected(statement) + DBI::dbClearResult(statement) + + if (rewriteAllAnnots) { + rewrite_annots(emuDBhandle, + bundles = unique(data.frame(session = itemsToUpdate$session, + name = itemsToUpdate$bundle)), + verbose = verbose) + } + + invisible(NULL) +} + + + +##' Delete items programmatically +##' +##' @description Delete annotation items programmatically. You have to pass in a +##' data frame, called `itemsToDelete`, describing these items. The required +##' columns are described below. +##' +##' This function belongs to emuR’s CRUD family of functions, which let the user +##' manipulate items programmatically: +##' +##' * Create items ([create_itemsInLevel]) +##' * Read items ([query]) +##' * Update items ([update_itemsInLevel]) +##' * Delete items ([delete_itemsInLevel])) +##' +##' @details +##' This function deletes annotation items from existing levels. Your input data +##' frame `itemsToDelete` must describe the items by specifying the columns +##' `session`, `bundle`, and `start_item_id`. +##' +##' Be careful with this function: You can use it to create problematic situations, +##' for example gaps in the annotation levels, and the function currently has +##' no checks to prevent this. Instead, you need to explicitly confirm that you +##' are aware of this, either by setting `sayYes` to `TRUE` or by interactively +##' responding yes to the prompt this function presents. +##' +##' A major use case for this function is to obtain a segment list using [query], +##' possibly modify the segment list and feed it to this function. That is why +##' the column `start_item_id` is not called `item_id`: segment lists include +##' the former column name, not the latter. +##' +##' @param emuDBhandle emuDB handle as returned by [load_emuDB] +##' @param itemsToDelete A data frame with the columns: +##' * `session` (character) +##' * `bundle` (character) +##' * `start_item_id` (numeric) +##' @param sayYes When you call this function, it warns you about problems it +##' may create. You can skip that question if you set the `sayYes` parameter to +##' TRUE. This is useful when you want to use the function non-interactively. +##' @param rewriteAllAnnots should changes be written to file system (_annot.json +##' files) (intended for expert use only) +##' @param verbose if set to `TRUE`, more status messages are printed +##' +##' @export +##' @md +delete_itemsInLevel = function (emuDBhandle, + itemsToDelete, + sayYes = FALSE, + rewriteAllAnnots = TRUE, + verbose = TRUE) { + + if (!sayYes) { + input_key <- readline(prompt = paste( + "Currently no checks are performed so use at own risk!", + "This could, for example, create gaps in the annotation.", + "Do you wish to continue anyway (y/N)? ", + sep = "\n") + ) + if(input_key != "y") return() + } + + + # check that all required columns are present + required_colnames = c("session", "bundle", "start_item_id") + + if(!all(required_colnames %in% names(itemsToDelete))){ + stop(paste0("Not all required columns are available in itemsToDelete data.frame! ", + "The required columns are: ", paste(required_colnames, collapse = "; "))) + } + + + # check types of columns + if(!is.character(itemsToDelete$session) | + !is.character(itemsToDelete$bundle) | + !is.numeric(itemsToDelete$start_item_id) + ){ + stop(paste0("Not all columns match the required type!")) + } + + + # check that no sequences are present (i.e. rows spanning more than one item). + # sequences can come from querying thing like [Phonetic == a -> Phonetic == b]. + if ("end_item_id" %in% names(itemsToDelete)) { + items_that_are_really_sequences = itemsToDelete %>% + dplyr::filter(.data$start_item_id != .data$end_item_id) + + if (nrow(items_that_are_really_sequences) != 0) { + warning(paste("itemsToDelete contains", + nrow(items_that_are_really_sequences), + "rows that span more than one item (start_item_id !=", + "end_item_id). This is not allowed. Inspect this function's", + "return value to see them. Exiting.")) + return(invisible(items_that_are_really_sequences)) + } + } + + + # remove old tmp tables if they exist + remove_annotCrudTmpTables(emuDBhandle) + + ## + ## Copy items data into temporary table + ## + create_annotCrudTmpTables(emuDBhandle) + + + ######################### + # items + + statement = DBI::dbSendStatement( + emuDBhandle$connection, + paste0("DELETE FROM items_annot_crud_tmp ", + "WHERE db_uuid = ? ", + " AND session = ? ", + " AND bundle = ? ", + " and item_id = ?")) + + DBI::dbBind( + statement, + list( + rep(emuDBhandle$UUID, nrow(itemsToDelete)), + itemsToDelete$session, + itemsToDelete$bundle, + itemsToDelete$start_item_id + ) + ) + + + numberOfDeletedItems = DBI::dbGetRowsAffected(statement) + DBI::dbClearResult(statement) + + ######################### + # labels + + statement = DBI::dbSendStatement( + emuDBhandle$connection, + paste0("DELETE FROM labels_annot_crud_tmp ", + "WHERE db_uuid = ? ", + " AND session = ? ", + " AND bundle = ? ", + " and item_id = ?")) + + DBI::dbBind( + statement, + list( + rep(emuDBhandle$UUID, nrow(itemsToDelete)), + itemsToDelete$session, + itemsToDelete$bundle, + itemsToDelete$start_item_id + ) + ) + + + numberOfDeletedLabels = DBI::dbGetRowsAffected(statement) + DBI::dbClearResult(statement) + + ######################### + # links from + + statement = DBI::dbSendStatement( + emuDBhandle$connection, + paste0("DELETE FROM links_annot_crud_tmp ", + "WHERE db_uuid = ? ", + " AND session = ? ", + " AND bundle = ? ", + " and from_id = ?")) + + DBI::dbBind( + statement, + list( + rep(emuDBhandle$UUID, nrow(itemsToDelete)), + itemsToDelete$session, + itemsToDelete$bundle, + itemsToDelete$start_item_id + ) + ) + + + numberOfDeletedLinks = DBI::dbGetRowsAffected(statement) + DBI::dbClearResult(statement) + + ######################### + # links to + + statement = DBI::dbSendStatement( + emuDBhandle$connection, + paste0("DELETE FROM links_annot_crud_tmp ", + "WHERE db_uuid = ? ", + " AND session = ? ", + " AND bundle = ? ", + " and to_id = ?")) + + DBI::dbBind( + statement, + list( + rep(emuDBhandle$UUID, nrow(itemsToDelete)), + itemsToDelete$session, + itemsToDelete$bundle, + itemsToDelete$start_item_id + ) + ) + + + numberOfDeletedLinks = numberOfDeletedLinks + DBI::dbGetRowsAffected(statement) + DBI::dbClearResult(statement) + + + ## Rewrite sequence indexes + ## + rewrite_allSequenceIndexes(emuDBhandle) + + ## + ## Move data from temporary items table back to normal table + ## + moveback_annotCrudTmpTables(emuDBhandle) + + + print(paste0("Deleted ", + numberOfDeletedItems, + " item(s), ", + numberOfDeletedLabels, + " label(s) and ", + numberOfDeletedLinks, + " link(s).")) + + if (rewriteAllAnnots) { + rewrite_annots(emuDBhandle, + verbose = verbose) + } + + invisible(NULL) +} + +##' create links between items +##' +##' @param emuDBhandle emuDB handle as returned by \code{\link{load_emuDB}} +##' @param links data.frame like object containing linking information. The required columns +##' are: +##' \itemize{ +##' \item \code{session}: +##' \item \code{bundle} +##' \item \code{from_id} +##' \item \code{to_id} +##' } +##' @param rewriteAllAnnots should changes be written to file system (_annot.json +##' files) (intended for expert use only) +##' @param verbose if set to \code{TRUE}, more status messages are printed +##' @export +create_links = function(emuDBhandle, + links, + rewriteAllAnnots = TRUE, + verbose = TRUE) { + + input_key <- readline(prompt = "Currently no checks are performed so use at own risk! Do you wish to continue anyway (y/N)? ") + if(input_key != "y") return() + + # todo check if items are all present in database + # todo check that all links are valid + # todo check that no links cross each other + + + statement = DBI::dbSendStatement( + emuDBhandle$connection, + paste0("INSERT INTO links (", + " db_uuid, ", + " session, ", + " bundle, ", + " from_id, ", + " to_id, ", + " label", + ") VALUES (?, ?, ?, ?, ?, NULL)")) + + DBI::dbBind( + statement, + list( + rep(emuDBhandle$UUID, nrow(links)), + links$session, + links$bundle, + links$from_id, + links$to_id + ) + ) + + DBI::dbClearResult(statement) + + if (rewriteAllAnnots) { + rewrite_annots(emuDBhandle, + verbose = verbose) + } + + invisible(NULL) +} + + +####################### +# FOR DEVELOPMENT +# library('testthat') +# test_file('tests/testthat/test_aaa_initData.R') +# test_file('tests/testthat/test_emuR-annotations_crud.R') +# test_file('tests/testthat/test_zzz_cleanUp.R') diff --git a/R/emuR-autobuild.R b/R/emuR-autobuild.R new file mode 100644 index 00000000..b5cc102c --- /dev/null +++ b/R/emuR-autobuild.R @@ -0,0 +1,376 @@ +##' Autobuild links between two levels using their time information +##' +##' Autobuild links between two time levels. This is typically done when converting from +##' a database / annotation format that allows parallel time tiers / levels but does +##' not permit annotational units to be linked to each other, except by +##' matching time information (such as Praat's TextGrid format). The super-level has to be of the +##' type SEGMENT and the sub-level either of type EVENT or of type SEGMENT. If +##' this is the case and a according link definition is defined for the emuDB, +##' this function automatically links the events or segments of the sub-level which occur +##' within (startSample to (startSample + sampleDur)) the segments of the super-level to those segments. +##' +##' The type of link definition (ONE_TO_MANY, MANY_TO_MANY, ONE_TO_ONE) is relevant whether a link +##' is generated or not (e.g. overlapping segments are linked in a MANY_TO_MANY relationship +##' but not in a ONE_TO_MANY relationship). For more information on the structural +##' elements of an emuDB see \code{vignette(emuDB)}. +##' +##' @param emuDBhandle emuDB handle as returned by \code{\link{load_emuDB}} +##' @param superlevelName name of level to link from (link definition required in emuDB) +##' @param sublevelName name of level to link to (link definition required in emuDB) +##' @param rewriteAllAnnots should changes be written to file system (_annot.json files) after +##' completing autobuild process (intended for expert use only) +##' @param convertSuperlevel if set to TRUE a backup of the superlevel will be created and the actual +##' superlevel will be converted to a level of type ITEM +##' @param backupLevelAppendStr string appended to level name for backup level +##' @param newLinkDefType type of new linkDefinition (either \code{"ONE_TO_MANY"}, +##' \code{"MANY_TO_MANY"} or \code{"ONE_TO_ONE"}) which is passed to +##' \code{\link{add_linkDefinition}}. If NULL (the default) \code{\link{add_linkDefinition}} +##' isn't called and a linkDefintion is expected to be present. +##' @param verbose show progress bars and further information +##' @export +##' @keywords emuR autobuild +##' @seealso add_linkDefinition +##' @examples +##' \dontrun{ +##' +##' ################################## +##' # prerequisite: loaded myTGcolDB emuDB +##' # (see ?create_emuRdemoData, ?convert_TextGridCollection, +##' # and vignette(emuR_intro) for more information) +##' +##' # add linkDefinition as one has to be present for +##' # the autobuild function to work +##' add_linkDefinition(emuDBhandle = myTGcolDB, +##' type = "ONE_TO_MANY", +##' superlevelName = "Syllable", +##' sublevelName = "Phoneme") +##' +##' # envoke autobuild function to build hierarchy for converted TextGridCollection +##' autobuild_linkFromTimes(emuDBhandle = myTGcolDB, +##' superlevelName = "Syllable", +##' sublevelName = "Phoneme", +##' convertSuperlevel = TRUE) +##' +##' } +autobuild_linkFromTimes <- function(emuDBhandle, + superlevelName, + sublevelName, + rewriteAllAnnots = TRUE, + convertSuperlevel = FALSE, + backupLevelAppendStr = '-autobuildBackup', + newLinkDefType = NULL, + verbose = TRUE){ + + check_emuDBhandle(emuDBhandle) + + # add linkDefintions if newLinkDefType is present + if(!is.null(newLinkDefType)){ + add_linkDefinition(emuDBhandle, + type = newLinkDefType, + superlevelName = superlevelName, + sublevelName = sublevelName) + } + + dbConfig = load_DBconfig(emuDBhandle) + + foundSuperLevelDev = NULL + foundSubLevelDev = NULL + foundLinkDef = NULL + + # check if linkDefinition exists and levelDefinitions (LD) of superlevelName + # is of type SEGMENT and LD of subleveName is of type EVENT | SEGMENT + found = FALSE + for(ld in dbConfig$linkDefinitions){ + if (ld$superlevelName == superlevelName && ld$sublevelName == sublevelName){ + levDefSuper = get_levelDefinition(emuDBhandle, ld$superlevelName) + levDefSub = get_levelDefinition(emuDBhandle, ld$sublevelName) + + if(levDefSuper$type == "ITEM" | levDefSub$type == "ITEM"){ + if(!is.null(newLinkDefType)){ + # remove link again if it was added + remove_linkDefinition(emuDBhandle, + superlevelName = superlevelName, + sublevelName = sublevelName) + } + stop(paste0("The super level type and sub level type can not be of type 'ITEM'. ", + "The super level type is: '", levDefSuper$type, "' and the sub level", + " type is '", levDefSub$type, "'.")) + } + + if(levDefSuper$type == 'SEGMENT' && (levDefSub$type == 'SEGMENT' || levDefSub$type == 'EVENT')){ + found = TRUE + foundSuperLevelDev = levDefSuper + foundSubLevelDev = levDefSub + foundLinkDef = ld + break + } + } + } + + if(!found){ + stop('Did not find linkDefintion for: ', superlevelName, '->', sublevelName, + '. Defined linkDefinitions are: ', + sapply(dbConfig$linkDefinitions, function(x){paste0(x$superlevelName, '->', x$sublevelName, '; ')})) + } + + if(convertSuperlevel){ + # check if backup links exist + res = DBI::dbGetQuery(emuDBhandle$connection, paste0("SELECT * ", + "FROM items ", + "WHERE db_uuid ='", emuDBhandle$UUID, "' ", + " AND level = '", paste0(superlevelName, backupLevelAppendStr), "'")) + + if(dim(res)[1] !=0){ + stop("Can not backup level! Items table already has entries belonging to level: ", + paste0(superlevelName, backupLevelAppendStr)) + } + + + # + if(length(foundSuperLevelDev$attributeDefinitions) > 1){ + stop("Backup of parellel labels not implemented yet!") + } + + # create temp tables + DBI::dbExecute(emuDBhandle$connection, "CREATE TEMP TABLE IF NOT EXISTS bndl_max_item_id_tmp ( + db_uuid VARCHAR(36), + session TEXT, + bundle TEXT, + bndl_max_item_id INTEGER, + PRIMARY KEY (db_uuid, session, bundle) + )") + + # create bndl_max_item_id_tmp table + DBI::dbExecute(emuDBhandle$connection, paste0("INSERT INTO bndl_max_item_id_tmp ", + "SELECT db_uuid, ", + " session, ", + " bundle, ", + " max(item_id) AS bndl_max_item_id ", + "FROM items ", + "WHERE db_uuid = '", emuDBhandle$UUID, "' ", + "GROUP BY ", + " db_uuid, ", + " session, ", + " bundle")) + + # backup labels belonging to superlevel (labels have to be backed up before + # items to avoid maxID problem (maybe should rewrite query to avoid this in + # future versions using labels table to determin maxID)) + DBI::dbExecute(emuDBhandle$connection, paste0("INSERT INTO labels ", + "SELECT ", + " l.db_uuid, ", + " l.session, ", + " l.bundle, ", + " (l.item_id + mid.bndl_max_item_id) AS item_id, ", + " l.label_idx, ", + " l.name || '", backupLevelAppendStr, "' AS name, ", + " l.label ", + "FROM items AS it, ", + " labels AS l, ", + " bndl_max_item_id_tmp AS mid ", + "WHERE it.db_uuid = '", emuDBhandle$UUID, "'", + " AND it.db_uuid = l.db_uuid ", + " AND it.session = l.session ", + " AND it.bundle = l.bundle ", + " AND it.item_id = l.item_id ", + " AND it.db_uuid = mid.db_uuid ", + " AND it.session = mid.session ", + " AND it.bundle = mid.bundle ", + " AND it.level = '", superlevelName, "'")) + + + # backup items belonging to superlevel (=duplicate level with new ids) + DBI::dbExecute(emuDBhandle$connection, paste0("INSERT INTO items ", + "SELECT ", + " items.db_uuid, ", + " items.session, ", + " items.bundle, ", + " (item_id + bndl_max_item_id) AS item_id, ", + " items.level || '", backupLevelAppendStr, "' AS level, ", + " type, ", + " seq_idx, ", + " sample_rate, ", + " sample_point, ", + " sample_start, ", + " sample_dur ", + "FROM items, ", + " bndl_max_item_id_tmp ", + "WHERE items.db_uuid = bndl_max_item_id_tmp.db_uuid ", + " AND items.session = bndl_max_item_id_tmp.session ", + " AND items.bundle = bndl_max_item_id_tmp.bundle ", + " AND items.level = '", superlevelName, "'")) + + # drop temp tables + DBI::dbExecute(emuDBhandle$connection, paste0("DROP TABLE IF EXISTS ", "bndl_max_item_id_tmp")) + + } + + # create temp table to store all links in. Duplicates will be removed in a sep. query -> performance improvement! + DBI::dbExecute(emuDBhandle$connection, paste0("CREATE TEMP TABLE IF NOT EXISTS autob_all_links_tmp (", + "db_uuid VARCHAR(36),", + "session TEXT,", + "bundle TEXT,", + "from_id INTEGER,", + "to_id INTEGER)")) + + + # query DB depending on type of sublevelDefinition + if(foundSubLevelDev$type == 'EVENT'){ + + # get all links and store in temp table + DBI::dbExecute(emuDBhandle$connection, paste0("INSERT INTO autob_all_links_tmp (db_uuid, session, bundle, from_id, to_id) ", + "SELECT ", + " super.db_uuid, ", + " super.session, ", + " super.bundle, ", + " super.item_id AS 'from_id', ", + " sub.item_id AS 'to_id' ", + "FROM items AS 'super', ", + " items AS 'sub' ", + "WHERE super.level = '", superlevelName, "' ", + " AND sub.level = '", sublevelName, "' ", + " AND super.db_uuid = '", emuDBhandle$UUID, "' ", + " AND sub.db_uuid = '", emuDBhandle$UUID, "' ", + " AND super.session = sub.session", + " AND super.bundle = sub.bundle ", + " AND (sub.sample_point + 0 >= super.sample_start + 0) ", + " AND sub.sample_point <= (super.sample_start + super.sample_dur)")) + + }else{ + + if(ld$type == "ONE_TO_MANY"){ + + # get all links and store in temp table + DBI::dbExecute(emuDBhandle$connection, paste0("INSERT INTO autob_all_links_tmp (db_uuid, session, bundle, from_id, to_id) ", + "SELECT ", + " super.db_uuid, ", + " super.session, ", + " super.bundle, ", + " super.item_id AS 'from_id', ", + " sub.item_id AS 'to_id' ", + "FROM items as super JOIN items as sub ", + "WHERE (super.level = '", superlevelName, "'", + " AND sub.level = '", sublevelName, "' ", + " AND super.db_uuid = '", emuDBhandle$UUID, "' ", + " AND sub.db_uuid = '", emuDBhandle$UUID, "' ", + " AND super.session = sub.session ", + " AND super.bundle = sub.bundle ", + " AND (sub.sample_start + 0 >= super.sample_start + 0)) ", # + 0 added to ensure numeric comparison + " AND ((sub.sample_start + sub.sample_dur) <= (super.sample_start + super.sample_dur))")) + + + }else if(ld$type == "MANY_TO_MANY"){ + + # get all links and store in temp table + DBI::dbExecute(emuDBhandle$connection, paste0("INSERT INTO autob_all_links_tmp (db_uuid, session, bundle, from_id, to_id) ", + "SELECT ", + " super.db_uuid, ", + " super.session, ", + " super.bundle, ", + " super.item_id AS 'from_id', ", + " sub.item_id AS 'to_id' ", + "FROM items as super ", + " JOIN items as sub ", + "WHERE super.level = '", superlevelName, "'", + " AND sub.level = '", sublevelName, "' ", + " AND super.db_uuid = '", emuDBhandle$UUID, "' ", + " AND sub.db_uuid = '", emuDBhandle$UUID, "' ", + " AND super.session = sub.session ", + " AND super.bundle = sub.bundle ", + " AND (((sub.sample_start + 0 >= super.sample_start + 0) ", + " AND ((sub.sample_start + sub.sample_dur) <= (super.sample_start + super.sample_dur))) ", # within + " OR ((sub.sample_start + 0 <= super.sample_start + 0) ", + " AND ((sub.sample_start + sub.sample_dur) >= (super.sample_start + 0)) ", + " AND ((sub.sample_start + sub.sample_dur) <= (super.sample_start + super.sample_dur))) ", # left overlap + " OR ((sub.sample_start + 0 >= super.sample_start + 0) ", + " AND ((sub.sample_start + 0) <= (super.sample_start + super.sample_dur)) ", + " AND ((sub.sample_start + sub.sample_dur) >= (super.sample_start + super.sample_dur))) ", # right overlap + " OR ((sub.sample_start + 0 <= super.sample_start + 0) ", + " AND ((sub.sample_start + sub.sample_dur) >= (super.sample_start + super.sample_dur))))")) # left and right overlap + + }else if(ld$type == "ONE_TO_ONE"){ + + # get all links and store in temp table + DBI::dbExecute(emuDBhandle$connection, paste0("INSERT INTO autob_all_links_tmp (db_uuid, session, bundle, from_id, to_id) ", + "SELECT ", + " super.db_uuid, ", + " super.session, ", + " super.bundle, ", + " super.item_id AS 'from_id', ", + " sub.item_id AS 'to_id' ", + "FROM items as super ", + "JOIN items as sub ", + "WHERE (super.level = '", superlevelName, "' ", + " AND sub.level = '", sublevelName, "' ", + " AND super.db_uuid = '", emuDBhandle$UUID, "' ", + " AND sub.db_uuid = '", emuDBhandle$UUID, "' ", + " AND super.session = sub.session ", + " AND super.bundle = sub.bundle ", + " AND (sub.sample_start + 0 = super.sample_start + 0)) ", + " AND ((sub.sample_start + sub.sample_dur) = (super.sample_start + super.sample_dur)) ")) # are exatly the same + + } + } + + # remove duplicates with left join and insert into links table + DBI::dbExecute(emuDBhandle$connection, paste0("INSERT INTO links ", + "SELECT * ", + "FROM autob_all_links_tmp ", + "LEFT JOIN links ", + "USING(db_uuid, session, bundle, from_id, to_id) ", + "WHERE links.from_id IS NULL")) + + # drop temp tables + DBI::dbExecute(emuDBhandle$connection, paste0("DROP TABLE IF EXISTS ", "autob_all_links_tmp")) + + if(convertSuperlevel){ + # change levelDefinition type + for(i in 1:length(dbConfig$levelDefinitions)){ + if(dbConfig$levelDefinitions[[i]]$name == superlevelName){ + dbConfig$levelDefinitions[[i]]$type = 'ITEM' + } + } + # generate levelDefinition for backup level + foundSuperLevelDev$name = paste0(foundSuperLevelDev$name, backupLevelAppendStr) + for(i in 1:length(foundSuperLevelDev$attributeDefinitions)){ + foundSuperLevelDev$attributeDefinitions[[i]]$name = paste0(foundSuperLevelDev$attributeDefinitions[[i]]$name, backupLevelAppendStr) + } + dbConfig$levelDefinitions[[length(dbConfig$levelDefinitions) + 1]] = foundSuperLevelDev + + # convert superlevel to ITEM level + DBI::dbExecute(emuDBhandle$connection, paste0("UPDATE items ", + "SET type = 'ITEM', ", + " sample_point = null, ", + " sample_start = null, ", + " sample_dur = null ", + "WHERE db_uuid='", emuDBhandle$UUID, "' ", + " AND level ='", superlevelName,"'")) + } + + # write DBconfig to disc + store_DBconfig(emuDBhandle, dbConfig) + + # remove super from levelCanvasOrder + if(convertSuperlevel){ + psp = list_perspectives(emuDBhandle) + if(nrow(psp) > 0){ + for(i in 1:nrow(psp)){ + curPsp = psp[1,] + lco = get_levelCanvasesOrder(emuDBhandle, curPsp$name) + if(superlevelName %in% lco){ + set_levelCanvasesOrder(emuDBhandle, curPsp$name, lco[!lco %in% superlevelName]) + } + } + } + } + + if(rewriteAllAnnots){ + rewrite_annots(emuDBhandle, verbose=verbose) + } + +} + +# FOR DEVELOPMENT +# library('testthat') +# test_file('tests/testthat/test_aaa_initData.R') +# test_file('tests/testthat/test_emuR-autobuild.R') diff --git a/R/emuR-autoproc_annots.R b/R/emuR-autoproc_annots.R new file mode 100644 index 00000000..e9d5f83d --- /dev/null +++ b/R/emuR-autoproc_annots.R @@ -0,0 +1,747 @@ +##' Replace item labels +##' +##' Replace the labels of all annotation items, or more specifically +##' of attribute definitions belonging to annotation items, in an emuDB that +##' match the provided \code{origLabels} character vector which the +##' corresponding labels provided by the \code{newLabels} character vector. +##' The indices of the label vectors provided are used to match the labels +##' (i.e. \code{origLabels[i]} will be replaced by \code{newLabels[i]}). +##' +##' +##' @param emuDBhandle emuDB handle object (see \link{load_emuDB}) +##' @param attributeDefinitionName name of a attributeDefinition of a emuDB +##' where the labels are to be replaced +##' @param origLabels character vector containing labels that are to be replaced +##' @param newLabels character vector containing labels that are to replaced +##' the labels of \code{origLabels}. This vector has to be of equal length +##' to the \code{origLabels} vector. +##' @param verbose Show progress bars and further information +##' @export +##' @seealso \code{\link{load_emuDB}} +##' @keywords emuDB +##' @examples +##' \dontrun{ +##' +##' ################################## +##' # prerequisite: loaded ae emuDB +##' # (see ?load_emuDB for more information) +##' +##' # replace all "I" and "p" labels with "I_replaced" and "p_replaced" +##' replace_itemLabels(ae, attributeDefinitionName = "Phonetic", +##' origLabels = c("I", "p"), +##' newLabels = c("I_replaced", "p_replaced")) +##' +##' } +##' +replace_itemLabels <- function(emuDBhandle, + attributeDefinitionName, + origLabels, + newLabels, + verbose = TRUE) { + + ############################# + # check input parameters + + check_emuDBhandle(emuDBhandle) + + allAttrNames = get_allAttributeNames(emuDBhandle) + if(!attributeDefinitionName %in% allAttrNames){ + stop(paste0("No attributeDefinitionName: ", attributeDefinitionName, + " found in emuDB! The available attributeNames are: ", + paste0(get_allAttributeNames(emuDBhandle), collapse = "; "))) + } + + if((!inherits(origLabels, "character")) | (!inherits(newLabels, "character")) | length(origLabels) != length(newLabels)){ + stop("origLabels and newLabels have to be a character vector of the same length!") + } + + # + ############################# + if(verbose){ + cat("INFO: creating temporary index...\n") + } + # create temp index + DBI::dbExecute(emuDBhandle$connection, paste0("CREATE INDEX IF NOT EXISTS label_replace_tmp_idx ", + "ON labels(db_uuid, name, label)")) + + # progressbar + if(verbose){ + cat("\n INFO: replacing ", length(origLabels), " attribute labels\n") + pb <- utils::txtProgressBar(min = 0, max = length(origLabels), style = 3) + } + + # transaction start + DBI::dbBegin(emuDBhandle$connection) + + for(i in 1:length(origLabels)){ + DBI::dbExecute(emuDBhandle$connection, paste0("UPDATE labels ", + "SET label = '", newLabels[i], "' ", + "WHERE db_uuid='", emuDBhandle$UUID, "' ", + " AND name = '", attributeDefinitionName, "' ", + " AND label = '", origLabels[i], "'")) + if(verbose){ + utils::setTxtProgressBar(pb, i) + } + } + + # transaction end + DBI::dbCommit(emuDBhandle$connection) + + # remove temp index + DBI::dbExecute(emuDBhandle$connection, paste0("DROP INDEX IF EXISTS label_replace_tmp_idx")) + + # close progress bar if open + if(exists('pb')){ + close(pb) + cat("\n") + } + + rewrite_annots(emuDBhandle, verbose = verbose) + +} + +##' Duplicate level +##' +##' Duplicate level of emuDB including all of its items and its various +##' attributeDefinitions. If the \code{duplicateLinks} variable is set +##' to \code{TRUE} all the links to and from the original items are +##' duplicated. +##' +##' @param emuDBhandle emuDB handle object (see \link{load_emuDB}) +##' @param levelName name of level in emuDB that is to be duplicated +##' @param duplicateLevelName name given to newly duplicated level +##' @param duplicateLinks if set to \code{TRUE} (the default) all the +##' links to and from the original items are duplicated to point to the +##' new items of the new duplicate level. +##' @param linkDuplicates link the duplicated ITEMs to the originals. This +##' can only be set to \code{TRUE} if \code{duplicateLinks} is set to \code{FALSE}. +##' @param linkDefType type given to link definition. Only relevant if \code{linkDuplicates} +##' is set to \code{TRUE}. +##' @param verbose show progress bars and further information +##' @export +##' @seealso \code{\link{load_emuDB}} +##' @keywords emuDB +##' @examples +##' \dontrun{ +##' +##' ################################## +##' # prerequisite: loaded ae emuDB +##' # (see ?load_emuDB for more information) +##' +##' # duplicate Phonetic level +##' duplicate_level(ae, levelName = "Phonetic", +##' duplicateLevelName = "Phonetic2") +##' +##' } +##' +duplicate_level <- function(emuDBhandle, + levelName, + duplicateLevelName, + duplicateLinks = TRUE, + linkDuplicates = FALSE, + linkDefType = "ONE_TO_ONE", + verbose = TRUE) { + + check_emuDBhandle(emuDBhandle) + + ldefs = list_levelDefinitions(emuDBhandle) + + if(!levelName %in% ldefs$name){ + stop(paste0(levelName, " is not a valid level name! Available levels are: ", + paste0(ldefs$name, collapse = "; "))) + } + + if(duplicateLevelName %in% ldefs$name){ + stop(paste0(duplicateLevelName, " already exists in the emuDB: ", emuDBhandle$dbName)) + } + + if(duplicateLinks & linkDuplicates){ + stop(paste0("duplicateLinks & linkDuplicates are both set to TRUE! This is not allowed!")) + } + + ldef = ldefs[ldefs$name == levelName,] + + ######################### + # duplicate item entries + + # create temp tables + DBI::dbExecute(emuDBhandle$connection, paste0("CREATE TEMP TABLE IF NOT EXISTS bndl_max_item_id_tmp ", + "(db_uuid VARCHAR(36), ", + " session TEXT, ", + " bundle TEXT, ", + " bndl_max_item_id INTEGER, ", + "PRIMARY KEY (db_uuid, session, bundle))")) + # create bndl_max_item_id_tmp table + DBI::dbExecute(emuDBhandle$connection, paste0("INSERT INTO bndl_max_item_id_tmp ", + "SELECT ", + " db_uuid, ", + " session, ", + " bundle, ", + " max(item_id) AS bndl_max_item_id ", + "FROM items ", + "WHERE db_uuid = '", emuDBhandle$UUID, "' ", + "GROUP BY db_uuid, session, bundle")) + # duplicate level items table elements + DBI::dbExecute(emuDBhandle$connection, paste0("INSERT INTO items ", + "SELECT ", + " items.db_uuid, ", + " items.session, ", + " items.bundle, ", + " (item_id + bndl_max_item_id) AS item_id, ", + "'", duplicateLevelName, "' AS level, ", + " type, ", + " seq_idx, ", + " sample_rate, ", + " sample_point, ", + " sample_start, ", + " sample_dur ", + "FROM items, ", + " bndl_max_item_id_tmp ", + "WHERE items.db_uuid = bndl_max_item_id_tmp.db_uuid ", + " AND items.session = bndl_max_item_id_tmp.session ", + " AND items.bundle = bndl_max_item_id_tmp.bundle ", + " AND items.level = '", levelName, "'")) + + ########################## + # duplicate labels entries + DBI::dbExecute(emuDBhandle$connection, paste0("INSERT INTO labels ", + "SELECT l.db_uuid, ", + " l.session, ", + " l.bundle, ", + " (l.item_id + mid.bndl_max_item_id) AS item_id, ", + " l.label_idx, ", + "CASE WHEN l.name = '", levelName, "' ", + "THEN '", duplicateLevelName, "' ", + "ELSE l.name END AS name, l.label ", + "FROM items AS it, ", + " labels AS l, ", + " bndl_max_item_id_tmp AS mid ", + "WHERE it.db_uuid = l.db_uuid ", + " AND it.session = l.session ", + " AND it.bundle = l.bundle ", + " AND it.item_id = l.item_id ", + " AND it.db_uuid = mid.db_uuid ", + " AND it.session = mid.session ", + " AND it.bundle = mid.bundle ", + " AND it.level = '", levelName, "'")) + + if(duplicateLinks){ + ########################## + # duplicate links entries + + # where duplicate items are parents + DBI::dbExecute(emuDBhandle$connection, paste0("INSERT INTO links ", + "SELECT ", + " li.db_uuid, ", + " li.session, ", + " li.bundle, ", + " (li.from_id + mid.bndl_max_item_id) AS from_id, ", + " li.to_id, ", + " li.label ", + "FROM items AS it, ", + " links AS li, ", + " bndl_max_item_id_tmp AS mid ", + "WHERE it.db_uuid = li.db_uuid ", + " AND it.session = li.session ", + " AND it.bundle = li.bundle ", + " AND it.item_id = li.from_id ", + " AND it.db_uuid = mid.db_uuid ", + " AND it.session = mid.session ", + " AND it.bundle = mid.bundle ", + " AND it.level = '", levelName, "'")) + + # where duplicate items are children + DBI::dbExecute(emuDBhandle$connection, paste0("INSERT INTO links ", + "SELECT ", + " li.db_uuid, ", + " li.session, ", + " li.bundle, ", + " li.from_id, ", + " (li.to_id + mid.bndl_max_item_id) AS to_id, ", + " li.label ", + "FROM items AS it, ", + " links AS li, ", + " bndl_max_item_id_tmp AS mid ", + "WHERE it.db_uuid = li.db_uuid ", + " AND it.session = li.session ", + " AND it.bundle = li.bundle ", + " AND it.item_id = li.to_id ", + " AND it.db_uuid = mid.db_uuid ", + " AND it.session = mid.session ", + " AND it.bundle = mid.bundle ", + " AND it.level = '", levelName, "'")) + + }else{ + if(linkDuplicates){ + DBI::dbExecute(emuDBhandle$connection, paste0("INSERT INTO links ", + "SELECT ", + " it1.db_uuid, ", + " it1.session, ", + " it1.bundle, ", + " it1.item_id AS from_id, ", + " it2.item_id AS to_id, ", + " null AS label ", + "FROM items AS it1, ", + " items AS it2 ", + "WHERE it1.db_uuid = it2.db_uuid ", + " AND it1.session = it2.session ", + " AND it1.bundle = it2.bundle ", + " AND it1.level = '", levelName,"' ", + " AND it2.level = '", duplicateLevelName,"' ", + " AND it1.type = it2.type ", + " AND it1.seq_idx = it2.seq_idx")) + } + } + + # drop temp tables + DBI::dbExecute(emuDBhandle$connection, paste0("DROP TABLE IF EXISTS ", "bndl_max_item_id_tmp")) + + ######################## + # add levelDefs + add_levelDefinition(emuDBhandle, + duplicateLevelName, + type = ldef$type, + rewriteAllAnnots = FALSE, + verbose = verbose) + + ######################## + # add linkDefinitions + if(duplicateLinks){ + linkDefs = list_linkDefinitions(emuDBhandle) + # super + superLds = linkDefs[linkDefs$superlevelName == levelName,] + if(nrow(superLds) > 0){ + for(i in 1:nrow(superLds)){ + add_linkDefinition(emuDBhandle, + type = superLds[i,]$type, + superlevelName = duplicateLevelName, + sublevelName = superLds[i,]$sublevelName) + } + } + + # sub + subLds = linkDefs[linkDefs$sublevelName == levelName,] + if(nrow(subLds) > 0){ + for(i in 1:nrow(subLds)){ + add_linkDefinition(emuDBhandle, + type = subLds[i,]$type, + superlevelName = subLds[i,]$superlevelName, + sublevelName = duplicateLevelName) + } + } + } + + if(linkDuplicates){ + add_linkDefinition(emuDBhandle, + type = linkDefType, + superlevelName = levelName, + sublevelName = duplicateLevelName) + } + + ######################## + # add attributeDefintions + attrDefs = list_attributeDefinitions(emuDBhandle, levelName) + for(i in 1:nrow(attrDefs)){ + if(attrDefs[i,]$name != levelName){ + internal_add_attributeDefinition(emuDBhandle, + levelName = duplicateLevelName, + name = attrDefs[i,]$name, + type = attrDefs[i,]$type, + rewriteAllAnnots = FALSE, + verbose = verbose, + insertLabels = FALSE) + } + # copy legalLabels + ll = get_legalLabels(emuDBhandle, levelName, attrDefs[i,]$name) + if(!is.na(ll)){ + set_legalLabels(emuDBhandle, + duplicateLevelName, + attrDefs[i,]$name, + legalLabels = ll) + } + # copy labelGroups + attrDefLgs = list_attrDefLabelGroups(emuDBhandle, + levelName, + attributeDefinitionName = attrDefs[i,]$name) + if(nrow(attrDefLgs) > 0){ + for(j in 1:nrow(attrDefLgs)){ + if(attrDefs[i,]$name == levelName){ + tmpAttrDefName = duplicateLevelName + }else{ + tmpAttrDefName = attrDefs[i,]$name + } + add_attrDefLabelGroup(emuDBhandle, duplicateLevelName, + attributeDefinitionName = tmpAttrDefName, + labelGroupName = attrDefLgs[j,]$name, + labelGroupValues = unlist(stringr::str_split(attrDefLgs[j,]$values, "; "))) + } + } + } + + rewrite_annots(emuDBhandle, verbose = verbose) + +} + + +##' List sample rates of media and annotation (_annot.json) files +##' +##' @param emuDBhandle emuDB handle object (see \link{load_emuDB}) +##' @param sessionPattern A regular expression pattern matching session names to be searched from the database +##' @param bundlePattern A regular expression pattern matching bundle names to be searched from the database +##' +##' @return tibble with the columns +##' \itemize{ +##' \item session +##' \item bundle +##' \item sample_rate_annot_json +##' \item sample_rate_media_file +##' } +##' \code{session}, \code{b} +##' @export +list_sampleRates <- function(emuDBhandle, sessionPattern = '.*', bundlePattern = '.*'){ + + db_config = load_DBconfig(emuDBhandle) + + bndls = DBI::dbReadTable(emuDBhandle$connection, "bundle") + + # filter sessions/bundles + ses_bool = emuR_regexprl(sessionPattern, bndls$session) + bndl_bool = emuR_regexprl(bundlePattern, bndls$name) + bndls = bndls[ses_bool & bndl_bool,] + + if(nrow(bndls) < 1) stop("no bundles found that match the sessionPattern & bundlePattern") + + bndls$sample_rate_media_file = -1 + + bndls$sample_rate_annot_json = -1 + + for(row_idx in 1:nrow(bndls)){ + annot_json_path = file.path(emuDBhandle$basePath, + paste0(bndls[row_idx,]$session, session.suffix), + paste0(bndls[row_idx,]$name, bundle.dir.suffix), + paste0(bndls[row_idx,]$name, bundle.annotation.suffix, ".json")) + + media_file_path = file.path(emuDBhandle$basePath, + paste0(bndls[row_idx,]$session, session.suffix), + paste0(bndls[row_idx,]$name, bundle.dir.suffix), + paste0(bndls[row_idx,]$name, ".", db_config$mediafileExtension)) + + + annot_json_sample_rate = jsonlite::fromJSON(annot_json_path)$sampleRate + bndls$sample_rate_annot_json[row_idx] = annot_json_sample_rate + + media_file_sample_rate = attr(wrassp::read.AsspDataObj(media_file_path, end = 20), "sampleRate") + bndls$sample_rate_media_file[row_idx] = media_file_sample_rate + } + + res = dplyr::as_tibble(dplyr::select(bndls, + "session", + "bundle" = "name", + "sample_rate_media_file", + "sample_rate_annot_json")) + return(res) +} + +##' Resample annotations (\code{_annot.json}) files of emuDB +##' +##' Resample all annotations (\code{_annot.json}) files of emuDB to a specified +##' sample rate. It is up to the user to ensure that the samplerates of +##' the annot.json files match those of the \code{.wav} files. +##' +##' @param emuDBhandle emuDB handle object (see \link{load_emuDB}) +##' @param newSampleRate target sample rate +##' @param verbose show progress bars and further information +##' @export +##' @examples +##' \dontrun{ +##' +##' ################################## +##' # prerequisite: loaded ae emuDB +##' # (see ?load_emuDB for more information) +##' +##' # resample +##' resample_annots(ae, newSampleRate = 16000) +##' +##' } +resample_annots <- function(emuDBhandle, newSampleRate, verbose = TRUE) { + + stop("not implemented yet!!!") + + DBI::dbExecute(emuDBhandle$connection, paste0("UPDATE items ", + "SET sample_rate = ", newSampleRate, ", ", + " sample_point = ROUND((sample_point / sample_rate) * ", newSampleRate, ") ", + " sample_start = ROUND(((sample_start - 0.5) / sample_rate) * ", newSampleRate, ") ", + " sample_dur = sample_dur ")) + + DBI::dbGetQuery(emuDBhandle$connection, paste0("SELECT * FROM items WHERE level = 'Tone'")) +} + + + +## Append an item (on a given level) to each bundle +## +## @param emuDBhandle emuDB handle object (see \link{load_emuDB}) +## @param levelName Name of the level to which to append the new items +## @param labels Character vector containing one label for each attributeDefinition of \code{levelName} +## @param sessionPattern A (RegEx) pattern matching sessions to be included in the operation +## @param bundlePattern A (RegEx) pattern matching bundles to be included in the operation +## @param verbose Show progress bars and further information +## @export +## @seealso \code{\link{change_labels}} +## @keywords emuDB +## @examples +## \dontrun{ +## TO DO - Add example +## } +append_itemsToLevel = function(emuDBhandle, + levelName, + labels, + sessionPattern = ".*", + bundlePattern = ".*", + verbose = TRUE) { + ## + ## Check pre-conditions + ## + levelDefinition = get_levelDefinition(emuDBhandle, levelName) + + if (is.null(levelDefinition)) { + stop("Error: The given level does not exist ") + } + + if (length(labels) != length(levelDefinition$attributeDefinitions)) { + stop ( + paste0( + "Error: The number of labels (", + length(labels), + ") must match the number of attribute definitions (", + length(levelDefinition$attributeDefinitions), + ") for the given level (", + levelName, + ")" + ) + ) + } + + + ## + ## Filter bundles according to sessionPattern and bundlePattern + ## + bundles = list_bundles(emuDBhandle) + sessionMatch = emuR_regexprl (sessionPattern, bundles$session) + bundleMatch = emuR_regexprl (bundlePattern, bundles$name) + bundles = bundles [sessionMatch & bundleMatch, ] + + + ## + ## Get sample rate + ## + statement = DBI::dbSendStatement( + emuDBhandle$connection, + "SELECT sample_rate FROM bundle + WHERE + db_uuid = ? AND + session = ? AND + name = ?" + ) + + DBI::dbBind(statement, + list(rep(emuDBhandle$UUID, nrow(bundles)), + bundles$session, + bundles$name)) + sampleRate = DBI::dbFetch(statement) + DBI::dbClearResult(statement) + + + ## + ## Get appropriate item ID + ## + statement = DBI::dbSendStatement( + emuDBhandle$connection, + "SELECT max(item_id) FROM items + WHERE + db_uuid = ? AND + session = ? AND + bundle = ?" + ) + + DBI::dbBind(statement, + list(rep(emuDBhandle$UUID, nrow(bundles)), + bundles$session, + bundles$name)) + itemID = DBI::dbFetch(statement) + DBI::dbClearResult(statement) + + itemID[is.na(itemID),1] = 0 + itemID = itemID + 1 + + + ## + ## Get appropriate sequence index + ## + statement = DBI::dbSendStatement( + emuDBhandle$connection, + "SELECT max(seq_idx) FROM items + WHERE + db_uuid = ? AND + session = ? AND + bundle = ? AND + level = ?" + ) + + DBI::dbBind(statement, + list(rep(emuDBhandle$UUID, nrow(bundles)), + bundles$session, + bundles$name, + rep(levelName, nrow(bundles)))) + sequenceIndex = DBI::dbFetch(statement) + DBI::dbClearResult(statement) + + sequenceIndex[is.na(sequenceIndex),1] = 0 + sequenceIndex = sequenceIndex + 1 + + + ## + ## Insert items + ## + statement = DBI::dbSendStatement( + emuDBhandle$connection, + "INSERT INTO items + (db_uuid, session, bundle, item_id, level, type, seq_idx, sample_rate) + VALUES (?, ?, ?, ?, ?, ?, ?, ?)" + ) + + DBI::dbBind( + statement, + list( + rep(emuDBhandle$UUID, nrow(bundles)), + bundles$session, + bundles$name, + itemID[,1], + rep(levelName, nrow(bundles)), + rep("ITEM", nrow(bundles)), + sequenceIndex[,1], + sampleRate$sample_rate + ) + ) + ## @todo check success + DBI::dbClearResult(statement) + + + ## + ## Insert labels + ## + + for (i in 1:length(labels)) { + attributeDefinition = emuR::list_attributeDefinitions(emuDBhandle, + levelName)[i, "name"] + + statement = DBI::dbSendStatement( + emuDBhandle$connection, + "INSERT INTO labels + (db_uuid, session, bundle, item_id, label_idx, name, label) + VALUES (?, ?, ?, ?, ?, ?, ?)" + ) + + DBI::dbBind( + statement, + list( + rep(emuDBhandle$UUID, nrow(bundles)), + bundles$session, + bundles$name, + itemID[,1], + rep(i, nrow(bundles)), + rep(attributeDefinition, nrow(bundles)), + rep(labels[i], nrow(bundles)) + ) + ) + + ## @todo check success + DBI::dbClearResult(statement) + } + + rewrite_annots(emuDBhandle, verbose = verbose) +} + +# ##' Add items to an empty level +# ##' +# ##' +# ##' Although an object of class emuRsegs may be passed into this function +# ##' it is not obligatory. The requiered column names of the data.frame +# ##' object passed into this function are +# ##' @param emuDBhandle emuDB handle object (see \link{load_emuDB}) +# ##' @param levelName Name of the level to which to add the items +# ##' @param seglist Segmentlist containing information about the items that are +# ##' to be added to the specified level. +# ##' @param verbose Show progress bars and further information +# ##' @export +# ##' @seealso \code{\link{change_labels}} +# ##' @keywords emuDB +# ##' @examples +# ##' \dontrun{ +# ##' TO DO - Add example +# ##' } +# add_itemsToEmptyLevel <- function(emuDBhandle, levelName, seglist, verbose = TRUE){ +# +# levelDef = get_levelDefinition(emuDBhandle, levelName) +# +# if(is.null(levelDef)){ +# stop("Specified level does not exist!") +# } +# +# # check input parameters +# if(levelDef$type == "SEGMENT"){ +# # if(is.null(sampleStart) || is.null(sampleEnd)){ +# # stop("Specified level is of type SEGMENT! Both sampleStart and sampleEnd have to be set!") +# # } +# # if(length(labels) != length(sampleStart) || length(labels) != length(sampleEnd)){ +# # stop("labels, sampleStart and sampleEnd have to be of the same length!") +# # } +# }else if(levelDef$type == "EVENT"){ +# # if(is.null(sampleStart)){ +# # stop("Specified level is of type EVENT! sampleStart has to be set!") +# # } +# # if(length(labels) != length(sampleStart)){ +# # stop("labels and sampleStart have to be of the same length!") +# # } +# }else{ +# stop("ITEM levels not supported yet!") +# } +# +# # check that level is empty +# res = DBI::dbGetQuery(emuDBhandle$connection, statement = paste0("SELECT * FROM items WHERE level='", levelName, "'")) +# if(nrow(res) != 0){ +# stop("Specified level is not empty!") +# } +# +# # use dplyr to sort seglist (in case these go mixed up somehow) +# sortedSl = seglist %>% dplyr::arrange(session, bundle, sample_start) +# +# +# if(levelDef$type == "SEGMENT"){ +# }else if(levelDef$type == "EVENT"){ +# +# sortedSl %>% +# dplyr::group_by(session, bundle) %>% +# dplyr::mutate(seq_idx = row_number()) +# +# # create items df +# itemsDf = data.frame(db_uuid = emuDBhandle$UUID, +# session = sortedSl$session, +# bundle = sortedSl$bundle, +# item_id = sortedSl$start_item_id, # SIC!!! +# level = rep(levelName, length), +# type = , +# seq_idx = , +# sample_rate = , +# sample_point = , +# sample_start = , +# sample_dur = ) +# +# }else{ +# stop("ITEM levels not supported yet!") +# } +# } + +# FOR DEVELOPMENT +# library('testthat') +# test_file('tests/testthat/test_emuR-autoproc_annots.R') diff --git a/R/emuR-bas_webservices.R b/R/emuR-bas_webservices.R new file mode 100644 index 00000000..a679f43d --- /dev/null +++ b/R/emuR-bas_webservices.R @@ -0,0 +1,656 @@ +##' Runs several BAS webservices, starting from an orthographic transcription +##' +##' This function calls the BAS webservices G2P, MAUS, Pho2Syl, MINNI and (if necessary) Chunker. +##' Starting from an orthographic transcription, it derives a tokenized orthographical word tier +##' using the G2P tool. It also derives canonical pronunciations (in SAMPA) for the words. +##' If at least one audio file is longer than 60 seconds, the function then calls the Chunker webservice +##' to presegment the recordings. Subsequently, the webservice MAUS is called to derive a phonetic +##' segmentation. A second, rough segmentation is created by running the phoneme decoder MINNI. +##' Finally, syllabification is performed by calling Pho2Syl. \strong{This function requires an internet connection.} +##' +##' All necessary level, attribute and link definitions are created in the process. +##' Note that this function will run all BAS webservices with default parameters, with four exceptions: +##' \itemize{ +##' \item{Chunker: force=rescue} +##' \item{G2P: embed=maus} +##' \item{Pho2Syl: wsync=yes} +##' \item{MAUS: USETRN=[true if Chunker was called or transcription is a segment tier, false otherwise]} +##' } +##' If you wish to change parameters, you must use the individual runBASwebservices functions. This will also allow +##' you to carry out manual corrections in between the steps, or to use different languages for different webservices. +##' +##' @family BAS webservice functions +##' +##' @export +##' @param handle emuDB handle +##' @param transcriptionAttributeDefinitionName name of the attribute (not level!) containing an orthographic transcription. +##' @param language language(s) to be used. If you pass a single string (e.g. "deu-DE"), this language will be used for all bundles. +##' Alternatively, you can select the language for every bundle individually. To do so, you must pass a data frame with the columns +##' session, bundle, language. This data frame must contain one row for every bundle in your emuDB. +##' Up-to-date lists of the languages accepted by all webservices can be found here: +##' \url{https://clarin.phonetik.uni-muenchen.de/BASWebServices/services/help} +##' +##' @param orthoAttributeDefinitionName attribute name for orthographic words +##' @param canoAttributeDefinitionName attribute name for canonical pronunciations of words +##' @param chunkAttributeDefinitionName attribute name for the chunk segmentation. +##' Please note that the chunk segmentation will only be generated if your emuDB contains +##' audio files beyond the one minute mark. +##' @param mausAttributeDefinitionName attribute name for the MAUS segmentation +##' @param minniAttributeDefinitionName attribute name for the MINNI segmentation +##' @param sylAttributeDefinitionName attribute name for syllable segmentation +##' @param canoSylAttributeDefinitionName attribute name for syllabified canonical pronunciations of words +##' @param runMINNI if set to \code{TRUE} (the default) the MINNI service is also run. As the MINNI service contains +##' less languages than the others it can be useful to turn this off. +##' @param patience If a web service call fails, it is repeated a further n times, with n being the value of patience. +##' Must be set to a value between 0 and 3. +##' @param verbose Display progress bars and other information +##' @param resume If a previous call to this function has failed (and you think you have fixed the issue +##' that caused the error), you can set resume=TRUE to recover any progress made up to that point. This +##' will only work if your R temporary directory has not been deleted or emptied in the meantime. + +runBASwebservice_all <- function(handle, + transcriptionAttributeDefinitionName, + language, + orthoAttributeDefinitionName = "ORT", + canoAttributeDefinitionName = "KAN", + mausAttributeDefinitionName = "MAU", + minniAttributeDefinitionName = "MINNI", + sylAttributeDefinitionName = "MAS", + canoSylAttributeDefinitionName = "KAS", + chunkAttributeDefinitionName = "TRN", + runMINNI = TRUE, + patience = 0, + resume = FALSE, + verbose = TRUE) +{ + check_emuDBhandle(handle) + + func = "all" + transcriptionLevel = get_levelNameForAttributeName(handle, transcriptionAttributeDefinitionName) + + oldBasePath = handle$basePath + + if (is.null(transcriptionLevel)) { + stop("Could not find a level for attribute ", transcriptionAttributeDefinitionName) + } + + running_chunker = FALSE + chunkLevel = NULL + + # if our transcription is a segment level, we assume it is a manual chunk segmentation + if (get_levelDefinition(handle, transcriptionLevel)$type == "SEGMENT") { + chunkLevel = transcriptionLevel # the transcription is the chunk segmentation + usetrn = "true" # we use it for MAUS + } + + # else, we check if we will need to perform automatic chunk segmentation + else if (bas_long_enough_for_chunker(handle, oldBasePath)) { + running_chunker = TRUE # we need to run the chunker + chunkLevel = chunkAttributeDefinitionName + usetrn = "true" # the to-be-created chunk segmentation will be used for MAUS + } + + else + { + chunkAttributeDefinitionName = NULL + usetrn = "false" + } + + handle = bas_prepare(handle, resume, verbose, func) + + bas_run_g2p_for_tokenization_dbi( + handle = handle, + transcriptionAttributeDefinitionName = transcriptionAttributeDefinitionName, + orthoAttributeDefinitionName = orthoAttributeDefinitionName, + language = language, + verbose = verbose, + resume = resume, + params = list(), + func = func, + patience = patience + ) + + bas_run_g2p_for_pronunciation_dbi( + handle = handle, + orthoAttributeDefinitionName = orthoAttributeDefinitionName, + canoAttributeDefinitionName = canoAttributeDefinitionName, + language = language, + verbose = verbose, + resume = resume, + params = list(embed = "maus"), + func = func, + patience = patience + ) + + + # if we previously decided to run automatic chunk segmentation + if (running_chunker) + { + bas_run_chunker_dbi( + handle = handle, + canoAttributeDefinitionName = canoAttributeDefinitionName, + chunkAttributeDefinitionName = chunkAttributeDefinitionName, + orthoAttributeDefinitionName = orthoAttributeDefinitionName, + rootLevel = transcriptionLevel, + params = list(force = "rescue"), + resume = resume, + verbose = verbose, + language = language, + oldBasePath = oldBasePath, + perspective = "default", + func = func, + patience = patience + ) + } + + bas_run_maus_dbi( + handle = handle, + canoAttributeDefinitionName = canoAttributeDefinitionName, + language = language, + chunkLevel = chunkLevel, + mausAttributeDefinitionName = mausAttributeDefinitionName, + verbose = verbose, + resume = resume, + params = list(USETRN = usetrn), + oldBasePath = oldBasePath, + perspective = "default", + turnChunkLevelIntoItemLevel = TRUE, + func = func, + patience = patience + ) + if(runMINNI){ + bas_run_minni_dbi( + handle = handle, + language = language, + minniAttributeDefinitionName = minniAttributeDefinitionName, + rootLevel = NULL, + verbose = verbose, + resume = resume, + params = list(), + oldBasePath = oldBasePath, + perspective = "default", + func = func, + patience = patience + ) + } + + bas_run_pho2syl_canonical_dbi( + handle = handle, + canoAttributeDefinitionName = canoAttributeDefinitionName, + canoSylAttributeDefinitionName = canoSylAttributeDefinitionName, + language = language, + verbose = verbose, + params = list(), + resume = resume, + func = func, + patience = patience + ) + + orthoLevel = orthoAttributeDefinitionName + + bas_run_pho2syl_segmental_dbi( + handle = handle, + segmentAttributeDefinitionName = mausAttributeDefinitionName, + language = language, + sylAttributeDefinitionName = sylAttributeDefinitionName, + superLevel = orthoLevel, + resume = resume, + params = list(wsync = "yes"), + verbose = verbose, + func = func, + patience = patience + ) + + + mausLevel = mausAttributeDefinitionName + + # remove the ORT -> MAU link as it is has been made redundant by the ORT -> MAS -> MAU path + remove_linkDefinition(handle, + orthoLevel, + mausLevel, + force = TRUE, + verbose = FALSE) + + if (running_chunker) + { + # turn the chunk segmentation into an item level (as time information is now on the MAU tier) + bas_segment_to_item_level(handle, chunkLevel) + + # remove the transcription -> ORT link + # as it has been made redundant by the transcription -> TRN -> ORT path + remove_linkDefinition(handle, + transcriptionLevel, + orthoAttributeDefinitionName, + force = TRUE, + verbose = FALSE) + } + + handle = bas_clear(handle, oldBasePath, func) + rewrite_annots(handle, verbose = verbose) +} + +##' Runs MAUS webservice to create a phonetic segmentation +##' +##' This function calls the BAS webservice MAUS to generate a phonemic segmentation. +##' It requires a word-tokenized tier with a SAMPA pronunciation, which can be generated +##' by the function \link{runBASwebservice_g2pForPronunciation}. +##' \strong{This function requires an internet connection.} +##' +##' All necessary level, link and attribute definitions are created in the process. +##' +##' @family BAS webservice functions +##' +##' @export +##' +##' @param canoAttributeDefinitionName name of the attribute (not level!) containing the SAMPA word pronunciations. +##' If this attribute resides on a segment level, the segment time information is used as a presegmentation. +##' If it is an item level, no assumption is made about the temporal position of segments. +##' @param chunkLevel if you have a chunk segmentation level, you can provide it to improve the speed and accuracy +##' of MAUS. The chunk segmentation level must be a segment level, and it must link to the level of canoAttributeDefinitionName. +##' @param turnChunkLevelIntoItemLevel if TRUE, and if a chunk level is provided, the chunk level is converted into an ITEM level after segmentation +##' @param params named list of parameters to be passed on to the webservice. It is your own responsibility to +##' ensure that these parameters are compatible with the webservice API +##' (see \url{https://clarin.phonetik.uni-muenchen.de/BASWebServices/services/help}). +##' Some options accepted by the API (e.g. output format) cannot be set when calling a webservice from within emuR, +##' and will be overridden. If file parameters are used please wrap the file path in \code{httr::upload_file("/path/2/file/rules.nrul")}. +##' @param perspective the webApp perspective that the new level will be added to. +##' If NULL, the new level is not added to any perspectives. +##' +##' @inheritParams runBASwebservice_all + +runBASwebservice_maus <- function(handle, + canoAttributeDefinitionName, + language, + mausAttributeDefinitionName = "MAU", + chunkLevel = NULL, + turnChunkLevelIntoItemLevel = TRUE, + params = NULL, + perspective = "default", + patience = 0, + resume = FALSE, + verbose = TRUE) +{ + check_emuDBhandle(handle) + func = "maus" + oldBasePath = handle$basePath + handle = bas_prepare(handle, resume, verbose, func) + + bas_run_maus_dbi( + handle = handle, + canoAttributeDefinitionName = canoAttributeDefinitionName, + mausAttributeDefinitionName = mausAttributeDefinitionName, + language = language, + chunkLevel = chunkLevel, + verbose = verbose, + resume = resume, + params = params, + oldBasePath = oldBasePath, + perspective = perspective, + turnChunkLevelIntoItemLevel = turnChunkLevelIntoItemLevel, + func = func, + patience = patience + ) + + handle = bas_clear(handle, oldBasePath, func) + + rewrite_annots(handle, verbose = verbose) +} + + +##################################################################### +############################## G2P ################################## +##################################################################### + + +##' Tokenizes an orthographic transcription. +##' +##' This function calls the webservice G2P to break up a transcription into tokens, or words. +##' In addition to tokenization, G2P performs normalization of numbers and other special words. +##' A call to this function is usually followed by a call to \link{runBASwebservice_g2pForPronunciation}. +##' \strong{This function requires an internet connection.} +##' +##' All necessary level, link and attribute definitions are created in the process. +##' +##' @family BAS webservice functions +##' +##' @export +##' @inheritParams runBASwebservice_maus +##' @inheritParams runBASwebservice_all + +runBASwebservice_g2pForTokenization <- function(handle, + transcriptionAttributeDefinitionName, + language, + orthoAttributeDefinitionName = "ORT", + params = list(), + patience = 0, + resume = FALSE, + verbose = TRUE) +{ + check_emuDBhandle(handle) + + func = "g2p_tokenization" + oldBasePath = handle$basePath + handle = bas_prepare(handle, resume, verbose, func) + + bas_run_g2p_for_tokenization_dbi( + handle = handle, + transcriptionAttributeDefinitionName = transcriptionAttributeDefinitionName, + orthoAttributeDefinitionName = orthoAttributeDefinitionName, + language = language, + verbose = verbose, + resume = resume, + params = params, + func = func, + patience = patience + ) + + handle = bas_clear(handle, oldBasePath, func) + + rewrite_annots(handle, verbose = verbose) +} + +##' Creates canonical pronunciation attributes for a tier of tokenized orthographical words. +##' +##' This function calls the G2P webservice to add canonical pronunciation attributes in SAMPA (default) +##' or IPA to a tier of tokenized orthographical words. It is usually called after tokenization +##' with \link{runBASwebservice_g2pForTokenization}. Its output can be used as input to +##' \link{runBASwebservice_maus} or \link{runBASwebservice_chunker}. +##' \strong{This function requires an internet connection.} +##' +##' By default, G2P is called in MAUS embed mode. This is important if you intend to use MAUS +##' afterwards. To disable MAUS embed mode, call this function with params=list(embed="no"). +##' To derive IPA symbols, add outsym="ipa" to the parameter list. +##' +##' @family BAS webservice functions +##' @export +##' +##' @param orthoAttributeDefinitionName name of a attribute (not level!) containing orthographic words. +##' +##' @inheritParams runBASwebservice_all +##' @inheritParams runBASwebservice_maus + + +runBASwebservice_g2pForPronunciation <- function(handle, + orthoAttributeDefinitionName, + language, + + canoAttributeDefinitionName = "KAN", + + params = list(embed = "maus"), + + patience = 0, + resume = FALSE, + verbose = TRUE) +{ + check_emuDBhandle(handle) + func = "g2p_pronunciation" + oldBasePath = handle$basePath + handle = bas_prepare(handle, resume, verbose, func) + + bas_run_g2p_for_pronunciation_dbi( + handle = handle, + orthoAttributeDefinitionName = orthoAttributeDefinitionName, + language = language, + canoAttributeDefinitionName = canoAttributeDefinitionName, + verbose = verbose, + resume = resume, + params = params, + func = func, + patience = patience + ) + + handle = bas_clear(handle, oldBasePath, func) + + rewrite_annots(handle, verbose = verbose) +} + + + +##################################################################### +########################### CHUNKER ################################# +##################################################################### + +##' Creates a chunk segmentation using the webservice Chunker. +##' +##' When audio input files are longer than approximately 10 minutes, alignment-based segmentation +##' tools such as MAUS will take a long time to run. In these cases, the Chunker pre-segments +##' the input into more digestible "chunks". As input, it requires a word tier with canonical +##' pronunciation attributes (which can be derived by \link{runBASwebservice_g2pForPronunciation}). +##' The resulting chunk level can be passed as input to \link{runBASwebservice_maus}. +##' \strong{This function requires an internet connection.} +##' +##' Please note that the chunker output is \strong{not} a semantically meaningful sentence +##' or turn segmentation, meaning that it cannot be used for analyses of sentence durations and the like. +##' By default, the chunker is called in force rescue mode. This means that the chunker is first run +##' in its normal mode, and switches to forced chunking mode only when it fails to find chunks that +##' are short enough for processing by MAUS. To disable the force mode completely, call this function with +##' params=list(force="false"). To skip the normal chunking mode and go directly into forced chunking +##' mode, use params=list(force="true"). +##' +##' @family BAS webservice functions +##' +##' @export +##' @param canoAttributeDefinitionName name of the attribute (not level!) containing a canonical pronunciation of the words. +##' @param rootLevel if provided, the new level will be linked to the root level +##' @param orthoAttributeDefinitionName if provided, chunk attributes will contain orthographic instead of SAMPA strings. +##' Must be paired with the canonical pronunciation attributes in canoAttributeDefinitionName. +##' @param chunkAttributeDefinitionName attribute name for the chunk segmentation +##' +##' @inheritParams runBASwebservice_all +##' @inheritParams runBASwebservice_maus + +runBASwebservice_chunker <- function(handle, + canoAttributeDefinitionName, + language, + + chunkAttributeDefinitionName = "TRN", + rootLevel = NULL, + orthoAttributeDefinitionName = NULL, + + params = list(force = "rescue"), + + perspective = "default", + patience = 0, + resume = FALSE, + verbose = TRUE) +{ + check_emuDBhandle(handle) + func = "chunker" + oldBasePath = handle$basePath + handle = bas_prepare(handle, resume, verbose, func) + + bas_run_chunker_dbi( + handle = handle, + canoAttributeDefinitionName = canoAttributeDefinitionName, + chunkAttributeDefinitionName = chunkAttributeDefinitionName, + orthoAttributeDefinitionName = orthoAttributeDefinitionName, + language = language, + verbose = verbose, + params = params, + oldBasePath = oldBasePath, + perspective = perspective, + resume = resume, + rootLevel = rootLevel, + func = func, + patience = patience + ) + + handle = bas_clear(handle, oldBasePath, func) + + rewrite_annots(handle, verbose = verbose) +} + + + + +##################################################################### +############################ MINNI ################################## +##################################################################### + +##' Creates a rough phonetic segmentation by running the phoneme decoder webservice MINNI. +##' +##' The MINNI phoneme decoder performs phoneme-based decoding on the signal without input from +##' the transcription. Therefore, labelling quality is usually worse than that obtained from +##' MAUS (\link{runBASwebservice_maus}). Contrary to MAUS however, there is no need for a +##' pre-existing transcription. +##' +##' All necessary level, link and attribute definitions are created in the process. +##' +##' @family BAS webservice functions +##' @export +##' +##' @inheritParams runBASwebservice_all +##' @inheritParams runBASwebservice_maus +##' @inheritParams runBASwebservice_chunker + + + +runBASwebservice_minni <- function(handle, + language, + + minniAttributeDefinitionName = "MINNI", + rootLevel = NULL, + + params = list(), + + perspective = "default", + patience = 0, + resume = FALSE, + verbose = TRUE) +{ + check_emuDBhandle(handle) + func = "minni" + oldBasePath = handle$basePath + handle = bas_prepare(handle, resume, verbose, func) + + bas_run_minni_dbi( + handle = handle, + language = language, + minniAttributeDefinitionName = minniAttributeDefinitionName, + verbose = verbose, + rootLevel = rootLevel, + resume = resume, + params = params, + oldBasePath = oldBasePath, + perspective = perspective, + func = func, + patience = patience + ) + + handle = bas_clear(handle, oldBasePath, func) + + rewrite_annots(handle, verbose = verbose) +} + + +##################################################################### +########################### PHO2SYL ################################# +##################################################################### + +##' Adds syllabified word labels to a word level that already contains canonical pronunciations. +##' +##' This function calls the webservice Pho2Syl to add syllabified canonical pronunciation labels +##' to a word level that already contains unsyllabified canonical pronunciation labels (as can be +##' derived using \link{runBASwebservice_g2pForPronunciation}). \strong{This function requires an internet +##' connection.} +##' +##' @family BAS webservice functions +##' @export +##' @param canoAttributeDefinitionName name of the attribute (not level!) containing a canonical pronunciation of the words. +##' +##' @inheritParams runBASwebservice_all +##' @inheritParams runBASwebservice_maus + +runBASwebservice_pho2sylCanonical <- function(handle, + canoAttributeDefinitionName, + language, + canoSylAttributeDefinitionName = "KAS", + params = list(), + patience = 0, + resume = FALSE, + verbose = TRUE) +{ + check_emuDBhandle(handle) + func = "pho2syl_canonical" + oldBasePath = handle$basePath + handle = bas_prepare(handle, resume, verbose, func) + + bas_run_pho2syl_canonical_dbi( + handle = handle, + canoAttributeDefinitionName = canoAttributeDefinitionName, + language = language, + verbose = verbose, + canoSylAttributeDefinitionName = canoSylAttributeDefinitionName, + resume = resume, + params = params, + func = func, + patience = patience + ) + + + handle = bas_clear(handle, oldBasePath, func) + + rewrite_annots(handle, verbose = verbose) +} + + +##' Creates a syllable segmentation on the basis of a phonetic segmentation. +##' +##' This function calls the BAS webservice Pho2Syl to create a syllable segmentation on the basis +##' of a phonetic segmentation (created by, for example, \link{runBASwebservice_maus}). +##' You can provide the level of your word segmentation, or of any other hierarchically +##' dominant segmentation, via the superLevel parameter. This way, the new syllable +##' items can be linked up into the pre-existing hierarchy. If you do not provide +##' this input, the syllables will only be linked down to the segments. +##' +##' All necessary level, link and parameter definitions are created in the process. +##' By default, Pho2Syl is run in word synchronized mode. To override this, call this function +##' with the parameter params=list(wsync="no"). +##' +##' @family BAS webservice functions +##' @export +##' @param segmentAttributeDefinitionName name of the attribute (not level!) containing a phonetic segmentation. +##' @param superLevel name of the segments' parent level (typically the word level). +##' If set to NULL, the syllable level cannot be linked up. +##' +##' @inheritParams runBASwebservice_all +##' @inheritParams runBASwebservice_maus + +runBASwebservice_pho2sylSegmental <- function(handle, + segmentAttributeDefinitionName, + language, + + superLevel = NULL, + + sylAttributeDefinitionName = "MAS", + + params = list(wsync = "yes"), + + perspective = "default", + patience = 0, + resume = FALSE, + verbose = TRUE) +{ + check_emuDBhandle(handle) + func = "pho2syl_segmental" + oldBasePath = handle$basePath + handle = bas_prepare(handle, resume, verbose, func) + + bas_run_pho2syl_segmental_dbi( + handle = handle, + segmentAttributeDefinitionName = segmentAttributeDefinitionName, + language = language, + verbose = verbose, + sylAttributeDefinitionName = sylAttributeDefinitionName, + superLevel = superLevel, + resume = resume, + params = params, + func = func, + patience = patience + ) + + + handle = bas_clear(handle, oldBasePath, func) + + rewrite_annots(handle, verbose = verbose) +} diff --git a/R/emuR-bas_webservicesDBI.R b/R/emuR-bas_webservicesDBI.R new file mode 100644 index 00000000..135ba000 --- /dev/null +++ b/R/emuR-bas_webservicesDBI.R @@ -0,0 +1,2536 @@ +##################################################################### +############################# MAUS ################################## +##################################################################### + +bas_run_maus_dbi <- function(handle, + canoAttributeDefinitionName, + mausAttributeDefinitionName, + language, + chunkLevel, + verbose, + params, + resume, + oldBasePath, + perspective, + turnChunkLevelIntoItemLevel, + patience, + func) +{ + service = "runMAUS" + workdir = bas_workdir(handle, func) + + mausLevel = mausAttributeDefinitionName + + bas_check_this_is_a_new_label(handle, mausAttributeDefinitionName) + + canoLevel = get_levelNameForAttributeName(handle, canoAttributeDefinitionName) + if (is.null(canoLevel)) { + stop("Could not find a level for label", canoAttributeDefinitionName) + } + + if (is.null(chunkLevel) && + get_levelDefinition(handle, canoLevel)$type == "SEGMENT") { + chunkAttributeDefinitionName = canoAttributeDefinitionName + } + + if (!is.null(chunkLevel)) + { + if (is.null(get_levelDefinition(handle, chunkLevel))) + { + stop("Could not find level ", chunkLevel) + } + + if (get_levelDefinition(handle, chunkLevel)$type != "SEGMENT") + { + stop("Chunk level ", chunkLevel, " must be a segment level") + } + + if (!("USETRN" %in% names(params))) + { + if (verbose) + { + cat("INFO: Setting USETRN to true (chunk level:", + chunkLevel, + ")\n") + } + params$USETRN = "true" + } + } + + bundles_list = bas_evaluate_language_option(handle = handle, language = language) + + if (nrow(bundles_list) > 0) + { + bas_ping(verbose) + + if (verbose) + { + cat("INFO: Running MAUS on emuDB containing", + nrow(bundles_list), + "bundle(s)...\n") + progress = 0 + pb = utils::txtProgressBar( + min = 0, + max = nrow(bundles_list), + initial = 0, + style = 3 + ) + utils::setTxtProgressBar(pb, progress) + } + + queryTxt = paste0("[", canoAttributeDefinitionName, "=~.*\\S.*]") + cano_items = suppressWarnings(query(handle, queryTxt, calcTimes = FALSE)) + + if (!is.null(chunkLevel)) + { + queryTxt = paste0("[", list_attributeDefinitions(handle, chunkLevel)[1, "name"], "=~ .*]") + trn_items_tmp = query(handle, queryTxt, calcTimes = FALSE) + + if (nrow(trn_items_tmp) > 0) + { + if (trn_items_tmp[1, "type"] != "SEGMENT") + { + stop("Chunk segmentation must be of type SEGMENT") + } + } + + trn_items = query(handle, queryTxt) + } + + + for (bundle_idx in 1:nrow(bundles_list)) + { + bundle = bundles_list[bundle_idx, "bundle", drop = TRUE] + session = bundles_list[bundle_idx, "session", drop = TRUE] + language = bundles_list[bundle_idx, "language", drop = TRUE] + + samplerate = bas_get_samplerate(handle, session, bundle) + + if (resume && + bas_label_exists_in_bundle(handle, session, bundle, mausAttributeDefinitionName)) + { + if (verbose) + { + cat("\nSkipping bundle", bundle) + } + next + } + + cano_items_bundle = cano_items[cano_items$bundle == bundle & + cano_items$session == session,] + + if (nrow(cano_items_bundle) > 0) + { + seq_idx = 1 + max_id = bas_get_max_id(handle, session, bundle) + + kanfile = file.path(workdir, paste0(bundle, ".kan.par")) + maufile = file.path(workdir, paste0(bundle, ".mau.par")) + signalfile = bas_get_signal_path(handle, session, bundle, oldBasePath) + + kancon <- file(kanfile) + open(kancon, "w") + writeLines(paste0("SAM: ", samplerate, "\nLBD:"), kancon, useBytes = TRUE) + + bas_id = 0 + item_id_to_bas_id = new.env(hash = TRUE) + bas_id_to_item_id = new.env(hash = TRUE) + + for (label_idx in 1:nrow(cano_items_bundle)) + { + cano_label = stringr::str_trim(cano_items_bundle[label_idx, "labels"]) + cano_item_id = cano_items_bundle[label_idx, "start_item_id"] + + kanline = paste0("KAN: ", bas_id, " ", cano_label) + writeLines(kanline, kancon, useBytes = TRUE) + + item_id_to_bas_id[[toString(cano_item_id)]] = bas_id + bas_id_to_item_id[[toString(bas_id)]] = cano_item_id + bas_id = bas_id + 1 + } + + if (!is.null(chunkLevel)) + { + trn_items_bundle = trn_items[trn_items$bundle == bundle & + trn_items$session == session,] + + if (nrow(trn_items_bundle) == 0) + { + close(kancon) + next + } + + if (nrow(trn_items_bundle) > 0) + { + # suppress differing length warning + linked_kan_items = suppressWarnings(requery_hier( + handle, + trn_items_bundle, + canoAttributeDefinitionName, + calcTimes = FALSE, + collapse = TRUE + )) + # suppress differing length warning + linked_trn_items = suppressWarnings(requery_hier( + handle, + linked_kan_items %>% tidyr::drop_na(labels), + chunkLevel, + calcTimes = TRUE, + collapse = TRUE + )) + + if(nrow(linked_kan_items) != nrow(linked_trn_items)) + { + stop("Something has gone wrong in the turn query...") + } + + for(trn_idx in 1:nrow(linked_trn_items)) + { + turn_start = linked_trn_items[trn_idx, "sample_start"] + turn_end = linked_trn_items[trn_idx, "sample_end"] + item_id_start = linked_kan_items[trn_idx, "start_item_id"] + item_id_end = linked_kan_items[trn_idx, "end_item_id"] + bas_id_start = item_id_to_bas_id[[toString(item_id_start)]] + bas_id_end = item_id_to_bas_id[[toString(item_id_end)]] + + trnline = paste( + "TRN:", + turn_start, + turn_end - turn_start, + paste0(bas_id_start:bas_id_end, collapse = ","), + "_" + ) + + writeLines(trnline, kancon, useBytes = TRUE) + } + } + } + + close(kancon) + + curlParams = list( + LANGUAGE = language, + OUTFORMAT = "par", + SIGNAL = httr::upload_file(signalfile), + BPF = httr::upload_file(kanfile) + ) + + for (key in names(params)) + { + if (!(key %in% names(curlParams))) + { + curlParams[[key]] = params[[key]] + } + } + + mauLines = bas_curl(service, curlParams, maufile, session, bundle, patience) + + if (length(mauLines) > 0) + { + for (line_idx in 1:length(mauLines)) + { + line = mauLines[line_idx] + if (stringr::str_detect(line, "^MAU:")) + { + splitline = stringr::str_split_fixed(line, "\\s+", n = 5) + start = as.integer(splitline[2]) + duration = as.integer(splitline[3]) + item_id = max_id + seq_idx + label = stringr::str_replace_all(stringr::str_trim(splitline[5]), "'", "''") + + bas_id = splitline[[4]] + + bas_add_item( + handle = handle, + session = session, + bundle = bundle, + seq_idx = seq_idx, + item_id = item_id, + level = + mausLevel, + samplerate = samplerate, + type = "SEGMENT", + sample_start = start, + sample_dur = duration + ) + + seq_idx = seq_idx + 1 + + bas_add_label( + handle = handle, + session = session, + bundle = bundle, + item_id = item_id, + label_idx = 1, + label_name = + mausAttributeDefinitionName, + label = label + ) + + if (as.integer(bas_id) >= 0) + { + bas_add_link( + handle = handle, + session = session, + bundle = bundle, + from_id = bas_id_to_item_id[[splitline[4]]], + to_id = + item_id + ) + } + } + } + } + } + if (verbose) + { + utils::setTxtProgressBar(pb, bundle_idx) + } + } + } + if (verbose) + { + cat("\n") + } + + add_levelDefinition(handle, + mausLevel, + "SEGMENT", + verbose = FALSE, + rewriteAllAnnots = FALSE) + bas_new_canvas(handle, perspective, mausLevel) + add_linkDefinition(handle, "ONE_TO_MANY", canoLevel, mausLevel) + + mausDescription = bas_paste_description("Phonetic segmentation by MAUS", canoAttributeDefinitionName, service, params) + set_attributeDescription(handle, mausLevel, mausAttributeDefinitionName, mausDescription) + + if (turnChunkLevelIntoItemLevel && !is.null(chunkLevel)) { + bas_segment_to_item_level(handle, chunkLevel) + } +} + +##################################################################### +############################# MINNI ################################# +##################################################################### + + +bas_run_minni_dbi <- function(handle, + language, + minniAttributeDefinitionName, + verbose, + rootLevel, + params, + resume, + oldBasePath, + perspective, + patience, + func) +{ + service = "runMINNI" + workdir = bas_workdir(handle, func) + + minniLevel = minniAttributeDefinitionName + bas_check_this_is_a_new_label(handle, minniAttributeDefinitionName) + + if(!is.null(rootLevel)) + { + if (is.null(get_levelDefinition(handle, rootLevel))) + { + stop("Could not find level ", rootLevel) + } + } + + bundles_list = bas_evaluate_language_option(handle = handle, language = language) + + if (nrow(bundles_list) > 0) + { + bas_ping(verbose) + + if (verbose) + { + cat("INFO: Running MINNI on emuDB containing", + nrow(bundles_list), + "bundle(s)...\n") + progress = 0 + pb = utils::txtProgressBar( + min = 0, + max = nrow(bundles_list), + initial = 0, + style = 3 + ) + utils::setTxtProgressBar(pb, progress) + } + + + for (bundle_idx in 1:nrow(bundles_list)) + { + bundle = bundles_list[bundle_idx, "bundle", drop = TRUE] + session = bundles_list[bundle_idx, "session", drop = TRUE] + language = bundles_list[bundle_idx, "language", drop = TRUE] + + samplerate = bas_get_samplerate(handle, session, bundle) + signalfile = bas_get_signal_path(handle, session, bundle, oldBasePath) + + if (resume && + bas_label_exists_in_bundle(handle, session, bundle, minniAttributeDefinitionName)) + { + if (verbose) + { + cat("\nSkipping bundle", bundle) + } + next + } + + top_id = bas_get_top_id(handle, session, bundle, rootLevel) + + max_id = bas_get_max_id(handle, session, bundle) + + minnifile = file.path(workdir, paste0(bundle, ".minni.par")) + + + curlParams = list( + LANGUAGE = language, + OUTFORMAT = "par", + SIGNAL = httr::upload_file(signalfile) + ) + + for (key in names(params)) + { + if (!(key %in% names(curlParams))) + { + curlParams[[key]] = params[[key]] + } + } + + minniLines = bas_curl(service, curlParams, minnifile, session, bundle, patience) + + if (length(minniLines) > 0) + { + seq_idx = 1 + for (line_idx in 1:length(minniLines)) + { + line = minniLines[line_idx] + if (stringr::str_detect(line, "^MAU:")) + { + splitline = stringr::str_split_fixed(line, "\\s+", n = 5) + item_id = max_id + seq_idx + label = stringr::str_replace_all(stringr::str_trim(splitline[5]), "'", "''") + + bas_id = splitline[[4]] + + bas_add_item( + handle = handle, + session = session, + bundle = bundle, + seq_idx = seq_idx, + item_id = item_id, + level = minniLevel, + samplerate = + samplerate, + type = "SEGMENT", + sample_start = as.integer(splitline[2]), + sample_dur = + as.integer(splitline[3]) + ) + + seq_idx = seq_idx + 1 + + bas_add_label( + handle = handle, + session = session, + bundle = bundle, + item_id = item_id, + label_idx = 1, + label_name = + minniAttributeDefinitionName, + label = label + ) + + if ((!is.null(top_id)) && bas_id >= 0) + { + bas_add_link( + handle = handle, + session = session, + bundle = bundle, + from_id = top_id, + to_id = item_id + ) + } + } + } + } + if (verbose) + { + utils::setTxtProgressBar(pb, bundle_idx) + } + } + } + if (verbose) + { + cat("\n") + } + + add_levelDefinition(handle, + minniLevel, + "SEGMENT", + verbose = FALSE, + rewriteAllAnnots = FALSE) + bas_new_canvas(handle, perspective, minniLevel) + + if (!is.null(rootLevel)) + { + add_linkDefinition(handle, "ONE_TO_MANY", rootLevel, minniLevel) + } + + minniDescription = bas_paste_description("Rough phonetic segmentation", NULL, service, params) + set_attributeDescription(handle, minniLevel, minniAttributeDefinitionName, minniDescription) +} + +##################################################################### +############################## G2P ################################## +##################################################################### + +bas_run_g2p_for_tokenization_dbi <- function(handle, + transcriptionAttributeDefinitionName, + canoAttributeDefinitionName, + orthoAttributeDefinitionName, + language, + verbose, + resume, + params, + patience, + func) +{ + service = "runG2P" + workdir = bas_workdir(handle, func) + + orthoLevel = orthoAttributeDefinitionName + + bas_check_this_is_a_new_label(handle, orthoAttributeDefinitionName) + + transcriptionLevel = get_levelNameForAttributeName(handle, transcriptionAttributeDefinitionName) + if (is.null(transcriptionLevel)) { + stop("Could not find a level for label ", transcriptionAttributeDefinitionName) + } + + bundles_list = bas_evaluate_language_option(handle = handle, language = language) + + if (nrow(bundles_list) > 0) + { + bas_ping(verbose) + + if (verbose) + { + cat( + "INFO: Running G2P tokenizer on emuDB containing", + nrow(bundles_list), + "bundle(s)...\n" + ) + progress = 0 + pb = utils::txtProgressBar( + min = 0, + max = nrow(bundles_list), + initial = 0, + style = 3 + ) + utils::setTxtProgressBar(pb, progress) + } + + queryTxt = paste0("[", transcriptionAttributeDefinitionName, "=~.*\\S.*]") + transcription_items = suppressWarnings(query(handle, queryTxt, calcTimes = FALSE)) + + for (bundle_idx in 1:nrow(bundles_list)) + { + bundle = bundles_list[bundle_idx, "bundle", drop = TRUE] + session = bundles_list[bundle_idx, "session", drop = TRUE] + language = bundles_list[bundle_idx, "language", drop = TRUE] + + samplerate = bas_get_samplerate(handle, session, bundle) + + if (resume && + bas_label_exists_in_bundle(handle, session, bundle, orthoAttributeDefinitionName)) + { + if (verbose) + { + cat("\nSkipping bundle", bundle) + } + next + } + + transcription_items_bundle = transcription_items[transcription_items$bundle == bundle & + transcription_items$session == session,] + + if (nrow(transcription_items_bundle) > 0) + { + seq_idx = 1 + max_id = bas_get_max_id(handle, session, bundle) + + for (label_idx in 1:nrow(transcription_items_bundle)) + { + transcription_label = stringr::str_trim(transcription_items_bundle [label_idx, "labels"]) + transcription_item_id = transcription_items_bundle [label_idx, "start_item_id"] + + textfile = file.path(workdir, paste0( + bundle, + ".", + toString(transcription_item_id), + ".txt" + )) + g2pfile = file.path(workdir, + paste0( + bundle, + ".", + toString(transcription_item_id), + ".g2p.par" + )) + + writeLines(transcription_label, con = textfile, useBytes = TRUE) + + curlParams = list( + lng = language, + iform = "txt", + oform = "bpfs", + i = httr::upload_file(textfile) + ) + + for (key in names(params)) + { + if (!(key %in% names(curlParams))) + { + curlParams[[key]] = params[[key]] + } + } + + g2pLines = bas_curl(service, curlParams, g2pfile, session, bundle, patience) + + if (length(g2pLines) > 0) + { + for (line_idx in 1:length(g2pLines)) + { + line = g2pLines[line_idx] + if (stringr::str_detect(line, "^ORT:")) + { + splitline = stringr::str_split_fixed(line, "\\s+", n = 3) + item_id = max_id + seq_idx + label = stringr::str_replace_all(splitline[3], "'", "''") + + bas_add_item( + handle = handle, + session = session, + bundle = bundle, + seq_idx = seq_idx, + item_id = item_id, + level = + orthoLevel, + samplerate = samplerate, + type = "ITEM" + ) + + seq_idx = seq_idx + 1 + + bas_add_label( + handle = handle, + session = session, + bundle = bundle, + item_id = item_id, + label_idx = 1, + label_name = + orthoAttributeDefinitionName, + label = label + ) + + bas_add_link( + handle = handle, + session = session, + bundle = bundle, + from_id = transcription_item_id, + to_id = + item_id + ) + } + } + } + } + } + if (verbose) + { + utils::setTxtProgressBar(pb, bundle_idx) + } + } + } + if (verbose) + { + cat("\n") + } + add_levelDefinition(handle, + orthoLevel, + "ITEM", + verbose = FALSE, + rewriteAllAnnots = FALSE) + add_linkDefinition(handle, "ONE_TO_MANY", transcriptionLevel, orthoLevel) + + orthoDescription = bas_paste_description("Tokenized and normalized orthography level", + transcriptionAttributeDefinitionName, + service, + params) + set_attributeDescription(handle, orthoLevel, orthoAttributeDefinitionName, orthoDescription) +} + + +bas_run_g2p_for_pronunciation_dbi <- function(handle, + orthoAttributeDefinitionName, + canoAttributeDefinitionName, + language, + verbose, + resume, + params, + patience, + func) +{ + service = "runG2P" + workdir = bas_workdir(handle, func) + + orthoLevel = get_levelNameForAttributeName(handle, orthoAttributeDefinitionName) + if (is.null(orthoLevel)) { + stop("Could not find a level for label ", orthoAttributeDefinitionName) + } + + bas_check_this_is_a_new_label(handle, canoAttributeDefinitionName) + + bundles_list = bas_evaluate_language_option(handle = handle, language = language) + + if (nrow(bundles_list) > 0) + { + bas_ping(verbose) + + if (verbose) + { + cat("INFO: Running G2P on emuDB containing", + nrow(bundles_list), + "bundle(s)...\n") + progress = 0 + pb = utils::txtProgressBar( + min = 0, + max = nrow(bundles_list), + initial = 0, + style = 3 + ) + utils::setTxtProgressBar(pb, progress) + } + + queryTxt = paste0("[", orthoAttributeDefinitionName, "=~.*\\S.*]") + ortho_items = suppressWarnings(query(handle, queryTxt, calcTimes = FALSE)) + + for (bundle_idx in 1:nrow(bundles_list)) + { + bundle = bundles_list[bundle_idx, "bundle", drop = TRUE] + session = bundles_list[bundle_idx, "session", drop = TRUE] + language = bundles_list[bundle_idx, "language", drop = TRUE] + + samplerate = bas_get_samplerate(handle, session, bundle) + + if (resume && + bas_label_exists_in_bundle(handle, session, bundle, canoAttributeDefinitionName)) + { + if (verbose) + { + cat("\nSkipping bundle", bundle) + } + next + } + + ortho_items_bundle = ortho_items[ortho_items$bundle == bundle & + ortho_items$session == session,] + + if (nrow(ortho_items_bundle) > 0) + { + seq_idx = 1 + max_id = bas_get_max_id(handle, session, bundle) + + orthofile = file.path(workdir, paste0(bundle, ".orth.par")) + kanfile = file.path(workdir, paste0(bundle, ".kan.par")) + + orthoCon <- file(orthofile) + open(orthoCon, "w") + writeLines(paste0("SAM: ", samplerate, "\nLBD:"), orthoCon, useBytes = TRUE) + + bas_id = 0 + + bas_id_to_item_id = new.env(hash = TRUE) + + for (label_idx in 1:nrow(ortho_items_bundle)) + { + ortho_label = stringr::str_trim(ortho_items_bundle[label_idx, "labels"]) + ortho_item_id = ortho_items_bundle[label_idx, "start_item_id"] + + writeLines(paste("ORT:", bas_id, ortho_label), orthoCon, useBytes = TRUE) + bas_id_to_item_id[[toString(bas_id)]] = ortho_item_id + bas_id = bas_id + 1 + } + + close(orthoCon) + + curlParams = list( + lng = language, + iform = "bpf", + oform = "bpfs", + i = httr::upload_file(orthofile) + ) + + for (key in names(params)) + { + if (!(key %in% names(curlParams))) + { + curlParams[[key]] = params[[key]] + } + } + + g2pLines = bas_curl(service, curlParams, kanfile, session, bundle, patience) + + if (length(g2pLines) > 0) + { + for (line_idx in 1:length(g2pLines)) + { + line = g2pLines[line_idx] + if (stringr::str_detect(line, "^KAN:")) + { + splitline = stringr::str_split_fixed(line, "\\s+", n = 3) + item_id = bas_id_to_item_id[[splitline[2]]] + label = stringr::str_replace_all(stringr::str_trim(splitline[3]), "'", "''") + + bas_add_label( + handle = handle, + session = session, + bundle = bundle, + item_id = item_id, + label_idx = bas_get_max_label_idx(handle, session, bundle, item_id) + 1, + label_name = + canoAttributeDefinitionName, + label = label + ) + } + } + } + } + if (verbose) + { + utils::setTxtProgressBar(pb, bundle_idx) + } + } + } + if (verbose) + { + cat("\n") + } + + internal_add_attributeDefinition( + handle, + orthoLevel, + canoAttributeDefinitionName, + verbose = FALSE, + rewriteAllAnnots = FALSE, + insertLabels = FALSE + ) + canoDescription = bas_paste_description("Canonical pronunciation word forms", + orthoAttributeDefinitionName, + service, + params) + + set_attributeDescription(handle, orthoLevel, canoAttributeDefinitionName, canoDescription) +} + +##################################################################### +############################ CHUNKER ################################ +##################################################################### + +bas_run_chunker_dbi <- function(handle, + canoAttributeDefinitionName, + chunkAttributeDefinitionName, + language, + verbose, + rootLevel, + orthoAttributeDefinitionName, + params, + resume, + oldBasePath, + perspective, + patience, + func) +{ + service = "runChunker" + workdir = bas_workdir(handle, func) + + chunkLevel = chunkAttributeDefinitionName + bas_check_this_is_a_new_label(handle, chunkAttributeDefinitionName) + + canoLevel = get_levelNameForAttributeName(handle, canoAttributeDefinitionName) + if (is.null(canoLevel)) { + stop("Could not find a level for label ", canoAttributeDefinitionName) + } + + if (!is.null(rootLevel)) + { + if (is.null(get_levelDefinition(handle, rootLevel))) + { + stop("Could not find level ", rootLevel) + } + } + + bundles_list = bas_evaluate_language_option(handle = handle, language = language) + if (nrow(bundles_list) > 0) + { + bas_ping(verbose) + if (verbose) + { + cat( + "INFO: Running Chunker on emuDB containing", + nrow(bundles_list), + "bundle(s)...\n" + ) + progress = 0 + pb = utils::txtProgressBar( + min = 0, + max = nrow(bundles_list), + initial = 0, + style = 3 + ) + utils::setTxtProgressBar(pb, progress) + } + + queryTxt = paste0("[", canoAttributeDefinitionName, "=~.*\\S.*]") + cano_items = suppressWarnings(query(handle, queryTxt, calcTimes = FALSE)) + + for (bundle_idx in 1:nrow(bundles_list)) + { + bundle = bundles_list[bundle_idx, "bundle", drop = TRUE] + session = bundles_list[bundle_idx, "session", drop = TRUE] + language = bundles_list[bundle_idx, "language", drop = TRUE] + + samplerate = bas_get_samplerate(handle, session, bundle) + + if (resume && + bas_label_exists_in_bundle(handle, session, bundle, chunkAttributeDefinitionName)) + { + if (verbose) + { + cat("\nSkipping bundle", bundle) + } + next + } + + cano_items_bundle = cano_items[cano_items$bundle == bundle & + cano_items$session == session,] + + top_id = bas_get_top_id(handle, session, bundle, rootLevel) + + if (nrow(cano_items_bundle) > 0) + { + seq_idx = 1 + max_id = bas_get_max_id(handle, session, bundle) + + kanfile = file.path(workdir, paste0(bundle, ".kan.par")) + trnfile = file.path(workdir, paste0(bundle, ".trn.par")) + signalfile = bas_get_signal_path(handle, session, bundle, oldBasePath) + + kancon <- file(kanfile) + open(kancon, "w") + writeLines(paste0("SAM: ", samplerate, "\nLBD:"), kancon, useBytes = TRUE) + + bas_id = 0 + item_id_to_bas_id = new.env(hash = TRUE) + bas_id_to_item_id = new.env(hash = TRUE) + + for (label_idx in 1:nrow(cano_items_bundle)) + { + cano_label = stringr::str_trim(cano_items_bundle[label_idx, "labels"]) + cano_item_id = cano_items_bundle[label_idx, "start_item_id"] + + if (!is.null(orthoAttributeDefinitionName)) + { + # suppress differing length warning + ortho_labels = suppressWarnings(requery_hier( + handle, + seglist = cano_items_bundle[label_idx,], + level = orthoAttributeDefinitionName, + calcTimes = FALSE + )) + + if (length(ortho_labels) > 0) + { + writeLines(paste0("ORT: ", bas_id, " ", ortho_labels[1, "labels"]), + kancon, useBytes = TRUE) + } + } + + writeLines(paste0("KAN: ", bas_id, " ", cano_label), + kancon, useBytes = TRUE) + item_id_to_bas_id[[toString(cano_item_id)]] = bas_id + bas_id_to_item_id[[toString(bas_id)]] = cano_item_id + bas_id = bas_id + 1 + } + + close(kancon) + + curlParams = list( + language = language, + audio = httr::upload_file(signalfile), + bpf = httr::upload_file(kanfile) + ) + + for (key in names(params)) + { + if (!(key %in% names(curlParams))) + { + curlParams[[key]] = params[[key]] + } + } + + trnLines = bas_curl(service, curlParams, trnfile, session, bundle, patience) + + if (length(trnLines) > 0) + { + for (line_idx in 1:length(trnLines)) + { + line = trnLines[line_idx] + if (stringr::str_detect(line, "^TRN:")) + { + splitline = stringr::str_split_fixed(line, "\\s+", n = 5) + start = as.integer(splitline[2]) + duration = as.integer(splitline[3]) + item_id = max_id + seq_idx + label = stringr::str_replace_all(stringr::str_trim(splitline[5]), "'", "''") + + bas_ids = splitline[[4]] + bas_ids_split = stringr::str_split(bas_ids, ",")[[1]] + + if (as.integer(bas_ids_split[1]) >= 0) + { + bas_add_item( + handle = handle, + session = session, + bundle = bundle, + seq_idx = seq_idx, + item_id = item_id, + level = + chunkLevel, + samplerate = samplerate, + type = "SEGMENT", + sample_start = start, + sample_dur = duration + ) + + seq_idx = seq_idx + 1 + + bas_add_label( + handle = handle, + session = session, + bundle = bundle, + item_id = item_id, + label_idx = 1, + label_name = + chunkAttributeDefinitionName, + label = label + ) + + for (split_idx in 1:length(bas_ids_split)) + { + bas_add_link( + handle = handle, + session = session, + bundle = bundle, + to_id = bas_id_to_item_id[[bas_ids_split[split_idx]]], + from_id = + item_id + ) + } + + if (!is.null(top_id)) + { + bas_add_link( + handle = handle, + session = session, + bundle = bundle, + to_id = item_id, + from_id = top_id + ) + } + } + } + } + } + } + if (verbose) + { + utils::setTxtProgressBar(pb, bundle_idx) + } + } + } + if (verbose) + { + cat("\n") + } + + add_levelDefinition(handle, + chunkLevel, + "SEGMENT", + verbose = FALSE, + rewriteAllAnnots = FALSE) + bas_new_canvas(handle, perspective, chunkLevel) + add_linkDefinition(handle, "ONE_TO_MANY", chunkLevel, canoLevel) + if (!is.null(rootLevel)) + { + add_linkDefinition(handle, "ONE_TO_MANY", rootLevel, chunkLevel) + } + + chunkDescription = bas_paste_description("Chunk segmentation", canoAttributeDefinitionName, service, params) + set_attributeDescription(handle, chunkLevel, chunkAttributeDefinitionName, chunkDescription) +} + +##################################################################### +############################ PHO2SYL ################################ +##################################################################### + +bas_run_pho2syl_canonical_dbi <- function(handle, + canoAttributeDefinitionName, + language, + verbose, + canoSylAttributeDefinitionName, + resume, + params, + patience, + func) +{ + service = "runPho2Syl" + workdir = bas_workdir(handle, func) + + canoLevel = get_levelNameForAttributeName(handle, canoAttributeDefinitionName) + if (is.null(canoLevel)) { + stop("Could not find a level for label ", canoAttributeDefinitionName) + } + + bas_check_this_is_a_new_label(handle, canoSylAttributeDefinitionName) + + bundles_list = bas_evaluate_language_option(handle = handle, language = language) + + if (nrow(bundles_list) > 0) + { + bas_ping(verbose) + if (verbose) + { + cat( + "INFO: Running Pho2Syl (canonical) on emuDB containing", + nrow(bundles_list), + "bundle(s)...\n" + ) + progress = 0 + pb = utils::txtProgressBar( + min = 0, + max = nrow(bundles_list), + initial = 0, + style = 3 + ) + utils::setTxtProgressBar(pb, progress) + } + + queryTxt = paste0("[", canoAttributeDefinitionName, "=~.*\\S.*]") + cano_items = suppressWarnings(query(handle, queryTxt, calcTimes = FALSE)) + + for (bundle_idx in 1:nrow(bundles_list)) + { + bundle = bundles_list[bundle_idx, "bundle", drop = TRUE] + session = bundles_list[bundle_idx, "session", drop = TRUE] + language = bundles_list[bundle_idx, "language", drop = TRUE] + + samplerate = bas_get_samplerate(handle, session, bundle) + + if (resume && + bas_label_exists_in_bundle(handle, session, bundle, canoSylAttributeDefinitionName)) + { + if (verbose) + { + cat("\nSkipping bundle", bundle) + } + next + } + + cano_items_bundle = cano_items[cano_items$bundle == bundle & + cano_items$session == session,] + + + if (nrow(cano_items_bundle) > 0) + { + seq_idx = 1 + max_id = bas_get_max_id(handle, session, bundle) + + kanfile = file.path(workdir, paste0(bundle, ".kan.par")) + kasfile = file.path(workdir, paste0(bundle, ".kas.par")) + + kancon <- file(kanfile) + open(kancon, "w") + writeLines(paste0("SAM: ", samplerate, "\nLBD:"), kancon, useBytes = TRUE) + + bas_id = 0 + bas_id_to_item_id = new.env(hash = TRUE) + + for (label_idx in 1:nrow(cano_items_bundle)) + { + cano_label = stringr::str_trim(cano_items_bundle[label_idx, "labels"]) + cano_item_id = cano_items_bundle[label_idx, "start_item_id"] + + kanline = paste0("KAN: ", bas_id, " ", cano_label) + writeLines(kanline, kancon, useBytes = TRUE) + + bas_id_to_item_id[[toString(bas_id)]] = cano_item_id + bas_id = bas_id + 1 + } + + close(kancon) + + curlParams = list( + lng = language, + i = httr::upload_file(kanfile), + tier = "KAN", + oform = "bpf" + ) + + for (key in names(params)) + { + if (!(key %in% names(curlParams))) + { + curlParams[[key]] = params[[key]] + } + } + + kasLines = bas_curl(service, curlParams, kasfile, session, bundle, patience) + + if (length(kasLines) > 0) + { + for (line_idx in 1:length(kasLines)) + { + line = kasLines[line_idx] + if (stringr::str_detect(line, "^KAS:")) + { + splitline = stringr::str_split_fixed(line, "\\s+", n = 3) + item_id = max_id + seq_idx + label = stringr::str_replace_all(stringr::str_trim(splitline[3]), "'", "''") + + bas_id = splitline[[2]] + if (as.integer(bas_id) >= 0) + { + item_id = bas_id_to_item_id[[bas_id]] + + bas_add_label( + handle = handle, + session = session, + bundle = bundle, + item_id = item_id, + label_idx = bas_get_max_label_idx(handle, session, bundle, item_id) + 1, + label_name = + canoSylAttributeDefinitionName, + label = label + ) + } + } + } + } + } + if (verbose) + { + utils::setTxtProgressBar(pb, bundle_idx) + } + } + } + if (verbose) + { + cat("\n") + } + internal_add_attributeDefinition( + handle, + canoLevel, + canoSylAttributeDefinitionName, + verbose = FALSE, + rewriteAllAnnots = FALSE, + insertLabels = FALSE + ) + + kasDescription = bas_paste_description("Syllabified canonical pronunciation word forms", + canoAttributeDefinitionName, + service, + params) + set_attributeDescription(handle, canoLevel, canoSylAttributeDefinitionName, kasDescription) +} + +bas_run_pho2syl_segmental_dbi <- function(handle, + segmentAttributeDefinitionName, + language, + verbose, + sylAttributeDefinitionName, + superLevel, + resume, + params, + patience, + func) +{ + sylLevel = sylAttributeDefinitionName + + bas_check_this_is_a_new_label(handle, sylAttributeDefinitionName) + + segmentLevel = get_levelNameForAttributeName(handle, segmentAttributeDefinitionName) + if (is.null(segmentLevel)) { + stop("Could not find a level for label ", segmentLevel) + } + + if (get_levelDefinition(handle, segmentLevel)$type != "SEGMENT") { + stop(segmentLevel, + " must be a segment tier in order to run pho2syl from segment") + } + + if (!is.null(superLevel)) + { + if (is.null(get_levelDefinition(handle, superLevel))) { + stop("Could not find level ", superLevel) + } + } + + multilink = FALSE + if ("wsync" %in% names(params) && params$wsync == "no") + { + multilink = TRUE + } + + languages = bas_evaluate_language_option(handle = handle, language = language) + + if (!is.null(superLevel)) + { + bas_run_pho2syl_segmental_dbi_anchored( + handle = handle, + segmentAttributeDefinitionName = segmentAttributeDefinitionName, + segmentLevel = segmentLevel, + languages = languages, + verbose = verbose, + sylAttributeDefinitionName = sylAttributeDefinitionName, + sylLevel = sylLevel, + superLevel = superLevel, + resume = resume, + params = params, + allowmultilink = multilink, + func = func, + patience = patience + ) + } + + else + { + bas_run_pho2syl_segmental_dbi_unanchored( + handle = handle, + segmentAttributeDefinitionName = segmentAttributeDefinitionName, + segmentLevel = segmentLevel, + languages = languages, + verbose = verbose, + sylAttributeDefinitionName = sylAttributeDefinitionName, + sylLevel = sylLevel, + resume = resume, + params = params, + patience = patience, + func = func + ) + } + + add_levelDefinition(handle, + sylLevel, + "SEGMENT", + verbose = FALSE, + rewriteAllAnnots = FALSE) + + sylDescription = bas_paste_description("Syllable segmentation", segmentAttributeDefinitionName, "runPho2Syl", params) + set_attributeDescription(handle, sylLevel, sylAttributeDefinitionName, sylDescription) + + if (!is.null(superLevel)) + { + if (multilink) + { + add_linkDefinition(handle, "MANY_TO_MANY", superLevel, sylLevel) + } + else + { + add_linkDefinition(handle, "ONE_TO_MANY", superLevel, sylLevel) + } + } + + add_linkDefinition(handle, "ONE_TO_MANY", sylLevel, segmentLevel) + + if (verbose) + { + cat("INFO: Autobuilding syllable -> segment links from time information\n") + } + + autobuild_linkFromTimes( + handle, + sylLevel, + segmentLevel, + convertSuperlevel = TRUE, + rewriteAllAnnots = FALSE, + verbose = verbose + ) + + remove_levelDefinition( + handle, + paste0( + sylLevel, + formals(autobuild_linkFromTimes)$backupLevelAppendStr + ), + force = TRUE, + verbose = FALSE + ) +} + +bas_run_pho2syl_segmental_dbi_anchored <- function(handle, + segmentAttributeDefinitionName, + segmentLevel, + languages, + verbose, + sylAttributeDefinitionName, + sylLevel, + superLevel, + resume, + params, + allowmultilink, + patience, + func) +{ + service = "runPho2Syl" + workdir = bas_workdir(handle, func) + + bundles_list = languages + if (nrow(bundles_list) > 0) + { + bas_ping(verbose) + if (verbose) + { + cat( + "INFO: Running Pho2Syl (segmental) on emuDB containing", + nrow(bundles_list), + "bundle(s)...\n" + ) + progress = 0 + pb = utils::txtProgressBar( + min = 0, + max = nrow(bundles_list), + initial = 0, + style = 3 + ) + utils::setTxtProgressBar(pb, progress) + } + + queryTxt = paste0("[", list_attributeDefinitions(handle, superLevel)[1, "name"], "=~ .*]") + word_items = query(handle, + queryTxt, + calcTimes = TRUE, + timeRefSegmentLevel = segmentLevel) + + queryTxt = paste0("[", segmentAttributeDefinitionName, "=~.*\\S.*]") + maus_items = suppressWarnings(query(handle, + queryTxt, + calcTimes = TRUE, + timeRefSegmentLevel = segmentLevel)) + + for (bundle_idx in 1:nrow(bundles_list)) + { + bundle = bundles_list[bundle_idx, "bundle", drop = TRUE] + session = bundles_list[bundle_idx, "session", drop = TRUE] + language = bundles_list[bundle_idx, "language", drop = TRUE] + + samplerate = bas_get_samplerate(handle, session, bundle) + + if (resume && + bas_label_exists_in_bundle(handle, session, bundle, sylAttributeDefinitionName)) + { + if (verbose) + { + cat("\nSkipping bundle", bundle) + } + next + } + + word_items_bundle = word_items[word_items$bundle == bundle & + word_items$session == session,] + + maus_items_bundle = maus_items[maus_items$bundle == bundle & + maus_items$session == session,] + + if (nrow(word_items_bundle) > 0 && + nrow(maus_items_bundle) > 0) + { + seq_idx = 1 + max_id = bas_get_max_id(handle, session, bundle) + + maufile = file.path(workdir, paste0(bundle, ".mau.par")) + masfile = file.path(workdir, paste0(bundle, ".mas.par")) + + maucon <- file(maufile) + open(maucon, "w") + writeLines(paste0("SAM: ", samplerate, "\nLBD:"), maucon, useBytes = TRUE) + + bas_id = 0 + bas_id_to_word_item_id = new.env(hash = TRUE) + + written_anything = FALSE + + + mau_start = -1 + mau_idx = 1 + + for (word_idx in 1:nrow(word_items_bundle)) + { + word_item_id = word_items_bundle[word_idx, "start_item_id"] + word_end = word_items_bundle[word_idx, "sample_end"] + word_start = word_items_bundle[word_idx, "sample_start"] + written_mau = FALSE + + while (maus_items_bundle[mau_idx, "sample_end"] <= word_end && + mau_idx <= nrow(maus_items_bundle)) + { + mau_label = stringr::str_trim(maus_items_bundle[mau_idx, "labels"]) + mau_start = maus_items_bundle[mau_idx, "sample_start"] + mau_end = maus_items_bundle[mau_idx, "sample_end"] + + if (stringr::str_length(mau_label) > 0 && + mau_start >= word_start) + { + writeLines( + paste0( + "MAU: ", + mau_start, + " ", + mau_end - mau_start, + " ", + bas_id, + " ", + mau_label + ), + maucon, + useBytes = TRUE + ) + + written_mau = TRUE + written_anything = TRUE + } + + mau_idx = mau_idx + 1 + } + if (written_mau) + { + bas_id_to_word_item_id[[toString(bas_id)]] = word_item_id + bas_id = bas_id + 1 + } + } + + close(maucon) + + if (written_anything) + { + curlParams = list( + lng = language, + tier = "MAU", + oform = "bpf", + i = httr::upload_file(maufile) + ) + + for (key in names(params)) + { + if (!(key %in% names(curlParams))) + { + curlParams[[key]] = params[[key]] + } + } + + masLines = bas_curl(service, curlParams, masfile, session, bundle, patience) + + if (length(masLines) > 0) + { + for (line_idx in 1:length(masLines)) + { + line = masLines[line_idx] + if (stringr::str_detect(line, "^MAS:")) + { + splitline = stringr::str_split_fixed(line, "\\s+", n = 5) + item_id = max_id + seq_idx + start = as.integer(splitline[[2]]) + dur = as.integer(splitline[[3]]) + label = stringr::str_replace_all(stringr::str_trim(splitline[[5]]), "'", "''") + + bas_ids = splitline[[4]] + bas_ids_split = stringr::str_split(bas_ids, ",")[[1]] + + if ((!allowmultilink) && length(bas_ids_split) > 1) + { + stop( + "Bundle ", + bundle, + " session ", + session, + ": ", + "Pho2Syl returned item with multiple links despite wsync not being set to yes: ", + line + ) + } + + if (as.integer(bas_ids_split[1]) >= 0) + { + bas_add_item( + handle = handle, + session = session, + bundle = bundle, + item_id = item_id, + level = sylLevel, + type = "SEGMENT", + seq_idx = seq_idx, + sample_start = start, + sample_dur = dur, + samplerate = samplerate + ) + + seq_idx = seq_idx + 1 + + bas_add_label( + handle = handle, + session = session, + bundle = bundle, + item_id = item_id, + label_idx = 1, + label_name = + sylAttributeDefinitionName, + label = label + ) + + for (split_idx in 1:length(bas_ids_split)) + { + word_item_id = bas_id_to_word_item_id[[bas_ids_split[split_idx]]] + + + bas_add_link( + handle = handle, + session = session, + bundle = bundle, + from_id = word_item_id, + to_id = item_id + ) + } + } + } + } + } + } + } + if (verbose) + { + utils::setTxtProgressBar(pb, bundle_idx) + } + } + } + if (verbose) + { + cat("\n") + } +} + +bas_run_pho2syl_segmental_dbi_unanchored <- function(handle, + segmentAttributeDefinitionName, + segmentLevel, + languages, + verbose, + sylAttributeDefinitionName, + sylLevel, + resume, + params, + patience, + func) +{ + service = "runPho2Syl" + workdir = bas_workdir(handle, func) + bundles_list = languages + if (nrow(bundles_list) > 0) + { + bas_ping(verbose) + if (verbose) + { + cat( + "INFO: Running Pho2Syl (segmental) on emuDB containing", + nrow(bundles_list), + "bundle(s)...\n" + ) + progress = 0 + pb = utils::txtProgressBar( + min = 0, + max = nrow(bundles_list), + initial = 0, + style = 3 + ) + utils::setTxtProgressBar(pb, progress) + } + + queryTxt = paste0("[", segmentAttributeDefinitionName, "=~.*\\S\\.*]") + maus_items = suppressWarnings(query(handle, + queryTxt, + calcTimes = TRUE, + timeRefSegmentLevel = segmentLevel)) + + for (bundle_idx in 1:nrow(bundles_list)) + { + bundle = bundles_list[bundle_idx, "bundle", drop = TRUE] + session = bundles_list[bundle_idx, "session", drop = TRUE] + language = bundles_list[bundle_idx, "language", drop = TRUE] + + samplerate = bas_get_samplerate(handle, session, bundle) + + if (resume && + bas_label_exists_in_bundle(handle, session, bundle, sylAttributeDefinitionName)) + { + if (verbose) + { + cat("\nSkipping bundle", bundle) + } + next + } + + maus_items_bundle = maus_items[maus_items$bundle == bundle & + maus_items$session == session,] + + if (nrow(maus_items_bundle) > 0) + { + seq_idx = 1 + max_id = bas_get_max_id(handle, session, bundle) + + maufile = file.path(workdir, paste0(bundle, ".mau.par")) + masfile = file.path(workdir, paste0(bundle, ".mas.par")) + + maucon <- file(maufile) + open(maucon, "w") + writeLines(paste0("SAM: ", samplerate, "\nLBD:"), maucon, useBytes = TRUE) + + + for (mau_idx in 1:nrow(maus_items_bundle)) + { + mau_label = stringr::str_trim(maus_items_bundle[mau_idx, "labels"]) + mau_start = maus_items_bundle[mau_idx, "sample_start"] + mau_end = maus_items_bundle[mau_idx, "sample_end"] + + + if (stringr::str_length(mau_label) > 0) + { + writeLines(paste0( + "MAU: ", + mau_start, + " ", + mau_end - mau_start, + " 0 ", + mau_label + ), + maucon, + useBytes = TRUE) + + } + } + + close(maucon) + + curlParams = list( + lng = language, + tier = "MAU", + oform = "bpf", + i = httr::upload_file(maufile) + ) + + for (key in names(params)) + { + if (!(key %in% names(curlParams))) + { + curlParams[[key]] = params[[key]] + } + } + + masLines = bas_curl(service, curlParams, masfile, session, bundle, patience) + + if (length(masLines) > 0) + { + for (line_idx in 1:length(masLines)) + { + line = masLines[line_idx] + if (stringr::str_detect(line, "^MAS:")) + { + splitline = stringr::str_split_fixed(line, "\\s+", n = 5) + item_id = max_id + seq_idx + start = as.integer(splitline[[2]]) + dur = as.integer(splitline[[3]]) + label = stringr::str_replace_all(stringr::str_trim(splitline[[5]]), "'", "''") + + bas_ids = splitline[[4]] + bas_ids_split = stringr::str_split(bas_ids, ",")[[1]] + + if (as.integer(bas_ids_split[1]) >= 0) + { + bas_add_item( + handle = handle, + session = session, + bundle = bundle, + item_id = item_id, + level = sylLevel, + type = "SEGMENT", + seq_idx = seq_idx, + sample_start = start, + sample_dur = dur, + samplerate = samplerate + ) + + seq_idx = seq_idx + 1 + + bas_add_label( + handle = handle, + session = session, + bundle = bundle, + item_id = item_id, + label_idx = 1, + label_name = + sylAttributeDefinitionName, + label = label + ) + } + } + } + } + } + if (verbose) + { + utils::setTxtProgressBar(pb, bundle_idx) + } + } + } + if (verbose) + { + cat("\n") + } +} + +##################################################################### +############################ HELPERS ################################ +##################################################################### +bas_workdir <- function(handle, func) +{ + return(file.path(tempdir(), "emuR_bas_workdir", handle$UUID, func)) +} + +bas_tmpdbdir <- function(handle, func) +{ + return(file.path(tempdir(), "emuR_bas_tmpDB", handle$UUID, func)) +} + +bas_prepare <- function(handle, resume, verbose, func) +{ + workdir = bas_workdir(handle, func) + tmpdbdir = bas_tmpdbdir(handle, func) + + if (dir.exists(workdir)) + { + unlink(workdir, recursive = TRUE) + } + + created = dir.create(workdir, recursive = TRUE) + if(!created){ + stop("Couldn't create ", workdir) + } + + dbConfig = load_DBconfig(handle) + + tmpBasePath = file.path(tmpdbdir, paste0(handle$dbName, emuDB.suffix)) + oldBasePath = handle$basePath + + tmpCache = file.path(tmpBasePath, + paste0(handle$dbName, database.cache.suffix)) + + oldCache = file.path(oldBasePath, + paste0(handle$dbName, database.cache.suffix)) + + if (!(resume && file.exists(tmpCache))) + { + if (verbose) + { + cat("INFO: Preparing temporary database. This may take a while...\n") + } + if (dir.exists(tmpdbdir)) + { + unlink(tmpdbdir, recursive = TRUE) + } + + created = dir.create(tmpBasePath, recursive = TRUE) + if(!created){ + stop("Couldn't create ", tmpBasePath) + } + + + if (!file.copy(oldCache, tmpCache, overwrite = TRUE)) + { + stop("Could not create temporary DB cache") + } + } + + handle$connection <- DBI::dbConnect(RSQLite::SQLite(), tmpCache) + RSQLite::initRegExp(handle$connection) + + update_cache(handle, verbose = verbose) + + handle$basePath <- tmpBasePath + + store_DBconfig(handle, dbConfig) + + + return(handle) +} + +bas_clear <- function(handle, oldBasePath, func) +{ + workdir = bas_workdir(handle, func) + tmpdbdir = bas_tmpdbdir(handle, func) + + oldCache = file.path(oldBasePath, paste0(handle$dbName, database.cache.suffix)) + + if (!file.copy(file.path( + handle$basePath, + paste0(handle$dbName, database.cache.suffix) + ), oldCache, overwrite = TRUE)) + { + stop("Could not copy temporary DB cache into original DB") + } + + dbConfig = load_DBconfig(handle) + + handle$basePath <- oldBasePath + store_DBconfig(handle, dbConfig) + + unlink(workdir, recursive = TRUE) + unlink(tmpdbdir, recursive = TRUE) + + handle$connection <- DBI::dbConnect(RSQLite::SQLite(), oldCache) + + return(handle) +} + +bas_evaluate_result <- function (result) +{ + return(result) +} + +bas_download <- function(result, + target, + session = "", + bundle = "") +{ + if (stringr::str_detect(result, "false")) + { + stop("Unsuccessful webservice call in bundle ", + bundle, + ", session ", + session, + ": ", + result) + } + + downloadLink = stringr::str_match(result, "(.*)")[1, 2] + utils::download.file( + downloadLink, + target, + method = "auto", + quiet = TRUE, + mode = "w", + cacheOK = TRUE + ) + + lines = try(readr::read_lines(target)) + + if (inherits(lines, "try-error")) + { + stop("Bundle ", + bundle, + ", session ", + session, + ": Cannot read from G2P output ", + target) + } + + if (length(lines) == 0) + { + stop("Bundle ", + bundle, + ", session ", + session, + ": Zero line output from webservice: ", + target) + } + + return(lines) +} + +bas_get_max_label_idx <- function(handle, session, bundle, item_id) +{ + queryTxt = paste0( + "SELECT max(label_idx) FROM labels", + basic_cond(handle, session, bundle), + "AND item_id==", + item_id + ) + + return(DBI::dbGetQuery(handle$connection, queryTxt)[1, 1]) +} + +bas_get_max_id <- function(handle, session, bundle, items_table_name = "items") +{ + queryTxt = paste0("SELECT max(item_id) FROM ", items_table_name, + basic_cond(handle, session, bundle)) + + res = DBI::dbGetQuery(handle$connection, queryTxt) + if (nrow(res) == 0 || is.na(res[1, 1]) || is.null(res[1, 1])) + { + return(0) + } + return(res[1, 1]) +} + +bas_add_item <- function(handle, + session, + bundle, + item_id, + level, + type = 'ITEM', + seq_idx, + samplerate, + sample_start = "NULL", + sample_dur = "NULL", + sample_point = "NULL") +{ + queryTxt = paste0( + "INSERT INTO items VALUES('", + handle$UUID, + "','", + session, + "','", + bundle, + "',", + item_id, + ",'", + level, + "','", + type, + "', ", + seq_idx, + ",", + samplerate, + ",", + sample_point, + ",", + sample_start, + ",", + sample_dur, + ")" + ) + + DBI::dbExecute(handle$connection, queryTxt) +} + +bas_add_label <- function(handle, + session, + bundle, + item_id, + label_idx, + label_name, + label) +{ + queryTxt = paste0( + "INSERT INTO labels VALUES('", + handle$UUID, + "', '", + session, + "', '", + bundle, + "', ", + item_id, + ", ", + label_idx, + ", '", + label_name, + "', '", + label, + "')" + ) + + DBI::dbExecute(handle$connection, queryTxt) +} + +bas_add_link <- function(handle, + session, + bundle, + from_id, + to_id) +{ + queryTxt = paste0( + "INSERT INTO links VALUES('", + handle$UUID, + "', '", + session, + "', '", + bundle, + "', ", + from_id, + ", ", + to_id, + ", NULL)" + ) + DBI::dbExecute(handle$connection, queryTxt) +} + +bas_get_signal_path <- function(handle, session, bundle, basePath) +{ + queryTxt = paste0( + "SELECT annotates FROM bundle", + basic_cond(handle, session, bundle, bundlename = + "name") + ) + res = DBI::dbGetQuery(handle$connection, queryTxt) + if (nrow(res) > 0) + { + return(file.path( + basePath, + paste0(session, session.suffix), + paste0(bundle, bundle.dir.suffix), + res[1, 1] + )) + } + return(NULL) +} + +bas_get_samplerate <- function(handle, session, bundle) +{ + queryTxt = paste0( + "SELECT sample_rate FROM bundle", + basic_cond(handle, session, bundle, bundlename = + "name") + ) + res = DBI::dbGetQuery(handle$connection, queryTxt) + if (nrow(res) > 0) + { + return(res[1, 1]) + } + return(NULL) +} + +bas_get_top_id <- function(handle, session, bundle, topLevel) +{ + queryTxt = paste0( + "SELECT item_id FROM items", + basic_cond(handle, session, bundle), + "AND level=='", + topLevel, + "'" + ) + + res = DBI::dbGetQuery(handle$connection, queryTxt) + if (nrow(res) == 0) { + return(NULL) + } + if (nrow(res) > 1) { + stop( + "Bundle ", + bundle, + ", session ", + session, + ": More than one possible node on level ", + topLevel, + " in bundle ", + bundle + ) + } + return(res[[1, 1]]) +} + +bas_new_canvas <- function(handle, perspective, canvas) +{ + if (!is.null(perspective)) + { + if (!(perspective %in% list_perspectives(handle)$name)) + { + add_perspective(handle, perspective) + } + + set_levelCanvasesOrder(handle, + perspectiveName = perspective, + c(get_levelCanvasesOrder(handle, perspective), canvas)) + } +} + +bas_label_item_join <- function(labels_table, items_table) +{ + return( + paste0( + " ", + labels_table, + " JOIN ", + items_table, + " ON ", + labels_table, + ".db_uuid==", + items_table, + ".db_uuid AND ", + labels_table, + ".session==", + items_table, + ".session AND ", + labels_table, + ".bundle==", + items_table, + ".bundle AND ", + labels_table, + ".item_id==", + items_table, + ".item_id " + ) + ) +} + + + +bas_label_exists_in_bundle <- + function(handle, session, bundle, labelName) + { + queryTxt = paste0( + "SELECT count(*) FROM labels", + basic_cond(handle, session, bundle), + "AND name=='", + labelName, + "'" + ) + return(DBI::dbGetQuery(handle$connection, queryTxt)[1, 1] > 0) + } + +bas_link_exists_in_bundle <- + function(handle, + session, + bundle, + superLevel, + subLevel) + { + queryTxt = paste0( + "SELECT count(*) FROM links", + basic_cond(handle, session, bundle), + "AND from_id in (SELECT item_id FROM items", + basic_cond(handle, session, bundle), + "AND name=='", + superLevel, + "') AND + to_id in (SELECT item_id FROM items", + basic_cond(handle, session, bundle), + "AND name=='", + subLevel, + "')" + ) + return(DBI::dbGetQuery(handle$connection, queryTxt)[1, 1] > 0) + } + +bas_ping <- function(verbose) +{ + if (verbose) + { + cat("INFO: Sending ping to webservices provider.\n") + } + + res = httr::GET(url = "https://clarin.phonetik.uni-muenchen.de/BASWebServices/services/getLoadIndicator") +} + + +bas_long_enough_for_chunker <- function(handle, basePath) +{ + bundles = list_bundles(handle) + if (nrow(bundles) > 0) + { + for (bundle_idx in nrow(bundles)) + { + annotates = bas_get_signal_path(handle, + session = bundles[bundle_idx, "session", drop = TRUE], + bundles[bundle_idx, "name", drop = TRUE], + basePath = basePath) + obj = wrassp::read.AsspDataObj(annotates) + if ((attr(obj, "endRecord") / attr(obj, "sampleRate")) > 60) + { + return (TRUE) + } + } + } + return (FALSE) +} + +bas_segment_to_item_level <- function(handle, segmentLevel) +{ + bas_segment_to_item_level_dbi(handle, segmentLevel) + + dbConfig = load_DBconfig(handle) + + for (i in 1:length(dbConfig$levelDefinitions)) { + if (dbConfig$levelDefinitions[[i]]$name == segmentLevel) { + dbConfig$levelDefinitions[[i]]$type = 'ITEM' + } + } + + store_DBconfig(handle, dbConfig) + + perspectives = list_perspectives(handle) + if (nrow(perspectives) > 0) + { + for (perspective_idx in 1:nrow(perspectives)) + { + perspective = perspectives[perspective_idx, "name"] + oldOrder = get_levelCanvasesOrder(handle, perspective) + set_levelCanvasesOrder(handle, perspective, oldOrder[oldOrder != segmentLevel]) + } + } +} + + +bas_segment_to_item_level_dbi <- + function(handle, segmentLevel, items_table = "items") + { + queryTxt = paste0( + "UPDATE ", + items_table, + " SET type='ITEM', sample_point = NULL, sample_start = NULL, sample_dur = NULL WHERE level=='", + segmentLevel, + "' AND db_uuid=='", + handle$UUID, + "'" + ) + + DBI::dbExecute(handle$connection, queryTxt) + } + +bas_check_this_is_a_new_label <- function(handle, label) +{ + if (!is.null(get_levelNameForAttributeName(handle, label))) { + stop("There is already a level with label ", label) + } + + if (!is.null(get_levelDefinition(handle, label))) { + stop("Level ", label, " already exists!") + } +} + + +bas_evaluate_language_option <- function(handle, language) +{ + bundles = list_bundlesDBI(handle) %>% + dplyr::rename(bundle = "name") %>% + dplyr::arrange(.data$session, .data$bundle) + + if (is.data.frame(language)) + { + if (nrow(language) != nrow(bundles)) + { + stop( + "You have provided a dataframe as language option. This dataframe must contain the same number of bundles as your emuDB. ", + "Currently, your emuDB contains ", + nrow(bundles), + " bundles, and the language dataframe ", + nrow(language), + " bundles." + ) + } + language = language %>% + dplyr::arrange(.data$session, .data$bundle) + if ( any(language$session != bundles$session) || + any(language$bundle != bundles$bundle) ) + { + stop("Your language dataframe must contain the same bundles as your emuDB.") + } + return(language) + } + else + { + if (!is.character(language)) + { + stop("Language option must either be a string or a dataframe.") + } + languages = bundles + languages$language = language + return(languages) + } +} + +get_attributeDescription <- function(handle, level, label) +{ + dbConfig = load_DBconfig(handle) + + df = list_levelDefinitions(handle) + + if (!(level %in% df$name)) + { + stop("There is no level named ", level) + } + + df = list_attributeDefinitions(handle, level) + + if (!(label %in% df$name)) + { + stop("There is no attribute definition named ", + label, + " on level ", + level) + } + + ld = get_levelDefinition(handle, level) + + for (j in 1:length(ld$attributeDefinitions)) + { + if (ld$attributeDefinitions[[j]]$name == label) + { + return(ld$attributeDefinitions[[j]]$description) + } + } +} + +set_attributeDescription <- + function(handle, level, label, description) + { + dbConfig = load_DBconfig(handle) + + df = list_levelDefinitions(handle) + + if (!(level %in% df$name)) + { + stop("There is no level named ", level) + } + + df = list_attributeDefinitions(handle, level) + + if (!(label %in% df$name)) + { + stop(paste0( + "There is no attribute definition named ", + label, + " on level ", + level + )) + } + + for (i in 1:length(dbConfig$levelDefinitions)) + { + if (dbConfig$levelDefinitions[[i]]$name == level) + { + for (j in 1:length(dbConfig$levelDefinitions[[i]]$attributeDefinitions)) + { + if (dbConfig$levelDefinitions[[i]]$attributeDefinitions[[j]]$name == label) + { + dbConfig$levelDefinitions[[i]]$attributeDefinitions[[j]]$description = description + break + } + } + break + } + } + + store_DBconfig(handle, dbConfig) + } + + +bas_curl_inner <- function(service, params, file, session, bundle) +{ + success = tryCatch({ + # res = RCurl::postForm( + # paste0( + # "https://clarin.phonetik.uni-muenchen.de/BASWebServices/services/", + # service + # ), + # .params = params, + # style = "HTTPPOST", + # .opts = RCurl::curlOptions(connecttimeout = 10, timeout = 10000) + # ) + res = httr::POST(url = paste0( + "https://clarin.phonetik.uni-muenchen.de/BASWebServices/services/", + service + ), + body = params + ) + + lines = bas_download(httr::content(res, "text"), file, session, bundle) + return(TRUE) + }, + error = function (cond) + { + message("Error calling ", service) + message(cond) + return(FALSE) + }) +} + +bas_curl <- function(service, params, file, session, bundle, patience) +{ + if(patience < 0 || patience > 3) + { + stop("Invalid patience value; must lie between 0 and 3.") + } + + attempts = 0 + success = FALSE + + while(attempts <= patience && (!success)) + { + attempts = attempts + 1 + success = bas_curl_inner(service, params, file, session, bundle) + } + + if(!success) + { + stop("Call to ", service, " failed ", attempts, " time(s). Aborting.") + } + + lines = readr::read_lines(file) + + return(lines) +} + +bas_paste_description <- + function(description, source, service, params) + { + + version_resp = httr::GET(url = paste0( + "https://clarin.phonetik.uni-muenchen.de/BASWebServices/services/runGetVersion?service=", + service + )) + + version = stringr::str_trim(httr::content(version_resp, "text")) + + if(nchar(version) == 0) + { + warning("Could not retrieve version number for service", service) + } + + description = paste0(description, " automatically derived ") + if (!is.null(source)) + { + description = paste0(description, "from '", source, "' ") + } + + if(!is.null(params$RULESET)){ + # fix dual entry in RULESET which is caused by file + # file upload + params$RULESET = params$RULESET$path + } + description = + paste0( + description, + "by BAS webservice ", + service, " (", version, "), on ", + Sys.time(), + ", with the following parameters: (", + paste0(c(rbind( + names(params), unlist(params) + )), collapse = " "), + ")" + ) + + return(description) + } diff --git a/R/emuR-bundleList.R b/R/emuR-bundleList.R new file mode 100644 index 00000000..658399e3 --- /dev/null +++ b/R/emuR-bundleList.R @@ -0,0 +1,135 @@ +##' read bundleList +##' @description read bundleList JSON file in emuDB +##' @details Read bundleList JSON file in emuDB that is stored in +##' the databases root dir sub-dir \code{bundleLists/} +##' @param emuDBhandle emuDB handle object (see \link{load_emuDB}) +##' @param name name of bundleList (excluding the _bundleList.json suffix) +##' @return tibble with the columns \code{session}, \code{name}, +##' \code{comment}, \code{finishedEditing} +##' @export +read_bundleList <- function(emuDBhandle, + name){ + + bl_dir_path = file.path(emuDBhandle$basePath, "bundleLists") + # check if folder exists + if(!dir.exists(bl_dir_path)){ + stop("no bundleList dir found in emuDB in dir: ", bl_dir_path) + } + + bl_path = file.path(bl_dir_path, + paste0(name, "_bundleList.json")) + + return(tibble::as_tibble(jsonlite::read_json(bl_path, simplifyVector = TRUE))) + +} + +##' write bundleList +##' @description write bundleList JSON file to emuDB +##' @details Write bundleList JSON file to emuDB sub-dir \code{bundleLists/} +##' @param emuDBhandle emuDB handle object (see \link{load_emuDB}) +##' @param name name of bundleList (excluding the _bundleList.json suffix) +##' @param bundleList tibble/data.frame with the columns \code{session}, \code{name}, +##' \code{comment} (optional), \code{finishedEditing} (optional). Use \link{list_bundles} +##' @param seglist segment list returned by \link{query} function. If set the +##' \code{bundleList} parameter will be ignored and a bundleList will be created by +##' collapsing the segments as timeAnchors into the \code{_bundleList.json} +##' @param updateDBconfig if set to TRUE (the default) DBconfig will be updated +##' with the fields +##' @param verbose be verbose +##' \itemize{ +##' \item \code{"bundleComments": true} +##' \item \code{"bundleFinishedEditing": true} +##' } +##' @export +##' @importFrom rlang .data +write_bundleList <- function(emuDBhandle, + name, + bundleList, + seglist, + updateDBconfig = TRUE, + verbose = TRUE){ + + if(missing(name)){ + stop("name parameter must be given") + } + # set path vars + bl_dir_path = file.path(emuDBhandle$basePath, "bundleLists") + + if(!dir.exists(bl_dir_path)){ + if(verbose){ + print(paste0("INFO: No bundleList dir found in emuDB (path: ", bl_dir_path, ")! Creating directory...")) + } + created = dir.create(bl_dir_path) + if(!created){ + stop("Couldn't create ", bl_dir_path) + } + } + + DBconfig = load_DBconfig(emuDBhandle) + + bl_path = file.path(bl_dir_path, paste0(name, "_bundleList.json")) + + # update DBconfig to display + if(is.null(DBconfig$EMUwebAppConfig$restrictions$bundleComments) || is.null(DBconfig$EMUwebAppConfig$restrictions$bundleFinishedEditing)){ + # TODO ask user to set? + DBconfig$EMUwebAppConfig$restrictions$bundleComments = TRUE + DBconfig$EMUwebAppConfig$restrictions$bundleFinishedEditing = TRUE + store_DBconfig(emuDBhandle, DBconfig) + } + + + if(missing(seglist)){ + + bundleList %>% + dplyr::select("session", + "name", + dplyr::contains("comment"), + dplyr::contains("finishedEditing")) -> bundleList + + # add if not available + if(!"comment" %in% names(bundleList)){ + bundleList$comment = "" + } + if(!"finishedEditing" %in% names(bundleList)){ + bundleList$finishedEditing = FALSE + } + + jsonlite::write_json(bundleList, bl_path, pretty = TRUE) + } else { + if(!missing(bundleList)){ + warning("'bundleList' parameter is ignored as 'seglist' parameter is set") + } + + dataWithTimeAnchors = list() + distinctBundles = seglist %>% + dplyr::select("session", "bundle") %>% + dplyr::distinct() + + for(i in 1:nrow(distinctBundles)){ + sesBool = distinctBundles[i,]$session == seglist$session + bndlBool = distinctBundles[i,]$bundle == seglist$bundle + start_sample_vals = round(((seglist[sesBool & bndlBool,]$start / 1000) + 0.5 / seglist[sesBool & bndlBool,]$sample_rate) + * seglist[sesBool & bndlBool,]$sample_rate) + # end_sample_vals calculated with + 1 as EMU-webApp seems to always mark the right boundary left of the selected sample + end_sample_vals = round(((seglist[sesBool & bndlBool,]$end / 1000) + 0.5/seglist[sesBool & bndlBool,]$sample_rate) * + seglist[sesBool & bndlBool,]$sample_rate) + # append to dataWithTimeAnchors + dataWithTimeAnchors[[i]] = list(session = distinctBundles[i,]$session, + name = distinctBundles[i,]$bundle, + comment = "", + finishedEditing = FALSE, + timeAnchors = data.frame(sample_start = start_sample_vals, + sample_end = end_sample_vals)) + + } + + jsonBundleList = jsonlite::toJSON(dataWithTimeAnchors, + auto_unbox = TRUE, + force = TRUE, + pretty = TRUE) + + writeLines(jsonBundleList, + bl_path, + useBytes = TRUE) + } +} diff --git a/R/emuR-convert_BPFCollection.R b/R/emuR-convert_BPFCollection.R new file mode 100644 index 00000000..d056fff7 --- /dev/null +++ b/R/emuR-convert_BPFCollection.R @@ -0,0 +1,1362 @@ +##' Convert a Bas Partitur File Collection (audio and BAS Partitur files) to an emuDB +##' +##' Converts a Bas Partitur File Collection to an emuDB. Expects a collection of the following form: +##' One master directory containing any number of file pairs (= bundles). A file pair +##' consists of an audio file with the extension and a BPF file with the extension . +##' Apart from extensions, the names of corresponding audio and BPF files must be identical. Each BPF +##' file is converted into an emuDB annot file. An emuDB config file matching the data base is created +##' after parsing. +##' +##' @param sourceDir path to the directory containing the Bas Partitur File collection +##' @param targetDir directory where the new emuDB should be saved; if it does not exist, +##' the function tries to create one +##' @param dbName name given to the new emuDB +##' @param bpfExt extension of BPF files (default = "par") +##' @param audioExt extension of audio files (default = "wav") +##' @param extractLevels optional vector containing the names of levels that should be extracted. +##' If NULL (the default) all levels found in the BPF collection are extracted. +##' @param refLevel optional name of level used as reference for symbolic links. If NULL (the default), a link-less data base is created. +##' @param unifyLevels optional vector containing names of levels to be unified with the reference level. This means that +##' they are treated as labels of the reference level rather than independent items. At the moment, only purely symbolic +##' (class 1) levels can be unified. Links between the reference level and levels in unifyLevels must be one-to-one. +##' @param newLevels optional vector containing names of levels in the BPF collection that are not part of the standard +##' BPF levels. See \url{https://www.bas.uni-muenchen.de/forschung/Bas/BasFormatseng.html#Partitur_tiersdef} for details on +##' standard BPF levels. +##' @param newLevelClasses optional vector containing the classes of levels in the newLevels vector as integers. +##' Must have the same length and order as newLevels. +##' @param segmentToEventLevels optional vector containing names of segment levels with overlapping segments. +##' The parser treats segments on these levels as events (SEGMENT xyz becomes EVENT xyz_start and EVENT xyz_end). +##' If a level contains segmental overlap but is not in this vector, the parser will throw an error. If overlap +##' resolution leads to event overlap (e.g. if one segment's end coincides with the next segment's start), an error is thrown either way. If in doubt whether a level contains segmental overlap, try running the converter with segmentToEventLevels = NULL and see whether an error occurs. +##' @param verbose display infos, warnings and show progress bar +##' @return NULL +##' @export +##' @seealso convert_TextGridCollection, convert_legacyEmuDB +##' @examples +##' \dontrun{ +##' +##' ################################## +##' # prerequisite: a dir with equally named file pairs *.wav and *.par +##' # (see ?create_emuRdemoData on how to create a demo) +##' +##' # convert file pairs *.wav and *.par in /tmp/BPF_collection into emuRDB 'NewEmuR' in +##' # dir /tmp/DirNewEmuR; the tier 'ORT' acts as the (word) reference tier; the +##' # tier 'KAN' is one-to-one bound to 'ORT' as a label +##' convert_BPFCollection("/tmp/BPF_collection","/tmp/DirNewEmuR",'NewEmuR', +##' bpfExt='par',audioExt='wav',refLevel='ORT',unifyLevels=c('KAN')) +##' +##' } +##' + + +convert_BPFCollection <- function(sourceDir, + targetDir, + dbName, + bpfExt = 'par', + audioExt = 'wav', + extractLevels = NULL, + refLevel = NULL, + newLevels = NULL, + newLevelClasses = NULL, + segmentToEventLevels = NULL, + unifyLevels = NULL, + verbose = TRUE) +{ + + + # --------------------------------------------------------------------------- + # -------------------------- Get directories -------------------------------- + # --------------------------------------------------------------------------- + + sourceDir = suppressWarnings(normalizePath(sourceDir)) + targetDir = suppressWarnings(normalizePath(targetDir)) + basePath = file.path(targetDir, paste0(dbName, emuDB.suffix)) + + if (!dir.exists(targetDir)) { + res = try(suppressWarnings(dir.create(targetDir))) + if (res == FALSE || inherits(res, "try-error")) + { + stop("Could not create target directory ", targetDir) + } + } + + # --------------------------------------------------------------------------- + # --------------- First round of argument checks ---------------------------- + # --------------------------------------------------------------------------- + + check_bpfArgumentWithoutLevelClasses(sourceDir = sourceDir, + basePath = basePath, + newLevels = newLevels, + newLevelClasses = newLevelClasses, + standardLevels = BPF_STANDARD_LEVELS, + verbose = verbose, + refLevel = refLevel, + audioExt = audioExt, + extractLevels = extractLevels) + + # --------------------------------------------------------------------------- + # ---------------- Combine standard and new level classes ------------------- + # --------------------------------------------------------------------------- + + levelClasses = as.list(BPF_STANDARD_LEVEL_CLASSES) + names(levelClasses) = BPF_STANDARD_LEVELS + levelClasses[newLevels] = newLevelClasses + + # --------------------------------------------------------------------------- + # ---------------------- Second round of argument checks -------------------- + # --------------------------------------------------------------------------- + + check_bpfArgumentWithLevelClasses(unifyLevels = unifyLevels, + refLevel = refLevel, + extractLevels = extractLevels, + levelClasses = levelClasses, + segmentToEventLevels) + + # --------------------------------------------------------------------------- + # -------------------------- Get file pair list ---------------------------- + # --------------------------------------------------------------------------- + + filePairList = create_filePairList(sourceDir, + sourceDir, + bpfExt, + audioExt) + + # --------------------------------------------------------------------------- + # ------------------------ Initialize temporary dbHandle -------------------- + # --------------------------------------------------------------------------- + + dbHandle = emuDBhandle(dbName, basePath = basePath, uuid::UUIDgenerate(), ":memory:") + # insert into emuDB table + queryTxt = paste0("INSERT INTO emu_db (uuid, name) VALUES('", dbHandle$UUID, "', '", dbName,"')") + DBI::dbExecute(dbHandle$connection, queryTxt) + + # --------------------------------------------------------------------------- + # ------------------------ Initialize progress bar -------------------------- + # --------------------------------------------------------------------------- + + if(verbose) + { + progress = 0 + nbFilePairs = length(filePairList) / 2 + + cat("INFO: Parsing BPF collection containing", nbFilePairs, "file pair(s)...\n") + pb = utils::txtProgressBar(min = 0, max = nbFilePairs, initial = progress, style=3) + utils::setTxtProgressBar(pb, progress) + } + + # --------------------------------------------------------------------------- + # -------------------- Initialize "tracker" variables ----------------------- + # --------------------------------------------------------------------------- + + # Trackers record data returned from the parsing process for later use. + # Level and link tracker record all level types and link types/directions found. + # They are used for creating the config file and post-processing. + levelTracker = list() + linkTracker = list() + + # Tracker for warnings produced by the parser. + # Will be displayed in batch after parsing is complete (less annoying than cluttering the terminal). + warningsTracker = list(semicolonFound = list()) + + # --------------------------------------------------------------------------- + # --------------------------- Loop over bundles ----------------------------- + # --------------------------------------------------------------------------- + + for(idx in 1:nrow(filePairList)[1]) + { + + # --------------------------------------------------------------------------- + # ------------------ Get session and bundle names --------------------------- + # --------------------------------------------------------------------------- + + session = get_bpfSession(filePath = filePairList[idx, 1], + sourceDir = sourceDir) + + bpfPath = normalizePath(filePairList[idx, 1], winslash = .Platform$file.sep) + bundle = sub(pattern = "(.*)\\..*$", replacement = "\\1", basename(bpfPath)) + annotates = basename(filePairList[idx, 2]) + + # Escaping single quotes in anything user-generated that will be fed into SQL + session = stringr::str_replace_all(session, "'", "''") + bundle = stringr::str_replace_all(bundle, "'", "''") + annotates = stringr::str_replace_all(annotates, "'", "''") + + # ----------------------------------------------------------------------- + # -------- Get sample rate for comparison with info in BPF header ------- + # ----------------------------------------------------------------------- + + asspObj = wrassp::read.AsspDataObj(filePairList[idx, 2]) + samplerate = attributes(asspObj)$sampleRate + + # ----------------------------------------------------------------------- + # --------------- Write session and bundle to temp DB ------------------- + # ----------------------------------------------------------------------- + queryTxt = paste0("SELECT name from session WHERE name='", session, "'") + all_sessions = DBI::dbGetQuery(dbHandle$connection, queryTxt) + + if(!session %in% all_sessions) + { + queryTxt = paste0("INSERT INTO session VALUES('", dbHandle$UUID, "', '", session, "')") + DBI::dbExecute(dbHandle$connection, queryTxt) + } + + queryTxt = paste0("INSERT INTO bundle VALUES('", dbHandle$UUID, "', '", session, "', '", bundle, "', '", + annotates, "', ", samplerate, ", 'NULL')") + + DBI::dbExecute(dbHandle$connection, queryTxt) + + # ----------------------------------------------------------------------- + # ------------------------------ Parse BPF ------------------------------ + # ----------------------------------------------------------------------- + + returnContainer = parse_BPF(dbHandle, + bpfPath = bpfPath, + bundle = bundle, + session = session, + refLevel = refLevel, + extractLevels = extractLevels, + samplerate = samplerate, + segmentToEventLevels = segmentToEventLevels, + levelClasses = levelClasses, + unifyLevels = unifyLevels) + levelInfo = returnContainer$levelInfo + linkInfo = returnContainer$linkInfo + semicolonFound = returnContainer$semicolonFound + + # ----------------------------------------------------------------------- + # --------------------- Update tracker variables ------------------------ + # ----------------------------------------------------------------------- + + if(semicolonFound) + { + warningsTracker$semicolonFound[[length(warningsTracker$semicolonFound) + 1L]] = bpfPath + } + + if(length(levelInfo) > 0) + { + levelTracker = update_bpfLevelTracker(levelInfo = levelInfo, + levelTracker = levelTracker) + } + + if(length(linkInfo) > 0) + { + linkTracker = update_bpfLinkTracker(linkInfo = linkInfo, + linkTracker = linkTracker) + } + + # ----------------------------------------------------------------------- + # -------------------------- Update progress bar ------------------------ + # ----------------------------------------------------------------------- + + if(verbose) + { + utils::setTxtProgressBar(pb, idx) + } + } + + # --------------------------------------------------------------------------- + # ----------------------------- Post-processing ----------------------------- + # --------------------------------------------------------------------------- + + if(verbose) + { + cat("\n") + cat("INFO: Doing some post-processing...\n") + } + + # --------------------------------------------------------------------------- + # ---------------- Resolve link type and direction ambiguities -------------- + # --------------------------------------------------------------------------- + + + if(length(linkTracker) > 0) + { + linkTracker = link_bpfDisambiguation(dbHandle, linkTracker = linkTracker, + refLevel = refLevel) + } + + # --------------------------------------------------------------------------- + # ------ Link from bundle level to refLevel and levels above refLevel ------- + # --------------------------------------------------------------------------- + + if(!is.null(refLevel)) + { + linkTracker = link_bpfUtteranceLevel(dbHandle, linkTracker = linkTracker, + refLevel = refLevel) + } + + # --------------------------------------------------------------------------- + # ---------------------- Create dbConfig Schema ----------------------------- + # --------------------------------------------------------------------------- + + if(verbose) + { + cat("INFO: Creating EMU database config schema...\n") + } + + DBconfig = create_bpfSchema(levelTracker = levelTracker, + linkTracker = linkTracker, + dbName = dbName, + dbUUID = dbHandle$UUID, + audioExt = audioExt) + + # --------------------------------------------------------------------------- + # ------------------------- Create and store database ----------------------- + # --------------------------------------------------------------------------- + + res = try(dir.create(basePath)) + if (res == FALSE || inherits(res, "try-error")) + { + stop("Could not create directory ", basePath) + } + + store_DBconfig(dbHandle, DBconfig) + + make_bpfDbSkeleton(dbHandle) + + copy_bpfMediaFiles(basePath = basePath, + sourceDir = sourceDir, + mediaFiles = filePairList[,2], + verbose = verbose) + + rewrite_annots(dbHandle, verbose = verbose) + + # --------------------------------------------------------------------------- + # -------------- Display any warnings collected during parsing -------------- + # --------------------------------------------------------------------------- + + if(verbose) + { + display_bpfSemicolonWarnings(warningsTracker) + } +} + + +############################################################################### +############################################################################### +############################################################################### +############################################################################### +############################################################################### + +## Copy media files from BPF collection to emu DB +## +## @param basePath +## @param sourceDir +## @param mediaFiles +## @param audioExt +## @param verbose +## @keywords emuR BPF Emu +## @return session + +copy_bpfMediaFiles <- function(basePath, + mediaFiles, + sourceDir, + verbose) +{ + # --------------------------------------------------------------------------- + # -------------------------- Initialize progress bar ------------------------ + # --------------------------------------------------------------------------- + + if(verbose) + { + progress = 0 + nbMediaFiles = length(mediaFiles) + + cat("INFO: Copying", nbMediaFiles, "media files to EMU database...\n") + pb = utils::txtProgressBar(min = 0, max = nbMediaFiles, initial = progress, style=3) + utils::setTxtProgressBar(pb, progress) + } + + + for(idx in 1:length(mediaFiles)) + { + + # ------------------------------------------------------------------------- + # ------------------- Deduce target dir from media file path -------------- + # ------------------------------------------------------------------------- + + targetDir = file.path(basePath, + paste0(get_bpfSession(filePath = mediaFiles[[idx]], + sourceDir = sourceDir), + session.suffix), + paste0(sub(pattern = "(.*)\\..*$", replacement = "\\1", basename(mediaFiles[[idx]])), + bundle.dir.suffix) + ) + + # ------------------------------------------------------------------------- + # ---------------------------------- Copy file ---------------------------- + # ------------------------------------------------------------------------- + + res = try(file.copy(mediaFiles[[idx]], targetDir)) + if (res == FALSE || inherits(res, "try-error")) + { + stop("Could not copy media file from ", mediaFiles[[idx]], " to ", targetDir) + } + + # ------------------------------------------------------------------------- + # ---------------------------- Update progress bar ------------------------ + # ------------------------------------------------------------------------- + + if(verbose) + { + utils::setTxtProgressBar(pb, idx) + } + + # Newline after progress bar: + } + if(verbose) + { + cat("\n") + } +} + + + + + + +############################################################################### +############################################################################### +############################################################################### +############################################################################### +############################################################################### + +## Deduce session name from file path (complex paths concatenated by underscores) +## +## @param filePath +## @param sourceDir +## @keywords emuR BPF Emu +## @return session + +get_bpfSession <- function(filePath, + sourceDir) +{ + DEFAULT_SESSION_NAME = "0000" + + session = normalizePath(dirname(filePath), winslash = "/") + sourceDir = normalizePath(sourceDir, winslash = "/") + sourceDir = stringr::str_replace(sourceDir, "/$", "") # remove final / which stays on sourceDir in Windows + session = stringr::str_replace_all(session, sourceDir, "") + session = stringr::str_replace_all(session, .Platform$file.sep, "_") + session = stringr::str_replace_all(session, "^_", "") + + if(session == "") + { + session = DEFAULT_SESSION_NAME + } + return(session) +} + +############################################################################### +############################################################################### +############################################################################### +############################################################################### +############################################################################### + +## First round of argument checks +## +## @param sourceDir +## @param basePath +## @param newLevels +## @param newLevelClasses +## @param standardLevels +## @param verbose +## @param refLevel +## @param audioExt +## @param extractLevels +## @keywords emuR BPF Emu +## @return + +check_bpfArgumentWithoutLevelClasses <- function(sourceDir, + basePath, + newLevels, + newLevelClasses, + standardLevels, + verbose, + refLevel, + audioExt, + extractLevels) +{ + if(!file.exists(sourceDir)) + { + stop("Source directory does not exist!") + } + + if(file.exists(basePath)) + { + stop('The directory ', basePath, ' already exists. Can not generate a new emuDB here.') + } + + if(length(newLevels) != length(newLevelClasses)) + { + stop("Length of newLevels and newLevelClasses must be identical.") + } + + if(!all(newLevelClasses %in% c(1,2,3,4,5))) + { + stop("Level classes must be integers between 1 and 5. See BPF specifications for details.") + } + + if(any(newLevels %in% standardLevels)) + { + stop("You cannot introduce a standard BPF level via the newLevels argument. ", + "Standard BPF levels are: '", paste(standardLevels, collapse = "', '"), "'") + } + + if(is.null(refLevel) && verbose) + { + ans = readline("WARNING: No reference level has been declared. EMU database will be built without any symbolic links. Do you wish to continue? (y/n) ") + if(!ans == "y") + { + stop("BPF converter interrupted.") + } + } + + if(!is.null(extractLevels)) + { + if(!is.null(refLevel)) + { + if(!refLevel %in% extractLevels) + { + stop("Reference level is not in extractLevels") + } + } + } +} + +############################################################################### +############################################################################### +############################################################################### +############################################################################### +############################################################################### + +## Second round of argument checks +## +## @param unifyLevels +## @param refLevel +## @param extractLevels +## @param levelClasses +## @param segmentToEventLevels +## @keywords emuR BPF Emu +## @return + +check_bpfArgumentWithLevelClasses <- function(unifyLevels, + refLevel, + extractLevels, + levelClasses, + segmentToEventLevels) +{ + for(level in c(unifyLevels, refLevel, extractLevels)) + { + if(!level %in% names(levelClasses)) + { + stop("Unknown level: ", level, ". If this is not a standard BPF level you need to declare this level via the newLevels argument, and assign it a class via the newLevelClasses argument") + } + } + + # Throw an exception if a link-less level is made reference level. + if(!is.null(refLevel)) + { + if(levelClasses[[refLevel]] %in% c(2, 3)) + { + stop("Link-less level ", refLevel, " cannot be reference level.") + } + } + + if(!is.null(unifyLevels)) + { + if(is.null(refLevel)) + { + stop("If you want to unify levels with the reference level, you must declare a reference level.") + } + if(refLevel %in% unifyLevels) + { + stop("Reference level cannot be unified with itself.") + } + if(any(levelClasses[unifyLevels] != 1)) + { + stop("Levels to be unified with the reference level must be of class 1 (time-less).") + } + } + + if(any(!levelClasses[segmentToEventLevels] %in% c(2,4))) + { + stop("Only segment levels (classes 2 and 4) can be listed in segmentToEventLevels.") + } +} + + +############################################################################### +############################################################################### +############################################################################### +############################################################################### +############################################################################### + +## Update level tracker with information from parsing process +## +## @param levelInfo +## @param levelTracker +## @keywords emuR BPF Emu +## @return levelTracker + +update_bpfLevelTracker <- function(levelInfo, + levelTracker) +{ + for(idx in 1:length(levelInfo)) + { + found = FALSE + if(length(levelTracker) > 0) + { + for(jdx in 1:length(levelTracker)) + { + if(levelTracker[[jdx]][["key"]] == levelInfo[[idx]][["key"]] && + levelTracker[[jdx]][["type"]] == levelInfo[[idx]][["type"]]) + { + for(label in levelInfo[[idx]][["labels"]]) + { + if(!label %in% levelTracker[[jdx]][["labels"]]) + { + levelTracker[[jdx]][["labels"]][[length(levelTracker[[jdx]][["labels"]]) + 1L]] = label + } + } + found = TRUE + break + } + } + } + + if(!found) + { + levelTracker[[length(levelTracker) + 1L]] = levelInfo[[idx]] + } + } + + return(levelTracker) +} + +############################################################################### +############################################################################### +############################################################################### +############################################################################### +############################################################################### + +## Update link tracker with information from parsing process +## +## @param linkTracker +## @param linkInfo +## @keywords emuR BPF Emu +## @return linkTracker + +update_bpfLinkTracker <- function(linkTracker, + linkInfo) +{ + for(jdx in 1:length(linkInfo)) + { + found = FALSE + if(length(linkTracker) > 0) + { + for(kdx in 1:length(linkTracker)) + { + if(linkTracker[[kdx]][["fromkey"]] == linkInfo[[jdx]][["fromkey"]] && + linkTracker[[kdx]][["tokey"]] == linkInfo[[jdx]][["tokey"]] && + linkTracker[[kdx]][["type"]] == linkInfo[[jdx]][["type"]]) + { + found = TRUE + linkTracker[[kdx]][["countRight"]] = linkTracker[[kdx]][["countRight"]] + linkInfo[[jdx]][["countRight"]] + linkTracker[[kdx]][["countWrong"]] = linkTracker[[kdx]][["countWrong"]] + linkInfo[[jdx]][["countWrong"]] + break + } + } + } + + if(!found) + { + linkTracker[[length(linkTracker) + 1L]] = linkInfo[[jdx]] + } + } + + return(linkTracker) +} + +############################################################################### +############################################################################### +############################################################################### +############################################################################### +############################################################################### + +## Disambiguate link directions and types in case individual BPFs did not agree on them +## +## @param emuDBhandle +## @param linkTracker +## @param refLevel +## @keywords emuR BPF Emu +## @return list(linkTracker) + +link_bpfDisambiguation <- function(emuDBhandle, linkTracker, + refLevel) +{ + # ------------------------------- THE PROBLEM ------------------------------- + # + # Individual BPFs might not agree on the direction of certain links. + # + # EXAMPLE: There is a BPF collection with two levels: "ORT" at the word level (reference level), + # and "TRN" at the turn level. In any BPF where there are at least two words per turn, + # the parser will recognize "TRN" as being hierarchically higher. + # But if there is one BPF with only one word per turn (i.e. a local ONE_TO_ONE relationship), + # the parser will by default put the reference level on top. Thus, we have conflicting link + # directions in the SQL data base, and conflicting entries in the linkTracker: + # + # fromkey = "ORT", tokey = "TRN", type = ..., countRight = ..., countWrong = ... + # fromkey = "TRN", fromkey = "ORT", type = ..., countRight = ..., countWrong = ... + # + # Individual BPFs might also not agree on the types of links. + # EXAMPLE: links between words ("ORT") and syllables ("MAS") might be ONE_TO_MANY in the collection as a whole, + # but in a bundle with only monosyllabic words the parser will have interpreted them as ONE_TO_ONE. + # In this case, we would have two conflicting pieces of information in the link tracker: + # + # fromkey = "ORT", tokey = "MAS", type = "ONE_TO_ONE" + # fromkey = "ORT", tokey = "MAS", type = "ONE_TO_MANY" + + # --------------------------------------------------------------------------- + # --- Collect pairs of levels between which links have to be turned around -- + # --------------------------------------------------------------------------- + + turnAround = get_bpfTurnAround(linkTracker = linkTracker) + + # --------------------------------------------------------------------------- + # --------------------- Set countRight & countWrong to NA ------------------- + # --------------------------------------------------------------------------- + + # (we don't need them anymore and they interfere with the unique() function) + + for(idx in 1:length(linkTracker)) + { + linkTracker[[idx]][["countRight"]] = NA + linkTracker[[idx]][["countWrong"]] = NA + } + + # --------------------------------------------------------------------------- + # ---- Turn the links from turnAround in the temp DB and the link tracker --- + # --------------------------------------------------------------------------- + + if(length(turnAround) > 0) + { + turn_bpfLinks(emuDBhandle, turnAround = turnAround) + linkTracker = turn_bpfLinkTrackerEntries(turnAround = turnAround, + linkTracker = linkTracker) + } + + # --------------------------------------------------------------------------- + # ------------------------------ Merge link types --------------------------- + # --------------------------------------------------------------------------- + + linkTracker = merge_bpfLinkTypes(linkTracker = linkTracker) + + # --------------------------------------------------------------------------- + # -------------- Return link tracker to caller function --------------------- + # --------------------------------------------------------------------------- + + return(linkTracker) +} + +############################################################################### +############################################################################### +############################################################################### +############################################################################### +############################################################################### + +## Get a list of all links (as fromkey - tokey pairs) that need to be turned around +## +## @param linkTracker +## @keywords emuR BPF Emu +## @return turnAround + +get_bpfTurnAround <- function(linkTracker) +{ + turnAround = list() + + for(idx in 1:length(linkTracker)) + { + # Boolean that's switched on when encountering a link tracker entry whose direction is the reverse of the current one's. + # If there isn't one, there is no direction issue, and therefore no turnAround entry. + turnAroundNecessary = FALSE + + # countRight and countWrong for the current entry's direction. + countRight = 0 + countWrong = 0 + + for(jdx in 1:length(linkTracker)) + { + if(linkTracker[[idx]][["fromkey"]] == linkTracker[[jdx]][["fromkey"]] && + linkTracker[[idx]][["tokey"]] == linkTracker[[jdx]][["tokey"]]) + { + countRight = countRight + linkTracker[[jdx]][["countRight"]] + countWrong = countWrong + linkTracker[[jdx]][["countWrong"]] + } + + else if(linkTracker[[idx]][["fromkey"]] == linkTracker[[jdx]][["tokey"]] && + linkTracker[[idx]][["tokey"]] == linkTracker[[jdx]][["fromkey"]]) + { + countRight = countRight + linkTracker[[jdx]][["countWrong"]] + countWrong = countWrong + linkTracker[[jdx]][["countRight"]] + turnAroundNecessary = TRUE + } + } + + if(turnAroundNecessary) + { + # ----------------------------------------------------------------------- + # ----------------- Evaluate countRight and countWrong ------------------ + # ----------------------------------------------------------------------- + + # If countRight ends up greater, the direction of the current link tracker entry "wins". + if(countRight > countWrong) + { + turnAround[[length(turnAround) + 1L]] = + list(fromkey = linkTracker[[idx]][["tokey"]], tokey = linkTracker[[idx]][["fromkey"]]) + } + + # If countWrong ends up greater, the reverse direction "wins". + else if(countRight < countWrong) + { + turnAround[[length(turnAround) + 1L]] = + list(fromkey = linkTracker[[idx]][["fromkey"]], tokey = linkTracker[[idx]][["tokey"]]) + } + + # Special case: If countRight == countWrong, we could get mirror entries in turnAround. + # Therefore, check whether the reverse of a given entry exists in turnAround. + # If it does, don't add the current entry. + else if(countRight == countWrong) + { + found = FALSE + for(link in turnAround) + { + if(link$fromkey == linkTracker[[idx]][["tokey"]] && link$tokey == linkTracker[[idx]][["fromkey"]]) + { + found = TRUE + break + } + } + + if(!found) + { + turnAround[[length(turnAround) + 1L]] = + list(fromkey = linkTracker[[idx]][["fromkey"]], tokey = linkTracker[[idx]][["tokey"]]) + } + } + } + } + + # Remove duplicates from turnAround. + turnAround = unique(turnAround) + + return(turnAround) +} + +############################################################################### +############################################################################### +############################################################################### +############################################################################### +############################################################################### + +## Turn around eligible links in the temp DB +## +## @param emuDBhandle +## @param turnAround +## @keywords emuR BPF Emu +## @return + +turn_bpfLinks <- function(emuDBhandle, turnAround) +{ + for(link in turnAround) + { + queryTxt = paste0("UPDATE links SET from_id = to_id, to_id = from_id WHERE from_id IN", + "(SELECT item_id FROM items WHERE level = '", link[["fromkey"]], + "' AND db_uuid = links.db_uuid AND session = links.session AND bundle = links.bundle) ", + "AND to_id IN(SELECT item_id FROM items WHERE level = '", link[["tokey"]], "' ", + "AND db_uuid = links.db_uuid AND session = links.session AND bundle = links.bundle);") + DBI::dbExecute(emuDBhandle$connection, queryTxt) + } +} + +############################################################################### +############################################################################### +############################################################################### +############################################################################### +############################################################################### + +## Turn around eligible entries in the link tracker +## +## @param turnAround +## @param linkTracker +## @keywords emuR BPF Emu +## @return linkTracker + +turn_bpfLinkTrackerEntries <- function(turnAround = turnAround, + linkTracker = linkTracker) +{ + for(idx in 1:length(turnAround)) + { + for(jdx in 1:length(linkTracker)) + { + if(turnAround[[idx]][["fromkey"]] == linkTracker[[jdx]][["fromkey"]] && + turnAround[[idx]][["tokey"]] == linkTracker[[jdx]][["tokey"]]) + { + linkTracker[[jdx]][["fromkey"]] = turnAround[[idx]][["tokey"]] + linkTracker[[jdx]][["tokey"]] = turnAround[[idx]][["fromkey"]] + + # If an entry of type ONE_TO_MANY is turned around, it becomes MANY_TO_MANY (as MANY_TO_ONE is not an option in emuR) + if(linkTracker[[jdx]][["type"]] == "ONE_TO_MANY") + { + linkTracker[[jdx]][["type"]] = "MANY_TO_MANY" + } + } + } + } + return(linkTracker) +} + +############################################################################### +############################################################################### +############################################################################### +############################################################################### +############################################################################### + +## Merge link types in linkTracker +## +## @param linkTracker +## @keywords emuR BPF Emu +## @return linkTracker + +merge_bpfLinkTypes <- function(linkTracker) +{ + for(idx in 1:length(linkTracker)) + { + for(jdx in 1:length(linkTracker)) + { + if(linkTracker[[idx]][["fromkey"]] == linkTracker[[jdx]][["fromkey"]] && + linkTracker[[idx]][["tokey"]] == linkTracker[[jdx]][["tokey"]]) + { + if(linkTracker[[idx]][["type"]] %in% c("ONE_TO_ONE", "ONE_TO_MANY") && + linkTracker[[jdx]][["type"]] %in% c("ONE_TO_MANY", "MANY_TO_MANY")) + { + linkTracker[[idx]][["type"]] = linkTracker[[jdx]][["type"]] + } + } + } + } + + # Remove duplicates created in the merging process. + linkTracker = unique(linkTracker) + + return(linkTracker) +} + +############################################################################### +############################################################################### +############################################################################### +############################################################################### +############################################################################### + +## Create links from the bundle level to the next highest level(s) +## +## @param emuDBhandle +## @param linkTracker +## @param refLevel +## @keywords emuR BPF Emu +## @return list(linkTracker) + +link_bpfUtteranceLevel <- function(emuDBhandle, linkTracker, + refLevel) +{ + # --------------------------------------------------------------------------- + # --- Get list of levels that should be linked to from the bundle level -- + # --------------------------------------------------------------------------- + + # (contains refLevel and any levels that are hierarchically higher than refLevel) + + underUtterance = get_bpfLevelsUnderUtterance(linkTracker = linkTracker, + refLevel = refLevel) + + for(level in underUtterance) + { + # ------------------------------------------------------------------------- + # ---------- Create links from bundle to current level in temp DB --------- + # ------------------------------------------------------------------------- + + nbItems = link_bpfUtteranceLevelToCurrentLevel(emuDBhandle, currentLevel = level) + + # ------------------------------------------------------------------------- + # ----------------- Determine link type (cardinality) --------------------- + # ------------------------------------------------------------------------- + + # Check whether there is one item of this specific level per bundle, or more than one. + # This determines whether the links from 'bundle' are ONE_TO_ONE or ONE_TO_MANY. + + queryTxt = paste0("SELECT DISTINCT db_uuid, session, bundle FROM items WHERE level = '", level, "'") + distinctUuidSessionBundle = DBI::dbGetQuery(emuDBhandle$connection, queryTxt) + nbBundles = nrow(distinctUuidSessionBundle) + + if(nbBundles < nbItems) + { + linkType = "ONE_TO_MANY" + } + + else + { + linkType = "ONE_TO_ONE" + } + + # ------------------------------------------------------------------------- + # -------------------------- Update link tracker -------------------------- + # ------------------------------------------------------------------------- + + linkTracker[[length(linkTracker) + 1L]] = list(fromkey = "bundle", + tokey = level, + type = linkType) + } + + # --------------------------------------------------------------------------- + # -------------------------- Return link tracker ---------------------------- + # --------------------------------------------------------------------------- + + return(linkTracker) +} + +############################################################################### +############################################################################### +############################################################################### +############################################################################### +############################################################################### + +## Get list of levels that should be linked to from the Utterance level (reference level plus any levels above reference level) +## +## @param linkTracker +## @param refLevel +## @keywords emuR BPF Emu +## @return dbSchema + +get_bpfLevelsUnderUtterance <- function(linkTracker, + refLevel) +{ + underUtterance = list(refLevel) + + if(length(linkTracker) == 0) { return(underUtterance)} + + for(idx in 1:length(linkTracker)) + { + if(linkTracker[[idx]][["tokey"]] == refLevel) + { + underUtterance[[length(underUtterance) + 1L]] = linkTracker[[idx]][["fromkey"]] + } + } + + return(underUtterance) +} + +############################################################################### +############################################################################### +############################################################################### +############################################################################### +############################################################################### + +## Link utterance level with current level +## +## @param emudBhandle +## @param currentLevel +## @keywords emuR BPF Emu +## @return nbItems + +link_bpfUtteranceLevelToCurrentLevel <- function(emuDBhandle, currentLevel) +{ + # Get UUID, session, bundle and item_id of all items of the relevant level + queryTxt = paste0("SELECT db_uuid, session, bundle, item_id FROM items WHERE level = '", currentLevel, "'") + uuidSessionBundleItemID = DBI::dbGetQuery(emuDBhandle$connection, queryTxt) + + # Loop over all items on this level + for(idx in 1:nrow(uuidSessionBundleItemID)) + { + db_uuid = uuidSessionBundleItemID[idx,][["db_uuid"]] + session = uuidSessionBundleItemID[idx,][["session"]] + bundle = uuidSessionBundleItemID[idx,][["bundle"]] + itemID = uuidSessionBundleItemID[idx,][["item_id"]] + + # Link all items to their corresponding bundle item + # (same UUID, session & bundle, item_id is always 1). + queryTxt = paste0("INSERT INTO links VALUES('", db_uuid, "', '", session, "', '", bundle, "', 1, ", itemID, ", NULL)") + DBI::dbExecute(emuDBhandle$connection, queryTxt) + } + + nbItems = nrow(uuidSessionBundleItemID) + return(nbItems) +} + +############################################################################### +############################################################################### +############################################################################### +############################################################################### +############################################################################### + +## Create an emuDB schema from link and level tracker +## +## @param levelTracker +## @param linkTracker +## @param dbName +## @param dbUUID +## @param audioExt +## @keywords emuR BPF Emu +## @return dbSchema + +create_bpfSchema <- function(levelTracker, + linkTracker, + dbName, + dbUUID, + audioExt) +{ + + # --------------------------------------------------------------------------- + # --- Get default level order and level definitions from level tracker ------ + # --------------------------------------------------------------------------- + + defaultLevelOrder = get_bpfDefaultLevelOrder(levelTracker = levelTracker) + levelDefinitions = get_bpfLevelDefinitions(levelTracker = levelTracker) + + # --------------------------------------------------------------------------- + # ------------------- Get link definitions from link tracker ---------------- + # --------------------------------------------------------------------------- + + linkDefinitions = get_bpfLinkDefinitions(linkTracker = linkTracker) + + # --------------------------------------------------------------------------- + # ------------------------------ Create DB schema --------------------------- + # --------------------------------------------------------------------------- + + # Create signalCanvas config. + sc = list(order = c("OSCI","SPEC"), + assign = list(), + contourLims = list()) + + # Create perspective. + defPersp = list(name = 'default', + signalCanvases = sc, + levelCanvases = list(order = defaultLevelOrder), + twoDimCanvases = list(order = list())) + + dbSchema = list(name = dbName, + UUID = dbUUID, + mediafileExtension = audioExt, + ssffTrackDefinitions = list(), + levelDefinitions = levelDefinitions, + linkDefinitions = linkDefinitions, + EMUwebAppConfig = list(perspectives=list(defPersp), + activeButtons = list(saveBundle = TRUE, + showHierarchy = TRUE))) + + return(dbSchema) +} + +############################################################################### +############################################################################### +############################################################################### +############################################################################### +############################################################################### + +## Get default level order from level tracker +## +## @param levelTracker +## @keywords emuR BPF Emu +## @return defaultLevelOrder + +get_bpfDefaultLevelOrder <- function(levelTracker) +{ + defaultLevelOrder = list() + + if(length(levelTracker) > 0) + { + for(levelIdx in 1:length(levelTracker)) + { + if(levelTracker[[levelIdx]][["type"]] %in% c("SEGMENT", "EVENT")) + { + defaultLevelOrder[[length(defaultLevelOrder)+1L]] = levelTracker[[levelIdx]][["key"]] + } + } + } + return(defaultLevelOrder) +} + +############################################################################### +############################################################################### +############################################################################### +############################################################################### +############################################################################### + +## Get level definitions from level tracker +## +## @param levelTracker +## @keywords emuR BPF Emu +## @return levelDefinitions + +get_bpfLevelDefinitions <- function(levelTracker) +{ + levelDefinitions = list() + + if(length(levelTracker) > 0) + { + for(levelIdx in 1:length(levelTracker)) + { + attrDefList = list() + + for(label in levelTracker[[levelIdx]][["labels"]]) + { + description = "" + if(label != "bundle") + { + description = "Imported from BPF collection" + } + attrDefList[[length(attrDefList) + 1L]] = list(name = label, + type = "STRING", + description = description) + } + + levelDefinitions[[length(levelDefinitions) + 1L]] = list(name = levelTracker[[levelIdx]][["key"]], + type = levelTracker[[levelIdx]][["type"]], + attributeDefinitions = attrDefList) + } + } + + return(levelDefinitions) +} + +############################################################################### +############################################################################### +############################################################################### +############################################################################### +############################################################################### + +## Get link defintions from link tracker +## +## @param linkTracker +## @keywords emuR BPF Emu +## @return linkDefinitions + +get_bpfLinkDefinitions <- function(linkTracker = linkTracker) +{ + linkDefinitions = list() + if(length(linkTracker) > 0) + { + for(linkIdx in 1:length(linkTracker)) + { + linkDefinitions[[length(linkDefinitions)+1L]] = list(type = linkTracker[[linkIdx]][["type"]], + superlevelName = linkTracker[[linkIdx]][["fromkey"]], + sublevelName = linkTracker[[linkIdx]][["tokey"]]) + } + } + return(linkDefinitions) +} + +############################################################################### +############################################################################### +############################################################################### +############################################################################### +############################################################################### + +## Construct a skeleton (empty folders) for the EMU database +## +## @param emuDBhandle +## @keywords emuR BPF Emu +## @return + +make_bpfDbSkeleton <- function(emuDBhandle) +{ + # --------------------------------------------------------------------------- + # ------------------------- Create session directories ---------------------- + # --------------------------------------------------------------------------- + + queryTxt = paste0("SELECT name FROM session WHERE db_uuid = '", emuDBhandle$UUID, "'") + sessions = DBI::dbGetQuery(emuDBhandle$connection, queryTxt) + + for(idx in 1:nrow(sessions)) + { + session = paste0(sessions[idx,], session.suffix) + res = try(dir.create(file.path(emuDBhandle$basePath, session))) + if (res == FALSE || inherits(res, "try-error")) + { + stop("Could not create session directory ", file.path(emuDBhandle$basePath, session)) + } + } + # --------------------------------------------------------------------------- + # ------------------------- Create bundle directories ----------------------- + # --------------------------------------------------------------------------- + + queryTxt = paste0("SELECT name, session FROM bundle WHERE db_uuid = '", emuDBhandle$UUID, "'") + bundles = DBI::dbGetQuery(emuDBhandle$connection, queryTxt) + for(jdx in 1:nrow(bundles)) + { + bundle = paste0(bundles[jdx,1], bundle.dir.suffix) + session = paste0(bundles[jdx,2], session.suffix) + res = try(dir.create(file.path(emuDBhandle$basePath, session, bundle))) + if (res == FALSE || inherits(res, "try-error")) + { + stop("Could not create bundle directory ", file.path(emuDBhandle$basePath, session, bundle)) + } + } +} + +############################################################################### +############################################################################### +############################################################################### +############################################################################### +############################################################################### + +## Display collected warnings about semicolon entries in batch +## +## @param warningsTracker +## @keywords emuR BPF Emu +## @return + +display_bpfSemicolonWarnings <- function(warningsTracker) +{ + msg = paste0("WARNING: The following BPF files contain links pointing to the space between items (using ';'). ", + "This feature has not been implemented yet, so the affected items were treated as link-less:\n") + + for(path in warningsTracker$semicolonFound) + { + msg = paste0(msg, path, "\n") + } + + if(length(warningsTracker$semicolonFound) > 0) + { + warning(msg) + } +} + + +# --------------------------------------------------------------------------- +# ------------------------ Standard BPF levels ------------------------------ +# --------------------------------------------------------------------------- + +# To add a new level type to the BPF format: + +# - add new level name (three character string) to BPF_STANDARD_LEVELS +# - add its class (integer in range 1-5) to BPF_STANDARD_LEVEL_CLASSES +# - the order of both vectors must match (i.e. if you add the name at position 10, add the class at position 10 as well) + +# If you do not wish to extend the format directly in the source code, use newLevels and newLevelClasses arguments. + +BPF_STANDARD_LEVELS = c( + "KAN", "KAS", "PTR", "ORT", "TRL", "TR2", "SUP", "DAS", "PRS", "NOI", + "POS", "LMA", "TRS", "TRW", "PRO", "SYN", "FUN", "LEX", "TLN", + "IPA", "GES", "USH", "USM", "OCC", + "PRM", "LBG", "LBP", + "SAP", "MAU", "WOR", "PHO", "MAS", "USP", "TRN", + "PRB" +) + +BPF_STANDARD_LEVEL_CLASSES = c( + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, + 3, 3, 3, + 4, 4, 4, 4, 4, 4, 4, + 5 +) diff --git a/R/emuR-convert_TextGridCollection.R b/R/emuR-convert_TextGridCollection.R new file mode 100644 index 00000000..6217efa1 --- /dev/null +++ b/R/emuR-convert_TextGridCollection.R @@ -0,0 +1,233 @@ +##' Convert a TextGridCollection (e.g. .wav & .TextGrid files) to emuDB +##' +##' Converts a TextGridCollection to an emuDB by searching a given directory for .wav & .TextGrid files (default +##' extensions) with the same base name. First, the function generates a file pair list +##' containing paths to files with the same base +##' name. It then generates an emuDB DBconfig based on the first TextGrid in this list which specifies +##' the allowed level names and types in the new emuDB. After this it converts all file pairs to the new format, +##' checking whether they comply to the newly generated database configuration. For +##' more information on the emuDB format see \code{vignette{emuDB}}. +##' Note that since Praat TextGrids do not permit explicit hierarchical structures, the created emuDB does not contain +##' any links or link definitions. You can however use the \code{\link{autobuild_linkFromTimes}} function after the conversion process +##' to automatically build a hierarchal structure. +##' +##' @param dir path to directory containing the TextGridCollection (nested directory structures are permitted as the +##' function recursively searches through directories, generating the session names from dir. structure) +##' @param dbName name given to the new emuDB +##' @param targetDir directory where to save the new emuDB +##' @param tgExt extension of TextGrid files (default=TextGrid, meaning file names of the form baseName.TextGrid) +##' @param audioExt extension of audio files (default=wav, meaning file names of the form baseName.wav) +##' @param tierNames character vector containing names of tiers to extract and convert. If NULL (the default) all +##' tiers are converted. +##' @param verbose display infos & show progress bar +##' @export +##' @import tools +##' @importFrom rlang .data +##' @return NULL +##' @examples +##' \dontrun{ +##' +##' ########################################################## +##' # prerequisite: directory containing .wav & .TextGrid files +##' # (see \code{?create_emuRdemoData} how to create demo data) +##' +##' # convert TextGridCollection and store +##' # new emuDB in folder provided by tempdir() +##' convert_TextGridCollection(dir = "/path/to/directory/", +##' dbName = "myTGcolDB", +##' targetDir = tempdir()) +##' +##' +##' # same as above but this time only convert +##' # the information stored in the "Syllable" and "Phonetic" tiers +##' convert_TextGridCollection(dir = "/path/to/directory/", +##' dbName = "myTGcolDB", +##' targetDir = tempdir(), +##' tierNames = c("Syllable", "Phonetic")) +##' +##'} +convert_TextGridCollection <- function(dir, dbName, + targetDir, tgExt = 'TextGrid', + audioExt = 'wav', tierNames = NULL, + verbose = TRUE){ + # normalize paths + dir = suppressWarnings(normalizePath(dir)) + targetDir = suppressWarnings(normalizePath(targetDir)) + # check if dir exists + if(!dir.exists(dir)){ + stop("dir does not exist!") + } + # create + if(!dir.exists(targetDir)){ + res=dir.create(targetDir,recursive = TRUE) + if(!res){ + stop("Could not create target directory: ",targetDir," !\n") + } + } + + basePath=file.path(targetDir, paste0(dbName, emuDB.suffix)) + # check if base path dir already exists + if(dir.exists(basePath)){ + stop('The directory ', basePath, ' already exists. Can not generate new emuDB if directory called ', dbName, ' already exists!') + }else{ + res=dir.create(basePath) + if(!res){ + stop("Could not create base directory: ",basePath," !\n") + } + } + + # generate file pair list + fpl = create_filePairList(dir, dir, audioExt, tgExt) + + progress = 0 + + if(verbose){ + cat("INFO: Loading TextGridCollection containing", length(fpl[,1]), "file pairs...\n") + pb = utils::txtProgressBar(min = 0, max = length(fpl[,1]), initial = progress, style=3) + utils::setTxtProgressBar(pb, progress) + } + + # gereate DBconfig from first TextGrid in fpl + DBconfig = create_DBconfigFromTextGrid(fpl[1,2], dbName, basePath,tierNames) + + # create tmp dbHandle + dbHandle = emuDBhandle(dbName, basePath, DBconfig$UUID, connectionPath = ":memory:") + + # store to tmp DBI + add_emuDbDBI(dbHandle) + + # store db DBconfig file + store_DBconfig(dbHandle, DBconfig) + + # allBundles object to hold bundles without levels and links + allBundles = list() + + + # loop through fpl + DBI::dbBegin(dbHandle$connection) + for(i in 1:dim(fpl)[1]){ + + # create session name + sesName = gsub('^_', + '', + gsub(.Platform$file.sep, + '_', + gsub(normalizePath(dir, + winslash = .Platform$file.sep), + '', + dirname(normalizePath(fpl[i,1], + winslash = .Platform$file.sep)), + fixed = TRUE), + fixed = TRUE), + fixed = TRUE) + + # session file path + if(sesName == ""){ + sesName = "0000" + sfp = file.path(basePath, paste0(sesName, session.suffix)) + }else{ + sfp = file.path(basePath, paste0(sesName, session.suffix)) + } + # create session dir + if(!dir.exists(sfp)){ + res = dir.create(sfp) + if(!res){ + stop("Could not create session directory: ", sfp, " !\n") + } + } + + # create session entry if it doesn't already exist + sesDF = DBI::dbGetQuery(dbHandle$connection, paste0("SELECT * FROM session WHERE name = '", sesName, "'")) + if(nrow(sesDF) == 0){ + DBI::dbExecute(dbHandle$connection, paste0("INSERT INTO ", + " session ", + "VALUES('", dbHandle$UUID, "', '", sesName, "')")) + } + + # media file + mfPath = fpl[i,1] + mfBn = basename(mfPath) + + # get sampleRate of audio file + asspObj = wrassp::read.AsspDataObj(mfPath, begin = 0, end = 1, samples = TRUE) + sampleRate = attributes(asspObj)$sampleRate + # create bundle name + bndlName = sub(pattern = "(.*)\\..*$", replacement = "\\1", basename(fpl[i,1])) + + # parse TextGrid + bundleAnnotDFs = TextGridToBundleAnnotDFs(fpl[i,2], + sampleRate = sampleRate, + name = bndlName, + annotates = paste0(bndlName, ".wav")) + + # remove unwanted levels + if(!is.null(tierNames)){ + # filter items + bundleAnnotDFs$items = dplyr::filter(bundleAnnotDFs$items, .data$level %in% tierNames) + # filter labels + bundleAnnotDFs$labels = dplyr::filter(bundleAnnotDFs$labels, .data$name %in% tierNames) + } + + # add to bundle table + add_bundleDBI(dbHandle, + sessionName = sesName, + name = bndlName, + annotates = bundleAnnotDFs$annotates, + sampleRate = bundleAnnotDFs$sampleRate, + MD5annotJSON = "") + # add to items, links, labels tables + store_bundleAnnotDFsDBI(dbHandle, + bundleAnnotDFs, + sessionName = sesName, + bundleName = bndlName) + + + # validate bundle + valRes = validate_bundleDBI(dbHandle, + session = sesName, + bundle = bndlName) + + if(valRes$type != 'SUCCESS'){ + stop('Parsed TextGrid did not pass validator! The validator message is: ', valRes$message) + } + + # create bndl folder + bDir = paste0(bndlName, bundle.dir.suffix) + bfp = file.path(sfp,bDir) + res = dir.create(bfp) + if(!res){ + stop("Could not create bundle directory ",bfp," !\n") + } + + # store media file + newMfPath = file.path(bfp, mfBn) + if(file.exists(mfPath)){ + file.copy(from = mfPath, to = newMfPath) + }else{ + stop("Media file :'", mfPath, "' does not exist!") + } + + + # update pb + if(verbose){ + utils::setTxtProgressBar(pb, i) + } + + } + DBI::dbCommit(dbHandle$connection) + + if(verbose){ + cat('\n') # hack to have newline after pb + } + + # store all annotations + rewrite_annots(dbHandle, verbose = verbose) + + + +} + +# FOR DEVELOPMENT +# library('testthat') +# test_file('tests/testthat/test_aaa_initData.R') +# test_file('tests/testthat/test_emuR-convert_TextGridCollection.R') diff --git a/R/emuR-convert_txtCollection.R b/R/emuR-convert_txtCollection.R new file mode 100644 index 00000000..f29fe1ae --- /dev/null +++ b/R/emuR-convert_txtCollection.R @@ -0,0 +1,264 @@ +##' Converts a collection of audio files and plain text transcriptions into an emuDB +##' +##' This function takes as input pairs of media files (i.e. wav files) and plain text +##' transcriptions files. It creates a new emuDB with one bundle per media file, and +##' turns the associated transcription into an item in that bundle. For this purpose, +##' media files and text files belonging to the same bundle must be named identically +##' (with the exception of their respective file extensions). The newly created +##' emuDB is stored in the target directory, and its handle is returned. +##' +##' @param dbName name of the new emuDB +##' @param sourceDir directory containing the plain text transcription files and media files +##' @param targetDir directory where the new emuDB will be stored +##' @param txtExtension file extension of transcription files +##' @param mediaFileExtension file extension of media files +##' @param attributeDefinitionName label name of the transcription items +##' @param cleanWhitespaces if true, any sequence of whitespaces in the transcription (including newlines and tabs) +##' is transformed into a single blank +##' @param verbose display progress bar +##' +##' @export +##' @seealso convert_BPFCollection, convert_TextGridCollection + +convert_txtCollection <- function(dbName, + sourceDir, + targetDir, + txtExtension = 'txt', + mediaFileExtension = 'wav', + attributeDefinitionName = 'transcription', + cleanWhitespaces = TRUE, + verbose = TRUE) +{ + transcriptionLevel = 'bundle' + if(transcriptionLevel == attributeDefinitionName) + { + stop("Transcription label must not be ", attributeDefinitionName) + } + + # --------------------------------------------------------------------------- + # -------------------------- Get directories -------------------------------- + # --------------------------------------------------------------------------- + + sourceDir = suppressWarnings(normalizePath(sourceDir)) + targetDir = suppressWarnings(normalizePath(targetDir)) + basePath = file.path(targetDir, paste0(dbName, emuDB.suffix)) + + if(!dir.exists(targetDir)) + { + res = try(suppressWarnings(dir.create(targetDir))) + if (res == FALSE || inherits(res, "try-error")) + { + stop("Could not create target directory ", targetDir) + } + } + + if(dir.exists(basePath)) + { + stop("emuDB ", basePath, " already exists") + } + + + # --------------------------------------------------------------------------- + # -------------------------- Get file pair list ---------------------------- + # --------------------------------------------------------------------------- + + filePairList = create_filePairList(sourceDir, + sourceDir, + txtExtension, + mediaFileExtension) + + # --------------------------------------------------------------------------- + # ---------------------------- Initialize dbHandle -------------------------- + # --------------------------------------------------------------------------- + + + dbHandle = emuDBhandle(dbName, basePath = basePath, uuid::UUIDgenerate(), ":memory:") + # insert into emuDB table + queryTxt = paste0("INSERT INTO emu_db (uuid, name) VALUES('", dbHandle$UUID, "', '", dbName,"')") + DBI::dbExecute(dbHandle$connection, queryTxt) + + # --------------------------------------------------------------------------- + # ------------------------ Initialize progress bar -------------------------- + # --------------------------------------------------------------------------- + + if (verbose) + { + progress = 0 + nbFilePairs = length(filePairList) / 2 + + cat("INFO: Parsing plain text collection containing", nbFilePairs, "file pair(s)...\n") + pb = utils::txtProgressBar( + min = 0, max = nbFilePairs, initial = progress, style = 3 + ) + utils::setTxtProgressBar(pb, progress) + } + + # --------------------------------------------------------------------------- + # --------------------------- Loop over bundles ----------------------------- + # --------------------------------------------------------------------------- + + for (idx in 1:nrow(filePairList)[1]) + { + # --------------------------------------------------------------------------- + # ------------------ Get session and bundle names --------------------------- + # --------------------------------------------------------------------------- + + session = get_bpfSession(filePath = filePairList[idx, 2], + sourceDir = sourceDir) + + txtPath = normalizePath(filePairList[idx, 1], winslash = .Platform$file.sep) + wavPath = normalizePath(filePairList[idx, 2], winslash = .Platform$file.sep) + bundle = sub(pattern = "(.*)\\..*$", replacement = "\\1", basename(wavPath)) + annotates = basename(wavPath) + + # Escaping single quotes in anything user-generated that will be fed into SQL + session = stringr::str_replace_all(session, "'", "''") + bundle = stringr::str_replace_all(bundle, "'", "''") + annotates = stringr::str_replace_all(annotates, "'", "''") + + # ----------------------------------------------------------------------- + # -------------------------- Get sample rate ---------------------------- + # ----------------------------------------------------------------------- + + asspObj = wrassp::read.AsspDataObj(filePairList[idx, 2]) + samplerate = attributes(asspObj)$sampleRate + + # ----------------------------------------------------------------------- + # --------------- Write session and bundle to temp DB ------------------- + # ----------------------------------------------------------------------- + queryTxt = paste0("SELECT name from session WHERE name='", session, "'") + all_sessions = DBI::dbGetQuery(dbHandle$connection, queryTxt) + + if (!session %in% all_sessions) + { + queryTxt = paste0("INSERT INTO session VALUES('", dbHandle$UUID, "', '", session, "')") + DBI::dbExecute(dbHandle$connection, queryTxt) + } + + queryTxt = paste0( + "INSERT INTO bundle VALUES('", dbHandle$UUID, "', '", session, "', '", bundle, "', '", + annotates, "', ", samplerate, ", 'NULL')" + ) + + DBI::dbExecute(dbHandle$connection, queryTxt) + + lines = suppressWarnings(readr::read_lines(filePairList[idx, 1])) + + transcription = paste(lines, collapse = " ") + transcription = stringr::str_trim(transcription) + transcription = stringr::str_replace_all(transcription, "'", "''") + + if (cleanWhitespaces) + { + transcription = stringr::str_replace_all(transcription, "\\s+", " ") + } + + queryTxt = paste0( + "INSERT INTO items VALUES('", + dbHandle$UUID, + "','", + session, + "','", + bundle, + "',1,'", + transcriptionLevel, + "','ITEM',1,", + samplerate, + ",NULL, NULL, NULL)" + ) + DBI::dbExecute(dbHandle$connection, queryTxt) + + queryTxt = paste0( + "INSERT INTO labels VALUES('", + dbHandle$UUID, + "','", + session, + "','", + bundle, + "',1,1,'", + transcriptionLevel, + "','')" + ) + DBI::dbExecute(dbHandle$connection, queryTxt) + + queryTxt = paste0( + "INSERT INTO labels VALUES('", + dbHandle$UUID, + "','", + session, + "','", + bundle, + "',1,2,'", + attributeDefinitionName, + "','", + transcription, + "')" + ) + DBI::dbExecute(dbHandle$connection, queryTxt) + + + if (verbose) + { + utils::setTxtProgressBar(pb, idx) + } + } + if (verbose) + { + cat("\n") + } + + + dbConfig = list( + name = dbName, + UUID = dbHandle$UUID, + mediafileExtension = mediaFileExtension, + ssffTrackDefinitions = list(), + levelDefinitions = list( + list( + name = transcriptionLevel, + type = "ITEM", + attributeDefinitions = list(list(name=transcriptionLevel, + type = "STRING"), + list(name = attributeDefinitionName, + type = "STRING", + description="Transcription imported from txt collection")) + ) + ), + linkDefinitions = list(), + EMUwebAppConfig = list( + perspectives = list( + list( + name = 'default', + signalCanvases = list( + order = c("OSCI","SPEC"), + assign = list(), + contourLims = list() + ), + levelCanvases = list(order = list()), + twoDimCanvases = list(order = list()) + ) + ), + activeButtons = list(saveBundle = TRUE, + showHierarchy = TRUE) + ) + ) + + res = try(dir.create(basePath)) + if (res == FALSE || inherits(res, "try-error")) + { + stop("Could not create emuDB base directory ", basePath) + } + + store_DBconfig(dbHandle, dbConfig) + + make_bpfDbSkeleton(dbHandle) + + copy_bpfMediaFiles( + basePath = basePath, + sourceDir = sourceDir, + mediaFiles = filePairList[,2], + verbose = verbose + ) + + rewrite_annots(dbHandle, verbose = verbose) +} \ No newline at end of file diff --git a/R/emuR-create_DBconfigFromTextGrid.R b/R/emuR-create_DBconfigFromTextGrid.R new file mode 100644 index 00000000..16e57592 --- /dev/null +++ b/R/emuR-create_DBconfigFromTextGrid.R @@ -0,0 +1,101 @@ +## Create emuDB DBconfig object from a TextGrid file +## +## @param tgPath path to TextGrid file +## @param dbName name of the database +## @param basePath project base path +## @param tierNames character vector containing names of tiers to extract and convert. If NULL (the default) all +## tiers are converted. +## @return object of class emuDB.schema.db +## @import stringr uuid wrassp RSQLite +## @keywords internal +## +create_DBconfigFromTextGrid = function(tgPath, dbName, basePath, tierNames = NULL){ + + #################### + # check parameters + + if(is.null(tgPath)) { + stop("Argument tgPath (path to TextGrid file) must not be NULL\n") + } + + if(is.null(dbName)) { + stop("Argument dbName (name of new DB) must not be NULL\n") + } + + # + #################### + + # parse TextGrid + tgAnnotDFs = TextGridToBundleAnnotDFs(tgPath, + sampleRate = 2000, + name = "tmpBundleName", + annotates = "tmpBundleName.wav") # sampleRate/name/annotates don't matter!! -> hardcoded + + # remove unwanted levels + if(!is.null(tierNames)){ + # filter items + tgAnnotDFs$items = dplyr::filter(tgAnnotDFs$items, .data$level %in% tierNames) + # filter labels + tgAnnotDFs$labels = dplyr::filter(tgAnnotDFs$labels, .data$name %in% tierNames) + } + + levels = dplyr::distinct(tgAnnotDFs$items, .data$level, .keep_all = TRUE) + + # create level definitions + levelDefinitions = list() + + # generate defaultLvlOrder + defaultLvlOrder=list() + levIdx = 1 + + for(lineIdx in 1:nrow(levels)){ + lev = levels[lineIdx,] + if(lev$type == 'SEGMENT' || lev$type == 'EVENT'){ + defaultLvlOrder[[length(defaultLvlOrder)+1L]]=lev$level + }else{ + stop(paste0('Found levelDefinition that is not of type SEGMENT|EVENT ", + "while parsing TextGrid...this should not occur! This ", + "TextGrid file caused the problem:', tgPath)) + } + # add new leveDef. + levelDefinitions[[levIdx]] = list(name = lev$level, + type = lev$type, + attributeDefinitions = list(list(name = lev$level, type = "STRING"))) + levIdx = levIdx + 1 + } + + + # create signalCanvas config + sc = list(order = c("OSCI","SPEC"), + assign = list(), + contourLims = list()) + + # create perspective + defPersp = list(name = 'default', + signalCanvases = sc, + levelCanvases = list(order = defaultLvlOrder), + twoDimCanvases = list(order = list())) + # create EMUwebAppConfig + waCfg = list(perspectives = list(defPersp), + activeButtons = list(saveBundle = TRUE, + showHierarchy = TRUE)) + + + + # generate full schema list + dbSchema = list(name = dbName, + UUID = uuid::UUIDgenerate(), + mediafileExtension = 'wav', + ssffTrackDefinitions = list(), + levelDefinitions = levelDefinitions, + linkDefinitions = list(), + EMUwebAppConfig = waCfg) + + + return(dbSchema) +} + +# FOR DEVELOPMENT +# library('testthat') +# test_file('tests/testthat/test_aaa_initData.R') +# test_file('tests/testthat/test_emuR-create_DBconfigFromTextGrid.R') diff --git a/R/emuR-create_emuRdemoData.R b/R/emuR-create_emuRdemoData.R new file mode 100644 index 00000000..1195618b --- /dev/null +++ b/R/emuR-create_emuRdemoData.R @@ -0,0 +1,261 @@ +##' Create demo data for the emuR package +##' +##' Create a folder within the folder specified +##' by the dir argument called emuR_demoData. +##' This folder contains the folders: +##' \itemize{ +##' \item{ae_emuDB: Containing an emuDB that adheres to the new format specification +##' (as expected by the \code{\link{load_emuDB}} function). See \code{vignette(emuDB)} +##' for more information on this database format.} +##' \item{BPF_collection: Containing a BAS Partitur Format (BPF) file collection (as +##' expected by the \code{\link{convert_BPFCollection}} function)} +##' \item{legacy_ae: Containing a legacyEmuDB (as expected by the +##' \code{\link{convert_legacyEmuDB}} function)} +##' \item{TextGrid_collection: Containing a TextGrid file collection +##' (as expected from the \code{\link{convert_TextGridCollection}} function)} +##' } +##' @param dir directory to create demo data in (default= tempdir()) +##' @param precache creates an on-file-system cache for the ae emuDB to allow fast loading +##' (see \code{load_emuDB} for details about the emuDB file cache) +##' @export +##' @examples +##' \dontrun{ +##' +##' # create demo data directory in directory +##' # provided by the tempdir function +##' create_emuRdemoData(dir = tempdir()) +##' } +create_emuRdemoData <- function(dir = tempdir(), precache = FALSE){ + + ddPath = file.path(dir,"emuR_demoData") + + path2data = system.file("extdata", package = "emuR") + + if(file.exists(ddPath)){ + stop("Path '", ddPath,"' already exists!") + } + + created = dir.create(ddPath) + if(!created){ + stop("Couldn't created ", ddPath) + } + + ################################# + # create scripts + matlabSourcePaths = list.files(file.path(path2data, "rawDemoData", "scriptFiles"), + pattern = ".m$", + full.names = TRUE) + + scriptsDestinationPath = file.path(ddPath, "add_signal_scripts") + matlabDestinationPath = file.path(scriptsDestinationPath, "matlab") + + created = dir.create(scriptsDestinationPath) + if(!created){ + stop("Couldn't create ", scriptsDestinationPath) + } + created = dir.create(matlabDestinationPath) + if(!created){ + stop("Couldn't create ", matlabDestinationPath) + } + + file.copy(matlabSourcePaths, matlabDestinationPath) + + + ################################# + # create ae + configPath = list.files(path2data, + pattern = "DBconfig.json$", + recursive = TRUE, + full.names = TRUE) + wavPaths = list.files(path2data, + pattern = ".wav$", + recursive = TRUE, + full.names = TRUE) + annotPaths = list.files(path2data, + pattern = "_annot.json$", + recursive = TRUE, + full.names = TRUE) + aePath = file.path(ddPath, + paste0("ae", emuDB.suffix)) + + created = dir.create(aePath) + if(!created){ + stop("Couldn't create ", aePath) + } + + file.copy(configPath, aePath) + + sesPath = file.path(aePath, "0000_ses") + created = dir.create(sesPath) + if(!created){ + stop("Couldn't create ", sesPath) + } + for(p in wavPaths){ + bndlName = gsub(".wav$", "", basename(p)) + bndlPath = file.path(sesPath, paste0(bndlName, "_bndl")) + dir.create(bndlPath) + + file.copy(p, bndlPath) + idx = grep(paste0(bndlName, "_annot.json$"), annotPaths) + file.copy(annotPaths[idx], bndlPath) + + } + + # calc dft and fms files + wps = list.files(sesPath, + pattern = ".wav$", + recursive = TRUE, + full.names = TRUE) + wrassp::dftSpectrum(wps, verbose = FALSE) + wrassp::forest(wps, verbose = FALSE) + + + # generate cache of ae emuDB + if(precache){ + dbHandle = load_emuDB(aePath, inMemoryCache = FALSE, verbose = FALSE) + DBI::dbDisconnect(dbHandle$connection) + } + + #################################### + # create TextGrid_collection, BPF_collection and txt_collection + fpltgc = create_filePairList(path2data, + path2data, + "wav", + "TextGrid") + fplbpf_original = create_filePairList(path2data, + path2data, + "wav", + "par") + fplbpf_manipulated = create_filePairList(path2data, + path2data, + "wav", + "parmanipulated") + fpltxt = create_filePairList(path2data, + path2data, + "wav", + "txt") + tgcPath = file.path(ddPath, + "TextGrid_collection") + bpfPath_original = file.path(ddPath, + "BPF_collection") + txtcPath = file.path(ddPath, + "txt_collection") + + created = dir.create(tgcPath) + if(!created){ + stop("Couldn't create ", tgcPath) + } + + created = dir.create(bpfPath_original) + if(!created){ + stop("Couldn't create ", bpfPath_original) + } + + created = dir.create(txtcPath) + if(!created){ + stop("Couldn't create ", txtcPath) + } + + + file.copy(fpltgc[,1], tgcPath) + file.copy(fpltgc[,2], tgcPath) + file.copy(fplbpf_original[,1], bpfPath_original) + file.copy(fplbpf_original[,2], bpfPath_original) + file.copy(fpltxt[,1], txtcPath) + file.copy(fpltxt[,2], txtcPath) + + ################################# + # create legacyEmuDB + tplPath = list.files(path2data, + pattern = ".tpl$", + recursive = TRUE, + full.names = TRUE) + wavPaths = list.files(path2data, + pattern = ".wav$", + recursive = TRUE, + full.names = TRUE) + hlbPaths = list.files(path2data, + pattern = "hlb$", + recursive = TRUE, + full.names = TRUE) + labPaths = list.files(path2data, + pattern = "lab$", + recursive = TRUE, + full.names = TRUE) + tonePaths = list.files(path2data, + pattern = "tone$", + recursive = TRUE, + full.names = TRUE) + + legacyAePath = file.path(ddPath, "legacy_ae") + created = dir.create(legacyAePath) + if(!created){ + stop("Couldn't create ", legacyAePath) + } + + labelsPath = file.path(legacyAePath, "labels") + created = dir.create(labelsPath) + if(!created){ + stop("Couldn't create ", legacyAePath) + } + + signalsPath = file.path(legacyAePath, "signals") + created = dir.create(signalsPath) + if(!created){ + stop("Couldn't create ", legacyAePath) + } + + # copy files + file.copy(tplPath, legacyAePath) + file.copy(wavPaths, signalsPath) + file.copy(hlbPaths, labelsPath) + file.copy(labPaths, labelsPath) + file.copy(tonePaths, labelsPath) + + # calc dft and fms files + wps = list.files(signalsPath, pattern = ".wav$", recursive = TRUE, full.names = TRUE) + wrassp::dftSpectrum(wps, verbose = FALSE) + wrassp::forest(wps, verbose = FALSE) + + return(invisible()) +} + +## create manipulated BPF_collection +## +## @param dir directory in that the BPF_collection is created +create_BPFcollectionManipulated = function(dir){ + + path2data = system.file("extdata", package = "emuR") + + bpfPath_manipulated = file.path(dir, "BPF_collection_manipulated") + + if(file.exists(bpfPath_manipulated)){ + stop("Path '", bpfPath_manipulated,"' already exists!") + } + + created = dir.create(bpfPath_manipulated) + if(!created){ + stop("Couldn't create ", bpfPath_manipulated) + } + + + fplbpf_manipulated = create_filePairList(path2data, + path2data, + "wav", + "parmanipulated") + + created = dir.create(file.path(bpfPath_manipulated, "0000")) + if(!created){ + stop("Couldn't create ", file.path(bpfPath_manipulated, "0000")) + } + file.copy(fplbpf_manipulated[,1], + file.path(bpfPath_manipulated, "0000")) + file.copy(fplbpf_manipulated[,2], + file.path(bpfPath_manipulated, "0000")) + +} + +######################## +# FOR DEVELOPMENT +# unlink(file.path(tempdir(),"emuR_demoData"), recursive = TRUE) +# create_emuRdemoData(precache = TRUE) diff --git a/R/emuR-create_filePairList.R b/R/emuR-create_filePairList.R new file mode 100644 index 00000000..00f83ceb --- /dev/null +++ b/R/emuR-create_filePairList.R @@ -0,0 +1,93 @@ +## Create a file-pair-list +## +## Recursivly searches through a root directory and matches the +## basenames of files that have the extentions provided. +## +## +## @param ext1Path2rootDir path to root directory of first file extention +## @param ext2Path2rootDir path to root directory of second file +## extention (CAUTION: think of DB size and search space!) +## @param ext1 first extention to look for. This extention is considered +## the primary extention. +## This means that this extentions genarates the basename list that the +## second extentions list is matched against. +## @param ext2 second extention to pair base names of first extention with +## @import tools +## +create_filePairList <- function(ext1Path2rootDir, ext2Path2rootDir, ext1, ext2){ + # normalize paths + ext1Path2rootDir = suppressWarnings(normalizePath(ext1Path2rootDir)) + ext2Path2rootDir = suppressWarnings(normalizePath(ext2Path2rootDir)) + + # ext1Path2rootDir is valid path + if(!dir.exists(ext1Path2rootDir)){ + stop(paste0('ext1Path2rootDir does not exist: ', + ext1Path2rootDir)) + } + + # ext2Path2rootDir is valid path + if(!dir.exists(ext2Path2rootDir)){ + stop(paste0('ext2Path2rootDir does not exist: ', + ext2Path2rootDir)) + } + + + # get all ext1 file paths + allExt1FilePaths = list.files(ext1Path2rootDir, + pattern = paste(ext1, "$", sep = ""), + recursive = TRUE, + full.names = TRUE) + + # get all ext2 file paths + allExt2FilePaths = list.files(ext2Path2rootDir, + pattern = paste(ext2, "$", sep = ""), + recursive = TRUE, + full.names = TRUE) + + # check more ext1 found than ext2 + if(length(allExt1FilePaths) > length(allExt2FilePaths)){ + stop("Found less files with '", + ext2, "' extension than files with '", + ext1, "' extension in ", + ext1Path2rootDir, " and ", + ext2Path2rootDir, " including their sub-directories!") + } + + # extract base names + allExt1FilePathsBNs = basename(sub(pattern = "(.*)\\..*$", replacement = "\\1", allExt1FilePaths)) + allExt2FilePathsBNs = basename(sub(pattern = "(.*)\\..*$", replacement = "\\1", allExt2FilePaths)) + + equalToExt1FilePathsBNs = allExt1FilePathsBNs[allExt2FilePathsBNs %in% allExt1FilePathsBNs] + foundExt2FilePaths = allExt2FilePaths[allExt2FilePathsBNs %in% allExt1FilePathsBNs] + + + # check if found all allExt2FilePathsBNs in allExt1FilePathsBNs + if(length(allExt1FilePathsBNs) != length(equalToExt1FilePathsBNs)){ + stop("Not all '", + ext2, + "' files found for '", + ext1, + "' files found in ", + ext1Path2rootDir, + " and ", + ext2Path2rootDir, + " including their sub-directories!") + } + + + # check they are empty + if(length(allExt1FilePathsBNs)==0 || length(allExt2FilePathsBNs) == 0){ + stop('Both colomns in file pair list are empty! This means that no files where found...') + } + + # cbind filePairList + fpl = cbind(allExt1FilePaths, foundExt2FilePaths) + + colnames(fpl) = c('ext1FilePaths', 'ext2FilePaths') + + return(fpl) +} + +# FOR DEVELOPMENT +# library('testthat') +# test_file('tests/testthat/test_create.filePairList.R') diff --git a/R/emuR-create_seglists.R b/R/emuR-create_seglists.R new file mode 100644 index 00000000..75a235df --- /dev/null +++ b/R/emuR-create_seglists.R @@ -0,0 +1,530 @@ + +convert_queryEmuRsegsToTibble <- function(emuDBhandle, emuRsegs){ + + if(nrow(emuRsegs) == 0){ + return(dplyr::tibble(labels = character(), start = numeric(), end = numeric(), + db_uuid = character(), session = character(), + bundle = character(), start_item_id = integer(), end_item_id = integer(), + level = character(), attribute = character(), start_item_seq_idx = integer(), + end_item_seq_idx = integer(), type = character(), sample_start = integer(), + sample_end = integer(), sample_rate = integer())) + } + + resultAttrDef = unique(emuRsegs$level[!is.na(emuRsegs$level)]) + + if(length(resultAttrDef) > 1){ + stop("Could not convert the emuRsegs object to a tibble as it contains multiple attribute definitions.") + } + attrDefLn = get_levelNameForAttributeName(emuDBhandle, resultAttrDef) + # fix attribute/level + emuRsegs$attribute = resultAttrDef + emuRsegs$level = attrDefLn + if(any(is.na(emuRsegs$labels))){ + emuRsegs[is.na(emuRsegs$labels),]$attribute = NA + emuRsegs[is.na(emuRsegs$labels),]$level = NA + } + + # select columns in correct order + res_tibble = emuRsegs %>% + dplyr::select("labels", "start", "end", + "db_uuid", "session", + "bundle", "start_item_id", "end_item_id", + "level", "attribute", "start_item_seq_idx", + "end_item_seq_idx", "type", "sample_start", + "sample_end", "sample_rate") %>% + dplyr::as_tibble() + + return(res_tibble) + +} + +convert_queryResultToEmusegs<-function(emuDBhandle, + timeRefSegmentLevel=NULL, + sessionPattern, + bundlePattern, + query = "", + calcTimes = TRUE, + verbose){ + + emuRsegs = convert_queryResultToEmuRsegs(emuDBhandle, + timeRefSegmentLevel, + sessionPattern, + bundlePattern, + queryStr = query, + calcTimes = calcTimes, + verbose = verbose) + emusegs = as.emusegs(emuRsegs) + return(emusegs) +} + +################################## +# +convert_queryResultToEmuRsegs <- function(emuDBhandle, + timeRefSegmentLevel=NULL, + sessionPattern = ".*", + bundlePattern = ".*", + queryStr = "", + calcTimes = TRUE, + preserveParentLength = FALSE, # only set TRUE by requery_hier + verbose){ + itemsTableName = "items" + labelsTableName ="labels" + projectionItemsN = 0 + if(DBI::dbExistsTable(emuDBhandle$connection, "interm_res_proj_items_tmp_root")){ + projectionItemsN = DBI::dbGetQuery(emuDBhandle$connection, paste0("SELECT COUNT(*) AS n ", + "FROM interm_res_proj_items_tmp_root"))$n + } + + if(projectionItemsN > 0){ + + # insert everything into interm_res_items_tmp_root for query_hierarchyWalk + DBI::dbExecute(emuDBhandle$connection, paste0("DELETE FROM interm_res_items_tmp_root")) + + DBI::dbExecute(emuDBhandle$connection, paste0("INSERT INTO interm_res_items_tmp_root ", + "SELECT DISTINCT ", + " db_uuid, ", + " session, ", + " bundle, ", + " p_seq_start_id AS seq_start_id, ", + " p_seq_end_id AS seq_end_id, ", + " p_seq_len AS seq_len, ", + " p_level AS level, ", + " p_attribute AS attribute, ", + " p_seq_start_seq_idx AS seq_start_seq_idx, ", + " p_seq_end_seq_idx AS seq_end_seq_idx ", + "FROM interm_res_proj_items_tmp_root")) + + } + + # check for empty result + itemsN = DBI::dbGetQuery(emuDBhandle$connection, paste0("SELECT COUNT(*) AS n ", + "FROM interm_res_items_tmp_root ", + "WHERE db_uuid IS NOT NULL"))$n + if(itemsN > 0 ){ + + # use "normal" items + seqStartIdColName = "seq_start_id" + seqEndIdColName = "seq_end_id" + seqLenColName = "seq_len" + levelColName = "level" + + # set type of join depending on preserve*Length args + if(preserveParentLength){ + joinType = "LEFT JOIN" + orderByString = "" # don't reorder if left joining to perserve NA/NULL row placement + }else{ + joinType = "INNER JOIN" + orderByString = paste0("ORDER BY items_seq_start.db_uuid, ", + " items_seq_start.session, ", + " items_seq_start.bundle, ", + " items_seq_start.level, ", + " items_seq_start.seq_idx") + } + + + dbConfig = load_DBconfig(emuDBhandle) + resultAttrDef = DBI::dbGetQuery(emuDBhandle$connection, paste0("SELECT DISTINCT attribute ", + " FROM interm_res_items_tmp_root", + " WHERE level IS NOT NULL"))$attribute + + attrDefLn = get_levelNameForAttributeName(emuDBhandle, resultAttrDef) + ld = get_levelDefinition(emuDBhandle, attrDefLn) + + + # create temp table that holds emuRsegs without labels + DBI::dbExecute(emuDBhandle$connection, paste0("CREATE TEMP TABLE emursegs_tmp ( ", + " labels TEXT, ", + " start REAL, ", + " end REAL, ", + " utts TEXT, ", + " db_uuid VARCHAR(36), ", + " session TEXT, ", + " bundle TEXT, ", + " start_item_id INTEGER, ", + " end_item_id INTEGER, ", + " level TEXT, ", + " start_item_seq_idx INTEGER, ", + " end_item_seq_idx INTEGER, ", + " type TEXT, ", + " sample_start INTEGER, ", + " sample_end INTEGER, ", + " sample_rate INTEGER", + ");")) + + if(!calcTimes){ # no times are requested then that makes things a lot easier :-) + + # set type of join depending on preserveParentLength + if(preserveParentLength){ + joinType = "LEFT JOIN" + orderByString = "ORDER BY interm_res_items_tmp_root.rowid" # don't reorder if left joining to perserve NA/NULL row placement + }else{ + joinType = "INNER JOIN" + orderByString = paste0("ORDER BY interm_res_items_tmp_root.db_uuid, ", + " interm_res_items_tmp_root.session, ", + " interm_res_items_tmp_root.bundle, ", + " interm_res_items_tmp_root.seq_start_seq_idx") + } + + DBI::dbExecute(emuDBhandle$connection, paste0("INSERT INTO emursegs_tmp ", + "SELECT NULL AS labels, ", + " NULL AS start, ", + " NULL AS end, ", + " interm_res_items_tmp_root.session || ':' || interm_res_items_tmp_root.bundle AS utts, ", + " interm_res_items_tmp_root.db_uuid, ", + " interm_res_items_tmp_root.session, ", + " interm_res_items_tmp_root.bundle, ", + " interm_res_items_tmp_root.seq_start_id AS start_item_id, ", + " interm_res_items_tmp_root.seq_end_id AS end_item_id, ", + " interm_res_items_tmp_root.level AS level, ", + " interm_res_items_tmp_root.seq_start_seq_idx AS start_item_seq_idx, ", + " interm_res_items_tmp_root.seq_end_seq_idx AS end_item_seq_idx, ", + " items_seq_start.type AS type, ", + " NULL AS sampleStart, ", + " NULL AS sample_end, ", + " items_seq_start.sample_rate AS sample_rate ", + "FROM interm_res_items_tmp_root ", + joinType, " items AS items_seq_start ", + "ON interm_res_items_tmp_root.db_uuid = items_seq_start.db_uuid ", + " AND interm_res_items_tmp_root.session = items_seq_start.session ", + " AND interm_res_items_tmp_root.bundle = items_seq_start.bundle ", + " AND interm_res_items_tmp_root.seq_start_id = items_seq_start.item_id ", + joinType, " items AS items_seq_end ", + "ON interm_res_items_tmp_root.db_uuid = items_seq_end.db_uuid ", + " AND interm_res_items_tmp_root.session = items_seq_end.session ", + " AND interm_res_items_tmp_root.bundle = items_seq_end.bundle ", + " AND interm_res_items_tmp_root.seq_end_id = items_seq_end.item_id ", + joinType, " labels ", + "ON interm_res_items_tmp_root.db_uuid = labels.db_uuid ", + " AND interm_res_items_tmp_root.session = labels.session ", + " AND interm_res_items_tmp_root.bundle = labels.bundle ", + " AND interm_res_items_tmp_root.seq_end_id = labels.item_id ", + " AND labels.name = '", resultAttrDef, "' ", + orderByString, + "")) + + }else if(ld$type != "ITEM"){ # if level has time information, time can be calculated from sample values directly + + DBI::dbExecute(emuDBhandle$connection, paste0("INSERT INTO emursegs_tmp ", + "SELECT NULL AS labels, ", + " CASE items_seq_start.type ", + " WHEN 'SEGMENT' THEN ", + " CASE items_seq_start.sample_start ", + " WHEN 0 THEN CAST(0.0 AS REAL) ", + " ELSE (CAST (items_seq_start.sample_start AS REAL) - 0.5 ) / CAST(items_seq_start.sample_rate AS REAL) * 1000.0 ", + " END", + " WHEN 'EVENT' THEN CAST (items_seq_start.sample_point AS REAL) / CAST(items_seq_start.sample_rate AS REAL) * 1000.0 ", + " ELSE NULL ", + " END AS start, ", + " CASE items_seq_start.type ", + " WHEN 'SEGMENT' THEN (CAST (items_seq_end.sample_start + items_seq_end.sample_dur AS REAL) + 0.5) / CAST (items_seq_end.sample_rate AS REAL) * 1000.0 ", + " WHEN 'EVENT' THEN 0.0", + " ELSE NULL ", + " END AS end, ", + " interm_res_items_tmp_root.session || ':' || interm_res_items_tmp_root.bundle AS utts, ", + " interm_res_items_tmp_root.db_uuid, ", + " interm_res_items_tmp_root.session, ", + " interm_res_items_tmp_root.bundle, ", + " interm_res_items_tmp_root.seq_start_id AS start_item_id, ", + " interm_res_items_tmp_root.seq_end_id AS end_item_id, ", + " interm_res_items_tmp_root.attribute AS level, ", + " interm_res_items_tmp_root.seq_start_seq_idx, ", + " interm_res_items_tmp_root.seq_end_seq_idx AS end_item_seq_idx, ", + " items_seq_start.type AS type, ", + " CASE items_seq_start.type ", + " WHEN 'SEGMENT' THEN items_seq_start.sample_start ", + " WHEN 'EVENT' THEN items_seq_start.sample_point ", + " END AS sample_start, ", + " CASE items_seq_start.type ", + " WHEN 'SEGMENT' THEN (items_seq_end.sample_start + items_seq_end.sample_dur) ", + " WHEN 'EVENT' THEN items_seq_start.sample_point ", + " END AS sample_end, ", + " items_seq_start.sample_rate AS sample_rate ", + "FROM interm_res_items_tmp_root ", + joinType, " items AS items_seq_start ", + "ON interm_res_items_tmp_root.db_uuid = items_seq_start.db_uuid ", + " AND interm_res_items_tmp_root.session = items_seq_start.session ", + " AND interm_res_items_tmp_root.bundle = items_seq_start.bundle ", + " AND interm_res_items_tmp_root.seq_start_id = items_seq_start.item_id ", + joinType, " items AS items_seq_end ", + "ON interm_res_items_tmp_root.db_uuid = items_seq_end.db_uuid ", + " AND interm_res_items_tmp_root.session = items_seq_end.session ", + " AND interm_res_items_tmp_root.bundle = items_seq_end.bundle ", + " AND interm_res_items_tmp_root.seq_end_id = items_seq_end.item_id ", + joinType, " labels ", + "ON interm_res_items_tmp_root.db_uuid = labels.db_uuid ", + " AND interm_res_items_tmp_root.session = labels.session ", + " AND interm_res_items_tmp_root.bundle = labels.bundle ", + " AND interm_res_items_tmp_root.seq_end_id = labels.item_id ", + " AND labels.name = '", resultAttrDef, "' ", + orderByString, + "")) + + }else{ + + segLvlNms = find_segmentLevels(emuDBhandle, resultAttrDef) + + if(!is.null(timeRefSegmentLevel)){ + if(!(timeRefSegmentLevel %in% segLvlNms)){ + stop("Cannot resolve time information for result level '", + resultAttrDef, + "' using segment time reference level '", + timeRefSegmentLevel, + "'\nPlease set one of these levels for timeRefSegmentLevel parameter: ", + paste(segLvlNms,collapse=', '),".") + } + lnwt = timeRefSegmentLevel # level name with time + }else{ + segLvlsCnt=length(segLvlNms) + if(segLvlsCnt>1){ + stop("Segment time information derivation for level '", + resultAttrDef, + "' is ambiguous:\nThe level is linked to multiple segment levels: ", + paste(segLvlNms,collapse=', '), + "\nPlease select one of these levels using the 'timeRefSegmentLevel' query parameter.") + }else if(segLvlsCnt == 0){ + stop("Could not find a time bearing sub-level connected to '", + resultAttrDef, + "'. Consider either using 'calcTimes=FALSE' or adding potentially missing link definitions in your emuDB.") + } + lnwt = segLvlNms[1] # level name with time + } + # get children and collapse + query_hierarchyWalk(emuDBhandle, + startItemsTableSuffix = "root", + targetItemsAttributeName = lnwt, + preserveStartItemsRowLength = TRUE, # we always want the seqs. + sessionPattern = sessionPattern, + bundlePattern = bundlePattern, + verbose = verbose) # result written to lr_exp_res_tmp table (left parents/right children) + + # set type of join depending on preserveParentLength + # if(preserveParentLength){ + # joinType = "LEFT JOIN" + # orderByString = "ORDER BY irit.rowid" # don't reorder if left joining to perserve NA/NULL row placement + # }else{ + # joinType = "INNER JOIN" + # orderByString = paste0("ORDER BY lr_exp_res_tmp.db_uuid, ", + # " lr_exp_res_tmp.session, ", + # " lr_exp_res_tmp.bundle, ", + # " min(itl.sample_start)") + # } + + # calculate left and right times and store in tmp table + DBI::dbExecute(emuDBhandle$connection, paste0("INSERT INTO emursegs_tmp ", + "SELECT NULL AS labels, ", + " CASE items_start_child.type ", + " WHEN 'SEGMENT' THEN ", + " CASE items_start_child.sample_start ", + " WHEN 0 THEN CAST(0.0 AS REAL) ", + " ELSE (CAST (items_start_child.sample_start AS REAL) - 0.5 ) / CAST(items_start_child.sample_rate AS REAL) * 1000.0 ", + " END", + " WHEN 'EVENT' THEN 'Not implemented yet!'", + " ELSE NULL ", + " END AS start, ", + " CASE items_start_child.type ", + " WHEN 'SEGMENT' THEN (CAST (items_end_child.sample_start + items_end_child.sample_dur AS REAL) + 0.5) / CAST (items_end_child.sample_rate AS REAL) * 1000.0 ", + " WHEN 'EVENT' THEN 0.0", + " ELSE NULL ", + " END AS end, ", + " lr_exp_res_tmp.session || ':' || lr_exp_res_tmp.bundle AS utts, ", + " lr_exp_res_tmp.db_uuid, ", + " lr_exp_res_tmp.session, ", + " lr_exp_res_tmp.bundle, ", + " lr_exp_res_tmp.l_seq_start_id AS start_item_id, ", + " lr_exp_res_tmp.l_seq_end_id AS end_item_id, ", + " '", resultAttrDef, "' AS level, ", + " lr_exp_res_tmp.l_seq_start_seq_idx AS start_item_seq_idx, ", + " lr_exp_res_tmp.l_seq_end_seq_idx AS end_item_seq_idx, '", ld$type, "' AS type, ", + " (items_start_child.sample_start + 0) AS sample_start, (items_end_child.sample_start + items_end_child.sample_dur) AS sample_end, ", + " items_end_child.sample_rate AS sample_rate ", + "FROM lr_exp_res_tmp ", + " LEFT JOIN ", itemsTableName, " AS items_start_child ", + "ON lr_exp_res_tmp.db_uuid = items_start_child.db_uuid ", + " AND lr_exp_res_tmp.session = items_start_child.session ", + " AND lr_exp_res_tmp.bundle = items_start_child.bundle ", + " AND lr_exp_res_tmp.r_seq_start_id = items_start_child.item_id ", + " LEFT JOIN ", itemsTableName, " AS items_end_child ", + "ON lr_exp_res_tmp.db_uuid = items_end_child.db_uuid ", + " AND lr_exp_res_tmp.session = items_end_child.session ", + " AND lr_exp_res_tmp.bundle = items_end_child.bundle ", + " AND lr_exp_res_tmp.r_seq_end_id = items_end_child.item_id ", + "")) + + } + ################################ + # construct labels + DBI::dbExecute(emuDBhandle$connection, paste0("CREATE INDEX IF NOT EXISTS emursegs_tmp_idx ON emursegs_tmp(db_uuid, session, bundle, start_item_id, end_item_id)")) + + # set type of join depending on preserve*Length args + # if(preserveParentLength){ + joinType = "LEFT JOIN" + # orderByString = "" # don't reorder if left joining to perserve NA/NULL row placement + # }else{ + # joinType = "INNER JOIN" + # } + seglist = DBI::dbGetQuery(emuDBhandle$connection, paste0("SELECT ", + " GROUP_CONCAT(ungrouped.label, '->') AS labels, ", + " start, ", + " end, ", + " utts, ", + " db_uuid, ", + " session, ", + " bundle, ", + " start_item_id, ", + " end_item_id, ", + " level, ", " start_item_seq_idx, ", + " end_item_seq_idx, ", + " type, ", + " sample_start, ", + " sample_end, ", + " sample_rate ", + "FROM ", + " (SELECT ", + " emursegs_tmp.rowid, ", + " labels.label, ", + " emursegs_tmp.start, ", + " emursegs_tmp.end, ", + " emursegs_tmp.utts, ", + " emursegs_tmp.db_uuid, ", + " emursegs_tmp.session, ", + " emursegs_tmp.bundle, ", + " emursegs_tmp.start_item_id, ", + " emursegs_tmp.end_item_id, ", + " emursegs_tmp.level, ", + " emursegs_tmp.start_item_seq_idx, ", + " emursegs_tmp.end_item_seq_idx, ", + " emursegs_tmp.type, ", + " emursegs_tmp.sample_start, ", + " emursegs_tmp.sample_end, ", + " emursegs_tmp.sample_rate ", + "FROM emursegs_tmp ", + joinType, " ", itemsTableName, " AS itl ", + "ON emursegs_tmp.db_uuid = itl.db_uuid ", + " AND emursegs_tmp.session = itl.session ", + " AND emursegs_tmp.bundle = itl.bundle ", + " AND emursegs_tmp.start_item_id = itl.item_id ", + joinType, " ", itemsTableName, " AS itr ", + "ON emursegs_tmp.db_uuid = itr.db_uuid ", + " AND emursegs_tmp.session = itr.session ", + " AND emursegs_tmp.bundle = itr.bundle ", + " AND emursegs_tmp.end_item_id = itr.item_id ", + joinType, " ", itemsTableName, " AS iseq ", + "ON itl.db_uuid = iseq.db_uuid ", + " AND itl.session = iseq.session ", + " AND itl.bundle = iseq.bundle ", + " AND itl.level = iseq.level ", + " AND iseq.seq_idx >= itl.seq_idx ", + " AND iseq.seq_idx <= itr.seq_idx ", # join all seq. items + joinType, " ", labelsTableName, " AS labels ", # items table left & right + "ON iseq.db_uuid = labels.db_uuid ", + " AND iseq.session = labels.session ", + " AND iseq.bundle = labels.bundle ", + " AND iseq.item_id = labels.item_id ", + " AND labels.name = '", resultAttrDef, "' ", + "ORDER BY ", + " emursegs_tmp.db_uuid, ", + " emursegs_tmp.session, ", + " emursegs_tmp.bundle, ", + " emursegs_tmp.level, ", + " iseq.seq_idx", + ") AS ungrouped ", + "GROUP BY rowid", + "")) + + # drop temp table + DBI::dbExecute(emuDBhandle$connection, paste0("DROP TABLE IF EXISTS emursegs_tmp")) + }else{ + seglist = data.frame(labels = character(), start = numeric(), end = numeric(), utts = character(), + db_uuid = character(), session = character(), bundle = character(), + start_item_id = numeric(), end_item_id = numeric(), level = character(), + type = character(), sample_start = numeric(), sample_end = numeric(), sample_rate = numeric(), + stringsAsFactors = FALSE) + } + # set emusegs type attribute, default 'segment' + slType='segment' + if(nrow(seglist)>0){ + # set to event only if all rows are of type EVENT + if(all(seglist$type[!is.na(seglist$type)] == "EVENT")){ + slType='event' + } + } + + segmentList = make.emuRsegs(dbName = emuDBhandle$dbName, + seglist = seglist, + query = queryStr, + type = slType) + + # if contains NAs -> also set everything to NA + if(any(is.na(segmentList$labels))){ + segmentList[is.na(segmentList$labels),] = NA + } + + return(segmentList) + +} + +################################## +################################## +################################## +equal.emusegs<-function(seglist1, + seglist2, + compareAttributes = TRUE, + tolerance=0.0, + uttsPrefix2 = ''){ + + if(!inherits(seglist1,"emusegs")){ + stop("seglist1 is not of class emusegs") + } + if(!inherits(seglist2,"emusegs")){ + stop("seglist2 is not of class emusegs") + } + if(tolerance<0){ + stop("tolerance must be greater or equal 0") + } + + sl1RowCnt=nrow(seglist1) + sl2RowCnt=nrow(seglist2) + if(sl1RowCnt!=sl2RowCnt){ + return(FALSE) + } + if(compareAttributes){ + attrEq=((attr(seglist1,'query')==attr(seglist2,'query'))&attr(seglist1,'database')==attr(seglist2,'database')) + if(!attrEq){ + return(FALSE) + } + } + # seglist have no implicit order + sl1Order=order('[[.data.frame'(seglist1,'utts'),'[[.data.frame'(seglist1,'start'),'[[.data.frame'(seglist1,'end'),'[[.data.frame'(seglist1,'labels')) + sl1=`[.data.frame`(seglist1,sl1Order,) + sl2Order=order('[[.data.frame'(seglist2,'utts'),'[[.data.frame'(seglist2,'start'),'[[.data.frame'(seglist2,'end'),'[[.data.frame'(seglist2,'labels')) + sl2=`[.data.frame`(seglist2,sl2Order,) + equal=TRUE + rs=1:sl1RowCnt + for(i in rs){ + l1='[[.data.frame'(sl1,i,'labels') + l2='[[.data.frame'(sl2,i,'labels') + s1='[[.data.frame'(sl1,i,'start') + s2='[[.data.frame'(sl2,i,'start') + e1='[[.data.frame'(sl1,i,'end') + e2='[[.data.frame'(sl2,i,'end') + u1='[[.data.frame'(sl1,i,'utts') + u2='[[.data.frame'(sl2,i,'utts') + u2Sess=paste0(uttsPrefix2,u2) + if(l1!=l2 | u1!=u2Sess){ + return(FALSE) + } + sdAbs=abs(s2-s1) + if(sdAbs>tolerance){ + equal=FALSE + #cat("Start differs ",s1,s2,sdAbs,"\n") + } + edAbs=abs(e2-e1) + if(edAbs>tolerance){ + equal=FALSE + #cat("End differs ",e1,e2,edAbs,"\n") + } + + } + return(equal) + +} diff --git a/R/emuR-crud-helpers.R b/R/emuR-crud-helpers.R new file mode 100644 index 00000000..00fc7c87 --- /dev/null +++ b/R/emuR-crud-helpers.R @@ -0,0 +1,367 @@ +## Insert one item into the database +## +## @description One item, identified as \code{session:bundle:level:sequenceIndex}, +## is inserted into the database. One label has to be provided for every attribute +## of the given level. +## +## @param emuDBhandle emuDB handle as returned by \code{\link{load_emuDB}} +## @param itemToInsert Data frame containing the labels for the item to be inserted. +## Must contain the columns \code{session}, \code{bundle}, \code{level}, +## \code{start_item_seq_idx}, \code{attribute}, \code{labelIndex}, and +## \code{label}. The first four of these identify the item and must +## contain the same value in all rows. \code{attribute} and \code{labelIndex} +## must match up - the label index marks the position of the +## attribute within its level (see \code{\link{get_labelIndex}}. +## @param levelType type of level (ITEM vs EVENT vs SEGMENT) +insertItemIntoDatabase = function(emuDBhandle, + itemToInsert, + levelType) { + + session = itemToInsert$session[1] + bundle = itemToInsert$bundle[1] + level = itemToInsert$level[1] + sequenceIndex = itemToInsert$start_item_seq_idx[1] + + ## + ## Make sure that the provided attributes exactly match those required for the provided level + ## + requiredAttributes = list_attributeDefinitions(emuDBhandle, level)$name + + if (!identical (sort(requiredAttributes), sort(itemToInsert$attribute))) { + stop (call. = FALSE, + paste0("Error in item ", + paste(session, bundle, level, sequenceIndex, sep = ":"), + ". ", + "The provided attributes (", + paste0(itemToInsert$attribute, collapse = ", "), + ") do not match the attributes required (", + paste0(requiredAttributes, collapse = ", "), + ") for the level (", + level, + ").")) + } + + # set sample_point, sample_start and sample_dur values based on levelType + if(levelType == "SEGMENT"){ + samplePoint = NA + sampleStart = itemToInsert$sample_start[1] + sampleDur = itemToInsert$sample_end[1] - itemToInsert$sample_start[1] + }else if(levelType == "EVENT"){ + samplePoint = itemToInsert$sample_point[1] + sampleStart = NA + sampleDur = NA + }else{ + samplePoint = NA + sampleStart = NA + sampleDur = NA + } + + ## + ## Insert item into the database (first the item itself, then the corresponding labels) + ## + itemId = 1 + bas_get_max_id(emuDBhandle, + session, + bundle, + items_table_name = "items_annot_crud_tmp") + + sampleRate = bas_get_samplerate(emuDBhandle, + session, + bundle) + + statement = DBI::dbSendStatement( + emuDBhandle$connection, + paste0("INSERT INTO items_annot_crud_tmp (", + " db_uuid, ", + " session, ", + " bundle, ", + " item_id, ", + " level, ", + " type, ", + " seq_idx, ", + " sample_rate, ", + " sample_point, ", + " sample_start, ", + " sample_dur) ", + "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)" + )) + + DBI::dbBind( + statement, + list( + emuDBhandle$UUID, + session, + bundle, + itemId, + level, + levelType, + sequenceIndex, + sampleRate, + samplePoint, + sampleStart, + sampleDur + ) + ) + + DBI::dbClearResult(statement) + + # now labels + statement = DBI::dbSendStatement( + emuDBhandle$connection, + paste0("INSERT INTO labels_annot_crud_tmp ( ", + " db_uuid, ", + " session, ", + " bundle, ", + " item_id, ", + " label_idx, ", + " name, ", + " label", + ")", + "VALUES (?, ?, ?, ?, ?, ?, ?)" + )) + + itemToInsert$dbUuid = emuDBhandle$UUID + itemToInsert$itemId = itemId + + DBI::dbBind( + statement, + list( + itemToInsert$dbUuid, + itemToInsert$session, + itemToInsert$bundle, + itemToInsert$itemId, + itemToInsert$labelIndex, + itemToInsert$attribute, + itemToInsert$label + ) + ) + + DBI::dbClearResult(statement) + + invisible(itemToInsert) +} + +## Vectorized function to translate level/attribute name pairs into label indexes. +## +## @param emuDBhandle emuDB handle as returned by \code{\link{load_emuDB}} +## @param levelName The level of the level/attribute pairs. This vector must +## match the \code{attributeName} vector. +## @param attributeName The attribute of the level/attribute pairs. This vector must +## match the \code{levelName} vector. +get_labelIndex = function(emuDBhandle, + levelName, + attributeName) { + ## @todo Should we export this function? I do not think so. + # check_emuDBhandle(emuDBhandle, checkCache = TRUE) + + allAttributes = data.frame ( + levelName = character(0), + attributeName = character(0), + index = numeric(0), + + stringsAsFactors = FALSE + ) + + requestedAttributes = data.frame( + levelName = levelName, + attributeName = attributeName, + + stringsAsFactors = FALSE + ) + + + DBconfig = load_DBconfig(emuDBhandle) + + for (levelDefinition in DBconfig$levelDefinitions) { + currentAttributeIndex = 0 + for (attributeDefinition in levelDefinition$attributeDefinitions) { + currentAttributeIndex = currentAttributeIndex + 1 + allAttributes = rbind( + allAttributes, + data.frame ( + levelName = levelDefinition$name, + attributeName = attributeDefinition$name, + index = currentAttributeIndex, + + stringsAsFactors = FALSE + ) + ) + } + } + + result = dplyr::left_join(x = requestedAttributes, + y = allAttributes, + by = c("levelName", "attributeName")) + + return (result$index) + + ## Unit test + ## get_labelIndex(db, + ## c("Word", "bundle", "Word", "foo", "bundle"), + ## c("Canonical", "transcription", "Word", "foo", "transcription")) == c(2,2,1, NA, 2) +} + +## Rewrite all sequence indexes across all sessions and bundles. Operate on the +## temporary table for annotation CRUD. +## +## Reads the existing sequences of all items, assuming they are a mixture of +## natural values, real values and NULL, sorts them in ascending order, and +## replaces them with the sequence 1..n, where n is the number of items on the +## respective level in the respective bundle. NULL values are placed at the end +## of the sequence. +## +## @param emuDBhandle emuDB handle as returned by \code{\link{load_emuDB}} +## +rewrite_allSequenceIndexes = function (emuDBhandle) { + allItems = DBI::dbReadTable(emuDBhandle$connection, "items_annot_crud_tmp") + + allItems %>% + dplyr::group_by(.data$db_uuid, .data$session, .data$bundle, .data$level) %>% + dplyr::do(rewrite_sequenceIndexesOneLevel(emuDBhandle, .data)) +} + +## See \code{\link{rewrite_allSequenceIndexes}} +## +## @param emuDBhandle emuDB handle as returned by \code{\link{load_emuDB}} +## @param itemsOnLevel Data frame describing all items on a particular level. +## +rewrite_sequenceIndexesOneLevel = function (emuDBhandle, + itemsOnLevel) { + # Sort items by their current sequence_index + itemsOnLevel = dplyr::arrange(itemsOnLevel, .data$seq_idx) + + # @todo + # dplyr::arrange handles NA values the way I want it to - they go at the end + # of the list, no matter if sorting in ascending or descending order (we only + # use ascending anyway). However, this does not seem to be documented. Should + # We rely on it? + + # Re-calculate the sequence index + itemsOnLevel$newSequenceIndex = 1:nrow(itemsOnLevel) + + statement = DBI::dbSendStatement( + emuDBhandle$connection, + paste0("UPDATE items_annot_crud_tmp ", + "SET seq_idx = ? ", + "WHERE db_uuid = ? ", + " AND session = ? ", + " AND bundle = ? ", + " AND item_id = ?")) + + DBI::dbBind( + statement, + list( + itemsOnLevel$newSequenceIndex, + itemsOnLevel$db_uuid, + itemsOnLevel$session, + itemsOnLevel$bundle, + itemsOnLevel$item_id + ) + ) + + DBI::dbClearResult(statement) + + invisible(itemsOnLevel) +} + + +ensureSequenceIndexesAreUnique = function (itemsOnAttribute) { + + uniqueSequenceIndexes = unique (itemsOnAttribute$start_item_seq_idx) + + if (length(uniqueSequenceIndexes) != length(itemsOnAttribute$start_item_seq_idx)) { + stop(call. = FALSE, + paste("Sequence indexes must be unique within one level.", + "Found duplicate sequence indices in", + paste(itemsOnAttribute[1, "session"], + itemsOnAttribute[1, "bundle"], + itemsOnAttribute[1, "level"], + itemsOnAttribute[1, "attribute"], + sep = ":"))) + } + + invisible(itemsOnAttribute) +} + +database.DDL.emuDB_items_annot_crud_tmp = paste0("CREATE TEMP TABLE items_annot_crud_tmp (", + " db_uuid VARCHAR(36), ", + " session TEXT, ", + " bundle TEXT, ", + " item_id INTEGER, ", + " level TEXT, ", + " type TEXT, ", + " seq_idx FLOAT, ", + " sample_rate FLOAT, ", + " sample_point INTEGER, ", + " sample_start INTEGER, ", + " sample_dur INTEGER, ", + "PRIMARY KEY (db_uuid, session, bundle, item_id) ", + #"FOREIGN KEY (db_uuid, session, bundle) REFERENCES bundle(db_uuid, session, name) ON DELETE CASCADE + ");") + + +database.DDL.emuDB_labels_annot_crud_tmp = paste0("CREATE TEMP TABLE labels_annot_crud_tmp ( ", + " db_uuid VARCHAR(36), ", + " session TEXT, ", + " bundle TEXT, ", + " item_id INTEGER, ", + " label_idx INTEGER, ", + " name TEXT, ", + " label TEXT, ", + "PRIMARY KEY (db_uuid, session, bundle, item_id, label_idx) ", + #"FOREIGN KEY (db_uuid, session, bundle) REFERENCES bundle(db_uuid, session, name) ON DELETE CASCADE", + #"FOREIGN KEY (db_uuid, session, bundle, item_id) REFERENCES items(db_uuid, session, bundle, item_id) ON DELETE CASCADE", + ");") + +database.DDL.emuDB_links_annot_crud_tmp = paste0("CREATE TEMP TABLE links_annot_crud_tmp ( ", + " db_uuid VARCHAR(36) NOT NULL, ", + " session TEXT, ", + " bundle TEXT, ", + " from_id INTEGER, ", + " to_id INTEGER, ", + " label TEXT ", + #"FOREIGN KEY (db_uuid, session, bundle) REFERENCES bundle(db_uuid, session, name) ON DELETE CASCADE ON UPDATE CASCADE", + ");") + + + +create_annotCrudTmpTables = function(emuDBhandle) { + DBI::dbExecute(emuDBhandle$connection, + database.DDL.emuDB_items_annot_crud_tmp) + DBI::dbExecute(emuDBhandle$connection, + "INSERT INTO items_annot_crud_tmp SELECT * FROM items") + DBI::dbExecute(emuDBhandle$connection, + database.DDL.emuDB_labels_annot_crud_tmp) + DBI::dbExecute(emuDBhandle$connection, + "INSERT INTO labels_annot_crud_tmp SELECT * FROM labels") + DBI::dbExecute(emuDBhandle$connection, + database.DDL.emuDB_links_annot_crud_tmp) + DBI::dbExecute(emuDBhandle$connection, + "INSERT INTO links_annot_crud_tmp SELECT * FROM links") +} + +remove_annotCrudTmpTables = function(emuDBhandle) { + DBI::dbExecute(emuDBhandle$connection, + "DROP TABLE IF EXISTS items_annot_crud_tmp") + DBI::dbExecute(emuDBhandle$connection, + "DROP TABLE IF EXISTS labels_annot_crud_tmp") + DBI::dbExecute(emuDBhandle$connection, + "DROP TABLE IF EXISTS links_annot_crud_tmp") +} + + +moveback_annotCrudTmpTables = function(emuDBhandle) { + DBI::dbExecute(emuDBhandle$connection, + "DELETE FROM links") + DBI::dbExecute(emuDBhandle$connection, + "DELETE FROM labels") + DBI::dbExecute(emuDBhandle$connection, + "DELETE FROM items") + + DBI::dbExecute(emuDBhandle$connection, + "INSERT INTO items SELECT * FROM items_annot_crud_tmp") + DBI::dbExecute(emuDBhandle$connection, + "INSERT INTO labels SELECT * FROM labels_annot_crud_tmp") + DBI::dbExecute(emuDBhandle$connection, + "INSERT INTO links SELECT * FROM links_annot_crud_tmp") + remove_annotCrudTmpTables(emuDBhandle) +} diff --git a/R/emuR-dataDocs.R b/R/emuR-dataDocs.R new file mode 100644 index 00000000..a0a9762b --- /dev/null +++ b/R/emuR-dataDocs.R @@ -0,0 +1,822 @@ + +##' Three-columned matrix +##' +##' An EMU dataset +##' +##' +##' @docType data +##' @keywords datasets +##' @format Three-columned matrix +##' @name bridge +NULL + + + + + +##' EPG-compressed trackdata from the segment list coutts +##' +##' An EMU dataset +##' +##' +##' @docType data +##' @keywords datasets +##' @format segmentlist +##' @name coutts.epg +NULL + + + + + +##' Vector of word label from the segment list coutts +##' +##' An EMU dataset +##' +##' +##' @docType data +##' @keywords datasets +##' @format segmentlist +##' @name coutts.l +NULL + + + + + +##' Segment list of words, read speech, female speaker of Australian English +##' from database epgcoutts +##' +##' An EMU dataset +##' +##' +##' @docType data +##' @keywords datasets +##' @format segmentlist +##' @name coutts +NULL + + + + + +##' rms Data to coutts segment list +##' +##' An EMU dataset +##' +##' +##' @docType data +##' @keywords datasets +##' @name coutts.rms +##' @format segmentlist +##' @examples +##' +##' data(coutts.rms) +##' +NULL + + + + + +##' Trackdata of acoustic waveforms from the segment list coutts +##' +##' An EMU dataset +##' +##' +##' @docType data +##' @keywords datasets +##' @format trackdata object +##' @name coutts.sam +##' +NULL + + + + + +##' EPG-compressed trackdata from the segment list coutts2 +##' +##' An EMU dataset +##' +##' +##' @docType data +##' @keywords datasets +##' @format trackdata object +##' @name coutts2.epg +NULL + + + + + +##' Vector of word label from the segment list coutts2 +##' +##' An EMU dataset +##' +##' +##' @docType data +##' @keywords datasets +##' @format vector of word label +##' @name coutts2.l +NULL + + + + + +##' Segment list, same as coutts but at a slower speech rate +##' +##' An EMU dataset +##' +##' +##' @docType data +##' @keywords datasets +##' @format segmentlist +##' @name coutts2 +NULL + + + + + +##' Trackdata of acoustic waveforms from the segment list coutts2 +##' +##' An EMU dataset +##' +##' +##' @docType data +##' @keywords datasets +##' @format trackdata object +##' @name coutts2.sam +NULL + + + + + +##' Emu segment list +##' +##' Segment list of the demo database that is part of the Emu system. It is +##' the result of a database query, that searched all segments at level +##' Phonetic. +##' +##' A segment list is created via \code{\link{query}}. +##' +##' @format First Column labels Second start time of the segment Third end time +##' of the segment Fourth utterance name of the utterance the segment was found +##' @seealso \code{\link{demo.vowels}} \code{\link{segmentlist}} +##' @docType data +##' @keywords datasets +##' @name demo.all +NULL + + + + + +##' Emu track data for a rms track for segment list demo.all +##' +##' A track list of the demo database that is part of the Emu system. It is +##' the result of get rms data for the segment list demo.all (data(demo.all)). +##' +##' A track list is created via the \code{\link{get_trackdata}} function. +##' +##' @format A object with $index, $ftime and $data +##' +##' index: a two columned matrix with the range of the $data rows that belong +##' to the segment ftime: a two columned matrix with the times marks of the +##' segment data: a vector with the rms data +##' @seealso \code{\link{demo.vowels.fm}} \code{\link{segmentlist}} +##' \code{\link{trackdata}} +##' @docType data +##' @keywords datasets +##' @name demo.all.rms +NULL + + + + + +##' F0 track data for segment list demo.vowels +##' +##' A track list of the demo database that is part of the Emu system. It is +##' the result of get F0 data for the segment list demo.vowels (see +##' data(demo.vowels)). +##' +##' A track list is created via the \code{\link{get_trackdata}} function. +##' +##' @format An object with $index, $ftime and $data +##' +##' index: a two columned matrix with the range of the $data rows that belong +##' to the segment ftime: a two columned matrix with the times marks of the +##' segment data: a one columned matrix with the F0 values +##' @seealso \code{\link{demo.all.rms}} \code{\link{segmentlist}} +##' \code{\link{trackdata}} +##' @docType data +##' @keywords datasets +##' @name demo.all.f0 +NULL + + + + + +##' Formant track data for segment list demo.vowels +##' +##' A track list of the demo database that is part of the Emu system. It is +##' the result of get fm data for the segment list demo.vowels (see +##' data(demo.vowels)). +##' +##' A track list is created via the \code{\link{get_trackdata}} function. +##' +##' @format index: a two columned matrix with the range of the $data rows that +##' belong to the segment ftime: a two columned matrix with the times marks of +##' the segment data: a three columned matrix with the formant values of the +##' first three formants for each segment +##' @seealso \code{\link{demo.all.rms}} \code{\link{segmentlist}} +##' \code{\link{trackdata}} +##' @docType data +##' @keywords datasets +##' @name demo.all.fm +NULL + + + + + +##' Emu segment List +##' +##' Segment list of the demo database that is part of the Emu system. It is +##' the result of a database query, that searched all vowel segments at level +##' Phonetic. +##' +##' A segment list is created via \code{\link{query}}. +##' +##' @format First Column labels Second start time of the segment Third end time +##' of the segment Fourth utterance name of the utterance the segment was found +##' @seealso \code{\link{demo.all}} \code{\link{segmentlist}} +##' @docType data +##' @keywords datasets +##' @name demo.vowels +NULL + + +#' F0 track data for segment list demo.vowels +#' +#' A track list of the demo database that is part of the Emu system. It is the +#' result of get F0 data for the segment list demo.vowels (see +#' data(demo.vowels)). +#' +#' A track list is created via the \code{\link{get_trackdata}} function. +#' +#' @format An object with $index, $ftime and $data +#' +#' index: a two columned matrix with the range of the $data rows that belong +#' to the segment ftime: a two columned matrix with the times marks of the +#' segment data: a one columned matrix with the F0 values +#' @seealso \code{\link{demo.all.rms}} \code{\link{segmentlist}} +#' \code{\link{trackdata}} +#' @keywords datasets +#' @name demo.vowels.f0 +NULL + + +#' Formant track data for segment list demo.vowels +#' +#' A track list of the demo database that is part of the Emu system. It is the +#' result of get fm data for the segment list demo.vowels (see +#' data(demo.vowels)). +#' +#' A track list is created via the \code{\link{get_trackdata}} function. +#' +#' @format index: a two columned matrix with the range of the $data rows that +#' belong to the segment ftime: a two columned matrix with the times marks of +#' the segment data: a three columned matrix with the formant values of the +#' first three formants for each segment +#' @seealso \code{\link{demo.all.rms}} \code{\link{segmentlist}} +#' \code{\link{trackdata}} +#' @keywords datasets +#' @name demo.vowels.fm +NULL + + + +##' Trackdata of formants from the segment list dip +##' +##' An EMU dataset +##' +##' +##' @docType data +##' @keywords datasets +##' @format trackdata object +##' @name dip.fdat +NULL + + + + + +##' Vector of phoneme labels from the segment list dip +##' +##' An EMU dataset +##' +##' +##' @docType data +##' @keywords datasets +##' @format vector of phoneme lables +##' @name dip.l +NULL + + + + + +##' Segment list of diphthongs, two speakers one male, one female , Standard +##' North German, read speech from database kielread +##' +##' An EMU dataset +##' +##' +##' @docType data +##' @keywords datasets +##' @format segmentlist +##' @name dip +NULL + + + + + +##' Vector of speaker labels from the segment list dip +##' +##' An EMU dataset +##' +##' +##' @docType data +##' @keywords datasets +##' @format vector of speaker labels +##' @name dip.spkr +NULL + + + + + + + +##' Spectral vector of a single E vowel produced by a male speaker of Standard +##' North German. +##' +##' An EMU dataset +##' +##' +##' @docType data +##' @keywords datasets +##' @format spectral vector +##' @name e.dft +NULL + + + + + +##' EPG-compressed trackdata from the segment list engassim +##' +##' An EMU dataset +##' +##' +##' @docType data +##' @keywords datasets +##' @format trackdata object +##' @name engassim.epg +NULL + + + + + +##' Vector of phonetic labels from the segment list engassim: nK = nk,ng , sK = +##' sk,sg +##' +##' An EMU dataset +##' +##' +##' @docType data +##' @keywords datasets +##' @format vector of phonetic labels +##' @name engassim.l +NULL + + + + + +##' Segment list of a sequence of syllable final n or N preceding k or g , +##' isolated words single speaker, Australian English female from database +##' epgassim. +##' +##' An EMU dataset +##' +##' +##' @docType data +##' @keywords datasets +##' @format segmentlist +##' @name engassim +NULL + + + +##' Vector of word labels from the segment list engassim. +##' +##' An EMU dataset +##' +##' +##' @docType data +##' @keywords datasets +##' @format vector of word labels +##' @name engassim.w +NULL + + + + + +##' Spectral trackdata object from the segment list fric. +##' +##' An EMU dataset +##' +##' +##' @docType data +##' @keywords datasets +##' @format trackdata object +##' @name fric.dft +NULL + + + + + +##' Vector of labels from the segment list fric +##' +##' An EMU dataset +##' +##' +##' @docType data +##' @keywords datasets +##' @format vector of labels +##' @name fric.l +NULL + + + + + +##' Segment list of word-medial s or z one male speaker of Standard North +##' German, read speech from database kielread. +##' +##' An EMU dataset +##' +##' @docType data +##' @keywords datasets +##' @format segmentlist +##' @name fric +NULL + + + + + +##' Vector of word labels from the segment list fric. +##' +##' An EMU dataset +##' +##' +##' @docType data +##' @keywords datasets +##' @format vector of word labels +##' @name fric.w +NULL + + + + + +##' Trackdata of formants from the segment list isol +##' +##' An EMU dataset +##' +##' +##' @docType data +##' @keywords datasets +##' @format trackdata object +##' @name isol.fdat +NULL + + + + + +##' Vector of vowel phoneme labels from the segment list isol +##' +##' An EMU dataset +##' +##' +##' @docType data +##' @keywords datasets +##' @format vector of vowel phoneme labels +##' @name isol.l +NULL + + + + + +##' Segment list of vowels in a d d context isolated word speech, one male +##' speaker of Australian English from database isolated. +##' +##' An EMU dataset +##' +##' +##' @docType data +##' @keywords datasets +##' @format segmentlist +##' @name isol +NULL + + + + + +##' EPG-compressed trackdata from the segment list polhom +##' +##' An EMU dataset +##' +##' +##' @docType data +##' @keywords datasets +##' @format trackdata object +##' @name polhom.epg +NULL + + + + + +##' Vector of phonetic labels from the segment list polhom +##' +##' An EMU dataset +##' +##' +##' @docType data +##' @keywords datasets +##' @format vector of phonetic labels +##' @name polhom.l +NULL + + + + + +##' Segment list of four Polish homorganic fricatives from database epgpolish. +##' +##' An EMU dataset +##' +##' +##' @docType data +##' @keywords datasets +##' @format segmentlist +##' @name polhom +NULL + + + + + + + +##' Data frame of various parameters and labels from the segment list vowlax +##' +##' An EMU dataset +##' +##' +##' @docType data +##' @keywords datasets +##' @format dataframe +##' @name vowlax.df +NULL + + + + + +##' Spectral matrix centred at the temporal midpoint of the vowels from the +##' segment list vowlax. +##' +##' An EMU dataset +##' +##' +##' @docType data +##' @keywords datasets +##' @format spectral matrix +##' @name vowlax.dft.5 +NULL + + + + + +##' Matrix of formant data extracted at the temporal midpoint from the segment +##' list vowlax. +##' +##' An EMU dataset +##' +##' +##' @docType data +##' @keywords datasets +##' @format matrix of formant data +##' @name vowlax.fdat.5 +NULL + + + + + +##' Trackdata of formants from the segment list vowlax +##' +##' An EMU dataset +##' +##' +##' @docType data +##' @keywords datasets +##' @format trackdata object +##' @name vowlax.fdat +NULL + + + + + +##' Vector of fundamental frequency extracted at the temporal midpoint from the +##' segment list vowlax. +##' +##' An EMU dataset +##' +##' +##' @docType data +##' @keywords datasets +##' @format vector of fundamental frequency +##' @name vowlax.fund.5 +NULL + + + + + +##' Trackdata of fundamental frequency from the segment list vowlax +##' +##' An EMU dataset +##' +##' +##' @docType data +##' @keywords datasets +##' @format trackdata object +##' @name vowlax.fund +NULL + + + + + +##' Vector of phoneme labels from the segment list vowlax +##' +##' An EMU dataset +##' +##' +##' @docType data +##' @keywords datasets +##' @format vector of phoneme labels +##' @name vowlax.l +NULL + + + + + +##' Vector of labels preceding the vowels from the segment list vowlax +##' +##' An EMU dataset +##' +##' +##' @docType data +##' @keywords datasets +##' @format vector of phoneme labels +##' @name vowlax.left +NULL + + + + + +##' Segment list of four lax vowels, read speech, one male and one female +##' speaker of Standard North German from database kielread. +##' +##' An EMU dataset +##' +##' +##' @docType data +##' @keywords datasets +##' @format segmentlist +##' @name vowlax +NULL + + + + + +##' Vector of labels following the vowels from the segment list vowlax +##' +##' An EMU dataset +##' +##' +##' @docType data +##' @keywords datasets +##' @format vector of phoneme labels +##' @name vowlax.right +NULL + + + + + +##' Vector of RMS energy values at the temporal midpoint extracted at the +##' temporal midpoint from the segment list vowlax +##' +##' An EMU dataset +##' +##' +##' @docType data +##' @keywords datasets +##' @format vector of RMS energy values +##' @name vowlax.rms.5 +NULL + + + + + +##' Trackdata of RMS energy from the segment list vowlax +##' +##' An EMU dataset +##' +##' +##' @docType data +##' @keywords datasets +##' @format trackdata object +##' @name vowlax.rms +NULL + + + + + +##' Vector of speaker labels from the segment list vowlax. +##' +##' An EMU dataset +##' +##' +##' @docType data +##' @keywords datasets +##' @format vector of speaker labels +##' @name vowlax.spkr +NULL + + + + + +##' Vector of word labels from the segment list vowlax. +##' +##' An EMU dataset +##' +##' +##' @docType data +##' @keywords datasets +##' @format vector of word labels +##' @name vowlax.word +NULL + + + + + +##' Vector of word labels from segment list wordlax +##' +##' For wordlax (see data(vowlax)) +##' +##' +##' @docType data +##' @keywords datasets +##' @format vector of word labels +##' @name wordlax.l +NULL diff --git a/R/emuR-database.DBconfig.EMUwebAppConfig.R b/R/emuR-database.DBconfig.EMUwebAppConfig.R new file mode 100644 index 00000000..bfaf089a --- /dev/null +++ b/R/emuR-database.DBconfig.EMUwebAppConfig.R @@ -0,0 +1,308 @@ + +########################################### +# CRUD operation for perspectives + +##' Add / List / Remove perspective to / of / from emuDB +##' +##' Add / List / Remove perspective to / of / from emuDB. The EMU-webApp subdivides different ways +##' to look at an emuDB into so called perspectives. These perspectives, +##' between which you can switch in the web application, contain +##' information on what levels are displayed, which ssffTracks are drawn, +##' and so on. For more information on the structural elements of an emuDB +##' see \code{vignette{emuDB}}. +##' @param emuDBhandle emuDB handle as returned by \code{\link{load_emuDB}} +##' @param name name of perspective +##' @name AddListRemovePerspective +##' @keywords emuDB database DBconfig Emu +##' @examples +##' \dontrun{ +##' +##' ################################## +##' # prerequisite: loaded ae emuDB +##' # (see ?load_emuDB for more information) +##' +##' # add perspective called "justTones" to the ae emuDB +##' add_perspective(emuDBhandle = ae, +##' name = "justTones") +##' +##' # add levelCanvasOrder so only the "Tone" level is displayed +##' set_levelCanvasesOrder(emuDBhandle = ae, +##' perspectiveName = "justTones", +##' order = c("Tone")) +##' +##' # list perspectives of ae emuDB +##' list_perspectives(emuDBhandle = ae) +##' +##' # remove newly added perspective +##' remove_perspective(emuDBhandle = ae, +##' name = "justTones") +##' +##' } +##' +NULL + +##' @rdname AddListRemovePerspective +##' @export +add_perspective <- function(emuDBhandle, + name){ + + check_emuDBhandle(emuDBhandle) + + DBconfig = load_DBconfig(emuDBhandle) + + curPersp = list_perspectives(emuDBhandle) + # check if level defined + if(name %in% curPersp$name){ + stop("Perspective with name: '", name, "' already exists") + } + + persp = list(name = name, + signalCanvases = list(order = c("OSCI", "SPEC"), + assign = list(), contourLims = list()), + levelCanvases = list(order = list()), + twoDimCanvases = list(order = list())) + + l = length(DBconfig$EMUwebAppConfig$perspectives) + + DBconfig$EMUwebAppConfig$perspectives[[l + 1]] = persp + # show perspectives side bar + if(is.null(DBconfig$EMUwebAppConfig$restrictions$showPerspectivesSidebar) && l > 1){ + DBconfig$EMUwebAppConfig$restrictions$showPerspectivesSidebar = TRUE + } + # store changes + store_DBconfig(emuDBhandle, DBconfig) + +} + + +##' @rdname AddListRemovePerspective +##' @export +list_perspectives <- function(emuDBhandle){ + + check_emuDBhandle(emuDBhandle) + + DBconfig = load_DBconfig(emuDBhandle) + df = data.frame(name = character(), + signalCanvasesOrder = character(), + levelCanvasesOrder = character(), + stringsAsFactors = FALSE) + + for(p in DBconfig$EMUwebAppConfig$perspectives){ + df = rbind(df , data.frame(name = p$name, + signalCanvasesOrder = paste(p$signalCanvases$order, collapse = "; "), + levelCanvasesOrder = paste(p$levelCanvases$order, collapse = "; "), + stringsAsFactors = FALSE)) + } + + return(df) +} + + +##' @rdname AddListRemovePerspective +##' @export +remove_perspective <- function(emuDBhandle, + name){ + + check_emuDBhandle(emuDBhandle) + + DBconfig = load_DBconfig(emuDBhandle) + + curPersp = list_perspectives(emuDBhandle) + + # check if perspective defined + if(!name %in% curPersp$name){ + stop("No perspective with name: '", name, "' found!") + } + + for(i in 1:length(DBconfig$EMUwebAppConfig$perspectives)){ + if(DBconfig$EMUwebAppConfig$perspectives[[i]]$name == name){ + DBconfig$EMUwebAppConfig$perspectives[[i]] = NULL + } + } + # store changes + store_DBconfig(emuDBhandle, DBconfig) + +} + +########################################### +# CRUD operation for signalCanvasesOrder + + +##' Set / Get signalCanvasesOrder of / to / from emuDB +##' +##' Set / Get signalCanvasesOrder array that specifies which signals are +##' displayed in the according perspective by the EMU-webApp. An entry in this character vector +##' refers to either the name of an ssffTrackDefinition or a predefined string: \code{"OSCI"} which +##' represents the oscillogram or \code{"SPEC"} which represents the +##' spectrogram. For more information on the structural elements of an emuDB +##' see \code{vignette{emuDB}}. +##' +##' @param emuDBhandle emuDB handle as returned by \code{\link{load_emuDB}} +##' @param perspectiveName name of perspective +##' @param order character vector containing names of ssffTrackDefinitions or "OSCI" / "SPEC" +##' @name SetGetSignalCanvasesOrder +##' @keywords emuDB database DBconfig Emu +##' @examples +##' \dontrun{ +##' +##' ################################## +##' # prerequisite: loaded ae emuDB +##' # (see ?load_emuDB for more information) +##' +##' # get signal canvas order of the "default" +##' # perspective of the ae emuDB +##' get_signalCanvasesOrder(emuDBhandle = ae, +##' perspectiveName = "default") +##' +##' } +##' +NULL + +##' @rdname SetGetSignalCanvasesOrder +##' @export +set_signalCanvasesOrder <- function(emuDBhandle, + perspectiveName, + order){ + + check_emuDBhandle(emuDBhandle) + + DBconfig = load_DBconfig(emuDBhandle) + + curTracks = c("OSCI", "SPEC", list_ssffTrackDefinitions(emuDBhandle)$name) + + #check if tracks given are defined + for(t in order){ + if(!t %in% curTracks){ + stop("No ssffTrackDefinition present with name '", t, "'!") + } + } + + for(i in 1:length(DBconfig$EMUwebAppConfig$perspectives)){ + if(DBconfig$EMUwebAppConfig$perspectives[[i]]$name == perspectiveName){ + DBconfig$EMUwebAppConfig$perspectives[[i]]$signalCanvases$order = as.list(order) + break + } + } + + # store changes + store_DBconfig(emuDBhandle, DBconfig) +} + + +##' @rdname SetGetSignalCanvasesOrder +##' @export +get_signalCanvasesOrder <- function(emuDBhandle, + perspectiveName){ + check_emuDBhandle(emuDBhandle) + + DBconfig = load_DBconfig(emuDBhandle) + + order = NA + for(p in DBconfig$EMUwebAppConfig$perspectives){ + if(p$name == perspectiveName){ + order = unlist(p$signalCanvases$order) + } + } + return(order) +} + +########################################### +# CRUD operation for levelCanvasesOrder + + +##' Set / Get level canvases order of emuDB +##' +##' Set / Get which levels of an emuDB to display as level canvases (in a +##' given perspective of the EMU-webApp), +##' and in what order. Level canvases refer to levels of +##' the type "SEGMENT" or "EVENT" that are displayed by the EMU-webApp. Levels +##' of type "ITEM" can always be displayed using the hierarchy view of the +##' web application but can not be displayed as level canvases. +##' For more information on the structural elements of an emuDB +##' see \code{vignette{emuDB}}. +##' +##' @param emuDBhandle emuDB handle as returned by \code{\link{load_emuDB}} +##' @param perspectiveName name of perspective +##' @param order character vector containing names of levelDefinitions +##' @name SetGetlevelCanvasesOrder +##' @keywords emuDB database DBconfig Emu +##' @examples +##' \dontrun{ +##' +##' ################################## +##' # prerequisite: loaded ae emuDB +##' # (see ?load_emuDB for more information) +##' +##' # get level canvases order of ae emuDB +##' order = get_levelCanvasesOrder(emuDBhandle = ae, +##' perspectiveName = "default") +##' +##' # reverse the level canvases order of ae emuDB +##' set_levelCanvasesOrder(emuDBhandle = ae, +##' perspectiveName = "default", +##' order = rev(order)) +##' +##' # get level canvases order of ae emuDB +##' get_levelCanvasesOrder(emuDBhandle = ae, +##' perspectiveName = "default") +##' } +##' +NULL + +##' @rdname SetGetlevelCanvasesOrder +##' @export +set_levelCanvasesOrder <- function(emuDBhandle, + perspectiveName, + order){ + check_emuDBhandle(emuDBhandle) + + DBconfig = load_DBconfig(emuDBhandle) + + curLevelNames = list_levelDefinitions(emuDBhandle)$name + curLevelTypes = list_levelDefinitions(emuDBhandle)$type + + #check if levels given are defined and of correct type + for(t in order){ + if(!t %in% curLevelNames){ + stop("No levelDefinition present with name '", t, "'!") + } + lt = curLevelTypes[curLevelNames == t] + if(!lt %in% c("SEGMENT", "EVENT")){ + stop("levelDefinition with name '", t, "' is not of type 'SEGMENT' or 'EVENT'") + } + } + + for(i in 1:length(DBconfig$EMUwebAppConfig$perspectives)){ + if(DBconfig$EMUwebAppConfig$perspectives[[i]]$name == perspectiveName){ + DBconfig$EMUwebAppConfig$perspectives[[i]]$levelCanvases$order = as.list(order) + break + } + } + # store changes + store_DBconfig(emuDBhandle, DBconfig) +} + + +##' @rdname SetGetlevelCanvasesOrder +##' @export +get_levelCanvasesOrder <- function(emuDBhandle, + perspectiveName){ + + check_emuDBhandle(emuDBhandle) + + DBconfig = load_DBconfig(emuDBhandle) + + order = NA + for(p in DBconfig$EMUwebAppConfig$perspectives){ + if(p$name == perspectiveName){ + order = unlist(p$levelCanvases$order) + } + } + return(order) +} + + +# FOR DEVELOPMENT +# library('testthat') +# test_file("tests/testthat/test_aaa_initData.R") +# test_file('tests/testthat/test_emuR-database.DBconfig.EMUwebAppConfig.R') diff --git a/R/emuR-database.DBconfig.R b/R/emuR-database.DBconfig.R new file mode 100644 index 00000000..df002081 --- /dev/null +++ b/R/emuR-database.DBconfig.R @@ -0,0 +1,1623 @@ + +##################################################### +# functions used to build various path combinations +# plus helper functions + +get_levelNameForAttributeName <- function(emuDBhandle, attributeName){ + DBconfig = load_DBconfig(emuDBhandle) + for(lvlD in DBconfig$levelDefinitions){ + aNames = character(0) + for(ad in lvlD$attributeDefinitions){ + aNames = c(aNames, ad[['name']]) + if(attributeName %in% aNames){ + return(lvlD[['name']]) + } + } + } + return(NULL) +} + + +get_allAttributeNames<-function(emuDBhandle){ + DBconfig = load_DBconfig(emuDBhandle) + aNames=character(0) + for(lvlD in DBconfig$levelDefinitions){ + for(ad in lvlD$attributeDefinitions){ + aNames=c(aNames,ad$name) + } + + } + return(aNames) +} + + +get_linkLevelChildrenNames<-function(schema, superlevelName){ + chNames = character(0) + for(ld in schema[['linkDefinitions']]){ + if(ld[['superlevelName']] == superlevelName){ + chNames=c(chNames, ld[['sublevelName']]) + } + } + return(chNames) +} + +expand_linkPath <- function(p){ + expPath = list() + pLen = length(p) + if(pLen == 1){ + return(list()) + } + expPath[[length(expPath)+1L]] = p + expPath = c(expPath, expand_linkPath(p[1:(pLen-1)])) + return(expPath) +} + +## build all hierarchy paths including partial paths +## @return list containing paths and subpaths +build_allHierarchyPaths <- function(schema){ + extLds = list() + for(ld in schema[['levelDefinitions']]){ + lName = ld[['name']] + pathes = build_sublevelPathes(schema, lName) + for(p in pathes){ + extLds = c(extLds, expand_linkPath(p)) + } + } + return(unique(extLds)) +} + + +build_sublevelPathes <- function(DBconfig, levelName){ + pathes = list() + chNames = get_linkLevelChildrenNames(DBconfig, levelName) + if(length(chNames) == 0){ + pathes[[length(pathes) + 1L]] = c(levelName) + }else{ + for(chName in chNames){ + chPathes = build_sublevelPathes(DBconfig, chName) + for(chPath in chPathes){ + pathes[[length(pathes)+1L]] = c(levelName,chPath) + } + } + } + return(pathes) +} + + +build_levelPathes <- function(emuDBhandle){ + DBconfig = load_DBconfig(emuDBhandle) + pathes = list() + chNames = character(0) + for(l in DBconfig$levelDefinitions){ + lPathes = build_sublevelPathes(DBconfig, l[['name']]) + pathes = c(pathes, lPathes) + } + return(pathes) +} + +# get all paths through hierarchy connecting two levels +get_hierPathsConnectingLevels <- function(emuDBhandle, levelName1, levelName2){ + + allHierPaths = build_allHierarchyPaths(load_DBconfig(emuDBhandle)) + + conHierPaths = list() + + + for(p in allHierPaths){ + # assume levelName1 is above levelName2 + if(p[1] == levelName1 & p[length(p)] == levelName2){ + conHierPaths[[length(conHierPaths) + 1]] = p + } + # assume levelName2 is above levelName1 + if(p[1] == levelName2 & p[length(p)] == levelName1){ + conHierPaths[[length(conHierPaths) + 1]] = p + } + + } + + return(conHierPaths) +} + + + +# builds "extended" link definitions +# lists link definitionsfor every possible directed connection between levels +# returns list of character vectors +# the first element of each character vector contains the super level name of the levelDefinition, +# the follwing elements contain all exetnded linked sub level names +build_extLinkDefinitions <- function(emuDBhandle){ + lds = list() + pathes = build_levelPathes(emuDBhandle) + for(p in pathes){ + pLen = length(p) + for(i in 1:pLen){ + ld = character(0) + for(j in i:pLen){ + ld = c(ld,p[j]) + } + lds[[length(lds)+1L]] = ld + } + } + return(lds) +} + + +find_segmentLevels<-function(emuDBhandle, attrName){ + lvlNm = get_levelNameForAttributeName(emuDBhandle, attrName) + extLnkDefs = build_extLinkDefinitions(emuDBhandle) + segLvlList=character(0) + for(extLnkDef in extLnkDefs){ + if(extLnkDef[1]==lvlNm){ + if(length(extLnkDef) > 1){ + for(trgLvlNm in extLnkDef[2:length(extLnkDef)]){ + trgLd=get_levelDefinition(emuDBhandle, trgLvlNm) + if(trgLd['type']=='SEGMENT'){ + segLvlList=unique(c(segLvlList,trgLvlNm)) + } + } + } + } + } + return(segLvlList) +} + +get_levelDefinition <- function(emuDBhandle, name){ + DBconfig = load_DBconfig(emuDBhandle) + res = NULL + for(ld in DBconfig$levelDefinitions){ + if(ld$name == name){ + res = ld + break + } + } + return(res) +} + +########################################### +# DBconfig file handeling functions + +## load function for _DBconfig.json file of emuDB +load_DBconfig <- function(emuDBhandle){ + dbCfgPath = file.path(emuDBhandle$basePath, paste0(emuDBhandle$dbName, database.schema.suffix)) + if(file.exists(dbCfgPath)){ + DBconfig = jsonlite::fromJSON(dbCfgPath, simplifyVector=FALSE) + }else{ + stop(dbCfgPath, " does not seem to exist. This could be due to a bad 'name' entry in the DBconfig file. This field has to be the same as the name of the emuDB (directory & _DBconfig.json)") + } + return(DBconfig) +} + +# store function for dbConfig +store_DBconfig <- function(emuDBhandle, dbConfig, basePath = NULL){ + if(is.null(basePath)){ + basePath = emuDBhandle$basePath + } + dbCfgPath = file.path(basePath, paste0(emuDBhandle$dbName, database.schema.suffix)) + json = jsonlite::toJSON(dbConfig, auto_unbox = TRUE, force = TRUE, pretty = TRUE) + writeLines(json, dbCfgPath, useBytes = TRUE) +} + + +################################################################ +################# CRUD DBconfig functions ###################### +################################################################ + + + +########################################### +# CRUD operation for levelDefinitions + +##' Add / List / Remove level definition to / of / from emuDB +##' +##' Add / List / Remove database operation functions for level definitions. +##' A level is a more general term for what is often referred to as a "tier". +##' It is more general in the sense that people usually +##' expect tiers to contain time information. Levels +##' can either contain time information if they are of the +##' type "EVENT" or of the type "SEGMENT" but are timeless +##' if they are of the type "ITEM". For more information +##' on the structural elements of an emuDB see \code{vignette(emuDB)}. +##' Note that a level cannot be removed, if it contains instances of annotation items +##' or if it is linked to another level. Further note, renaming a level definition +##' can be done using \code{\link{rename_attributeDefinition}}. +##' +##' @param emuDBhandle emuDB handle as returned by \code{\link{load_emuDB}} +##' @param name name of level definition +##' @param type type of level definition ("SEGMENT","EVENT","ITEM") +##' @param rewriteAllAnnots should changes be written to file system (_annot.json files) (intended for expert use only) +##' @param force delete all items incl. links pointing to those items from the levels +##' @param verbose Show progress bars and further information +##' @keywords emuDB database schema Emu +##' @name AddListRemoveLevelDefinitions +##' @examples +##' \dontrun{ +##' +##' ################################## +##' # prerequisite: loaded ae emuDB +##' # (see ?load_emuDB for more information) +##' +##' # add level called "Phonetic2" to the ae emuDB +##' # that could for example contain the transcriptions of a second annotator +##' add_levelDefinition(emuDBhandle = ae, +##' name = "Phonetic2", +##' type = "SEGMENT") +##' +##' # list level definition of ae emuDB +##' list_levelDefinitions(emuDBhandle = ae) +##' +##' # remove newly added level definition +##' remove_levelDefinitions(emuDBhandle = ae, +##' name = "Phonetic2") +##' } +##' +NULL + +##' @rdname AddListRemoveLevelDefinitions +##' @export +add_levelDefinition<-function(emuDBhandle, name, + type, rewriteAllAnnots = TRUE, verbose = TRUE){ + check_emuDBhandle(emuDBhandle) + allowedTypes = c('ITEM', 'SEGMENT', 'EVENT') + # precheck type + if(!(type %in% allowedTypes)){ + stop('Bad type given! Type has to be either ', paste(allowedTypes, collapse = ' | ') ) + } + levelDefinition=list(name = name, type = type, + attributeDefinitions = list(list(name = name, type = 'STRING'))) + dbConfig = load_DBconfig(emuDBhandle) + # check if level definition (name) already exists + for(ld in dbConfig$levelDefinitions){ + if(ld$name == levelDefinition$name){ + stop("Level definition:", levelDefinition$name," already exists in database ", emuDBhandle$dbName) + } + } + # add + dbConfig$levelDefinitions[[length(dbConfig$levelDefinitions) + 1]] = levelDefinition + + store_DBconfig(emuDBhandle, dbConfig) + + if(rewriteAllAnnots){ + rewrite_annots(emuDBhandle, verbose = verbose) + } + invisible(NULL) +} + + +##' @rdname AddListRemoveLevelDefinitions +##' @export +list_levelDefinitions <- function(emuDBhandle){ + check_emuDBhandle(emuDBhandle, checkCache=FALSE) + dbConfig = load_DBconfig(emuDBhandle) + df <- data.frame(name = character(), + type = character(), + nrOfAttrDefs = numeric(), + stringsAsFactors = FALSE) + + for(ld in dbConfig$levelDefinitions){ + df <- rbind(df, data.frame(name = ld$name, + type = ld$type, + nrOfAttrDefs = length(ld$attributeDefinitions), + attrDefNames = paste0(sapply(ld$attributeDefinitions, function(ad) paste0(ad$name, ";")), collapse = " "), + stringsAsFactors = FALSE)) + } + # NULL out + if(nrow(df) == 0){ + df = NULL + } + return(df) +} + + +##' @rdname AddListRemoveLevelDefinitions +##' @export +remove_levelDefinition<-function(emuDBhandle, name, rewriteAllAnnots = TRUE, force = FALSE, verbose = TRUE){ + check_emuDBhandle(emuDBhandle) + dbConfig = load_DBconfig(emuDBhandle) + # check if level definition (name)exists + if(!any(sapply(dbConfig$levelDefinitions, function(ld) ld[['name']] == name))){ + stop("Level definition:", name, " does not exist in database ", dbConfig$name) + } + # check if level is referenced by link defintion + for(lkd in dbConfig$linkDefinitions){ + if(lkd[['superlevelName']] == name | lkd[['sublevelName']] == name){ + lkdStr = toString(lkd) + stop("Cannot remove level definition ", name, ". It is referenced by link definition: ", lkdStr) + } + } + + if(!force){ + # check if level is empty + itemsDf = DBI::dbGetQuery(emuDBhandle$connection, paste0("SELECT * FROM items i WHERE \ + i.db_uuid='", emuDBhandle$UUID, "' AND i.level='", name, "'")) + itemsCnt = nrow(itemsDf) + if(itemsCnt > 0){ + stop("Level is not empty. Remove items first to delete level ", name) + } + }else{ + + if(verbose){ + answ <- readline(prompt="Are you sure you wish to remove all annotational items that are associated with this levelDefinition (y/n): ") + + if(!answ %in% c("y", "Y")){ + stop("removal of linkDefinition incl. associated links aborted") + } + } + + # delete all labels + DBI::dbExecute(emuDBhandle$connection, paste0("DELETE FROM labels ", + "WHERE EXISTS( ", + "SELECT * FROM items i ", + "WHERE i.db_uuid='", emuDBhandle$UUID, "' ", + "AND i.session = labels.session AND i.bundle = labels.bundle AND i.item_id = labels.item_id ", + "AND i.level='", name, "' ", + ")" + )) + + # delete all items + DBI::dbExecute(emuDBhandle$connection, paste0("DELETE FROM items ", + "WHERE items.db_uuid='", emuDBhandle$UUID, "' AND items.level='", name, "'")) + } + + # do removal + newLvlDefs = list() + for(lvlDef in dbConfig$levelDefinitions){ + if(lvlDef[['name']] != name){ + newLvlDefs[[length(newLvlDefs) + 1]] = lvlDef + } + } + dbConfig$levelDefinitions = newLvlDefs + + # remove from levelCanvasOrder of EMUwebAppConfig if present + for(i in 1:length(dbConfig$EMUwebAppConfig$perspectives)){ + if(any(dbConfig$EMUwebAppConfig$perspectives[[i]]$levelCanvases$order == name)){ + # print(dbConfig$EMUwebAppConfig$perspectives[[i]]$levelCanvases$order) + found = dbConfig$EMUwebAppConfig$perspectives[[i]]$levelCanvases$order == name + # print(found) + dbConfig$EMUwebAppConfig$perspectives[[i]]$levelCanvases$order = dbConfig$EMUwebAppConfig$perspectives[[i]]$levelCanvases$order[!found] + } + } + + store_DBconfig(emuDBhandle, dbConfig) + + if(rewriteAllAnnots){ + rewrite_annots(emuDBhandle, verbose = verbose) + } + + return(invisible(NULL)) +} + +################################################### +# CRUD operations for attributeDefinitions + +##' Add / List / Rename / Remove attribute definition to / of / from emuDB +##' +##' @description Add / List / Rename / Remove database operation functions for attribute +##' definition to / of / from an existing level definition of an emuDB. +##' Attribute definitions can be viewed as definitions of +##' parallel labels for the annotational units (ITEMs) of the emuDB. +##' Each level definition is required to have at least one +##' default attribute definition that has the same name as the level definition +##' (automatically created by \code{\link{add_levelDefinition}}). For more +##' information on the structural elements of an emuDB see \code{vignette(emuDB)}. +##' Note that as with level definitions, an attribute definition to a level cannot be removed, +##' if it contains labels in the emuDB. +##' +##' As the only one of these operations, \code{rename_attributeDefinition} can +##' also be used to manipulate (i.e. rename) a level definition. It is therefore +##' not necessary to specify the name of the level that the attribute definition +##' belongs to. While renaming a level or attribute definition, emuR will +##' (1) rewrite the levelDefinitions in DBconfig, (2) rewrite the +##' linkDefinitions in DBconfig, (3) rewrite the perspectives in DBconfig, +##' (4) rewrite the anagestConfig in DBconfig, and (5) rewrite all _annot.json +##' files. (5) May take quite a while, depending on the number of bundles in the +##' database. +##' +##' @param emuDBhandle emuDB handle as returned by \code{\link{load_emuDB}} +##' @param levelName name of level +##' @param name name of attributeDefinition +##' @param type type of attributeDefinition (currently only "STRING") +##' @param origAttrDef name of level/attribute definition in emuDB that is to be changed +##' @param newAttrDef new name that shall be assigned to the level/attribute definition +##' @param rewriteAllAnnots should changes be written to file system (_annot.json files) (intended for expert use only) +##' @param force delete all attribute definitions in annotations (== label entries) +##' @param verbose if set to \code{TRUE}, more status messages are printed +##' @keywords emuDB database DBconfig Emu +##' @name AddListRenameRemoveAttributeDefinitions +##' @examples +##' \dontrun{ +##' +##' ################################## +##' # prerequisite: loaded ae emuDB +##' # (see ?load_emuDB for more information) +##' +##' # add additional attribute definition to the "Phonetic" level +##' # of the ae emuDB that will contain the UTF8 IPA +##' # symbols of the phonetic transcriptions +##' add_attributeDefinition(emuDBhandle = ae, +##' levelName = "Phonetic", +##' name = "IPA-UTF8") +##' +##' # list attribute definitions for level "Word" +##' # of the ae emuDB +##' list_attributeDefinitions(emuDBhandle = ae, +##' levelName = "Word") +##' +##' # remove newly added attributeDefinition +##' remove_attributeDefinition(emuDBhandle = ae, +##' levelName = "Phonetic", +##' name = "IPA-UTF8") +##' } +##' +NULL + +##' @rdname AddListRenameRemoveAttributeDefinitions +##' @export +add_attributeDefinition <- function(emuDBhandle, levelName, + name, type = "STRING", + rewriteAllAnnots = TRUE, verbose = TRUE){ + check_emuDBhandle(emuDBhandle) + internal_add_attributeDefinition(emuDBhandle, levelName, + name, type = "STRING", + rewriteAllAnnots = rewriteAllAnnots, verbose = verbose) + + +} + +internal_add_attributeDefinition <- function(emuDBhandle, levelName, + name, + type = "STRING", + rewriteAllAnnots = TRUE, + verbose = TRUE, + insertLabels = TRUE){ + if(type != "STRING"){ + stop("Currently only attributeDefinition of type 'STRING' allowed") + } + + # precheck if attribute definition is already defined + lds = list_levelDefinitions(emuDBhandle) + for(ln in lds$name){ + lads = list_attributeDefinitions(emuDBhandle, ln) + if(name %in% lads$name){ + stop("attributeDefinition with name '", name, "' already exists on level '", ln, "'! Currently, only unique attributeDefinition names are allowed within a single emuDB.") + } + } + + dbConfig = load_DBconfig(emuDBhandle) + + df = list_attributeDefinitions(emuDBhandle, levelName) + + labelIdx = -1 + if(!(name %in% df$name)){ + for(i in 1:length(dbConfig$levelDefinitions)){ + if(dbConfig$levelDefinitions[[i]]$name == levelName){ + labelIdx = length(dbConfig$levelDefinitions[[i]]$attributeDefinitions) + 1 + dbConfig$levelDefinitions[[i]]$attributeDefinitions[[labelIdx]] = list(name = name, type = type) + break + } + } + }else{ + stop(paste0("attributeDefinition with name '", name, "' already present in level '", levelName, "'")) + } + + # add to labels table + if(insertLabels){ + DBI::dbExecute(emuDBhandle$connection, paste0("INSERT INTO labels ", + "SELECT db_uuid, session , bundle, item_id, ", labelIdx, " AS label_idx, '", name, "' AS name, '' AS label ", + "FROM labels ", + "WHERE name = '", levelName, "' AND label_idx = 1 ")) + } + # store changes + store_DBconfig(emuDBhandle, dbConfig) + + if(rewriteAllAnnots){ + rewrite_annots(emuDBhandle, verbose = verbose) + } + +} + + + +##' @rdname AddListRenameRemoveAttributeDefinitions +##' @export +list_attributeDefinitions <- function(emuDBhandle, levelName){ + check_emuDBhandle(emuDBhandle, checkCache = FALSE) + # init empty result df + df = data.frame(name = character(), + level = character(), + type = character(), + hasLabelGroups = logical(), + hasLegalLabels = logical(), + stringsAsFactors = FALSE) + + for(lev in levelName){ + ld = get_levelDefinition(emuDBhandle, lev) + + for(ad in ld$attributeDefinitions){ + df = rbind(df, df = data.frame(name = ad$name, + level = lev, + type = ad$type, + hasLabelGroups = !is.null(ad$labelGroups), + hasLegalLabels = !is.null(ad$legalLabels), + stringsAsFactors = FALSE)) + } + } + + rownames(df) <- NULL + return(df) +} + + +##' @rdname AddListRenameRemoveAttributeDefinitions +##' @export +rename_attributeDefinition <- function(emuDBhandle, + origAttrDef, + newAttrDef, + verbose = TRUE) { + + ############################# + # check input parameters + check_emuDBhandle(emuDBhandle) + if((!inherits(origAttrDef, "character")) | (!inherits(newAttrDef, "character")) | length(origAttrDef) != 1 | length(newAttrDef) != 1){ + stop("origAttrDef and newAttrDef have to be character vectors with only one item!") + } + + allAttrNames = get_allAttributeNames(emuDBhandle) + if(!origAttrDef %in% allAttrNames){ + stop(paste0("Attribute definition: ", origAttrDef, " not found in emuDB! The available attribute definitions are: ", paste0(allAttrNames, collapse = "; "))) + } + + if(newAttrDef %in% allAttrNames){ + stop(paste0("Attribute definition: ", newAttrDef, " is already defined in emuDB! You need to specify unique names!")) + } + + ############################# + # adjust DBconfig + + dbConfig = load_DBconfig(emuDBhandle) + + + dbConfig$linkDefinitions = lapply ( + dbConfig$linkDefinitions, + function (linkDef) { + if (linkDef$superlevelName == origAttrDef) { + linkDef$superlevelName = newAttrDef + } + if (linkDef$sublevelName == origAttrDef) { + linkDef$sublevelName = newAttrDef + } + + linkDef + } + ) + + dbConfig$EMUwebAppConfig$perspectives = lapply ( + dbConfig$EMUwebAppConfig$perspectives, + function (perspective) { + perspective$levelCanvases$order = lapply( + perspective$levelCanvases$order, + function (canvas) { + if (canvas == origAttrDef) { + newAttrDef + } else { + canvas + } + } + ) + + perspective + } + ) + + dbConfig$levelDefinitions = lapply ( + dbConfig$levelDefinitions, + + function (lvlDef) { + # If lvlDef references the level to be renamed in its anagest config, + # adjust that + if(!is.null(lvlDef$anagestConfig)){ + if (lvlDef$anagestConfig$autoLinkLevelName == origAttrDef) { + lvlDef$anagestConfig$autoLinkLevelName = newAttrDef + } + } + # If lvlDef *is* the level to be renamed, adjust that + if (lvlDef$name == origAttrDef) { + lvlDef$name = newAttrDef + lvlDef$attributeDefinitions[[1]]$name = newAttrDef + } else { + # If lvlDef is not the level to be renamed, search lvlDef's attribute + # definitions. One of them may be the one to be renamed. + lvlDef$attributeDefinitions = lapply( + lvlDef$attributeDefinitions, + function (attrDef) { + if (attrDef$name == origAttrDef) { + attrDef$name = newAttrDef + } + + attrDef + } + ) + } + + # Return the (possibly modified) lvlDef so lapply knows the new value + lvlDef + } + ) + + + # + ############################# + if(verbose){ + cat("\n INFO: creating temporary index...\n") + } + + # create temp index + DBI::dbExecute(emuDBhandle$connection, paste0("CREATE INDEX IF NOT EXISTS level_rename_tmp_idx ON items(db_uuid, level)")) + + + if(verbose){ + cat("\n INFO: renaming attribute definition\n") + } + + # transaction start + DBI::dbBegin(emuDBhandle$connection) + + DBI::dbExecute(emuDBhandle$connection, paste0("UPDATE items SET level = '", newAttrDef, "' ", + "WHERE db_uuid='", emuDBhandle$UUID, "' ", + "AND level = '", origAttrDef, "'")) + + DBI::dbExecute(emuDBhandle$connection, paste0("UPDATE labels SET name = '", newAttrDef, "' ", + "WHERE db_uuid='", emuDBhandle$UUID, "' ", + "AND name = '", origAttrDef, "'")) + + # transaction end + DBI::dbCommit(emuDBhandle$connection) + + if(verbose){ + cat("\n INFO: removing temporary index...\n") + } + # remove temp index + DBI::dbExecute(emuDBhandle$connection, paste0("DROP INDEX IF EXISTS level_rename_tmp_idx")) + + store_DBconfig(emuDBhandle, dbConfig) + rewrite_annots(emuDBhandle, verbose = verbose) +} + + +##' @rdname AddListRenameRemoveAttributeDefinitions +##' @export +remove_attributeDefinition <- function(emuDBhandle, + levelName, + name, + force = FALSE, + rewriteAllAnnots = TRUE, + verbose = TRUE){ + + check_emuDBhandle(emuDBhandle) + + if(levelName == name){ + stop("Can not remove primary attributeDefinition (attributeDefinition with same name as level)") + } + + dbConfig = load_DBconfig(emuDBhandle) + + ld = get_levelDefinition(emuDBhandle, levelName) + + if(!force){ + # check if instances are present + qRes = DBI::dbGetQuery(emuDBhandle$connection, paste0("SELECT * FROM items AS it, labels AS lb WHERE ", + "it.db_uuid = lb.db_uuid AND ", + "it.session = lb.session AND ", + "it.bundle = lb.bundle AND ", + "it.item_id = lb.item_id AND ", + "it.level = '", levelName, "' AND ", + "lb.name = '", name, "'")) + if(nrow(qRes) > 0){ + stop("Can not remove attributeDefinition if there are labels present") + } + }else{ + if(verbose){ + answ <- readline(prompt = "Are you sure you wish to remove all labels that are associated with this attributeDefinition (y/n): ") + + if(!answ %in% c("y", "Y")){ + stop("removal of attributeDefinition aborted") + } + } + # delete all labels + DBI::dbExecute(emuDBhandle$connection, paste0("DELETE FROM labels ", + "WHERE EXISTS( ", + "SELECT * FROM items i ", + "WHERE i.db_uuid='", emuDBhandle$UUID, "' ", + "AND i.session = labels.session AND i.bundle = labels.bundle AND i.item_id = labels.item_id ", + "AND i.level='", levelName, "' AND labels.name ='", name, "' ", + ")" + )) + + + } + + levDefIdx = NULL + for(i in 1:length(dbConfig$levelDefinitions)){ + if(dbConfig$levelDefinitions[[i]]$name == levelName){ + levDefIdx = i + break + } + } + + for(i in 1:length(dbConfig$levelDefinitions[[levDefIdx]]$attributeDefinitions)){ + if(dbConfig$levelDefinitions[[levDefIdx]]$attributeDefinitions[[i]]$name == name){ + dbConfig$levelDefinitions[[levDefIdx]]$attributeDefinitions[[i]] = NULL + break + } + } + + # store changes + store_DBconfig(emuDBhandle, dbConfig) + if(rewriteAllAnnots){ + rewrite_annots(emuDBhandle, verbose = verbose) + } +} + +################################################### +# CRUD operations for legalLabels + +##' Set / Get / Remove legal labels of attributeDefinition of emuDB +##' +##' Set / Get / Remove legal labels of a specific attributeDefinition of a emuDB. +##' The legal labels are a character vector of strings +##' that specifies the labels that are legal (i.e. allowed / valid) for the given attribute. +##' As the EMU-webApp won't allow the annotator to enter any labels that are not +##' specified in this array, this is a simple way of assuring that a level +##' has a consistent label set. For more information +##' on the structural elements of an emuDB see \code{vignette(emuDB)}. +##' Note that defining legal labels for an attributeDefinition does not imply that the +##' existing labels are checked for being 'legal' in the emuDB. +##' +##' @param emuDBhandle emuDB handle as returned by \code{\link{load_emuDB}} +##' @param levelName name of level +##' @param attributeDefinitionName name of attributeDefinition (can be and often is the level name) +##' @param legalLabels character vector of labels +##' @keywords emuDB database schema Emu +##' @name SetGetRemoveLegalLabels +##' @examples +##' \dontrun{ +##' +##' ################################## +##' # prerequisite: loaded ae emuDB +##' # (see ?load_emuDB for more information) +##' +##' legalPhoneticLabels = c("V", "m", "N", "s", "t", "H", "@:", "f", "r", +##' "E", "n", "z", "S", "i:", "w", "@", "k", "I", "d", +##' "db", "j", "u:", "dH", "l", "ai", "O", "D", "o:", "v") +##' +##' # set legal labels of the +##' # default "Phonetic" attributeDefinition of +##' # the "Phonetic" level of ae emuDB +##' set_legalLabels(emuDBhandle = ae, +##' levelName = "Phonetic", +##' attributeDefinitionName = "Phonetic", +##' legalLabels = legalPhoneticLabels) +##' +##' # get legal labels of the +##' # default "Phonetic" attributeDefinition of +##' # the "Phonetic" level of ae emuDB +##' get_legalLabels(emuDBhandle = ae, +##' levelName = "Phonetic", +##' attributeDefinitionName = "Phonetic") +##' +##' +##' # remove legal labels of the +##' # default "Phonetic" attributeDefinition of +##' # the "Phonetic" level of ae emuDB +##' remove_legalLabels(emuDBhandle = ae, +##' levelName = "Phonetic", +##' attributeDefinitionName = "Phonetic") +##' +##' } +##' +NULL + +##' @rdname SetGetRemoveLegalLabels +##' @export +set_legalLabels <- function(emuDBhandle, + levelName, + attributeDefinitionName, + legalLabels){ + + check_emuDBhandle(emuDBhandle) + + if(!is.null(legalLabels) & (!inherits(legalLabels, "character"))){ + stop("legalLabels must be of class 'character'") + } + + dbConfig = load_DBconfig(emuDBhandle) + + for(i in 1:length(dbConfig$levelDefinitions)){ + for(j in 1:length(dbConfig$levelDefinitions[[i]]$attributeDefinitions)){ + if(dbConfig$levelDefinitions[[i]]$attributeDefinitions[[j]]$name == attributeDefinitionName){ + dbConfig$levelDefinitions[[i]]$attributeDefinitions[[j]]$legalLabels = legalLabels + } + } + } + + # store changes + store_DBconfig(emuDBhandle, dbConfig) + +} + + +##' @rdname SetGetRemoveLegalLabels +##' @export +get_legalLabels <- function(emuDBhandle, + levelName, + attributeDefinitionName){ + check_emuDBhandle(emuDBhandle) + + ld = get_levelDefinition(emuDBhandle, levelName) + + ll = NULL + for(ad in ld$attributeDefinitions){ + if(ad$name == attributeDefinitionName){ + if(!is.null(ad$legalLabels)){ + ll = unlist(ad$legalLabels) + }else{ + ll = NA + } + } + } + + return(ll) +} + + +##' @rdname SetGetRemoveLegalLabels +##' @export +remove_legalLabels <- function(emuDBhandle, + levelName, + attributeDefinitionName){ + check_emuDBhandle(emuDBhandle) + # remove by setting to NULL + set_legalLabels(emuDBhandle, + levelName, + attributeDefinitionName, + legalLabels = NULL) +} + +################################################### +# CRUD operations for attributeDefinition$labelGroups + +##' Add / List / Remove labelGroup to / of / from attributeDefinition of emuDB +##' +##' Add / List / Remove label group to / of / from a specific attribute definition. +##' This label group can be used as a short hand +##' to reference groups of labels specific +##' to an attribute definition (compared to global label groups that +##' are added by \code{\link{add_labelGroup}}) in a +##' \code{\link{query}}. A common example would be to +##' add a label group for something like the phonetic +##' category of nasals to be able reference them +##' as "nasals" in a \code{\link{query}}. For more information +##' on the structural elements of an emuDB see \code{vignette(emuDB)}. +##' +##' +##' @param emuDBhandle emuDB handle as returned by \code{\link{load_emuDB}} +##' @param levelName name of level +##' @param attributeDefinitionName name of attributeDefinition +##' @param labelGroupName name of label group +##' @param labelGroupValues character vector of labels +##' @keywords emuDB database schema Emu +##' @seealso add_labelGroup +##' @name AddListRemoveAttrDefLabelGroup +##' @examples +##' \dontrun{ +##' +##' ################################## +##' # prerequisite: loaded ae emuDB +##' # (see ?load_emuDB for more information) +##' +##' sampaNasals = c("m", "F", "n", "J", "N") +##' +##' # add these values to the default Phonetic attribute +##' # definition of the Phonetic level of the ae emuDB +##' add_attrDefLabelGroup(emuDBhandle = ae, +##' levelName = "Phonetic", +##' attributeDefinitionName = "Phonetic", +##' labelGroupName = "sampaNasals", +##' labelGroupValues = sampaNasals) +##' +##' # query the labelGroup +##' query(ae, "Phonetic=sampaNasals") +##' +##' +##' # list attribute definition label groups +##' # of attributeDefinition "Phonetic" of the level "Phonetic" +##' # of the ae emuDB +##' list_attrDefLabelGroups(emuDBhandle = ae, +##' levelName = "Phonetic" , +##' attributeDefinitionName = "Phonetic") +##' +##' # remove the newly added attrDefLabelGroup +##' remove_attrDefLabelGroup(emuDBhandle = ae, +##' levelName = "Phonetic", +##' attributeDefinitionName = "Phonetic", +##' labelGroupName = "sampaNasals") +##' +##' } +##' +NULL + +##' @rdname AddListRemoveAttrDefLabelGroup +##' @export +add_attrDefLabelGroup <- function(emuDBhandle, + levelName, + attributeDefinitionName, + labelGroupName, + labelGroupValues){ + check_emuDBhandle(emuDBhandle) + dbConfig = load_DBconfig(emuDBhandle) + curLgs = list_attrDefLabelGroups(emuDBhandle, + levelName, + attributeDefinitionName) + + # wrap in list if array of length 1 -> so converted to json + if(length(labelGroupValues) ==1 ){ + labelGroupValues = list(labelGroupValues) + } + + if(labelGroupName %in% curLgs$name){ + stop("labelGroupName '", labelGroupName ,"' already exists!") + } + for(i in 1:length(dbConfig$levelDefinitions)){ + for(j in 1:length(dbConfig$levelDefinitions[[i]]$attributeDefinitions)){ + if(dbConfig$levelDefinitions[[i]]$attributeDefinitions[[j]]$name == attributeDefinitionName){ + l = length(dbConfig$levelDefinitions[[i]]$attributeDefinitions[[j]]$labelGroups) + dbConfig$levelDefinitions[[i]]$attributeDefinitions[[j]]$labelGroups[[l + 1]] = list(name = labelGroupName, + values = labelGroupValues) + } + } + } + + # store changes + store_DBconfig(emuDBhandle, dbConfig) +} + +##' @rdname AddListRemoveAttrDefLabelGroup +##' @export +list_attrDefLabelGroups <- function(emuDBhandle, + levelName, + attributeDefinitionName){ + check_emuDBhandle(emuDBhandle) + ld = get_levelDefinition(emuDBhandle, levelName) + + df = data.frame(name = character(), + values = character(), + stringsAsFactors = FALSE) + for(ad in ld$attributeDefinitions){ + if(ad$name == attributeDefinitionName){ + if(!is.null(ad$labelGroups)){ + for(lg in ad$labelGroups){ + df = rbind(df, data.frame(name = lg$name, + values = paste0(lg$values, collapse = "; "), + stringsAsFactors = FALSE )) + } + } + } + } + + return(df) +} + + +##' @rdname AddListRemoveAttrDefLabelGroup +##' @export +remove_attrDefLabelGroup <- function(emuDBhandle, + levelName, + attributeDefinitionName, + labelGroupName){ + check_emuDBhandle(emuDBhandle) + dbConfig = load_DBconfig(emuDBhandle) + curLgs = list_attrDefLabelGroups(emuDBhandle, + levelName, + attributeDefinitionName) + + if(!labelGroupName %in% curLgs$name){ + stop("labelGroupName '", labelGroupName ,"' does not exists!") + } + + for(i in 1:length(dbConfig$levelDefinitions)){ + for(j in 1:length(dbConfig$levelDefinitions[[i]]$attributeDefinitions)){ + if(dbConfig$levelDefinitions[[i]]$attributeDefinitions[[j]]$name == attributeDefinitionName){ + l = length(dbConfig$levelDefinitions[[i]]$attributeDefinitions[[j]]$labelGroups) + dbConfig$levelDefinitions[[i]]$attributeDefinitions[[j]]$labelGroups[[l]] = NULL + } + } + } + + # store changes + store_DBconfig(emuDBhandle, dbConfig) + +} + +################################################### +# CRUD operations for linkDefinitions + +##' Add / List / Remove linkDefinition to / of / from emuDB +##' +##' Add / List / Remove new link definition to / of / from emuDB. A link definition +##' specifies the relationship between two levels, the +##' super-level and the sub-level. The entirety of all link +##' definitions of a emuDB specifies the +##' hierarchical structure of the database. For more information +##' on the structural elements of an emuDB see \code{vignette(emuDB)}. +##' +##' Link type descriptions: +##' \itemize{ +##' \item{\code{"ONE_TO_MANY"}: A single ITEM of the super-level can be linked to multiple ITEMs of the sub-level} +##' \item{\code{"MANY_TO_MANY"}: Multiple ITEMs of the super-level can be linked to multiple ITEMs of the sub-level} +##' \item{\code{"ONE_TO_ONE"}: A single ITEM of the super-level can be linked to a single ITEM of the sub-level} +##' } +##' +##' For all link types the rule applies that no links are allowed to cross any other links. +##' Further, a linkDefinition can not be removed, if there are links present in the emuDB. +##' +##' @param emuDBhandle emuDB handle as returned by \code{\link{load_emuDB}} +##' @param type type of linkDefinition (either \code{"ONE_TO_MANY"}, \code{"MANY_TO_MANY"} or \code{"ONE_TO_ONE"}) +##' @param superlevelName name of super-level of linkDefinition +##' @param sublevelName name of sub-level of linkDefinition +##' @param force delete all links belonging to the linkDefinition (\strong{USE WITH CAUTION! VERY INVASIVE AKTION!}) +##' @param verbose be verbose. Ask to delete links if \code{force} is \code{TRUE}. +##' @name AddListRemoveLinkDefinition +##' @examples +##' \dontrun{ +##' +##' ################################## +##' # prerequisite: loaded emuDB that was converted +##' # using the convert_TextGridCollection function called myTGcolDB +##' # (see ?load_emuDB and ?convert_TextGridCollection for more information) +##' +##' # add link definition from super-level "Phoneme" +##' # to sub-level "Phonetic" of type "ONE_TO_MANY" +##' # for myTGcolDB emuDB +##' add_linkDefinition(emuDBhandle = myTGcolDB, +##' type = "ONE_TO_MANY", +##' superlevelName = "Phoneme", +##' sublevelName = "Phonetic") +##' +##' # list link definitions for myTGcolDB emuDB +##' list_linkDefinitions(emuDBhandle = myTGcolDB) +##' +##' # remove newly added link definition +##' remove_linkDefinition(emuDBhandle = myTGcolDB, +##' superlevelName = "Phoneme", +##' sublevelName = "Phonetic") +##' +##' +##' } +NULL + +##' @rdname AddListRemoveLinkDefinition +##' @export +add_linkDefinition <- function(emuDBhandle, + type, + superlevelName, + sublevelName){ + + check_emuDBhandle(emuDBhandle) + dbConfig = load_DBconfig(emuDBhandle) + + allowedTypes = c("ONE_TO_MANY", "MANY_TO_MANY", "ONE_TO_ONE") + + if(!type %in% allowedTypes){ + stop("Only the following types permitted: ", paste(allowedTypes, collapse = '; ')) + } + + curLds = list_linkDefinitions(emuDBhandle) + + # check if level is defined + curLevs = list_levelDefinitions(emuDBhandle) + if(!any(curLevs$name == superlevelName) | !any(curLevs$name == sublevelName)){ + stop("Either superlevelName or sublevelName are not defined") + } + + + # check if link between levels already exists + if(any(curLds$superlevelName == superlevelName & curLds$sublevelName == sublevelName)){ + stop("linkDefinition already exists for superlevelName: '", + superlevelName, "' and sublevelName: '", sublevelName, "'") + } + + # check that super level isn't of type EVENT -> validates "Events can never be 'parents' in a domination relationship" constraint + superLevDev = curLevs %>% dplyr::filter(.data$name == superlevelName) + if(superLevDev$type == "EVENT"){ + stop("levels of type 'EVENT' are not allowed to be super levels (== parents) in a domination relationship!") + } + + l = length(dbConfig$linkDefinitions) + dbConfig$linkDefinitions[[l + 1]] = list(type = type, + superlevelName = superlevelName, + sublevelName = sublevelName) + + # store changes + store_DBconfig(emuDBhandle, dbConfig) + +} + + +##' @rdname AddListRemoveLinkDefinition +##' @export +list_linkDefinitions <- function(emuDBhandle){ + check_emuDBhandle(emuDBhandle) + dbConfig = load_DBconfig(emuDBhandle) + + df = data.frame(type = character(), + superlevelName = character(), + sublevelName = character(), + stringsAsFactors = FALSE) + + for(ld in dbConfig$linkDefinitions){ + df = rbind(df, data.frame(type = ld$type, + superlevelName = ld$superlevelName, + sublevelName = ld$sublevelName, + stringsAsFactors = FALSE)) + } + # NULL out df + if(nrow(df) == 0){ + df = NULL + } + return(df) + +} + + +##' @rdname AddListRemoveLinkDefinition +##' @export +remove_linkDefinition <- function(emuDBhandle, + superlevelName, + sublevelName, + force = FALSE, + verbose = TRUE){ + check_emuDBhandle(emuDBhandle) + dbConfig = load_DBconfig(emuDBhandle) + curLds = list_linkDefinitions(emuDBhandle) + + # check if linkDef exists + if(!any(curLds$superlevelName == superlevelName & curLds$sublevelName == sublevelName)){ + stop("No linkDefinition found for superlevelName '", superlevelName, + "' and sublevelName '", sublevelName, "'") + } + if(!force){ + # check if links are present + res = DBI::dbGetQuery(emuDBhandle$connection, paste0("SELECT * FROM ", + "links ", + "INNER JOIN (SELECT * FROM items WHERE level = '", superlevelName, "' AND db_uuid = '", dbConfig$UUID, "') as superItems", + " ON links.from_id = superItems.item_id ", + " AND links.db_uuid = superItems.db_uuid ", + " AND links.session = superItems.session ", + " AND links.bundle = superItems.bundle ", + "INNER JOIN (SELECT * FROM items WHERE level = '", sublevelName, "' AND db_uuid = '", dbConfig$UUID, "') as subItems", + " ON links.to_id = subItems.item_id ", + " AND links.db_uuid = subItems.db_uuid ", + " AND links.session = subItems.session ", + " AND links.bundle = subItems.bundle ", + "WHERE links.db_uuid = '", emuDBhandle$UUID, "'")) + + if(nrow(res) != 0){ + stop("linkDefinition can not be remove as there are links present") + } + }else{ + + if(verbose){ + answ <- readline(prompt="Are you sure you wish to remove all links that are associated with this linkDefinition (y/n): ") + + if(!answ %in% c("y", "Y")){ + stop("removal of linkDefinition incl. associated links aborted") + } + } + # delete all links belonging to linkDef + DBI::dbExecute(emuDBhandle$connection, paste0("DELETE FROM links ", + "WHERE EXISTS( ", + "SELECT * FROM items i_from, items i_to ", + "WHERE i_from.db_uuid='", emuDBhandle$UUID, "' ", + "AND i_from.session = links.session AND i_from.bundle = links.bundle AND i_from.item_id = links.from_id ", + "AND i_from.level='", superlevelName, "' ", + "AND i_to.db_uuid='", emuDBhandle$UUID, "' ", + "AND i_to.session = links.session AND i_to.bundle = links.bundle AND i_to.item_id = links.to_id ", + "AND i_to.level='", sublevelName, "' ", + ")" + )) + + + } + + for(i in 1:length(dbConfig$linkDefinitions)){ + if(dbConfig$linkDefinitions[[i]]$superlevelName == superlevelName && dbConfig$linkDefinitions[[i]]$sublevelName == sublevelName){ + dbConfig$linkDefinitions[[i]] = NULL + break + } + } + + # store changes + store_DBconfig(emuDBhandle, dbConfig) + if(force){ + rewrite_annots(emuDBhandle, verbose = verbose) + } + +} + +################################################### +# CRUD operations for ssffTrackDefinitions + +##' Add / List / Remove ssffTrackDefinition to / from / of emuDB +##' +##' Add / List / Remove ssffTrackDefinition to / from / of emuDB. +##' An ssffTrack (often simply referred to as a track) references +##' data that is stored in the Simple Signal File Format (SSFF) +##' in the according bundle folders. The two most common types of data are: +##' \itemize{ +##' \item{complementary data that was acquired during the recording +##' such as data acquired during electromagnetic +##' articulographic (EMA) or electropalatography (EPG) recordings;} +##' \item{derived data, i.e. data that was calculated from the original audio signal +##' such as formant values and their bandwidths or the short-term Root Mean Square amplitude of the signal.} +##' } +##' For more information on the structural elements of an emuDB see \code{vignette(emuDB)}. +##' +##' @param emuDBhandle emuDB handle as returned by \code{\link{load_emuDB}} +##' @param name name of ssffTrackDefinition +##' @param columnName columnName of ssffTrackDefinition. +##' If the \code{onTheFlyFunctionName} parameter is set and columnName isn't, the +##' \code{columnName} will default to the first entry in \code{wrasspOutputInfos[[onTheFlyFunctionName]]$tracks}. +##' @param fileExtension fileExtension of ssffTrackDefinitions. +##' If the \code{onTheFlyFunctionName} parameter is set and fileExtension isn't, the +##' \code{fileExtension} will default to the first entry in \code{wrasspOutputInfos[[onTheFlyFunctionName]]$ext}. +##' @param fileFormat (optional) file format of ssffTrackDefinition. This is currently in test phase. Can be ssff, +##' Rda or NULL. Defaults to ssff. +##' @param onTheFlyFunctionName name of wrassp function to do on-the-fly calculation. If set to the name of a wrassp +##' signal processing function, not only the emuDB schema is extended by the ssffTrackDefintion but also +##' the track itself is calculated from the signal file and stored in the emuDB. See \code{names(wrasspOutputInfos)} +##' for a list of all the signal processing functions provided by the wrassp package. +##' @param onTheFlyParams a list of parameters that will be given to the function +##' passed in by the onTheFlyFunctionName parameter. This list can easily be +##' generated using the \code{\link{formals}} function on the according signal processing function +##' provided by the wrassp package and then setting the +##' parameter one wishes to change. +##' @param onTheFlyOptLogFilePath path to optional log file for on-the-fly function +##' @param deleteFiles delete files that belong to ssffTrackDefinition on removal +##' @param verbose Show progress bars and further information +##' @param interactive ask user for confirmation +##' @name AddListRemoveSsffTrackDefinition +##' @examples +##' \dontrun{ +##' +##' ################################## +##' # prerequisite: loaded ae emuDB +##' # (see ?load_emuDB for more information) +##' +##' # add ssff track definition to ae emuDB +##' # calculating the according SSFF files (.zcr) on-the-fly +##' # using the wrassp function "zcrana" (zero-crossing-rate analysis) +##' add_ssffTrackDefinition(emuDBhandle = ae, +##' name = "ZCRtrack", +##' onTheFlyFunctionName = "zcrana") +##' +##' # add ssff track definition to ae emuDB +##' # for SSFF files that will be added later (either +##' # by adding files to the emuDB using +##' # the add_files() function or by calculating +##' # them using the according function provided +##' # by the wrassp package) +##' add_ssffTrackDefinition(emuDBhandle = ae, +##' name = "formants", +##' columnName = "fm", +##' fileExtension = "fms") +##' +##' # list ssff track definitions for ae emuDB +##' list_ssffTrackDefinitions(emuDBhandle = ae) +##' +##' # remove newly added ssff track definition (does not delete +##' # the actual .zcr files) +##' remove_ssffTrackDefinition(emuDBhandle = ae, +##' name = "ZCRtrack") +##' +##' } +##' +NULL + +##' @rdname AddListRemoveSsffTrackDefinition +##' @export +add_ssffTrackDefinition <- function(emuDBhandle, name, + columnName = NULL, fileExtension = NULL, + fileFormat = NULL, + onTheFlyFunctionName = NULL, onTheFlyParams = NULL, + onTheFlyOptLogFilePath = NULL, + verbose = TRUE, interactive = TRUE){ + + check_emuDBhandle(emuDBhandle) + + dbConfig = load_DBconfig(emuDBhandle) + + ######################### + # parameter checks + + # set columnName to fist tracks entry in wrasspOutputInfos if columnName is not set + if(!is.null(onTheFlyFunctionName) && is.null(columnName)){ + columnName = wrasspOutputInfos[[onTheFlyFunctionName]]$tracks[1] + } + + # set fileExtension to fist ext entry in wrasspOutputInfos if fileExtension is not set + if(!is.null(onTheFlyFunctionName) && is.null(fileExtension)){ + fileExtension = wrasspOutputInfos[[onTheFlyFunctionName]]$ext[1] + } + + + # check if three main parameters are not null + if(is.null(name) || is.null(columnName) || is.null(fileExtension)){ + stop('name, columnName, fileExtension have to be set!') + } + + # check if fileFormat has a valid value + if (!fileFormat %in% c("ssff", "Rda") && !is.null(fileFormat)) { + stop("fileFormat must be either ssff, Rda or NULL") + } + + # check if onTheFlyFunctionName is set if onTheFlyParams is + if(is.null(onTheFlyFunctionName) && !is.null(onTheFlyParams)){ + stop('onTheFlyFunctionName has to be set if onTheFlyParams is set!') + } + + # check if both onTheFlyFunctionName and onTheFlyParams are set if onTheFlyOptLogFilePath is + if( !is.null(onTheFlyOptLogFilePath) && (is.null(onTheFlyFunctionName) || is.null(onTheFlyParams))){ + stop('Both onTheFlyFunctionName and onTheFlyParams have to be set for you to be able to use the onTheFlyOptLogFilePath parameter!') + } + + curDefs = list_ssffTrackDefinitions(emuDBhandle) + + if(sum(curDefs$name == name) != 0){ + stop("ssffTrackDefinitions with name ", name ," already exists for emuDB: ", emuDBhandle$dbName, "!") + } + + ans = 'y' + # calculate new files + if(!is.null(onTheFlyFunctionName)){ + # check if files exist + filesDf = list_files(emuDBhandle, fileExtension) + if(nrow(filesDf) != 0){ + fp = paste(emuDBhandle$basePath, paste0(filesDf$session, session.suffix), paste0(filesDf$bundle, bundle.dir.suffix), filesDf$file, sep = .Platform$file.sep) + if(interactive){ + ans = readline(paste0("There are files present in '",emuDBhandle$dbName,"' that have the file extention '", + fileExtension, "'! Continuing will overwrite these files! Do you wish to proceed? (y/n) ")) + } + }else{ + if(ans == 'y'){ + + ############################### + # set up function formals + funcFormals = formals(onTheFlyFunctionName) + funcFormals[names(onTheFlyParams)] = onTheFlyParams + funcFormals$optLogFilePath = onTheFlyOptLogFilePath + fp = list_files(emuDBhandle, dbConfig$mediafileExtension) + funcFormals$listOfFiles = paste(emuDBhandle$basePath, paste0(fp$session, session.suffix), paste0(fp$bundle, bundle.dir.suffix), fp$file, sep = .Platform$file.sep) + funcFormals$explicitExt = fileExtension + funcFormals$verbose = verbose + + # check if columnName is valid track + if(!(columnName %in% wrasspOutputInfos[[onTheFlyFunctionName]]$tracks)){ + stop("'", columnName ,"' is not a column produced by '", onTheFlyFunctionName, "'! Please check wrasspOutputInfos for information on the tracks of each wrassp function.") + } + + do.call(onTheFlyFunctionName, funcFormals) + }else{ + stop('Aborted by user...') + } + } + } + + if(ans == 'y'){ + # add new ssffTrackDefinition + if (is.null(fileFormat)) { + # If fileFormat is NULL, we leave it out completely so the resulting JSON + # does not contain "fileFormat": {} + dbConfig$ssffTrackDefinitions[[length(dbConfig$ssffTrackDefinitions) + 1]] = list(name = name, + columnName = columnName, + fileExtension = fileExtension) + } else { + dbConfig$ssffTrackDefinitions[[length(dbConfig$ssffTrackDefinitions) + 1]] = list(name = name, + columnName = columnName, + fileExtension = fileExtension, + fileFormat = fileFormat) + } + # store changes + store_DBconfig(emuDBhandle, dbConfig) + } +} + +##' @rdname AddListRemoveSsffTrackDefinition +##' @export +list_ssffTrackDefinitions <- function(emuDBhandle){ + check_emuDBhandle(emuDBhandle, checkCache = FALSE) + dbConfig = load_DBconfig(emuDBhandle) + + listOfLists = dbConfig$ssffTrackDefinitions %>% + lapply(function(element) { + # Make sure the optional property fileFormat gets its default value here + if (is.null(element$fileFormat)) { + element$fileFormat = "ssff" + } + return(element) + }) %>% + lapply(data.frame, stringsAsFactors=FALSE) + + # This should have been part of the pipe as %>% do.call(rbind, .); but then + # R CMD CHECK complains about the global variable "." + result_df = do.call(rbind, listOfLists) + + return(result_df) +} + + +##' @rdname AddListRemoveSsffTrackDefinition +##' @export +remove_ssffTrackDefinition <- function(emuDBhandle, name, + deleteFiles = FALSE){ + check_emuDBhandle(emuDBhandle) + dbConfig = load_DBconfig(emuDBhandle) + + # precheck if exists + sDefs = list_ssffTrackDefinitions(emuDBhandle) + + if(!(name %in% sDefs$name)){ + stop("No ssffTrackDefinitions found with name: '", name, "'") + } + # find end delete entry + deletedDef = NULL + for(i in 1:length(dbConfig$ssffTrackDefinitions)){ + if(dbConfig$ssffTrackDefinitions[[i]]$name == name){ + deletedDef = dbConfig$ssffTrackDefinitions[[i]] + dbConfig$ssffTrackDefinitions[[i]] = NULL + break + } + } + + # find and delete files + if(deleteFiles){ + fp = list_files(emuDBhandle, deletedDef$fileExtension) + fp = paste(emuDBhandle$basePath, paste0(fp$session, session.suffix), paste0(fp$bundle, bundle.dir.suffix), fp$file, sep = .Platform$file.sep) + file.remove(fp) + } + # store changes + store_DBconfig(emuDBhandle, dbConfig) +} + +################################################### +# CRUD operations for global labelGroups + +##' Add / List / Remove global labelGroup to / of / from emuDB +##' +##' Add / List / Remove label group that can be used as a short hand +##' to reference groups of labels that are globally defined +##' for the entire database (compared to attribute definition +##' specific label groups that +##' are added by \code{\link{add_attrDefLabelGroup}}) in a +##' \code{\link{query}}. A common example would be to +##' add a label group for something like the phonetic +##' category of nasals to be able to reference them +##' as "nasals" in a \code{\link{query}}. +##' In theory you could use a labelGroupName as a label instance within the +##' level, but since this could lead to serious confusion, it is better avoided. +##' For users transitioning from the legacy EMU system: Do not confuse a +##' labelGroup with legal labels: a labelGroup +##' had the unfortunate name 'legal labels' in the legacy EMU system. +##' For more information on the structural elements of an emuDB +##' see \code{vignette{emuDB}}. +##' +##' @param emuDBhandle emuDB handle as returned by \code{\link{load_emuDB}} +##' @param name name of label group +##' @param values character vector of labels +##' @keywords emuDB database schema Emu +##' @seealso add_attrDefLabelGroup +##' @name AddListRemoveLabelGroup +##' @examples +##' \dontrun{ +##' +##' ################################## +##' # prerequisite: loaded ae emuDB +##' # (see ?load_emuDB for more information) +##' +##' sampaNasals = c("m", "F", "n", "J", "N") +##' +##' # add these values to the ae emuDB +##' # as a globally available labelGroup +##' add_labelGroup(emuDBhandle = ae, +##' name = "sampaNasals", +##' values = sampaNasals) +##' +##' # query the labelGroup in the "Phonetic" level +##' query(emuDBhandle = ae, +##' query = "Phonetic == sampaNasals") +##' +##' # query the labelGroup in the "Phoneme" level +##' query(emuDBhandle = ae, +##' query = "Phoneme == sampaNasals") +##' +##' # list global label groups of ae emuDB +##' list_labelGroups(emuDBhandle = ae) +##' +##' # remove the newly added labelGroup +##' remove_labelGroup(emuDBhandle = ae, +##' name = "sampaNasals") +##' } +##' +NULL + +##' @rdname AddListRemoveLabelGroup +##' @export +add_labelGroup <- function(emuDBhandle, + name, + values){ + + check_emuDBhandle(emuDBhandle) + + dbConfig = load_DBconfig(emuDBhandle) + curLgs = list_labelGroups(emuDBhandle) + + if(name %in% curLgs$name){ + stop("labelGroup with name '", name ,"' already exists!") + } + + # add labelGroup + dbConfig$labelGroups[[length(dbConfig$labelGroups) + 1]] = list(name = name, + values = values) + + # store changes + store_DBconfig(emuDBhandle, dbConfig) +} + + +##' @rdname AddListRemoveLabelGroup +##' @export +list_labelGroups <- function(emuDBhandle){ + + check_emuDBhandle(emuDBhandle) + dbConfig = load_DBconfig(emuDBhandle) + df = data.frame(name = character(), + values = character(), + stringsAsFactors = FALSE) + + for(lg in dbConfig$labelGroups){ + df = rbind(df, data.frame(name = lg$name, + values = paste0(lg$values, collapse = "; "), + stringsAsFactors = FALSE)) + } + + return(df) + +} + + +##' @rdname AddListRemoveLabelGroup +##' @export +remove_labelGroup <- function(emuDBhandle, + name){ + + check_emuDBhandle(emuDBhandle) + dbConfig = load_DBconfig(emuDBhandle) + curLgs = list_labelGroups(emuDBhandle) + + if(!name %in% curLgs$name){ + stop("No labelGroup with name '", name ,"' found!") + } + + for(i in 1:length(dbConfig$labelGroups)){ + if(dbConfig$labelGroups[[i]]$name == name){ + dbConfig$labelGroups[[i]] = NULL + } + } + + # store changes + store_DBconfig(emuDBhandle, dbConfig) +} + + +# FOR DEVELOPMENT +# library('testthat') +# test_file('tests/testthat/test_aaa_initData.R') +# test_file('tests/testthat/test_emuR-database.DBconfig.R') diff --git a/R/emuR-database.R b/R/emuR-database.R new file mode 100644 index 00000000..70718489 --- /dev/null +++ b/R/emuR-database.R @@ -0,0 +1,1352 @@ +############################################# +############################################# +# constants + +# API level of database object format +# increment this value if the internal database object format changes +emuDB.apiLevel = 3L + +# internalVars currently containing only server handle (should merge testingVars back into it as well) +.emuR_pkgEnv <- new.env() +assign("internalVars", list(testingVars = list(inMemoryCache = FALSE)), envir = .emuR_pkgEnv) + +############################################# +# file/folder suffixes of emuDB format + +emuDB.suffix = '_emuDB' +session.suffix = '_ses' +bundle.dir.suffix = '_bndl' +bundle.annotation.suffix = '_annot' +database.schema.suffix = '_DBconfig.json' +database.cache.suffix = '_emuDBcache.sqlite' + +############################################# +# create table / index definitions for DBI + +database.DDL.emuDB = 'CREATE TABLE emu_db ( + uuid VARCHAR(36) NOT NULL, + name TEXT, + PRIMARY KEY (uuid) +);' + +database.DDL.emuDB_session = 'CREATE TABLE session ( + db_uuid VARCHAR(36), + name TEXT, + PRIMARY KEY (db_uuid,name), + FOREIGN KEY (db_uuid) REFERENCES emu_db(uuid) ON DELETE CASCADE ON UPDATE CASCADE +);' + +database.DDL.emuDB_bundle = 'CREATE TABLE bundle ( + db_uuid VARCHAR(36), + session TEXT, + name TEXT, + annotates TEXT, + sample_rate FLOAT, + md5_annot_json TEXT, + PRIMARY KEY (db_uuid, session, name), + FOREIGN KEY (db_uuid, session) REFERENCES session(db_uuid, name) ON DELETE CASCADE ON UPDATE CASCADE +);' + +database.DDL.emuDB_items = 'CREATE TABLE items ( + db_uuid VARCHAR(36), + session TEXT, + bundle TEXT, + item_id INTEGER, + level TEXT, + type TEXT, + seq_idx INTEGER, + sample_rate FLOAT, + sample_point INTEGER, + sample_start INTEGER, + sample_dur INTEGER, + PRIMARY KEY (db_uuid, session, bundle, item_id), + FOREIGN KEY (db_uuid, session, bundle) REFERENCES bundle(db_uuid, session, name) ON DELETE CASCADE ON UPDATE CASCADE +);' + +database.DDL.emuDB_items_level_seq_idx = "CREATE INDEX IF NOT EXISTS items_level_seq_idx ON items(db_uuid, session, bundle, level, seq_idx)" + +# Important note: +# The primary key of items contains more columns then needed to identify a particular item. +# PRIMARY KEY (db_uuid,session,bundle,item_id) would be sufficient but the extended primary key +# is necessary to speed up the build_redundnatLinksForPathes SQL query. +# It did not work to create an index like the one in the comment line below. +# It seems the query always uses the index of the primary key. +#database.DDL.emuDB_itemsIdx='CREATE UNIQUE INDEX items_level_idx ON items(db_uuid,session,bundle,level,item_id,type)' + +database.DDL.emuDB_labels = 'CREATE TABLE labels ( + db_uuid VARCHAR(36), + session TEXT, + bundle TEXT, + item_id INTEGER, + label_idx INTEGER, + name TEXT, + label TEXT, + PRIMARY KEY (db_uuid, session, bundle, item_id, label_idx), + FOREIGN KEY (db_uuid, session, bundle) REFERENCES bundle(db_uuid, session, name) ON DELETE CASCADE ON UPDATE CASCADE + -- FOREIGN KEY (db_uuid, session, bundle, item_id) REFERENCES items(db_uuid, session, bundle, item_id) ON DELETE CASCADE +);' + +database.DDL.emuDB_label_nameLabel_idx = 'CREATE INDEX IF NOT EXISTS label_nameLabel_idx ON labels(db_uuid, bundle, session, item_id)' +# database.DDL.emuDB_label_nameLabel_idx2 = "CREATE INDEX IF NOT EXISTS label_nameLabel_idx2 ON labels(db_uuid, session, bundle, item_id, name, label)" + +database.DDL.emuDB_links = 'CREATE TABLE links ( + db_uuid VARCHAR(36) NOT NULL, + session TEXT, + bundle TEXT, + from_id INTEGER, + to_id INTEGER, + label TEXT, + FOREIGN KEY (db_uuid, session, bundle) REFERENCES bundle(db_uuid, session, name) ON DELETE CASCADE ON UPDATE CASCADE +);' + +database.DDL.emuDB_links_both_ids_idx = paste0("CREATE INDEX IF NOT EXISTS ", + " links_both_ids_idx ", + "ON links(db_uuid, session, bundle, from_id, to_id)") +database.DDL.emuDB_links_to_id_idx = paste0("CREATE INDEX IF NOT EXISTS ", + " links_to_id_idx ", + "ON links(db_uuid, session, bundle, to_id)") + +#################################### +######### DBI functions ############ +#################################### + +#################################### +# init functions (create tables and indices) + +initialize_emuDbDBI <- function(emuDBhandle, + createTables = TRUE, + createIndices = TRUE){ + # check of old tables are present and rename them + if(DBI::dbExistsTable(emuDBhandle$connection, "emuDB")){ + warning(paste0("INFO: Deprecated cache tables found. Deleting these and recreating ", + "SQL cache that adheres to new DB schema definition.\n")) + allTableNames = DBI::dbListTables(emuDBhandle$connection) + DBI::dbExecute(emuDBhandle$connection, paste0("DROP TABLE IF EXISTS ", "items")) + DBI::dbExecute(emuDBhandle$connection, paste0("DROP TABLE IF EXISTS ", "labels")) + DBI::dbExecute(emuDBhandle$connection, paste0("DROP TABLE IF EXISTS ", "links")) + DBI::dbExecute(emuDBhandle$connection, paste0("DROP TABLE IF EXISTS ", "linksTmp")) + DBI::dbExecute(emuDBhandle$connection, paste0("DROP TABLE IF EXISTS ", "linksExt")) + DBI::dbExecute(emuDBhandle$connection, paste0("DROP TABLE IF EXISTS ", "linksExtTmp")) + DBI::dbExecute(emuDBhandle$connection, paste0("DROP TABLE IF EXISTS ", "linksExtTmp2")) + DBI::dbExecute(emuDBhandle$connection, paste0("DROP TABLE IF EXISTS ", "bundle")) + DBI::dbExecute(emuDBhandle$connection, paste0("DROP TABLE IF EXISTS ", "session")) + DBI::dbExecute(emuDBhandle$connection, paste0("DROP TABLE IF EXISTS ", "emuDB")) + }else if(DBI::dbExistsTable(emuDBhandle$connection, "links_ext")){ + warning(paste0("INFO: Found deprecated links_ext table. Deleting this table as it ", + "is not needed any longer.\n")) + DBI::dbExecute(emuDBhandle$connection, paste0("DROP TABLE IF EXISTS ", "links_ext")) + } + + if(createTables & !DBI::dbExistsTable(emuDBhandle$connection, "emu_db")){ + DBI::dbExecute(emuDBhandle$connection, database.DDL.emuDB) + DBI::dbExecute(emuDBhandle$connection, database.DDL.emuDB_session) + DBI::dbExecute(emuDBhandle$connection, database.DDL.emuDB_bundle) + DBI::dbExecute(emuDBhandle$connection, database.DDL.emuDB_items) + DBI::dbExecute(emuDBhandle$connection, database.DDL.emuDB_labels) + DBI::dbExecute(emuDBhandle$connection, database.DDL.emuDB_links) + } + if(createTables & !DBI::dbExistsTable(emuDBhandle$connection, "meta_jsons")){ + # browser() + } + if(createIndices){ + create_emuDBindicesDBI(emuDBhandle) + } +} + +create_emuDBindicesDBI<-function(emuDBhandle){ + DBI::dbExecute(emuDBhandle$connection, database.DDL.emuDB_items_level_seq_idx) + DBI::dbExecute(emuDBhandle$connection, database.DDL.emuDB_links_both_ids_idx) + DBI::dbExecute(emuDBhandle$connection, database.DDL.emuDB_links_to_id_idx) + DBI::dbExecute(emuDBhandle$connection, database.DDL.emuDB_label_nameLabel_idx) + # DBI::dbExecute(emuDBhandle$connection, database.DDL.emuDB_label_nameLabel_idx2) +} + + +#################################### +# emuDB table DBI functions + +add_emuDbDBI <- function(emuDBhandle){ + dbSqlInsert = paste0("INSERT INTO ", + " emu_db(uuid,name) ", + "VALUES('", emuDBhandle$UUID, "','", emuDBhandle$dbName, "')") + DBI::dbExecute(emuDBhandle$connection, dbSqlInsert) +} + +get_emuDbDBI <- function(emuDBhandle){ + query = paste0("SELECT * ", + "FROM emu_db ", + "WHERE uuid='", emuDBhandle$UUID, "'") + res <- DBI::dbGetQuery(emuDBhandle$connection, query) + return(res) +} + + +#################################### +# session table DBI functions + +add_sessionDBI <- function(emuDBhandle, sessionName){ + insertSessionSql = paste0("INSERT INTO ", + " session(db_uuid, name) ", + "VALUES('", emuDBhandle$UUID,"','", sessionName, "')") + DBI::dbExecute(emuDBhandle$connection, insertSessionSql) +} + +list_sessionsDBI <- function(emuDBhandle){ + dbs=DBI::dbGetQuery(emuDBhandle$connection, paste0("SELECT name ", + "FROM session ", + "WHERE db_uuid='", emuDBhandle$UUID, "'")) + return(dbs) +} + + +remove_sessionDBI <- function(emuDBhandle, sessionName){ + DBI::dbExecute(emuDBhandle$connection, paste0("DELETE FROM ", + " session ", + "WHERE db_uuid='", emuDBhandle$UUID, "' ", + " AND name='", sessionName, "'")) +} + +#################################### +# bundle table DBI functions + +add_bundleDBI <- function(emuDBhandle, sessionName, name, annotates, sampleRate, MD5annotJSON){ + insertBundleSql = paste0("INSERT INTO ", + " bundle(db_uuid, session, name, annotates, sample_rate, md5_annot_json) ", + "VALUES(", + " '", emuDBhandle$UUID, "', ", + " '", sessionName, "', ", + " '", name, "', ", + " '", annotates, "', ", + sampleRate, ", ", + "'", MD5annotJSON, "' ", + ")") + DBI::dbExecute(emuDBhandle$connection, insertBundleSql) +} + +list_bundlesDBI <- function(emuDBhandle, sessionName = NULL){ + if(is.null(sessionName)){ + bundle = DBI::dbGetQuery(emuDBhandle$connection, paste0("SELECT ", + " session, ", + "name ", + "FROM bundle ", + "WHERE db_uuid='", emuDBhandle$UUID, "'")) + }else{ + bundle = DBI::dbGetQuery(emuDBhandle$connection, paste0("SELECT ", + " session, ", + " name ", + "FROM bundle ", + "WHERE db_uuid='", emuDBhandle$UUID, "' ", + " AND session='", sessionName, "'")) + } + return(bundle) +} + +remove_bundleDBI <- function(emuDBhandle, sessionName, name){ + DBI::dbExecute(emuDBhandle$connection, paste0("DELETE FROM ", + " bundle ", + "WHERE db_uuid='", emuDBhandle$UUID, "' ", + " AND session='", sessionName, "' ", + " AND name='", name, "'")) +} + +# MD5annotJSON +get_MD5annotJsonDBI <- function(emuDBhandle, sessionName, name){ + MD5annotJSON = DBI::dbGetQuery(emuDBhandle$connection, paste0("SELECT ", + "md5_annot_json as md5 ", + "FROM bundle ", + "WHERE db_uuid='", emuDBhandle$UUID, "' ", + " AND session='", sessionName, "' ", + " AND name='", name, "'"))$md5 + if(length(MD5annotJSON) == 0){ + MD5annotJSON = "" + } + return(MD5annotJSON) +} + +#################################### +# items, links, labels DBI functions + +create_insertStatements <- function(emuDBhandle){ + storeItemsStatement <- DBI::dbSendStatement( + emuDBhandle$connection, + paste0("INSERT INTO items (", + " db_uuid, ", + " session, ", + " bundle, ", + " item_id, ", + " level, ", + " type, ", + " seq_idx, ", + " sample_rate, ", + " sample_point, ", + " sample_start, ", + " sample_dur) ", + "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)")) + + DBI::dbClearResult(storeItemsStatement) + + storeLabelsStatement <- DBI::dbSendStatement( + emuDBhandle$connection, + paste0("INSERT INTO labels (", + " db_uuid, ", + " session, ", + " bundle, ", + " item_id, ", + " label_idx, ", + " name, ", + " label) ", + "VALUES (?, ?, ?, ?, ?, ?, ?)")) + + DBI::dbClearResult(storeLabelsStatement) + + storeLinksStatement <- DBI::dbSendStatement( + emuDBhandle$connection, + paste0("INSERT INTO links (", + " db_uuid, ", + " session, ", + " bundle, ", + " from_id, ", + " to_id, ", + " label) ", + "VALUES (?, ?, ?, ?, ?, ?)")) + + DBI::dbClearResult(storeLinksStatement) + + return(list(storeItemsStatement = storeItemsStatement, + storeLabelsStatement = storeLabelsStatement, + storeLinksStatement = storeLinksStatement)) +} + +store_bundleAnnotDFsDBI <- function(emuDBhandle, + bundleAnnotDFs, + sessionName, + bundleName) { + + # insert items table entries (fist exanding it with db_uuid, session and bundle columns) + if(nrow(bundleAnnotDFs$items) > 0){ + bundleAnnotDFs$items = data.frame(db_uuid = emuDBhandle$UUID, + session = sessionName, + bundle = bundleName, + bundleAnnotDFs$items, + stringsAsFactors = FALSE) + # + # DBI::dbWriteTable(emuDBhandle$connection, + # "items", + # bundleAnnotDFs$items, + # append = TRUE, + # row.names = FALSE) + + # DBI::dbAppendTable(emuDBhandle$connection, + # "items", + # bundleAnnotDFs$items) + + + storeItemsStatement <- DBI::dbSendStatement( + emuDBhandle$connection, + paste0("INSERT INTO items (", + " db_uuid, ", + " session, ", + " bundle, ", + " item_id, ", + " level, ", + " type, ", + " seq_idx, ", + " sample_rate, ", + " sample_point, ", + " sample_start, ", + " sample_dur) ", + "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)")) + + DBI::dbBind( + storeItemsStatement, + list( + bundleAnnotDFs$items$db_uuid, + bundleAnnotDFs$items$session, + bundleAnnotDFs$items$bundle, + bundleAnnotDFs$items$item_id, + bundleAnnotDFs$items$level, + bundleAnnotDFs$items$type, + bundleAnnotDFs$items$seq_idx, + bundleAnnotDFs$items$sample_rate, + bundleAnnotDFs$items$sample_point, + bundleAnnotDFs$items$sample_start, + bundleAnnotDFs$items$sample_dur + ) + ) + + DBI::dbClearResult(storeItemsStatement) + + } + + # insert labels table entries (fist exanding it with db_uuid, session and bundle columns) + if(nrow(bundleAnnotDFs$labels) > 0){ + bundleAnnotDFs$labels = data.frame(db_uuid = emuDBhandle$UUID, + session = sessionName, + bundle = bundleName, + bundleAnnotDFs$labels, + stringsAsFactors = FALSE) + + # DBI::dbWriteTable(emuDBhandle$connection, + # "labels", + # bundleAnnotDFs$labels, + # append = TRUE, + # row.names = FALSE) + + # DBI::dbAppendTable(emuDBhandle$connection, + # "labels", + # bundleAnnotDFs$labels) + + storeLabelsStatement <- DBI::dbSendStatement( + emuDBhandle$connection, + paste0("INSERT INTO labels (", + " db_uuid, ", + " session, ", + " bundle, ", + " item_id, ", + " label_idx, ", + " name, ", + " label) ", + "VALUES (?, ?, ?, ?, ?, ?, ?)")) + + DBI::dbBind( + storeLabelsStatement, + list( + bundleAnnotDFs$labels$db_uuid, + bundleAnnotDFs$labels$session, + bundleAnnotDFs$labels$bundle, + bundleAnnotDFs$labels$item_id, + bundleAnnotDFs$labels$label_idx, + bundleAnnotDFs$labels$name, + bundleAnnotDFs$labels$label + ) + ) + + DBI::dbClearResult(storeLabelsStatement) + } + + # insert links table entries (fist exanding it with db_uuid, session and bundle columns) + if(nrow(bundleAnnotDFs$links) > 0){ + bundleAnnotDFs$links = data.frame(db_uuid = emuDBhandle$UUID, + session = sessionName, + bundle = bundleName, + bundleAnnotDFs$links, + label = NA, + stringsAsFactors = FALSE) + + # DBI::dbWriteTable(emuDBhandle$connection, + # "links", + # bundleAnnotDFs$links, + # append = TRUE, + # row.names = FALSE) + + # DBI::dbAppendTable(emuDBhandle$connection, + # "links", + # bundleAnnotDFs$links) + + storeLinksStatement <- DBI::dbSendStatement( + emuDBhandle$connection, + paste0("INSERT INTO links (", + " db_uuid, ", + " session, ", + " bundle, ", + " from_id, ", + " to_id, ", + " label) ", + "VALUES (?, ?, ?, ?, ?, ?)")) + + DBI::dbBind( + storeLinksStatement, + list( + bundleAnnotDFs$links$db_uuid, + bundleAnnotDFs$links$session, + bundleAnnotDFs$links$bundle, + bundleAnnotDFs$links$from_id, + bundleAnnotDFs$links$to_id, + bundleAnnotDFs$links$label + ) + ) + + DBI::dbClearResult(storeLinksStatement) + } +} + +load_bundleAnnotDFsDBI <- function(emuDBhandle, + sessionName, + bundleName){ + + DBconfig = load_DBconfig(emuDBhandle) + levelDefs = list_levelDefinitions(emuDBhandle) + # meta infos + annotates = paste0(bundleName, ".", DBconfig$mediafileExtension) + sampleRateQuery = paste0("SELECT ", + " sample_rate ", + "FROM bundle ", + "WHERE db_uuid='", emuDBhandle$UUID, "' ", + " AND session='", sessionName, "' ", + " AND name='", bundleName,"'") + sampleRate = DBI::dbGetQuery(emuDBhandle$connection, sampleRateQuery)$sample_rate + + # items + itemsQuery = paste0("SELECT ", + " item_id, ", + " level, ", + " type, ", + " seq_idx, ", + " sample_rate, ", + " sample_point, ", + " sample_start, ", + " sample_dur ", + "FROM items ", + "WHERE db_uuid='", emuDBhandle$UUID, "' ", + " AND session='", sessionName, "' ", + " AND bundle='", bundleName,"' ", + "ORDER BY ", + " level, ", + " seq_idx") + items = DBI::dbGetQuery(emuDBhandle$connection, itemsQuery) + # reorder items to match DBconfig + items = items[order(match(items$level,levelDefs$name)),] + + # labels + labelsQuery = paste0("SELECT ", + " item_id, ", + " label_idx, ", + " name, ", + " label ", + "FROM ", + " labels ", + "WHERE db_uuid='", emuDBhandle$UUID, "' ", + " AND session='", sessionName, "' ", + " AND bundle='", bundleName,"'") + labels = DBI::dbGetQuery(emuDBhandle$connection, labelsQuery) + + # links + + linksQuery = paste0("SELECT ", + " from_id, ", + " to_id, ", + " label ", + "FROM links ", + "WHERE db_uuid='", emuDBhandle$UUID, "' ", + " AND session='", sessionName, "' ", + " AND bundle='", bundleName,"'") + links = DBI::dbGetQuery(emuDBhandle$connection, linksQuery) + + + return(list(name = bundleName, + annotates = annotates, + sampleRate = sampleRate, + items = items, + links = links, + labels = labels)) +} + + + +remove_bundleAnnotDBI<-function(emuDBhandle, + sessionName, + bundleName){ + + cntSqlQuery=paste0("SELECT * ", + "FROM items ", + "WHERE db_uuid='", emuDBhandle$UUID, "' ", + " AND session='", sessionName, "' ", + " AND bundle='", bundleName,"'") + res<-DBI::dbGetQuery(emuDBhandle$connection, cntSqlQuery) + + delSqlQuery=paste0("DELETE FROM items ", + "WHERE db_uuid='", emuDBhandle$UUID, "' ", + " AND session='", sessionName, "' ", + " AND bundle='", bundleName, "'") + DBI::dbExecute(emuDBhandle$connection, delSqlQuery) + + delSqlQuery=paste0("DELETE FROM labels ", + "WHERE db_uuid='", emuDBhandle$UUID, "' ", + " AND session='", sessionName, "' ", + " AND bundle='", bundleName,"'") + DBI::dbExecute(emuDBhandle$connection, delSqlQuery) + + delSqlQuery=paste0("DELETE FROM links ", + "WHERE db_uuid='", emuDBhandle$UUID, "' ", + " AND session='", sessionName, "' ", + " AND bundle='", bundleName, "'") + DBI::dbExecute(emuDBhandle$connection, delSqlQuery) + +} + + +########################################## +################# emuDB ################## +########################################## + +##' Rename emuDB +##' @description Rename a emuDB. This effectively renames the folder of a +##' emuDB the _DBconfig.json file as well as the "name" entry in the _DBconfig.json +##' file and the _emuDBcache.sqlite file if available. +##' @param databaseDir directory of the emuDB +##' @param newName new name of emuDB +##' @export +##' @examples +##' \dontrun{ +##' +##' ################################## +##' # prerequisite: loaded ae emuDB +##' # (see ?load_emuDB for more information) +##' +##' # rename ae emuDB to "aeNew" +##' rename_emuDB(databaseDir = "/path/2/ae_emuDB", newName = "aeNew") +##' +##' } +##' +rename_emuDB <- function(databaseDir, newName){ + + dbName_old = stringr::str_replace_all(basename(databaseDir), + pattern = "_emuDB$", + "") + + ####################### + # handle DBconfig.json + dbCfgPath_old = file.path(databaseDir, + paste0(dbName_old, + database.schema.suffix)) + dbCfgPath_new = file.path(databaseDir, + paste0(newName, + database.schema.suffix)) + dbConfig = jsonlite::fromJSON(dbCfgPath_old, + simplifyVector = FALSE) + + # change name entry, store and rename DBconfig + dbConfig$name = newName + json = jsonlite::toJSON(dbConfig, + auto_unbox = TRUE, + force = TRUE, + pretty = TRUE) + writeLines(json, + dbCfgPath_old, + useBytes = TRUE) + file.rename(dbCfgPath_old, dbCfgPath_new) + + ############################ + # handle emuDBcache.sqlite + cachePath_old = file.path(normalizePath(databaseDir), + paste0(dbName_old, + database.cache.suffix)) + cachePath_new = file.path(normalizePath(databaseDir), + paste0(newName, + database.cache.suffix)) + if(file.exists(cachePath_old)){ # because it doesn't have to exist if it hasn't been created yet + file.rename(cachePath_old, + cachePath_new) + + + con <- DBI::dbConnect(RSQLite::SQLite(), cachePath_new) + DBI::dbExecute(con, paste0("UPDATE emu_db ", + "SET name = '", newName, "' ", + "WHERE uuid = '", dbConfig$UUID, "'")) + DBI::dbDisconnect(con) + con = NULL # delete -> disconnect + } + ############################ + # handle _emuDB folder + databaseDir_new = file.path(stringr::str_replace_all(normalizePath(databaseDir), + pattern = basename(normalizePath(databaseDir)), + ""), + paste0(newName, emuDB.suffix)) + file.rename(databaseDir, databaseDir_new) + + return(invisible(NULL)) +} + +############################################# +# function that use emuDB files (vs. DBI) + +##' List sessions of emuDB +##' @description List session names of emuDB +##' @param emuDBhandle emuDB handle as returned by \code{\link{load_emuDB}} +##' @param sessionPattern A regular expression pattern matching session names to +##' be searched for in the database. Note: "_ses$" is appended to this RegEx automatically +##' @return data.frame object with session names +##' @export +##' @examples +##' \dontrun{ +##' +##' ################################## +##' # prerequisite: loaded ae emuDB +##' # (see ?load_emuDB for more information) +##' +##' # list all sessions of ae emuDB +##' list_sessions(emuDBhandle = ae) +##' +##' } +##' +list_sessions <- function(emuDBhandle, + sessionPattern = ".*"){ + check_emuDBhandle(emuDBhandle, checkCache = FALSE) + sesPattern = paste0(sessionPattern, session.suffix ,"$") + sesDirs = dir(emuDBhandle$basePath, pattern = sesPattern) + sesDirs = gsub(paste0(session.suffix, "$"), "", sesDirs) + return(data.frame(name = sesDirs, stringsAsFactors = FALSE)) +} + +##' List bundles of emuDB +##' +##' List all bundles of emuDB or of particular session. +##' @param emuDBhandle emuDB handle as returned by \code{\link{load_emuDB}} +##' @param session optional session (depricated!) +##' @param sessionPattern A regular expression pattern matching session names to +##' be searched for in the database. Note: "_ses$" is appended to this RegEx automatically +##' @param bundlePattern A regular expression pattern matching bundle names to +##' be searched for in the database. Note: "_bndl$" is appended to this RegEx automatically +##' @return data.frame object with columns session and name of bundles +##' @export +##' @examples +##' \dontrun{ +##' +##' ################################## +##' # prerequisite: loaded ae emuDB +##' # (see ?load_emuDB for more information) +##' +##' # list bundles of session "0000" of ae emuDB +##' list_bundles(emuDBhandle = ae, +##' session = "0000") +##' +##' } +##' +list_bundles <- function(emuDBhandle, + session = NULL, + sessionPattern = ".*", + bundlePattern = ".*"){ + + check_emuDBhandle(emuDBhandle, checkCache = FALSE) + sesDf = list_sessions(emuDBhandle, sessionPattern) + if(!is.null(session)){ + warning("the session parameter is depricated! Please use sessionPattern insead.") + sesDf = dplyr::filter(sesDf, .data$name == session) + } + bndlPattern = paste0(bundlePattern, bundle.dir.suffix ,"$") + res = data.frame(session = character(), + name = character(), + stringsAsFactors = FALSE) + + + for(ses in sesDf$name){ + bndlDirs = dir(file.path(emuDBhandle$basePath, + paste0(ses, session.suffix)), + pattern = bndlPattern) + bndlNames = gsub(paste0(bundle.dir.suffix, "$"), "", bndlDirs) + if(length(bndlNames) > 0){ + res = rbind(res, + data.frame(session = ses, + name = bndlNames, + stringsAsFactors = FALSE)) + } + } + return(tibble::as_tibble(res)) +} + +##' Rename bundles in emuDB +##' +##' Rename bundles of emuDB. +##' @param emuDBhandle emuDB handle as returned by \code{\link{load_emuDB}} +##' @param bundles data.frame like object with the columns +##' \itemize{ +##' \item \code{session}: name of sessions containing bundle +##' \item \code{name}: name of bundle +##' \item \code{name_new}: new name given to bundle +##' } +##' It is worth noting that \code{session} and \code{name} are the columns returned by +##' \code{\link{list_bundles}}. +##' @export +##' @examples +##' \dontrun{ +##' +##' ################################## +##' # prerequisite: loaded ae emuDB +##' # (see ?load_emuDB for more information) +##' +##' # list bundles of session "0000" of ae emuDB +##' bundles = list_bundles(emuDBhandle = ae, +##' session = "0000") +##' +##' # append "XXX" to bundle names and rename +##' bundles$name_new = paste0(bundles$name, "XXX") +##' rename_bundles(emuDBhandle, bundles) +##' } +##' +rename_bundles <- function(emuDBhandle, bundles){ + + # check that all cols are present + if(!all(c("session", "name", "name_new") %in% colnames(bundles))){ + stop(paste0("Missing requiered column(s)! ", + "The requiered columns are: 'session', 'name' and 'name_new'")) + } + # check if all bundles exits + all_bundles = list_bundles(emuDBhandle) + all_bundles$leftjoinNA = "should_not_be_na" + joined = dplyr::left_join(bundles, + all_bundles, + by = c("session", "name")) + + if(any(is.na(joined$leftjoinNA))){ + stop(paste0("The following bundles where not found in the emuDB:\n"), + paste(utils::capture.output(print(bundles[is.na(joined$leftjoinNA),])), collapse = "\n")) + } + + # bundles$db_uuid = emuDBhandle$UUID + # DBI::dbExecute(emuDBhandle$connection, "DROP TABLE IF EXISTS bundles_tmp;") + # + # DBI::dbWriteTable(emuDBhandle$connection, + # "bundles_tmp", + # bundles, + # append = TRUE, + # row.names = FALSE) # append to avoid rewirte of col names + + + foreign_key_list = DBI::dbGetQuery(emuDBhandle$connection, "PRAGMA foreign_key_list(bundle);") + + # check if ON UPDATE CASCADE ist set + if(!all(foreign_key_list$on_update == "CASCADE")){ + stop(paste0("'ON UPDATE CASCADE' not set on emuDBcache SQL tables (Previous version of emuR didn't set this). ", + "Deleting the emuDBcache and reloading the emuDB should resolve this issue.")) + } + + db_config = load_DBconfig(emuDBhandle) + + # update bundle table + statement = DBI::dbSendStatement(emuDBhandle$connection, + paste0("UPDATE bundle ", + "SET name = ?,", + " annotates = ? ", + "WHERE bundle.db_uuid = ? ", + " AND bundle.session = ? ", + " AND bundle.name = ? ")) + + DBI::dbBind( + statement, + list( + bundles$name_new, + paste0(bundles$name_new, ".", db_config$mediafileExtension), + rep(emuDBhandle$UUID, nrow(bundles)), + bundles$session, + bundles$name + ) + ) + + DBI::dbClearResult(statement) + + # rename bundle dirs + old_bundle_paths = file.path(emuDBhandle$basePath, + paste0(bundles$session, session.suffix), + paste0(bundles$name, bundle.dir.suffix)) + + new_bundle_paths = file.path(emuDBhandle$basePath, + paste0(bundles$session, session.suffix), + paste0(bundles$name_new, bundle.dir.suffix)) + + file.rename(from = old_bundle_paths, + to = new_bundle_paths) + + # rename files in bundles + for(i in 1:nrow(bundles)){ + cur_bndl = bundles[i,] + old_files = list_files(emuDBhandle, bundlePattern = paste0("^", cur_bndl$name_new, "$")) + new_file_names = stringr::str_replace_all(old_files$file, cur_bndl$name, cur_bndl$name_new) + file.rename(from = file.path(emuDBhandle$basePath, + paste0(old_files$session, session.suffix), + paste0(old_files$bundle, bundle.dir.suffix), + old_files$file), + to = file.path(emuDBhandle$basePath, + paste0(old_files$session, session.suffix), + paste0(old_files$bundle, bundle.dir.suffix), + new_file_names)) + } + + rewrite_annots(emuDBhandle) + +} + +## rewrite annot json files from the cache +## @param emuDBhandle emuDBhandle +## @param bundles data.frame containing session +## and bundle colums (e.g. see output of list_bundles()) +rewrite_annots <- function(emuDBhandle, + bundles = NULL, + verbose = TRUE){ + + if(is.null(bundles)){ + bndls = list_bundles(emuDBhandle) + }else{ + bndls = bundles + } + # check if any bundles exist + if(nrow(bndls) == 0){ + return() + } + + progress = 0 + if(verbose){ + bundleCount=nrow(bndls) + cat(" INFO: Rewriting", bundleCount, "_annot.json files to file system...\n") + pb=utils::txtProgressBar(min=0,max=bundleCount,style=3) + utils::setTxtProgressBar(pb,progress) + } + + for(i in 1:nrow(bndls)){ + bndl = bndls[i,] + + bundleAnnotDFs = load_bundleAnnotDFsDBI(emuDBhandle, + bndl$session, + bndl$name) + + annotJSONchar = bundleAnnotDFsToAnnotJSONchar(emuDBhandle, + bundleAnnotDFs) + + # construct path to annotJSON + annotFilePath = file.path(emuDBhandle$basePath, + paste0(bndl$session, session.suffix), + paste0(bndl$name, bundle.dir.suffix), + paste0(bndl$name, bundle.annotation.suffix, '.json')) + + writeLines(annotJSONchar, annotFilePath, useBytes = TRUE) + + # (re-)calculate md5 sums + newMD5sum = tools::md5sum(annotFilePath) + res = DBI::dbExecute(emuDBhandle$connection, paste0("UPDATE bundle ", + "SET md5_annot_json = '", newMD5sum, "' ", + "WHERE db_uuid ='", emuDBhandle$UUID, "' ", + " AND session='", bndl$session, "' ", + " AND name='", bndl$name, "'")) + + progress = progress + 1L + if(verbose){ + utils::setTxtProgressBar(pb,progress) + } + } +} + + + +######################################################### +# store / create / load functions + +## Store EMU database to directory +## +## @details +## options is a list of key value pairs: +## rewriteSSFFTracks: if TRUE rewrite SSF tracks instead of file +## copy to get rid of big endian encoded SSFF files (SPARC), default: FALSE +## ignoreMissingSSFFTrackFiles if TRUE missing SSFF track files +## are ignored, default: FALSE +## symbolicLinkSignalFiles if TRUE signal files are symbolic linked +## instead of copied. Implies: rewriteSSFFTracks=FALSE, Default: FALSE +## +## @param emuDBhandle emuDB handle as returned by \code{\link{load_emuDB}} +## @param targetDir target directory +## @param options list of options +## @param verbose show infos and progress bar +## @import stringr uuid jsonlite +## @keywords emuDB database Emu +## @seealso \code{\link{load_emuDB}} +## @examples +## \dontrun{ +## # Store database 'ae' to directory /homes/mylogin/EMUnew/ +## +## store('ae',"/homes/mylogin/EmuStore/") +## +## } +## +##' @import stringr uuid jsonlite +store <- function(emuDBhandle, + targetDir, + options = NULL, + verbose = TRUE){ + # default options + # ignore missing SSFF track files + # rewrite SSFF track files + mergedOptions = list(ignoreMissingSSFFTrackFiles = TRUE, + rewriteSSFFTracks = FALSE, + symbolicLinkSignalFiles = FALSE) + if(!is.null(options)){ + for(opt in names(options)){ + mergedOptions[[opt]] = options[[opt]] + } + } + + progress = 0 + # check target dir + if(file.exists(targetDir)){ + tdInfo = file.info(targetDir) + if(!tdInfo[['isdir']]){ + stop(targetDir," exists and is not a directory.") + } + }else{ + # create target dir + created = dir.create(targetDir) + if(!created){ + stop("Couldn't create ", targetDir) + } + } + + # build db dir name + dbDirName = paste0(emuDBhandle$dbName, emuDB.suffix) + # create database dir in targetdir + pp = file.path(targetDir, dbDirName) + # check existence + if(file.exists(pp)){ + stop(pp," already exists.") + } + + created = dir.create(pp) + if(!created){ + stop("Couldn't create ", pp) + } + + # check if handle has basePath if not -> emuDB doesn't extist yet -> create new DBconfig + if(is.null(emuDBhandle$basePath)){ + DBconfig = list(name = emuDBhandle$dbName, + UUID = emuDBhandle$UUID, + mediafileExtension = "wav", + ssffTrackDefinitions=list(), + levelDefinitions = list(), + linkDefinitions = list()) + }else{ + DBconfig = load_DBconfig(emuDBhandle) + } + + # set editable + showHierarchy + DBconfig[['EMUwebAppConfig']][['activeButtons']] = list(saveBundle=TRUE, + showHierarchy=TRUE) + + # store db schema file + store_DBconfig(emuDBhandle, + DBconfig, + basePath = pp) + + # create session dirs + sessions = list_sessionsDBI(emuDBhandle) + if(nrow(sessions) == 0){ + return() + } + sesDirPaths = file.path(pp, paste0(sessions$name, session.suffix)) + for(path in sesDirPaths){ + created = dir.create(path) + if(!created){ + stop("Coudln't create", path) + } + } + + # create bundle dirs + bndls = list_bundlesDBI(emuDBhandle) + if(nrow(bndls) == 0){ + return() + } + bndlDirPaths = file.path(pp, + paste0(sessions$name, session.suffix), + paste0(bndls$name, bundle.dir.suffix)) + for(path in bndlDirPaths){ + created = dir.create(path) + if(!created){ + stop("Coudln't create", path) + } + } + + # copy media files + mediaFilePathsOld = file.path(emuDBhandle$basePath, + paste0(sessions$name, session.suffix), + paste0(bndls$name, bundle.dir.suffix), + paste0(bndls$name, ".", DBconfig$mediafileExtension)) + mediaFilePathsNew = file.path(pp, + paste0(sessions$name, session.suffix), + paste0(bndls$name, bundle.dir.suffix), + paste0(bndls$name, ".", DBconfig$mediafileExtension)) + file.copy(mediaFilePathsOld, mediaFilePathsNew) + + # rewrite annotations (or should these just be a copied as well?) + emuDBhandle$basePath = pp + rewrite_annots(emuDBhandle, verbose = verbose) + + # copy SSFF files + ssffDefs = list_ssffTrackDefinitions(emuDBhandle) + if(!is.null(ssffDefs)){ + for(i in 1:nrow(ssffDefs)){ + ssffDef = ssffDefs[1,] + ssffFilePathsOld = file.path(emuDBhandle$basePath, + paste0(sessions$name, session.suffix), + paste0(bndls$name, bundle.dir.suffix), + paste0(bndls$name, ".", ssffDef$fileExtension)) + ssffFilePathsNew = file.path(pp, + paste0(sessions$name, session.suffix), + paste0(bndls$name, bundle.dir.suffix), + paste0(bndls$name, ".", ssffDef$fileExtension)) + file.copy(ssffFilePathsOld, ssffFilePathsNew) + } + } + +} + + +##' @title Create empty emuDB +##' @description Creates an empty emuDB in the target directory specified +##' @details Creates a new directory [name]_emuDB in targetDir. By default +##' the emuDB is created in the R session, written to the filesystem and +##' then purged from the R session. +##' @param name of new emuDB +##' @param targetDir target directory to store the emuDB to +##' @param mediaFileExtension defines mediaFileExtention (NOTE: currently only +##' 'wav' (the default) is supported by all components of EMU) +##' @param store store new created emuDB to file system +##' @param verbose display infos & show progress bar +##' @export +##' @examples +##' \dontrun{ +##' # create empty emuDB in folder provided by tempdir() +##' create_emuDB(name = "myNewEmuDB", +##' targetDir = tempdir()) +##' } +create_emuDB <- function(name, + targetDir, + mediaFileExtension = 'wav', + store = TRUE, + verbose = TRUE){ + + dbDirName = paste0(name,emuDB.suffix) + dbHandle = emuDBhandle(dbName = name , + basePath = NULL, + uuid::UUIDgenerate(), + ":memory:") + if(store){ + store(dbHandle, + targetDir = targetDir, + verbose = verbose) + } + + return(invisible()) +} + +##' Load emuDB +##' +##' @description Function loads emuDB into its cached representation and makes it accessible from within the +##' current R session by returning a emuDBhandle object +##' @details In order to access an emuDB from R it is necessary to load the annotation and configuration +##' files to an emuR internal database format. The function expects a emuDB file structure in directory +##' \code{databaseDir}. The emuDB configuration file is loaded first. On success the function iterates +##' through session and bundle directories and loads found annotation files. The parameter \code{inMemoryCache} +##' determines where the internal database is stored: If \code{FALSE} a database cache file in \code{databaseDir} +##' is used. When the database is loaded for the first time the function will create a new cache file and store +##' the data to it. On subsequent loading of the same database the cache is only updated if files have changed, +##' therefore the loading is then much faster. For this to work the user needs write permissions to +##' \code{databaseDir} and the cache file. The database is loaded into a volatile in-memory database if +##' \code{inMemoryCache} is set to \code{TRUE}. +##' @param databaseDir directory of the emuDB +##' @param inMemoryCache cache the loaded DB in memory +##' @param connection pass in DBI connection to SQL database if you want to override the default which is to +##' use an SQLite database either in memory (\code{inMemoryCache = TRUE}) or in the emuDB folder. This is intended +##' for expert use only! +##' @param verbose be verbose +##' @param ... additional parameters +##' @return emuDB handle object +##' @import jsonlite DBI +##' @export +##' @keywords emuDB database DBconfig +##' @examples +##' \dontrun{ +##' ## Load database ae in directory /homes/mylogin/EMUnew/ae +##' ## assuming an existing emuDB structure in this directory +##' +##' ae = load_emuDB("/homes/mylogin/EMU/ae") +##' +##' ## Load database ae from demo data +##' +##' # create demo data in temporary directory +##' create_emuRdemoData(dir = tempdir()) +##' # build base path to demo emuDB +##' demoDatabaseDir = file.path(tempdir(), "emuR_demoData", "ae_emuDB") +##' +##' # load demo emuDB +##' ae = load_emuDB(demoDatabaseDir) +##' +##' } + +load_emuDB <- function(databaseDir, + inMemoryCache = FALSE, + connection = NULL, + verbose = TRUE, + ...){ + + progress = 0 + # check database dir + if(!dir.exists(databaseDir)){ + stop("Database dir ",databaseDir," does not exist!") + } + dbDirInfo = file.info(databaseDir) + if(!dbDirInfo[['isdir']]){ + stop(databaseDir," exists, but is not a directory.") + } + + # load db schema file + dbCfgPattern = paste0('.*',database.schema.suffix,'$') + dbCfgFiles = list.files(path = databaseDir,dbCfgPattern) + dbCfgFileCount = length(dbCfgFiles) + if(dbCfgFileCount == 0){ + stop("Could not find global DB config JSON file (regex pattern: ",dbCfgPattern,") in ",databaseDir) + } + if(dbCfgFileCount>1){ + stop("Found multiple global DB config JSON files (regex pattern: ",dbCfgPattern,") in ",databaseDir) + } + + dbCfgPath = file.path(databaseDir, dbCfgFiles[[1]]) + if(!file.exists(dbCfgPath)){ + stop("Could not find database info file: ",dbCfgPath,"\n") + } + # extract ... (ellipsis) parameters + dots = list(...) + if("update_cache" %in% names(dots)){ + updateCache = dots$update_cache + }else{ + updateCache = TRUE + } + # load DBconfig + DBconfig = jsonlite::fromJSON(dbCfgPath, simplifyVector=FALSE) + # normalize base path + basePath = normalizePath(databaseDir) + + # shorthand vars + dbName = DBconfig$name + dbUUID = DBconfig$UUID + # create dbHandle + if(inMemoryCache){ + dbHandle = emuDBhandle(dbName, + basePath, + dbUUID, + connectionPath = ":memory:") + }else{ + cachePath = file.path(normalizePath(databaseDir), + paste0(dbName, database.cache.suffix)) + # check for read only emuDB -> if so copy cache to tempdir() and open connection + if(file.exists(cachePath)){ + if(any(file.access(c(basePath,cachePath), 2) == -1)){ + if(verbose){ + cat(paste0("INFO: Either emuDBcache or the emuDB dir have READ ONLY permissions! Moving emuDBcache to tempdir() directory...\n")) + } + tmpDirSubDir = file.path(tempdir(), "emuR_readOnlyCacheCopies") + if(!dir.exists(tmpDirSubDir)){ + created = dir.create(tmpDirSubDir) + if(!created){ + stop("Couldn't create", tmpDirSubDir) + } + } + file.copy(cachePath, tmpDirSubDir, overwrite = TRUE) + cacheCopyPath = file.path(normalizePath(tmpDirSubDir), paste0(dbName, database.cache.suffix)) + Sys.chmod(cacheCopyPath, mode = "755") + connection = DBI::dbConnect(RSQLite::SQLite(), cacheCopyPath) + } + } + if(is.null(connection)){ + dbHandle = emuDBhandle(dbName, + basePath, + dbUUID, + cachePath) + }else{ + dbHandle = emuDBhandle(dbName, + basePath, + dbUUID, + "", + connection = connection) + } + } + + + # check if cache exist -> update cache if true + dbsDf = get_emuDbDBI(dbHandle) + if(nrow(dbsDf)>0){ + if(updateCache){ + update_cache(dbHandle, verbose = verbose) + } + return(dbHandle) + } + + # write to DBI emuDB table + add_emuDbDBI(dbHandle) + + # list sessions & bundles + sessions = list_sessions(dbHandle) + bundles = list_bundles(dbHandle) + # add column to sessions to track if already stored + if(nrow(sessions) > 0 && nrow(bundles) > 0){ + sessions$stored = FALSE + + # calculate bundle count + bundleCount = nrow(bundles) + # create progress bar + pMax = bundleCount + if(pMax == 0){ + pMax = 1 + } + if(verbose){ + cat(paste0("INFO: Loading EMU database from ", + databaseDir, "... (", bundleCount , " bundles found)\n")) + pb=utils::txtProgressBar(min = 0L, max = pMax, style = 3) + utils::setTxtProgressBar(pb, progress) + } + + # bundles + DBI::dbBegin(dbHandle$connection) + for(bndlIdx in 1:nrow(bundles)){ + bndl = bundles[bndlIdx,] + # check if session has to be added to DBI + if(!(sessions$stored[sessions$name == bndl$session])){ + add_sessionDBI(dbHandle, bndl$session) + sessions$stored[sessions$name == bndl$session] = TRUE + } + + # construct path to annotJSON + annotFilePath = normalizePath(file.path(dbHandle$basePath, + paste0(bndl$session, session.suffix), + paste0(bndl$name, bundle.dir.suffix), + paste0(bndl$name, bundle.annotation.suffix, '.json'))) + + # calculate MD5 sum of bundle annotJSON + newMD5annotJSON = tools::md5sum(annotFilePath) + names(newMD5annotJSON) = NULL + + # read annotJSON as charac + annotJSONchar = readr::read_file(annotFilePath) + # convert to bundleAnnotDFs + bundleAnnotDFs = annotJSONcharToBundleAnnotDFs(annotJSONchar) + # add to bundle table + add_bundleDBI(dbHandle, + bndl$session, + bndl$name, + bundleAnnotDFs$annotates, + bundleAnnotDFs$sampleRate, + newMD5annotJSON) + # add to items, links, labels tables + store_bundleAnnotDFsDBI(dbHandle, + bundleAnnotDFs, + bndl$session, + bndl$name) + + # increase progress bar + progress=progress+1L + if(verbose){ + utils::setTxtProgressBar(pb,progress) + } + + } + DBI::dbCommit(dbHandle$connection) + if(verbose){ + cat("\n") + } + } + + return(dbHandle) + +} + +####################### +# FOR DEVELOPMENT +# library('testthat') +# test_file('tests/testthat/test_aaa_initData.R') +# test_file('tests/testthat/test_emuR-database.R') +# test_file('tests/testthat/test_duplicate.loaded.emuDB.R') +# test_file('tests/testthat/test_database.caching.R') diff --git a/R/emuR-database.annotJSON.R b/R/emuR-database.annotJSON.R new file mode 100644 index 00000000..456d5b15 --- /dev/null +++ b/R/emuR-database.annotJSON.R @@ -0,0 +1,223 @@ +####################################################### +# annotJSON representation to annotDFs conversion functions + +# convert annotJSON to list of data.frames including +# meta information (name, annotates, samplerate) +annotJSONcharToBundleAnnotDFs <- function(annotJSONchar){ + + jsonObj = jsonlite::fromJSON(annotJSONchar, simplifyVector=FALSE) # SIC pass in path 2 json instead -> a bit faster + + ############################## + # extract items + + # extract all IDs + allIds = unlist(lapply(jsonObj$levels, function(level){ + allIds = sapply(level$items, function(it) { + it$id + }) + })) + if(is.null(allIds)) allIds = integer() + + # extract all levels + allLevels = unlist(lapply(jsonObj$levels, function(level){ + allLevels = sapply(level$items, function(it) { + level$name + }) + })) + if(is.null(allLevels)) allLevels = character() + + # extract all types + allTypes = unlist(lapply(jsonObj$levels, function(level){ + allTypes = sapply(level$items, function(it) { + level$type + }) + })) + if(is.null(allTypes)) allTypes = character() + + # extract all seq_idx + allSeqIdx = unlist(lapply(jsonObj$levels, function(level){ + curIdx = 0 + allIdx = sapply(level$items, function(it) { + curIdx <<- curIdx + 1 + curIdx + }) + })) + if(is.null(allSeqIdx)) allSeqIdx = integer() + + # extract all sample points + allSamplePoints = unlist(lapply(jsonObj$levels, function(level){ + allSamplePoints = sapply(level$items, function(it) { + if(is.null(it$samplePoint)){ + return(NA) + }else{ + return(it$samplePoint) + } + }) + })) + if(is.null(allSamplePoints)) allSamplePoints = integer() + + # extract all sample start + allSampleStarts = unlist(lapply(jsonObj$levels, function(level){ + allSampleStarts = sapply(level$items, function(it) { + if(is.null(it$sampleStart)){ + return(NA) + }else{ + return(it$sampleStart) + } + }) + })) + if(is.null(allSampleStarts)) allSampleStarts = integer() + + # extract all sample durs + allSampleDurs = unlist(lapply(jsonObj$levels, function(level){ + allSampleDurs = sapply(level$items, function(it) { + if(is.null(it$sampleDur)){ + return(NA) + }else{ + return(it$sampleDur) + } + }) + })) + if(is.null(allSampleDurs)) allSampleDurs = integer() + + items = data.frame(item_id = allIds, level = allLevels, type = allTypes, + seq_idx = allSeqIdx, sample_rate = rep(jsonObj$sampleRate, length(allIds)), + sample_point = allSamplePoints, sample_start = allSampleStarts, + sample_dur = allSampleDurs, stringsAsFactors = FALSE) + + ############################## + # extract links + + # extract all from ids + allFromIds = sapply(jsonObj$links, function(l) l$fromID) + if(is.null(allFromIds)) allFromIds = integer() + # extract all to ids + allToIds = sapply(jsonObj$links, function(l) l$toID) + if(is.null(allToIds)) allToIds = integer() + + links = data.frame(from_id = allFromIds, + to_id = allToIds, + stringsAsFactors = FALSE) + + + ############################## + # extract labels + + # extract all label item ids + allLabelItemIds = unlist(lapply(jsonObj$levels, function(level){ + allLabelItemIds = lapply(level$items, function(it) { + allLabelItemIds = lapply(it$labels, function(l) it$id) + }) + })) + if(is.null(allLabelItemIds)) allLabelItemIds = integer() + + # extract all label label idx + allLabelLabelIdx = unlist(lapply(jsonObj$levels, function(level){ + allLabelLabelIdx = lapply(level$items, function(it) { + curIdx = 0 + allLabelLabelIdx = lapply(it$labels, function(l) { + curIdx <<- curIdx + 1 + curIdx + }) + }) + })) + if(is.null(allLabelLabelIdx)) allLabelLabelIdx = integer() + + # extract all label names + allLabelNames = unlist(lapply(jsonObj$levels, function(level){ + allLabelNames = lapply(level$items, function(it) { + allLabelNames = lapply(it$labels, function(l) l$name) + }) + })) + if(is.null(allLabelNames)) allLabelNames = character() + + # extract all label values + allLabelValues = unlist(lapply(jsonObj$levels, function(level){ + allLabelValues = lapply(level$items, function(it) { + allLabelValues = lapply(it$labels, function(l) l$value) + }) + })) + if(is.null(allLabelValues)) allLabelValues = character() + + labels = data.frame(item_id = allLabelItemIds, + label_idx = allLabelLabelIdx, + name = allLabelNames, + label = allLabelValues, + stringsAsFactors = FALSE) + + return(list(name = jsonObj$name, + annotates = jsonObj$annotates, + sampleRate = jsonObj$sampleRate, + items = items, + links = links, + labels = labels)) + +} + +# convert annotDFs (annotation list of data.frame representation) to annotJSON +bundleAnnotDFsToAnnotJSONchar <- function(emuDBhandle, annotDFs){ + # NOTE load_bundleAnnotDFsDBI that produces annotDFs orders the items according to DBconfig + + # load DBconfig to generate levelNames vector (although levels are not ordered per say) + levelDefs = list_levelDefinitions(emuDBhandle) + + attrDefs = list_attributeDefinitions(emuDBhandle, levelDefs$name) + + levelsdf = dplyr::full_join(annotDFs$items, + attrDefs, + by=c("level"), + relationship="many-to-many") %>% + dplyr::left_join(annotDFs$labels, by=c("item_id", "name")) + + levelsdf$label[is.na(levelsdf$label)] = "" # set missing labels top "" + + # convert columns that are split() to factors to prevent reodering + levelsdf$level = factor(levelsdf$level, levels = unique(levelDefs$name)) + + levels = split(levelsdf, levelsdf$level) %>% + purrr::map(function(lev) { + # convert columns that are split() to factors to prevent reodering + lev$item_id = factor(lev$item_id, levels = unique(lev$item_id)) + split(lev, lev$item_id) + }) %>% + purrr::modify_depth(2, function(df){ + if(unique(df$type.x) == "ITEM"){ + list(id = as.integer(as.character(unique(df$item_id))), + labels = data.frame(name = df$name, value = df$label, stringsAsFactors = FALSE)) + }else if(unique(df$type.x) == "SEGMENT"){ + list(id=as.integer(as.character(unique(df$item_id))), + sampleStart = unique(df$sample_start), + sampleDur = unique(df$sample_dur), + labels = data.frame(name = df$name, value = df$label, stringsAsFactors = FALSE)) + }else if(unique(df$type.x) == "EVENT"){ + list(id=as.integer(as.character(unique(df$item_id))), + samplePoint = unique(df$sample_point), + labels = data.frame(name = df$name, value = df$label, stringsAsFactors = FALSE)) + } + }) + + levels = purrr::map2(levels, 1:length(levels), function(itemsList, levelIdx){ + names(itemsList) = NULL # remove items key values + list(name = levelDefs$name[levelIdx], type = levelDefs$type[levelIdx], items = itemsList) + }) + + names(levels) = NULL # remove level key values + + # build links + if(nrow(annotDFs$links) > 0){ + links = data.frame(fromID = annotDFs$links$from_id, + toID = annotDFs$links$to_id, + stringsAsFactors = FALSE) + }else{ + links = list() + } + + annotJSON = list(name = annotDFs$name, + annotates = annotDFs$annotates, + sampleRate = annotDFs$sampleRate, + levels = levels, links = links) + + return(jsonlite::toJSON(annotJSON, auto_unbox = TRUE, force = TRUE, pretty = TRUE)) + +} + diff --git a/R/emuR-database.caching.R b/R/emuR-database.caching.R new file mode 100644 index 00000000..60b840d0 --- /dev/null +++ b/R/emuR-database.caching.R @@ -0,0 +1,192 @@ +## Update cache of emuDB +## +## Updates sqlite cache of loaded emuDB. This can be used +## to update changes to precached/loaded DBs as it only updates the deltas +## in the cache which is considerably faster than reloading and therefore +## recacheing the entire DB. This function is now called by load_emuDB if +## load_emuDB finds a preexisting cache. +## @param emuDBhandle +## @param verbose display infos +update_cache <- function(emuDBhandle, verbose = TRUE){ + + DBconfig = load_DBconfig(emuDBhandle) + + # add missing index to cache + DBI::dbExecute(emuDBhandle$connection, paste0("CREATE INDEX IF NOT EXISTS ", + " items_level_seq_idx ", + "ON items(db_uuid, session, bundle, level, seq_idx)")) + DBI::dbExecute(emuDBhandle$connection, paste0("CREATE INDEX IF NOT EXISTS ", + " label_nameLabel_idx ", + "ON labels(name, label)")) + + # list sessions & bundles + sessions = list_sessions(emuDBhandle) + bundles = list_bundles(emuDBhandle) + notUpdatedSessionDBI = list_sessionsDBI(emuDBhandle) + notUpdatedBundlesDBI = list_bundlesDBI(emuDBhandle) + + if(nrow(sessions) ==0 || nrow(bundles) == 0){ + return() + } + + if(verbose){ + cat("INFO: Checking if cache needs update for", nrow(sessions), "sessions ") + } + + + # handle session + sesDelta_new = dplyr::anti_join(sessions, notUpdatedSessionDBI, by = "name") + sesDelta_toDelete = dplyr::anti_join(notUpdatedSessionDBI, sessions, by = "name") + + # add new + if(nrow(sesDelta_new) > 0){ + for(i in 1:nrow(sesDelta_new)){ + add_sessionDBI(emuDBhandle, sesDelta_new[i,]) + } + } + # delete + if(nrow(sesDelta_toDelete) > 0){ + for(i in 1:nrow(sesDelta_toDelete)){ + remove_sessionDBI(emuDBhandle, sesDelta_toDelete[i,]) + } + } + + progress = 0 + + if(verbose){ + cat("and", nrow(bundles), "bundles ...\n") + } + + # delete all entries in all tables that don't have the uuid of the emuDB (happens by on delete cascade) + numberOfRowsAffected = DBI::dbExecute(emuDBhandle$connection, + paste0("DELETE FROM emu_db WHERE uuid != '", emuDBhandle$UUID, "'")) + if(numberOfRowsAffected != 0){ + warning(paste0("INFO: Found and deleted ", + numberOfRowsAffected, + " elements in emuDBcache of emuDB with other UUID \n")) + } + + # calculate all md5sums + allAnnotFps = file.path(emuDBhandle$basePath, + paste0(bundles$session, session.suffix), + paste0(bundles$name, bundle.dir.suffix), + paste0(bundles$name, bundle.annotation.suffix, ".json")) + # remove all paths that don't contain _ses & _bndl just in case + allAnnotFps_onlyAnnots = stringr::str_match(allAnnotFps, + pattern = ".*_ses.*_bndl.*_annot.json") + + if(verbose){ + cat("INFO: Performing precheck and calculating checksums (== MD5 sums) for _annot.json files ...\n") + } + + file_md5sums = tools::md5sum(allAnnotFps_onlyAnnots) + + files_sesBndlMd5DF = data.frame(session = bundles$session, + name = bundles$name, + md5_annot_json = file_md5sums, + row.names = NULL, + stringsAsFactors = FALSE) + cache_sesBndlMd5DF = DBI::dbGetQuery(emuDBhandle$connection, + paste0("SELECT session, name, md5_annot_json FROM bundle")) + + bndlsDelta_new = dplyr::anti_join(files_sesBndlMd5DF, + cache_sesBndlMd5DF, + by = c("session", "name")) + bndlsDelta_toDelete = dplyr::anti_join(cache_sesBndlMd5DF, + files_sesBndlMd5DF, + by = c("session", "name")) + + bndlsDelta_updated = dplyr::anti_join(files_sesBndlMd5DF, + cache_sesBndlMd5DF, + by = c("session", "name", "md5_annot_json")) + bndlsDelta_updated = dplyr::anti_join(bndlsDelta_updated, + bndlsDelta_new, + by = c("session", "name", "md5_annot_json")) # remove new + bndlsDelta_updated = dplyr::anti_join(bndlsDelta_updated, + bndlsDelta_toDelete, + by = c("session", "name", "md5_annot_json")) # remove toDelete + + # return if data.frames are the same + if(nrow(bndlsDelta_new) == 0 & nrow(bndlsDelta_toDelete) == 0 & nrow(bndlsDelta_updated) == 0){ + if(verbose){ + cat("INFO: Nothing to update!\n") + } + return() + } + + bndlsDelta_load = dplyr::bind_rows(bndlsDelta_new, bndlsDelta_updated) + + ########################## + # as of here we loop manualy... + if(verbose){ + cat("INFO: (Re)loading / deleting", nrow(bndlsDelta_load), "bundle(s) to / from emuDBcache ...\n") + pb = utils::txtProgressBar(min = 0, max = nrow(bndlsDelta_load) + nrow(bndlsDelta_toDelete), initial = progress, style=3) + utils::setTxtProgressBar(pb, progress) + } + # add + if(nrow(bndlsDelta_load) > 0){ + DBI::dbBegin(emuDBhandle$connection) + for(bndlIdx in 1:nrow(bndlsDelta_load)){ + + bndl = bndlsDelta_load[bndlIdx,] + + # construct path to annotJSON + annotFilePath = normalizePath(file.path(emuDBhandle$basePath, + paste0(bndl$session, session.suffix), + paste0(bndl$name, bundle.dir.suffix), + paste0(bndl$name, bundle.annotation.suffix, '.json'))) + + # extract MD5 sum of bundle annotJSON + newMD5annotJSON = files_sesBndlMd5DF[files_sesBndlMd5DF$session == bndl$session + & files_sesBndlMd5DF$name == bndl$name,]$md5_annot_json + # read annotJSON as charac + #annotJSONchar = enc2utf8(readChar(annotFilePath, file.info(annotFilePath)$size)) # wrapped in enc2utf8 as readChar respects the system default (windows iso 88591) + annotJSONchar = readr::read_file(annotFilePath) + # convert to bundleAnnotDFs + bundleAnnotDFs = annotJSONcharToBundleAnnotDFs(annotJSONchar) + # removing old bundle entry + remove_bundleDBI(emuDBhandle, bndl$session, bndl$name) + # and adding to bundle table + add_bundleDBI(emuDBhandle, + bndl$session, + bndl$name, + bundleAnnotDFs$annotates, + bundleAnnotDFs$sampleRate, + newMD5annotJSON) + # and remove bundleAnnotDBI + remove_bundleAnnotDBI(emuDBhandle, + bndl$session, + bndl$name) + # add to items, links, labels tables + store_bundleAnnotDFsDBI(emuDBhandle, + bundleAnnotDFs, + bndl$session, + bndl$name) + + # increase progress bar + progress=progress+1L + if(verbose){ + utils::setTxtProgressBar(pb,progress) + } + } + DBI::dbCommit(emuDBhandle$connection) + } + + # delete + if(nrow(bndlsDelta_toDelete) > 0 ){ + for(i in 1:nrow(bndlsDelta_toDelete)){ + remove_bundleDBI(emuDBhandle, + bndlsDelta_toDelete[i,]$session, + bndlsDelta_toDelete[i,]$name) + progress=progress+1L + if(verbose){ + utils::setTxtProgressBar(pb,progress) + } + } + } +} + +# FOR DEVELOPMENT +# library('testthat') +# test_file('tests/testthat/test_aaa_initData.R') +# test_file('tests/testthat/test_emuR-database.caching.R') diff --git a/R/emuR-database.files.R b/R/emuR-database.files.R new file mode 100644 index 00000000..22cc76c0 --- /dev/null +++ b/R/emuR-database.files.R @@ -0,0 +1,361 @@ +is_relativeFilePath<-function(nativeFilePathStr, forRunningPlatform=FALSE){ + if(forRunningPlatform){ + if(.Platform[['OS.type']]=='unix'){ + if(.Platform[['file.sep']]==substr(nativeFilePathStr,1,1)){ + # UNIX: "/dir/file" + # absolute path + return(FALSE) + } + }else if(.Platform[['OS.type']]=='windows'){ + #See http://msdn.microsoft.com/en-us/library/windows/desktop/aa365247%28v=vs.85%29.aspx + if(substr(nativeFilePathStr,1,2)=='\\'){ + # fully qualified MS UNC path (is this supported with R?): \\samba\bla + return(FALSE) + }else if(grepl('^[A-Z,a-z][:]',nativeFilePathStr)){ + # fully qualified drive path: C:\Users\bla + return(FALSE) + }else if(.Platform[['file.sep']]==substr(nativeFilePathStr,1,1)){ + # Windows: "\dir\file" + # absolute path + return(FALSE) + } + } + }else{ + if(grepl('^[A-Z,a-z][:]',nativeFilePathStr)){ + return(FALSE) + } + if(grepl('^[\\]',nativeFilePathStr)){ + return(FALSE) + } + if(grepl('^/',nativeFilePathStr)){ + return(FALSE) + } + + } + return(TRUE) +} + +##' Import media files to emuDB +##' +##' Import new recordings (media files) to emuDB and create bundles. +##' Looks for files with the defined mediafile extension of the emuDB +##' (see \code{mediaFileExtension} in vignette \code{emuDB}) in \code{dir} +##' or in sub-directories thereof (interpreted as sessions), for each mediafile +##' create a bundle directory +##' named as the basename of the mediafile in the specified session, and copies +##' the mediafile into the bundle. If not already present, adds 'OSCI' and +##' 'SPEC' perspectives to the emuDB config file. +##' +##' @param emuDBhandle emuDB handle as returned by \code{\link{load_emuDB}} +##' @param dir directory containing mediafiles or session directories +##' @param targetSessionName name of session in which to create the new bundles +##' @param verbose display infos & show progress bar +##' @import stringr +##' @keywords emuDB database Emu +##' @export +##' @examples +##' \dontrun{ +##' ## Add mediafiles from directory +##' +##' import_mediaFiles(myEmuDB,dir="/data/mymedia/") +##' +##' } +import_mediaFiles<-function(emuDBhandle, + dir, + targetSessionName = '0000', + verbose = TRUE){ + check_emuDBhandle(emuDBhandle) + dbCfg = load_DBconfig(emuDBhandle) + if(is.null(dbCfg[['mediafileExtension']])){ + pattern = NULL + #stop("The DB has no media file extension defined.") + }else{ + pattern = paste0('.*[.]', dbCfg[['mediafileExtension']],'$') + } + if(!dir.exists(dir)){ + stop(dir, " doesn't exist!") + } + + mfList = list.files(dir, pattern = pattern) + if(length(mfList) == 0){ + stop("no media files found in ", dir) + } else { + # create session dir and session list object if required + sessDir = file.path(emuDBhandle$basePath, + paste0(targetSessionName, session.suffix)) + if(!file.exists(sessDir)){ + created = dir.create(sessDir) + if(!created){ + stop("Couldn't create ", sessDir) + } + } + + qSessSql = paste0("SELECT * ", + "FROM session ", + "WHERE db_uuid='",emuDBhandle$UUID,"' ", + " AND name='",targetSessionName,"'") + sessDf <- DBI::dbGetQuery(emuDBhandle$connection,qSessSql) + if(nrow(sessDf) == 0){ + add_sessionDBI(emuDBhandle, sessionName = targetSessionName) + } + + } + mediaAdded = FALSE + + progress = 0 + if(verbose){ + cat("INFO: Importing ", length(mfList), " media files...\n") + pb = utils::txtProgressBar(min = 0, + max = length(mfList), + initial = progress, + style = 3) + utils::setTxtProgressBar(pb, progress) + } + + for(mf in mfList){ + mfFullPath = file.path(dir,mf) + bundleName = sub('[.][^.]*$','',mf) + + bundleDir=file.path(sessDir,paste0(bundleName, bundle.dir.suffix)) + created = dir.create(bundleDir) + if(!created){ + stop("Couldn't create ", bundleDir) + } + newMediaFileFullPath = file.path(bundleDir,mf) + file.copy(from = mfFullPath, to = newMediaFileFullPath) + + pfAssp = wrassp::read.AsspDataObj(newMediaFileFullPath, 0, 4000) + sampleRate = attr(pfAssp,'sampleRate') + b = list(name = bundleName, + annotates = mf, + sampleRate = sampleRate, + levels = list(), + links = list()) + + # add empty levels + for(ld in dbCfg[['levelDefinitions']]){ + b$levels[[length(b$levels) + 1]] = list(name=ld[['name']], + type = ld[['type']], + items = list()) + } + + # write to file + annotJSONchar = jsonlite::toJSON(b, auto_unbox = TRUE, pretty = TRUE) + newAnnotFileFullPath = file.path(bundleDir, + paste0(bundleName, bundle.annotation.suffix, ".json")) + writeLines(annotJSONchar, newAnnotFileFullPath, useBytes = TRUE) + + # calculate MD5 sum of bundle annotJSON + MD5annotJSON = tools::md5sum(newAnnotFileFullPath) + + add_bundleDBI(emuDBhandle, + sessionName = targetSessionName, + name = bundleName, + annotates = mf, + sampleRate = sampleRate, + MD5annotJSON = MD5annotJSON) + + # update pb + progress = progress + 1 + if(verbose){ + utils::setTxtProgressBar(pb, progress) + } + mediaAdded = TRUE + } + + # create an EMUwebapp default perspective if media has been added + perspectives = dbCfg[['EMUwebAppConfig']][['perspectives']] + if(mediaAdded & (is.null(perspectives) | length(perspectives) == 0)){ + sc = list(order = c("OSCI","SPEC"), assign = list(), contourLims = list()) + defPersp=list(name = 'default', + signalCanvases = sc, + levelCanvases = list(order = list()), + twoDimCanvases = list(order = list())) + dbCfg[['EMUwebAppConfig']][['perspectives']] = list(defPersp) + store_DBconfig(emuDBhandle, dbConfig = dbCfg) + } + return(invisible(NULL)) +} + + + + +################################################### +# CRUD operations for files + + +##' Add files to emuDB +##' +##' Add files to existing bundles of specified session of emuDB. +##' Do not use this function to import new recordings (media files) and create bundles; +##' see \code{?import_mediaFiles} to import new recordings. +##' The files that are found in \code{dir} that have the extension +##' \code{fileExtension} will be copied into the according bundle +##' folder that have the same basename as the file. Note that the +##' same bundle name may appear in different sessions, therefore you must +##' specify the session in \code{targetSessionName}. For +##' more information on the structural elements of an emuDB +##' see \code{vignette{emuDB}}. +##' Note that adding files does not mean the emuDB is automatically using these, unless +##' you have defined the usage of these files (e.g. by ssffTrackDefinitions). +##' +##' @param emuDBhandle emuDB handle as returned by \code{\link{load_emuDB}} +##' @param dir directory containing files to be added +##' @param fileExtension file extension of files to be added. If no . (dot) is found +##' in this string (e.g. "zcr") then the bundle name matching is performed by removing +##' \code{paste0(".", fileExtension)} from the files ("/path/to/msajc003.zcr" will become "msajc003") +##' and the according bundle name will be searched. If a . (dot) if found within this string +##' (e.g. "_annot.json") then the entire string is remove without prepending a . (dot) ("/path/to/msajc003_annot.json" will then become "msajc003") +##' @param targetSessionName name of sessions containing +##' bundles that the files will be added to +##' @export +##' @keywords emuDB database Emu +##' @examples +##' \dontrun{ +##' +##' ################################## +##' # prerequisite: loaded ae emuDB +##' # (see ?load_emuDB for more information) +##' +##' # specify path to folder containing the following +##' # files we wish to add to: +##' # msajc003.zcr, msajc010.zcr, msajc012.zcr, msajc015.zcr, +##' # msajc022.zcr, msajc023.zcr and msajc057.zcr +##' path2dir = "/path/to/dir/" +##' +##' # add the files to session "0000" of the "ae" emuDB +##' add_files(emuDBhandle = ae, +##' dir = path2dir, +##' fileExtension = "zcr", +##' targetSessionName = "0000") +##' +##' } +add_files <- function(emuDBhandle, + dir, + fileExtension, + targetSessionName = '0000'){ + + check_emuDBhandle(emuDBhandle) + bndls = list_bundles(emuDBhandle, + sessionPattern = paste0("^", targetSessionName)) + + if(nrow(bndls) == 0){ + stop("No bundles found in session! Make sure to specify an existing session that contains bundles!") + } + + sourcePaths = list.files(dir, + pattern = paste0("\\.",fileExtension, '$'), + full.names = TRUE) + + destDirs = file.path(emuDBhandle$basePath, + paste0(bndls$session, '_ses'), + paste0(bndls$name, '_bndl')) + + if(length(sourcePaths) == 0){ + stop("no files found in 'dir' that match the provided 'fileExtension'") + } + + # copy files + for (i in 1:length(sourcePaths)){ + # if fileExtension doesn't contains . -> split at . + if(!stringr::str_detect(fileExtension, pattern = "\\.")){ + cbn = basename(stringr::str_remove(sourcePaths[i], + paste0("\\.", fileExtension, "$"))) + } else { + # remove from back + cbn = basename(stringr::str_remove(sourcePaths[i], + paste0(fileExtension, "$"))) + } + cbndl = bndls[bndls$name == cbn, ] + # check that only one bundle folder + if(nrow(cbndl) != 1){ + if(nrow(cbndl) == 0){ + stop(paste0("no bundle found that matches the base name (",cbn,") of the file '", sourcePaths[i], "'")) + } else { + stop(paste0("more than one bundle found (found = ",nrow(cbndl),") that matches the base name (",cbn,") of the file '", sourcePaths[i], "'")) + } + } + + destDir = file.path(emuDBhandle$basePath, paste0(cbndl$session, '_ses'), paste0(cbndl$name, '_bndl')) + file.copy(sourcePaths[i], destDir) + } +} + +##' List files of emuDB +##' +##' List files belonging to emuDB. For +##' more information on the structural elements of an emuDB +##' see \code{vignette{emuDB}}. +##' @param emuDBhandle emuDB handle as returned by \code{\link{load_emuDB}} +##' @param fileExtension file extension of files +##' @param sessionPattern A (RegEx) pattern matching sessions to be searched from the database +##' @param bundlePattern A (RegEx) pattern matching bundles to be searched from the database +##' @return file paths as character vector +##' @export +##' @importFrom rlang .data +##' @keywords emuDB database schema Emu +##' @examples +##' \dontrun{ +##' +##' ################################## +##' # prerequisite: loaded ae emuDB +##' # (see ?load_emuDB for more information) +##' +##' # list all files of ae emuDB +##' list_files(emuDBhandle = ae) +##' +##' # list all files of ae emuDB in bundles ending with '3' +##' list_files(emuDBhandle = ae, bundlePattern=".*3$") +##' +##' } +##' +list_files <- function(emuDBhandle, + fileExtension = ".*", + sessionPattern = ".*", + bundlePattern = ".*"){ + + check_emuDBhandle(emuDBhandle) + + fileList = list.files(path = file.path(emuDBhandle$basePath), + recursive = TRUE, + pattern = paste0(".*[.]", fileExtension, "$")) %>% + tibble::enframe(name = NULL) %>% + tidyr::separate(col = "value", + into = c("session", "bundle", "file"), + sep = .Platform$file.sep, + extra = "drop", + fill = "right") %>% + dplyr::filter(!is.na(.data$session)) %>% + dplyr::filter(!is.na(.data$bundle)) %>% + dplyr::filter(!is.na(.data$file)) %>% + + dplyr::filter (endsWith(.data$session, "_ses")) %>% + dplyr::filter (endsWith(.data$bundle, "_bndl")) %>% + + dplyr::mutate(session = stringr::str_remove(.data$session, "_ses$")) %>% + dplyr::mutate(bundle = stringr::str_remove(.data$bundle, "_bndl$")) %>% + + dplyr::filter (stringr::str_detect(.data$session, sessionPattern)) %>% + dplyr::filter (stringr::str_detect(.data$bundle, bundlePattern)) %>% + + dplyr::mutate (absolute_file_path = file.path(emuDBhandle$basePath, + paste0(.data$session, "_ses"), + paste0(.data$bundle, "_bndl"), + file)) + + return (fileList) +} + +modify_files <- function(){ + stop('not implemented yet') +} + +remove_files <- function(){ + stop('not implemented yet') +} + + +######################### +# FOR DEVELOPMENT +# library('testthat') +# test_file('tests/testthat/test_emuR-database.files.R') diff --git a/R/emuR-database.flatfiledata.R b/R/emuR-database.flatfiledata.R new file mode 100644 index 00000000..3bd9c551 --- /dev/null +++ b/R/emuR-database.flatfiledata.R @@ -0,0 +1,201 @@ +read_and_join_long_flatData <- function(emuDBhandle, + file, + x, + bundleName, + sessionName, + readFunction){ + if(file.exists(file)){ + key_value_data = suppressMessages(readFunction(file = file, + col_types = readr::cols())) + + if(all(names(key_value_data) == c("key", "value"))){ + key_value_data_pivoted = tidyr::pivot_wider(key_value_data, names_from = "key", values_from = "value") + if(missing(bundleName) && missing(sessionName)){ + res = dplyr::cross_join(x, key_value_data_pivoted) + } else if(missing(bundleName) && !missing(sessionName)) { + # join by session + key_value_data_pivoted$session = sessionName + res = dplyr::left_join(x, key_value_data_pivoted, by = "session") + } else if(!missing(bundleName) && !missing(sessionName)){ + key_value_data_pivoted$session = sessionName + key_value_data_pivoted$bundle = bundleName + res = dplyr::left_join(x, key_value_data_pivoted, by = c("session", "bundle")) + } + } else { + stop(file, " doesn't only contain the columns 'key' and 'value'. Only these two columns are permitted!") + } + return(res) + }else{ + return(x) + } +} + +read_and_join_wide_flatData <- function(emuDBhandle, + file, + x, + bundleName, + sessionName, + readFunction){ + if(file.exists(file)){ + long_flatData = suppressMessages(readFunction(file, + col_types = readr::cols())) + + if(missing(bundleName) && missing(sessionName)){ + # emuDB level + if(all(c("session", "bundle") %in% names(long_flatData))){ + res = dplyr::left_join(x, long_flatData, by = c("bundle", "session")) + } else { + stop(paste0("session and/or bundle columns not found in ", file)) + } + } else if(missing(bundleName) && !missing(sessionName)){ + # session level + if("bundle" %in% names(long_flatData)){ + long_flatData$session = sessionName + res = dplyr::left_join(x, long_flatData, by = c("session", "bundle")) + } else { + stop(paste0("bundle column not found in ", file)) + } + } else if(!missing(bundleName) && !missing(sessionName)){ + # bundle level + long_flatData$session = sessionName + long_flatData$bundle = bundleName + res = dplyr::left_join(x, long_flatData, by = c("session", "bundle")) + } else{ + stop(file, " doesn't only contain the columns 'session' and 'bundle'. Only these two columns are permitted!") + } + return(res) + } else { + return(x) + } +} + +## Join flat file data (UTF-8 .csv/.tsv files) to x +## +## Join flat file data that is present within +## the directories of an emuDB to a tibble/data.frame object +## usually either produced by \link{query} or \link{get_trackdata}. As +## it uses the "session" and "bundle" columns to perform the joins these +## have to be present in x. +## +## This function recognizes 2 types flat files files: +## \itemize{ +## \item *_keyValue files +## } +## +## @param emuDBhandle emuDB handle object (see \link{load_emuDB}) +## @param sessionPattern A regular expression pattern matching session names to be searched from the database +## @param bundlePattern A regular expression pattern matching bundle names to be searched from the database +## @param fileExtension file extension of flat data files (default: '.csv') +## @param readFunction function used to read in flat data files (default: \link{readr::read_csv2}) +## +## @return tibble with the columns +## \itemize{ +## \item session +## \item bundle +## \item sample_rate_annot_json +## \item sample_rate_media_file +## } +## \code{session}, \code{b} +## @export +join_flatFileData <- function(emuDBhandle, + x, + sessionPattern = '.*', + bundlePattern = '.*', + fileExtension = '.csv', + readFunction = readr::read_csv2){ + + # gen. strat. move from bundles to session to emuDB level + all_bundles = list_bundles(emuDBhandle) + # filter to sessions & bundles only in x + all_bundles = all_bundles[all_bundles$session %in% x$session & all_bundles$name %in% x$bundle,] + # filter by sessionPattern & bundlePattern + all_bundles = all_bundles[ + grepl(pattern = sessionPattern, x = all_bundles, perl = TRUE) + & grepl(pattern = bundlePattern, x = all_bundles, perl = TRUE) + ] + + ############################## + # handle emuDB level + # get long flat data file on emuDB level + path2flatDataFile = file.path(emuDBhandle$basePath, + paste0(emuDBhandle$dbName, "_long", fileExtension)) + x = read_and_join_long_flatData(emuDBhandle, + file = path2flatDataFile, + x = x, + readFunction = readFunction) + # get wide flat data file on emuDB level + path2tsv = file.path(emuDBhandle$basePath, paste0(emuDBhandle$dbName, "_wide", fileExtension)) + x = read_and_join_wide_flatData(emuDBhandle, + file = path2tsv, + x = x, + readFunction = readFunction) + + ############################## + # handle session level + for(session_name in unique(all_bundles$session)){ + # get long flat data file on session level + path2flatDataFile = file.path(emuDBhandle$basePath, + paste0(session_name, session.suffix), + paste0(session_name, "_long", fileExtension)) + + x = read_and_join_long_flatData(emuDBhandle, + file = path2flatDataFile, + x = x, + sessionName = session_name, + readFunction = readFunction) + + # get wide flat data file on session level + path2flatDataFile = file.path(emuDBhandle$basePath, + paste0(session_name, session.suffix), + paste0(session_name, "_wide", fileExtension)) + + x = read_and_join_wide_flatData(emuDBhandle, + file = path2flatDataFile, + x = x, + sessionName = session_name, + readFunction = readFunction) + + } + + ############################## + # handle bundle level + for(bndl_row_idx in 1:nrow(all_bundles)){ + + cur_bndl = all_bundles[bndl_row_idx,] + # get long flat data file on session level + path2flatDataFile = file.path(emuDBhandle$basePath, + paste0(cur_bndl$session, session.suffix), + paste0(cur_bndl$name, bundle.dir.suffix), + paste0(cur_bndl$name, "_long", fileExtension)) + + x = read_and_join_long_flatData(emuDBhandle, + file = path2flatDataFile, + x = x, + sessionName = cur_bndl$session, + bundleName = cur_bndl$name, + readFunction = readFunction) + + # get wide flat data file on session level + path2flatDataFile = file.path(emuDBhandle$basePath, + paste0(session_name, session.suffix), + paste0(session_name, "_wide", fileExtension)) + + x = read_and_join_wide_flatData(emuDBhandle, + file = path2flatDataFile, + x = x, + sessionName = session_name, + readFunction = readFunction) + + } + + + return(x) + +} + +####################### +# FOR DEVELOPMENT +# library('testthat') +# test_file('tests/testthat/test_aaa_initData.R') +# test_file('tests/testthat/test_emuR-database.flatfiledata.R') + diff --git a/R/emuR-databaseGit.R b/R/emuR-databaseGit.R new file mode 100644 index 00000000..1d4db0ec --- /dev/null +++ b/R/emuR-databaseGit.R @@ -0,0 +1,64 @@ +# currently not used +# update_emuDBgit <- function(emuDBhandle, verbose = TRUE){ +# +# +# ###################### +# # get/create repo +# repo = tryCatch({ +# git2r::repository(emuDBhandle$basePath) +# }, warning = function(warning_condition) { +# if(verbose){ +# print("got following warning:", warning_condition) +# } +# }, error = function(error_condition) { +# # no repo present so make one +# if(verbose){ +# print("Init a new repository") +# } +# git2r::init(emuDBhandle$basePath) +# +# }, finally = { +# # print("done!") +# }) +# +# ###################### +# # create .gitignore if it doesn't already exist +# gitignorePath = file.path(emuDBhandle$basePath, +# ".gitignore") +# +# if(!file.exists(gitignorePath)){ +# # only ignore cache 4 now +# readr::write_lines(c("*_emuDBcache.sqlite"), gitignorePath, num_threads = 1) +# } +# +# ###################### +# # check if remote:origin is ahead of local +# remotes = git2r::remotes(repo) +# +# if(length(remotes) > 0){ +# # todo: check if remote ahead and warn if so +# # simply print out suggested git2r command +# # (git fetch needed) +# } +# +# # add everything and commit with fixed message +# +# status = git2r::status(repo) +# if (length(status$staged) == 0 && length(status$unstaged) == 0 && length(status$untracked) == 0) { +# if(verbose){ +# # TODO add sha1 here as well +# print("INFO: No changes to commit!") +# } +# } else { +# git2r::add(repo, "*") +# commit = git2r::commit(repo, +# message = "emuR::load_emuDB() git auto snapshot") +# +# if(verbose){ +# print(paste0("INFO: emuDB git commit SHA1: ", +# stringr::str_sub(commit$sha, start = 1, end = 7))) +# } +# } +# +# +# } diff --git a/R/emuR-emuDBhandle.R b/R/emuR-emuDBhandle.R new file mode 100644 index 00000000..038593a2 --- /dev/null +++ b/R/emuR-emuDBhandle.R @@ -0,0 +1,142 @@ +# constructor function for emuDBhandle +emuDBhandle = function(dbName, + basePath, + UUID, + connectionPath, + connection = NULL){ + + if(is.null(connection)){ + con <- DBI::dbConnect(RSQLite::SQLite(), + connectionPath) + }else{ + con = connection + } + + handle = list(dbName = dbName, + basePath = basePath, + UUID = UUID, + connection = con) + + class(handle) = "emuDBhandle" + + if(inherits(handle$connection, "SQLiteConnection")){ + setSQLitePragmas(handle$connection) + # init regex + RSQLite::initRegExp(handle$connection) + } + + if(connectionPath == ":memory:" + || file.exists(file.path(basePath, + paste0(dbName, database.cache.suffix))) + || !is.null(connection)){ + initialize_emuDbDBI(handle) + } + + + + return(handle) +} + +setSQLitePragmas <- function(con){ + DBI::dbExecute(con, "PRAGMA foreign_keys = ON;") + DBI::dbExecute(con, "PRAGMA temp_store = 2;") +} + +##' @export +print.emuDBhandle = function(x, ...){ + check_emuDBhandle(x) + print(paste0(" (dbName = '", x$dbName, "', basePath = '", x$basePath, "')")) +} + +# function to check if a emuDBhandle +# seems to be valid. It only does some shallow +# checks and doesn't do any deep inspection +check_emuDBhandle <- function(emuDBhandle, checkCache = TRUE){ + # check if dir and cache actually exist + if(!dir.exists(emuDBhandle$basePath)){ + stop(paste0("emuDBhandle is invalid as emuDBhandle$basePath ", + "doesn't exist! Please reload the emuDB.")) + } + if(!DBI::dbIsValid(emuDBhandle$connection)){ + stop(paste0("emuDBhandle is invalid as emuDBhandle$connection is not ", + "a valid DBI connection! Please reload the emuDB.")) + } + # from basePath extract dbName and see if DB + dbName = stringr::str_replace(basename(emuDBhandle$basePath), + pattern = '_emuDB$', + replacement = '') + + if(!file.exists(file.path(emuDBhandle$basePath, paste0(dbName, database.schema.suffix)))){ + stop(paste0("emuDBhandle is invalid as the directory emuDBhandle$basePath doesn't ", + "contain the _DBconfig.json file '", dbName, "_DBconfig.json'. Note that ", + "the emuDB directory has to have the same prefix / name as the _DBconfig.json.")) + } + if(checkCache){ + if(!file.exists(file.path(emuDBhandle$basePath, paste0(dbName, database.cache.suffix)))){ + stop(paste0("emuDBhandle is invalid as the directory emuDBhandle$basePath doesn't contain ", + "a _emuDBcache.sqlite file! Please reload the emuDB to recreate the emuDBcache.")) + } + } +} + +##' Print summary of loaded EMU database (emuDB). +##' @description Gives an overview of an EMU database. +##' Prints database name, UUID, base directory path, session and bundle +##' count and informations about signal track, annotation level, attribute and link definitions. +##' @param object emuDBhandle as returned by \code{\link{load_emuDB}} +##' @param ... additional arguments affecting the summary produced. +##' @export +summary.emuDBhandle = function(object, ...){ + + check_emuDBhandle(object) + + cli::cli_h1("Summary of emuDB") + + cat("Name:\t", object$dbName, "\n") + cat("UUID:\t", object$UUID, "\n") + cat("Directory:\t", object$basePath, "\n") + sess = list_sessions(object) + cat("Session count:", nrow(sess), "\n") + bndls = list_bundles(object) + cat("Bundle count:", nrow(bndls), "\n") + + itCntQ = paste0("SELECT count(*) FROM items WHERE db_uuid='", object$UUID, "'") + itCntDf = DBI::dbGetQuery(object$connection, itCntQ) + itemCnt = itCntDf[[1]] + labCntQ = paste0("SELECT count(*) FROM labels WHERE db_uuid='", object$UUID, "'") + labCntDf = DBI::dbGetQuery(object$connection, labCntQ) + labCnt = labCntDf[[1]] + liCntQ = paste0("SELECT count(*) FROM links WHERE db_uuid='", object$UUID, "'") + liCntDf = DBI::dbGetQuery(object$connection, liCntQ) + linkCnt = liCntDf[[1]] + cat("Annotation item count: ", itemCnt, "\n") + cat("Label count: ", labCnt, "\n") + cat("Link count: ", linkCnt, "\n") + cli::cli_h1("Database configuration") + + dbConfig = load_DBconfig(object) + cli::cli_h2("SSFF track definitions") + ssffTrackDefs = list_ssffTrackDefinitions(object) + pr <- print.data.frame(ssffTrackDefs, right = FALSE, row.names = FALSE) + cat("\n") + cli::cli_h2("Level definitions") + levelDefs = list_levelDefinitions(object) + pr <- print.data.frame(levelDefs, right = FALSE, row.names = FALSE) + cat("\n") + lblGrps = list_labelGroups(object) + if(nrow(lblGrps) > 0){ + cli::cli_h2("Database label group definitions") + pr <- print.data.frame(lblGrps, right = FALSE, row.names = FALSE) + cat("\n") + } + cli::cli_h2("Link definitions") + linkDefs = list_linkDefinitions(object) + pr <- print.data.frame(linkDefs, right = FALSE, row.names = FALSE) +} + +########################## +# FOR DEVELOPMENT +# handle = emuDBhandle(dbName = "test12", basePath = "/path/2/emuDB", UUID = "3412D5E3-E0EA-4E81-9F1C-E0A864D0D403", ":memory:") +# ae1 = load_emuDB("~/Desktop/emuR_demoData/ae", inMemoryCache = TRUE) +# ae2 = load_emuDB("~/Desktop/emuR_demoData/ae", inMemoryCache = FALSE) +# print(summary(ae1)) diff --git a/R/emuR-emuRsegs.R b/R/emuR-emuRsegs.R new file mode 100644 index 00000000..a7cac3d8 --- /dev/null +++ b/R/emuR-emuRsegs.R @@ -0,0 +1,161 @@ +## compatibility to emusegs +## methods: +## read.emusegs OK (type cast to emusegs) Override? +## make. OK different constructor +## is.seglist OK +## modify.seglist Problematic. No S3 method we cannot overload. Warning? +## emusegs.database OK +## emusegs.type OK (question: are mixed (EVENT and SEGMENT) +## seglist possible in legacy Emu ?) +## emusegs.query +## print.emusegs OK But shows too many columns: TODO select a +## good set of columns for an S3 override method +## [.emusegs OK But not clear what emusegs really does (and +## code includes version$major switch (on S versions?)) +## summary.emusegs OK +## label.emusegs OK +## as.matrix.emusegs OK +## write.emusegs OK (typecast to emusegs) TODO Print warning about data loss! +## start.emusegs OK +## end.emusegs OK +## utt.emusegs OK +## dur.emusegs OK + +##' Make emuDB segment list +##' @param dbName name of emuDB +##' @param seglist segment list data.frame +##' @param query query string +##' @param type type of list elements +##' @export make.emuRsegs +make.emuRsegs <- function(dbName, seglist, query, type) +{ + + class(seglist) <- c("emuRsegs", + "emusegs", + "data.frame") + + attr(seglist, "query") <- query + attr(seglist, "type") <- type + attr(seglist, "database") <- dbName + + seglist +} + + +##' Print emuRsegs segment list +##' @param x object to print +##' @param ... additional params +##' @export +"print.emuRsegs" <- function(x, ...) +{ + cat(attributes(x)$type, + " list from database: ", + attributes(x)$database, + "\n") + cat("query was: ", + attributes(x)$query, + "\n" ) + printX = '[.data.frame'(x, c('labels', + 'start', + 'end', + 'session', + 'bundle', + 'level', + 'type')) + + print.data.frame(printX, ...) + +} + +##' Sort emuRsegs segment list by session, bundle and sample_start +##' @param x object to sort +##' @param decreasing NOT IMPLEMENTED! +##' @param ... additional params +##' @export +"sort.emuRsegs" <- function(x, decreasing, ...) +{ + + old_atts = attributes(x) + + sl_df_sorted = dplyr::arrange(x, .data$session, .data$bundle, .data$sample_start) + + attributes(sl_df_sorted) = old_atts + + return(sl_df_sorted) +} + + +# S3 method definition +as.emusegs <- function(x, ...){ + UseMethod("as.emusegs", x) +} + + +##' @export +as.emusegs.emuRsegs <- function(x, ...){ + emusegs = make.seglist(x$labels, + x$start, + x$end, + x$utts, + attr(x, "query"), + type = attr(x, "type"), + database = attr(x, "database")) + return(emusegs) +} + + +##' Exports a segment list to txt collection +##' +##' Extract the media file (usually .wav file) snippets that correspond to +##' the segments of a segment list (see result of a \code{\link{query}}) and +##' save them to separate files and write the corresponding labels into a .txt file. Further, +##' the segmentlist is also stored to the target directory (as a .csv file). +##' +##' @param emuDBhandle emuDB handle as returned by \code{\link{load_emuDB}} +##' @param seglist \code{tibble}, \code{\link{emuRsegs}} or +##' \code{\link{emusegs}} object obtained by \code{\link{query}}ing a loaded emuDB +##' @param targetDir target directory to store +##' @export +export_seglistToTxtCollection <- function(emuDBhandle, + seglist, + targetDir){ + + if(!dir.exists(targetDir)){ + stop("targetDir does not exist!") + } + + targetDir_full = file.path(targetDir, + paste0(emuDBhandle$dbName, + "_txt_col_from_seglist")) + created = dir.create(targetDir_full) + if(!created){ + stop("Couldn't create ", targetDir_full) + } + + for(i in 1:nrow(seglist)){ + ado = wrassp::read.AsspDataObj(file.path(emuDBhandle$basePath, + paste0(seglist[i,]$session, "_ses"), + paste0(seglist[i,]$bundle, "_bndl"), + paste0(seglist[i,]$bundle, ".wav")), + begin = seglist[i,]$start / 1000, + end = seglist[i,]$end / 1000) # hardcoded mediaFileExt! + + i_padded = stringr::str_pad(i, + width = stringr::str_length(nrow(seglist)), + side = "left", + pad = "0") + + wrassp::write.AsspDataObj(ado, + file = file.path(targetDir_full, + paste0("sl_rowIdx_", i_padded, ".wav"))) + + readr::write_file(seglist[i,]$labels, + file = file.path(targetDir_full, + paste0("sl_rowIdx_", i_padded, ".txt"))) + } + + readr::write_csv(seglist, + file = file.path(targetDir_full, + paste0("seglist.csv"))) + +} \ No newline at end of file diff --git a/R/emuR-emuRtrackdata.R b/R/emuR-emuRtrackdata.R new file mode 100644 index 00000000..e4a8a58b --- /dev/null +++ b/R/emuR-emuRtrackdata.R @@ -0,0 +1,315 @@ +##' create emuRtrackdata object +##' +##' Joins \code{\link{emuRsegs}} and \code{\link{trackdata}} objects +##' to create an \code{\link{emuRtrackdata}} object that is a sub-class of +##' a \code{\link{data.frame}} object. This object +##' can be viewed as a flat version of a \code{\link{trackdata}} object that also +##' contains all the information of a \code{\link{emuRsegs}} object. It is meant to +##' ease integration with other packages as it is based on the well known +##' \code{\link{data.frame}} object. +##' @param sl seglist of class \code{\link{emuRsegs}} +##' @param td \code{\link{trackdata}} object generated from sl +##' @return emuRtrackdata object +##' @export +##' @examples +##' \dontrun{ +##' +##' ################################## +##' # prerequisite: loaded ae emuDB +##' # (see ?load_emuDB for more information) +##' +##' # query emuDB (to get object of class emuRsegs) +##' sl = query(emuDBhandle = ae, +##' query = "Phonetic == i:") +##' +##' # get formats for SEGMENTs in sl (to get object of class trackdata) +##' td = get_trackdata(emuDBhandle = ae, +##' seglist = sl, +##' onTheFlyFunctionName = "forest") +##' +##' # create emuRtrackdata object +##' create_emuRtrackdata(sl = sl, td = td) +##' +##' } +create_emuRtrackdata <- function(sl, td){ + + ######################## + # check parameters + # check correct classes + if((!inherits(sl, "emuRsegs") + & !inherits(sl, "tbl_df")) | !inherits(td, "trackdata")){ + stop(paste0("emuRtrackdata could not be created: sl is not of ", + "class 'emuRsegs' or td arguments is not of class 'trackdata'")) + } + + # check same number of items + if(dim(td$index)[1] != nrow(sl)){ + stop(paste0("emuRtrackdata could not be created: td and sl objects don't ", + "have the same number of elements (dim(td$index)[1] != nrow(sl))")) + } + + + nframes = 1 + apply(td$index, 1, diff) + inds = rep(1:nrow(td), nframes) + # expand seglist + expSl = sl[inds,] + + times = tracktimes(td) + start.time = rep(start(td), nframes) + n.time = times - start.time + rownames(td$data) = NULL + resTmp = data.frame(sl_rowIdx = inds, expSl, times_orig = times, times_rel = n.time) + # calculate normalized time (between 0-1) + resTmp = resTmp %>% + dplyr::group_by(.data$sl_rowIdx) %>% + dplyr::mutate(times_norm = .data$times_rel / max(.data$times_rel)) + # remove class spectral to avoid usage of [] overide which affects indexing + class(td$data) = "matrix" + # add data + res = data.frame(resTmp, td$data) + + class(res) <- c("emuRtrackdata", class(res)) + # set negative values in times_rel and times_norm to 0 + # these can be caused by this sort of stuff (tracktimes() uses rownames() which are strings): + # number = 140.0811234234234123412341234 + # as.numeric(as.character(number)) == number # -> FALSE! + res$times_rel[res$times_rel < 0] = 0 + res$times_norm[res$times_norm < 0] = 0 + return(res) +} + +"check_emuRtrackdataColumns" <- function(td){ + + # convert factors into characters + td = td %>% dplyr::mutate_if(is.factor, as.character) + + # check if all columns of emuRsegs object are present + emuRsegsNames = c("sl_rowIdx", "labels", "start", "end", + "db_uuid", "session", + "bundle", "start_item_id", "end_item_id", "level", "start_item_seq_idx", + "end_item_seq_idx", "type", "sample_start", "sample_end", "sample_rate") + + if(!all(emuRsegsNames %in% names(td))){ + stop(paste0("Not all emuRsegs columns are present in emuRtrackdata object, ", + "hence it is not an emuRtrackdata object!")) + } + + # check if all time columns are present + timeColNames = c("times_orig", "times_rel", "times_norm") + + if(!all(emuRsegsNames %in% names(td))){ + stop(paste0("Not all time columns are present (times_orig, times_rel, times_norm) ", + "in emuRtrackdata object, hence it is not an emuRtrackdata oject!")) + } + + # check if every other column is of class numeric + allColNames = c(emuRsegsNames, timeColNames) + + additional_cols = setdiff(names(td), allColNames) + + numericDataClasses = c("complex", "single", "double", "integer", "numeric") + + for(dc in additional_cols){ + if(!dc %in% c("attribute", "utts")){ # ignore waring for columns that are part of tibble/emuRtrackdata (no common to both) + if(!class(td[[dc]]) %in% numericDataClasses){ + warning(paste0('Found additional column that is not of a number class ', + '("complex", "single", "double", "integer", "numeric"). Column name is: "', + dc, '". The first entry of each segment is reduplicated to match the ', + 'length of each normalized segment.')) + } + } + } + +} + + +##' Normalize length of segments contained in a \code{data.frame} like object returned by \code{\link{get_trackdata}} +##' +##' @param x data.frame like object that was generated by \code{\link{get_trackdata}} with +##' the resultType set to either \code{emuRtrackdata} or \code{tibble} +##' @param N specify length of normalized segments (each segment in resulting +##' object will consist of \code{N} rows). +##' @param colNames character vector containing names of columns to normalize. If not set all +##' data columns are normalized (T1-TN as well as other numeric columns). +##' @return data.frame like object containing the length normalized segments +##' @seealso \code{\link{emuRtrackdata} \link{emuRsegs}} +##' @export +"normalize_length" <-function(x, colNames = NULL, N = 21){ + + nonDataColNames = c("sl_rowIdx", "labels", "start", "end", "utts", "db_uuid", "session", + "bundle", "start_item_id", "end_item_id", "level", "start_item_seq_idx", + "end_item_seq_idx", "type", "sample_start", "sample_end", "sample_rate", + "times_orig", "times_rel", "times_norm") + + # check if object that was passed in has all the needed columns + check_emuRtrackdataColumns(x) + + # extract data cols + if(is.null(colNames)){ + additional_cols = setdiff(names(x), nonDataColNames) + }else{ + if(all(colNames %in% names(x))){ + additional_cols = colNames + }else{ + stop("Passed in column names don't exist in x") + } + + } + + # look 4 repeats: + if(any(duplicated(rle(x$sl_rowIdx)$values))){ + stop(paste0("found repeating sl_rowIdx sequences (e.g. c(1,1,1,2,2,2,1,1,1)\n ", + "where 1 is repeated). This is not permitted! Please fix this in\n ", + "the passed in trackdata (== parameter 'x')\n")) + } + + urowIdx = unique(x$sl_rowIdx) + + resLen = length(urowIdx) * N + + # preallocate resulting tibble + res_tbl = tibble::tibble(sl_rowIdx = integer(resLen), + labels = character(resLen), + start = double(resLen), + end = double(resLen), + utts = character(resLen), + db_uuid = character(resLen), + session = character(resLen), + bundle = character(resLen), + start_item_id = integer(resLen), + end_item_id = integer(resLen), + level = integer(resLen), + start_item_seq_idx = integer(resLen), + end_item_seq_idx = integer(resLen), + type = integer(resLen), + sample_start = integer(resLen), + sample_end = integer(resLen), + sample_rate = integer(resLen), + times_orig = double(resLen), + times_rel = double(resLen), + times_norm = double(resLen)) + + # add other columns that are not emuRsegsColNames, hence added columns + for(colName in additional_cols){ + res_tbl[,colName] = NA # add empty column + class(res_tbl[[colName]]) = class(x[[colName]]) # set col column class + } + + res_list = list() + + for (i in unique(x$sl_rowIdx)){ + # get current segment and remove unwanted columns + eRtd = x[x$sl_rowIdx == i, names(x) %in% c(nonDataColNames, additional_cols)] + + xynew = approx(eRtd$times_norm, eRtd$T1, n = N) + # create data.frame of correct length (all relevant entries are replaced) + # and fill with values of first row (only rel. for redundant columns such as sl_rowIdx, labels) + eRtd.normtemp = tibble::tibble(eRtd[1,], .rows = N) + eRtd.normtemp$times_norm = seq(0, 1, length.out = N) #xynew$x - use seq instead of xynew$x to avoid approx rounding issues + # interpolate data columns + for (name in additional_cols){ + # y = dplyr::pull(eRtd, name) + y = eRtd[[name]] + if(!inherits(y, "character")){ + eRtd.normtemp[,name] = approx(eRtd$times_norm, y, n = N)$y + }else{ + eRtd.normtemp[,name] = y[1] # use first element to fill up vector (R's recycling) + } + } + # recalculate times_orig & rimes_rel + eRtd.normtemp$times_orig = seq(unique(eRtd.normtemp$start), + unique(eRtd.normtemp$end), + length.out = N) + eRtd.normtemp$times_rel = seq(0, + unique(eRtd.normtemp$end) - unique(eRtd.normtemp$start), + length.out = N) + + res_list[[i]] = eRtd.normtemp + + } + + res_tbl = do.call(rbind, res_list) + + return(res_tbl) +} + + +##' Print emuRtrackdata object +##' @param x object to print +##' @param ... additional params +##' @export +"print.emuRtrackdata" <- function(x, ...) +{ + trackNames = names(x)[stringr::str_detect(names(x), 'T.*')] + printX = '[.data.frame'(x, c('sl_rowIdx', + 'labels', + 'start', + 'end', + 'session', + 'bundle', + 'level', + 'type', + 'times_orig', + 'times_rel', + 'times_norm', + trackNames)) + print.data.frame(printX, ...) +} + +##' convert tracks of a tibble trackdata object to the long form +##' +##' Converts a trackdata tibble object of the form (==wide): +##' \tabular{lllllll}{ +##' sl_rowIdx \tab ... \tab T1 \tab T2 \tab T3 \tab ... \tab TN\cr +##' 1 \tab ... \tab T1_value \tab T2_value \tab T3_value \tab ... \tab TN_value +##' } +##' to its long form equivalent: +##' \tabular{llll}{ +##' sl_rowIdx \tab ... \tab track_name \tab track_value \cr +##' 1 \tab ... \tab T1 \tab T1_value \cr +##' 1 \tab ... \tab T2 \tab T2_value \cr +##' 1 \tab ... \tab T3 \tab T3_value \cr +##' ... \tab ... \tab ... \tab ... \cr +##' 1 \tab ... \tab TN \tab TN_value \cr +##' } +##' +##' @param td wide form trackdata tibble object +##' @param calcFreqs calculate an additional column containing +##' frequency values from 0-nyquist frequency that match T1-TN (can be quite useful for spectral data) +##' @return long form trackdata tibble object +##' @export +convert_wideToLong <- function(td, calcFreqs = FALSE){ + + # get col idx values of tracks (T1-TN) + tracks_colIdx = grep(pattern = "^T[0-9]+$", names(td)) + + tracks_long = dplyr::ungroup(td) %>% + tidyr::gather(key = "track_name", + value = "track_value", + min(tracks_colIdx):max(tracks_colIdx), + convert = TRUE) %>% + dplyr::mutate(freq = as.numeric(substring(.data$track_name, 2))) %>% + dplyr::group_by(.data$sl_rowIdx) %>% + dplyr::arrange(.data$freq, .by_group = TRUE) + + # calc freq if calcFreqs = FALSE otherwise drop column + if(calcFreqs) { + tracks_long = tracks_long %>% + dplyr::mutate(freq = rep(seq(0, + (unique(.data$sample_rate) / 2), + length.out = length(tracks_colIdx)), + each = dplyr::n() / length(tracks_colIdx))) + } else{ + tracks_long = tracks_long %>% + dplyr::select(-"freq") + } + + return(dplyr::ungroup(tracks_long)) +} + +####################### +# FOR DEVELOPMENT +# library('testthat') +# test_file('tests/testthat/test_aaa_initData.R') +# test_file('tests/testthat/test_emuRtrackdata.R') +# test_file('tests/testthat/test_zzz_cleanUp.R') diff --git a/R/emuR-export_BPFCollection.R b/R/emuR-export_BPFCollection.R new file mode 100644 index 00000000..99a0c032 --- /dev/null +++ b/R/emuR-export_BPFCollection.R @@ -0,0 +1,483 @@ +##' Exports an emuDB into a BAS Partitur File (BPF) Collection +##' +##' This function exports an emuDB into the BAS Partitur File format, with one BPF file per bundle. +##' The user must pass a list of matching label names and BPF keys. +##' \strong{Important:} The BPF format does not support explicit hierarchies with more than three +##' levels. Hence, you will probably lose information when exporting complex hierarchies. +##' +##' @param handle handle to the emuDB +##' @param targetDir directory where the BPF collection should be saved +##' @param extractLevels list containing the names of labels (not levels!) that should be extracted, and their +##' matching BPF keys, e.g. extractLevels = list(SampleRate="SAM", Text="ORT", Phonemes="SAP") +##' @param refLevel optional name of level (not label!) used as reference for symbolic links. If NULL (the default), a link-less BPF collection is created. +##' @param newLevels optional vector containing names of levels in the BPF collection that are not part of the standard +##' BPF levels. See \url{https://www.bas.uni-muenchen.de/forschung/Bas/BasFormatseng.html#Partitur_tiersdef} for details on +##'standard BPF levels. +##' @param newLevelClasses optional vector containing the classes of levels in the newLevels vector as integers. +##' Must have the same length and order as newLevels. +##' @param copyAudio if true, audio files are copied to the new BPF collection +##' @param verbose display infos, warnings and show progress bar +##' @return NULL +##' @seealso export_TextGridCollection +##' @export + +export_BPFCollection <- function(handle, + targetDir, + extractLevels, + refLevel = NULL, + verbose = TRUE, + newLevels = NULL, + newLevelClasses = NULL, + copyAudio = FALSE) +{ + check_emuDBhandle(handle) + levelClasses = as.list(BPF_STANDARD_LEVEL_CLASSES) + names(levelClasses) = BPF_STANDARD_LEVELS + levelClasses[newLevels] = newLevelClasses + + bundles = list_bundles(handle) + + if(nrow(bundles) == 0) + { + stop("The database does not appear to contain any bundles.") + } + + build_skeleton(handle, targetDir, copyAudio, verbose) + + if(verbose) + { + cat("INFO: Exporting emuDB containing", nrow(bundles), "bundle(s) to BPF collection ...\n") + pb = utils::txtProgressBar(min = 0, max = nrow(bundles), initial = 0, style=3) + utils::setTxtProgressBar(pb, 0) + } + + + for(kdx in 1:nrow(bundles)) + { + bundle = bundles[kdx,"name"] + session = bundles[kdx,"session"] + + bpfLines = c("LBD:") + headerLines = c("LHD: Partitur 1.3") + seen_sam = FALSE + + bpf_target = file.path(targetDir, handle$dbName, session, paste0(bundle, ".par")) + + link_map = make_link_map(handle, session, bundle, refLevel) + + extract_set_string = paste0("('", paste(names(extractLevels), collapse="','"), "')") + + queryTxt = paste0("SELECT ", + " items.item_id, ", + " items.sample_start, ", + " items.sample_dur, ", + " items.sample_point, ", + " labels.name, ", + " labels.label ", + "FROM items ", + "JOIN labels ", + "ON items.item_id = labels.item_id ", + " AND items.db_uuid = labels.db_uuid ", + " AND items.session = labels.session ", + " AND items.bundle = labels.bundle", + basic_cond(handle, session, bundle, prefix = "items"), + " AND labels.name in ", extract_set_string, " ", + "ORDER BY labels.name, items.seq_idx") + + labels = DBI::dbGetQuery(handle$connection, queryTxt) + + if(nrow(labels) > 0) + { + for(idx in 1:nrow(labels)) + { + key = labels[idx, "name"] + bpfkey = extractLevels[[key]] + label = labels[idx, "label"] + + if(!(bpfkey %in% names(levelClasses))) + { + queryTxt = paste0("SELECT items.item_id ", + "FROM items ", + "JOIN labels ", + "ON items.item_id = labels.item_id ", + " AND items.db_uuid = labels.db_uuid ", + " AND items.session = labels.session ", + " AND items.bundle = labels.bundle ", + basic_cond(handle, session, bundle, prefix = "items"), + " AND labels.name = '", key, "'") + tmp = DBI::dbGetQuery(handle$connection, queryTxt) + + if(nrow(tmp) > 1) + { + stop("More than one item with a ", key, " label. Cannot use this for header") + } + + headerLines = c(headerLines, paste0(bpfkey, ": ", label)) + if(bpfkey == "SAM") + { + seen_sam = TRUE + } + } + + else + { + class = levelClasses[[bpfkey]] + + label = labels[idx, "label"] + item_id = labels[idx, "item_id"] + + if(class %in% c(1,4,5)) + { + link = "-1" + + if(toString(item_id) %in% names(link_map)) + { + link = paste(unique(link_map[[toString(item_id)]]), collapse=",") + } + } + + if(class %in% c(2,4)) + { + start = labels[idx, "sample_start"] + dur = labels[idx, "sample_dur"] + + if(is.na(start) || is.na(dur)) + { + container = infer_temporal_info(handle, + session, + bundle, + item_id, + type = "SEGMENT") + start = container$sample_start + dur = container$sample_dur + } + + if(is.na(start) || is.na(dur)) + { + stop("Invalid segment of class 2 or 4 ", labels[idx,], ". Could not infer start and duration.") + } + } + + if(class %in% c(3,5)) + { + point = labels[idx, "sample_point"] + if(is.na(point)) + { + container = infer_temporal_info(handle, + session, + bundle, + item_id, + type="EVENT") + point = container$sample_point + } + if(is.na(point)) + { + stop("Invalid segment of class 3 or 5 ", labels[idx,], ". Could not infer sample point.") + } + } + + if(class == 1) + { + bpfLines = c(bpfLines, paste0(bpfkey, ": ", link, " ", label)) + } + else if(class == 2) + { + bpfLines = c(bpfLines, paste0(bpfkey, ": ", start, " ", dur, " ", label)) + } + else if(class == 3) + { + bpfLines = c(bpfLines, paste0(bpfkey, ": ", point, " ", label)) + } + else if(class == 4) + { + bpfLines = c(bpfLines, paste0(bpfkey, ": ", start, " ", dur, " ", link, " ", label)) + } + else if(class == 5) + { + bpfLines = c(bpfLines, paste0(bpfkey, ": ", point, " ", link, " ", label)) + } + } + } + } + + + if(!seen_sam) + { + queryTxt = paste0("SELECT DISTINCT sample_rate ", + "FROM items", basic_cond(handle, session, bundle)) + samplerate = DBI::dbGetQuery(handle$connection, queryTxt) + + if(nrow(samplerate) > 0) + { + headerLines = c(headerLines, paste0("SAM: ", samplerate[1,1])) + } + } + + writeLines(c(headerLines, bpfLines), con = bpf_target, sep = "\n", useBytes = TRUE) + + if(verbose) + { + utils::setTxtProgressBar(pb, kdx) + } + } + if(verbose) + { + cat("\n") + } +} + +make_link_map <- function(handle, session, bundle, refLevel) +{ + link_map = list() + if(!is.null(refLevel)) + { + ref_id_map = list() + queryTxt = paste0("SELECT item_id ", + "FROM items", basic_cond(handle, session, bundle), + " AND level = '", refLevel, "' ", + "ORDER BY seq_idx") + refItems = DBI::dbGetQuery(handle$connection, queryTxt) + + if(nrow(refItems) > 0) + { + ref_id_map[refItems$item_id] = 0:(nrow(refItems)-1) + + for(ref_id in refItems$item_id) + { + for(id in get_links(handle, session, bundle, ref_id)) + { + if(!(id %in% names(link_map))) + { + link_map[[toString(id)]] = list() + } + link_map[[toString(id)]][[length(link_map[[toString(id)]]) + 1]] = ref_id_map[[ref_id]] + } + } + } + } + return(link_map) +} + +get_links <- function(handle, session, bundle, ref_id, direction="all", + level=NULL, item_table = "items", link_table = "links") +{ + links = c(ref_id) + + if(direction == "all") + { + directions = list(c("from_id", "to_id"), c("to_id", "from_id")) + } + else if(direction == "up") + { + directions = list(c("from_id", "to_id")) + } + else if(direction == "down") + { + directions = list(c("to_id", "from_id")) + } + + for(direction in directions) + { + current_anchors = c(ref_id) + this = direction[1] + other = direction[2] + + while(length(current_anchors) > 0) + { + id_set_string = paste0("(", paste(current_anchors, collapse = "," ) ,")") + + if(is.null(level)) + { + queryTxt = paste0("SELECT l.", this, " FROM ", link_table, " AS l", + basic_cond(handle, session, bundle, prefix = "l"), + " AND l.", other, " in ", id_set_string) + } + else + { + queryTxt = paste0("SELECT l.", this, " FROM ", link_table, " AS l ", + "JOIN ", item_table, " AS i ", + "ON l.db_uuid == i.db_uuid ", + " AND l.session == i.session ", + " AND l.bundle == i.bundle ", + " AND l.", this, " == i.item_id", + basic_cond(handle, session, bundle, prefix = "i"), + " AND l.", other, " in ", id_set_string, + " AND i.level=='", level, "'") + } + + current_anchors = DBI::dbGetQuery(handle$connection, queryTxt)[[this]] + links = c(links, current_anchors) + } + } + + if(!is.null(level)) + { + id_set_string = paste0("(", paste(links, collapse = "," ) ,")") + queryTxt = paste0("SELECT item_id FROM ", item_table, " ", + basic_cond(handle, session, bundle), + " AND level=='", level, "' ", + " AND item_id in ", id_set_string) + links = DBI::dbGetQuery(handle$connection, queryTxt)[["item_id"]] + } + + return(links) +} + + +build_skeleton <- function(handle, targetDir, copyAudio, verbose) +{ + if(file.exists(file.path(targetDir, handle$dbName))) + { + stop(file.path(targetDir, handle$dbName), " already exists.") + } + + created = dir.create(file.path(targetDir, handle$dbName)) + if(!created){ + stop("Couldn't create ", file.path(targetDir, handle$dbName)) + } + + bundles = list_bundles(handle) + sessions = list_sessions(handle) + + if(verbose) + { + max = nrow(sessions) + if(copyAudio) + { + max = nrow(bundles) + nrow(sessions) + } + cat("INFO: Building BPF collection skeleton ...\n") + progress = 0 + pb = utils::txtProgressBar(min = 0, max = max, initial = 0, style=3) + utils::setTxtProgressBar(pb, 0) + } + + for(session in sessions$name) + { + created = dir.create(file.path(targetDir, handle$dbName, session)) + if(!created){ + stop("Couldn't create ", file.path(targetDir, handle$dbName, session)) + } + if(verbose) + { + progress = progress + 1 + utils::setTxtProgressBar(pb, progress) + } + + if(copyAudio) + { + for(bundle in list_bundles(handle, session = session)$name) + { + queryTxt = paste0("SELECT annotates FROM bundle", + basic_cond(handle, session, bundle, bundlename = "name")) + + annotates = DBI::dbGetQuery(handle$connection, queryTxt)[1,1] + + wav_target = file.path(targetDir, + handle$dbName, + session, + annotates) + + wav_source = file.path(handle$basePath, + paste0(session, session.suffix), + paste0(bundle, bundle.dir.suffix), + annotates) + + file.copy(wav_source, wav_target) + if(verbose) + { + progress = progress + 1 + utils::setTxtProgressBar(pb, progress) + } + } + } + } + if(verbose) + { + cat("\n") + } +} + + +basic_cond <- function(handle, + session, + bundle, + bundlename = "bundle", + prefix = NULL) +{ + if(is.null(prefix)) + { + prefix = ""; + } + else + { + prefix = paste0(prefix, ".") + } + return(paste0(" WHERE ", prefix, "db_uuid='", handle$UUID, "' AND ", prefix, "session='", + session, "' AND ", prefix, bundlename, "='", bundle, "' ")) +} + +infer_temporal_info <- function(handle, session, bundle, item_id, type="SEGMENT") +{ + if(type == "SEGMENT") + { + needed = c("sample_start", "sample_dur") + } + else if(type == "EVENT") + { + needed = c("sample_point") + } + + container = list() + container[needed] = NA + + links_down = get_links(handle, session, bundle, item_id, direction="down") + + queryTxt = paste0("SELECT DISTINCT type FROM items", + basic_cond(handle, session, bundle), + " AND type=='", type, "'") + + types = DBI::dbGetQuery(handle$connection, queryTxt) + if(nrow(types) > 0) + { + ids_as_set = paste0("(", paste(links_down, collapse = ","), ")") + needed_as_set = paste(needed, collapse = ",") + + for(t in types$type) + { + queryTxt = paste0("SELECT ", needed_as_set, " FROM items", + basic_cond(handle, session, bundle), + " AND type='", t, "' ", + " AND item_id in ", ids_as_set, + " ORDER by ", needed[1]) + tmp = DBI::dbGetQuery(handle$connection, queryTxt) + + if(nrow(tmp) > 0) + { + if(type == "EVENT") + { + if(is.numeric(tmp[1,1])) + { + container$sample_point = tmp[1,1] + break + } + } + else if(type == "SEGMENT") + { + start = tmp[1,1] + dur = tmp[nrow(tmp), 1] + tmp[nrow(tmp), 2] - tmp[1,1] + if(is.numeric(start) && is.numeric(dur)) + { + container$sample_start = start + container$sample_dur = dur + break + } + } + } + } + } + + return(container) +} + + diff --git a/R/emuR-export_TextGridCollection.R b/R/emuR-export_TextGridCollection.R new file mode 100644 index 00000000..68f1697f --- /dev/null +++ b/R/emuR-export_TextGridCollection.R @@ -0,0 +1,327 @@ +##' Export annotations of emuDB to TextGrid collection +##' +##' Exports the annotations of an emuDB to a TextGrid collection (.TextGrid and .wav file pairs). +##' To avoid naming conflicts and not to loose the session information, the session structure of +##' the database is kept in place (i.e. the TextGrid collection will have sub-folders that are named +##' as the sessions were). Due to the more complex annotation structure modeling capabilities of +##' the EMU-SDMS system, this export routine has to make several compromises on export which +##' can lead to information loss. So use with caution and at own risk as reimporting the exported +##' data will mean that not all information can be recreated! +##' The main compromises are: +##' \itemize{ +##' \item If a MANY_TO_MANY relationship between two levels is present and +##' two items from the parent level are linked to a single item on the child level, the +##' concatenated using the '->' symbol. An example would be: the annotation items containing the labels 'd' and 'b' of the +##' parent items are merged into a single annotation item and their labels are +##' Phoneme level are linked to 'db' on the Phonetic level. The generated Phoneme tier then has a segment with the +##' start and end times of the 'db' item and contains the labels 'db' (see for example the +##' bundle 0000_ses/msajc010_bndl of the ae_emuDB). +##' \item As annotations can contain gaps (e.g. incomplete hierarchies or orphaned items) and do not have to start at +##' time 0 and be the length of the audio file this export routine pads these gaps with empty segments. +##' } +##' +##' @param emuDBhandle emuDB handle object (see \link{load_emuDB}) +##' @param targetDir directory where the TextGrid collection should be saved +##' @param sessionPattern A regular expression pattern matching session names to be exported from the database +##' @param bundlePattern A regular expression pattern matching bundle names to be exported from the database +##' @param attributeDefinitionNames list of names of attributeDefinitions that are to be +##' exported as tiers. If set to NULL (the default) all attribute definitions will be exported as separate tiers. +##' @param timeRefSegmentLevel parameter passed into \link{query} function. (set time segment level from which to derive time +##' information. It is only necessary to set this parameter if more than one child +##' level contains time information and the queried parent level is of type ITEM.) +##' @param verbose Show progress bars and further information +##' @export +##' @seealso \code{\link{load_emuDB}} +##' @keywords emuDB database query Emu EQL +##' @examples +##' \dontrun{ +##' +##' ################################## +##' # prerequisite: loaded ae emuDB +##' # (see ?load_emuDB for more information) +##' +##' ## Export all levels +##' export_TextGridCollection(ae, "/path/2/targetDir") +##' +##' } +##' +export_TextGridCollection <- function(emuDBhandle, + targetDir, + sessionPattern = '.*', + bundlePattern = '.*', + attributeDefinitionNames = NULL, + timeRefSegmentLevel = NULL, + verbose = TRUE) { + + check_emuDBhandle(emuDBhandle) + + dbConfig = load_DBconfig(emuDBhandle) + + allAttrNames = get_allAttributeNames(emuDBhandle) + + if(!is.null(attributeDefinitionNames)){ + + if(!all(attributeDefinitionNames %in% allAttrNames)){ + stop(paste0("Bad attributeDefinitionNames given! Valid ", + "attributeDefinitionNames of the emuDB are: ", + paste0(allAttrNames, collapse = "; "))) + } + allAttrNames = attributeDefinitionNames + } + + # create target dir + if(!dir.exists(targetDir)){ + if(verbose){ + cat("targetDir DOESN'T exist! Creating new dir...\n") + } + created = dir.create(targetDir) + if(!created){ + stop("Couldn't create ", targetDir) + } + }else{ + if(verbose){ + cat("targetDir exists! Using specified dir...\n") + } + } + + # extract all items as giant seglist + if(verbose){ + cat("Querying all annotation items... (this may take a while!)\n") + } + + slAll = NULL + for(i in 1:length(allAttrNames)){ + sl = query(emuDBhandle, + paste0(allAttrNames[i], "=~ .*"), + resultType = "emuRsegs", # still uses old emuRsegs obj + timeRefSegmentLevel = timeRefSegmentLevel, + sessionPattern = sessionPattern, + bundlePattern = bundlePattern, + verbose = FALSE) + slAll = dplyr::bind_rows(slAll, sl) + } + + # convert times to seconds + slAll$start = slAll$start / 1000 + slAll$end = slAll$end / 1000 + + # extract rel. bundles + bndls = list_bundles(emuDBhandle) + bndls = bndls[grepl(sessionPattern, bndls$session) & grepl(bundlePattern, bndls$name),] + + if(verbose){ + cat('\n INFO: exporting', nrow(bndls), 'bundles\n') + pb <- utils::txtProgressBar(min = 0, max = nrow(bndls), style = 3) + } + + # loop through bundles and write to TextGrids & copy wav + for(i in 1:nrow(bndls)){ + curSes = bndls[i, ]$session + curBndl = bndls[i, ]$name + # check if session folder exists + sesDir = file.path(targetDir, bndls[i, ]$session) + if(!dir.exists(sesDir)){ + created = dir.create(sesDir) + if(!created){ + stop("Couldn't create ", sesDir) + } + } + + # copy wav file + wavPath = file.path(emuDBhandle$basePath, + paste0(curSes, session.suffix), + paste0(curBndl, bundle.dir.suffix), + paste0(curBndl, ".", dbConfig$mediafileExtension)) + file.copy(wavPath, sesDir) + # extract bundle sl + slBndl = slAll[grepl(paste0("^", curSes, "$"), + slAll$session) + & grepl(paste0("^", curBndl, "$"), + slAll$bundle), ] + tgPath = file.path(sesDir, paste0(curBndl, ".TextGrid")) + + wavDur = wrassp::dur.AsspDataObj(wrassp::read.AsspDataObj(wavPath)) + + # tg header + tgHeader = c("File type = \"ooTextFile\"", + "Object class = \"TextGrid\"", + "", + "xmin = 0 ", + paste0("xmax = ", wavDur, " "), + "tiers? ", + paste0("size = ", length(allAttrNames), " "), + "item []: ") + + readr::write_lines(tgHeader, tgPath, num_threads = 1) + + for(attrNameIdx in 1:length(allAttrNames)){ + + slTier = slBndl[slBndl$level == allAttrNames[attrNameIdx],] + levelDef = get_levelDefinition(emuDBhandle, + name = get_levelNameForAttributeName(emuDBhandle, allAttrNames[attrNameIdx])) + emptyRow = data.frame(labels = "", + start = -1, + end = -1, + utts = "", + db_uuid = "", + session = "", + bundle = "", + start_item_id = "", + end_item_id = "", + level = "", + start_item_seq_idx = "", + end_item_seq_idx = "", + type = "", + sample_start = "", + sample_end = "", + sample_rate = "", + stringsAsFactors = FALSE) + # tier header + if(levelDef$type == "EVENT"){ + tierType = "TextTier" + }else{ + tierType = "IntervalTier" + if(nrow(slTier) > 0){ + if(min(slTier$start) > 0){ + # add empty segment to left (== pad left) + emptyRow$start = 0 + emptyRow$end = min(slTier$start) + slTier = rbind(emptyRow, slTier) + } + + if(max(slTier$end) < wavDur){ + # add empty segment to right (== pad right) + emptyRow$start = max(slTier$end) + emptyRow$end = wavDur + slTier = rbind(slTier, emptyRow) + } + + # check for empty and overlapping segments (caused by orphaned children in hierarchy) + problemSegs = slTier[-nrow(slTier),]$end - slTier[-1,]$start != 0 + # check for overlapping segs + overlSegs = slTier[-nrow(slTier),]$end - slTier[-1,]$start > 0 + # check for duplicate segs (caused by many_to_many -> elisions) + dupliSegs = slTier[-nrow(slTier),]$start == slTier[-1,]$start & slTier[-nrow(slTier),]$end == slTier[-1,]$end + + if(any(problemSegs) | any(overlSegs) | any(dupliSegs)){ + slTierTmpNrow = nrow(slTier) + length(which(problemSegs)) - length(which(overlSegs)) - length(which(dupliSegs)) # remove overlSegs + dupliSegs from problemSegs (reason for minus) + # preallocate data.frame + slTierTmp = data.frame(labels = character(slTierTmpNrow), + start = integer(slTierTmpNrow), + end = integer(slTierTmpNrow), + utts = character(slTierTmpNrow), + db_uuid = character(slTierTmpNrow), + session = character(slTierTmpNrow), + bundle = character(slTierTmpNrow), + start_item_id = character(slTierTmpNrow), + end_item_id = character(slTierTmpNrow), + level = character(slTierTmpNrow), + start_item_seq_idx = integer(slTierTmpNrow), + end_item_seq_idx = integer(slTierTmpNrow), + type = character(slTierTmpNrow), + sampleStart = integer(slTierTmpNrow), + sample_end = integer(slTierTmpNrow), + sample_rate = integer(slTierTmpNrow), + stringsAsFactors = FALSE) + + slTierTmp[1,] = slTier[1,] + curRowIdx = 2 + dupliSegsRowIdx = NULL + for(slTierRowIdx in 2:nrow(slTier)){ + if(slTier[slTierRowIdx - 1,]$end < slTier[slTierRowIdx,]$start){ + # add empty segment + emptyRow$start = slTier[slTierRowIdx - 1,]$end + emptyRow$end = slTier[slTierRowIdx,]$start + slTierTmp[curRowIdx,] = emptyRow + curRowIdx = curRowIdx + 1 + slTierTmp[curRowIdx,] = slTier[slTierRowIdx,] + curRowIdx = curRowIdx + 1 + + }else{ + if(slTier[slTierRowIdx - 1,]$end > slTier[slTierRowIdx,]$start + | slTier[slTierRowIdx - 1,]$start == slTier[slTierRowIdx,]$start + & slTier[slTierRowIdx - 1,]$end == slTier[slTierRowIdx,]$end){ + # overlapping or duplicate + slTierTmp[curRowIdx,] = slTier[slTierRowIdx,] + slTierTmp[curRowIdx,]$labels = paste0(slTier[slTierRowIdx - 1,]$labels, + "->", + slTier[slTierRowIdx,]$labels) + slTierTmp[curRowIdx,]$start = slTier[slTierRowIdx - 1,]$start + slTierTmp[curRowIdx,]$end = slTier[slTierRowIdx - 1,]$end + dupliSegsRowIdx = c(dupliSegsRowIdx, curRowIdx - 1) + curRowIdx = curRowIdx + 1 + }else{ + slTierTmp[curRowIdx,] = slTier[slTierRowIdx,] + curRowIdx = curRowIdx + 1 + } + } + } + + + if(is.null(dupliSegsRowIdx)){ + slTier = slTierTmp + }else{ + slTier = slTierTmp[-1 * dupliSegsRowIdx,] + } + } + } + } + tierHeader = c(paste0(" item [", attrNameIdx, "]:"), + paste0(" class = \"", tierType, "\" "), + paste0(" name = \"", allAttrNames[attrNameIdx], "\" "), + " xmin = 0 ", + paste0(" xmax = ", wavDur, " ")) + + readr::write_lines(tierHeader, tgPath, append=TRUE, num_threads=1) + + # tier items + if(tierType == "IntervalTier"){ + if(nrow(slTier) > 0 ){ + tierItems = c(paste0(" intervals: size = ", nrow(slTier), " "), + c(rbind(paste0(" intervals [",1:nrow(slTier), "]:"), + paste0(" xmin = ", slTier$start, " "), + paste0(" xmax = ", slTier$end, " "), + paste0(" text = \"", slTier$labels, "\" ")))) + } else { + # Praat can handle this but should this be an + # interval the length of the bundle? That is + # how Praat creates empty tiers + tierItems = " intervals: size = 0 " + } + + } else { + if(nrow(slTier) > 0){ + tierItems = c(paste0(" points: size = ", nrow(slTier), " "), + c(rbind(paste0(" points [",1:nrow(slTier), "]:"), + paste0(" number = ", slTier$start, " "), + paste0(" mark = \"", slTier$labels, "\" ")))) + } else { + tierItems = " points: size = 0 " + } + + } + + readr::write_lines(tierItems, + tgPath, + append = TRUE, + num_threads = 1) + } + # increase pb + if(verbose){ + utils::setTxtProgressBar(pb, i) + } + + } + + # close progress bar if open + if(exists('pb')){ + close(pb) + } + + +} + +# FOR DEVELOPMENT +# library('testthat') +# test_file('tests/testthat/test_emuR-export_TextGridCollection.R') + diff --git a/R/emuR-get_trackdata.R b/R/emuR-get_trackdata.R new file mode 100644 index 00000000..f7ad067b --- /dev/null +++ b/R/emuR-get_trackdata.R @@ -0,0 +1,626 @@ +##' Get trackdata from loaded emuDB +##' +##' Extract trackdata information from a loaded emuDB that +##' corresponds to the entries in a segment list. +##' +##' This function utilizes the wrassp package for signal processing and +##' SSFF/audio file handling. It reads time relevant data from a given +##' segment list (\code{\link{emuRsegs}} or \code{\link{emusegs}}), extracts the +##' specified trackdata and places it into a +##' trackdata object (analogous to the deprecated \code{emu.track}). This function +##' replaces the deprecated \code{emu.track} function. Note that an warning is issued +##' if the bundles in the \code{\link{emuRsegs}} or \code{\link{emusegs}} object +##' have in-homogeneous sampling rates as this could lead to inconsistent/erroneous +##' \code{\link{trackdata}}, \code{\link{emuRtrackdata}} or \code{\link[tibble]{tibble}} result objects. For +##' more information on the structural elements of an emuDB +##' see the signal data extraction chapter of the EMU-SDMS manual +##' (\url{https://ips-lmu.github.io/The-EMU-SDMS-Manual/chap-sigDataExtr.html}). +##' +##' @param emuDBhandle emuDB handle as returned by \code{\link{load_emuDB}} +##' @param seglist \code{tibble}, \code{\link{emuRsegs}} or \code{\link{emusegs}} +##' object obtained by \code{\link{query}}ing a loaded emuDB +##' @param ssffTrackName The name of track that one wishes to extract (see +##' \code{\link{list_ssffTrackDefinitions}} for the defined ssffTracks of the +##' emuDB). If the parameter \code{onTheFlyFunctionName} is set, then +##' this corresponds to the column name af the AsspDataObj (see +##' \code{wrassp::wrasspOutputInfos[[onTheFlyFunctionName]]$tracks} and +##' \code{\link[wrassp]{wrasspOutputInfos}} - NOTE: \code{library(wrassp)} might be +##' necessary to access the \code{wrasspOutputInfos} object without the \code{wrassp::} prefix). +##' If the parameter \code{onTheFlyFunctionName} is set and this one isn't, then per default +##' the first track listed in the \code{wrassp::wrasspOutputInfos} is chosen +##' (\code{wrassp::wrasspOutputInfos[[onTheFlyFunctionName]]$tracks[1]}). +##' +##' \code{get_trackdata} has so called constant track names that are always available +##' for every emuDB. The constant track names are: +##' +##' \itemize{ +##' \item{"MEDIAFILE_SAMPLES": refers to the audio sample values specified +##' by the "mediafileExtension" entry of the DBconfig.json} +##' } +##' +##' @param cut An optional cut time for segment data, ranges between +##' 0 and 1, a value of 0.5 will extract data only at the segment midpoint. +##' @param npoints An optional number of points to retrieve for each segment or event. +##' For segments this requires the \code{cut} parameter to be set; if this is the +##' case, then data is extracted around the resulting cut time. +##' For events data is extracted around the event time. If npoints is an odd number, the +##' samples are centered around the cut-time-sample; if not, they are skewed to the +##' right by one sample. +##' @param onTheFlyFunctionName Name of wrassp function that will perform the on-the-fly +##' calculation (see \code{?wrassp} for a list of all the signal processing functions wrassp provides) +##' @param onTheFlyParams A \code{pairlist} of parameters that will be given to the function +##' passed in by the \code{onTheFlyFunctionName} parameter. This list can easily be +##' generated by applying the \code{formals} function to the on-the-fly function name and then setting the according +##' parameter one wishes to change. +##' @param onTheFlyOptLogFilePath Path to optional log file for on-the-fly function +##' @param onTheFlyFunction pass in a function pointer. This function will be called with the path to the +##' current media file. It is required that the function returns a tibble/data.frame like object that contains +##' a column called \code{frame_time} that specifies the time point of each row. \code{get_trackdata} will then +##' extract the rows belonging to the current segment. This allows users to code their own function to be used with +##' \code{get_trackdata} and allows for most data formats to be used within an emuDB. +##' @param resultType Specify class of returned object. Either \code{"emuRtrackdata"}, +##' \code{"trackdata"} or \code{"tibble"} == the default (see \code{\link{trackdata}}, \code{\link{emuRtrackdata}} +##' and \code{\link[tibble]{tibble}} for details about these objects). +##' @param consistentOutputType Prevent converting the output object to a \code{data.frame} +##' depending on the \code{npoint} and \code{cut} arguments (only applies to output +##' type "trackdata"). Set to \code{FALSE} if the following legacy \code{emu.track} output +##' conversion behaviour is desired: If the \code{cut} parameter is not set (the default) an +##' object of type \code{\link{trackdata}} is returned. If \code{cut} is set and \code{npoints} +##' is not, or the seglist is of type event and npoints is not set, a \code{\link{data.frame}} is +##' returned (see the \code{consistentOutputType} to change this behaviour). +##' @param verbose Show progress bars and further information +##' @return object of type specified with \code{resultType} +##' @seealso \code{\link{formals}}, \code{\link[wrassp]{wrasspOutputInfos}}, +##' \code{\link{trackdata}}, \code{\link{emuRtrackdata}} +##' @keywords misc +##' @import wrassp +##' @aliases emu.track +##' @export +##' @examples +##' \dontrun{ +##' +##' ################################## +##' # prerequisite: loaded ae emuDB +##' # (see ?load_emuDB for more information) +##' +##' # query loaded "ae" emuDB for all "i:" segments of the "Phonetic" level +##' sl = query(emuDBhandle = ae, +##' query = "Phonetic == i:") +##' +##' # get the corresponding formant trackdata +##' td = get_trackdata(emuDBhandle = ae, +##' seglist = sl, +##' ssffTrackName = "fm") +##' +##' # get the corresponding F0 trackdata +##' # as there is no F0 ssffTrack defined in the "ae" emuDB we will +##' # calculate the necessary values on-the-fly +##' td = get_trackdata(emuDBhandle = ae, +##' seglist = sl, +##' onTheFlyFunctionName = "ksvF0") +##' +##' } +##' +"get_trackdata" <- function(emuDBhandle, + seglist = NULL, + ssffTrackName = NULL, + cut = NULL, + npoints = NULL, + onTheFlyFunctionName = NULL, + onTheFlyParams = NULL, + onTheFlyOptLogFilePath = NULL, + onTheFlyFunction = NULL, + resultType = "tibble", + consistentOutputType = TRUE, + verbose = TRUE){ + + check_emuDBhandle(emuDBhandle) + + ######################### + # get DBconfig + DBconfig = load_DBconfig(emuDBhandle) + + # convert factors into characters + if("tbl_df" %in% class(seglist)){ + seglist = seglist %>% dplyr::mutate_if(is.factor, as.character) + } + + ######################### + # parameter checks + + # set ssffTrackName to first tracks entry in wrasspOutputInfos if ssffTrackName is not set + if(!is.null(onTheFlyFunctionName) && is.null(ssffTrackName)){ + ssffTrackName = wrassp::wrasspOutputInfos[[onTheFlyFunctionName]]$tracks[1] + } + + # checks for onTheFlyFunction + if(is.function(onTheFlyFunction)){ + if(resultType != "tibble"){ + stop('onTheFlyFunction only works with with resultType = "tibble"') + } + ssffTrackName = "CUSTOM_FUNCTION" + } + + # check if all values for minimal call are set + if(!is.function(onTheFlyFunction) && (is.null(emuDBhandle) || is.null(seglist) || is.null(ssffTrackName))) { + stop("emuDBhandle, seglist and ssffTrackName have to all be set!\n") + } + + # check if cut value is correct + if(!is.null(cut)){ + if(cut < 0 || cut > 1){ + stop('Bad value given for cut argument. Cut can only be a value between 0 and 1!') + } + if(sum(seglist$end) == 0){ + stop("Cut value should not be set if sum(seglist$end) == 0!") + } + } + + # check if npoints value is correct + if(!is.null(npoints)){ + if(is.null(cut) && emusegs.type(seglist) != 'event'){ + stop(paste0("Cut argument hast to be set or seglist has ", + "to be of type event if npoints argument is used.")) + } + } + + # check if onTheFlyFunctionName is set if onTheFlyParams is + if(is.null(onTheFlyFunctionName) && !is.null(onTheFlyParams)){ + stop('onTheFlyFunctionName has to be set if onTheFlyParams is set!') + } + + # check if both onTheFlyFunctionName and onTheFlyParams are set if onTheFlyOptLogFilePath is + if( !is.null(onTheFlyOptLogFilePath) + && (is.null(onTheFlyFunctionName) || is.null(onTheFlyParams))){ + stop(paste0("Both onTheFlyFunctionName and onTheFlyParams have to be ", + "set for you to be able to use the onTheFlyOptLogFilePath parameter!")) + } + + # check resultType if valid string + if(!resultType %in% c("tibble", "emuRtrackdata", "trackdata")){ + stop("resultType has to either be 'tibble ', 'emuRtrackdata' or 'trackdata'") + } + + # + if(!resultType %in% c("trackdata")){ + if(consistentOutputType == FALSE){ + if(verbose){ + cat(paste0("INFO: resetting 'consistentOutputType' back to TRUE as setting ", + "it to FALSE is only allowed when resultType is set to 'trackdata'\n")) + } + consistentOutputType = TRUE + } + } + + if(resultType == "emuRtrackdata" && class(seglist)[1] == "emusegs"){ + stop("resultType can only be 'trackdata', if a seglist of class 'emusegs' is passed in") + } + + if(nrow(seglist) == 0){ + stop("'seglist' is empty! Can't get trackdata if no segments are specified...") + } + + + ######################### + # get track definition + if(ssffTrackName %in% c("MEDIAFILE_SAMPLES")){ + trackDef = list() + trackDef[[1]] = list() + trackDef[[1]]$name = "MEDIAFILE_SAMPLES" + trackDef[[1]]$columnName = "audio" + trackDef[[1]]$fileExtension = DBconfig$mediafileExtension + }else if(ssffTrackName %in% c("CUSTOM_FUNCTION")){ + trackDef = list() + trackDef[[1]] = list() + trackDef[[1]]$name = "CUSTOM_FUNCTION" + trackDef[[1]]$columnName = "CUSTOM_FUNCTION" + trackDef[[1]]$fileExtension = "CUSTOM_FUNCTION" + trackCache = list() + }else{ + if(is.null(onTheFlyFunctionName)){ + trackDefFound = sapply(DBconfig$ssffTrackDefinitions, + function(x){ x$name == ssffTrackName}) + trackDef = DBconfig$ssffTrackDefinitions[trackDefFound] + + # check if correct nr of trackDefs where found + if(length(trackDef) != 1){ + if(length(trackDef) < 1 ){ + stop("The emuDB object ", DBconfig$name, + " does not have any ssffTrackDefinitions called ", ssffTrackName) + }else{ + stop("The emuDB object ", DBconfig$name, + " has multiple ssffTrackDefinitions called ", ssffTrackName, + "! This means the DB has an invalid _DBconfig.json") + } + } + }else{ + trackDef = list() + trackDef[[1]] = list() + trackDef[[1]]$name = ssffTrackName + trackDef[[1]]$columnName = ssffTrackName + } + } + + ################################### + # check for sample rate consistancy + if(!("emuRsegs" %in% class(seglist)) & !("tbl_df" %in% class(seglist))){ + uniqSessionBndls = utils::read.table(text = as.character(dplyr::distinct(seglist, .data$utts)$utts), + sep = ":", + col.names = c("session", "bundle"), + colClasses = c("character", "character"), + stringsAsFactors = FALSE) + }else{ + uniqSessionBndls = dplyr::distinct(as.data.frame(seglist), .data$bundle, .data$session) + } + DBI::dbExecute(emuDBhandle$connection,"CREATE TEMP TABLE uniq_session_bndls_tmp (session TEXT,bundle TEXT)") + DBI::dbWriteTable(emuDBhandle$connection, "uniq_session_bndls_tmp", uniqSessionBndls, append = TRUE) + sesBndls = DBI::dbGetQuery(emuDBhandle$connection, paste0("SELECT ", + " bundle.db_uuid, ", + " bundle.session, ", + " bundle.name, ", + " bundle.annotates, ", + " bundle.sample_rate, ", + " bundle.md5_annot_json ", + "FROM uniq_session_bndls_tmp, ", + " bundle ", + "WHERE uniq_session_bndls_tmp.session = bundle.session ", + " AND uniq_session_bndls_tmp.bundle = bundle.name")) + DBI::dbExecute(emuDBhandle$connection,"DROP TABLE uniq_session_bndls_tmp") + + # remove uuid & MD5sum because we don't want to scare our users :-) + sesBndls$db_uuid = NULL + sesBndls$MD5annotJSON = NULL + if(length(unique(sesBndls$sample_rate)) != 1){ + warning(paste0("The emusegs/emuRsegs object passed in refers to bundles with in-homogeneous sampling rates in their ", + "audio files! Here is a list of all refered to bundles incl. their sampling rate: \n"), + paste(utils::capture.output(print(sesBndls %>% + dplyr::rename( + bundle = "name", + media_file = "annotates" + ))), + collapse = "\n")) + } + + ################################### + #create empty index, ftime matrices + index <- matrix(ncol = 2, nrow = nrow(seglist)) + colnames(index) <- c("start","end") + + ftime <- matrix(ncol = 2, nrow = nrow(seglist)) + colnames(ftime) <- c("start","end") + + data <- NULL + origFreq <- NULL + + ############################### + # set up function formals + pb + if(!is.null(onTheFlyFunctionName)){ + funcFormals = formals(onTheFlyFunctionName) + funcFormals[names(onTheFlyParams)] = onTheFlyParams + funcFormals$toFile = FALSE + funcFormals$optLogFilePath = onTheFlyOptLogFilePath + if(verbose){ + cat('\n INFO: applying', onTheFlyFunctionName, 'to', nrow(seglist), 'segments/events\n') + pb <- utils::txtProgressBar(min = 0, max = nrow(seglist), style = 3) + } + }else{ + if(verbose){ + cat('\n INFO: parsing', nrow(seglist), trackDef[[1]]$fileExtension, 'segments/events\n') + pb <- utils::txtProgressBar(min = 0, max = nrow(seglist), style = 3) + } + } + + prevUtt = "" + bndls = list_bundles(emuDBhandle) + + # init result lists & index + curIndexStart = 1 + data_list = list() + timeStampRowNames_list = list() + + # loop through bundle names + for (i in 1:nrow(seglist)){ + if(!("emuRsegs" %in% class(seglist)) & !("tbl_df" %in% class(seglist))){ + curUtt = seglist$utts[i] + splUtt = stringr::str_split(curUtt, ':')[[1]] + }else{ + splUtt = c(seglist$session[i], seglist$bundle[i]) + curUtt = paste(splUtt[1], ":", splUtt[2]) + } + + # check if utts entry exists + if(!any(bndls$session == splUtt[1] & bndls$name == splUtt[2])){ + stop("Following utts entry not found: ", seglist$utts[i]) + } + + fpath <- file.path(emuDBhandle$basePath, + paste0(splUtt[1], session.suffix), + paste0(splUtt[2], bundle.dir.suffix), + paste0(splUtt[2], ".", trackDef[[1]]$fileExtension)) + + # update progressbar + if(verbose){ + utils::setTxtProgressBar(pb, i) + } + + ################ + #get data object + if(!is.function(onTheFlyFunction)){ + if(!is.null(onTheFlyFunctionName)){ + qr = DBI::dbGetQuery(emuDBhandle$connection, paste0("SELECT * ", + "FROM bundle ", + "WHERE db_uuid = '", emuDBhandle$UUID, "' ", + " AND session = '", splUtt[1], "' ", + " AND name='", splUtt[2], "'")) + funcFormals$listOfFiles = file.path(emuDBhandle$basePath, + paste0(qr$session, session.suffix), + paste0(qr$name, bundle.dir.suffix), + qr$annotates) + # only perform calculation if curUtt is not equal to preUtt + if(curUtt != prevUtt){ + curDObj = do.call(onTheFlyFunctionName, funcFormals) + } + + } else { # if precalculated track read in file + if(file.exists(fpath)){ + if (!is.null(trackDef[[1]]$fileFormat) && trackDef[[1]]$fileFormat == "Rda") { + rda_file_environment = rlang::new_environment() + load(fpath, envir = rda_file_environment) + + curDObj = list() + curDObj$data = rda_file_environment$data + attr(curDObj, "sampleRate") = rda_file_environment$sampleRate + attr(curDObj, "startTime") = rda_file_environment$startTime + attr(curDObj, "origFreq") = rda_file_environment$originalFrequency + } else { + # if file doesn't exist this causes the R session to crash + # this didn't used to be the case? Further wrassp debugging + # needed... + curDObj <- wrassp::read.AsspDataObj(fpath) + } + } else { + stop("trying to read a stored track from a file that doesn't exist: ", fpath) + } + } + + # set origFreq + origFreq <- attr(curDObj, "origFreq") + + # set curStart+curEnd + curStart <- seglist$start[i] + if(sum(seglist$end) == 0){ + curEnd <- seglist$start[i] + }else{ + curEnd <- seglist$end[i] + } + + + fSampleRateInMS <- (1 / attr(curDObj, "sampleRate")) * 1000 + fStartTime <- attr(curDObj, "startTime") * 1000 + # add one on if event to be able to capture in breakValues + if(sum(seglist$end) == 0){ # if event seglist + if(npoints == 1 || is.null(npoints)){ + timeStampSeq <- seq(fStartTime, curEnd + fSampleRateInMS, fSampleRateInMS) + }else{ + timeStampSeq <- seq(fStartTime, curEnd + fSampleRateInMS * npoints, fSampleRateInMS) + } + }else{ + if(npoints == 1 || is.null(npoints)){ + timeStampSeq <- seq(fStartTime, curEnd, fSampleRateInMS) + }else{ + timeStampSeq <- seq(fStartTime, curEnd + fSampleRateInMS * npoints, fSampleRateInMS) + } + } + + ################################################## + # search for first element larger than start time + breakVal <- -1 + for (j in 1:length(timeStampSeq)){ + if (timeStampSeq[j] >= curStart){ + breakVal <- j + break + } + } + # check if breakVal was found + if(breakVal == -1){ + stop("No track samples found belonging to sl_rowIdx: ", i, + " with values: ", paste0(seglist[i,], collapse = " "), + "! This is probably due to a very short SEGMENT that doesn't contain any '", + ssffTrackName, "' values.") + } + + curStartDataIdx <- breakVal + curEndDataIdx <- length(timeStampSeq) + + + ################ + # extract data + tmpData <- eval(parse(text = paste("curDObj$", + trackDef[[1]]$columnName, + sep = ""))) + + if(is.null(tmpData) && inherits(curDObj, "AsspDataObj")){ + stop("Couldn't extract column with name: ", + "'", trackDef[[1]]$columnName, "' from AsspDataObj ", + "that was generated by a wrassp::read.AsspDataObj() ", + "of the file ", fpath, " this could be due to a bad ", + "column name in the DBconfig." + ) + } + + if(!is.null(trackDef[[1]]$fileFormat) && trackDef[[1]]$fileFormat == "Rda") { + if (!is.matrix(tmpData)) { + tmpData = as.matrix(tmpData) + } + } + + ############################################################# + # set curIndexEnd dependant on if event/segment/cut/npoints + if(!is.null(cut) || sum(seglist$end) == 0){ + if(sum(seglist$end) == 0){ + cutTime = curStart + curEndDataIdx <- curStartDataIdx + curStartDataIdx = curStartDataIdx - 1 # last to elements are relevant -> move start to left + }else{ + cutTime = curStart + (curEnd - curStart) * cut + } + + sampleTimes = timeStampSeq[curStartDataIdx:curEndDataIdx] + closestIdx = which.min(abs(sampleTimes - cutTime)) + cutTimeSampleIdx = curStartDataIdx + closestIdx - 1 + + if(is.null(npoints) || npoints == 1){ + # reset data idxs + curStartDataIdx = curStartDataIdx + closestIdx - 1 + curEndDataIdx = curStartDataIdx + curIndexEnd = curIndexStart + }else{ + # reset data idx + halfNpoints = (npoints - 1) / 2 # -1 removes cutTimeSample + curStartDataIdx = cutTimeSampleIdx - floor(halfNpoints) + curEndDataIdx = cutTimeSampleIdx + ceiling(halfNpoints) + curIndexEnd = curIndexStart + npoints - 1 + } + + }else{ + # normal segments + curIndexEnd <- curIndexStart + curEndDataIdx - curStartDataIdx + } + # set index and ftime + index[i,] <- c(curIndexStart, curIndexEnd) + ftime[i,] <- c(timeStampSeq[curStartDataIdx], timeStampSeq[curEndDataIdx]) + + ############################# + # calculate size of and create new data matrix + rowSeq <- seq(timeStampSeq[curStartDataIdx], timeStampSeq[curEndDataIdx], fSampleRateInMS) + curData <- matrix(ncol = ncol(tmpData), nrow = length(rowSeq)) + + # check if it is possible to extract curData + if(curStartDataIdx > 0 && curEndDataIdx <= dim(tmpData)[1]){ + possibleError_a <- tryCatch( + curData[, ] <- tmpData[curStartDataIdx:curEndDataIdx, ], + error=function(e) e + ) + # catch error and move on + if(inherits(possibleError_a, "error")){ + warning(paste0("The amount of data extracted doesn't match the \n", + "expected segment length in segment list row ", i, ".\n", + "This can be caused by slight rounding errors in \n", + "sample rates and start times. Adapting to extracted \n", + "sample length.")) + rowSeq <- timeStampSeq[curStartDataIdx:curEndDataIdx] + curData <- matrix(ncol = ncol(tmpData), nrow = length(rowSeq)) + tmp_len <- length(curStartDataIdx:curEndDataIdx) - length(rowSeq) + tmp_range <- curStartDataIdx:curEndDataIdx + possibleError <- tryCatch( + curData[, ] <- tmpData[curStartDataIdx:curEndDataIdx, ], + error=function(e) e + ) + if(inherits(possibleError, "error")) { + stop(paste0("Even after length adaptation an error occured. This shouldn't happen!\n", + "Problematic segment list row: ", i)) + } + } + ############# new ############## + }else{ + entry= paste(seglist[i,], collapse = " ") + stop('Can not extract data for the ', i, 'th row of the segment list: ', entry, ' start and/or end times out of bounds') + } + + curIndexStart <- curIndexEnd + 1 + + data_list[[i]] = curData + timeStampRowNames_list[[i]] = rowSeq + + prevUtt = curUtt + } else { + # use custom function + mediaPath <- file.path(emuDBhandle$basePath, + paste0(splUtt[1], session.suffix), + paste0(splUtt[2], bundle.dir.suffix), + paste0(splUtt[2], ".", DBconfig$mediafileExtension)) + + if(is.null(trackCache[[mediaPath]])){ + customFunctionRes = onTheFlyFunction(mediaPath) + trackCache[[mediaPath]] <- customFunctionRes + } + else + { + message("Cached results were used") + customFunctionRes = trackCache[[mediaPath]] + } + + if(!"frame_time" %in% colnames(customFunctionRes)){ + stop("The function passed in to onTheFlyFunction didn't return a data.frame with a column called 'frame_time'!") + } + + customFunctionRes_filtered = customFunctionRes %>% + dplyr::filter(.data$frame_time >= seglist$start[i] & .data$frame_time <= seglist$end[i]) + + # set index and ftime + index[i,] <- c(curIndexStart, curIndexStart + nrow(customFunctionRes_filtered) - 1) + ftime[i,] <- c(min(customFunctionRes_filtered$frame_time), max(customFunctionRes_filtered$frame_time)) + + # set data_list entry and timeStamp + data_list[[i]] = as.matrix(dplyr::select(customFunctionRes_filtered, -"frame_time")) + timeStampRowNames_list[[i]] = customFunctionRes_filtered %>% dplyr::pull(.data$frame_time) + + # reset start index + curIndexStart = curIndexStart + nrow(customFunctionRes_filtered) + } + + } + # combind lists to form result + data = do.call(rbind, data_list) + timeStampRowNames = unlist(timeStampRowNames_list) + + if(!consistentOutputType + && ((!is.null(cut) + && (npoints == 1 || is.null(npoints))) + || (sum(seglist$end) == 0 + && (npoints == 1 + || is.null(npoints))))){ + resObj = as.data.frame(data) + colnames(resObj) = paste(trackDef[[1]]$columnName, seq(1:ncol(resObj)), sep = '') + }else{ + rownames(data) <- timeStampRowNames + colnames(data) <- paste("T", 1:ncol(data), sep = "") + ######################################## + #convert data, index, ftime to trackdata + resObj <- as.trackdata(data, index=index, ftime, ssffTrackName) + + if(any(trackDef[[1]]$columnName %in% c("dft", "css", "lps", "cep"))){ + if(!is.null(origFreq)){ + if(verbose){ + cat('\n INFO: adding fs attribute to trackdata$data fields') + } + attr(resObj$data, "fs") <- seq(0, origFreq/2, length = ncol(resObj$data)) + class(resObj$data) <- c(class(resObj$data), "spectral") + }else{ + stop("no origFreq entry in spectral data file!") + } + } + } + + # close progress bar if open + if(exists('pb')){ + close(pb) + } + + # convert to emuRtrackdata if resultType is 'emuRtrackdata' + if(resultType =="emuRtrackdata"){ + resObj = create_emuRtrackdata(seglist, resObj) + } + + if(resultType == "tibble"){ + resObj = tibble::as_tibble(create_emuRtrackdata(seglist, resObj)) + } + + return(resObj) + +} + +####################### +# FOR DEVELOPMENT +# library('testthat') +# test_file('tests/testthat/test_emuR-get_trackdata.R') diff --git a/R/emuR-legacy.R b/R/emuR-legacy.R new file mode 100644 index 00000000..84459dbb --- /dev/null +++ b/R/emuR-legacy.R @@ -0,0 +1,902 @@ +build_legacyBundleList <- function(parsedEmuPath, + currentPath = NULL, + fileSuffixPattern, + bundleList = list()){ + if(length(parsedEmuPath) == 0){ + fileRegexPattern=paste0('^.*', fileSuffixPattern) + fileList = list.files(currentPath, pattern = fileRegexPattern, recursive = FALSE, full.names = FALSE) + if(length(fileList) == 0){ + return(NULL) + }else{ + # TODO convert to bundle names + res=list() + for(f in fileList){ + # remove file suffix for bundle name + bundleName = gsub(x = f, pattern = fileSuffixPattern, replacement = '') + res[[length(res) + 1]] = bundleName + } + return(res) + } + }else{ + p1 = parsedEmuPath[[1]] + if(length(parsedEmuPath) == 1){ + restPath = list() + }else{ + restPath = parsedEmuPath[2:length(parsedEmuPath)] + } + if(p1[['pattern']]){ + dirs = list.dirs(currentPath, full.names = FALSE, recursive = FALSE) + bll = list() + for(dir in dirs){ + dirPatt = gsub('*', '.*', p1[['dir']], fixed = TRUE) + if(grepl(dirPatt,dir)){ + newPath = file.path(currentPath, dir) + bl = build_legacyBundleList(restPath, + newPath, + fileSuffixPattern, + bundleList = bundleList) + # prepend dir to list + bl = lapply(bl, function(x,s) return(c(s,x)), dir) + bll = c(bll, bl) + } + + } + return(bll) + }else{ + if(is.null(currentPath)){ + newPath = p1[['dir']] + }else{ + newPath = file.path(currentPath, p1[['dir']]) + } + + bll = build_legacyBundleList(restPath, + newPath, + fileSuffixPattern, + bundleList = bundleList) + return(bll) + } + } + +} + +convert_legacyBundleId <- function(legacybundleID){ + # takes character vector of legacy globpattern dirs and bundle name + # and converts to session and bundle + # examples: + # legacybundleID: "BLOCK10","SES1000","foo_42" -> "BLOCK10_SES1000","foo_42" + # legacybundleID: "SES1000","foo_42" -> "SES1000","foo_42" + # legacybundleID: "foo_42" -> "0000","foo_42" + + legacybundleIDLen = length(legacybundleID) + globPatternCount = legacybundleIDLen - 1 + + if(globPatternCount > 0){ + # collapse globpattern matches to one session ID + s = paste(legacybundleID[1:globPatternCount], collapse = '_') + + }else{ + # no glob patterns, put all bundles to dummy session + s = '0000' + + } + return(c(s, legacybundleID[legacybundleIDLen])) +} + +get_legacyEmuBundles = function(basePath, + pathPattern, + primaryFileSuffixPattern = NULL){ + if(is_relativeFilePath(pathPattern)){ + absPathPattern = file.path(basePath, pathPattern) + }else{ + absPathPattern = pathPattern + } + emuParsedPathPattern = parse_emuTrackPath(absPathPattern) + bl = build_legacyBundleList(emuParsedPathPattern[['dirs']], fileSuffixPattern = primaryFileSuffixPattern) + return(bl) +} + + +convert_emuTrackPath <- function(absEmuTrackPath){ + # Emu track path may have asterisks for pattern matching e.g. + # wav E:/KielCorpusRead/*/*/*/* + # to use this pattern with R regex we have to convert to regular expression + epSpl = strsplit(absEmuTrackPath,'/')[[1]] + pp = c() + cDir = NULL + lastIsAsterisk = FALSE + for(epDir in epSpl){ + #if(epDir!='*'){ + if(!grepl('*',epDir)){ + # Bug this condition block is never reached!! + # The regex substitution is done is list.trackdirs + lastIsAsterisk = FALSE + + if(epDir != ''){ + if(is.null(cDir)){ + cDir = epDir + }else{ + sep = '/' + if(length(gl <- grep('/$', cDir))){ + sep = '' + } + cDir = paste(cDir, epDir, sep = sep) + } + }else{ + cDir = '/' + } + }else{ + lastIsAsterisk = TRUE + + pp = c(pp, cDir, epDir) + cDir = NULL + } + } + if(!lastIsAsterisk){ + pp = c(pp, cDir) + } + return(pp) +} + +parse_emuTrackPath <- function(absEmuTrackPath){ + # Emu track path may have asterisks for pattern matching e.g. + # wav E:/KielCorpusRead/*/*/*/* + # to use this pattern with R regex we have to convert to regular expression + epSpl = strsplit(absEmuTrackPath,'/+')[[1]] + topo = list() + wildcardDirLevelCount = 0 + pp = list() + cDir = NULL + lastIsAsterisk = FALSE + for(epDir in epSpl){ + if(!grepl('[*]', epDir)){ + lastIsAsterisk = FALSE + if(epDir != ''){ + if(is.null(cDir)){ + cDir = epDir + }else{ + sep = '/' + if(length(gl <- grep('/$', cDir))){ + sep = '' + } + cDir = paste(cDir, epDir, sep = sep) + } + }else{ + cDir = '/' + } + }else{ + lastIsAsterisk = TRUE + + if(!is.null(cDir)){ + pp[[length(pp)+1]] = list(dir = cDir, + pattern = FALSE) + } + cDir = NULL + + pp[[length(pp)+1]] = list(dir = epDir, + pattern = TRUE) + wildcardDirLevelCount = wildcardDirLevelCount + 1 + + + } + } + if(!lastIsAsterisk){ + pp[[length(pp)+1]] = list(dir = cDir, + pattern = FALSE) + } + topo[['dirs']] = pp + topo[['patternCount']] = wildcardDirLevelCount + + return(topo) +} + +## List directories to search for track files +## @description emu track pathes syntax allows asterisk wildcard pattern +## for path. This function goes through the directory hierarchy and +## returns list with all directories matching the pattern +## @param emuPath Emu path specification (may contain asterisk wildcards) type character +## @param parsedEmuPathPattern character vector containing the parsed segments of +## the path. Each segment is a dierctory or a wildacrd asterisk. +## @return character vector of absolute path directories +## @keywords emuDB bundle Emu +## +list_trackdirs <- function(emuPath = NULL, + parsedEmuPathPattern = NULL){ + if(is.null(parsedEmuPathPattern)){ + if(is.null(emuPath)){ + stop("At least one of the parameters emuPath or parsedEmuPathPattern is required.") + } + parsedEmuPathPattern = convert_emuTrackPath(emuPath) + } + cDir = NULL + dirLevels = length(parsedEmuPathPattern) + res = c() + for(i in 1:dirLevels){ + ettp = parsedEmuPathPattern[i] + lastLevel = (i == dirLevels) + if(!grepl('[*]',ettp)){ + if(is.null(cDir)){ + cDir = ettp + }else{ + cDir = file.path(cDir,ettp) + } + if(lastLevel){ + return(cDir) + } + }else{ + + dirs = list.dirs(cDir, recursive = FALSE) + for(dir in dirs){ + dirPatt = gsub('*', '.*', ettp, fixed = TRUE) + if(grepl(dirPatt,dir)){ + newPattern = c(dir) + if(!lastLevel){ + for(j in (i + 1L):dirLevels){ + newPattern = c(newPattern, parsedEmuPathPattern[j]) + } + } + wcRes = list_trackdirs(parsedEmuPathPattern = newPattern) + res = c(res, wcRes) + } + } + return(res) + } + } + return(cDir) +} + +get_legacyFilePath = function(basePath, + emuPath, + legacybundleID, + fileExtension){ + if(is_relativeFilePath(emuPath)){ + absPathPattern = file.path(basePath, emuPath) + }else{ + absPathPattern = emuPath + } + pp = parse_emuTrackPath(absEmuTrackPath = absPathPattern) + path = NULL + bIdIdx = 1 + for(pdl in pp[['dirs']]){ + + if(pdl[['pattern']]){ + # substitute + dir = legacybundleID[bIdIdx] + bIdIdx = bIdIdx + 1 + }else{ + dir = pdl[['dir']] + } + if(is.null(path)){ + path=dir + }else{ + path = file.path(path, dir) + } + } + filename = paste0(legacybundleID[length(legacybundleID)], '.', fileExtension) + return(file.path(path, filename)) +} + +## @import stringr wrassp +load_annotationForLegacyBundle = function(schema, + legacyBundleID, + basePath = NULL, + encoding = NULL){ + + newBundleId = convert_legacyBundleId(legacyBundleID) + bundleName = newBundleId[2] + sessionName = newBundleId[1] + # determine samplerate + # fallback is primary file + sampleRateReferenceFile = NULL + sampleTrackFile = NULL + if(!is.null(schema[['mediafileBasePathPattern']]) && ! is.null(schema[['mediafileExtension']])){ + # use samples track to determine sample rate + + ## resolve wildcards + #sampleRateReferenceFile=find.file.in.emu.path.pattern(emuPathPattern=schema[['mediafileBasePathPattern']],fileName=sampleTrackFile,basePath) + sampleRateReferenceFile = get_legacyFilePath(basePath, + emuPath = schema[['mediafileBasePathPattern']], + legacyBundleID, + fileExtension = schema[['mediafileExtension']]) + } + if(is.null(sampleRateReferenceFile)){ + stop("Could not determine media sample rate of bundle ID ", + paste(legacyBundleID,collapse='_'),"\n") + }else{ + # TODO ASSP does not return good error messages if an IO error (not exist, permission denied ,etc...) occurs + # TODO test file access first + tryCatch({pfAssp = wrassp::read.AsspDataObj(sampleRateReferenceFile, 0, 4000)}, error = function(e){stop(paste0("Error reading: ", sampleRateReferenceFile, " (this could be caused by a faulty or empty audio file)"))}) + + sampleRate = attr(pfAssp, 'sampleRate') + } + + # create signal paths list + signalpaths = list() + for(tr in schema[['tracks']]){ + sFile = get_legacyFilePath(basePath = basePath, + emuPath = tr[['basePath']], + legacyBundleID, + fileExtension = tr[['fileExtension']]) + if(!is.null(sFile)){ + signalpaths[[length(signalpaths) + 1L]] = sFile + } + } + idCnt = 0 + levels = list() + links = list() + # ESPS label files first + for(ad in schema[['annotationDescriptors']]){ + extension = ad[['extension']] + annoBasePath = NULL + if(is.null(ad[['basePath']])){ + # TODO use same as primary track + }else{ + annoBasePath = ad[['basePath']] + # Emu: assume that files reside in this directory (no recursive search) + annoPath = get_legacyFilePath(basePath = basePath, + emuPath = ad[['basePath']], + legacyBundleID, + fileExtension = extension) + if(!is.null(annoPath)){ + if(extension != 'hlb'){ + # parse lab file + if(file.exists(annoPath)){ + labTier = parse_espsLabelFile(labFilePath = annoPath, + tierName = ad[['name']], + tierType = ad[['type']], + encoding = encoding, + sampleRate = sampleRate, + idCnt = idCnt) + if(!is.null(labTier)){ + levels[[labTier[['name']]]] <- labTier + labTierItemCnt = length(labTier[['items']]) + idCnt = idCnt + labTierItemCnt + } + }else{ + # warning ?? + } + } + } + } + + } + # now hlb file + for(ad in schema[['annotationDescriptors']]){ + extension = ad[['extension']] + + if(is.null(ad[['basePath']])){ + # TODO use same as primary track + }else{ + annoBasePathEmu = ad[['basePath']] + # resolve wildcards + if(extension == 'hlb'){ + hlbFilePath = get_legacyFilePath(basePath = basePath, + emuPath = annoBasePathEmu, + legacyBundleID, + fileExtension = extension) + if(file.exists(hlbFilePath)){ + hlbParseResult = parse_hlbFile(hlbFilePath = annoPath, + levelDefinitions = schema[['levelDefinitions']], + levels = levels, + encoding = encoding); + hlbTiers = hlbParseResult[['hlbTiers']] + links = hlbParseResult[['links']] + # sort levels + lIdx = 0 + sortedLevels = list() + + for( ld in schema[['levelDefinitions']]){ + lIdx = lIdx + 1L + for(hlbTier in hlbTiers){ + if(ld[['name']] == hlbTier[['name']]){ + sortedLevels[[hlbTier[['name']]]] <- hlbTier + break; + } + } + } + levels = sortedLevels + }else{ + #cat("Warning: HLB file: ",hlbFilePath," does not exist!\n") + } + } + } + + } + + # Initialize empty levels (see GitHub issue #127) + for( ld in schema[['levelDefinitions']]){ + if(is.null(levels[[ld[['name']]]])){ + levels[[ld[['name']]]] <- list(name = ld[['name']], + type = ld[['type']], + items = list()) + } + } + + bundleSampleRate = NULL + for(l in levels){ + lvlSr = l[['sampleRate']] + if(is.null(bundleSampleRate)){ + if(!is.null(lvlSr)){ + bundleSampleRate = lvlSr + } + + }else{ + if(!is.null(lvlSr) && lvlSr != bundleSampleRate){ + warning("WARNING: Levels have different sample rates!\n") + } + } + } + # set sample rate even if no annotation levels exist + if(is.null(bundleSampleRate)){ + bundleSampleRate = sampleRate + } + + + sampleTrackFile = paste0(bundleName, + '.', + schema[['mediafileExtension']]) + annotates = paste0(sampleTrackFile) + bundle = list(name = bundleName, + sessionName = sessionName, + legacyBundleID = legacyBundleID, + annotates = annotates, + sampleRate = bundleSampleRate, + levels = levels, + signalpaths = signalpaths, + mediaFilePath =sampleRateReferenceFile, + links = links) + return(bundle) +} + + +build_hashedLinkDefs <- function(linkDefinitions){ + + # build link definitions hashed by super level name + linkDefsHashed = list() + for(ld in linkDefinitions){ + supLvlNm = ld[['superlevelName']] + linkDefsHashed[[supLvlNm]] = c(linkDefsHashed[[supLvlNm]], + ld[['sublevelName']]) + } + return(linkDefsHashed) +} + +remove_redundantBundleLinks <- function(linkDefsHashed, + bundle){ + + lvls = bundle[['levels']] + itemsHashed = list() + for(lvl in lvls){ + for(it in lvl[['items']]){ + itemsHashed[[it[['id']]+1]] = lvl[['name']] + } + } + legacyLinks = bundle[['links']] + + # new link list without redundant links + links = list() + for(legLk in legacyLinks){ + fromLvl = itemsHashed[[legLk[['fromID']] + 1]] + toLvl = itemsHashed[[legLk[['toID']]+1]] + + subLvls = linkDefsHashed[[fromLvl]] + for(subLvl in subLvls){ + if(is.null(toLvl)){ + txt = paste0("Found unvalid link in ", + bundle[["name"]], + ".hlb that points to an item that doesn't exist! ", + "This means the annotation is corrupt and can not be parsed. ", + "Check the link matrix at the bottom of the .hlb file that start with the identifier: ", + legLk$fromID, "; for the invalid item identifier on the same line: ", legLk$toID, " ", + " (TIP: manually deleting the invalid identifier might fix the problem)") + stop(txt) + } + + if(subLvl == toLvl){ + links[[length(links) + 1]] = legLk + } + } + } + # overwrite + bundle[['links']] = links + return(bundle) + +} + + +##' @title Convert legacy EMU database to the emuDB format +##' @description Converts an existing legacy EMU database to emuDB database structure. +##' Copies or rewrites signal files and converts the database configuration and annotation data. +##' The legacy database must be addressed by its template file. +##' @details The database will be converted if the legacy database template file \code{emuTplPath} could +##' be found and successfully loaded and parsed. The legacy template file usually has the extension '.tpl'. +##' The UUID of the new emuDB will be randomly generated by default. If \code{targetDir} does not exist, +##' the directory and its parents will be created. A new directory with the name of the database and the +##' suffix '_emuDB' will be created in the \code{targetDir}. If the new database directory exists +##' already, the function stops with an error. The template file is converted to a JSON file. +##' +##' Some of the flags of the legacy EMU template files are ignored (lines with this syntax: "set [flagName] [flagValue]", +##' known ignored flag names are: 'LabelTracks', 'SpectrogramWhiteLevel', 'HierarchyViewLevels', 'SignalViewLevels'). +##' Legacy EMU utterances are reorganized to sessions and bundles. The naming of the sessions depends on the wildcard +##' path pattern of the primary track: If the path contains no wildcard, only one session with the name '0000' will be created. +##' If the path contains one wildcard path element, the names of the directories matching the pattern will be used as session names. +##' If the path contains more than one wildcard path element, the session name is the concatenation of directory names +##' separated by an underscore character. +##' +##' Media files (usually WAV files) are copied, SSFF track files are rewritten using the ASSP library of package +##' \code{wrassp} by default (see option \code{rewriteSSFFTracks} below, see also \link[wrassp]{read.AsspDataObj} +##' \link[wrassp]{write.AsspDataObj}). Annotations in EMU hierarchy (.hlb) files and ESPS label files are +##' converted to one JSON file per bundle (utterance). Only those files get copied, which match the scheme +##' of the template file. Additional files in the legacy database directories are ignored. The legacy EMU +##' database will not be modified. For more information on the structural elements of an emuDB see \code{vignette{emuDB}}. +##' +##' +##' \code{options} is a list of key value pairs: +##' @param emuTplPath EMU template file path +##' @param targetDir target directory +##' @param dbUUID optional UUID of emuDB, will be generated by default +##' @param ... currently available additional options: +##' \itemize{ +##' \item{\code{rewriteSSFFTracks}: if \code{TRUE}, rewrite SSFF tracks instead of copying +##' the file to get rid of big endian encoded SSFF files (SPARC), default: \code{TRUE}} +##' \item{\code{ignoreMissingSSFFTrackFiles}: if \code{TRUE}, missing SSFF track files +##' are ignored, if \code{FALSE} an error will be generated, default: \code{TRUE}} +##' \item{\code{sourceFileTextEncoding}: encoding of legacy database text files (template, +##' label and hlb files), possible values: NULL, "latin1", "UTF-8" "bytes" or "unknown" +##' :default \code{NULL} (uses encoding of operating system platform)} +##' \item{\code{symbolicLinkSignalFiles}: if \code{TRUE}, signal files are symbolic +##' linked instead of copied. Implies: \code{rewriteSSFFTracks=FALSE}, Default: \code{FALSE}} +##' } +##' @param verbose be verbose, default: \code{TRUE} +##' @seealso \code{\link{load_emuDB}} +##' @export +##' @name convert_legacyEmuDB +##' @keywords emuDB database schema Emu +##' @examples +##' \dontrun{ +##' ## Convert legacy EMU database specified by EMU +##' ## template file /mydata/EMU_legacy/ae/ae.tpl to directory /mydata/EMU/ +##' ## and load it afterwards +##' +##' convert_legacyEmuDB("/mydata/EMU_legacy/ae/ae.tpl","/mydata/EMU/") +##' ae=load_emuDB("/mydata/EMU/ae_emuDB") +##' +##' ## Convert database "ae" and do not rewrite SSFF tracks +##' +##' convert_legacyEmuDB("/mydata/EMU_legacy/ae/ae.tpl", +##' "/mydata/EMU/", +##' options=list(rewriteSSFFTracks=FALSE)) +##' +##' ## Convert legacy database "ae" from emuR demo data and load converted emuDB +##' +##' create_emuRdemoData() +##' demoTplPath=file.path(tempdir(),"emuR_demoData/legacy_ae/ae.tpl") +##' targetDir=file.path(tempdir(),"converted_to_emuR") +##' convert_legacyEmuDB(demoTplPath,targetDir) +##' dbHandle=load_emuDB(file.path(targetDir,"ae_emuDB")) +##' +##' } +##' +convert_legacyEmuDB <- function(emuTplPath, + targetDir, + dbUUID = uuid::UUIDgenerate(), + verbose = TRUE, + ...){ + # get ... options + options = list(...) + + progress = 0 + # default options + # ignore missing SSFF track files + # rewrite SSFF track files + # encoding : platform + mergedOptions = list(sourceFileTextEncoding = NULL, + ignoreMissingSSFFTrackFiles = TRUE, + rewriteSSFFTracks = TRUE, + symbolicLinkSignalFiles = FALSE) + if(!is.null(options)){ + for(opt in names(options)){ + mergedOptions[[opt]] = options[[opt]] + } + } + + # pre check target dir + if(file.exists(targetDir)){ + tdInfo = file.info(targetDir) + if(!tdInfo[['isdir']]){ + stop(targetDir, " exists and is not a directory.") + } + } + legacyBasePath = dirname(emuTplPath) + + # load database schema and metadata to get db name + dbConfig = load_dbConfigFromEmuTemplate(emuTplPath, + dbUUID = dbUUID, + encoding = mergedOptions[['sourceFileTextEncoding']]) + # database dir + pp=file.path(targetDir, + paste0(dbConfig[['name']], + emuDB.suffix)) + + # check existence of database dir + if(file.exists(pp)){ + stop("Database storage dir ", pp, " already exists.") + } + + + progress = progress + 1L + + tplBaseDir = NULL + tplBaseDir = dirname(emuTplPath) + + # create database dir in targetdir + dir.create(pp, recursive = TRUE) + + # get UUID + dbUUID = dbConfig[['UUID']] + + # get name + dbName = dbConfig[['name']] + + # set user editable + dbConfig[['EMUwebAppConfig']][['activeButtons']] = list(saveBundle = TRUE, showHierarchy=TRUE) + + # add handle for in memory DB + dbHandle = emuDBhandle(dbName, + pp, + dbUUID, + connectionPath = ":memory:") + + # filter transient properties + # the properties listed are not persisted to JSON files + + # (filter list of old persitence code) + # emuR.persist.filters.DBconfig=list() + # emuR.persist.filters.DBconfig[[1]]=c('annotationDescriptors') + # emuR.persist.filters.DBconfig[[2]]=c('tracks') + # emuR.persist.filters.DBconfig[[3]]=c('flags') + # emuR.persist.filters.DBconfig[[4]]=c('ssffTrackDefinitions','basePath') + # emuR.persist.filters.DBconfig[[5]]=c('mediafileBasePathPattern') + # emuR.persist.filters.DBconfig[[6]]=c('maxNumberOfLabels') + # emuR.persist.filters.DBconfig[[7]]=c('itemColNames') + # emuR.persist.filters.DBconfig[[8]]=c('basePath') + # emuR.persist.filters.DBconfig[[9]]=c('DBconfigPath') + + dbConfigPersist=dbConfig + for(pNm in names(dbConfig)){ + if(pNm %in% c('annotationDescriptors', + 'tracks', + 'flags', + 'mediafileBasePathPattern', + 'maxNumberOfLabels', + 'itemColNames', + 'basePath', + 'DBconfigPath')){ + dbConfigPersist[[pNm]] = NULL + } + } + ssffTrCnt = length(dbConfig[['ssffTrackDefinitions']]) + if(ssffTrCnt > 0){ + for(i in 1:ssffTrCnt){ + dbConfigPersist[['ssffTrackDefinitions']][[i]][['basePath']] = NULL + } + } + + # store db schema file + store_DBconfig(dbHandle, dbConfigPersist) + progress = progress + 1L + + # load primary track file list first + # and find samples track to get sample rate + primaryFileList = NULL + + primaryBasePath = NULL + primaryFileExtension = NULL + + primaryFileSuffixPattern = NULL + # find primary and sample track paths + for(tr in dbConfig[['tracks']]){ + if(tr[['fileExtension']] == dbConfig[['flags']][['PrimaryExtension']]){ + primaryFileExtension = tr[['fileExtension']] + primaryBasePath = tr[['basePath']] + break + } + } + + if(is.null(primaryFileExtension)){ + for(ad in dbConfig[['annotationDescriptors']]){ + if(ad[['extension']] == dbConfig[['flags']][['PrimaryExtension']]){ + primaryFileExtension = ad[['extension']] + primaryBasePath = ad[['basePath']] + break + } + } + } + + if(is.null(primaryFileExtension)){ + stop("Primary file extension not defined in legacy EMU template file ",emuTplPath) + } + + if(is.null(primaryBasePath)){ + stop("Base path for primary files not defined in legacy EMU template file ",emuTplPath) + } + + primaryFileSuffixPattern = paste0('[.]', primaryFileExtension, '$') + legacyBundleIDsList = get_legacyEmuBundles(legacyBasePath, primaryBasePath, primaryFileSuffixPattern) + + + bundlesCount = length(legacyBundleIDsList) + if(bundlesCount == 0){ + # This is likely an error in the template file or folder structure, inform the user + warning("WARNING: No bundles found!\n(Search pattern for primary files was: ", + primaryBasePath, + .Platform$file.sep,primaryFileSuffixPattern, + ")", + sep = '') + }else{ + us = 1:bundlesCount + if(verbose){ + cat("INFO: Converting legacy EMU database containing", + bundlesCount, + "bundles...\n") + pb = utils::txtProgressBar(min = 0, + max = bundlesCount + 2, + initial = progress, + style = 3) + + utils::setTxtProgressBar(pb, progress) + } + linkDefsHashed = build_hashedLinkDefs(dbConfig[['linkDefinitions']]) + for(ui in us){ + legacyBundleID = legacyBundleIDsList[[ui]] + newBundleId = convert_legacyBundleId(legacyBundleID) + sessionName = newBundleId[1] + bundleName = newBundleId[2] + + sDir = paste0(sessionName, session.suffix) + sfp = file.path(pp, sDir) + if(!file.exists(sfp)){ + dir.create(sfp) + } + ptrFilePath = get_legacyFilePath(legacyBasePath, + primaryBasePath, + legacyBundleID, + primaryFileExtension) + + ptrFileBasename = basename(ptrFilePath) + + cutLen = stringr::str_length(primaryFileExtension) + 1L + cutPos = stringr::str_length(ptrFileBasename)-cutLen + + bundle = load_annotationForLegacyBundle(dbConfig, + legacyBundleID, + legacyBasePath, + encoding = mergedOptions[['sourceFileTextEncoding']]) + bundle = remove_redundantBundleLinks(linkDefsHashed, bundle) + + bDir = paste0(bundle[['name']], + bundle.dir.suffix) + bfp = file.path(sfp,bDir) + dir.create(bfp) + # create new list that only contains relevant infos + bp = list(name = bundle$name, annotates = bundle$annotates, + sampleRate = bundle$sampleRate, levels = bundle$levels, links = bundle$links) + # remove sample rate entries + bLvlCnt = length(bp$levels) + if(bLvlCnt > 0){ + for(i in 1:bLvlCnt){ + bp$levels[[i]]$sampleRate = NULL + } + } + # remove level names + names(bp$levels) = NULL + + # metadata (annotations) + ban = stringr::str_c(bundle[['name']], + bundle.annotation.suffix, + '.json') + baJSONPath = file.path(bfp,ban) + pbpJSON = jsonlite::toJSON(bp, + auto_unbox = TRUE, + force = TRUE, + pretty = TRUE) + writeLines(pbpJSON, + baJSONPath, + useBytes = TRUE) + + + for(sf in bundle[['signalpaths']]){ + #cat("Signalpath: ",sf,"\n") + bn = basename(sf) + nsfp = file.path(bfp, bn) + # check if SSFF type + isSSFFFile = FALSE + for(ssffTrDef in dbConfig[['ssffTrackDefinitions']]){ + ssffTrFileExt = ssffTrDef[['fileExtension']] + fileExtPatt = paste0('[.]', + ssffTrFileExt, + '$') + if(length(grep(fileExtPatt,sf)) == 1){ + isSSFFFile = TRUE + break + } + } + if(file.exists(sf)){ + if(mergedOptions[['symbolicLinkSignalFiles']]){ + file.symlink(from = sf, to = nsfp) + }else if(mergedOptions[['rewriteSSFFTracks']] && isSSFFFile){ + # is SSFF track + # read/write instead of copy to get rid of big endian encoded SSFF files (SPARC) + pfAssp = wrassp::read.AsspDataObj(sf) + write.AsspDataObj(pfAssp, nsfp) + }else{ + # media file (likely a wav file) + if(dbConfigPersist$mediafileExtension == 'ssd'){ + # convert ssd to wav + ado = read.AsspDataObj(sf) + AsspFileFormat(ado) = 'WAVE' + wavfp = paste0(sub(pattern = "(.*)\\..*$", + replacement = "\\1", + nsfp), + '.wav') + write.AsspDataObj(wavfp, dobj = ado) + # fix and rewrite + bp_fixed = deduct_timeFromJson(bp, round(attr(ado,'startTime') * attr(ado,"sampleRate"))) + pbpJSON_fixed = jsonlite::toJSON(bp_fixed, auto_unbox = TRUE, force = TRUE, pretty = TRUE) + writeLines(pbpJSON_fixed, baJSONPath, useBytes = TRUE) + # also copy ssd file + file.copy(from = sf, to = nsfp) + }else{ + file.copy(from = sf, to = nsfp) + } + } + }else{ + if(!mergedOptions[['ignoreMissingSSFFTrackFiles']]){ + stop("SSFF track file :'", sf, "' does not exist!") + } + } + } + bundle[['levels']] = NULL + bundle[['links']] = NULL + + bName = bundle[['name']] + + progress = progress + 1L + if(verbose){ + utils::setTxtProgressBar(pb, progress) + } + } + + if(verbose){ + utils::setTxtProgressBar(pb, progress) + cat("\n") + } + } + # fix DBconfig + if(dbConfigPersist$mediafileExtension == 'ssd'){ + dbConfigPersist$mediafileExtension = 'wav' + store_DBconfig(dbHandle, dbConfigPersist) + } + +} + + +deduct_timeFromJson <- function(json, sample){ + for(l_idx in 1:length(json$levels)){ + if(json$levels[[l_idx]]$type != 'ITEM'){ + for(i_idx in 1:length(json$levels[[l_idx]]$items)){ + if(json$levels[[l_idx]]$type == 'SEGMENT'){ + newVal = json$levels[[l_idx]]$items[[i_idx]]$sampleStart - sample + if(newVal < 0) stop(paste0("Error normalizing startTime vals! Negative ", + "samples not allowed in annot.json files!")) + json$levels[[l_idx]]$items[[i_idx]]$sampleStart = newVal + }else{ + newVal = json$levels[[l_idx]]$items[[i_idx]]$samplePoint - sample + if(newVal < 0) stop(paste0("Error normalizing startTime vals! Negative ", + "samples not allowed in annot.json files!")) + json$levels[[l_idx]]$items[[i_idx]]$samplePoint = newVal + } + } + } + } + return(json) +} diff --git a/R/emuR-legacy.template.R b/R/emuR-legacy.template.R new file mode 100644 index 00000000..451da574 --- /dev/null +++ b/R/emuR-legacy.template.R @@ -0,0 +1,355 @@ +## Create emuDB database schema object from EMU template (.tpl) file +## +## @param tplPath EMU template file path +## @param dbUUID optional database UUID +## @param encoding encoding of the template file +## @return object of class emuDB.schema.db +## @import stringr uuid wrassp +## @keywords emuDB database schema Emu +## +load_dbConfigFromEmuTemplate = function(tplPath, + dbUUID = NULL, + encoding = NULL){ + LEVEL_CMD = 'level' + LABFILE_CMD = 'labfile' + LABEL_CMD = 'label' + SET_CMD = 'set' + TRACK_CMD = 'track' + PATH_CMD = 'path' + LEGAL_CMD = 'legal' + if(is.null(tplPath)) { + stop("Argument tplPath (path to Emu template file) must not be NULL\n") + } + tplBasename = basename(tplPath) + dbName = gsub("[.][tT][pP][lL]$", + "", + tplBasename) + + # read + if(is.null(encoding)){ + tpl = try(readr::read_lines(tplPath)) + }else{ + tpl = try(readr::read_lines(tplPath, + readr::locale(encoding = encoding))) + } + if(inherits(tpl, "try-error")) { + stop("read tpl: cannot read from file ", tplPath) + } + # check if file (not directory) + tplFInfo = try(file.info(tplPath)) + if(inherits(tplFInfo, "try-error") | is.null(tplFInfo)) { + stop("check template file: cannot get file info: ", tplPath) + } + if(tplFInfo[['isdir']]){ + stop(tplPath," is a directory. Expected a legacy EMU template file path.") + } + + tracks = list() + flags = list() + levelDefinitions = list() + linkDefinitions = list() + pathDescriptors = list() + annotationDescriptors = list() + hlbTierDescriptors = list() + hlbAnnotationDescriptor = NULL; + + lineNr = 0 + + for(line in tpl){ + lineNr = lineNr + 1L + trimmedLine = stringr::str_trim(line) + if(trimmedLine != ''){ + firstChar = substr(trimmedLine, 1, 1) + if(firstChar != '!'){ + + lineTokensLst = strsplit(trimmedLine, '[[:space:]]+') + lineTokens = lineTokensLst[[1]] + lineTokenCount = length(lineTokens) + if(lineTokenCount >= 1){ + command = lineTokens[1] + + if(command == LABFILE_CMD){ + tierName = lineTokens[2] + # TODO are there any default values for this properties? + extension = NULL + type = NULL + timeFactor = NULL + for(tki in 3:length(lineTokens)){ + tk = lineTokens[[tki]] + + if(substr(tk, 1, 1) == ':'){ + # property key + key = substring(tk, 2) + }else{ + #property value + if(is.null(key)){ + stop("Emu template parser key/value error in ", + lineNr, + "\n") + } + val = tk + if(key == 'extension'){ + extension = val + }else if(key == 'type'){ + type = val + }else if(key == 'time-factor'){ + timeFactor = val + } + # reset key + key = NULL + } + } + ad = list(name = tierName, + extension = extension, + type = type, + timeFactor = timeFactor) + annotationDescriptors[[length(annotationDescriptors)+1L]] <- ad + + # lab file can reference hlb level + replaced = FALSE + tdLen = length(levelDefinitions) + for(i in 1:tdLen){ + td = levelDefinitions[[i]] + if(td[['name']] == tierName){ + # replace + levelDefinitions[[i]] = list(name = td[['name']], + type = type, + attributeDefinitions = td[['attributeDefinitions']]) + replaced = TRUE + break; + } + } + if(!replaced){ + # append + levelDefinitions[[length(levelDefinitions)+1L]] = list(name = tierName); + } + }else if(command == TRACK_CMD){ + + name = lineTokens[2] + extension = lineTokens[3] + track = list(name = name, + columnName = name, + fileExtension = extension) + tracks[[length(tracks) + 1L]] <- track + }else if(command == SET_CMD){ + key = lineTokens[2] + value = lineTokens[3] + flags[[key]] = value + }else if(command == PATH_CMD){ + annoKeysStr = lineTokens[2] + annoBasePath = lineTokens[3] + annoKeysLst = strsplit(annoKeysStr, ',')[[1]] + for(aki in 1:length(annoKeysLst)){ + annoKey = annoKeysLst[[aki]] + pathDescr = list(basePath = annoBasePath, + key = annoKey) + pathDescriptors[[length(pathDescriptors)+1L]] <- pathDescr + if(annoKey == 'hlb'){ + # special meaning + # hlb files are neither declared by tracks nor by labfile directive + # add as annotationDescriptor + ad = list(name = NULL, + extension = annoKey, + type = 'HLB') + annotationDescriptors[[length(annotationDescriptors)+1L]] <- ad + } + } + }else if(command == LEVEL_CMD){ + levelTierName = lineTokens[2] + + if(lineTokenCount >= 3){ + linkType = "ONE_TO_MANY" + if(lineTokenCount >= 4){ + relationshipType = lineTokens[4] + if(relationshipType == 'many-to-many'){ + linkType = "MANY_TO_MANY" + } + } + linkDefinition = list(type = linkType, + superlevelName = lineTokens[3], + sublevelName = levelTierName) + linkDefinitions[[length(linkDefinitions) + 1L]] = linkDefinition + } + tierDescr = list(name = levelTierName, + type = 'ITEM', + attributeDefinitions = list(list(name = levelTierName, type = "STRING"))) + exists = FALSE + for(lDef in levelDefinitions){ + if(lDef[['name']] == levelTierName){ + exists = TRUE + break + } + } + if(!exists){ + levelDefinitions[[length(levelDefinitions) + 1L]] = tierDescr + } + + # TODO constraints + }else if(command == LABEL_CMD){ + + levelTierName = lineTokens[2] + labelNames = list(levelTierName) + if(lineTokenCount != 3){ + stop("Expected label directive \"label levelName labelName\"") + } + + for(i in 1:length(levelDefinitions)){ + td=levelDefinitions[[i]] + if(td[['name']] == levelTierName){ + # replace + attrDefs = levelDefinitions[[i]][['attributeDefinitions']] + attrDefs[[length(attrDefs) + 1L]] = list(name = lineTokens[3], + type = 'STRING') + levelDefinitions[[i]] = list(name = levelTierName, + type = td[['type']], + attributeDefinitions = attrDefs); + break + } + } + }else if(command == LEGAL_CMD){ + if(lineTokenCount <= 3){ + stop("Expected legal directive \"legal levelName groupName label1 label2 ... labeln\"") + } + attrName = lineTokens[2] + labelGroupName = lineTokens[3] + + groupLabels = list() + for(i in 4:lineTokenCount){ + groupLabels[[length(groupLabels) + 1]] = lineTokens[i] + } + set = FALSE + for(i in 1:length(levelDefinitions)){ + td = levelDefinitions[[i]] + ads = td[['attributeDefinitions']] + for(j in 1:length(ads)){ + ad = ads[[j]] + if(ad[['name']] == attrName){ + lblGrIdx = length(ad[['labelGroups']]) + 1 + levelDefinitions[[i]][['attributeDefinitions']][[j]][['labelGroups']][[lblGrIdx]] = list(name = labelGroupName, + values = groupLabels) + set = TRUE + break + } + } + if(set){ + break + } + } + } + } + } + } + } + + #pef=flags$PrimaryExtension + tl = length(tracks) + al = length(annotationDescriptors) + # apply pathes to tracks + tss2 = 1:tl + for(ti2 in tss2){ + for(pd in pathDescriptors){ + if(tracks[[ti2]][['fileExtension']] == pd[['key']]){ + tracks[[ti2]][['basePath']] = pd[['basePath']] + break + } + } + } + + # apply pathes to annotations + as = 1:al + for(ai in as){ + for(pd in pathDescriptors){ + if(annotationDescriptors[[ai]][['extension']] == pd[['key']]){ + annotationDescriptors[[ai]][['basePath']] = pd[['basePath']] + break + } + } + } + + ssffTrackDefinitions = list() + assign = list() + mediafileBasePathPattern = NULL + mediafileExtension = NULL + for(tr in tracks){ + n = tr[['name']] + e = tr[['fileExtension']] + if(e == flags[['PrimaryExtension']]){ + primaryBasePath = tr[['basePath']] + } + if(n == 'samples'){ + if(e != 'wav'){ + warning("WARNING: Media file type with extension ", e, " are not supported by the EMU-webApp.\n") + } + if(e == 'ssd'){ + warning("WARNING: Converting 'ssd' media files to wav! Note that all attr(ssd,'startTime') values that vary from 0 will be normalized to 0.\n") + warning("WARNING: 'ssd' files will still be copied into each bundle but not added as an ssffTrackDefinition.\n") + } + mediafileExtension = e + mediafileBasePathPattern = tr[['basePath']] + }else{ + #array ! + ssffTrackDefinitions[[length(ssffTrackDefinitions) + 1L]] = tr + # default assign all to spectrum TODO + + } + } + + if(is.null(dbUUID)){ + # Generate UUID + # problem: the UUID will change on every reload + dbUUID = uuid::UUIDgenerate() + } + + # default perspective + # assign all SSFF tracks to sonagram + assign = list() + for(ssffTrack in ssffTrackDefinitions){ + # TODO dirty workaround + # detect formant tracks by number of channels + if(ssffTrack[['name']] == 'fm'){ + #ssffTrack$name='FORMANTS' + #assign[[length(assign)+1]]=list(signalCanvasName='SPEC',ssffTrackName='FORMANTS') + } + } + + contourLims = list() + sc = list(order = c("OSCI","SPEC"), + assign = assign, + contourLims = contourLims) + + defaultLvlOrder = list() + for(ld in levelDefinitions){ + + if(ld[['type']] == 'SEGMENT' || ld[['type']] == 'EVENT'){ + defaultLvlOrder[[length(defaultLvlOrder) + 1L]] = ld[['name']] + } + } + + defPersp = list(name = 'default', + signalCanvases = sc, + levelCanvases = list(order = defaultLvlOrder), + twoDimCanvases = list(order=list())) + waCfg = list(perspectives = list(defPersp)) + dbSchema = list(name = dbName, + UUID = dbUUID, + mediafileBasePathPattern = mediafileBasePathPattern, + mediafileExtension = mediafileExtension, + ssffTrackDefinitions = ssffTrackDefinitions, + levelDefinitions = levelDefinitions, + linkDefinitions = linkDefinitions, + EMUwebAppConfig = waCfg, + annotationDescriptors = annotationDescriptors, + tracks = tracks, + flags = flags); + + # get max label array size + maxLbls = 0 + for(lvlDef in levelDefinitions){ + attrCnt = length(lvlDef[['attributeDefinitions']]) + if(attrCnt > maxLbls){ + maxLbls = attrCnt + } + } + dbSchema[['maxNumberOfLabels']] = maxLbls + return(dbSchema) +} \ No newline at end of file diff --git a/R/emuR-objDocs.R b/R/emuR-objDocs.R new file mode 100644 index 00000000..fb5a0a6b --- /dev/null +++ b/R/emuR-objDocs.R @@ -0,0 +1,213 @@ +##' Segment list +##' +##' A segment list is the result type of legacy Emu query. +##' +##' +##' @aliases segmentlist emusegs +##' @format multi-columned matrix one row per segment +##' \itemize{ +##' \item columnlabel +##' \item columnsegment onset time +##' \item columnsegment offset time +##' \item columnutterance name +##' } +##' @seealso \code{\link{query}}, \code{\link{demo.vowels}} +##' @keywords classes +##' @name segmentlist +##' @examples +##' +##' data(demo.vowels) +##' +##' #demo.vowels is a segment list +##' demo.vowels +##' +NULL + +##' emuR segment list +##' @description +##' An emuR segment list is a list of segment descriptors. Each segment +##' descriptor describes a sequence of annotation elements. The list is +##' usually a result of an emuDB query using function \code{\link{query}}. +##' +##' @details +##' Each row shows the annotation label sequence, the start and end position +##' in time, session and bundle names, level name and type. +##' Additionally the row contains the UUID of the emuDB, the ID's of start +##' and end elements and the corresponding start and end position as sample +##' count and the sample rate. These columns are not printed by default. +##' The print method of emuRsegs hides them. To print all columns of a segment +##' list object use the print method of \code{\link{data.frame}}. +##' For example to print all columns of an emuRsegs segmentlist \code{sl} type: +##' \code{print.data.frame(sl)} +##' Though the segment descriptors have references to the annotations, the label +##' and sample/time position information is not updated if any of them change. The +##' values of the segment list may get invalid if the the database is modified. +##' A segment may consist only of one single element, in this case start and end ID are equal. +##' An emuR segment list is the default result of \code{\link{query}} and can +##' be used to get track data using \code{\link{get_trackdata}}. +##' The emuRsegs class inherits \link{emusegs} and hence \code{\link{data.frame}} +##' +##' @aliases segment list emuRsegs +##' +##' @format Attributed data.frame, one row per segment descriptor. +##' +##' Data frame columns are: +##' \itemize{ +##' \item labels: sequenced labels of segment concatenated by '->' +##' \item start: onset time in milliseconds +##' \item end: offset time in milliseconds +##' \item session: session name +##' \item bundle: bundle name +##' \item level: level name +##' \item type: type of "segment" row: 'ITEM': symbolic item, 'EVENT': event item, 'SEGMENT': segment +##' +##' } +##' Additional hidden columns: +##' \itemize{ +##' \item utts: utterance name (for compatibility to \link{emusegs} class) +##' \item db_uuid: UUID of emuDB +##' \item startItemID: item ID of first element of sequence +##' \item endItemID: item ID of last element of sequence +##' \item sampleStart: start sample position +##' \item sampleEnd: end sample position +##' \item sampleRate: sample rate +##' } +##' +##' Attributes: +##' \itemize{ +##' \item database: name of emuDB +##' \item query: Query string +##' \item type: type ('segment' or 'event') (for compatibility to \link{emusegs} class) +##' } +##' +##' +##' +##' @seealso \code{\link{query}},\code{\link{get_trackdata}},\link{emusegs} +##' @keywords classes +##' @name emuRsegs +##' +NULL + + + + + +##' Start and end times for EMU segment lists and trackdata objects +##' +##' Obtain start and end times for EMU segment lists and trackdata objects +##' +##' The function returns the start and/or end times of either a segment list or +##' a trackdata object. The former refers to the boundary times of segments, +##' the latter the start and end times at which the tracks from segments occur. +##' start.emusegs and end.emusegs give exactly the same output as start and end +##' respectively. +##' +##' @aliases start.emusegs end.emusegs start.trackdata end.trackdata +##' @param x a segment list or a trackdata object +##' @param ... due to the generic only +##' @return A vector of times. +##' @author Jonathan Harrington +##' @seealso \code{\link{tracktimes}} +##' @keywords utilities +##' @name start.emusegs +##' @examples +##' +##' # start time of a segment list +##' start(polhom) +##' # duration of a segment list +##' end(polhom) - start(polhom) +##' # duration from start time of segment list +##' # and start time of parallel EPG trackdata +##' start(polhom) - start(polhom.epg) +##' +##' +NULL + + + + + +##' Track data object +##' +##' A track data object is the result of get_trackdata(). +##' +##' +##' @aliases trackdata Math.trackdata Math2.trackdata Ops.trackdata +##' Summary.trackdata +##' @format \describe{ \item{$index}{a two columned matrix, each row keeps the +##' first and last index of the $data rows that belong to one segment} +##' \item{$ftime}{a two columned matrix, each row keeps the times marks of one +##' segment} \item{$data}{a multi-columned matrix with the real track values +##' for each segment} } +##' @note The entire data track is retrieved for each segment in the segment +##' list. The amount of data returned will depend on the sample rate and number +##' of columns in the track requested. +##' @section Methods: The following generic methods are implemented for +##' trackdata objects. \describe{ \item{list("Arith")}{\code{"+"}, \code{"-"}, +##' \code{"*"}, \code{"^"}, \code{"\%\%"}, \code{"\%/\%"}, \code{"/"}} +##' \item{list("Compare")}{\code{"=="}, \code{">"}, \code{"<"}, \code{"!="}, +##' \code{"<="}, \code{">="}} \item{list("Logic")}{\code{"&"}, \code{"|"}. } +##' \item{list("Ops")}{\code{"Arith"}, \code{"Compare"}, \code{"Logic"}} +##' \item{list("Math")}{\code{"abs"}, \code{"sign"}, \code{"sqrt"}, +##' \code{"ceiling"}, \code{"floor"}, \code{"trunc"}, X \code{"cummax"}, +##' \code{"cummin"}, \code{"cumprod"}, \code{"cumsum"}, \code{"log"}, +##' \code{"log10"}, \code{"log2"}, \code{"log1p"}, \code{"acos"}, +##' \code{"acosh"}, \code{"asin"}, \code{"asinh"}, \code{"atan"}, +##' \code{"atanh"}, \code{"exp"}, \code{"expm1"}, \code{"cos"}, \code{"cosh"}, +##' \code{"sin"}, \code{"sinh"}, \code{"tan"}, \code{"tanh"}, \code{"gamma"}, +##' \code{"lgamma"}, \code{"digamma"}, \code{"trigamma"} } +##' \item{list("Math2")}{\code{"round"}, \code{"signif"}} +##' \item{list("Summary")}{\code{"max"}, \code{"min"}, \code{"range"}, +##' \code{"prod"}, \code{"sum"}, \code{"any"}, \code{"all"}} } +##' @seealso \code{\link{get_trackdata}}, \code{\link{demo.vowels.fm}} +##' \code{\link{demo.all.rms}} +##' @keywords classes +##' @name trackdata +##' @examples +##' +##' data(demo.vowels.fm) +##' data(demo.vowels) +##' +##' #Formant track data for the first segment of the segment list demo.vowels +##' demo.vowels.fm[1] +##' +##' +NULL + +##' emuR track data object +##' +##' A emuR track data object is the result of \code{\link{get_trackdata}} if the +##' \code{resultType} parameter is set to \code{"emuRtrackdata"} or the result of +##' an explicit call to \code{\link{create_emuRtrackdata}}. Compared to +##' the \code{\link{trackdata}} object it is a sub-class of a +##' \code{\link{data.frame}} which is meant to ease integration with other +##' packages for further processing. It can be viewed as an amalgamation of +##' a \code{\link{emuRsegs}} and a \code{\link{trackdata}} object as it +##' contains the information stored in both objects. +##' +##' +##' @format The \code{\link{data.frame}} has the following columns: +##' +##' \describe{ +##' \item{$sl_rowIdx}{column to indicate \code{\link{emuRsegs}} row index that +##' the value belongs to} +##' \item{$labels - $sampleRate}{duplicated information of \code{\link{emuRsegs}} row entries} +##' \item{$times_rel}{relative time stamps of sample values in milliseconds} +##' \item{$times_orig}{absolute time stamps of sample values in milliseconds} +##' \item{$T1 - $TN}{actual data values (e.g. formant values / F0 values / DFT values / ...)} +##' } +##' +##' Note that $labels - $sampleRate as well as $T1 - $TN (where the N in TN is to be read as the n-th T value) +##' refer to multiple columns of the object. +##' +##' @section Methods: The following methods are implemented for emuRtrackdata objects: +##' +##' \describe{ +##' \item{cut}{Function to extract a \code{\link{emuRtrackdata}} object from an +##' emuRtrackdata at a single time point or between two times} +##' } +##' @seealso \code{\link{get_trackdata}}, \code{\link{create_emuRtrackdata}} +##' @keywords classes +##' @name emuRtrackdata +##' @seealso trackdata +NULL diff --git a/R/emuR-packageDocs.R b/R/emuR-packageDocs.R new file mode 100644 index 00000000..adb2cce8 --- /dev/null +++ b/R/emuR-packageDocs.R @@ -0,0 +1,101 @@ +##' emuR - Main Package of the EMU Speech Database Management System +##' +##' The emuR package provides the next iteration of the EMU Speech +##' Database Management System with database management, data +##' extraction, data preparation and data visualization facilities. +##' +##' This package is part of the next iteration of the EMU Speech Database Management System (EMU-SDMS) +##' which aims to be as close to an all-in-one solution for generating, manipulating, querying, +##' analyzing and managing speech databases as possible. +##' For an overview of the system please visit this URL: \url{http://ips-lmu.github.io/EMU.html}. +##' +##' It can be viewed as the main component of the EMU-SDMS as it acts as +##' the central instance that is able to interact with every component of the system. +##' It takes care of database managing duties by being able to interact with a speech +##' database that is stored in the emuDB format. Further, it has easy to understand and +##' learn yet expressive and powerful querying mechanics, that allow the user to easily query +##' the annotation structures of the database. Lastly it provides easy data extraction +##' capabilities that extract data (e.g. formant values) which corresponds to the +##' result of a query. +##' +##' For an introduction to the emuR package please see the \code{emuR_intro} vignette +##' by calling: \code{vignette('emuR_intro')} +##' +##' For information about the \code{emuDB} database format please see the \code{emuDB} +##' vignette by calling: \code{vignette('emuDB')} +##' +##' For information about the query language used by the EMU-SDMS please see the \code{EQL} +##' vignette by calling: \code{vignette('EQL')} +##' +##' Typical work-flow in emuR (emuDB required): +##' +##' \enumerate{ +##' \item Load database into current R session - \code{\link{load_emuDB}} +##' \item Database annotation / visual inspection - +##' \code{\link{serve}} and connect the EMU-webApp to the local server +##' \item Query database - \code{\link{query}} (sometimes +##' followed by \code{\link{requery_hier}} or \code{\link{requery_seq}}) +##' \item Get trackdata (e.g. formant values) for the result +##' of a query - \code{\link{get_trackdata}} +##' \item Data preparation +##' \item Visual data inspection +##' \item Further analysis and statistical processing +##' } +##' +##' TIP: for a browsable overview of all the functions provided by emuR simply +##' run the command \code{help.start()} -> click on packages -> click on emuR +##' +##' @name emuR-package +##' @aliases emuR emuR-package +##' +##' @references Harrington, J. (2010). The Phonetic Analysis of Speech Corpora. +##' Blackwell. +##' +##' @keywords package +##' @import methods +##' @examples +##' \dontrun{ +##' # create demo data including an emuDB called "ae" +##' create_emuRdemoData(dir = tempdir()) +##' +##' # construct path to demo emuDB +##' path2ae = file.path(tempdir(), "emuR_demoData", "ae") +##' +##' # load emuDB into current R session +##' ae = load_emuDB(path2ae) +##' +##' # query loaded emuDB +##' lvowels = query(ae, "Phonetic = i: | u: | o:") +##' +##' # extract labels from query result +##' lvowels.labs = label(lvowels) +##' +##' # list all ssffTrackDefinitions of emuDB +##' list_ssffTrackDefinitions(ae) +##' +##' # get formant trackdata defined in ssffTrackDefinitions "fm" for query result +##' lvowels.fm = get_trackdata(ae, lvowels, "fm") +##' +##' # extract track values at temporal midpoint of segments +##' lvowels.fmCut = dcut(lvowels.fm, .5, prop = TRUE) +##' +##' # Plot the data as time signal and formant card +##' dplot(lvowels.fm[,1:2], lvowels.labs, normalise=TRUE, main = "Formants over vowel duration") +##' eplot(lvowels.fmCut[,1:2], lvowels.labs, dopoints=TRUE, +##' doellipse=FALSE, main = "F1/F2 of vowel midpoint", form=TRUE, +##' xlab = "F2 in Hz", ylab = "F1 in Hz") +##' +##' +##' # Plot of spectral data from 50% of aspiration duration +##' hs = query(ae,"Phonetic = H") +##' hs.labs = label(hs) +##' hs.dft = get_trackdata(ae, hs, "dft") +##' hs.dftCut = dcut(hs.dft, .5, prop=TRUE) +##' plot(hs.dftCut, hs.labs, main = "Spectral data of aspiration") +##' +##' } +##' +"_PACKAGE" + + + diff --git a/R/emuR-parse_TextGridDBI.R b/R/emuR-parse_TextGridDBI.R new file mode 100644 index 00000000..3e6b2b09 --- /dev/null +++ b/R/emuR-parse_TextGridDBI.R @@ -0,0 +1,544 @@ +## Parser for Praat TextGrid files +## +## parses directly to DBI tables (items, labels) +## @param emuDBhandle +## @param textGridPath TextGrid file connection +## @param sampleRate sample rate of correponding signal file +## @param encoding text encoding (currently the only excepted is the default UTF-8) +## @param bundle name of bundle +## @param session name of session +## +parse_TextGridDBI <- function(emuDBhandle, + TextGridPath = NULL, + sampleRate, + encoding = "UTF-8", + bundle = NULL, + session = "0000") { + + ##################### + # check arguments (TODO better checks for classes and the like...) + + if(is.null(TextGridPath)) { + stop("Argument TextGridPath must not be NULL\n") + } + if(sampleRate <=0 ){ + stop("Samplerate must be greater than zero\n") + } + if(encoding != "UTF-8"){ + stop("The only encoding that is currently supported is UTF-8\n") + } + if(is.null(bundle)){ + stop("Argument bundle must not be NULL!\n") + } + if(is.null(session)){ + stop("Argument session must not be NULL!\n") + } + + # + ##################### + + itemCounterGlobal = 1 + itemCounterLevel = 1 + + FILE_TYPE_KEY = "File type" + OBJECT_CLASS_KEY = "Object class" + TIERS_SIZE_KEY = "size" + TIER_ITEM_KEY = "item" + NAME_KEY = "name" + INTERVALS_KEY = "intervals" + POINTS_KEY = "points" + XMIN_KEY = "xmin" + XMAX_KEY = "xmax" + TEXT_KEY = "text" + TIME_KEY = "time" + + FILE_TYPE_VAL_OO_TEXTFILE = "ooTextFile" + OBJECT_CLASS_VAL_TEXTGRID = "TextGrid" + TIER_CLASS_VAL_INTERVAL = "IntervalTier" + TIER_CLASS_VAL_TEXT = "TextTier" + + fileType = NULL + objectClass = NULL + hasTiers = FALSE + tiersCount = NULL + currentTier = NULL + currentTierClass = NULL + currentTierName = NULL + currentTierSize = NULL + + # read TextGrid + tg = try(readr::read_lines(TextGridPath)) + if(inherits(tg, "try-error")) { + stop("read.TextGrid: cannot read from file ", TextGridPath) + } + + # remove all trailing/leading white spaces (for speed improvment) + tg = gsub("^\\s+|\\s+$", "", tg) + + for(line in tg){ + # check for fileType + if(is.null(fileType)){ + p = parse_lineToKeyValue(line, + doubleQuoted = TRUE, + initialTrim = FALSE) + if(!is.null(p)){ + if(p[1] == FILE_TYPE_KEY){ + fileType = p[2] + # check if of correct type: + if(fileType != FILE_TYPE_VAL_OO_TEXTFILE){ + stop("Can only parse TextGrids with the File type: ", + FILE_TYPE_VAL_OO_TEXTFILE, + ". Found following File type: ", + fileType) + } + + } + } + }else{ + # check for objectClass + if(is.null(objectClass)){ + p = parse_lineToKeyValue(line, + doubleQuoted = TRUE, + initialTrim = FALSE) + if(!is.null(p)){ + if(p[1] == OBJECT_CLASS_KEY){ + objectClass = p[2] + } + } + }else{ + # if we have both the file type and the object class + if((fileType == FILE_TYPE_VAL_OO_TEXTFILE) + && (objectClass == OBJECT_CLASS_VAL_TEXTGRID)){ + + if(is.null(tiersCount)){ + + p = parse_lineToKeyValue(line, initialTrim = FALSE) + if((!is.null(p)) && (p[1] == 'size')){ + tiersCount = p[2] + } + }else{ + ## if we have tiersCount tiers + if(length(grep("^item",line)) == 1){ + + tierIndexStr = sub('item\\s*','', line); + tierIndexStr = sub('\\s*:$','', tierIndexStr); + if(length(grep('\\[\\s*[0-9]+\\s*\\]', tierIndexStr)) == 1){ + tierIndexStr = sub('\\[\\s*', '', tierIndexStr); + tierIndexStr = sub('\\s*\\]', '', tierIndexStr); + + tierIndex = tierIndexStr; + # reset level/tier attributes + itemCounterLevel = 1 + currentTierClass = NULL; + currentTierName = NULL; + currentTierSize = NULL; + currentSegment = NULL; + currentSegmentIndex = NULL; + currentSegmentStart = NULL; + currentSegmentDur = NULL; + currentSegmentLabel = NULL; + currentMark = NULL; + currentPointIndex = NULL; + currentPointSample = NULL; + currentPointLabel = NULL; + } + }else { + # check for currentTierClass + if(is.null(currentTierClass)){ + p=parse_lineToKeyValue(line, + doubleQuoted = TRUE, + initialTrim = FALSE) + if((! is.null(p)) && ('class' == p[1])){ + currentTierClass = p[2]; + if(currentTierClass == TIER_CLASS_VAL_INTERVAL){ + + }else if(currentTierClass == TIER_CLASS_VAL_TEXT){ + }else{ + stop("TextGrid tiers of class \"", + currentTierClass, + "\" not supported!"); + } + } + } + # check for currentTierName + if(is.null(currentTierName)){ + p = parse_lineToKeyValue(line, + doubleQuoted = TRUE, + initialTrim = FALSE) + if((!is.null(p)) && ('name' == p[1])){ + currentTierName = p[2] + + } + } + # if we have the currentTierClass + if(!is.null(currentTierClass)){ + if(currentTierClass == TIER_CLASS_VAL_INTERVAL){ + # find size (and other properties) + if((is.null(currentTierSize)) + && (length(grep('^intervals[[:space:]]*:.*',line)) == 1)){ + + intervalsPropertyStr = stringr::str_trim(sub('^intervals[[:space:]]*:', '', line)) + intervalsProperty = parse_lineToKeyValue(intervalsPropertyStr, initialTrim = FALSE); + if((!is.null(intervalsProperty)) + && (intervalsProperty[1] == 'size')){ + currentTierSize = intervalsProperty[2] + #cat("intervals: size=",currentTierSize,"\n"); + + } + } + if(length(grep('intervals[[:space:]]*[[][[:space:]]*[0-9]+[[:space:]]*[]][[:space:]]*[:][[:space:]]*', line)) == 1){ + + segmentIndexStr = sub("intervals[[:space:]]*[[][[:space:]]*", "", line); + segmentIndexStr = sub("[[:space:]]*[]][[:space:]]*[:][[:space:]]*", "", segmentIndexStr); + currentElementIndex = segmentIndexStr; + + currentSegmentIndex = segmentIndexStr; + currentSegmentStart = NULL; + currentSegmentEnd = NULL; + currentSegmentLabel = NULL; + }else{ + p = parse_lineToKeyValue(line, + doubleQuoted = TRUE, + initialTrim = FALSE) + if((!is.null(p)) && (!is.null(currentSegmentIndex))){ + if(p[1] == "xmin"){ + minTimeStr = p[2] + minTime = as(minTimeStr, "numeric") + startSample = floor(minTime * sampleRate) + currentSegmentStart = startSample + }else if(p[1] == "xmax"){ + maxTimeStr = p[2]; + maxTime = as(maxTimeStr, "numeric") + currentSegmentEnd = floor(maxTime * sampleRate) + + + }else if(p[1] == "text"){ + label = p[2]; + currentSegmentLabel = label + } + + if(!is.null(currentSegmentIndex) && + !is.null(currentSegmentStart) && + !is.null(currentSegmentEnd) && + !is.null(currentSegmentLabel)){ + sampleDur = currentSegmentEnd - currentSegmentStart - 1 + labels = list(list(name = currentTierName, value = currentSegmentLabel)) + + + # item entry: + DBI::dbExecute(emuDBhandle$connection, paste0("INSERT INTO items VALUES (", + " '", emuDBhandle$UUID, "', ", + " '", session, "', ", + " '", bundle, "', ", + " '", itemCounterGlobal, "', ", + " '", currentTierName, "', ", + " '", "SEGMENT", "', ", + itemCounterLevel, ", ", + sampleRate, ", ", + "NULL", ", ", + currentSegmentStart, ", ", + sampleDur, ")")) + + + + # label entry: + DBI::dbExecute(emuDBhandle$connection, paste0("INSERT INTO labels VALUES","(", + " '", emuDBhandle$UUID, "', ", + " '", session, "', ", + " '", bundle, "',", + itemCounterGlobal, ", ", + 0,", ", + " '", currentTierName, "', ", + " '", gsub("'","''", currentSegmentLabel), "')")) + + # links entry: + # no link entry because TextGrids don't have hierarchical infos + + # increase counters + itemCounterGlobal = itemCounterGlobal + 1 + itemCounterLevel = itemCounterLevel + 1 + + + currentSegment = NULL; + currentSegmentIndex = NULL; + currentSegmentStart = NULL; + currentSegmentDur = NULL; + } + + } + } + + }else if(currentTierClass == TIER_CLASS_VAL_TEXT){ + # find size (and other properties) + if((is.null(currentTierSize)) && (length(grep('^points[[:space:]]*[:].*', line)) == 1)){ + + intervalsPropertyStr = stringr::str_trim(sub('^points[[:space:]]*[:]', '', line)) + intervalsProperty = parse_lineToKeyValue(intervalsPropertyStr, initialTrim = FALSE); + if((!is.null(intervalsProperty)) && (intervalsProperty[1] == 'size')){ + currentTierSize = intervalsProperty[2] + } + } + if(length(grep("points[[:space:]]*[[][[:space:]]*[0-9]+[[:space:]]*[]][[:space:]]*[:][[:space:]]*", line)) == 1){ + pointIndexStr = sub("points[[:space:]]*[[][[:space:]]*", "", line); + pointIndexStr = sub("[[:space:]]*[]][[:space:]]*[:][[:space:]]*", "", pointIndexStr); + currentPointIndex = as.integer(pointIndexStr) + currentElementIndex = currentPointIndex + currentPointLabel = NULL; + currentPointSample = NULL; + }else{ + #cat("inside point: \n") + p = parse_lineToKeyValue(line, + doubleQuoted = TRUE, + initialTrim = FALSE) + if((!is.null(p)) && (!is.null(currentPointIndex))){ + if(p[1] == "time" || p[1] == "number"){ + timePointStr = p[2]; + timePoint = as(timePointStr, "numeric") + samplePoint = floor(timePoint * sampleRate) + currentPointSample = samplePoint + }else if(p[1] == "mark"){ + currentPointLabel = p[2] + }else if(p[1] == "text"){ + currentPointLabel = p[2] + } + } + if(!is.null(currentPointIndex) && + !is.null(currentPointSample) && + !is.null(currentPointLabel)){ + + labels = list(list(name = currentTierName, + value = currentPointLabel)) + + # item entry + itemId = paste0(emuDBhandle$dbName, '_', session, '_', bundle, '_', itemCounterGlobal) + + + DBI::dbExecute(emuDBhandle$connection, paste0("INSERT INTO items VALUES"," (", + " '", emuDBhandle$UUID, "', ", + " '", session, "', ", + " '", bundle, "', ", + itemCounterGlobal, ", ", + " '", currentTierName,"', ", + " '", "EVENT", "', ", + itemCounterLevel, ", ", + sampleRate, ", ", + currentPointSample, ", ", + "NULL", ", ", + "NULL", ")")) + + + # label entry: + DBI::dbExecute(emuDBhandle$connection, paste0("INSERT INTO labels VALUES","(", + " '", emuDBhandle$UUID, "', ", + " '", session, "', ", + " '", bundle, "', ", + itemCounterGlobal, ", ", + 0,", ", + " '", currentTierName, "', ", + " '", gsub("'","''", currentPointLabel), "')")) + + + + # links entry: + # no link entry because TextGrids don't have hierarchical infos + + # increase counters + itemCounterGlobal = itemCounterGlobal + 1 + itemCounterLevel = itemCounterLevel + 1 + + currentPointIndex = NULL; + currentPointLabel = NULL; + currentPointSample = NULL; + } + } + } + } + } + } + } + } + } + } +} + +############################# +TextGridToBundleAnnotDFs <- function(tgPath, + sampleRate, + name, + annotates){ + + FILE_TYPE_KEY = "File type" + OBJECT_CLASS_KEY = "Object class" + + #tgChar = enc2utf8(readChar(tgPath, file.info(tgPath)$size)) # wrapped in enc2utf8 as readChar respects the system default (windows iso 88591) + tgChar = readr::read_file(tgPath) + lines = unlist(strsplit(tgChar, "\n")) + + if(!grepl(paste0("^", FILE_TYPE_KEY), lines[1]) & !grepl(paste0("^", OBJECT_CLASS_KEY), lines[2])){ + stop("First two lines of TextGrid file do not match: ", + FILE_TYPE_KEY, + "; and: ", + OBJECT_CLASS_KEY, + ". Only UTF-8 long form TextGrids are currently supported. Problem file is: ", + tgPath) + } + + + # estimate how many items are in TextGrid for preallocation + nrOfItems = length(grep("^\\s*(text|mark)\\s*=", lines)) + + # init data frames and preallocate enough rows + items = data.frame(item_id = integer(nrOfItems), + level = character(nrOfItems), + type = character(nrOfItems), + seq_idx = integer(nrOfItems), + sample_rate = numeric(nrOfItems), + sample_point = integer(nrOfItems), + sample_start = integer(nrOfItems), + sample_dur = integer(nrOfItems), + stringsAsFactors = FALSE) + + labels = data.frame(item_id = integer(nrOfItems), + label_idx = integer(nrOfItems), + name = character(nrOfItems), + label = character(nrOfItems), + stringsAsFactors = FALSE) + + + + # split at "...items [1]..." type lines + tiers = unlist(strsplit(tgChar, ".*item\\s\\[[0-9]+\\].*\n", perl = TRUE)) + header = tiers[1] # extract header + tiers = tiers[-1] + + maxItemID = 1 + # iterate through tiers + for(i in 1:length(tiers)){ + curTier = tiers[i] + tierLines = unlist(strsplit(curTier, "\n")) + + tierHeaderEndIdx = grep("[intervals|points]:\\s*size", tierLines, perl = TRUE) + if(length(tierHeaderEndIdx) == 0){ + stop("Couldn't find a match for [intervals|points]:\\s*size in ", + tgPath, + "!!! The first few lines of the current tier are (might help spot the error...): \n", + utils::head(curTier)) + } + tierHeader = tierLines[1:tierHeaderEndIdx] + tierLines = tierLines[-1:(-1*tierHeaderEndIdx)] + + if(grepl("IntervalTier", tierHeader[1])){ + levelName = sub('\\"\\s*$', "", + sub('^\\s*name\\s*=\\s*\\"', + "", + tierHeader[grepl("^\\s*name\\s*=", tierHeader)], + perl = TRUE), + perl = TRUE) + xminTimes = as.numeric(sub("^\\s*xmin\\s*=\\s*", + "", + tierLines[grepl("^\\s*xmin\\s*=", tierLines)], + perl = TRUE)) # as.numeric seems to be able to deal with trailing blanks + xmaxTimes = as.numeric(sub("^\\s*xmax\\s*=\\s*", + "", + tierLines[grepl("^\\s*xmax\\s*=", tierLines)], + perl = TRUE)) # as.numeric seems to be able to deal with trailing blanks + texts = sub('\\"\\s*$', + "", + sub('^\\s*text\\s*=\\s*\\"', + "", + tierLines[grepl("^\\s*text\\s*=", tierLines)]), + perl = TRUE) + # check if any items where found + if(length(xminTimes) != 0){ + # calculate times + startSamples = floor(xminTimes * sampleRate) + endSamples = floor(xmaxTimes * sampleRate) + sampleDurs = endSamples - startSamples - 1 + + # insert in data frames + items[maxItemID:(maxItemID + length(xminTimes) - 1), ] = data.frame(item_id = maxItemID:(maxItemID + length(xminTimes) - 1), + level = rep(levelName, length(xminTimes)), + type = rep("SEGMENT", length(xminTimes)), + seq_idx = 1:length(xminTimes), + sample_rate = rep(sampleRate, length(xminTimes)), + sample_point = NA, + sample_start = startSamples, + sample_dur = sampleDurs, + stringsAsFactors = FALSE) + + labels[maxItemID:(maxItemID + length(xminTimes) - 1), ] = data.frame(item_id = maxItemID:(maxItemID + length(xminTimes) - 1), + label_idx = rep(1, length(xminTimes)), + name = rep(levelName, length(xminTimes)), + label = texts, + stringsAsFactors = FALSE) + + maxItemID = max(items$item_id) + 1 + } + }else if(grepl("TextTier", tierHeader[1])){ + levelName = sub('\\"\\s*$', + "", + sub('^\\s*name\\s*=\\s*\\"', + "", + tierHeader[grepl("^\\s*name\\s*=", tierHeader)], + perl = TRUE), + perl = TRUE) + pointsTimes = as.numeric(sub("^\\s*\\w+\\s*=\\s*", + "", + tierLines[grepl("^\\s*number|time\\s*=", tierLines)], + perl = TRUE)) # as.numeric seems to be able to deal with trailing blanks + marks = sub('\\"\\s*$', + "", + sub('^\\s*mark\\s*=\\s*\\"', + "", + tierLines[grepl("^\\s*mark\\s*=", tierLines)]), + perl = TRUE) + # check if any items where found + if(length(pointsTimes) != 0){ + # calculate times + samplePoints = floor(pointsTimes * sampleRate) + + # create data frames + items[maxItemID:(maxItemID + length(samplePoints) - 1), ] = data.frame(item_id = maxItemID:(maxItemID + length(pointsTimes) - 1), + level = rep(levelName, length(pointsTimes)), + type = rep("EVENT", length(pointsTimes)), + seq_idx = 1:length(pointsTimes), + sample_rate = rep(sampleRate, length(pointsTimes)), + sample_point = samplePoints, + sample_start = NA, + sample_dur = NA, + stringsAsFactors = FALSE) + + labels[maxItemID:(maxItemID + length(samplePoints) - 1), ] = data.frame(items_id = maxItemID:(maxItemID + length(pointsTimes) - 1), + label_idx = rep(1, length(pointsTimes)), + name = rep(levelName, length(pointsTimes)), + label = marks, + stringsAsFactors = FALSE) + + maxItemID = max(items$item_id) + 1 + } + }else{ + stop("Found Tier that does not have a class definition 'IntervalTier' or 'TextTier'.", + " This probably means it is a mal formated TextGrid file. Problem file is: ", tgPath) + } + } + + links = data.frame(bundle = character(), + from_id = integer(), + to_id = integer(), + label = character(), + stringsAsFactors = FALSE) + + return(list(name = name, + annotates = annotates, + sampleRate = sampleRate, + items = items, + links = links, + labels = labels)) +} + +# FOR DEVELOPMENT +# library('testthat') +# test_file('tests/testthat/test_aaa_initData.R') +# test_file('tests/testthat/test_emuR-parse_TextGrid.R') +# tgPath = "~/Desktop/emuR_demoData/TextGrid_collection/msajc003.TextGrid" + diff --git a/R/emuR-parser.BPF.R b/R/emuR-parser.BPF.R new file mode 100644 index 00000000..1333d025 --- /dev/null +++ b/R/emuR-parser.BPF.R @@ -0,0 +1,1558 @@ +## EmuDB Parser for Bas Partitur Files +## +## @param bpfPath +## @param samplerate +## @param encoding +## @param dbName +## @param bundle +## @param session +## @param dbUUID +## @param refLevel +## @param segmentToEventLevels +## @param levelClasses +## @return list(levelInfo, linkInfo, warningsInfo) +## @import stringr RSQLite +## @keywords emuR BPF Emu + +parse_BPF <- function(emuDBhandle, + bpfPath, + encoding = "UTF-8", + bundle, + session, + refLevel, + extractLevels, + samplerate, + segmentToEventLevels, + unifyLevels, + levelClasses) +{ + # --------------------------------------------------------------------------- + # --- Containers for info to be passed out to caller (converter) function --- + # --------------------------------------------------------------------------- + + levelInfo = list() + linkInfo = list() + semicolonFound = FALSE + + # --------------------------------------------------------------------------- + # ------------------------ Read BPF file from disk -------------------------- + # --------------------------------------------------------------------------- + + bpfLines = try(readr::read_lines(bpfPath)) + if(inherits(bpfLines, "try-error")) + { + stop("Cannot read from file ", bpfPath) + } + + if(length(bpfLines) == 0) + { + stop("File ", bpfPath, " has length 0. This does not conform to BPF specifications.") + } + + # --------------------------------------------------------------------------- + # -------------------------- Parse header ----------------------------------- + # --------------------------------------------------------------------------- + + returnContainer = parse_bpfHeader( + bpfLines = bpfLines, + bpfPath = bpfPath, + samplerate = samplerate + ) + + header = returnContainer$header + bsKeyPosition = returnContainer$bsKeyPosition + samplerate = returnContainer$samplerate + + # --------------------------------------------------------------------------- + # ----------- Write 'bundle' item to items and lables tables ---------------- + # --------------------------------------------------------------------------- + levelInfo = write_bpfUtteranceToDb(emuDBhandle, + header = header, + session = session, + bundle = bundle, + samplerate = samplerate) + + # Utterance item will be written even if the BPF body is empty! + + # --------------------------------------------------------------------------- + # -------------------------- Parse body ------------------------------------- + # --------------------------------------------------------------------------- + + if(bsKeyPosition < length(bpfLines)) + { + returnContainer = parse_bpfBody(bpfLines = bpfLines, + bpfPath = bpfPath, + bsKeyPosition = bsKeyPosition, + extractLevels = extractLevels, + levelClasses = levelClasses, + unifyLevels = unifyLevels, + refLevel = refLevel, + segmentToEventLevels = segmentToEventLevels) + + levels = returnContainer$levels + currentItemID = returnContainer$currentItemID + semicolonFound = returnContainer$semicolonFound + + # ------------------------------------------------------------------------- + # --- Change classes of levels in segmentToEventLevels (2->3 and 4->5) ---- + # ------------------------------------------------------------------------- + + # (done after parsing because parser needs original classes to make sense of BPF lines) + + for(key in segmentToEventLevels) + { + levelClasses[[key]] = levelClasses[[key]] + 1 + } + + # ------------------------------------------------------------------------- + # ------ Check for temporal overlap within levels with time information --- + # ------------------------------------------------------------------------- + + check_bpfOverlap(levels = levels, + bpfPath = bpfPath, + segmentToEventLevels = segmentToEventLevels, + levelClasses = levelClasses) + + # ------------------------------------------------------------------------- + # ---------- Pad segment tiers between segments with empty items ---------- + # ------------------------------------------------------------------------- + + levels = pad_bpfSegments(levels = levels, + currentItemID = currentItemID, + levelClasses = levelClasses) + + # ------------------------------------------------------------------------- + # ------------------------------ Assign seqIdx ---------------------------- + # ------------------------------------------------------------------------- + + levels = assign_bpfSeqIdx(levels = levels, + levelClasses = levelClasses) + + # ------------------------------------------------------------------------- + # ----------- Write item and label information to database ---------------- + # ------------------------------------------------------------------------- + + levelInfo = write_bpfItemsLabelsToDb(emuDBhandle, + levels = levels, + session = session, + bundle = bundle, + samplerate = samplerate, + unifyLevels = unifyLevels, + levelInfo = levelInfo) + + # ------------------------------------------------------------------------- + # --------------- Write link information to database ---------------------- + # ------------------------------------------------------------------------- + + if(!is.null(refLevel)) + { + linkIdxMap = get_bpfLinkIdxMap(levels = levels, + refLevel = refLevel) + + # ----------------------------------------------------------------------- + # --------------- Write link information to database -------------------- + # ----------------------------------------------------------------------- + + linkInfo = write_bpfLinksToDb(emuDBhandle, + levels = levels, + levelClasses = levelClasses, + linkIdxMap = linkIdxMap, + refLevel = refLevel, + session = session, + bundle = bundle, + unifyLevels = unifyLevels, + bpfPath = bpfPath) + + # ----------------------------------------------------------------------- + # -------- Unify levels in unifyLevels with the reference level --------- + # ----------------------------------------------------------------------- + + if(!is.null(unifyLevels)) + { + levelInfo = unify_bpfLevels(emuDBhandle, + levels = levels, + linkIdxMap = linkIdxMap, + refLevel = refLevel, + bpfPath = bpfPath, + levelInfo = levelInfo, + unifyLevels = unifyLevels, + session = session, + bundle = bundle) + } + } + } + + # --------------------------------------------------------------------------- + # --------- Return info containers to caller (converter) function ----------- + # --------------------------------------------------------------------------- + + returnContainer = list(levelInfo = levelInfo, linkInfo = linkInfo, semicolonFound = semicolonFound) + return(returnContainer) +} + +############################################################################### +############################################################################### +############################################################################### +############################################################################### +############################################################################### + +# ----------------------------------------------------------------------------- +# ------------------------- HELPER FUNCTIONS ---------------------------------- +# ----------------------------------------------------------------------------- + +## Parser for Bas Partitur header +## +## @param bpfLines +## @param bpfPath +## @param samplerate +## @keywords emuR BPF Emu +## @return list(header, bsKeyPosition, missingHeaderKeys, samplerate) + +parse_bpfHeader <- function(bpfLines, + bpfPath, + samplerate) +{ + # --------------------------------------------------------------------------- + # --------------------------- Necessary constants --------------------------- + # --------------------------------------------------------------------------- + + # Supplements for ranges in regular expressions. + UPPER = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + DIGITS = "0123456789" + + # Obligatory header keys. If these are not present in the BPF header -> warningTracker + OBLIGATORY_HEADER_KEYS = list("LHD", "REP", "SNB", "SAM", "SBF", "SSB", "NCH", "SPN") + + # All lines of the BPF must conform to the following regular expression: + GLOBAL_REGEX = paste0("^[", UPPER, DIGITS, "]{3}:.*") + + # Key that marks the beginning of the BPF body / end of the BPF header: + BODY_START_KEY = "LBD" + + # --------------------------------------------------------------------------- + # --------------------------- Initialize containers ------------------------- + # --------------------------------------------------------------------------- + + # Container for key value pairs from the BPF header. + header = list() + + # Container for found keys (to check for duplicates). + foundKeys = c() + + # Line index of the body start key (needed for parse_bpfBody to know where to start). + bsKeyPosition = NULL + + # --------------------------------------------------------------------------- + # ---------------- Parse until body start key is found ---------------------- + # --------------------------------------------------------------------------- + + for (idx in 1:length(bpfLines)) + { + # Skip empty lines. + if(stringr::str_length(bpfLines[idx]) == 0) + { + next + } + + # Check line's format. + if (!stringr::str_detect(bpfLines[idx], GLOBAL_REGEX)) + { + stop("Line ", idx, " of the following BPF does not conform to BPF specifications: ", bpfPath) + } + + # Get key value pair. + splitline = stringr::str_split_fixed(bpfLines[idx], ":", 2) + key = splitline[1] + + # Remove trailing white space and escape single quotes (compatibility with SQL). + value = stringr::str_replace(str_replace_all(splitline[2], "'", "''"), "^\\s+", "") + + # Once the body start key is found, remember its position and break. + if(key == BODY_START_KEY) + { + bsKeyPosition = idx + break + } + + header[[key]] = value + foundKeys = c(foundKeys, key) + } + + # --------------------------------------------------------------------------- + # ---------------------------- Some checks --------------------------------- + # --------------------------------------------------------------------------- + + # Throw exception if the body start key has not been found. + if(is.null(bsKeyPosition)) + { + stop("The following BPF does not contain the body start key 'LBD': ", bpfPath) + } + + # Throw exception if a key was found more than once in the header. + if(length(unique(foundKeys)) < length(foundKeys)) + { + stop("There is a duplicate header key in the following BPF: ", bpfPath) + } + + # --------------------------------------------------------------------------- + # ----- Compare samplerate of audio with the one declared in BPF header ----- + # --------------------------------------------------------------------------- + + samplerate = compare_bpfSamplerate(samplerate = samplerate, + header = header, + bpfPath = bpfPath) + + # --------------------------------------------------------------------------- + # ---------------------------------- Return -------------------------------- + # --------------------------------------------------------------------------- + + returnContainer = list(header = header, + bsKeyPosition = bsKeyPosition, + samplerate = samplerate) + + return(returnContainer) +} + +############################################################################### +############################################################################### +############################################################################### +############################################################################### +############################################################################### + +## Compare samplerate of audio file with the one declared in the BPF header +## +## @param header +## @param samplerate +## @keywords emuR BPF Emu +## @return samplerate + +compare_bpfSamplerate <- function(header, + samplerate, + bpfPath) +{ + # Throw an exception if we can't get a sample rate from the BPF or the audio file. + if(is.null(header$SAM) && is.null(samplerate)) + { + stop("Sample rate has not been read from audio and is therefore needed in the following BPF: ", bpfPath) + } + + # If we don't have a sample rate from the audio, get samle rate from BPF. + else if(!is.null(header$SAM) && is.null(samplerate)) + { + samplerate = as.integer(header$SAM) + } + + # If we have one sample rate from the audio and one from the BPF, check if they match. + else if(!is.null(header$SAM) && !is.null(samplerate)) + { + if(as.integer(header$SAM) != samplerate) + { + stop("Declared sample rate in the following BPF does not match the sample rate of the audio: ", bpfPath) + } + } + + return(samplerate) +} + +############################################################################### +############################################################################### +############################################################################### +############################################################################### +############################################################################### + +## Write item and label information for bundle Item +## +## @param emuDBhandle +## @param session +## @param bundle +## @param samplerate +## @param header +## @keywords emuR BPF Emu +## @return levelInfo + +write_bpfUtteranceToDb <- function(emuDBhandle, + session, + bundle, + samplerate, + header) +{ + # Utterance gets itemID 1 (other items will start at ID 2) + utteranceItemID = 1 + + # Collect label keys ("bundle" + all header keys found). + labelTracker = list("bundle") + + queryTxt = paste0("INSERT INTO items VALUES"," (", + " '", emuDBhandle$UUID, "', ", + " '", session, "', ", + " '", bundle, "', ", + utteranceItemID, ", ", + " 'bundle', ", + " 'ITEM', ", + " 1, ", + samplerate, ", ", + " NULL, ", + " NULL, ", + " NULL)") + + DBI::dbExecute(emuDBhandle$connection, queryTxt) + + labelIdxCounter = 1 + + # First label: 'bundle' -> empty string + queryTxt = paste0("INSERT INTO labels VALUES","(", + " '", emuDBhandle$UUID, "', ", + " '", session, "', ", + " '", bundle, "', ", + utteranceItemID, ", ", + labelIdxCounter, ", ", + " 'bundle', ", + " ''", ")") + + DBI::dbExecute(emuDBhandle$connection, queryTxt) + + labelIdxCounter = labelIdxCounter + 1 + + + # Subsequent labels: Key -> value pairs found in BPF header. + for(key in names(header)) + { + queryTxt = paste0("INSERT INTO labels VALUES","(", + " '", emuDBhandle$UUID, "', ", + " '", session, "', ", + " '", bundle, "', ", + utteranceItemID, ", ", + labelIdxCounter, ", ", + " '", key,"', ", + " '", header[[key]], "' ", + ")") + + DBI::dbExecute(emuDBhandle$connection, queryTxt) + + labelTracker[[length(labelTracker) + 1L]] = key + labelIdxCounter = labelIdxCounter + 1 + } + + + levelInfo = list(list(key = "bundle", type = "ITEM", labels = labelTracker)) + + return(levelInfo) +} + +############################################################################### +############################################################################### +############################################################################### +############################################################################### +############################################################################### + +## Parser for Bas Partitur Body +## +## @param bpfLines +## @param bpfPath +## @param bsKeyPosition +## @param extractLevels +## @param levelClasses +## @param unifyLevels +## @param currentItemID +## @param refLevel +## @keywords emuR BPF Emu +## @return list(levels, currentItemID, semicolonFound) + +parse_bpfBody <- function(bpfLines, + bpfPath, + bsKeyPosition, + extractLevels, + levelClasses, + unifyLevels, + refLevel, + segmentToEventLevels) +{ + # --------------------------------------------------------------------------- + # -------------------------- NECESSARY CONSTANTS ---------------------------- + # --------------------------------------------------------------------------- + + # Supplements for ranges in regular expressions. + UPPER = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + DIGITS = "0123456789" + + # Regular expressions for BPF lines of classes 1-5. + CLASS_REGEXES = c( + paste0("^[", UPPER, DIGITS, "]{3}:\\s+-?[", DIGITS, "][", DIGITS, ",;]*\\s+.*"), + paste0("^[", UPPER, DIGITS, "]{3}:\\s+[", DIGITS, "]+\\s+[", DIGITS, "]+\\s+.*"), + paste0("^[", UPPER, DIGITS, "]{3}:\\s+[", DIGITS, "]+\\s+.*"), + paste0("^[", UPPER, DIGITS, "]{3}:\\s+[", DIGITS, "]+\\s+[", DIGITS, "]+\\s+-?[", DIGITS, "][", DIGITS, ",;]*\\s+.*"), + paste0("^[", UPPER, DIGITS, "]{3}:\\s+[", DIGITS, "]+\\s+-?[", DIGITS, "][", DIGITS, ",;]*\\s+.*") + ) + + # The number of pieces a BPF line should be split into according to its class. + CLASS_SPLITNUMS = c(3, 4, 3, 5, 4) + + # All lines of the BPF must conform to the following regular expression: + GLOBAL_REGEX = paste0("^[", UPPER, DIGITS, "]{3}:.*") + + # Item type according to class. + CLASS_TO_TYPE = c("ITEM", "SEGMENT", "EVENT", "SEGMENT", "EVENT") + + # --------------------------------------------------------------------------- + # ------------------------------- Containers -------------------------------- + # --------------------------------------------------------------------------- + + # Boolean indicating whether a semicolon link operator (not supported) was found in the present BPF. + semicolonFound = FALSE + + # Container for levels. + levels = list() + + # --------------------------------------------------------------------------- + # --------------------------- Parsing --------------------------------------- + # --------------------------------------------------------------------------- + + # Initialize current itemID at 2 (since 'bundle' is 1). + currentItemID = 2 + + # Start parsing from body start key onwards (body start key position + 1). + for(idx in (bsKeyPosition+1):length(bpfLines)) + { + # Skip empty lines. + if(stringr::str_length(bpfLines[idx]) == 0) + { + next + } + + # Throw an exception if a line does not match the global regular expression. + if (!stringr::str_detect(bpfLines[idx], GLOBAL_REGEX)) + { + stop("Line ", idx, " of the following BPF does not conform to BPF specification: ", bpfPath) + } + + # Get level name (first three characters) + key = stringr::str_sub(bpfLines[idx], start = 1, end = 3) + + # If only a subset of levels should be extracted, and this level is not one of them, next. + if(!is.null(extractLevels)) + { + if(!key %in% extractLevels) + { + next + } + } + + if(!key %in% names(levelClasses)) + { + stop("Unknown level name in line ", + idx, + " of the following BPF: ", + bpfPath, + ". If this level is not one of the standard BPF tiers, you have to declare it using the newLevels argument.") + } + + if(!key %in% names(levels)) + { + levels[[key]] = list() + } + + # Throw an exception if the line does not conform to the regular expression of its class. + # WARNING: Cannot detect all errors! + if(!stringr::str_detect(bpfLines[idx], CLASS_REGEXES[levelClasses[[key]]])) + { + stop("Line ", idx, " in the following BPF does not match the Bas Partitur File Specifications: ", bpfPath, + ". Level '", key, "' should be of class ", levelClasses[[key]], ".") + } + + # Split the line according to its class. + splitline = stringr::str_split_fixed(bpfLines[idx], "\\s+", CLASS_SPLITNUMS[levelClasses[[key]]]) + + # Assign and increment global index. + if(!key %in% unifyLevels) + { + itemID = currentItemID + currentItemID = currentItemID + 1 + } + + # If the key in unifyLevels, assign no index (since this won't become an independent item but a label). + else + { + itemID = NA + } + + # Assign type, based on key class. + type = CLASS_TO_TYPE[levelClasses[[key]]] + + # Initialize seq index as NA (assigned later). + seqIdx = NA + + # ------------------------------------------------------------------------- + # --------------- Parse BPF line accrding to level class ------------------ + # ------------------------------------------------------------------------- + + returnContainer = parse_bpfLine(levelClass = levelClasses[[key]], + splitline = splitline) + + start = returnContainer$start + duration = returnContainer$duration + point = returnContainer$point + labelString = returnContainer$labelString + linksString = returnContainer$linksString + + # ------------------------------------------------------------------------- + # ---------------- Evaluate information in labelString -------------------- + # ------------------------------------------------------------------------- + + labels = evaluate_bpfLabelString(labelString = labelString, + key = key) + + # --------------------------------------------------------------------------- + # -------------------- Evaluate information in linksString ------------------ + # --------------------------------------------------------------------------- + + returnContainer = evaluate_bpfLinksString(linksString = linksString, + bpfPath = bpfPath, + refLevel = refLevel, + key = key) + + links = returnContainer$links + + if(returnContainer$semicolon) + { + semicolonFound = TRUE + } + + # ------------------------------------------------------------------------- + # ------- Turn segment into event if level in segmentToEventLevels -------- + # ------------------------------------------------------------------------- + + if(key %in% segmentToEventLevels) + { + itemID_start = itemID + itemID_end = currentItemID + currentItemID = currentItemID + 1 + + point_start = start + point_end = start + duration + + start = "NULL" + duration = "NULL" + + type = "EVENT" + + labels_start = list() + labels_end = list() + + + for(key in names(labels)) + { + labels_start[[key]] = paste0(labels[[key]], "_start") + labels_end[[key]] = paste0(labels[[key]], "_end") + } + + levels[[key]][[length(levels[[key]]) + 1L]] = list(itemID = itemID_start, + start = start, + duration = duration, + point = point_start, + labels = labels_start, + links = links, + seqIdx = seqIdx, + type = type) + + levels[[key]][[length(levels[[key]]) + 1L]] = list(itemID = itemID_end, + start = start, + duration = duration, + point = point_end, + labels = labels_end, + links = links, + seqIdx = seqIdx, + type = type) + } + + else + { + levels[[key]][[length(levels[[key]]) + 1L]] = list(itemID = itemID, + start = start, + duration = duration, + point = point, + labels = labels, + links = links, + seqIdx = seqIdx, + type = type) + } + } + + # --------------------------------------------------------------------------- + # -------------------------------- Return ----------------------------------- + # --------------------------------------------------------------------------- + + returnContainer = list(levels = levels, + currentItemID = currentItemID, + semicolonFound = semicolonFound) + return(returnContainer) +} + +############################################################################### +############################################################################### +############################################################################### +############################################################################### +############################################################################### + +## Parse a single BPF line according to the line's level class +## +## @param levelClass +## @param splitline +## @import stringr +## @keywords emuR BPF Emu +## @return list(start, duration, point, labelString, linksString) + +parse_bpfLine <- function(levelClass, + splitline) +{ + if(levelClass == 1) + { + start = "NULL" + duration = "NULL" + point = "NULL" + + # Escape single quotes with double quotes (conformity with SQL). + labelString = stringr::str_replace_all(splitline[3], "'", "''") + linksString = splitline[2] + } + + else if(levelClass == 2) + { + start = as.integer(splitline[2]) + duration = as.integer(splitline[3]) + point = "NULL" + labelString = stringr::str_replace_all(splitline[4], "'", "''") + linksString = NA + } + + else if(levelClass == 3) + { + start = "NULL" + duration = "NULL" + point = as.integer(splitline[2]) + labelString = stringr::str_replace_all(splitline[3], "'", "''") + linksString = NA + } + + else if(levelClass == 4) + { + start = as.integer(splitline[2]) + duration = as.integer(splitline[3]) + point = "NULL" + labelString = stringr::str_replace_all(splitline[5], "'", "''") + linksString = splitline[4] + } + + else if(levelClass == 5) + { + start = "NULL" + duration = "NULL" + point = as.integer(splitline[2]) + labelString = stringr::str_replace_all(splitline[4], "'", "''") + linksString = splitline[3] + } + + return(list(start = start, + duration = duration, + point = point, + labelString = labelString, + linksString = linksString)) +} + +############################################################################### +############################################################################### +############################################################################### +############################################################################### +############################################################################### + +## Turn raw label string into one or several value - key pairs +## +## @param labelString +## @param key +## @import stringr +## @keywords emuR BPF Emu +## @return labels + +evaluate_bpfLabelString <- function(labelString, + key) +{ + # --------------------------------------------------------------------------- + # -------------------------- NECESSARY CONSTANTS ---------------------------- + # --------------------------------------------------------------------------- + + # Supplements for ranges in regular expressions. + UPPER = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + DIGITS = "0123456789" + + # If the label string contains more than one label, expand into separate key -> value pairs. + # Syntax for label string with multiple labels: "ABC: value; DEF: value; GHI: value". + # If not, the level name becomes the label key, and the full label string becomes the value. + + labels = list() + + if(stringr::str_detect(labelString, paste0("^[", UPPER, DIGITS, "]{3}:\\s+.*;")) && + stringr::str_detect(labelString, paste0(";\\s*[", UPPER, DIGITS, "]{3}:\\s+.*$"))) + { + extractedLabels = stringr::str_split(labelString, "\\s*;\\s*")[[1]] + for(extractedLabel in extractedLabels) + { + splitLabel = stringr::str_split(extractedLabel, ":\\s+", n=2)[[1]] + + labels[[splitLabel[1]]] = splitLabel[2] + } + } + + else + { + labels[[key]] = labelString + } + + return(labels) +} + +############################################################################### +############################################################################### +############################################################################### +############################################################################### +############################################################################### + +## Turn raw links string into NA (no links) or a vector of integers +## +## @param linksString +## @param bpfPath +## @param refLevel +## @param key +## @import stringr +## @keywords emuR BPF Emu +## @return list(links, semicolon) + +evaluate_bpfLinksString <- function(linksString, + bpfPath, + refLevel, + key) +{ + # Variable to be returned (TRUE if a semicolon has been found in this BPF -> warningTracker). + semicolon = FALSE + + # If there was no link entry in the first place, or the link was '-1' -> no link information. + if(is.na(linksString) || stringr::str_detect(linksString, "-1")) + { + links = NA + } + + # Ignore links containing the ';' operator. + else if(stringr::str_detect(linksString, ";")) + { + semicolon = TRUE + links = NA + } + + # Store links as a vector of integers. + else + { + links = as.integer(unlist(stringr::str_split(linksString, ","))) + + # Throw an exception if an item links to the same item more than once. + for(link in links) + { + if(sum(links == link) > 1) + { + stop("An item cannot link to the same item more than once. BPF: ", bpfPath) + } + } + } + + # If the current level is the reference level, check whether all links are valid and atomic. + if(!is.null(refLevel)) + { + if(key == refLevel) + { + if(length(links) > 1) + { + stop("The reference level must contain atomic links. Not the case in the following BPF: ", bpfPath) + } + if(is.na(links[[1]])) + { + stop("The reference level must contain valid symbolic links. Valid symbolic ", + "links are neither '-1', nor do they contain the ';' operator. BPF: ", bpfPath) + } + } + } + + return(list(links = links, semicolon = semicolon)) +} + +############################################################################### +############################################################################### +############################################################################### +############################################################################### +############################################################################### + +## Check for temporal overlap on event and segment tiers +## +## @param levels +## @param bpfPath +## @param segmentToEventLevels +## @param levelClasses +## @import stringr +## @keywords emuR BPF Emu +## @return + +check_bpfOverlap <- function(levels, + bpfPath, + levelClasses, + segmentToEventLevels) +{ + for(key in names(levels)) + { + # If the level is time consuming, check whether there is segmental overlap. + if(levelClasses[[key]] %in% c(2, 4)) + { + start_order = sapply(levels[[key]], "[[", "start") + levels[[key]] = levels[[key]][order(start_order)] + + if(length(levels[[key]]) < 2) { next } + + for(idx in 2:length(levels[[key]])) + { + jdx = idx - 1 + if( + levels[[key]][[idx]][["start"]] <= levels[[key]][[jdx]][["start"]] + levels[[key]][[jdx]][["duration"]] + ) + { + currentElement = levels[[key]][[idx]] + segmentBPF = paste0(key, + ": ", + currentElement[[2]], + " ", + currentElement[[3]], + " ", + currentElement[[6]], + " ", + currentElement[[5]][[1]]) + stop("The following BPF contains overlapping segments on level '", + key, + "'; ", + " : ", + bpfPath, + " (BPF segment: ", + segmentBPF, + ")") + } + } + } + + # If the level is not time consuming, check whether there are two events pointing to the same sample. + if(levelClasses[[key]] %in% c(3, 5)) + { + point_order = sapply(levels[[key]], "[[", "point") + levels[[key]] = levels[[key]][order(point_order)] + + if(length(levels[[key]]) < 2) { next } + + for(idx in 2:length(levels[[key]])) + { + jdx = idx - 1 + if(levels[[key]][[idx]][["point"]] == levels[[key]][[jdx]][["point"]]) + { + if(key %in% segmentToEventLevels) + { + stop("The following BPF contains simultaneous events on level '", + key, + "' after segment overlap resolution: ", + bpfPath, + ". Check whether there are any segments with simultaneous starting and/or end points in this BPF.") + } + else + { + stop("The following BPF contains simultaneous events on level '", key, "': ", bpfPath) + } + } + } + } + } +} + +############################################################################### +############################################################################### +############################################################################### +############################################################################### +############################################################################### + +## Pad space between segment items with empty strings +## +## @param levels +## @param currentItemID +## @param levelClasses +## @keywords emuR BPF Emu +## @return levels + +pad_bpfSegments <- function(levels, + currentItemID, + levelClasses) +{ + # Pad segment tiers with empty segments. + # No padding before the first segment and after the last segment. + + for(key in names(levels)) + { + # If there is only one item on this level, there is no padding required: Jump to next level. + + if(length(levels[[key]]) == 1 || levelClasses[[key]] %in% c(1, 3, 5)) + { + next + } + + start_order = sapply(levels[[key]], "[[", "start") + levels[[key]] = levels[[key]][order(start_order)] + + for(idx in 1:(length(levels[[key]])-1)) + { + if((levels[[key]][[idx]][["start"]] + levels[[key]][[idx]][["duration"]] + 1) < levels[[key]][[idx+1]][["start"]]) + { + start = levels[[key]][[idx]][["start"]] + levels[[key]][[idx]][["duration"]] + 1 + duration = levels[[key]][[idx+1]][["start"]] - start -1 + + # Create new item for the pad. + labels = list() + labels[[key]] = "" + levels[[key]][[length(levels[[key]]) + 1L]] = + list(itemID = currentItemID, start = start, duration = duration, point = "NULL", + labels = labels, links = NA, seqIdx = NA, type = "SEGMENT") + + currentItemID = currentItemID + 1 + } + } + } + + return(levels) +} + +############################################################################### +############################################################################### +############################################################################### +############################################################################### +############################################################################### + +## Assign seqIdx to items +## +## @param levels +## @param levelClasses +## @keywords emuR BPF Emu +## @return levels + +assign_bpfSeqIdx <- function(levels, + levelClasses) +{ + for(key in names(levels)) + { + # If there is only one item on this level (and the level is thus already ordered chronologically): + # Assign seqIdx as 1 and jump to next level. + if(length(levels[[key]]) == 1) + { + levels[[key]][[1]][["seqIdx"]] = 1 + next + } + + + # Counter for assigned indices (starting at 1 for each level). + currentSeqIdx = 1 + + # Order items on levels with temporal information chronologically. + if(levelClasses[[key]] %in% c(2, 4)) + { + startOrder = sapply(levels[[key]], "[[", "start") + levels[[key]] = levels[[key]][order(startOrder)] + } + + else if(levelClasses[[key]] %in% c(3, 5)) + { + pointOrder = sapply(levels[[key]], "[[", "point") + levels[[key]] = levels[[key]][order(pointOrder)] + } + + # Assign seqIdx from top to bottom. + for(idx in 1:length(levels[[key]])) + { + levels[[key]][[idx]][["seqIdx"]] = currentSeqIdx + currentSeqIdx = currentSeqIdx + 1 + } + } + + return(levels) +} + +############################################################################### +############################################################################### +############################################################################### +############################################################################### +############################################################################### + +## Write item and label info to EmuDB +## +## @param emuDBhandle +## @param levels +## @param session +## @param bundle +## @param samplerate +## @param unifyLevels +## @keywords emuR BPF Emu +## @return levelInfo + +write_bpfItemsLabelsToDb <- function(emuDBhandle, + levels, + session, + bundle, + samplerate, + unifyLevels, + levelInfo) +{ + for(key in names(levels)) + { + # Skip current level if it is to be unified with the reference level. + if(key %in% unifyLevels) + { + next + } + + labelTracker = list() + + for(idx in 1:length(levels[[key]])) + { + + # Write item information. + queryTxt = paste0("INSERT INTO items VALUES"," (", + " '", emuDBhandle$UUID, "', ", + " '", session, "', ", + " '", bundle, "', ", + levels[[key]][[idx]][["itemID"]], ", ", + " '", key, "', ", + " '", levels[[key]][[idx]][["type"]], "', ", + levels[[key]][[idx]][["seqIdx"]], ", ", + samplerate, ", ", + levels[[key]][[idx]][["point"]], ", ", + levels[[key]][[idx]][["start"]], ", ", + levels[[key]][[idx]][["duration"]], ")") + + DBI::dbExecute(emuDBhandle$connection, queryTxt) + + labelIdxCounter = 1 + + for(labelKey in names(levels[[key]][[idx]][["labels"]])) + { + queryTxt = paste0("INSERT INTO labels VALUES","(", + " '", emuDBhandle$UUID, "', ", + " '", session, "', ", + " '", bundle, "', ", + levels[[key]][[idx]][["itemID"]], ", ", + labelIdxCounter,", ", + " '", labelKey, "', ", + " '", levels[[key]][[idx]][["labels"]][[labelKey]], "' ", + ")") + + DBI::dbExecute(emuDBhandle$connection, queryTxt) + + if(!labelKey %in% labelTracker) + { + labelTracker[[length(labelTracker) + 1L]] = labelKey + } + labelIdxCounter = labelIdxCounter + 1 + } + } + levelInfo[[length(levelInfo) + 1L]] = list(key = key, + type = levels[[key]][[1]][["type"]], + labels = labelTracker) + } + + return(levelInfo) +} + +############################################################################### +############################################################################### +############################################################################### +############################################################################### +############################################################################### + +## Get a list mapping from link strings in the BPF to itemIDs on the reference level +## +## @param levels +## @param refLevel +## @keywords emuR BPF Emu +## @return linkIdxMap + +get_bpfLinkIdxMap <- function( + levels, + refLevel +) +{ + # Map from link name to indices on reference level. + linkIdxMap = list() + for(idx in 1:length(levels[[refLevel]])) + { + linkIdxMap[[toString(levels[[refLevel]][[idx]][["links"]][1])]] = levels[[refLevel]][[idx]][["itemID"]] + } + + return(linkIdxMap) +} + +############################################################################### +############################################################################### +############################################################################### +############################################################################### +############################################################################### + +## Locally determine link directions and write link info to EmuDB +## +## @param emuDBhandle +## @param levels +## @param levelClasses +## @param refLevel +## @param session +## @param bundle +## @param unifyLevels +## @param bpfPath +## @param linkIdxMap +## @keywords emuR BPF Emu +## @return linkInfo + +write_bpfLinksToDb <- function(emuDBhandle, + levels, + levelClasses, + refLevel, + session, + bundle, + unifyLevels, + bpfPath, + linkIdxMap) +{ + # Container for information on levels found in this BPF. Will be returned to conversion function. + linkInfo = list() + + for(key in names(levels)) + { + if(key == refLevel || levelClasses[[key]] %in% c(2, 3) || key %in% unifyLevels) + { + next + } + + # ------------------------------------------------------------------------- + # - Get direction and type of links between refLevel and current level ---- + # ------------------------------------------------------------------------- + + returnContainer = get_bpfLinkCounts(levels, + key) + + # If we haven't seen any links, skip and don't make entries to linkInfo or the temp DB + if(is.null(returnContainer$seenLinks)) + { + next + } + + oneToMany = returnContainer$oneToMany + manyToOne = returnContainer$manyToOne + + linkInfoEntry = bpf_get_link_info_entry(key = key, + refLevel = refLevel, + oneToMany = oneToMany, + manyToOne = manyToOne) + + upper = linkInfoEntry$fromkey + lower = linkInfoEntry$tokey + + linkInfo[[length(linkInfo) + 1L]] = linkInfoEntry + + # ------------------------------------------------------------------------- + # ----------------------- Insert links into temp DB ----------------------- + # ------------------------------------------------------------------------- + + for(idx in 1:length(levels[[key]])) + { + if(is.na(levels[[key]][[idx]][["links"]][1])) + { + next + } + + for(link in levels[[key]][[idx]][["links"]]) + { + if(!(link %in% names(linkIdxMap))) + { + stop("There is a symbolic link on level ", + key, + " in the following BPF that does not point to any item on the reference level: ", + bpfPath) + } + + if(upper == refLevel) + { + queryTxt = paste0("INSERT INTO links VALUES","(", + " '", emuDBhandle$UUID, "', ", + " '", session, "', ", + " '", bundle, "', ", + linkIdxMap[[toString(link)]], ", ", + levels[[key]][[idx]][["itemID"]],", ", + " NULL", + ")") + + DBI::dbExecute(emuDBhandle$connection, queryTxt) + } + else if(lower == refLevel) + { + queryTxt = paste0("INSERT INTO links VALUES","(", + " '", emuDBhandle$UUID, "', ", + " '", session, "', ", + " '", bundle, "', ", + levels[[key]][[idx]][["itemID"]], ", ", + linkIdxMap[[toString(link)]], ", ", + " NULL)") + + + DBI::dbExecute(emuDBhandle$connection, queryTxt) + } + } + } + } + + return(linkInfo) +} + +############################################################################### +############################################################################### +############################################################################### +############################################################################### +############################################################################### + +## +## +## @param levels +## @param key +## @keywords emuR BPF Emu +## @return list(oneToMany, manyToOne, seenLinks) + +get_bpfLinkCounts <- function(levels, + key) +{ + seenLinks = NULL + oneToMany = 0 + # oneToMany increments for items on current level linking to more than one item on refLevel + # (+1 for each extra item on refLevel) + manyToOne = 0 + # increments for items on refLevel linking to more than one item on current level + # (+1 for each extra item on current level) + + for(idx in 1:length(levels[[key]])) + { + # Skip if current item does not have any links. + if(is.na(levels[[key]][[idx]][["links"]])[1]) + { + next + } + + oneToMany = oneToMany + (length(levels[[key]][[idx]][["links"]]) - 1) + + for(link in levels[[key]][[idx]][["links"]]) + { + if(link %in% seenLinks) + { + manyToOne = manyToOne + 1 + } + else + { + seenLinks = c(seenLinks, link) + } + } + } + + return(list(oneToMany = oneToMany, + manyToOne = manyToOne, + seenLinks = seenLinks)) +} + +############################################################################### +############################################################################### +############################################################################### +############################################################################### +############################################################################### + +## Evaluate link counts to determine link direction and type +## +## @param key +## @param refLevel +## @param oneToMany +## @param manyToOne +## @keywords emuR BPF Emu +## @return list(linkInfoEntry) + +bpf_get_link_info_entry <- function(key, + refLevel, + oneToMany, + manyToOne) +{ + # --------------------------------------------------------------------------- + # ------------------------ Determine link type ------------------------------ + # --------------------------------------------------------------------------- + + if(oneToMany == 0 && manyToOne == 0) + { + linkType = "ONE_TO_ONE" + } + + else if(oneToMany == 0 || manyToOne == 0) + { + linkType = "ONE_TO_MANY" + } + + else if(oneToMany > 0 && manyToOne > 0) + { + linkType = "MANY_TO_MANY" + } + + # --------------------------------------------------------------------------- + # ------------------------ Determine link direction ------------------------- + # --------------------------------------------------------------------------- + + if(oneToMany <= manyToOne) + { + upper = refLevel + lower = key + countRight = manyToOne + countWrong = oneToMany + } + + else + { + upper = key + lower = refLevel + countRight = oneToMany + countWrong = manyToOne + } + + # --------------------------------------------------------------------------- + # ------------------------------------ Return ------------------------------- + # --------------------------------------------------------------------------- + + linkInfoEntry = list(fromkey = upper, + tokey = lower, + type = linkType, + countRight = countRight, + countWrong = countWrong) + + return(linkInfoEntry) +} + +############################################################################### +############################################################################### +############################################################################### +############################################################################### +############################################################################### + +## Unify levels from unifyLevels with the reference level +## +## @param emuDBhandle +## @param levels +## @param unifyLevels +## @param linkIdxMap +## @param levelInfo +## @param refLevel +## @param bpfPath +## @param session +## @param bundle +## @keywords emuR BPF Emu +## @return levelInfo + + +unify_bpfLevels <- function(emuDBhandle, + levels, + unifyLevels, + linkIdxMap, + levelInfo, + refLevel, + bpfPath, + session, + bundle) +{ + # Start currentLabelIdx at 2, since refLevel already has one label (namely the refLevel's name). + currentLabelIdx = 2 + newLabelsforRefLevel = NULL + + for(key in unifyLevels) + { + if(!key %in% names(levels)) + { + next + } + + seenLinks = list() + + for(idx in 1:length(levels[[key]])) + { + + if(levels[[key]][[idx]][["links"]][1] %in% seenLinks) + { + stop("If you want to unify level ", + key, + " with the reference level, you cannot have more than one item on ", + key, + " pointing to one item on the reference level. BPF: ", + bpfPath) + } + + if(is.na(levels[[key]][[idx]][["links"]][1])) + { + stop("If you want to unify level, ", + key, + " with the reference level, it must not contain any link-less items. BPF: ", + bpfPath) + } + + if(!toString(levels[[key]][[idx]][["links"]][1]) %in% names(linkIdxMap)) + { + stop("There is a symbolic link on level ", + key, + " in the following BPF that does not point to any item on the reference level: ", + bpfPath) + } + + for(labelKey in names(levels[[key]][[idx]][["labels"]])) + { + for(link in 1:length(levels[[key]][[idx]][["links"]])) + { + queryTxt = paste0("INSERT INTO labels VALUES(", + " '", emuDBhandle$UUID, "', ", + " '", session, "', ", + " '", bundle, "', ", + linkIdxMap[[toString(levels[[key]][[idx]][["links"]][link])]], ", ", + currentLabelIdx, ", ", + " '", labelKey, "', ", + " '", levels[[key]][[idx]][["labels"]][[labelKey]], "' ", + ")") + + DBI::dbExecute(emuDBhandle$connection, queryTxt) + seenLinks[[length(seenLinks) + 1L]] = levels[[key]][[idx]][["links"]][link] + } + + if(!labelKey %in% newLabelsforRefLevel) + { + newLabelsforRefLevel = c(newLabelsforRefLevel, labelKey) + } + + currentLabelIdx = currentLabelIdx + 1 + } + } + } + + for(idx in 1:length(levelInfo)) + { + if(levelInfo[[idx]][["key"]] == refLevel) + { + levelInfo[[idx]][["labels"]] = c(levelInfo[[idx]][["labels"]], newLabelsforRefLevel) + } + } + + return(levelInfo) +} + + +# TODO: Find a better solution for the ";"-case (links to space in between items) +# TODO: Build syntax tree +# TODO: unify levels with levels other than the reference level +# TODO: unify levels that are not class 1 +# TODO: OOP-Implementation to avoid passing/returning so many variables diff --git a/R/emuR-parser.common.R b/R/emuR-parser.common.R new file mode 100644 index 00000000..df1939c5 --- /dev/null +++ b/R/emuR-parser.common.R @@ -0,0 +1,191 @@ + +## Get first position of character in a string from given position +## +## @param string string to search in +## @param char character to search +## @param pos start position +## @param literalQuote optional quote character to quote literal strings +## @return position in string or -1 if not found or pos outside string constraints +## @import stringr +get_charPosition <- function(string, + char, + pos = 1, + literalQuote = NULL){ + + strLen = nchar(string) + + us = pos:strLen + inLiteral = FALSE + for(c in us){ + if(c > strLen){ + return(-1) + } + ch = substr(string, c, c) + if(!is.null(literalQuote)){ + if(ch == literalQuote){ + inLiteral =! inLiteral + } + } + if(!inLiteral && ch == char){ + return(c) + } + + } + return(-1) +} + +## Get first position of string in a string from given position +## +## @param string string to search in +## @param searchStr string to search +## @param pos start position +## @param literalQuote optional quote character to quote literal strings +## @return position in string or -1 if not found or pos outside string constraints +## @import stringr +get_stringPosition <- function(string, + searchStr, + pos = 1, + literalQuote = NULL){ + + strLen = nchar(string) + searchStrLen = nchar(searchStr) + us = pos:strLen - searchStrLen + 1 + inLiteral = FALSE + for(c in us){ + if(c > strLen){ + return(-1) + } + ch = substr(string, c, c) + if(!is.null(literalQuote)){ + if(ch == literalQuote){ + inLiteral =! inLiteral + } + } + + sstr = substr(string, c, c + searchStrLen - 1) + if(!inLiteral && sstr == searchStr){ + return(c) + } + + } + return(-1) +} + + +## Get last position of character in a string from given position backwards +## +## @param string string to search in +## @param char character to search +## @param pos start position +## @param literalQuote optional quote character to quote literal strings +## @return position in string or -1 if not found or pos outside string constraints +## @import stringr +get_lastCharPosition <- function(string, + char, + pos = nchar(string), + literalQuote = NULL){ + strLen = nchar(string) + + us = pos:1 + inLiteral = FALSE + for(c in us){ + + ch = substr(string, c, c) + if(!is.null(literalQuote)){ + if(ch == literalQuote){ + inLiteral =! inLiteral + } + } + if(!inLiteral && ch == char){ + return(c) + } + + } + return(-1) +} + +## Get first position of character in a string from given position +## +## @param string string to search in +## @param search string to search +## @param pos start position +## @param literalQuote character to quote literal strings +## @param bracket two-dim vector char concatenating open and close bracket (e.g. c('[',']')) +## @return position in string or -1 if not found or pos outside string constraints +## @import stringr +get_stringPositionOutsideBrackets <- function(string, + search, + pos = 1, + literalQuote = NULL, + bracket = NULL){ + + strLen = nchar(string) + sLen = nchar(search) + us = pos:strLen + inLiteral = FALSE + inBracketLvl = 0 + for(c in us){ + if(c > strLen){ + return(-1) + } + ch = substr(string, c, c) + if(!is.null(literalQuote)){ + if(ch == literalQuote){ + inLiteral =! inLiteral + } + } + if(!inLiteral && !is.null(bracket)){ + if(ch == bracket[1]){ + inBracketLvl = inBracketLvl + 1 + }else if(ch == bracket[2]){ + inBracketLvl = inBracketLvl - 1 + } + if(inBracketLvl < 0){ + stop("Syntax error: Close bracket ", + bracket[2], + " without open bracket ", + bracket[1], + "\n") + } + } + if(!inLiteral && inBracketLvl == 0){ + sStr = substr(string, c, c + sLen - 1) + if(sStr == search){ + return(c) + } + } + + } + return(-1) +} + +## Parse line to key value pair +## @param line line +## @param separator separator char +## @param doubleQuoted TRUE if expecting double quoted value +## @param initialTrim remove leading+trailing whitespaces before procceeding (default=TRUE) +## @return character vector conating key and value +## @import stringr +parse_lineToKeyValue = function(line, + separator = '=', + doubleQuoted = FALSE, + initialTrim = TRUE){ + if(initialTrim){ + line = stringr::str_trim(line) + } + eqSignI = get_charPosition(line, separator) + if(eqSignI == -1){ + return(NULL) + } + left = stringr::str_sub(line, + end = eqSignI - 1) + key = stringr::str_trim(left) + right = stringr::str_sub(line, + start = eqSignI + 1) + value = stringr::str_trim(right) + if(doubleQuoted){ + value = sub('^\"', '', value); + value = sub('\"$', '', value); + } + return(c(key, value)) +} \ No newline at end of file diff --git a/R/emuR-parser.esps.R b/R/emuR-parser.esps.R new file mode 100644 index 00000000..0a0822d0 --- /dev/null +++ b/R/emuR-parser.esps.R @@ -0,0 +1,134 @@ +## Parser for ESPS label files +## +## @param labFilePath ESPS label file path +## @param tierName name of the tier +## @param sampleRate sample rate of corresponding signal file +## @param encoding text encoding (default: NULL -> R encoding "unknown") +## @return new tier containing parsed items +## @import stringr +## @keywords emuR ESPS lab Emu +## +parse_espsLabelFile <- function(labFilePath = NULL, + tierName, + tierType = NULL, + sampleRate, + encoding = NULL, + idCnt = 0) { + SIGNAL_KEY = "signal" + NUMBER_OF_FIELDS_KEY = "nfields" + SEPARATOR_KEY = "separator" + COLOR_KEY = "color" + DATA_SECTION_START_KEY = '#' + INTERVAL_FLAG_VALUE = 'H#' + + fileToRead = NULL + inHeaderSection = TRUE + intervalMode = FALSE + firstDataLine = TRUE + intervalStart = NULL + itemList = list() + + if(is.null(labFilePath)){ + stop("Argument labFilepath or labCon must not be NULL\n") + }else{ + fileToRead = labFilePath + } + if(sampleRate <= 0){ + stop("Samplerate must be greater than zero\n") + } + + # read + if(is.null(encoding)){ + lc = try(readr::read_lines(fileToRead)) + }else{ + lc = try(readr::read_lines(fileToRead,readr::locale(encoding = encoding))) + } + if(inherits(lc, "try-error")) { + stop("read.esps: cannot read from file ", fileToRead) + } + + for(line in lc){ + trimmedLine = stringr::str_trim(line) + if(inHeaderSection){ + if(trimmedLine == DATA_SECTION_START_KEY){ + inHeaderSection = FALSE + }else{ + kv = parse_lineToKeyValue(trimmedLine, '[[:space:]]') + if(!is.null(kv) && kv[1] == NUMBER_OF_FIELDS_KEY){ + if(kv[2] != 1){ + stop("only files with one field supported") + } + } + # ignore other headers + } + }else{ + lineTokensLst = stringr::str_split(trimmedLine,'[[:space:]]+', 3) + lineTokens = lineTokensLst[[1]] + lineTokenCount = length(lineTokens); + if(lineTokenCount >= 2){ + + timeStampStr = lineTokens[1] + color = lineTokens[2] + label = NULL + if(lineTokenCount == 3){ + label = lineTokens[3] + } + + timeStamp = as(timeStampStr,"numeric") + timeStampInSamples = timeStamp*sampleRate + if(firstDataLine){ + + if(is.null(tierType)){ + if(label == INTERVAL_FLAG_VALUE){ + tierType = 'SEGMENT' + }else{ + tierType = 'EVENT' + } + } + } + + if(firstDataLine & tierType == 'SEGMENT'){ + samplePoint = floor(timeStampInSamples) + intervalStartPoint = samplePoint + }else{ + lblVal = '' + if(!is.null(label)){ + lblVal = label + } + labelAttrs = list(list(name = tierName, + value = lblVal)) + #labelAttrs[[tierName]]=label + + if(tierType == 'SEGMENT'){ + samplePoint = floor(timeStampInSamples) + # duration calculation according to partitur format + # the sum of all durations is not equal to the complete sample count + duration = samplePoint - intervalStartPoint - 1 + currItem = list(id = idCnt, + sampleStart = intervalStartPoint, + sampleDur = duration, + labels = labelAttrs) + idCnt = idCnt + 1 + itemList[[length(itemList) + 1]] <- currItem + intervalStartPoint = samplePoint + + }else{ + samplePoint = round(timeStampInSamples) + currItem=list(id = idCnt, + samplePoint = samplePoint, + labels = labelAttrs) + idCnt = idCnt + 1 + itemList[[length(itemList) + 1]] <- currItem + } + + } + firstDataLine = FALSE + } + } + } + labTier = list(name = tierName, + type = tierType, + sampleRate = sampleRate, + items = itemList) + return(labTier) +} diff --git a/R/emuR-parser.hlb.R b/R/emuR-parser.hlb.R new file mode 100644 index 00000000..a00fbe3b --- /dev/null +++ b/R/emuR-parser.hlb.R @@ -0,0 +1,330 @@ +## Parser for EMU HLB hierarchy files +## +## @param database the database object +## @param hlbFilePath file path to EMU HLB file +## @param levelDefinitions list of annotation level definitions +## @param levels list of already existing annotation levels +## @return emuDB database object including parsed hlb file +## @import stringr +## @keywords emuR Emu hierarchy hlb +## +parse_hlbFile <- function(hlbFilePath = NULL, + levelDefinitions, + levels, + encoding = NULL) { + EMU_HIERARCHY_HEADER = "**EMU hierarchical labels**" + hlbTiers = list() + + if(is.null(hlbFilePath)){ + stop("Argument hlbFilepath must not be NULL\n") + } + + # read file contents + if(is.null(encoding)){ + lines = try(readr::read_lines(hlbFilePath)) + }else{ + lines = try(readr::read_lines(hlbFilePath, + readr::locale(encoding = encoding))) + } + if(inherits(lines, "try-error")) { + stop("Cannot read from file ", hlbFilePath) + } + lineCount = length(lines) + + # assume header in line 1 + # ALC EMU Db has trailing blank in header line + headerPattern = paste0(gsub('*', + '[*]', + EMU_HIERARCHY_HEADER, + fixed = TRUE), + '[:blank:]*') + # check header + if(!grepl(headerPattern, lines[[1]])){ + stop("No Emu Hlb file header found! ", hlbFilePath) + } + + links = list() + items = list() + # assume max id value in line 2 + maxId = as.integer(lines[[2]]) + + # initialize vars + currentTierName = NULL + newTier = NULL + currentLevelDef = NULL + currentitems = list() + currentExistingItems = NULL + currentIdx = 1 + + # parse through lines + for(lnr in 3:lineCount){ + line = lines[[lnr]] + if(line == ''){ + # empty line marks beginning of new level + if(!is.null(currentTierName)){ + # look for already existing (labfile) tier + tierExists = FALSE + for(t in levels){ + if(t$name == currentTierName){ + tierExists = TRUE + # TODO check items + + # for now I assume that every item in ESPS label file has corresponding item in hlb file and vice versa + #merge + labitems = t$items + currentExistingItems = labitems + labitemsCount = length(labitems) + currentitemsCount = length(currentitems) + if(labitemsCount != currentitemsCount){ + stop("Tier: ", + currentTierName, + ": count of items (", + currentitemsCount, + ") in HLB file '", + hlbFilePath, + "'' differs from count in ESPS label file (", + labitemsCount, + ")"); + } + newTier = t + class(newTier) <- 'emuR.annotation.model.Level' + break + } + } + if(!tierExists){ + currentExistingItems = NULL + newTier = list(name = currentTierName, + type = 'ITEM', + sampleRate = NULL, + items = currentitems); + }else{ + newItems = list() + # TODO !! + exItems = newTier$items + exItemsLen = length(exItems) + currItemsLen = length(currentitems) + if(exItemsLen != currItemsLen){ + # TODO more verbose + stop("Existing item count mismatch: ", + exItemsLen, + " != ", + currItemsLen) + } + i = 0 + if(exItemsLen > 0){ + for(i in 1:exItemsLen){ + exItem = exItems[[i]] + currItem = currentitems[[i]] + exType = exItem$type + # merge labels + mergedLabels = exItem[['labels']] + for(itLbl in currItem[['labels']]){ + for(exLabel in exItem[['labels']]){ + if(exLabel[['name']] == itLbl[['name']]){ + # label exists, check equality + exLblVal = exLabel[['value']] + if(is.null(exLblVal)){ + exLblVal = '' + } + itLblVal = itLbl[['value']] + if(exLblVal != itLblVal){ + stop("Labels of attribute level '", + exLabel[['name']], + "' differ: '", + exLabel[['value']], + "' '", + itLbl[['value']], + "' in HLB file: '", + hlbFilePath, + "' line ", + lnr, + ".\n") + } + }else{ + # merge + mergedLabels[[length(mergedLabels) + 1]] = itLbl + } + } + + } + if(newTier$type == "SEGMENT"){ + newItems[[i]] = list(id = currItem$id, + sampleStart = exItem$sampleStart, + sampleDur = exItem$sampleDur, + labels = mergedLabels) + }else if(newTier$type == "EVENT"){ + newItems[[i]]=list(id = currItem$id, + samplePoint = exItem$samplePoint, + labels = mergedLabels) + } + } + } + newTier$items = newItems + } + hlbTiers[[length(hlbTiers) + 1]] = newTier + } + currentTierName = NULL + currentLevelDef = NULL + currentitems = list() + currentIdx = 1 + } + lineTokensLst = strsplit(line,' ') + lineTokens = lineTokensLst[[1]] + lineTokenCount = length(lineTokens); + if(lineTokenCount >= 1){ + firstTk = lineTokens[[1]] + if(!is.null(currentTierName)){ + # + idStr = firstTk + id = as.integer(idStr) + if(lineTokenCount < 2){ + stop("Missing label for id: ", + id, + " in HLB file: '", + hlbFilePath, + "' line ", + lnr, + " !\n") + } + label = lineTokens[[2]] + labels = NULL + if(!is.null(items[[idStr]])){ + stop("Duplicate item id: ", + id, + " in HLB file: '", + hlbFilePath, + "' line ", + lnr, + " !\n") + } + + attrs = list() + # Add label of tier name + attrs[[length(attrs) + 1]] = list(name = currentTierName, + value = label) + + # add optional other attribute labels + if(lineTokenCount >= 3){ + attrIdx = length(attrs) + 1 + for(ti in 3:lineTokenCount){ + attrNm = attrDefSeq[ti-2] + attrs[[attrIdx]] = list(name = attrNm, + value = lineTokens[[ti]]) + attrIdx = attrIdx + 1 + } + } + id = as.integer(firstTk) + item = NULL + item = list(id = id, + labels = attrs) + currentitems[[length(currentitems)+1]] = item + + items[[firstTk]] = item + } + + for(td in levelDefinitions){ + + if(td$name == firstTk){ + currentTierName = firstTk + currentLevelDef = td + if(lineTokenCount >= 2){ + attrDefSeq = c() + for(ti in 2:lineTokenCount){ + lblNi = ti - 1 + tk = lineTokens[[ti]] + + ldAttrDef = NULL + if(tk != currentTierName){ + # ALC db does not have the same sequence of attributes in .tpl and .hlb files + for(attrDef in td[['attributeDefinitions']]){ + if(attrDef[['name']] == tk){ + ldAttrDef = attrDef + # create sequence order of label names (keys) from HLB file + attrDefSeq = c(attrDefSeq,tk) + break + } + } + tdLblName = ldAttrDef[['name']] + + if(is.null(ldAttrDef)){ + stop("Label name ", + tk, + " has no declaration in level definition. '", + hlbFilePath, + "' line ", + lnr, + ": ", + line) + } + + # This should never happen, the code can be removed safely + if(tdLblName != tk){ + stop("Label name ", + tk, + " does not match label name ", + tdLblName, + " of level definition. '", + hlbFilePath, + "' line ", + lnr, + ": ", + line) + } + } + } + }else{ + stop("Missing label name in '", + hlbFilePath, + "' line ", + lnr, + ": ", + line) + } + break + } + } + if(is.null(currentTierName)){ + # Link line + fromIdStr = firstTk + fromId = as.integer(fromIdStr) + if(lineTokenCount >= 2){ + for(ti in 2:lineTokenCount){ + toIdStr = lineTokens[[ti]] + toId = as.integer(toIdStr) + links[[length(links)+1]] = list(fromID = fromId, + toID = toId) + } + } + + } + } + + } + + # Add levels which are not used in HLB file + maxId = maxId + 1 + for(l in levels){ + ln = l$name + found = FALSE + for(hlbLvl in hlbTiers){ + hlbLvlNm = hlbLvl$name + if(hlbLvlNm == ln){ + found = TRUE + break + } + } + if(!found){ + # fix ids + if(length(l$items) > 0){ + for(item_idx in 1:length(l$items)){ + l$items[[item_idx]]$id = maxId + maxId = maxId + 1 + } + } + hlbTiers[[length(hlbTiers)+1]] = l + } + } + result=list(hlbTiers = hlbTiers, + links = links) + return(result) +} diff --git a/R/emuR-play_segs.R b/R/emuR-play_segs.R new file mode 100644 index 00000000..9c69e2e4 --- /dev/null +++ b/R/emuR-play_segs.R @@ -0,0 +1,72 @@ +play_segs <- function(emuDBhandle, seglist){ + + # for(row_idx in 1:nrow(seglist)){ + # + # cur_row = seglist[row_idx,] + # + # # get audio and write to temp folder + # audio = wrassp::read.AsspDataObj(file.path(emuDBhandle$basePath, + # paste0(cur_row$session, session.suffix), + # paste0(cur_row$bundle, bundle.dir.suffix), + # paste0(cur_row$bundle, ".wav")), + # begin = as.numeric(cur_row$sample_start), + # end = as.numeric(cur_row$sample_end), + # samples = TRUE) + # + # wrassp::write.AsspDataObj(audio, file.path(tempdir(), "cur_play_segs.wav")) + # + # # generate spectrogram + # + # # set parameters for dftSpectrum function + # params = formals(wrassp::dftSpectrum) + # params$fftLength = 512 + # params$windowShift = 1 # in ms + # params$bandwidth = 100 # == 0.01 secs + # params$window = "GAUSS2_5" + # + # dft_vals = get_trackdata(emuDBhandle = emuDBhandle, + # cur_row, + # onTheFlyFunctionName = "dftSpectrum", + # onTheFlyParams = params, + # verbose = FALSE, + # resultType = "tibble") + # + # + # # select data columns + # dft_vals_tracks = dft_vals %>% + # select(matches("^T[0-9]+")) %>% + # as.matrix() + # + # # normalize trackdata to values between 0 and 1 (for each row) + # # apply also transposed matrix + # # '1 - ' to give peaks high values (== more black in raster plot) + # dft_vals_tracks_norm_transp = apply(dft_vals_tracks, 1, function(dft_row) { + # 1 - ((dft_row - min(dft_row)) / (max(dft_row) - min(dft_row))) + # }) + # + # # flip so low Hz values are at the bottom + # td_tracks_norm_transp_flip = apply(dft_vals_tracks_norm_transp, 2, rev) + # + # jpeg(file = file.path(tempdir(), "cur_play_segs.jpg")) + # plot(as.raster(td_tracks_norm_transp_flip, max=1)) + # + # title(main = paste("seglist row entry", row_idx, sep = " "), + # sub = paste(cur_row, collapse = "; ")) # 1 is boundary 0 is non boundary + # dev.off() + # + # + # viewer <- getOption("viewer") + # # if (!is.null(viewer)){x + # file.copy("~/Developer/emuR/R/play_segs.html", file.path(tempdir(), "play_segs.html"), overwrite = TRUE) + # # viewer(file.path(tempdir(), "play_segs.html"), height = 500) + # # }else{ + # utils::browseURL(file.path(tempdir(), "play_segs.html")) + # # } + # + # input_key <- readline(prompt="Press any key to continue (press c to cancel): ") + # if(input_key == "c") break + # + # + # } + +} diff --git a/R/emuR-plotting.R b/R/emuR-plotting.R new file mode 100644 index 00000000..32b98a9c --- /dev/null +++ b/R/emuR-plotting.R @@ -0,0 +1,140 @@ + +##' Create spectrogram image as raster +##' +##' @param audioFilePath path to audio file to plot spectrogram of +##' @param begin begin time in seconds (passed into begin parameter of \code{wrassp::read.AsspDataObj}) +##' @param end end time in seconds (passed into end parameter of \code{wrassp::read.AsspDataObj}) +##' @param windowSizeInSecs window size in seconds +##' @param alpha value of spectrogram +##' @param lowerFreq lower frequency limit of spectrogram +##' @param upperFreq upper frequency limit of spectrogram +##' @param window window type used in spectrogram calculation. Allowed values +##' are: +##' \itemize{ +##' \item "BARTLETT" +##' \item "BARTLETTHANN" +##' \item "BLACKMAN" +##' \item "COSINE" +##' \item "GAUSS" (the default) +##' \item "HAMMING" +##' \item "HANN" +##' \item "LANCZOS" +##' \item "RECTANGULAR" +##' \item "TRIANGULAR" +##' } +##' @param dynRangeInDB dynamic range in DB of spectrogram +##' @param audioChannel channel of audio file to draw spectrogram of (only +##' applicable when using multi-channel audio files) +##' @param preEmphasisFilterFactor used in time domain for amplifying high-freqs +##' @param invert invert the colors of the spectrogram +##' @return a image raster object +##' @importFrom grDevices as.raster +##' @export +create_spectrogram_image_as_raster <- function(audioFilePath, + begin = 0, + end = 0, + windowSizeInSecs = 0.01, + alpha = 0.16, + lowerFreq = 0, + upperFreq = 5000, + window = "GAUSS", + dynRangeInDB = 70, + audioChannel = 1, + preEmphasisFilterFactor = 0.97, + invert = FALSE){ + + path2jsFile = file.path(system.file('inst', package='emuR'), + "js", + "spectro-drawing.class.js") + ct = V8::v8() + ct$reset() + ct$source(path2jsFile) + # create new instance + ct$eval("let spectro_drawer = new SpectroDrawingClass();") + + ado = wrassp::read.AsspDataObj(audioFilePath, + begin = begin, + end = end) + + ct$assign("srXwsis", attr(ado, "sampleRate") * windowSizeInSecs) + fftN = as.numeric(ct$eval("spectro_drawer.calcClosestPowerOf2Gt(srXwsis);")) + # for better resolution set lower limit + if(fftN < 512){ + fftN = 512 + } + + imgWidth = 1920 + imgHeight = 1080 + + allowedWindows = c("BARTLETT", + "BARTLETTHANN", + "BLACKMAN", + "COSINE", + "GAUSS", + "HAMMING", + "HANN", + "LANCZOS", + "RECTANGULAR", + "TRIANGULAR") + + win_idx = match(window, allowedWindows) + + if(is.na(win_idx)){ + stop("unsupported window type!") + } + + args = list( + windowSizeInSecs = windowSizeInSecs, + fftN = fftN, + alpha = alpha, + lowerFreq = lowerFreq, + upperFreq = upperFreq, + samplesPerPxl = (nrow(ado$audio) + 1 - 0) / imgWidth , # (end sample index + 1 - start sample index) / imgWidth + window = 5, + imgWidth = imgWidth, + imgHeight = imgHeight, + dynRangeInDB = dynRangeInDB, + pixelRatio = 1, + sampleRate = attr(ado, "sampleRate"), + transparency = 255, + audioBuffer = c(rep(0, fftN/2 + 1), as.numeric(ado$audio[,audioChannel]), rep(0, fftN/2 + 1)), # zero pad for first and last spectral slice + audioBufferChannels = ncol(ado$audio), # this doesn't seem to be used + drawHeatMapColors = FALSE, + preEmphasisFilterFactor = preEmphasisFilterFactor, + heatMapColorAnchors = list(c(255, 0, 0), c(0, 255, 0), c(0, 0, 0)), # does this map to a matrix?, + invert = invert + ); + + ct$assign("args", args) + ct$eval("res = spectro_drawer.renderSpectrogram(args);") + res = ct$get("res") + + res_mat = matrix(as.integer(res[seq(1, length(res), 4)])/255, + nrow = imgHeight, + ncol = imgWidth, + byrow = TRUE) + + + return(grDevices::as.raster(res_mat)) +} + +############ +# test code +# op = par(mar = rep(0, 4), +# xaxs = "i", +# yaxs = "i") +# plot.new() +# plot.window(xlim = c(0, 1), +# ylim = c(0, 1) +# ) +# spect_raster = create_spectrogram_image_as_raster("~/Desktop/emuR_demoData/ae_emuDB/0000_ses/msajc003_bndl/msajc003.wav", +# begin = 0, +# end = 2, +# invert = FALSE) +# rasterImage(spect_raster, +# 0, +# 0, +# 1, +# 1, +# interpolate = FALSE) # interpolate +# par(op) diff --git a/R/emuR-query.database.R b/R/emuR-query.database.R new file mode 100644 index 00000000..fb098d0d --- /dev/null +++ b/R/emuR-query.database.R @@ -0,0 +1,2137 @@ +########################################################################### +# create table / index definitions for DBI that are used at query time + +# tabels that store "filtered" items and labels (when session/bundlePatterns are used) +database.DDL.emuDB_itemsFilteredTmp = gsub("CREATE TABLE items", + "CREATE TEMP TABLE items_filtered_tmp", + database.DDL.emuDB_items) +database.DDL.emuDB_itemsFilteredTmp = gsub(",...FOREIGN.*CASCADE", + "", + database.DDL.emuDB_itemsFilteredTmp) # remove FOREIGN KEY + +database.DDL.emuDB_labelsFilteredTmp = gsub("CREATE TABLE labels", + "CREATE TEMP TABLE labels_filtered_tmp", + database.DDL.emuDB_labels) +database.DDL.emuDB_labelsFilteredTmp = gsub(",...FOREIGN.*CASCADE", + "", + database.DDL.emuDB_labelsFilteredTmp) # remove FOREIGN KEY + +database.DDL.emuDB_linksFilteredTmp = gsub("CREATE TABLE links", + "CREATE TEMP TABLE links_filtered_tmp", + database.DDL.emuDB_links) +database.DDL.emuDB_linksFilteredTmp = gsub(",...FOREIGN.*CASCADE", + "", + database.DDL.emuDB_linksFilteredTmp) # remove FOREIGN KEY + +##################################### +create_tmpFilteredQueryTablesDBI <- function(emuDBhandle){ + + DBI::dbExecute(emuDBhandle$connection, database.DDL.emuDB_itemsFilteredTmp) + DBI::dbExecute(emuDBhandle$connection, database.DDL.emuDB_labelsFilteredTmp) + DBI::dbExecute(emuDBhandle$connection, database.DDL.emuDB_linksFilteredTmp) + + # tabels that store subsets of filtered tables + database.DDL.emuDB_itemsFilteredSubsetTmp = gsub("CREATE TABLE items", + "CREATE TEMP TABLE items_filtered_subset_tmp", + database.DDL.emuDB_items) + database.DDL.emuDB_itemsFilteredSubsetTmp = gsub(",...FOREIGN.*CASCADE", + "", + database.DDL.emuDB_itemsFilteredSubsetTmp) # remove FOREIGN KEY + + database.DDL.emuDB_labelsFilteredSubsetTmp = gsub("CREATE TABLE labels", + "CREATE TEMP TABLE labels_filtered_subset_tmp", + database.DDL.emuDB_labels) + database.DDL.emuDB_labelsFilteredSubsetTmp = gsub(",...FOREIGN.*CASCADE", + "", + database.DDL.emuDB_labelsFilteredSubsetTmp) # remove FOREIGN KEY + + DBI::dbExecute(emuDBhandle$connection, + database.DDL.emuDB_itemsFilteredSubsetTmp) + DBI::dbExecute(emuDBhandle$connection, + database.DDL.emuDB_labelsFilteredSubsetTmp) + + database.DDL.emuDB_lrExpResTmp = paste0("CREATE TEMP TABLE lr_exp_res_tmp (", + " db_uuid VARCHAR(36),", + " session TEXT,", + " bundle TEXT,", + " l_seq_start_id INTEGER,", + " l_seq_end_id INTEGER,", + " l_seq_len INTEGER,", + " l_level TEXT,", + " l_attribute TEXT,", + " l_seq_start_seq_idx INTEGER,", + " l_seq_end_seq_idx INTEGER,", + " r_seq_start_id INTEGER,", + " r_seq_end_id INTEGER,", + " r_seq_len INTEGER,", + " r_level TEXT,", + " r_attribute TEXT,", + " r_seq_start_seq_idx INTEGER,", + " r_seq_end_seq_idx INTEGER", + ");") + + + DBI::dbExecute(emuDBhandle$connection, + database.DDL.emuDB_lrExpResTmp) + +} + +##################################### +create_intermResTmpQueryTablesDBI <- function(emuDBhandle, + suffix = "root"){ + + database.DDL.emuDB_intermRes_itemsTmp = paste0("CREATE TEMP TABLE interm_res_items_tmp_", suffix, " (", + " db_uuid VARCHAR(36),", + " session TEXT,", + " bundle TEXT,", + " seq_start_id INTEGER,", + " seq_end_id INTEGER,", + " seq_len INTEGER,", + " level TEXT,", + " attribute TEXT,", + " seq_start_seq_idx INTEGER,", + " seq_end_seq_idx INTEGER", + #"PRIMARY KEY (db_uuid, session, bundle, seq_start_id, seq_end_id)", + ");") + + database.DDL.emuDB_intermRes_itemsTmp_idx1 = paste0("CREATE INDEX interm_res_items_tmp_", suffix, "_idx1 ", + "ON interm_res_items_tmp_", suffix, "(", + " db_uuid, ", + " session, ", + " bundle, ", + " seq_start_id, ", + " seq_end_id", + ")") + database.DDL.emuDB_intermRes_itemsTmp_idx2 = paste0("CREATE INDEX interm_res_items_tmp_", suffix, "_idx2 ", + "ON interm_res_items_tmp_", suffix, "(", + " db_uuid, ", + " session, ", + " bundle, ", + " seq_end_id", + ")") + database.DDL.emuDB_intermRes_itemsTmp_idx3 = paste0("CREATE INDEX interm_res_items_tmp_", suffix, "_idx3 ", + "ON interm_res_items_tmp_", suffix, "(", + " db_uuid, ", + " session, ", + " bundle, ", + " level, ", + " seq_start_seq_idx, ", + " seq_end_seq_idx", + ")") + + database.DDL.emuDB_intermRes_itemsTmp_idx4 = paste0("CREATE INDEX interm_res_items_tmp_", suffix, "_idx4 ", + "ON interm_res_items_tmp_", suffix, "(", + " db_uuid, ", + " session, ", + " bundle, ", + " level, ", + " seq_end_seq_idx", + ")") + + database.DDL.emuDB_intermRes_projItemsTmp = paste0("CREATE TEMP TABLE interm_res_proj_items_tmp_", suffix, " (", + " db_uuid VARCHAR(36),", + " session TEXT,", + " bundle TEXT,", + " seq_start_id INTEGER,", + " seq_end_id INTEGER,", + " p_seq_start_id INTEGER,", + " p_seq_end_id INTEGER,", + " p_seq_len INTEGER,", + " p_level TEXT,", + " p_attribute TEXT,", + " p_seq_start_seq_idx INTEGER,", + " p_seq_end_seq_idx INTEGER", + ");") + + if(!DBI::dbExistsTable(emuDBhandle$connection, paste0("interm_res_items_tmp_", suffix))){ + DBI::dbExecute(emuDBhandle$connection, + database.DDL.emuDB_intermRes_itemsTmp) + DBI::dbExecute(emuDBhandle$connection, + database.DDL.emuDB_intermRes_itemsTmp_idx1) + DBI::dbExecute(emuDBhandle$connection, + database.DDL.emuDB_intermRes_itemsTmp_idx2) + DBI::dbExecute(emuDBhandle$connection, + database.DDL.emuDB_intermRes_itemsTmp_idx3) + DBI::dbExecute(emuDBhandle$connection, + database.DDL.emuDB_intermRes_itemsTmp_idx4) + }else{ + DBI::dbExecute(emuDBhandle$connection, + paste0("DELETE FROM interm_res_items_tmp_", suffix)) + } + if(!DBI::dbExistsTable(emuDBhandle$connection, + paste0("interm_res_proj_items_tmp_", suffix))){ + DBI::dbExecute(emuDBhandle$connection, + database.DDL.emuDB_intermRes_projItemsTmp) + }else{ + DBI::dbExecute(emuDBhandle$connection, + paste0("DELETE FROM interm_res_proj_items_tmp_", suffix)) + } +} + +##################################### +drop_tmpFilteredQueryTablesDBI <- function(emuDBhandle){ + + tableNames = DBI::dbListTables(emuDBhandle$connection) + if("items_filtered_tmp" %in% tableNames) DBI::dbExecute(emuDBhandle$connection, + "DROP TABLE items_filtered_tmp") + if("labels_filtered_tmp" %in% tableNames) DBI::dbExecute(emuDBhandle$connection, + "DROP TABLE labels_filtered_tmp") + if("links_filtered_tmp" %in% tableNames) DBI::dbExecute(emuDBhandle$connection, + "DROP TABLE links_filtered_tmp") + + if("items_filtered_subset_tmp" %in% tableNames) DBI::dbExecute(emuDBhandle$connection, + "DROP TABLE items_filtered_subset_tmp") + if("labels_filtered_subset_tmp" %in% tableNames) DBI::dbExecute(emuDBhandle$connection, + "DROP TABLE labels_filtered_subset_tmp") +} + +##################################### +drop_allTmpTablesDBI <- function(emuDBhandle){ + + allTables = DBI::dbListTables(emuDBhandle$connection) + allTmpTables = allTables[grepl(".*tmp.*", allTables)] + for(tmpTable in allTmpTables){ + DBI::dbExecute(emuDBhandle$connection, paste0("DROP TABLE ", tmpTable)) + } +} + +##################################### +## @param emuDBhandle +## @param intermResTableSuffix +clear_intermResTabels <- function(emuDBhandle, + intermResTableSuffix, + clearProjectionItems = TRUE){ + DBI::dbExecute(emuDBhandle$connection, + paste0("DELETE FROM interm_res_items_tmp_", intermResTableSuffix)) + + if(clearProjectionItems) { + DBI::dbExecute(emuDBhandle$connection, + paste0("DELETE FROM interm_res_proj_items_tmp_", intermResTableSuffix)) + } +} +################################################################### +################## Functions implementing EQL ##################### +################################################################### + +################################# +# helper functions for query + +create_conditionTextAlternatives <- function(opr, + values){ + o = list(opr = opr, values = values) + return(o) +} + +emuR_regexprl<-function(pattern, + x){ + m = regexpr(pattern, x, perl = TRUE) + return((m == 1) & (attr(m, 'match.length') == nchar(x))) +} + +check_levelAttributeName <- function(emuDBhandle, + name){ + aNms = get_allAttributeNames(emuDBhandle) + if(! (name %in% aNms)){ + stop("Unknown level attribute name: '", + name, + "'. Database attribute names are: ", + paste(aNms,collapse=','), + "\n") + } +} + +################################# +# actual query functions + +################################# +query_labels <- function(emuDBhandle, + attributeName, + intermResTableSuffix, + conditionText, + sessionPattern, + bundlePattern, + useSubsets){ + + if(useSubsets){ + labelTableName = "labels_filtered_subset_tmp" + }else{ + labelTableName = "labels" + } + + # clear tables but keep projectionItems so they don't + # get lost in queries like : [Text == the -> #Text =~ .* & Accent == S] (right side of ->) + clear_intermResTabels(emuDBhandle, + intermResTableSuffix, + clearProjectionItems = FALSE) + + opr = conditionText[['opr']] + values = conditionText[['values']] + res = NULL + levelName = get_levelNameForAttributeName(emuDBhandle, + attributeName) + + if(opr == '==' | opr == '='){ + for(value in values){ + DBI::dbExecute(emuDBhandle$connection, paste0("INSERT INTO interm_res_items_tmp_", intermResTableSuffix, " ", + "SELECT ", + " it.db_uuid, ", + " it.session, ", + " it.bundle, ", + " it.item_id AS seq_start_id, ", + " it.item_id AS seq_end_id, ", + " 1 AS seq_len,", + " it.level AS level,", + " lt.name AS attribute,", + " it.seq_idx AS seq_start_seq_idx, ", + " it.seq_idx AS seq_end_seq_idx ", + "FROM items AS it, ", + labelTableName, " AS lt ", + "WHERE it.db_uuid = lt.db_uuid ", + " AND it.session = lt.session ", + " AND it.bundle = lt.bundle ", + " AND it.item_id = lt.item_id ", + " AND it.level = '", levelName, "' ", + " AND lt.name = '", attributeName, "' ", + " AND lt.label = '", value, "' ", + " AND it.session REGEXP '", sessionPattern, "' ", + " AND it.bundle REGEXP '", bundlePattern, "' ", + "")) + } + }else if(opr == '!='){ + + sqlStr = paste0("INSERT INTO interm_res_items_tmp_", intermResTableSuffix, " ", + "SELECT ", + " it.db_uuid, ", + " it.session, ", + " it.bundle, ", + " it.item_id AS seq_start_id, ", + " it.item_id AS seq_end_id, ", + " 1 AS seq_len,", + " it.level AS level, ", + " lt.name AS attribute, ", + " it.seq_idx AS seq_start_seq_idx, ", + " it.seq_idx AS seq_end_seq_idx ", + "FROM items AS it, ", + labelTableName, " AS lt ", + "WHERE it.db_uuid = lt.db_uuid ", + " AND it.session = lt.session ", + " AND it.bundle = lt.bundle ", + " AND it.item_id = lt.item_id ", + " AND it.level = '", levelName, "'", + " AND lt.name = '", attributeName, "'", + " AND it.session REGEXP '", sessionPattern, "' ", + " AND it.bundle REGEXP '", bundlePattern, "' ", + "" + ) + for(value in values){ + sqlStr = paste0(sqlStr, " AND label <> '", value, "'") + } + + DBI::dbExecute(emuDBhandle$connection, sqlStr) + + }else if(opr == '=~'){ + for(value in values){ + if(value == ".*" || value == ".+" || stringr::str_starts(value, "\\^")){ + }else{ + warning(paste0("=~ now requires ^ if you wish to match the\n", + "first character in a sequence i.e. 'a.*' now also matches\n", + "'weakness' as it contains the sequence. '^a.*'\n", + "matches sequences that start with 'a.*'\n", + "e.g. the word 'amongst'.")) + } + + DBI::dbExecute(emuDBhandle$connection, paste0("INSERT INTO interm_res_items_tmp_", intermResTableSuffix, " ", + "SELECT ", + " it.db_uuid, ", + " it.session, ", + " it.bundle, ", + " it.item_id AS seq_start_id, ", + " it.item_id AS seq_end_id, ", + " 1 AS seq_len,", + " it.level AS level, ", + " lt.name AS attribute, ", + " it.seq_idx AS seq_start_seq_idx, ", + " it.seq_idx AS seq_end_seq_idx ", + "FROM items AS it, ", + labelTableName, " AS lt ", + "WHERE it.db_uuid = lt.db_uuid ", + " AND it.session = lt.session ", + " AND it.bundle = lt.bundle ", + " AND it.item_id = lt.item_id ", + " AND it.level = '", levelName, "' ", + " AND lt.name = '", attributeName, "' ", + " AND lt.label REGEXP '", value, "' ", + " AND it.session REGEXP '", sessionPattern, "' ", + " AND it.bundle REGEXP '", bundlePattern, "' ", + "")) + } + }else if(opr == '!~'){ + for(value in values){ + + DBI::dbExecute(emuDBhandle$connection, paste0("INSERT INTO interm_res_items_tmp_", intermResTableSuffix, " ", + "SELECT ", + " it.db_uuid, ", + " it.session, ", + " it.bundle, ", + " it.item_id AS seq_start_id, ", + " it.item_id AS seq_end_id, ", + " 1 AS seq_len,", + " it.level AS level, ", + " lt.name AS attribute, ", + " it.seq_idx AS seq_start_seq_idx, ", + " it.seq_idx AS seq_end_seq_idx ", + "FROM items AS it, ", + labelTableName, " AS lt ", + "WHERE it.db_uuid = lt.db_uuid ", + " AND it.session = lt.session ", + " AND it.bundle = lt.bundle ", + " AND it.item_id = lt.item_id ", + " AND it.level = '", levelName, "' ", + " AND lt.name = '", attributeName, "' ", + " AND lt.label NOT REGEXP '", value, "' ", + " AND it.session REGEXP '", sessionPattern, "' ", + " AND it.bundle REGEXP '", bundlePattern, "' ", + "")) + } + }else{ + stop("Syntax error: Unknown operator: '", + opr, + "'\n") + } +} + +# EBNF: FUNCQ = POSQ | NUMQ; +query_databaseEqlFUNCQ <- function(emuDBhandle, + q, + intermResTableSuffix, + sessionPattern, + bundlePattern, + useSubsets, + verbose){ + qTrim = stringr::str_trim(q) + if(useSubsets){ + itemsTableName = "items_filtered_subset_tmp" + }else{ + itemsTableName = "items" + } + + + # determine function name + # TODO duplicate code + prbOpen = get_stringPosition(string = qTrim, + searchStr = '(', + literalQuote = "'") + if(prbOpen != -1){ + prbClose = get_stringPosition(string = qTrim, + searchStr = ')', + literalQuote = "'") + if(prbClose == -1){ + stop("Syntax error: Missing closing round bracket ')' in '", q, "'\n") + }else{ + if(prbOpen > prbClose){ + stop("Syntax error: Expected opening round bracket '(' before closing round bracket in '", q, "'\n") + } + if(prbOpen == 1){ + stop("Syntax error: Expected function name in '", q, "'\n") + } + paramsVec = stringr::str_split(substr(qTrim, + prbOpen + 1, + prbClose - 1), + ',') + params = paramsVec[[1]] + paramsLen = length(params) + # all functions require exactly two params + if(paramsLen != 2){ + stop("Syntax error: All EQL functions require exactly two parameters in '", q, "'\n") + } + param1 = stringr::str_trim(params[[1]]) + param2 = stringr::str_trim(params[[2]]) + # check attribute names + aNms = get_allAttributeNames(emuDBhandle) + if(!(param1 %in% aNms)){ + msg = paste0("Unknown level attribute name: '", param1, "'.") + if(length(aNms) > 0){ + msg = paste0(msg," Database attribute names are: ", paste(aNms,collapse=',')) + } + msg = paste0(msg, "\n") + stop(msg) + } + if(!(param2 %in% aNms)){ + msg = paste0("Unknown level attribute name: '", param2, "'.") + if(length(aNms) > 0){ + msg = paste0(msg, " Database attribute names are: ", paste(aNms, collapse = ',')) + } + msg = paste0(msg,"\n") + stop(msg) + } + + funcValueTerm = stringr::str_trim(substring(qTrim, prbClose + 1)) + + + funcName = stringr::str_trim(substr(qTrim, 1, prbOpen - 1)) + # EBNF: POSQ = POSFCT,'(',Level,',',Level,')','=','0'| '1'; + itemsAsSeqs = NULL + + level1 = get_levelNameForAttributeName(emuDBhandle, param1) + level2 = get_levelNameForAttributeName(emuDBhandle, param2) + + ####################################### + # connect all children to parents + level1ItemsTableSuffix = "funcq_level1_items" + create_intermResTmpQueryTablesDBI(emuDBhandle, suffix = level1ItemsTableSuffix) + + DBI::dbExecute(emuDBhandle$connection, paste0("INSERT INTO interm_res_items_tmp_", level1ItemsTableSuffix, " ", + "SELECT ", + " db_uuid, ", + " session, ", + " bundle, ", + " item_id AS start_item_id, ", + " item_id AS seq_end_id, ", + " 1 AS seq_len, ", + " level, ", + " '", param1, "' as attribute, ", + " seq_idx AS seq_start_seq_idx, ", + " seq_idx AS seq_end_seq_idx ", + "FROM items ", + "WHERE db_uuid ='", emuDBhandle$UUID, "' ", + " AND level = '", level1, "'", + " AND items.session REGEXP '", sessionPattern, "' ", + " AND items.bundle REGEXP '", bundlePattern, "' ", + "")) + + # get hierarchy paths to check which level is parent + connectHierPaths = get_hierPathsConnectingLevels(emuDBhandle, + level1, + level2) + + if(connectHierPaths[[1]][length(connectHierPaths[[1]])] == level1){ + stop("Second level/attribute name parameter in:'", + qTrim, + "is not a child of the first level/attribute.", + " This in not permitted in FUNCQ queries!") + } else { + } + + query_hierarchyWalk(emuDBhandle, + startItemsTableSuffix = level1ItemsTableSuffix, + targetItemsAttributeName = level2, + preserveStartItemsRowLength = TRUE, + sessionPattern = sessionPattern, + bundlePattern = bundlePattern, + verbose = verbose) # result written to lr_exp_res_tmp table (left parents/right children) + + # create tmp table to store seqs in (could this be replaced because of the output of query_hierarchyWalk?) + DBI::dbExecute(emuDBhandle$connection,paste0("CREATE TEMP TABLE IF NOT EXISTS items_as_seqs_tmp ( ", + " db_uuid VARCHAR(36), ", + " session TEXT, ", + " bundle TEXT, ", + " seq_start_id INTEGER, ", + " seq_end_id INTEGER, ", + " seq_len INTEGER, ", + " level TEXT, ", + " attribute TEXT ", + ")")) + + + # EBNF: COP = '=' | '!=' | '>' | '<' | '<=' | '>='; + if(funcName == 'Start' | funcName == 'End' | funcName == 'Medial'){ + + if(funcValueTerm != ''){ + # check equals operator == or = + expEqualSign = substr(funcValueTerm, 1, 1) + if(expEqualSign != '='){ + stop("Syntax error: Expected equal sign '==' for in function term: '", qTrim, "'\n") + } + op = '=' + funcValuePos = 2 + if(substr(funcValueTerm, 2, 2) == '='){ + funcValuePos = 3 + op = '==' + } + funcValue = stringr::str_trim(substring(text = funcValueTerm, + first = funcValuePos)) + }else{ + stop("Syntax error: function ", + funcName, + " requires function value in: '", + qTrim, + "'\n") + } + } + if(funcName == 'Start'){ + cond = NULL + if(funcValue == '0' | funcValue == 'F' | funcValue == 'FALSE'){ + #extract according items + DBI::dbExecute(emuDBhandle$connection, paste0("INSERT INTO items_as_seqs_tmp ", + "SELECT ", + " lr_exp_res_tmp.db_uuid, ", + " lr_exp_res_tmp.session, ", + " lr_exp_res_tmp.bundle, ", + " i1.item_id AS seq_start_id, ", + " i1.item_id AS seq_end_id, ", + " 1 AS seq_len, ", + " lr_exp_res_tmp.r_level AS level, ", + " lr_exp_res_tmp.r_attribute AS attribute ", + "FROM lr_exp_res_tmp, ", itemsTableName, " AS i1 ", + "WHERE lr_exp_res_tmp.db_uuid = i1.db_uuid ", + " AND lr_exp_res_tmp.session = i1.session ", + " AND lr_exp_res_tmp.bundle = i1.bundle ", + " AND lr_exp_res_tmp.r_level = i1.level ", + " AND lr_exp_res_tmp.r_seq_start_seq_idx < i1.seq_idx ", + " AND lr_exp_res_tmp.r_seq_end_seq_idx >= i1.seq_idx ", + "")) + + }else if(funcValue == '1' | funcValue == 'T' | funcValue == 'TRUE'){ + #extract according items + DBI::dbExecute(emuDBhandle$connection, paste0("INSERT INTO items_as_seqs_tmp ", + "SELECT ", + " lr_exp_res_tmp.db_uuid, ", + " lr_exp_res_tmp.session, ", + " lr_exp_res_tmp.bundle, ", + " i1.item_id AS seq_start_id, ", + " i1.item_id AS seq_end_id, ", + " 1 AS seq_len, ", + " lr_exp_res_tmp.r_level AS level, ", + " lr_exp_res_tmp.r_attribute AS attribute ", + "FROM lr_exp_res_tmp, ", + itemsTableName, " AS i1 ", + "WHERE lr_exp_res_tmp.db_uuid = i1.db_uuid ", + " AND lr_exp_res_tmp.session = i1.session ", + " AND lr_exp_res_tmp.bundle = i1.bundle ", + " AND lr_exp_res_tmp.r_level = i1.level ", + " AND lr_exp_res_tmp.r_seq_start_seq_idx = i1.seq_idx ", + "")) + + }else{ + stop("Syntax error: Expected function value TRUE or FALSE / T OR F / 0 or 1 after '", + op, + "' in function term: '", + qTrim, + "'\n") + } + + resultLevel = param2 + + }else if(funcName == 'Medial'){ + cond = NULL + bOp = NULL + if(funcValue == '0' | funcValue == 'F' | funcValue == 'FALSE'){ + #extract according items + DBI::dbExecute(emuDBhandle$connection, paste0("INSERT INTO items_as_seqs_tmp ", + "SELECT ", + " lr_exp_res_tmp.db_uuid, ", + " lr_exp_res_tmp.session, ", + " lr_exp_res_tmp.bundle, ", + " i1.item_id AS seq_start_id, ", + " i1.item_id AS seq_end_id, ", + " 1 AS seq_len, ", + " lr_exp_res_tmp.r_level AS level, ", + " lr_exp_res_tmp.r_attribute AS attribute ", + "FROM lr_exp_res_tmp, ", + itemsTableName, " AS i1 ", + "WHERE (", + " lr_exp_res_tmp.db_uuid = i1.db_uuid ", + " AND lr_exp_res_tmp.session = i1.session ", + " AND lr_exp_res_tmp.bundle = i1.bundle ", + " AND lr_exp_res_tmp.r_level = i1.level ", + " AND i1.seq_idx = lr_exp_res_tmp.r_seq_start_seq_idx) ", + "OR (lr_exp_res_tmp.db_uuid = i1.db_uuid ", + " AND lr_exp_res_tmp.session = i1.session ", + " AND lr_exp_res_tmp.bundle = i1.bundle ", + " AND lr_exp_res_tmp.r_level = i1.level ", + " AND i1.seq_idx = lr_exp_res_tmp.r_seq_end_seq_idx)", + "")) + + }else if(funcValue == '1' | funcValue == 'T' | funcValue == 'TRUE'){ + #extract according items + DBI::dbExecute(emuDBhandle$connection, paste0("INSERT INTO items_as_seqs_tmp ", + "SELECT ", + " lr_exp_res_tmp.db_uuid, ", + " lr_exp_res_tmp.session, ", + " lr_exp_res_tmp.bundle, ", + " i1.item_id AS seq_start_id, ", + " i1.item_id AS seq_end_id, ", + " 1 AS seq_len, ", + " lr_exp_res_tmp.r_level AS level, ", + " lr_exp_res_tmp.r_attribute AS attribute ", + "FROM lr_exp_res_tmp, ", itemsTableName, " AS i1 ", + "WHERE lr_exp_res_tmp.db_uuid = i1.db_uuid ", + " AND lr_exp_res_tmp.session = i1.session ", + " AND lr_exp_res_tmp.bundle = i1.bundle ", + " AND lr_exp_res_tmp.r_level = i1.level ", + " AND i1.seq_idx > lr_exp_res_tmp.r_seq_start_seq_idx ", + " AND i1.seq_idx < lr_exp_res_tmp.r_seq_end_seq_idx ", + "")) + }else{ + stop("Syntax error: Expected function value 0 or 1 after '", + op, + "' in function term: '", + qTrim, + "'\n") + } + + resultLevel = param2 + }else if(funcName == 'End'){ + cond = NULL + if(funcValue == '0' | funcValue == 'F' | funcValue == 'FALSE'){ + #extract according items + DBI::dbExecute(emuDBhandle$connection, paste0("INSERT INTO items_as_seqs_tmp ", + "SELECT ", + " lr_exp_res_tmp.db_uuid, ", + " lr_exp_res_tmp.session, ", + " lr_exp_res_tmp.bundle, ", + " i1.item_id AS seq_start_id, ", + " i1.item_id AS seq_end_id, ", + " 1 AS seq_len, ", + " lr_exp_res_tmp.r_level AS level, ", + " lr_exp_res_tmp.r_attribute AS attribute ", + "FROM lr_exp_res_tmp, ", + itemsTableName, " AS i1 ", + "WHERE lr_exp_res_tmp.db_uuid = i1.db_uuid ", + " AND lr_exp_res_tmp.session = i1.session ", + " AND lr_exp_res_tmp.bundle = i1.bundle ", + " AND lr_exp_res_tmp.r_level = i1.level ", + " AND i1.seq_idx >= lr_exp_res_tmp.r_seq_start_seq_idx ", + " AND i1.seq_idx < lr_exp_res_tmp.r_seq_end_seq_idx ", + "")) + + }else if(funcValue == '1' | funcValue == 'T' | funcValue == 'TRUE'){ + #extract according items + DBI::dbExecute(emuDBhandle$connection, paste0("INSERT INTO items_as_seqs_tmp ", + "SELECT ", + " lr_exp_res_tmp.db_uuid, ", + " lr_exp_res_tmp.session, ", + " lr_exp_res_tmp.bundle, ", + " i1.item_id AS seq_start_id, ", + " i1.item_id AS seq_end_id, ", + " 1 AS seq_len, ", + " lr_exp_res_tmp.r_level AS level, ", + " lr_exp_res_tmp.r_attribute AS attribute ", + "FROM lr_exp_res_tmp, ", + itemsTableName, " AS i1 ", + "WHERE lr_exp_res_tmp.db_uuid = i1.db_uuid ", + " AND lr_exp_res_tmp.session = i1.session ", + " AND lr_exp_res_tmp.bundle = i1.bundle ", + " AND lr_exp_res_tmp.r_level = i1.level ", + " AND lr_exp_res_tmp.r_seq_end_seq_idx = i1.seq_idx ", + "")) + + }else{ + stop("Syntax error: Expected function value 0 or 1 after '", + op, + "' in function term: '", + qTrim, + "'\n") + } + resultLevel = param2 + }else if(funcName == 'Num'){ + funcVal = NULL + funcOpr = NULL + for(opr in c('==', + '!=', + '<=', + '>=', + '=', + '>', + '<')){ + p = get_stringPosition(string = funcValueTerm, + searchStr = opr) + if(p == 1){ + oprLen = nchar(opr) + funcOpr = substr(funcValueTerm, 1, oprLen) + funcValStr = stringr::str_trim(substring(funcValueTerm, oprLen + 1)) + funcVal = as.integer(funcValStr) + if(is.na(funcVal)){ + stop("Syntax error: Could not parse Num function value as integer: '", + funcValStr, + "'\n") + } + break + } + } + if(is.null(funcOpr) | is.null(funcVal)){ + stop("Syntax error: Unknown operator and/or value for Num function: '", + funcValueTerm, + "'\n") + } + if(funcOpr == '=='){ + sqlFuncOpr = '=' + }else{ + sqlFuncOpr = funcOpr + } + + # EBNF: NUMQ = 'Num','(',Level,',',Level,')',COP,INTPN; + DBI::dbExecute(emuDBhandle$connection, paste0("INSERT INTO items_as_seqs_tmp ", + "SELECT ", + " lr_exp_res_tmp.db_uuid, ", + " lr_exp_res_tmp.session, ", + " lr_exp_res_tmp.bundle, ", + " i1.item_id AS seq_start_id, ", + " i1.item_id AS seq_end_id, ", + " 1 AS seq_len, ", + " '", level1, "' AS level, ", + " '", param1, "' AS attribute ", + "FROM lr_exp_res_tmp, ", + itemsTableName, " AS i1 ", + "WHERE lr_exp_res_tmp.db_uuid = i1.db_uuid ", + " AND lr_exp_res_tmp.session = i1.session ", + " AND lr_exp_res_tmp.bundle = i1.bundle ", + " AND lr_exp_res_tmp.l_seq_start_id = i1.item_id ", # parents are never sequences + " AND (lr_exp_res_tmp.r_seq_end_seq_idx - lr_exp_res_tmp.r_seq_start_seq_idx) + 1 ", + sqlFuncOpr, " ", + funcVal, " ", + "")) + resultLevel = param1 + }else{ + stop("Syntax error: Unknwon function: '", funcName, "'") + } + + # merge results with itemsTableName table (in case filtered subsets are used) + # and place in interm_res_items_tmp_ + intermResTableSuffix table + DBI::dbExecute(emuDBhandle$connection, + paste0("DELETE FROM interm_res_items_tmp_", intermResTableSuffix)) + + DBI::dbExecute(emuDBhandle$connection, + paste0("INSERT INTO interm_res_items_tmp_", intermResTableSuffix, " ", + "SELECT ", + " iast.db_uuid, ", + " iast.session, ", + " iast.bundle, ", + " iast.seq_start_id, ", + " iast.seq_end_id, ", + " iast.seq_len, ", + " iast.level, ", + " iast.attribute, ", + " it.seq_idx AS seq_start_seq_idx, ", + " it.seq_idx AS seq_end_seq_idx ", + "FROM items_as_seqs_tmp AS iast, ", + itemsTableName, " AS it ", + "WHERE iast.db_uuid = it.db_uuid ", + " AND iast.session = it.session ", + " AND iast.bundle = it.bundle ", + " AND iast.seq_start_id = it.item_id ", + "")) + + # drop temp table + DBI::dbExecute(emuDBhandle$connection, + paste0("DROP TABLE IF EXISTS seq_idx_tmp")) + DBI::dbExecute(emuDBhandle$connection, + paste0("DROP TABLE IF EXISTS items_as_seqs_tmp")) + + } + }else{ + stop("Syntax error: Missing opening round bracket '(' in '", + q, + "'\n") + } +} + +########################### +query_databaseEqlLABELQ <- function(emuDBhandle, + q, + sessionPattern, + bundlePattern, + useSubsets, + intermResTableSuffix){ + # EBNF: LABELQ = ['#'],LEVEL,("=" | "==" | "!=" | "=~" | "!~"),LABELALTERNATIVES; + + qTrim = stringr::str_trim(q) + dbConfig = load_DBconfig(emuDBhandle) + for(opr in c('==', + '!=', + '=~', + '!~', + '=')){ + p = get_stringPosition(string = q, + searchStr = opr, + literalQuote = "'") + if(p != -1){ + oprLen = nchar(opr) + level = substr(q, 1, p - 1) + projectionLevel = FALSE + attributeTrim = stringr::str_trim(level) + attributeName = attributeTrim + if(grepl('^#', attributeTrim)){ + # projection marker + # the EBNF does not allow white space between '#' and level string + # but the implementation of Emu does, so we allow it here too + + attributeName = stringr::str_trim(substring(attributeTrim, 2)) + projectionLevel = TRUE + } + aNms = get_allAttributeNames(emuDBhandle) + if(! (attributeName %in% aNms)){ + stop("Unknown level attribute name: '", + attributeName, + "'. Database attribute names are: ", + paste(aNms, collapse = ','), + "\n") + } + labelStr = substring(q, p + oprLen) + labelTrim = stringr::str_trim(labelStr) + + + # check label for key chars + # TODO Labels should to be allowed to contain key chars if they are single quoted + deniedStrs = c('^', '->', '==', '!=', '=') + for(deniedStr in deniedStrs){ + pt = get_stringPosition(string = labelTrim, + searchStr = deniedStr, + literalQuote = "'") + if(pt != -1){ + stop("Syntax error label ", + labelStr, + " contains '", + deniedStr, + "'. Quote label with ''.") + } + } + + # EBNF: LABELALTERNATIVES = LABEL , {'|',LABEL}; + # parse alternatives + labelAlts = c() + lp = 1 + lsp = 0 + while(lsp != -1){ + lsp = get_stringPosition(string = labelTrim, + pos = lp, + searchStr = '|', + literalQuote = "'") + if(lsp != -1){ + if(lsp == 1){ + stop("Syntax error: label alternatives cannot start with '|' character in '", + labelTrim, + "'") + } + labelAltTerm = substr(labelTrim, lp, lsp - 1) + labelAlt = stringr::str_trim(labelAltTerm) + labelAlts = c(labelAlts, labelAlt) + lp = lsp + 1 + } + } + # add last term + labelAltTerm = substring(labelTrim,lp) + labelAlt = stringr::str_trim(labelAltTerm) + labelAlts = c(labelAlts, labelAlt) + + labelAltsUq = c() + # unquote labels + # EBNF: LABEL = LABELING | ("'",LABELING,"'"); + # Suggestion for improvement: + # labelGroups (legacy EMU 'legal' directive) MUST NOT be quoted, to distinguish labelGroups from ordinary label or label pattern: + # EBNF: LABEL = LABEL_GROUP_NAME | LABELING | ("'",LABELING,"'"); + # LABELING = {ALPHA|DIGIT} + + for(labelAlt in labelAlts){ + label = NULL + if(substr(labelAlt, 1, 1) == "'"){ + lblTrimLen = nchar(labelAlt) + if(substring(labelAlt,lblTrimLen) != "'"){ + stop("Syntax error: expected closing single quote at end of label '", + labelAlt, + "'\n") + } + label = substr(labelAlt, 2, lblTrimLen - 1) + labelAltsUq = c(labelAltsUq, label) + }else{ + # check for labelGroup on level + lvlDefs = dbConfig[['levelDefinitions']] + isLabelGroup = FALSE + for(lvlDef in lvlDefs){ + for(attrDef in lvlDef[['attributeDefinitions']]){ + if(attributeName == attrDef[['name']]){ + lblGrps = attrDef[['labelGroups']] + for(lblGrp in lblGrps){ + if(labelAlt == lblGrp[['name']]){ + # is label group, expand + for(lblGrpVal in lblGrp[['values']]){ + labelAltsUq = c(labelAltsUq,lblGrpVal) + } + isLabelGroup = TRUE + break + } + } + } + } + } + if(!isLabelGroup){ + # check for database labelGroup + dbLblGrps=dbConfig$labelGroups + for(dbLblGrp in dbLblGrps){ + if(labelAlt == dbLblGrp[['name']]){ + # is label group, expand + for(dbLblGrpVal in dbLblGrp[['values']]){ + labelAltsUq = c(labelAltsUq, dbLblGrpVal) + } + isLabelGroup = TRUE + break + } + } + } + if(!isLabelGroup){ + # ordinary label + label = labelAlt + labelAltsUq = c(labelAltsUq, label) + } + } + } + cond = NULL + cond = create_conditionTextAlternatives(opr, labelAltsUq) + + query_labels(emuDBhandle, + attributeName = attributeName, + intermResTableSuffix = intermResTableSuffix, + cond, + sessionPattern, + bundlePattern, + useSubsets) + if(projectionLevel){ + DBI::dbExecute(emuDBhandle$connection, paste0("INSERT INTO interm_res_proj_items_tmp_", intermResTableSuffix, " ", + "SELECT ", + " db_uuid, ", + " session, ", + " bundle, ", + " seq_start_id, ", + " seq_end_id, ", + " seq_start_id AS p_seq_start_id, ", + " seq_end_id AS p_seq_end_id, ", + " seq_len AS p_seq_len, ", + " level AS p_level, ", + " attribute AS p_attribute, ", + " seq_start_seq_idx AS p_seq_start_seq_idx, ", + " seq_end_seq_idx AS p_seq_end_seq_idx ", + "FROM interm_res_items_tmp_", intermResTableSuffix)) + + } + return() + } + } + stop("Syntax error: No operator found.") +} + +# EBNF: SQ = LABELQ | FUNCQ; +query_databaseEqlSQ <- function(emuDBhandle, + q, + sessionPattern, + bundlePattern, + intermResTableSuffix, + useSubsets, + verbose){ + + qTrim = stringr::str_trim(q) + res = NULL + # detect function calls by existence of round brackets + prbOpen = get_stringPosition(string = qTrim, + searchStr = '(', + literalQuote = "'") + if(prbOpen != -1){ + prbClose = get_stringPosition(string = qTrim, + searchStr = ')', + literalQuote = "'") + if(prbClose == -1){ + stop("Syntax error: Missing closing round bracket ')' in '", + q, + "'\n") + }else{ + if(prbOpen > prbClose){ + stop("Syntax error: Expected opening round bracket '(' before closing round bracket in '", + q, + "'\n") + } + if(prbOpen == 1){ + stop("Syntax error: Expected function name in '", + q, + "'\n") + } + query_databaseEqlFUNCQ(emuDBhandle, + qTrim, + intermResTableSuffix, + sessionPattern, + bundlePattern, + useSubsets, + verbose = verbose) + } + }else{ + # No round brackets, assuming a level query + query_databaseEqlLABELQ(emuDBhandle, + qTrim, + sessionPattern, + bundlePattern, + useSubsets, + intermResTableSuffix = intermResTableSuffix) + } +} + +# EBNF: CONJQ = SQ,{'&',SQ}; +query_databaseEqlCONJQ <- function(emuDBhandle, + q, + sessionPattern, + bundlePattern, + intermResTableSuffix, + verbose){ + qTrim = stringr::str_trim(q) + conditions = list() + # initialize with empty result + startPos = 1 + p = 0 + resultAttribute = NULL + projection = FALSE + useSubsets = FALSE + + # parse through all terms of and (&) operation + while(p >= 0){ + # find ampersand '&' char + p = get_stringPosition(string = qTrim, + searchStr = '&', + pos = startPos, + literalQuote = "'") + if(p == -1){ + # get single term + condStr = stringr::str_trim(substring(qTrim, startPos)) + }else{ + # get leading term + condStr = stringr::str_trim(substr(qTrim, startPos, p - 1)) + # advance to next + startPos = p + 1 + } + # find projection marker (#) in condStr + pHash = get_stringPosition(string = condStr, + searchStr = '#', + literalQuote = "'") + if(pHash != -1){ + if(projection){ + stop("Only one hashtag allowed in linear query term: ", + qTrim) + }else{ + projection = TRUE + } + } + # execute query on term + query_databaseEqlSQ(emuDBhandle, + condStr, + sessionPattern, + bundlePattern, + intermResTableSuffix, + useSubsets = useSubsets, + verbose = verbose) + + # set resultAttribute of first term + if(is.null(resultAttribute)){ + resultAttribute = DBI::dbGetQuery(emuDBhandle$connection, + paste0("SELECT DISTINCT attribute FROM interm_res_items_tmp_", intermResTableSuffix))$attribute + } + + nRes = DBI::dbGetQuery(emuDBhandle$connection, + paste0("SELECT COUNT(*) AS n FROM interm_res_items_tmp_", intermResTableSuffix))$n + if(nRes == 0){ + # empty result stop here and return + return() + }else{ + # remove all entries from subsets + DBI::dbExecute(emuDBhandle$connection, "DELETE FROM items_filtered_subset_tmp") + DBI::dbExecute(emuDBhandle$connection, "DELETE FROM labels_filtered_subset_tmp") + + # Proceed with items matching current condition by placeing them into subset tabels + DBI::dbExecute(emuDBhandle$connection, paste0("INSERT INTO items_filtered_subset_tmp ", + "SELECT DISTINCT i.* ", + "FROM items AS i, ", + " interm_res_items_tmp_", intermResTableSuffix, " imr ", + "WHERE i.db_uuid = imr.db_uuid ", + " AND i.session = imr.session ", + " AND i.bundle = imr.bundle ", + " AND i.item_id = imr.seq_start_id ", + " AND i.session REGEXP '", sessionPattern, "' ", + " AND i.bundle REGEXP '", bundlePattern, "' ", + "")) + + DBI::dbExecute(emuDBhandle$connection, paste0("INSERT INTO labels_filtered_subset_tmp ", + "SELECT DISTINCT l.* ", + "FROM labels AS l, ", + " interm_res_items_tmp_", intermResTableSuffix, " imr ", + "WHERE l.db_uuid = imr.db_uuid ", + " AND l.session = imr.session ", + " AND l.bundle = imr.bundle ", + " AND l.item_id = imr.seq_start_id ", + " AND l.session REGEXP '", sessionPattern, "' ", + " AND l.bundle REGEXP '", bundlePattern, "' ", + "")) + + useSubsets = TRUE + + } + } + DBI::dbExecute(emuDBhandle$connection, + paste0("UPDATE interm_res_items_tmp_", intermResTableSuffix, " ", + "SET attribute ='", resultAttribute, "'")) +} + + +# Attempt of a function to replace the old query_databaseHier function with a "simple" +# and more perfomant CTE version that walks up and down the hierarchy +# @param emuDBhandle emuDB handle as returned by \code{\link{load_emuDB}} +# @param startItemsTableSuffix suffix of 'interm_res_items_tmp_' table +# in which starting item sequences are stored +# @param targetItemsAttributeName name of target attribute to walk down/up to. +# The level name containing the attribute is acquired using the \code{get_levelNameForAttributeName} +# function. +# @param preserveStartItemsRowLength preserve the length (nrow()) of the table +# that is passed in (currently unused) +# @param walkDown if set to TRUE (the default) start items seqs are parents, targets are childs +# if FALSE start items seqs are children and targets are parents +# @param sessionPattern Regex used to filter sessions +# @param bundlePattern Regex used to filter bundles +# @param verbose be verbose (interactively query hierarchy path if multiple paths are available) +query_hierarchyWalk <- function(emuDBhandle, + startItemsTableSuffix, + targetItemsAttributeName, + preserveStartItemsRowLength, + walkDown = TRUE, + sessionPattern = ".*", + bundlePattern = ".*", + verbose) { + + # get hierarchy paths + startItemsAttributeName = unique(na.omit(DBI::dbReadTable(emuDBhandle$connection, + paste0("interm_res_items_tmp_", + startItemsTableSuffix))$level)) + + startItemsLevelName = get_levelNameForAttributeName(emuDBhandle, startItemsAttributeName) + + targetItemsLevelName = get_levelNameForAttributeName(emuDBhandle, targetItemsAttributeName) + + connectHierPaths = get_hierPathsConnectingLevels(emuDBhandle, + startItemsLevelName, + targetItemsLevelName) + + # check if multiple paths are available + # and ask user to choose a path (only in verbose mode) + if(verbose & length(connectHierPaths) >= 2){ + + cat(paste0("More than one path connecting: '", + startItemsLevelName, + "' and '", + targetItemsLevelName, + "' was found! The paths were: \n" )) + for(i in 1:length(connectHierPaths)){ + cat(paste0(i, ".) ", + paste0(connectHierPaths[[i]], collapse = "->")), + "\n") + } + idx <- readline(prompt="Choose a path by selecting its number (note that comma seperated numbers (e.g., 1, 2, 3) works to select multiple paths): ") + + idx = as.integer(stringr::str_split(idx, ",\\s*", simplify = TRUE)) + # check if on path in CTE (see below) + sqlStr_checkIfOnPath = paste0(" AND i.level IN ('", paste0(connectHierPaths[idx], collapse = "', '"), "')") + } else { + # no checks if on path in CTE (see below) + sqlStr_checkIfOnPath = "" + } + + # empty table just to be safe + DBI::dbExecute(emuDBhandle$connection, paste0("DELETE FROM lr_exp_res_tmp")) + + ######################################################### + # perform CTE that walks up/down the hierarchy using links + # and checks if on correct path + # results are written to lr_exp_res_tmp + # where left side are starting items seqs and right side are target items + + # depending on preserveStartItemsRowLength + # collapse children into sequences and preserve + # NA row placement using diff. join types & ORDER BY + if(preserveStartItemsRowLength){ + joinType = "LEFT JOIN" + + selectString = "SELECT " # carefull but this is needed to not : UNION ALL doesn't check for duplicates + + groupByString = paste0("GROUP BY irit.rowid, ", # using irit.rowid to preserve duplicates (requery only) + " irit.db_uuid, ", + " irit.session, ", + " irit.bundle, ", + " irit.seq_start_id, ", + " irit.seq_end_id ") + + orderByString = "ORDER BY irit.rowid" # don't reorder if left joining to perserve NA/NULL row placement + }else{ + joinType = "INNER JOIN" + + selectString = "SELECT DISTINCT " # distinct because UNION ALL doesn't check for duplicates + + groupByString = paste0("GROUP BY cte_hier.db_uuid, ", + " cte_hier.session, ", + " cte_hier.bundle, ", + " cte_hier.item_id ") + + orderByString = paste0("ORDER BY irit.db_uuid, ", + " irit.session, ", + " irit.bundle, ", + " irit.seq_start_seq_idx") + + } + + # depending on walkDown switch to/from_id order in join + if(walkDown) { + sqlStr_firstItemTableLinkId = " AND ch.item_id = l.from_id " + sqlStr_secondItemTableLinkId = " AND l.to_id = i.item_id " + }else { + sqlStr_firstItemTableLinkId = " AND ch.item_id = l.to_id " + sqlStr_secondItemTableLinkId = " AND l.from_id = i.item_id " + } + + DBI::dbExecute(emuDBhandle$connection, paste0("WITH RECURSIVE cte_hier AS (", + " SELECT irit.rowid AS start_items_table_row_idx, ", # anchor: expand seqs + " items.* ", + " FROM interm_res_items_tmp_", startItemsTableSuffix, " AS irit, ", + " items ", + " WHERE irit.db_uuid = items.db_uuid ", + " AND irit.session = items.session ", + " AND irit.bundle = items.bundle ", + " AND items.level = '", startItemsLevelName, "' ", + " AND items.seq_idx BETWEEN irit.seq_start_seq_idx AND irit.seq_end_seq_idx ", + " UNION ALL ", # contains repeats -> faster coz no checking of duplicates + " SELECT ch.start_items_table_row_idx, i.* ", # recursive part of CTE: join cte_hier to items using links + " FROM cte_hier AS ch ", + " INNER JOIN links AS l ", + " ON ch.db_uuid = l.db_uuid ", + " AND ch.session = l.session ", + " AND ch.bundle = l.bundle ", + sqlStr_firstItemTableLinkId, + " AND l.session REGEXP '", sessionPattern, "' ", # limit to session RegEx + " AND l.bundle REGEXP '", bundlePattern, "' ", # limit to bundle RegEx + " INNER JOIN items AS i ", + " ON l.db_uuid = i.db_uuid ", + " AND l.session = i.session ", + " AND l.bundle = i.bundle ", + sqlStr_secondItemTableLinkId, + sqlStr_checkIfOnPath, # check that on path (if str is set) + " AND i.session REGEXP '", sessionPattern, "' ", # limit to session RegEx + " AND i.bundle REGEXP '", bundlePattern, "' ", # limit to bundle RegEx + ") ", + "INSERT INTO lr_exp_res_tmp ", + # "SELECT * FROM cte_hier ", + selectString, + " irit.db_uuid, ", + " irit.session, ", + " irit.bundle, ", + " irit.seq_start_id AS l_seq_start_id, ", + " irit.seq_end_id AS l_seq_end_id, ", + " irit.seq_len AS l_seq_len, ", + " irit.level AS l_level, ", + " irit.attribute AS l_attribute, ", + " irit.seq_start_seq_idx AS l_seq_start_seq_idx, ", + " irit.seq_end_seq_idx AS l_seq_end_seq_idx,", + " NULL AS r_seq_start_id, ", + " NULL AS r_seq_end_id, ", + " 1 AS r_seq_len, ", + " cte_hier.level AS r_level, ", + " '", targetItemsAttributeName, "' AS r_attribute, ", + " min(cte_hier.seq_idx) AS r_seq_start_seq_idx, ", + " max(cte_hier.seq_idx) AS r_seq_end_seq_idx ", + "FROM interm_res_items_tmp_", startItemsTableSuffix ," AS irit ", + joinType, " cte_hier ", + "ON irit.rowid = cte_hier.start_items_table_row_idx ", + " AND cte_hier.level = '", targetItemsLevelName, "'", # extract only child levels + groupByString, + orderByString, + "")) + # View(DBI::dbReadTable(emuDBhandle$connection, paste0("lr_exp_res_tmp"))) + + # calculate and update missing r_seq_start_id & r_seq_end_id + DBI::dbExecute(emuDBhandle$connection, paste0("UPDATE lr_exp_res_tmp ", + "SET r_seq_start_id = joined.item_id ", + "FROM ( ", + " SELECT items.item_id AS item_id, ", + " items.db_uuid, ", + " items.session, ", + " items.bundle, ", + " items.level, ", + " items.seq_idx, ", + " items.item_id ", + " FROM lr_exp_res_tmp, ", + " items ", + " WHERE lr_exp_res_tmp.db_uuid = items.db_uuid ", + " AND lr_exp_res_tmp.session = items.session ", + " AND lr_exp_res_tmp.bundle = items.bundle ", + " AND lr_exp_res_tmp.r_level = items.level ", + " AND lr_exp_res_tmp.r_seq_start_seq_idx = items.seq_idx ", + ") as joined ", + "WHERE lr_exp_res_tmp.db_uuid = joined.db_uuid ", + "AND lr_exp_res_tmp.session = joined.session ", + "AND lr_exp_res_tmp.bundle = joined.bundle ", + "AND lr_exp_res_tmp.r_level = joined.level ", + "AND lr_exp_res_tmp.r_seq_start_seq_idx = joined.seq_idx ", + "")) + + DBI::dbExecute(emuDBhandle$connection, paste0("UPDATE lr_exp_res_tmp ", + "SET r_seq_end_id = joined.item_id ", + "FROM ( ", + " SELECT items.item_id AS item_id, ", + " items.db_uuid, ", + " items.session, ", + " items.bundle, ", + " items.level, ", + " items.seq_idx, ", + " items.item_id ", + " FROM lr_exp_res_tmp, ", + " items ", + " WHERE lr_exp_res_tmp.db_uuid = items.db_uuid ", + " AND lr_exp_res_tmp.session = items.session ", + " AND lr_exp_res_tmp.bundle = items.bundle ", + " AND lr_exp_res_tmp.r_level = items.level ", + " AND lr_exp_res_tmp.r_seq_end_seq_idx = items.seq_idx ", + ") as joined ", + "WHERE lr_exp_res_tmp.db_uuid = joined.db_uuid ", + "AND lr_exp_res_tmp.session = joined.session ", + "AND lr_exp_res_tmp.bundle = joined.bundle ", + "AND lr_exp_res_tmp.r_level = joined.level ", + "AND lr_exp_res_tmp.r_seq_end_seq_idx = joined.seq_idx ", + "")) +} + + +################################## +query_databaseEqlInBracket<-function(emuDBhandle, + q, + sessionPattern, + bundlePattern, + intermResTableSuffix, + leftRightTableNrCounter = 0, + verbose){ + parseRes = list() + qTrim = stringr::str_trim(q) + # parse SEQQ or DOMQ + seqPos = get_stringPositionOutsideBrackets(qTrim, '->', literalQuote = "'", bracket = c('[',']')) + domPos = get_stringPositionOutsideBrackets(qTrim, '^', literalQuote = "'", bracket = c('[',']')) + if(seqPos != -1 || domPos != -1){ + # parse DOMQ or SEQQ + lExpRes = NULL + prjIts = NULL + if(domPos != -1){ + left = stringr::str_trim(substr(qTrim, 1, domPos - 1)) + right = stringr::str_trim(substring(qTrim, domPos + 1)) + }else if(seqPos != -1){ + left = stringr::str_trim(substr(qTrim, 1, seqPos - 1)) + right = stringr::str_trim(substring(qTrim, seqPos + 2)) + } + + # create left & right temp table + leftTableSuffix = paste0("left_", leftRightTableNrCounter) + rightTableSuffix = paste0("right_", leftRightTableNrCounter) + leftRightTableNrCounter = leftRightTableNrCounter + 1 + + create_intermResTmpQueryTablesDBI(emuDBhandle, suffix = leftTableSuffix) + create_intermResTmpQueryTablesDBI(emuDBhandle, suffix = rightTableSuffix) + + query_databaseWithEql(emuDBhandle, + left, + sessionPattern, + bundlePattern, + intermResTableSuffix = leftTableSuffix, + leftRightTableNrCounter, + verbose) + query_databaseWithEql(emuDBhandle, + right, + sessionPattern, + bundlePattern, + intermResTableSuffix = rightTableSuffix, + leftRightTableNrCounter + 1, + verbose) + + # check if left or right side results are empty -> clear tabels and return + nLeftResIts = DBI::dbGetQuery(emuDBhandle$connection, + paste0("SELECT COUNT(*) AS n FROM interm_res_items_tmp_", leftTableSuffix))$n + if(nLeftResIts == 0){ + clear_intermResTabels(emuDBhandle, leftTableSuffix) + clear_intermResTabels(emuDBhandle, rightTableSuffix) + return() + } + + + nRightResIts = DBI::dbGetQuery(emuDBhandle$connection, + paste0("SELECT COUNT(*) AS n FROM interm_res_items_tmp_", rightTableSuffix))$n + if(nRightResIts == 0){ + clear_intermResTabels(emuDBhandle, leftTableSuffix) + clear_intermResTabels(emuDBhandle, rightTableSuffix) + return() + } + + nLeftProjItems = DBI::dbGetQuery(emuDBhandle$connection, + paste0("SELECT COUNT(*) AS n FROM interm_res_proj_items_tmp_", leftTableSuffix))$n + nRightProjItems = DBI::dbGetQuery(emuDBhandle$connection, + paste0("SELECT COUNT(*) AS n FROM interm_res_proj_items_tmp_", rightTableSuffix))$n + + if(nLeftProjItems != 0 & nRightProjItems != 0){ + stop("Multiple hash tags '#' not allowed in EQL2 query!") + } + # get items on dominance compare levels + lResAttrName = DBI::dbGetQuery(emuDBhandle$connection, paste0("SELECT DISTINCT attribute ", + "FROM interm_res_items_tmp_", leftTableSuffix))$attribute + lResLvl = DBI::dbGetQuery(emuDBhandle$connection, paste0("SELECT DISTINCT level ", + "FROM interm_res_items_tmp_", leftTableSuffix))$level + + rResAttrName = DBI::dbGetQuery(emuDBhandle$connection, paste0("SELECT DISTINCT attribute ", + "FROM interm_res_items_tmp_", rightTableSuffix))$attribute + rResLvl = DBI::dbGetQuery(emuDBhandle$connection, paste0("SELECT DISTINCT level ", + "FROM interm_res_items_tmp_", rightTableSuffix))$level + + if(domPos != -1 & lResLvl == rResLvl){ + stop("Dominance query on same levels impossible.\nLeft level: ", + lResLvl, + " (attr:", lResAttrName, ") equals right level: ", + lResLvl, + " (attr:", rResAttrName, ")\n") + } + # check equal levels for sequence query + # (Do this already at this point, fixes issue: Sequence query should + # always throw an error if arguments not on same level. #39 ) + if(seqPos != -1 & lResLvl != rResLvl){ + stop("Queried attribute names of sequence query '", + qTrim, + "' do not match. (", + lResAttrName, + " not equal ", + rResAttrName,")") + } + + + if(domPos != -1){ + # parse DOMQ + # query the result level of left term + nLinks = DBI::dbGetQuery(emuDBhandle$connection, + paste0("SELECT COUNT(*) AS n FROM links", + " WHERE links.session REGEXP '", sessionPattern, "' ", + " AND links.bundle REGEXP '", bundlePattern, "' "))$n + if(nLinks == 0){ + clear_intermResTabels(emuDBhandle, leftTableSuffix) + clear_intermResTabels(emuDBhandle, rightTableSuffix) + return() + } + # check which side is parent + hierPaths = get_hierPathsConnectingLevels(emuDBhandle, + lResLvl, + rResLvl) + if(which(hierPaths[[1]] == lResLvl) < which(hierPaths[[1]] == rResLvl)) { + leftIsParent = TRUE + } else { + leftIsParent = FALSE + } + + if(leftIsParent){ + # get all child sequences of childLevel that are linked to items on parentLevel + query_hierarchyWalk(emuDBhandle, + startItemsTableSuffix = leftTableSuffix, + targetItemsAttributeName = rResLvl, + preserveStartItemsRowLength = TRUE, # get sequences (i.e. collapse) + sessionPattern = sessionPattern, + bundlePattern = bundlePattern, + walkDown = TRUE, + verbose = verbose) # result written to lr_exp_res_tmp table (left parents/right children) + # reduce to sequences in rightTableSuffix + #DBI::dbReadTable(emuDBhandle$connection, "lr_exp_res_tmp") + #DBI::dbReadTable(emuDBhandle$connection, paste0("interm_res_items_tmp_", rightTableSuffix)) + # TODO don't extract and rewrite but to all in SQL + lrertTmp = DBI::dbGetQuery(emuDBhandle$connection, paste0("SELECT lrert.db_uuid,", # retain left side as parents + " lrert.session, ", + " lrert.bundle, ", + " lrert.l_seq_start_id, ", + " lrert.l_seq_end_id, ", + " lrert.l_seq_len, ", + " lrert.l_level, ", + " lrert.l_attribute, ", + " lrert.l_seq_start_seq_idx, ", + " lrert.l_seq_end_seq_idx, ", + " irit.seq_start_id AS r_seq_start_id, ", + " irit.seq_end_id AS r_seq_end_id, ", + " irit.seq_len AS r_seq_len, ", + " irit.level AS r_level, ", + " irit.attribute AS r_attribute, ", + " irit.seq_start_seq_idx AS r_seq_start_seq_idx, ", + " irit.seq_end_seq_idx AS r_seq_end_seq_idx ", + "FROM interm_res_items_tmp_", rightTableSuffix, " AS irit ", + "JOIN lr_exp_res_tmp AS lrert ", + "ON irit.db_uuid = lrert.db_uuid ", + " AND irit.session = lrert.session ", + " AND irit.bundle = lrert.bundle ", + " AND irit.level = lrert.r_level ", + " AND irit.seq_start_seq_idx ", + " BETWEEN lrert.r_seq_start_seq_idx ", + " AND lrert.r_seq_end_seq_idx ", # r_seq_start_seq_idx coz all have length 1 + " AND irit.seq_end_seq_idx ", + " BETWEEN lrert.r_seq_start_seq_idx ", + " AND lrert.r_seq_end_seq_idx ", # r_seq_start_seq_idx coz all have length 1 + "")) + + } else { + # get all child sequences of childLevel that are linked to items on parentLevel + query_hierarchyWalk(emuDBhandle, + startItemsTableSuffix = rightTableSuffix, + targetItemsAttributeName = lResLvl, + preserveStartItemsRowLength = TRUE, # get sequences (i.e. collapse) + sessionPattern = sessionPattern, + bundlePattern = bundlePattern, + walkDown = TRUE, + verbose = verbose) # result written to lr_exp_res_tmp table (left parents/right children) + + # reduce to sequences in rightTableSuffix + # TODO don't extract and rewrite but to all in SQL + lrertTmp = DBI::dbGetQuery(emuDBhandle$connection, paste0("SELECT lrert.db_uuid,", # switch sides to maint. lr_ order of query + " lrert.session, ", + " lrert.bundle, ", + " irit.seq_start_id AS l_seq_start_id, ", + " irit.seq_end_id AS l_seq_end_id, ", + " irit.seq_len AS l_seq_len, ", + " irit.level AS l_level, ", + " irit.attribute AS l_attribute, ", + " irit.seq_start_seq_idx AS l_seq_start_seq_idx, ", + " irit.seq_end_seq_idx AS l_seq_end_seq_idx, ", + " lrert.l_seq_start_id AS r_seq_start_id, ", + " lrert.l_seq_end_id AS r_seq_end_id, ", + " lrert.l_seq_len AS r_seq_len, ", + " lrert.l_level AS r_level, ", + " lrert.l_attribute AS r_attribute, ", + " lrert.l_seq_start_seq_idx AS r_seq_start_seq_idx, ", + " lrert.l_seq_end_seq_idx AS r_seq_end_seq_idx ", + "FROM interm_res_items_tmp_", leftTableSuffix, " AS irit ", + "JOIN lr_exp_res_tmp AS lrert ", + "ON irit.db_uuid = lrert.db_uuid ", + " AND irit.session = lrert.session ", + " AND irit.bundle = lrert.bundle ", + " AND irit.level = lrert.r_level ", + " AND irit.seq_start_seq_idx ", + " BETWEEN lrert.r_seq_start_seq_idx ", + " AND lrert.r_seq_end_seq_idx ", + " AND irit.seq_end_seq_idx ", + " BETWEEN lrert.r_seq_start_seq_idx ", + " AND lrert.r_seq_end_seq_idx ", + "")) + + + } + # write back to table + DBI::dbExecute(emuDBhandle$connection, paste0("DELETE FROM lr_exp_res_tmp")) + + DBI::dbWriteTable(emuDBhandle$connection, + name = "lr_exp_res_tmp", + value = lrertTmp, + append = TRUE) + + nLrExpRes = DBI::dbGetQuery(emuDBhandle$connection, + "SELECT COUNT(*) AS n FROM lr_exp_res_tmp")$n + if(nLrExpRes > 0){ + if(nLeftProjItems != 0){ + # reduce projection items to DOMQ result items and store in correct table + qStr = paste0("SELECT ", + " i.db_uuid, ", + " i.session, ", + " i.bundle, ", + " i.l_seq_start_id AS seq_start_id, ", + " i.l_seq_end_id AS seq_end_id, ", + " pi.p_seq_start_id, ", + " pi.p_seq_end_id, ", + " pi.p_seq_len, ", + " pi.p_level, ", + " pi.p_attribute, ", + " pi.p_seq_start_seq_idx, ", + " pi.p_seq_end_seq_idx ", + "FROM lr_exp_res_tmp i, ", + " interm_res_proj_items_tmp_", leftTableSuffix, " AS pi ", + "WHERE ", + " i.db_uuid = pi.db_uuid AND ", + " i.session = pi.session ", + " AND i.bundle = pi.bundle ", + " AND i.l_seq_start_id = pi.seq_start_id ", + " AND i.l_seq_end_id = pi.seq_end_id ") + reducedPI = DBI::dbGetQuery(emuDBhandle$connection, qStr) + DBI::dbExecute(emuDBhandle$connection, + paste0("DELETE FROM interm_res_proj_items_tmp_", intermResTableSuffix)) + DBI::dbWriteTable(emuDBhandle$connection, + paste0("interm_res_proj_items_tmp_", intermResTableSuffix), + reducedPI, + append = TRUE, + row.names = FALSE) + + } + + if(nRightProjItems != 0) { + # reduce projection items to DOMQ result items and store in correct table + qStr = paste0("SELECT ", + " i.db_uuid, ", + " i.session, ", + " i.bundle, ", + " i.l_seq_start_id AS seq_start_id, ", + " i.l_seq_end_id AS seq_end_id, ", + " pi.p_seq_start_id, ", + " pi.p_seq_end_id, ", + " pi.p_seq_len, ", + " pi.p_level, ", + " pi.p_attribute, ", + " pi.p_seq_start_seq_idx, ", + " pi.p_seq_end_seq_idx ", + "FROM lr_exp_res_tmp i, ", + " interm_res_proj_items_tmp_", rightTableSuffix, " AS pi ", + "WHERE i.db_uuid = pi.db_uuid ", + " AND i.session = pi.session ", + " AND i.bundle=pi.bundle ", + " AND i.r_seq_start_id = pi.seq_start_id ", + " AND i.r_seq_end_id=pi.seq_end_id ", + "") + reducedPI = DBI::dbGetQuery(emuDBhandle$connection, qStr) + DBI::dbExecute(emuDBhandle$connection, paste0("DELETE FROM interm_res_proj_items_tmp_", intermResTableSuffix)) + DBI::dbWriteTable(emuDBhandle$connection, + paste0("interm_res_proj_items_tmp_", intermResTableSuffix), + reducedPI, + append = TRUE, + row.names = FALSE) + + } + } + + # place result in correct table + resItems = DBI::dbGetQuery(emuDBhandle$connection, + paste0("SELECT DISTINCT ", + " db_uuid, ", + " session, ", + " bundle, ", + " l_seq_start_id AS seq_start_id, ", + " l_seq_end_id AS seq_end_id, ", + " l_seq_len AS seq_len, ", + " l_level AS level, ", + " l_attribute AS attribute, ", + " l_seq_start_seq_idx AS seq_start_seq_idx, ", + " l_seq_end_seq_idx AS seq_end_seq_idx ", + "FROM lr_exp_res_tmp")) + + DBI::dbExecute(emuDBhandle$connection, + paste0("DELETE FROM interm_res_items_tmp_", intermResTableSuffix)) + DBI::dbWriteTable(emuDBhandle$connection, + paste0("interm_res_items_tmp_", intermResTableSuffix), + resItems, + append = TRUE, + row.names = FALSE) + + } + if(seqPos != -1){ + # query the result level of left term (removed lid.seq_end_id AS leId,rid.seq_start_id AS rsId,) + lrSeqQueryStr = paste0("SELECT ", + " lid.db_uuid, ", + " lid.session, ", + " lid.bundle, ", + " lid.seq_start_id AS l_seq_start_id, ", + " lid.seq_end_id AS l_seq_end_id, ", + " lid.seq_len AS l_seq_len, ", + " lid.level AS l_level, ", + " lid.attribute AS l_attribute, ", + " lid.seq_start_seq_idx AS l_seq_start_seq_idx, ", + " lid.seq_end_seq_idx AS l_seq_end_seq_idx, ", + " rid.seq_start_id AS r_seq_start_id, ", + " rid.seq_end_id AS r_seq_end_id, ", + " rid.seq_len AS r_seq_len, ", + " rid.level AS r_level, ", # this was lid.level? + " rid.attribute AS r_attribute, ", + " rid.seq_start_seq_idx AS r_seq_start_seq_idx, ", + " rid.seq_end_seq_idx AS r_seq_end_seq_idx ", + "FROM interm_res_items_tmp_", leftTableSuffix, " AS lid, ", + " interm_res_items_tmp_", rightTableSuffix, " AS rid, ", + " items AS il, ", + " items AS ir ", + "WHERE il.db_uuid = ir.db_uuid ", + " AND il.session=ir.session ", + " AND il.bundle=ir.bundle ", + " AND il.db_uuid = lid.db_uuid ", + " AND il.session = lid.session ", + " AND il.bundle = lid.bundle ", + " AND il.db_uuid = rid.db_uuid ", + " AND il.session=rid.session ", + " AND il.bundle=rid.bundle ", + " AND il.item_id = lid.seq_end_id ", + " AND ir.item_id = rid.seq_start_id ", + " AND il.level = ir.level ", + " AND ir.seq_idx = il.seq_idx + 1 ", + " AND il.session REGEXP '", sessionPattern, "' ", + " AND il.bundle REGEXP '", bundlePattern, "' ", + "") + + + # perform query an place result into lr_exp_res_tmp table + DBI::dbExecute(emuDBhandle$connection, + paste0("DELETE FROM lr_exp_res_tmp")) + insertQueryStr = paste0("INSERT INTO lr_exp_res_tmp ", lrSeqQueryStr) + DBI::dbExecute(emuDBhandle$connection, insertQueryStr) + + # check if no sequences where found -> clear & return + nSeq = DBI::dbGetQuery(emuDBhandle$connection, + paste0("SELECT COUNT(*) AS n FROM lr_exp_res_tmp"))$n + if(nSeq == 0) { + clear_intermResTabels(emuDBhandle, leftTableSuffix) + clear_intermResTabels(emuDBhandle, rightTableSuffix) + return() + } + + + if(nLeftProjItems != 0){ + # reduce to projection items + # check if SEQQ result items and store in correct table + qStr=paste0("SELECT ", + " i.db_uuid, ", + " i.session, ", + " i.bundle, ", + " i.l_seq_start_id AS seq_start_id, ", + " i.r_seq_end_id AS seq_end_id, ", + " pi.p_seq_start_id, ", + " pi.p_seq_end_id, ", + " pi.p_seq_len, ", + " pi.p_level, ", + " pi.p_attribute, ", + " pi.p_seq_start_seq_idx, ", + " pi.p_seq_end_seq_idx ", + "FROM lr_exp_res_tmp i, ", + " interm_res_proj_items_tmp_", leftTableSuffix, " AS pi ", + "WHERE i.db_uuid = pi.db_uuid ", + " AND i.session = pi.session ", + " AND i.bundle=pi.bundle ", + " AND i.l_seq_start_id = pi.seq_start_id ", + " AND i.l_seq_end_id = pi.seq_end_id") + + reducedPI = DBI::dbGetQuery(emuDBhandle$connection, qStr) + DBI::dbExecute(emuDBhandle$connection, + paste0("DELETE FROM interm_res_proj_items_tmp_", intermResTableSuffix)) + DBI::dbWriteTable(emuDBhandle$connection, + paste0("interm_res_proj_items_tmp_", intermResTableSuffix), + reducedPI, + append = TRUE, + row.names = FALSE) + } + + if(nRightProjItems != 0){ + # reduce to projection items + qStr=paste0("SELECT ", + " i.db_uuid, ", + " i.session, ", + " i.bundle, ", + " i.l_seq_start_id AS seq_start_id, ", + " i.r_seq_end_id AS seq_end_id, ", + " pi.p_seq_start_id, ", + " pi.p_seq_end_id, ", + " pi.p_seq_len, ", + " pi.p_level, ", + " pi.p_attribute, ", + " pi.p_seq_start_seq_idx, ", + " pi.p_seq_end_seq_idx ", + "FROM lr_exp_res_tmp AS i, ", + " interm_res_proj_items_tmp_", rightTableSuffix, " AS pi ", + "WHERE i.db_uuid = pi.db_uuid ", + " AND i.session = pi.session ", + " AND i.bundle = pi.bundle ", + " AND i.r_seq_start_id = pi.seq_start_id ", + " AND i.r_seq_end_id = pi.seq_end_id") + + reducedPI = DBI::dbGetQuery(emuDBhandle$connection, qStr) + DBI::dbExecute(emuDBhandle$connection, + paste0("DELETE FROM interm_res_proj_items_tmp_", intermResTableSuffix)) + DBI::dbWriteTable(emuDBhandle$connection, + paste0("interm_res_proj_items_tmp_", intermResTableSuffix), + reducedPI, + append = TRUE, + row.names = FALSE) + } + + # place result in correct table + resItems = DBI::dbGetQuery(emuDBhandle$connection, + paste0("SELECT DISTINCT ", + " db_uuid, ", + " session, ", + " bundle, ", + " l_seq_start_id AS seq_start_id, ", + " r_seq_end_id AS seq_end_id, ", + " l_seq_len + r_seq_len AS seq_len, ", + " l_level AS level, ", + " l_attribute AS attribute, ", + " l_seq_start_seq_idx AS seq_start_seq_idx, ", + " r_seq_end_seq_idx AS seq_end_seq_idx ", + "FROM lr_exp_res_tmp")) + DBI::dbExecute(emuDBhandle$connection, + paste0("DELETE FROM interm_res_items_tmp_", intermResTableSuffix)) + DBI::dbWriteTable(emuDBhandle$connection, + paste0("interm_res_items_tmp_", intermResTableSuffix), + resItems, + append = TRUE, + row.names = FALSE) + } + return() + }else{ + query_databaseWithEql(emuDBhandle, + qTrim, + sessionPattern, + bundlePattern, + intermResTableSuffix, + leftRightTableNrCounter, + verbose = verbose) + } +} + +################### +query_databaseWithEql <- function(emuDBhandle, + query, + sessionPattern, + bundlePattern, + intermResTableSuffix, + leftRightTableNrCounter, + verbose){ + parseRes = list() + qTrim = stringr::str_trim(query) + brOpenPos = get_charPosition(qTrim, '[', literalQuote = "'") + if(brOpenPos == -1){ + query_databaseEqlCONJQ(emuDBhandle, + qTrim, + sessionPattern, + bundlePattern, + intermResTableSuffix = intermResTableSuffix, + verbose) + return() + }else{ + + brClosePos = get_lastCharPosition(qTrim, ']', literalQuote = "'") + if(brClosePos == -1){ + stop("Syntax error: missing close bracket ']' for open bracket at pos ", + brOpenPos, + "\n") + } + + if(brOpenPos != 1){ + stop("Syntax error: Expected open bracket '[' at the beginning\n") + } + + if(brClosePos != nchar(qTrim)){ + stop("Syntax error: Expected close bracket ']' at the end\n") + } + + #parse string in bracket + inBr = substr(qTrim, brOpenPos + 1, brClosePos - 1) + query_databaseEqlInBracket(emuDBhandle, + inBr, + sessionPattern, + bundlePattern, + intermResTableSuffix, + leftRightTableNrCounter, + verbose = verbose) + return() + + } + stop("Unknown syntax error.") +} + +#################### +query_databaseWithEqlEmusegs <- function(emuDBhandle, + query, + sessionPattern, + bundlePattern, + timeRefSegmentLevel, + calcTimes, + verbose){ + # create "root" intermediate result tables + create_intermResTmpQueryTablesDBI(emuDBhandle, suffix = "root") + # query emuDB + query_databaseWithEql(emuDBhandle, + query, + sessionPattern, + bundlePattern, + intermResTableSuffix = "root", + leftRightTableNrCounter = 0, + verbose) + # escape singel quotes + query = gsub("'", "''", query) + + emusegs = convert_queryResultToEmusegs(emuDBhandle, + timeRefSegmentLevel, + sessionPattern, + bundlePattern, + query, + calcTimes, + verbose) + return(emusegs) + +} + +#################### +query_databaseWithEqlEmuRsegs <- function(emuDBhandle, + query, + sessionPattern, + bundlePattern, + timeRefSegmentLevel, + calcTimes, + verbose){ + # create "root" intermediate result tables + create_intermResTmpQueryTablesDBI(emuDBhandle, suffix = "root") + + # query emuDB + query_databaseWithEql(emuDBhandle, + query, + sessionPattern, + bundlePattern, + intermResTableSuffix = "root", + leftRightTableNrCounter = 0, + verbose = verbose) + # escape single quotes + queryStr = gsub("'", "''", query) + emuRsegs = convert_queryResultToEmuRsegs(emuDBhandle, + timeRefSegmentLevel, + sessionPattern, + bundlePattern, + queryStr = queryStr, + calcTimes = calcTimes, + verbose = verbose) + return(emuRsegs) + +} + + +##' Query emuDB +##' @description Function to query annotation items/structures in an emuDB +##' @details Evaluates a query string of query language queryLang on an +##' emuDB referenced by \code{emuDBhandle} and returns a segment list of the desired type resultType. +##' For details of the query language please refer to the EMU-SDMS manual's query +##' system chapter (\url{https://ips-lmu.github.io/The-EMU-SDMS-Manual/chap-querysys.html}). +##' This function extracts a list of segments which meet the conditions given by the query string. +##' A segment can consist of one (e.g. 's') or more (e.g. 's->t') items from +##' the specified emuDB level. Segment objects (type 'SEGMENT') contain the label +##' string and the start and end time information of the segment (in ms). +##' The \code{tibble} return type (now the defaults) objects additionally contain +##' sample position of start and end item. +##' Time information of symbolic elements (type 'ITEM') are derived from linked SEGMENT +##' levels if available. If multiple linked SEGMENT levels exist, you can specify the +##' level with the \code{timeRefSegmentLevel} argument. If time and sample values cannot be +##' derived they will be set to \code{\link{NA}}. \link[tibble]{tibble}s will +##' be ordered by the columns UUID, session, bundle and sequence index (seq_idx). +##' Legacy \link{emusegs} lists are ordered by the columns utts and start. +##' The query may be limited to session and/or bundle names specified by regular +##' expression pattern strings (see \link{regex}) in parameters \code{sessionPattern} +##' respectively \code{bundlePattern}. +##' @param emuDBhandle emuDB handle object (see \link{load_emuDB}) +##' @param query string (see vignette \url{https://ips-lmu.github.io/The-EMU-SDMS-Manual/chap-querysys.html}) +##' @param sessionPattern A regular expression pattern matching session names to +##' be searched from the database +##' @param bundlePattern A regular expression pattern matching bundle names to be +##' searched from the database +##' @param queryLang query language used for evaluating the query string +##' @param timeRefSegmentLevel set time segment level from which to derive time +##' information. It is only necessary to set this parameter if more than one child +##' level contains time information and the queried parent level is of type ITEM. +##' @param resultType type (class name) of result (either 'tibble', 'emuRsegs' or +##' 'emusegs' (use 'emusegs' for legacy compatablility only)) +##' @param calcTimes calculate times for resulting segments (results in +##' \code{NA} values for start and end times in emuseg/emuRsegs). As it can be +##' very computationally expensive to +##' calculate the times for large nested hierarchies, it can be turned off via this +##' parameter. +##' @param verbose be verbose. Set this to \code{TRUE} if you wish to choose which +##' path to traverse on intersecting hierarchies. If set to \code{FALSE} (the default) +##' all paths will be traversed (= legacy EMU behavior). +##' @return result set object of class resultType (default: \link[tibble]{tibble}, +##' compatible to legacy types \link{emuRsegs} and \link{emusegs}) +##' @export +##' @seealso \code{\link{load_emuDB}} +##' @keywords emuDB database query Emu EQL +##' @examples +##' \dontrun{ +##' +##' ################################## +##' # prerequisite: loaded ae emuDB +##' # (see ?load_emuDB for more information) +##' +##' ## Query database ae with EQL query "[Phonetic=t -> Phonetic=s]": +##' ## 'Find all sequences /ts/ on the level named Phonetics'. +##' ## and store result seglist in variable segListTs +##' +##' seglistTs = query(ae, "[Phonetic == t -> Phonetic == s]") +##' +##' ## Query database ae with EQL query "[Syllable == S ^ Phoneme == t]": +##' ## 'Find all items 't' on the level named Phoneme that are dominated by +##' ## items 'S' in level Syllable.' +##' ## Return legacy Emu result type 'emusegs' +##' +##' query(ae, "[Syllable == S ^ Phoneme == t]", resultType = "emusegs") +##' +##' ## Query 'p' items on the level named Phoneme from bundles whose +##' ## bundle names start with 'msajc07' +##' ## and whose session names start with '00' +##' ## (Note that here the query uses the operator '=' (meaning '==') +##' ## which is kept for backwards compatibilty to EQL1.) +##' +##' query(ae, "Phoneme = p", bundlePattern = "msajc05.*", sessionPattern = "00.*") +##' +##' } +##' +query <- function(emuDBhandle, + query, + sessionPattern = '.*', + bundlePattern = '.*', + queryLang = 'EQL2', + timeRefSegmentLevel = NULL, + resultType = "tibble", + calcTimes = TRUE, + verbose = FALSE){ + + check_emuDBhandle(emuDBhandle) + + if(queryLang=='EQL2'){ + # create temp tables + drop_allTmpTablesDBI(emuDBhandle) + create_tmpFilteredQueryTablesDBI(emuDBhandle) + + if(is.null(resultType)){ + emuRsegs = query_databaseWithEqlEmuRsegs(emuDBhandle, + query, + sessionPattern, + bundlePattern, + timeRefSegmentLevel, + calcTimes, + verbose = verbose) + drop_allTmpTablesDBI(emuDBhandle) + return(emuRsegs) + }else{ + if(resultType == 'emuRsegs'){ + emuRsegs = query_databaseWithEqlEmuRsegs(emuDBhandle, + query, + sessionPattern, + bundlePattern, + timeRefSegmentLevel, + calcTimes, + verbose) + drop_allTmpTablesDBI(emuDBhandle) + return(emuRsegs) + }else if(resultType == 'emusegs'){ + if(!is.null(timeRefSegmentLevel)){ + # TODO + stop("Parameter timeRefSegmentLevel not yet supported for", + " resultType 'emusegs'. Please use resultType 'tibble' (the default).") + } + return(query_databaseWithEqlEmusegs(emuDBhandle, + query, + sessionPattern, + bundlePattern, + timeRefSegmentLevel, + calcTimes, + verbose)) + }else if(resultType == 'tibble'){ + emuRsegs = query_databaseWithEqlEmuRsegs(emuDBhandle, + query, + sessionPattern, + bundlePattern, + timeRefSegmentLevel, + calcTimes, + verbose) + + res_tibble = convert_queryEmuRsegsToTibble(emuDBhandle, emuRsegs) + drop_allTmpTablesDBI(emuDBhandle) + return(res_tibble) + }else{ + stop("Unknown result type: '", + resultType, + "'. Supported result types: 'emuRsegs', 'emusegs' or 'tibble'") + } + } + + }else{ + stop("Unknown query language '",queryLang,"'.") + } +} diff --git a/R/emuR-releaseQuestions.R b/R/emuR-releaseQuestions.R new file mode 100644 index 00000000..1b61e0e4 --- /dev/null +++ b/R/emuR-releaseQuestions.R @@ -0,0 +1,5 @@ +release_questions <- function() { + c( + "Have you run the runBASwebservice tests?" + ) +} \ No newline at end of file diff --git a/R/emuR-requery.database.R b/R/emuR-requery.database.R new file mode 100644 index 00000000..bfb5ad52 --- /dev/null +++ b/R/emuR-requery.database.R @@ -0,0 +1,731 @@ +database.DDL.emuRsegsTmp = paste0("CREATE TEMP TABLE emursegs_tmp (", + " labels TEXT, ", + " start FLOAT, ", + " end FLOAT, ", + " utts TEXT, ", + " db_uuid VARCHAR(36) NOT NULL, ", + " session TEXT, ", + " bundle TEXT, ", + " start_item_id INTEGER, ", + " end_item_id INTEGER, ", + " level TEXT, ", + " start_item_seq_idx INTEGER, ", + " end_item_seq_idx INTEGER, ", + " type TEXT, ", + " sample_start INTEGER, ", + " sample_end INTEGER, ", + " sample_rate FLOAT, ", + " attribute TEXT ", + ");") + +create_requeryTmpTables <- function(emuDBhandle){ + DBI::dbExecute(emuDBhandle$connection, database.DDL.emuRsegsTmp) +} + +drop_requeryTmpTables <- function(emuDBhandle){ + if("emursegs_tmp" %in% DBI::dbListTables(emuDBhandle$connection)){ + DBI::dbExecute(emuDBhandle$connection, "DROP TABLE IF EXISTS emursegs_tmp") + } +} + +check_emuRsegsForRequery <- function(sl){ + + if(length(unique(sl$level)) != 1){ + warning("emuRsegs contains segments/annotation items of multiple levels!") + } + + sl_df = as.data.frame(sl) + + sl_df_sorted = dplyr::arrange(sl_df, .data$session, .data$bundle, .data$sample_start) + comp_res = compare::compare(sl_df, sl_df_sorted, allowAll = FALSE, ignoreAttrs = TRUE) + if(!comp_res$result){ + warning("emuRsegs is not ordered correctly (by session; bundle; seq_idx)! ", + "Hence, the ordering of the resulting emuRsegs object of the requery will ", + "NOT be the same! Use sort(emuRsegs) to sort the emuRsegs object correctly!") + } + +} + +check_tibbleForRequery <- function(tbl){ + req_columns = c("db_uuid", "session", "bundle", "start_item_id", + "end_item_id", "level", "attribute", "start_item_seq_idx", + "end_item_seq_idx") + if(!all(req_columns %in% names(tbl))){ + stop(paste0("tibble object does not contain all required columns. The required columns are: ", + paste(req_columns, collapse = "; "))) + } + +} + + +##' Requery sequential context of segment list in an emuDB +##' @description Function to requery sequential context of a segment list queried +##' from an emuDB +##' @details Builds a new segment list on the same hierarchical level +##' and the same length as the segment list given in \code{seglist}. The +##' resulting segments usually have different start position and length (in +##' terms of items of the respective level) controlled by the \code{offset}, +##' \code{offsetRef} and \code{length} parameters. +##' A segment here is defined as a single item or a chain of items from the +##' respective level, e.g. if a level in a bundle instance has labels 'a', 'b' +##' and 'c' in that order, 'a' or 'a->b' oder 'a->b->c' are all valid segments, +##' but not 'a->c'. +##' \code{offsetRef} determines if the position offset is referenced to the +##' start or the end item of the segments in the input list \code{seglist}; +##' parameter \code{offset} determines the offset of the resulting item start +##' position to this reference item; parameter \code{length} sets the item +##' length of the result segments. If the requested segments are out of bundle +##' item boundaries and parameter \code{ignoreOutOfBounds} is \code{FALSE} +##' (the default), an error is generated. To get residual resulting segments +##' that lie within the bounds the \code{ignoreOutOfBounds} parameter can be +##' set to \code{TRUE}. The returned segment list is usually of the same +##' length and order as the input \code{seglist}; if \code{ignoreOutOfBounds=FALSE}, +##' the resulting segment list may be out of sync. +##' @param emuDBhandle emuDB handle as returned by \code{\link{load_emuDB}} +##' @param seglist segment list to requery on (type: 'tibble' or 'emuRsegs') +##' @param offset start item offset in sequence (default is 0, meaning the start +##' or end item of the input segment) +##' @param offsetRef reference item for offset: 'START' for first and 'END' +##' for last item of segment +##' @param length item length of segments in the returned segment list +##' @param ignoreOutOfBounds ignore result segments that are out of bundle bounds +##' @param resultType type of result (either 'tibble' == default, 'emuRsegs') +##' @param calcTimes calculate times for resulting segments (results in \code{NA} +##' values for start and end times in emuseg/emuRsegs). As it can be very +##' computationally expensive to calculate the times for large nested hierarchies, +##' it can be turned off via this boolean parameter. +##' @param timeRefSegmentLevel set time segment level from which to derive time +##' information. It is only necessary to set this parameter if more than one +##' child level contains time information and the queried parent level is of type ITEM. +##' @param verbose be verbose. Set this to \code{TRUE} if you wish to choose which +##' path to traverse on intersecting hierarchies. If set to \code{FALSE} (the +##' default) all paths will be traversed (= legacy EMU behaviour). +##' @return result set object of class \link{emuRsegs} or \link[tibble]{tibble} +##' @export +##' @seealso \code{\link{query}} \code{\link{requery_hier}} \code{\link{emuRsegs}} +##' @keywords emuDB database requery +##' @examples +##' \dontrun{ +##' +##' ################################## +##' # prerequisite: loaded ae emuDB +##' # (see ?load_emuDB for more information) +##' +##' ## Requery previous item of 'p' on level 'Phonetic' +##' sl1 = query(ae, "Phonetic == p") +##' +##' requery_seq(ae, sl1, offset = -1) +##' +##' ## Requery context (adding previuos and following elements) +##' ## of 'p' on phonetic level +##' +##' requery_seq(ae, sl1, offset = -1, length = 3) +##' +##' ## Requery previous item of n->t sequence +##' sl2 = query(ae, "[Phoneme == n -> Phoneme == t]") +##' +##' requery_seq(ae, sl2, offset = -1) +##' +##' ## Requery last item within n->t sequence +##' +##' requery_seq(ae, sl2, offsetRef = 'END') +##' +##' ## Requery following item after n->t sequence +##' +##' requery_seq(ae, sl2, offset = 1, offsetRef = 'END') +##' +##' ## Requery context (previous and following items) of n->t sequence +##' +##' requery_seq(ae, sl2, offset = -1, length = 4) +##' +##' ## Requery next word contexts (sequence includes target word) +##' +##' sl3 = query(ae, "Text == to") +##' requery_seq(ae, sl3, length = 2) +##' +##' ## Requery following two word contexts, ignoring segment +##' ## sequences that are out of bundle end bounds +##' requery_seq(ae, sl3, length = 3, ignoreOutOfBounds = TRUE) +##' +##' } +requery_seq <- function(emuDBhandle, + seglist, + offset = 0, + offsetRef = 'START', + length = 1, + ignoreOutOfBounds = FALSE, + resultType = "tibble", + calcTimes = TRUE, + timeRefSegmentLevel = NULL, + verbose = FALSE){ + + check_emuDBhandle(emuDBhandle) + + if(!inherits(seglist, c("emuRsegs", "tbl_df"))){ + stop("Segment list 'seglist' must be of type 'emuRsegs' or ", + "'tibble' with the requiered fields. (Do not set a ", + "value for 'resultType' parameter in the query() command; ", + "then the default resultType=emuRsegs will be used)") + } + + if(length <= 0){ + stop("Parameter length must be greater than 0") + } + + if(nrow(seglist) == 0){ + # empty seglist, return the empty list + return(seglist) + }else{ + if(inherits(seglist, "emuRsegs")){ + check_emuRsegsForRequery(seglist) + }else{ + check_tibbleForRequery(seglist) + } + # drop create tmp tables and recreate (will ensure they are empty) + drop_requeryTmpTables(emuDBhandle) + create_requeryTmpTables(emuDBhandle) + # place in emuRsegsTmp table + DBI::dbExecute(emuDBhandle$connection, + "DELETE FROM emursegs_tmp;") # delete + + DBI::dbWriteTable(emuDBhandle$connection, + "emursegs_tmp", + as.data.frame(seglist), + append = TRUE, + row.names = FALSE) # append to make sure field names don't get overwritten + + # load config + dbConfig = load_DBconfig(emuDBhandle) + + if(FALSE){ # here the boolean input parameter should be + join_type = "LEFT JOIN" + }else{ + join_type = "JOIN" + } + + if(offsetRef=='START'){ + heQueryStr = paste0("SELECT ", + " sl.db_uuid, ", + " sl.session, ", + " sl.bundle, ", + " items_start.item_id AS seq_start_id, ", + " items_end.item_id AS seq_end_id, ", + length, " AS seq_len, ", + " sl.level AS level, ", + " sl.attribute AS attribute, ", + " items_start.seq_idx AS seq_start_seq_idx, ", + " items_end.seq_idx AS seq_end_seq_idx ", + "FROM emursegs_tmp sl ", + join_type, " items items_start ", + "ON sl.db_uuid = items_start.db_uuid ", + " AND sl.session = items_start.session ", + " AND sl.bundle = items_start.bundle ", + " AND sl.level = items_start.level ", + " AND sl.start_item_seq_idx + ", offset, " = items_start.seq_idx ", + join_type, " items AS items_end ", + "ON sl.db_uuid = items_end.db_uuid ", + " AND sl.session = items_end.session ", + " AND sl.bundle = items_end.bundle ", + " AND sl.level = items_end.level ", + " AND sl.start_item_seq_idx + ", offset + length - 1, " = items_end.seq_idx ", + "") + + #heQueryStr=paste0(heQueryStr,"il.level = sll.level AND il.seq_idx = sll.seq_idx + ", offset, " AND ", + # "ir.level=sll.level AND ir.seq_idx=sll.seq_idx+",offset+length-1) + }else if(offsetRef == 'END'){ + heQueryStr=paste0("SELECT ", + " sl.db_uuid, ", + " sl.session, ", + " sl.bundle, ", + " items_start.item_id AS seq_start_id, ", + " items_end.item_id AS seq_end_id, ", + length, " AS seq_len, ", + " sl.level AS level, ", + " sl.attribute AS attribute, ", + " items_start.seq_idx AS seq_start_seq_idx, ", + " items_end.seq_idx AS seq_end_seq_idx ", + "FROM emursegs_tmp AS sl ", + join_type, " items items_start ", + "ON sl.db_uuid = items_start.db_uuid ", + " AND sl.session = items_start.session ", + " AND sl.bundle = items_start.bundle ", + " AND sl.level = items_start.level ", + " AND sl.end_item_seq_idx + ", offset, " = items_start.seq_idx ", + join_type, " items items_end ", + "ON sl.db_uuid = items_end.db_uuid ", + " AND sl.session = items_end.session ", + " AND sl.bundle = items_end.bundle ", + " AND sl.level = items_end.level ", + " AND sl.end_item_seq_idx + ", offset + length - 1, " = items_end.seq_idx ", + "") + #heQueryStr=paste0(heQueryStr,"il.level=slr.level AND il.seq_idx=slr.seq_idx+",offset," AND ", + # "ir.level=slr.level AND ir.seq_idx=slr.seq_idx+",offset+length-1) + }else{ + stop("Parameter offsetRef must be one of 'START' or 'END'\n") + } + #heQueryStr=paste0(heQueryStr," ORDER BY il.ROWID"); + he = DBI::dbGetQuery(emuDBhandle$connection, heQueryStr) + slLen = nrow(seglist) + resLen = nrow(he) + outOfBndCnt = slLen - resLen + if(!ignoreOutOfBounds & outOfBndCnt > 0){ + if(outOfBndCnt == slLen){ + stop("All (", + outOfBndCnt, + ") of the requested sequence(s) is/are out of boundaries.") + }else{ + stop(outOfBndCnt, + " of the requested sequence(s) is/are out of boundaries.\nSet parameter ", + "'ignoreOutOfBounds=TRUE' to get residual result segments that lie within the bounds.") + } + } + + # drop and create tmpQueryTables and write to table + drop_allTmpTablesDBI(emuDBhandle) + create_tmpFilteredQueryTablesDBI(emuDBhandle) + DBI::dbWriteTable(emuDBhandle$connection, + "interm_res_items_tmp_root", + he, + overwrite = TRUE) + + trSl = convert_queryResultToEmuRsegs(emuDBhandle, + timeRefSegmentLevel = timeRefSegmentLevel, + sessionPattern = ".*", + bundlePattern = ".*", + queryStr = "FROM REQUERY", + calcTimes = calcTimes, + verbose = verbose) + + inSlLen=nrow(seglist) + trSlLen=nrow(trSl) + + if(inSlLen != trSlLen){ + warning("Found missing items in resulting segment list! ", + "Replacing missing rows with NA values.") + + seglist_manip = seglist + + if(offsetRef=='START'){ + seglist_manip$start_item_seq_idx = seglist_manip$start_item_seq_idx + offset + seglist_manip$end_item_seq_idx = seglist_manip$start_item_seq_idx + length - 1 + } else{ + seglist_manip$start_item_seq_idx = seglist_manip$end_item_seq_idx + offset + seglist_manip$end_item_seq_idx = seglist_manip$end_item_seq_idx + offset + length - 1 + } + + join_col_names = c("db_uuid", + "session", + "bundle", + "level", + "start_item_seq_idx", + "end_item_seq_idx") + joined_with_orig_sl = dplyr::left_join(seglist_manip, + trSl, + by = join_col_names) %>% + dplyr::select(join_col_names, dplyr::matches(".+\\.y$")) + + # remove trailing .y from column names + colnames(joined_with_orig_sl) = stringr::str_replace(colnames(joined_with_orig_sl), + "(.+)\\.y$", + "\\1") + + # re-add utts column + joined_with_orig_sl$utts = paste0(joined_with_orig_sl$session, + ":", + joined_with_orig_sl$bundle) + + # resort columns + joined_with_orig_sl = joined_with_orig_sl %>% + dplyr::select(colnames(trSl)) + + # NA-out entire line + joined_with_orig_sl[is.na(joined_with_orig_sl$labels),] = NA + + # replace trSl + trSl = make.emuRsegs(emuDBhandle$dbName, + seglist = joined_with_orig_sl, + query = attr(trSl, "query"), + type = attr(trSl, "type")) + + } + + + if(resultType == "emuRsegs"){ + result = trSl + }else if(resultType == "tibble"){ + result = convert_queryEmuRsegsToTibble(emuDBhandle, trSl) + }else{ + # should probably check this somewhere above + stop("Unsupported resultType!") + + } + + drop_allTmpTablesDBI(emuDBhandle) + + return(result) + } +} + +##' Requery hierarchical context of a segment list in an emuDB +##' @description Function to requery the hierarchical context of a segment list queried from an emuDB +##' @details A segment is defined as a single item or a chain of items from the respective level, e.g. +##' if a level in a bundle instance has labels 'a', 'b' and 'c' in that order, 'a' or 'a->b' or 'a->b->c' +##' are all valid segments, 'a->c' is not. For each segment of the input segment list \code{seglist} +##' the function checks the start and end item for hierarchically linked items in the given target +##' level, and based on them constructs segments in the target level. As the start item in the resulting +##' segment the item with the lowest sequence index is chosen; for the end item that with the highest +##' sequence index. If the parameter \code{collapse} is set to \code{TRUE} (the default), it is guaranteed +##' that result and input segment list have the same length (for each input +##' segment one or multiple segments on the target level was found). If multiple linked segments where found +##' they are collapsed into a sequence of segments ('a->b->c') and if no linked items where found an NA row +##' is inserted. +##' @param emuDBhandle emuDB handle as returned by \code{\link{load_emuDB}} +##' @param seglist segment list to requery on (type: \link{emuRsegs}) +##' @param level character string: name of target level +##' @param collapse collapse the found items in the requested level to a sequence (concatenated with ->). +##' If set to \code{FALSE} separate items as new entries in the emuRsegs object are returned. +##' @param resultType type of result (either 'tibble' == default or 'emuRsegs') +##' @param calcTimes calculate times for resulting segments (results in \code{NA} values for start and end +##' times in emuseg/emuRsegs). As it can be very computationally expensive to +##' calculate the times for large nested hierarchies, it can be turned off via this boolean parameter. +##' @param timeRefSegmentLevel set time segment level from which to derive time information. It is only +##' necessary to set this parameter if more than one child level contains time information and the queried +##' parent level is of type ITEM. +##' @param verbose be verbose. Set this to \code{TRUE} if you wish to choose which path to traverse on intersecting +##' hierarchies. If set to \code{FALSE} (the default) all paths will be traversed (= legacy EMU behaviour). +##' @return result set object of class \link{emuRsegs} or \link[tibble]{tibble} +##' @export +##' @seealso \code{\link{query}} \code{\link{requery_seq}} \code{\link{emuRsegs}} +##' @keywords emuDB database requery +##' @examples +##' \dontrun{ +##' +##' ################################## +##' # prerequisite: loaded ae emuDB +##' # (see ?load_emuDB for more information) +##' +##' ## Downward requery: find 'Phoneme' sequences of all words 'beautiful' (of level 'Text') +##' ## Note that the resulting segments consists of phoneme sequences and have therefore +##' ## the same length as the word segments. +##' +##' sl1 = query(ae, "Text == beautiful") +##' requery_hier(ae, sl1, level = "Phoneme") +##' +##' ## Upward requery: find all word segments that dominate a 'p' on level 'Phoneme' +##' ## Note that the resulting segments are larger than the input segments, +##' ## because they contain the complete words. +##' +##' sl1 = query(ae, "Phonetic == p") +##' requery_hier(ae, sl1, level = 'Text') +##' +##' ## Why is there a 'p' the word 'emphazised'? Requery the whole words back down to 'Phoneme' level: +##' +##' requery_hier(ae, sl1, level = 'Phoneme') +##' +##' ## ... because of 'stop epenthesis' a 'p' is inserted between 'm' and 'f' +##' +##' ## Combined requery: last phonemes of all words beginning with 'an'. +##' ## Note that we use a regular expression 'an.*' (EQL operator '=~') in the query. +##' +##' sl1=query(ae, "Text =~ an.*") +##' requery_seq(ae, requery_hier(ae, sl1, level = 'Phoneme'), offsetRef = 'END') +##' +##' } +requery_hier <- function(emuDBhandle, + seglist, + level, + collapse = TRUE, + resultType = "tibble", + calcTimes = TRUE, + timeRefSegmentLevel = NULL, + verbose = FALSE){ + + check_emuDBhandle(emuDBhandle) + + if(!inherits(seglist, c("emuRsegs", "tbl_df"))){ + stop("Segment list 'seglist' must be of type 'emuRsegs'. (Do not set a value ", + "for 'resultType' parameter for the query, the default resultType will be used)") + } + + if(nrow(seglist) == 0){ + # empty seglist, return the empty list + return(seglist) + }else{ + if(inherits(seglist,"emuRsegs")){ + check_emuRsegsForRequery(seglist) + }else{ + check_tibbleForRequery(seglist) + } + # drop create tmp tables and recreate (will ensure they are empty) + drop_allTmpTablesDBI(emuDBhandle) + create_requeryTmpTables(emuDBhandle) + drop_tmpFilteredQueryTablesDBI(emuDBhandle) + create_tmpFilteredQueryTablesDBI(emuDBhandle) + + # place in emursegs_tmp table + DBI::dbExecute(emuDBhandle$connection, "DELETE FROM emursegs_tmp;") + DBI::dbWriteTable(emuDBhandle$connection, + "emursegs_tmp", + as.data.frame(seglist), + append = TRUE, + row.names = FALSE) # append to avoid rewirte of col names + + # get level for attribute definition specified in seglist + segListLevel = DBI::dbGetQuery(emuDBhandle$connection, + "SELECT DISTINCT level FROM emursegs_tmp;")$level + + if(length(segListLevel) > 1){ + stop("Multiple levels found in seglist! This is not supported by requery_hier()!") + } + + seglistAttrDefLn = get_levelNameForAttributeName(emuDBhandle, segListLevel) + # get level for req level (which is actually a attribute definition) + reqAttrDef = level # TODO rename input parameter + check_levelAttributeName(emuDBhandle, reqAttrDef) # check if valid attr. def + + reqAttrDefLn = get_levelNameForAttributeName(emuDBhandle, reqAttrDef) + + alreadyInInterm_res_items_tmp_root = FALSE + + if(seglistAttrDefLn != reqAttrDefLn){ + # insert all original emuRsegs items new table + origSeglistItemsTableSuffix = "orig_seglist_items" + create_intermResTmpQueryTablesDBI(emuDBhandle, + suffix = origSeglistItemsTableSuffix) + DBI::dbExecute(emuDBhandle$connection, + paste0("INSERT INTO interm_res_items_tmp_", origSeglistItemsTableSuffix, " ", + "SELECT ", + " erst.db_uuid,", + " erst.session, ", + " erst.bundle, ", + " erst.start_item_id AS seq_start_id, ", + " erst.end_item_id AS seq_end_id, ", + " (i_end.seq_idx - i_start.seq_idx) + 1 AS seq_len, ", + " erst.level AS level, ", + " erst.attribute AS attribute, ", + " erst.start_item_seq_idx AS seq_start_seq_idx, ", + " erst.end_item_seq_idx AS seq_end_seq_idx ", + "FROM emursegs_tmp AS erst, ", + " items AS i_start, ", + " items AS i_end ", + "WHERE erst.db_uuid = i_start.db_uuid ", + " AND erst.session = i_start.session ", + " AND erst.bundle = i_start.bundle ", + " AND erst.start_item_id = i_start.item_id ", + " AND erst.db_uuid = i_end.db_uuid ", + " AND erst.session = i_end.session ", + " AND erst.bundle = i_end.bundle ", + " AND erst.end_item_id = i_end.item_id ", + "")) + + # don't need requery tmp tables any more -> drop them + drop_requeryTmpTables(emuDBhandle) + + # get hierarchy paths to check if going up or + # down the hierarchy (requery to parent or to child level) + connectHierPaths = get_hierPathsConnectingLevels(emuDBhandle, + seglistAttrDefLn, + reqAttrDefLn) + + seglistLevelIndexInPath = match(seglistAttrDefLn, connectHierPaths[[1]]) + reqLevelIndexInPath = match(reqAttrDefLn, connectHierPaths[[1]]) + + preserveChildLength = FALSE + preserveParentLength = FALSE + + if(reqLevelIndexInPath < seglistLevelIndexInPath){ + # going up + preserveChildLength = TRUE + if(!collapse){ + # override perserveLengths if not collapsing + preserveChildLength = FALSE + } + if(any(seglist$end_item_seq_idx - seglist$start_item_seq_idx + 1 != 1)){ + + # first get all parents + query_hierarchyWalk(emuDBhandle, + startItemsTableSuffix = origSeglistItemsTableSuffix, + targetItemsAttributeName = reqAttrDef, + preserveStartItemsRowLength = FALSE, + walkDown = FALSE, + verbose = verbose) # result written to lr_exp_res_tmp table + # DBI::dbReadTable(emuDBhandle$connection, "lr_exp_res_tmp") + # place parents into new table + allParentsItemsTableSuffix = "all_parents_items" + create_intermResTmpQueryTablesDBI(emuDBhandle, + suffix = allParentsItemsTableSuffix) + DBI::dbExecute(emuDBhandle$connection, + paste0("INSERT INTO interm_res_items_tmp_", allParentsItemsTableSuffix, " ", + "SELECT ", + " db_uuid, ", + " session, ", + " bundle, ", + " r_seq_start_id AS seq_start_id, ", + " r_seq_end_id AS seq_end_id, ", + " r_seq_len AS seq_len, ", + " '", reqAttrDef, "' AS attribute, ", + " '", reqAttrDefLn, "' AS level, ", + " r_seq_start_seq_idx AS seq_start_seq_idx, ", + " r_seq_end_seq_idx AS seq_end_seq_idx ", + " FROM lr_exp_res_tmp")) + # DBI::dbReadTable(emuDBhandle$connection, + # paste0("interm_res_items_tmp_", allParentsItemsTableSuffix)) + + # get all seqs on seglist level that are dominated by parents + query_hierarchyWalk(emuDBhandle, + startItemsTableSuffix = allParentsItemsTableSuffix, + targetItemsAttributeName = seglistAttrDefLn, + preserveStartItemsRowLength = TRUE, + walkDown = TRUE, + verbose = verbose) # result written to lr_exp_res_tmp table + + # DBI::dbReadTable(emuDBhandle$connection, "lr_exp_res_tmp") + # DBI::dbReadTable(emuDBhandle$connection, + # paste0("interm_res_items_tmp_", origSeglistItemsTableSuffix)) + + # extract parents that are parents of sequences that encapsulate the seglist seqs + # and write to new table + alreadyInInterm_res_items_tmp_root = TRUE + create_intermResTmpQueryTablesDBI(emuDBhandle) + + # DBI::dbReadTable(emuDBhandle$connection, "interm_res_items_tmp_root") + + DBI::dbExecute(emuDBhandle$connection, paste0("INSERT INTO interm_res_items_tmp_root ", + "SELECT lrert.db_uuid, ", + " lrert.session, ", + " lrert.bundle, ", + " lrert.l_seq_start_id AS seq_start_id, ", + " lrert.l_seq_end_id AS seq_end_id, ", + " lrert.l_seq_len AS seq_len, ", + "'", reqAttrDefLn, "' AS level, ", + "'", reqAttrDef, "' AS attribute,", + " lrert.l_seq_start_seq_idx AS seq_start_seq_idx, ", + " lrert.l_seq_end_seq_idx AS seq_end_seq_idx ", + "FROM interm_res_items_tmp_", origSeglistItemsTableSuffix, " AS irit ", + "LEFT JOIN lr_exp_res_tmp AS lrert ", + "ON irit.db_uuid == lrert.db_uuid ", + " AND irit.session == lrert.session ", + " AND irit.bundle == lrert.bundle ", + " AND irit.level == lrert.r_level ", + " AND irit.seq_start_seq_idx BETWEEN lrert.r_seq_start_seq_idx ", + " AND lrert.r_seq_end_seq_idx", + " AND irit.seq_end_seq_idx BETWEEN lrert.r_seq_start_seq_idx ", + " AND lrert.r_seq_end_seq_idx", + "")) + + } else { + query_hierarchyWalk(emuDBhandle, + startItemsTableSuffix = origSeglistItemsTableSuffix, + targetItemsAttributeName = reqAttrDef, + preserveStartItemsRowLength = preserveChildLength, + walkDown = FALSE, + verbose = verbose) # result written to lr_exp_res_tmp table + } + + }else{ + # going down + if(collapse){ + # override perserveLengths if not collapsing + preserveParentLength = TRUE + } + query_hierarchyWalk(emuDBhandle, + startItemsTableSuffix = origSeglistItemsTableSuffix, + targetItemsAttributeName = reqAttrDef, + preserveStartItemsRowLength = preserveParentLength, + verbose = verbose) # result written to lr_exp_res_tmp table + + } + + if(!alreadyInInterm_res_items_tmp_root){ + # move query_databaseHier results into interm_res_items_tmp_root + # and reset level back to requested attribute + create_intermResTmpQueryTablesDBI(emuDBhandle) + DBI::dbExecute(emuDBhandle$connection, + paste0("INSERT INTO interm_res_items_tmp_root ", + "SELECT ", + " db_uuid, ", + " session, ", + " bundle, ", + " r_seq_start_id AS seq_start_id, ", + " r_seq_end_id AS seq_end_id, ", + " r_seq_len AS seq_len, ", + " r_level AS level, ", + " r_attribute AS attribute, ", + " r_seq_start_seq_idx AS seq_start_seq_idx, ", + " r_seq_end_seq_idx AS seq_end_seq_idx ", + " FROM lr_exp_res_tmp")) + } + } else { + # just reset level as convert_queryResultToEmuRsegs does the rest! + create_intermResTmpQueryTablesDBI(emuDBhandle) + + DBI::dbExecute(emuDBhandle$connection, + paste0("INSERT INTO interm_res_items_tmp_root ", + "SELECT erst.db_uuid, ", + " erst.session, ", + " erst.bundle, ", + " erst.start_item_id AS seq_start_id, ", + " erst.end_item_id AS seq_end_id, ", + " (i_end.seq_idx - i_start.seq_idx) + 1 AS seq_len, ", + " '", level, "' AS level, ", + " '", level, "' AS attribute, ", + " erst.start_item_seq_idx AS seq_start_seq_idx, ", + " erst.end_item_seq_idx AS seq_end_seq_idx ", + "FROM emursegs_tmp AS erst, ", + " items AS i_start, ", + " items AS i_end ", + "WHERE erst.db_uuid = i_start.db_uuid ", + " AND erst.session = i_start.session ", + " AND erst.bundle = i_start.bundle ", + " AND erst.start_item_id = i_start.item_id ", + " AND erst.db_uuid = i_end.db_uuid ", + " AND erst.session = i_end.session ", + " AND erst.bundle = i_end.bundle ", + " AND erst.end_item_id = i_end.item_id ")) + # don't need requery tmp tables any more -> drop them + drop_requeryTmpTables(emuDBhandle) + + } + trSl = convert_queryResultToEmuRsegs(emuDBhandle, + timeRefSegmentLevel = timeRefSegmentLevel, + sessionPattern = ".*", + bundlePattern = ".*", + queryStr = "FROM REQUERY", + calcTimes = calcTimes, + preserveParentLength = TRUE, + verbose = verbose) + + inSlLen = nrow(seglist) + trSlLen = nrow(trSl) + + if(inSlLen != trSlLen){ + warning("Length of requery segment list (", + trSlLen, + ") differs from input list (", + inSlLen, + ")!") + } + if(resultType == "emuRsegs"){ + result = trSl + }else if(resultType == "tibble"){ + result = convert_queryEmuRsegsToTibble(emuDBhandle, trSl) + }else{ + # should probably check this somewhere above + stop("Unsupported resultType!") + + } + if(any(is.na(result$db_uuid))){ + warning("Found missing items in resulting segment list! ", + "Replaced missing rows with NA values.") + } + + drop_allTmpTablesDBI(emuDBhandle) + return(result) + } +} + +####################### +# FOR DEVELOPMENT +# library('testthat') +# test_file("tests/testthat/test_aaa_initData.R") +# test_file('tests/testthat/test_emuR-requery.database.R') diff --git a/R/emuR-server.R b/R/emuR-server.R new file mode 100644 index 00000000..c3d530d0 --- /dev/null +++ b/R/emuR-server.R @@ -0,0 +1,923 @@ + +check_tibbleForServe <- function(tbl){ + req_columns = c("db_uuid", "session", "bundle", "start", + "end", "sample_rate") + + if(!all(req_columns %in% names(tbl))){ + stop(paste0("tibble object does not contain all required columns. The required columns are: ", + paste(req_columns, collapse = "; "))) + } + +} + +##' Serve EMU database to EMU-webApp +##' +##' @description Serves emuDB media files, SSFF tracks and annotations for +##' EMU-webApp browser GUI \url{http://ips-lmu.github.io/EMU-webApp/} +##' +##' Instructions: +##' +##' Start and connect (this should happen automatically): +##' +##' \itemize{ +##' \item Call this function to start the server. +##' \item Start a suitable HTML5 capable Web-Browser (Google Chrome, Firefox,...). +##' \item Navigate to the EMU-Webapp URL: \url{http://ips-lmu.github.io/EMU-webApp/}. +##' \item Press the 'Connect' button in the EMU-webApp and connect with default URL. +##' \item EMU-webApp loads the bundle list and the first +##' bundles media file, SSFF tracks and annotations. +##' } +##' +##' Disconnect and stop: +##' \itemize{ +##' \item Disconnect and stop the server with the 'Clear' button of +##' the webapp or the reload button of your browser. +##' \item The server can also be stopped by +##' calling \code{\link[httpuv]{stopAllServers}} of the \link[httpuv]{httpuv} package +##' } +##' +##' Hints: +##' \itemize{ +##' \item To serve only a subset of sessions or bundles use +##' the parameters \code{sessionPattern} and/or \code{bundlePattern}. +##' \item Use the \code{seglist} parameter to pass in a segment list +##' which was generated using the \code{query} function. This will +##' allow quick navigation to those segments. +##' } +##' +##' @details Function opens a HTTP/websocket and waits in a loop for browser requests. +##' Parameter host determines the IP address(es) of hosts allowed to connect to the +##' server. By default the server only listens to localhost. If you want to allow +##' connection from any host set the host parameter to \code{0.0.0.0}. Please note +##' that this might be an safety issue! The \code{port} parameter determines the port +##' the server listens on. The \code{host} and \code{port} parameters are intended +##' only for expert users. When started the R console will be blocked. On successful +##' connection the server sends the session and bundle list of the database referenced +##' by name by parameter \code{dbName} or by UUID parameter \code{dbUUID}. +##' The Web application requests bundle data for viewing or editing. If a bundle +##' is modified with the EMU-webApp and the save button is pressed the server modifies +##' the internal database and saves the changes to disk. +##' Communication between server and EMU webApp is defined by EMU-webApp-websocket-protocol +##' version 0.0.2 (\url{https://ips-lmu.github.io/The-EMU-SDMS-Manual/app-chap-wsProtocol.html}). +##' +##' @param emuDBhandle emuDB handle as returned by \code{\link{load_emuDB}} +##' @param sessionPattern A regular expression pattern matching session names to be served +##' @param bundlePattern A regular expression pattern matching bundle names to be served +##' @param seglist segment list to use for times anchors and session + bundle restriction (type: \link{emuRsegs}) +##' @param bundleListName name of bundleList stored in emuDB/bundleLists subdir to send to EMU-webApp +##' @param host host IP to listen to (default: 127.0.0.1 (localhost)) +##' @param port the port number to listen on (default: 17890) +##' @param autoOpenURL URL passed to \code{\link{browseURL}} function. If NULL or an empty string are passed in +##' \code{\link{browseURL}} will not be invoked. +##' @param browser argument passed on to \code{browser} argument of \code{\link{browseURL}} (see +##' it's documentation for details ) +##' @param debug TRUE to enable debugging (default: no debugging messages) +##' @param debugLevel integer higher values generate more detailed debug output +##' @param useViewer Use the viewer provided by \code{getOption("viewer")} (the viewer pane when using RStudio) +##' and host a local version of the EMU-webApp in it. This will clone the current +##' EMU-webApp build (\url{https://github.com/IPS-LMU/EMU-webApp/tree/gh-pages/}) into the directory provided by +##' \code{\link{tempdir}} and serve this local version. A clone will +##' only be performed if no \code{file.path(tempdir(), "EMU-webApp")} directory is present. An alternative +##' directory can be also set: \code{options(emuR.emuWebApp.dir="path/to/EMU-webApp")} (use if offline functionality is required). +##' @return TRUE (invisible) if the server was started +##' @export +##' @keywords emuDB EMU-webApp database websocket Emu +##' @examples +##' \dontrun{ +##' ## Load EMU database 'myDb' and serve it to the EMU-webApp (opens default HTTP/websocket port 17890) +##' +##' myDb = load_emuDB("/path/to/myDb") +##' serve(myDb) +##' } +##' +serve <- function(emuDBhandle, + sessionPattern = '.*', + bundlePattern = '.*', + seglist = NULL, + bundleListName = NULL, + host = '127.0.0.1', + port = 17890, + autoOpenURL = "https://ips-lmu.github.io/EMU-webApp/?autoConnect=true", + browser = getOption("browser"), + useViewer = TRUE, + debug = FALSE, + debugLevel = 0){ + + check_emuDBhandle(emuDBhandle) + + if(debug && debugLevel==0){ + debugLevel=2 + } + + bundleCount = 0 + DBconfig = load_DBconfig(emuDBhandle) + if(is.null(seglist)){ + allBundlesDf = list_bundles(emuDBhandle) + }else{ + check_tibbleForServe(seglist) + tmp = data.frame(session = seglist$session, + bundle = seglist$bundle, + stringsAsFactors = FALSE) + allBundlesDf = unique(tmp) + } + + bundlesDf = allBundlesDf + + if(!is.null(bundleListName)){ + if(!is.null(seglist)){ + stop("both seglist & bundleListName can't be set at the same time!") + } + bundlesDf = read_bundleList(emuDBhandle, bundleListName) + if(is.null(DBconfig$EMUwebAppConfig$restrictions$bundleComments) || is.null(DBconfig$EMUwebAppConfig$restrictions$bundleFinishedEditing)){ + # TODO ask user to set? + DBconfig$EMUwebAppConfig$restrictions$bundleComments = TRUE + DBconfig$EMUwebAppConfig$restrictions$bundleFinishedEditing = TRUE + store_DBconfig(emuDBhandle, DBconfig) + } + } + + if(!is.null(DBconfig$EMUwebAppConfig$restrictions$bundleComments) || !is.null(DBconfig$EMUwebAppConfig$restrictions$bundleFinishedEditing)){ + if(is.null(bundleListName)){ + warning(paste0("'bundleComments' and/or 'bundleFinishedEditing' are set to true ", + "in the DBconfig and the bundleListName parameter wasn't set! Any changes made ", + "to those fields in the bundleListSideBar in the EMU-webApp won't be saved as ", + "those values are stored in the bundleLists!")) + } + } + + + if(!is.null(sessionPattern) && sessionPattern!='.*'){ + ssl = emuR_regexprl(sessionPattern, bundlesDf[['session']]) + bundlesDf = bundlesDf[ssl,] + } + if(!is.null(bundlePattern) && bundlePattern!='.*'){ + bsl = emuR_regexprl(bundlePattern,bundlesDf[['name']]) + bundlesDf = bundlesDf[bsl,] + } + + if(!is.null(bundleListName)){ + bundlesDf = read_bundleList(emuDBhandle, bundleListName) + } + + + httpRequest = function(req){ + # See here: https://github.com/jeffreyhorner/Rook/blob/a5e45f751/README.md + # for req env params + if(req$REQUEST_METHOD == "GET"){ + # this is used if URL is used instead of BASE64 in encoding of media file in bundle + queryStr = shiny::parseQueryString(req$QUERY_STRING) + # SIC this should also have a third parameter "ext/extension" + if(!is.null(queryStr$session) && !is.null(queryStr$bundle)){ + #print("processing GET request to media file...") + mediaFilePath = file.path(emuDBhandle$basePath, + paste0(queryStr$session, session.suffix), + paste0(queryStr$bundle, bundle.dir.suffix), + paste0(queryStr$bundle, ".", queryStr$fileExtension)) + + audioFile = file(mediaFilePath, "rb") + audioFileData = readBin(audioFile, + raw(), + n = file.info(mediaFilePath)$size) + close(audioFile) + # Only + # Rook conform answer + res = list( + status = 200L, # + headers = list( + 'Content-Type' = 'audio/x-wav', + 'Access-Control-Allow-Origin' = "*" + ), + body = audioFileData + ) + return(res) + } else { + # Serve local EMU-webApp files + # adopted from servr:::serve_dir() + #owd = setwd(dir) + #on.exit(setwd(owd)) + path = httpuv::decodeURIComponent(req$PATH_INFO) + Encoding(path) = "UTF-8" + #print(path) + status = 200L + # only allow requests to / -> mapped to getOption("emuR.emuWebApp.dir") + if (TRUE) { # TODO: check if correct path + path = file.path(getOption("emuR.emuWebApp.dir"), path) + } else { + # reject all other requests + return(list( + status = 404L, + headers = list( + 'Content-Type' = 'text/html' + ), + body = paste0("404: Requested path that doesn't contain /EMU-webApp/") + )) + } + + # test if dir is requested -> use index.html as body + body = if (utils::file_test("-d", path)) { + type = "text/html" + if (file.exists(idx <- file.path(path, "index.html"))) { + readLines(idx, warn = FALSE) + } + else { + # this should currently never be reached! + d = file.info(list.files(path, all.files = TRUE, + full.names = TRUE)) + title = utils::URLencode(path,reserved = TRUE) + c("", + "", + "", + sprintf("%s", title), + "", + "", + c(sprintf("

Index of %s

", title), + fileinfo_table(d)), + "", + "") + + } + } + else { + type = guess_type(path) + range = req$HTTP_RANGE + if (is.null(range) || identical(range, "bytes=0-")){ + readBin(path, "raw", file.info(path)[, "size"]) + #read_raw(path) + } else { + # handle range requests + range = strsplit(range, split = "(=|-)")[[1]] + b2 = as.numeric(range[2]) + b3 = as.numeric(range[3]) + if (length(range) < 3 || (range[1] != "bytes") || + (b2 >= b3) || (b3 == 0)) + return(list(status = 416L, + headers = list(`Content-Type` = "text/plain"), + body = "Requested range not satisfiable\r\n")) + status = 206L + con = file(path, open = "rb", raw = TRUE) + on.exit(close(con)) + seek(con, where = b2, origin = "start") + readBin(con, "raw", b3 - b2 + 1) + } + } + if (is.character(body) && length(body) > 1){ + body = paste(body, collapse = "\n") + } + res = list(status = status, + body = body, + headers = c(list(`Content-Type` = type), + if (status == 206L) list(`Content-Range` = paste(sub("=", " ", req$HTTP_RANGE), file.info(path)[, "size"], sep = "/")))) + + return(res) + } + + } + } + + onHeaders <- function(req){ + # following httuv docs we should return NULL here to proceed but that terminates the R session! + #return(NULL) + } + + serverEstablished = function(ws){ + + cat("emuR websocket service established\n") + + serverClosed = function(ws){ + cat("emuR websocket service closed\n") + } + + sendError = function(ws,errMsg,callbackID){ + status = list(type = 'ERROR', details = errMsg); + response = list(callbackID = callbackID, + status) + responseJSON = jsonlite::toJSON(response, + auto_unbox = TRUE, + force = TRUE, + pretty = TRUE) + result=ws$send(responseJSON) + } + + serverReceive = function(isBinary, DATA){ + if(debugLevel >= 4 ){ + cat("onMessage() call, binary:", + isBinary, + " data: ", + DATA, + "\n") + + } + D = "" + if(is.raw(DATA)) { + D = rawToChar(DATA) + }else{ + D = DATA + } + D = readr::parse_character(D) # ensure UTF-8 encoding windows + jr = jsonlite::fromJSON(D, simplifyVector = FALSE) + if(debugLevel >= 2 ){ + cat("Received command from EMU-webApp: ", jr[['type']], "\n") + if(debugLevel >= 3){ + jrNms = names(jr) + + for( jrNm in jrNms){ + value = jr[[jrNm]] + cat("param: ", jrNm) + if(inherits(value, 'character')){ + cat(": ", jr[[jrNm]]) + } + cat("\n") + } + } + + } + if(!is.null(jr$type)){ + if(debugLevel >= 2 ){ + cat("Received type from EMU-webApp: ", jr[['type']], "\n") + } + + } + if(jr$type == 'GETPROTOCOL'){ + + protocolData = list(protocol = 'EMU-webApp-websocket-protocol', + version = '0.0.2') + response=list(status = list(type='SUCCESS'), + callbackID = jr$callbackID, + data = protocolData) + responseJSON=jsonlite::toJSON(response, + auto_unbox = TRUE, + force = TRUE, + pretty = TRUE) + result = ws$send(responseJSON) + if(debugLevel >= 2){ + cat("Sent protocol. \n") + } + + }else if(jr$type == 'GETDOUSERMANAGEMENT'){ + # R server mode is single user mode + response = list(status = list(type = 'SUCCESS'), + callbackID = jr$callbackID, + data = "NO") + responseJSON = jsonlite::toJSON(response, + auto_unbox = TRUE, + force = TRUE, + pretty = TRUE) + result = ws$send(responseJSON) + if(debugLevel >= 2){ + cat("Sent user managment: no. \n") + } + + }else if(jr$type == 'GETGLOBALDBCONFIG'){ + if(debugLevel >= 4){ + cat("Send config: ", as.character(DBconfig), "\n") + } + response = list(status = list(type='SUCCESS'), + callbackID = jr$callbackID, + data = DBconfig) + responseJSON = jsonlite::toJSON(response, + auto_unbox = TRUE, + force = TRUE, + pretty = TRUE) + result = ws$send(responseJSON) + if(debugLevel >= 2){ + if(debugLevel >=4){ + cat(responseJSON,"\n") + } + cat("Sent config. \n") + } + #} + + + }else if(jr$type == 'GETBUNDLELIST'){ + response = list(status = list(type = 'SUCCESS'), + callbackID = jr$callbackID, + dataType = 'uttList', + data = bundlesDf) + # create time anchors + if(!is.null(seglist)){ + dataWithTimeAnchors = list() + for(i in 1:nrow(response$data)){ + sesBool = response$data[i,]$session == seglist$session + bndlBool = response$data[i,]$bundle == seglist$bundle + start_sample_vals = round(((seglist[sesBool & bndlBool,]$start / 1000) + 0.5 / seglist[sesBool & bndlBool,]$sample_rate) + * seglist[sesBool & bndlBool,]$sample_rate) + # end_sample_vals calculated with + 1 as EMU-webApp seems to always mark the right boundary left of the selected sample + end_sample_vals = round(((seglist[sesBool & bndlBool,]$end / 1000) + 0.5/seglist[sesBool & bndlBool,]$sample_rate) * + seglist[sesBool & bndlBool,]$sample_rate) + dataWithTimeAnchors[[i]] = list(session = response$data[i,]$session, + name = response$data[i,]$bundle, + timeAnchors = data.frame(sample_start = start_sample_vals, + sample_end = end_sample_vals)) + + } + response$data = dataWithTimeAnchors + } + responseJSON = jsonlite::toJSON(response, + auto_unbox = TRUE, + force = TRUE, + pretty = TRUE) + if(debugLevel >= 5) cat(responseJSON,"\n") + result = ws$send(responseJSON) + if(debugLevel >= 2){ + cat("Sent utterance list with length: ", + nrow(bundlesDf), + " \n") + } + + }else if(jr$type == 'GETBUNDLE'){ + + bundleName = jr[['name']] + bundleSess = jr[['session']] + #cat("data:",jr[['data']],"\n") + if(debugLevel > 2){ + cat("Requested bundle:", bundleName, ",session:", bundleSess, "\n") + } + err = NULL + if(debugLevel > 3){ + cat("Convert bundle to S3 format", bundleName, "\n") + } + # construct path to annotJSON + annotFilePath = normalizePath(file.path(emuDBhandle$basePath, + paste0(bundleSess, session.suffix), + paste0(bundleName, bundle.dir.suffix), + paste0(bundleName, bundle.annotation.suffix, '.json'))) + + b = jsonlite::fromJSON(annotFilePath, + simplifyVector = FALSE) + if(is.null(b)){ + # error + err = simpleError(paste('Could not load bundle ', + bundleName, + ' of session ', + bundleSess)) + } + if(rstudioapi::isAvailable()){ + translateFunction = rstudioapi::translateLocalUrl + } else { + translateFunction = paste0 + } + mediaFile = list(encoding = "GETURL", + data = paste0(translateFunction(paste0("http://", ws$request$HTTP_HOST)), + "?session=", + utils::URLencode(bundleSess, reserved = TRUE), + "&bundle=", + utils::URLencode(bundleName, reserved = TRUE), + "&fileExtension=", + utils::URLencode(DBconfig$mediafileExtension, reserved = TRUE))) + # print(mediaFile) + + if(is.null(err)){ + ssffTracksInUse = get_ssffTracksUsedByDBconfig(DBconfig) + ssffTrackNmsInUse = c() + for(ssffTrackInUse in ssffTracksInUse){ + ssffTrackNmsInUse = c(ssffTrackNmsInUse, + ssffTrackInUse[['name']]) + } + if(debugLevel >= 4){ + + cat(length(ssffTrackNmsInUse), " track definitions in use:\n") + for(sfInU in ssffTrackNmsInUse){ + cat(sfInU, " ") + } + cat("\n") + } + ssffFiles = list() + # Hash (here: named character vector) with SSFF files extension as key and file path as value + # avoids duplicates in ssff files list + ssffFilesHash = character(0) + for(ssffTr in DBconfig$ssffTrackDefinitions){ + if(ssffTr[['name']] %in% ssffTrackNmsInUse){ + fe = ssffTr[['fileExtension']] + ssffFilesHash[fe] = normalizePath(file.path(emuDBhandle$basePath, + paste0(bundleSess, session.suffix), + paste0(bundleName, bundle.dir.suffix), + paste0(bundleName, ".", fe))) + } + } + # read SSFF track file data + ssffFileExts = names(ssffFilesHash) + for(ssffFileExt in ssffFileExts){ + ssffFilePath = ssffFilesHash[ssffFileExt] + mf = tryCatch(file(ssffFilePath, "rb"), + error = function(e) {err<<-e}) + if(is.null(err)){ + mfData = readBin(mf, + raw(), + n = file.info(ssffFilePath)$size) + if(inherits(mfData,'error')){ + err = mfData + break + } + }else{ + break + } + mfDataBase64 = base64enc::base64encode(mfData) + encoding = "BASE64" + ssffDatObj = list(encoding = encoding, + data = mfDataBase64, + fileExtension = ssffFileExt) + ssffFiles[[length(ssffFiles) + 1]] = ssffDatObj + close(mf) + } + if(is.null(err)){ + data = list(mediaFile = mediaFile, + ssffFiles = ssffFiles, + annotation = b) + } + } + + if(is.null(err)){ + responseBundle = list(status = list(type = 'SUCCESS'), + callbackID = jr$callbackID, + responseContent = 'bundle', + contentType = 'text/json', + data = data) + }else{ + errMsg = err[['message']] + cat("Error: ", errMsg, "\n") + responseBundle = list(status = list(type = 'ERROR', message=errMsg), + callbackID = jr[['callbackID']], + responseContent = 'status', + contentType = 'text/json') + + } + responseBundleJSON = jsonlite::toJSON(responseBundle, + auto_unbox = TRUE, + force = TRUE, + pretty = FALSE) + # print(mediaFile) + result = ws$send(responseBundleJSON) + + if(is.null(err) & debugLevel >= 2){ + + if(debugLevel >= 8){ + cat(responseBundleJSON, "\n") + } + cat("Sent bundle containing", length(ssffFiles), "SSFF files\n") + } + # reset error + err = NULL + + } else if(jr[['type']] == 'SAVEBUNDLE'){ + jrData = jr[['data']] + jrAnnotation = jrData[['annotation']] + bundleSession = jrData[['session']] + bundleName = jrData[['annotation']][['name']] + if(debugLevel > 3){ + cat("Save bundle ", bundleName, " from session ", bundleSession, "\n"); + } + err = NULL + + ssffFiles = jr[['data']][['ssffFiles']] + oldBundleAnnotDFs = load_bundleAnnotDFsDBI(emuDBhandle, bundleSession, bundleName) + + # warnings as errors + warnOptionSave = getOption('warn') + options('warn' = 2) + on.exit(options(warn = warnOptionSave)) + responseBundle = NULL + + # check if cached version of bundle is available + if(is.null(oldBundleAnnotDFs)){ + # error + err = simpleError(paste('Could not load bundle ', + bundleSession, + bundleName)) + }else{ + for(ssffFile in ssffFiles){ + inCfg = FALSE + sp = normalizePath(file.path(emuDBhandle$basePath, + paste0(bundleSession, session.suffix), + paste0(bundleName, bundle.dir.suffix), + paste0(bundleName, ".", ssffFile$fileExtension))) + if(is.null(sp)){ + errMsg = paste0("SSFF track definition for file extension '", + ssffFile[['fileExtension']], + "' not found!") + err = simpleError(errMsg) + }else{ + # store + if(debugLevel > 3){ + cat("Writing SSFF track to file: ", sp, "\n") + } + ssffTrackBin = base64enc::base64decode(ssffFile[['data']]) + ssffCon = tryCatch(file(sp,'wb'), + error = function(e){err <<- e}) + if(is.null(err)){ + res = tryCatch(writeBin(ssffTrackBin, ssffCon)) + close(ssffCon) + if(inherits(res,'error')){ + err = res + break + } + # modified<<-TRUE + } + } + } + bundleData = jr[['data']][['annotation']] + + # if we do not have an (error) response already + if(is.null(err)){ + ##### emuDB #### + # construct path to annotJSON and store + annotFilePath = file.path(emuDBhandle$basePath, + paste0(bundleSession, session.suffix), + paste0(bundleName, bundle.dir.suffix), + paste0(bundleName, bundle.annotation.suffix, '.json')) + + json = jsonlite::toJSON(bundleData, + auto_unbox = TRUE, + force = TRUE, + pretty = TRUE) + + # use try mainly for permission problems on file system + res = tryCatch(writeLines(json, annotFilePath, useBytes = TRUE), + error = function(e) e) + if(inherits(res,'error')){ + err = res + } + + #### DBI ### + # remove + DBI::dbBegin(emuDBhandle$connection) + remove_bundleDBI(emuDBhandle, + sessionName = bundleSession, + name = bundleName) + remove_bundleAnnotDBI(emuDBhandle, + sessionName = bundleSession, + bundleName = bundleName) + # store + # calculate MD5 sum of bundle annotJSON + newMD5annotJSON = tools::md5sum(annotFilePath) + names(newMD5annotJSON) = NULL + + bundleAnnotDFs = annotJSONcharToBundleAnnotDFs(as.character(json)) + add_bundleDBI(emuDBhandle, + sessionName = bundleSession, + name = bundleName, + bundleAnnotDFs$annotates, + bundleAnnotDFs$sampleRate, + newMD5annotJSON) + store_bundleAnnotDFsDBI(emuDBhandle, + bundleAnnotDFs, + sessionName = bundleSession, + bundleName = bundleName) + + DBI::dbCommit(emuDBhandle$connection) + + # update bundlesDf and store as bundleList + if(!is.null(bundleListName)){ + bl = read_bundleList(emuDBhandle, bundleListName) + # print(jr[['data']][['comment']]) + bl[bl$session == bundleSession & + bl$name == bundleName,]$comment = jr[['data']][['comment']] + bl[bl$session == bundleSession & + bl$name == bundleName,]$finishedEditing = jr[['data']][['finishedEditing']] + + write_bundleList(emuDBhandle, bundleListName, bl) + + } + } + } + if(is.null(err)){ + responseBundle = list(status = list(type = 'SUCCESS'), + callbackID = jr$callbackID, + responseContent = 'status', + contentType = 'text/json') + }else{ + m = err[['message']] + cat('Error: ', m, "\n") + responseBundle = list(status = list(type = 'ERROR', message = m), + callbackID = jr[['callbackID']], + responseContent = 'status', + contentType = 'text/json') + } + # response object to JSON + responseBundleJSON = jsonlite::toJSON(responseBundle, + auto_unbox = TRUE, + force = TRUE, + pretty = TRUE) + # send response + result = ws$send(responseBundleJSON) + + # reset error + err = NULL + + }else if(jr[['type']] == 'DISCONNECTWARNING'){ + response = list(status = list(type = 'SUCCESS'), + callbackID = jr[['callbackID']], + responseContent = 'status', + contentType = 'text/json') + responseJSON = jsonlite::toJSON(response, + auto_unbox = TRUE, + force = TRUE, + pretty = TRUE) + result = ws$send(responseJSON) + ws$close() + cat("emuR websocket service closed by EMU-webApp\n") + } + } + ws$onMessage(serverReceive) + ws$onClose(serverClosed) + } + + # stop all running servers + httpuv::stopAllServers() + + # user messages + cat("Navigate your browser to the EMU-webApp URL: https://ips-lmu.github.io/EMU-webApp/ (should happen automatically)\n") + cat("Server connection URL: ws://localhost:", port, "\n", sep = '') + cat("To stop the server either press the 'clear' button in the EMU-webApp, close/reload the webApp in your browser,\n") + cat("or call the httpuv::stopAllServers() function\n") + + # build app list + app = list(call = httpRequest, + onHeaders = onHeaders, + onWSOpen = serverEstablished) + + # start server + httpuv::startServer(host = host, + port = port, + app = app) + + # either open browser of clone and host local EMU-webApp + if(length(autoOpenURL) != 0 && autoOpenURL != ""){ + # open browser with EMU-webApp + viewer <- getOption("viewer") + if(useViewer & rstudioapi::isAvailable()){ + webApp_path = getOption("emuR.emuWebApp.dir") + # TODO: can this be emulated? git clone --depth 1 -b gh-pages https://github.com/IPS-LMU/EMU-webApp + # unlink(webApp_path, recursive = TRUE) + if(!dir.exists(webApp_path)){ + + # for development + # file.copy(from = "~/Developer/EMU-webApp/dist/", + # to = tempdir(), + # recursive = TRUE) + # + # file.rename(from = file.path(tempdir(), "dist"), + # to = webApp_path) + + resp = httr::GET("https://github.com/IPS-LMU/EMU-webApp/releases/latest") + redirect_url_split = stringr::str_split(resp$url, "/") + tag = redirect_url_split[[1]][length(redirect_url_split[[1]])] + zip_download_url = paste0("https://github.com/IPS-LMU/EMU-webApp/archive/refs/tags/", tag, ".zip") + zip_path_local = paste0(webApp_path, ".zip") + httr::GET(zip_download_url, + httr::write_disk(zip_path_local, overwrite=TRUE)) + + utils::unzip(zipfile = zip_path_local, # this creates a dir like EMU-webApp-1.x.x in tempdir() + exdir = tempdir(), + overwrite = TRUE) + unziped_path_local = file.path(tempdir(), + paste0("EMU-webApp-",stringr::str_remove(tag, "v"))) + file.rename(from = file.path(unziped_path_local, "dist"), + to = webApp_path) + + # clean up + unlink(zip_path_local) + unlink(unziped_path_local, recursive = TRUE) + } + + # replace tag because rstudio changes this + # in the web version and Angular needs it to be set + base_path = "/" + if(rstudioapi::isAvailable()){ + if(rstudioapi::translateLocalUrl(paste0("http://localhost:", port, "/")) != paste0("http://localhost:", port, "/")){ + base_path = paste0("/", rstudioapi::translateLocalUrl(paste0("http://localhost:", port, "/"))) + } + } + + index_html = readr::read_file(file.path(webApp_path, "index.html")) + index_html_new = stringr::str_replace(index_html, + pattern = "", + replacement = paste0("")) + # remove manifest entry to avoid caching of local version + index_html_new = stringr::str_replace(index_html_new, + pattern = "manifest=\"manifest.appcache\"", + replacement = "") + + + readr::write_file(x = index_html_new, + file = file.path(webApp_path, "index.html")) + + if (!is.null(viewer)){ + # host in viewer + viewer(paste0("http://127.0.0.1:", + port, + "/?autoConnect=true", + "&serverUrl=", + stringr::str_replace(rstudioapi::translateLocalUrl(paste0("http://127.0.0.1:", port), absolute = TRUE), + "http", + "ws"))) + }else{ + # host in browser + utils::browseURL(paste0("http://127.0.0.1:", + port, + "/?autoConnect=true", + "&serverUrl=ws://127.0.0.1:", + port), + browser = browser) + } + + }else{ + # use online version + utils::browseURL(autoOpenURL, browser = browser) + cat("Unable to detect RStudio. Serving to online version.\n") + } + } + + + + return(invisible(TRUE)) + +} + +## searches for all tracks needed by the EMUwebApp and +## returns their ssffTrackDefinitions +get_ssffTracksUsedByDBconfig <- function(DBconfig){ + allTracks = NULL + + # anagestConfig ssffTracks + for(ld in DBconfig$levelDefinitions){ + allTracks = c(allTracks, + ld$anagestConfig$verticalPosSsffTrackName, + ld$anagestConfig$velocitySsffTrackName) + } + + for(p in DBconfig$EMUwebAppConfig$perspectives){ + # tracks in signalCanvases$order + for(sco in p$signalCanvases$order){ + allTracks = c(allTracks, sco) + } + # tracks in twoDimCanvases$order + for(tdco in p$twoDimCanvases$order){ + allTracks = c(allTracks, tdco) + } + + # tracks in signalCanvases$assign + for(sca in p$signalCanvases$assign){ + allTracks = c(allTracks, sca$ssffTrackName) + } + # tracks in p$twoDimCanvases$twoDimDrawingDefinitions + for(tddd in p$twoDimCanvases$twoDimDrawingDefinitions){ + # dots + for(dot in tddd$dots){ + allTracks = c(allTracks, dot$xSsffTrack, dot$ySsffTrack) + } + # staticContours + for(dot in tddd$staticContours){ + allTracks = c(allTracks, dot$xSsffTrack, dot$ySsffTrack) + } + } + } + # uniq tracks + allTracks = unique(allTracks) + # remove OSCI and SPEC tracks + allTracks = allTracks[allTracks != 'OSCI' & allTracks != 'SPEC'] + + # get corresponding ssffTrackDefinitions + allTrackDefs = list() + for(std in DBconfig$ssffTrackDefinitions){ + if(std$name %in% allTracks){ + allTrackDefs[[length(allTrackDefs) + 1]] = std + } + } + + return(allTrackDefs) +} + + +# copy of servr:::fileinfo_table +fileinfo_table = function (info) { + info = info[order(info$isdir, decreasing = TRUE), ] + d = info$isdir + i = !is.na(d) + x1 = paste(basename(rownames(info)), ifelse(d & i, "/", ""), + sep = "") + x1 = utils::URLencode(x1, reserved = TRUE) + x1[i] = sprintf("%s", x1[i], x1[i]) + x2 = paste(format(info$size, scientific = FALSE, big.mark = ","), + "B") + x2[is.na(info$size) | d] = "" + x3 = as.character(info$mtime) + x3[is.na(x3)] = "" + c("", "", sprintf("", c("Name", + "Size", "Date Modified")), "", apply(cbind("", + sprintf("", x1), sprintf("", + x2), sprintf("", x3), ""), 1, paste, + collapse = ""), "
%s
%s%s%s
") +} + +# copy of servr:::guess_type +guess_type <- function (path) +{ + mimetype = function(...) { + system2("mimetype", c("-b", shQuote(path)), ...) + } + if (Sys.which("mimetype") == "" || mimetype(stdout = NULL) != + 0) + return(mime::guess_type(path)) + mimetype(stdout = TRUE) +} + diff --git a/R/emuR-validate.R b/R/emuR-validate.R new file mode 100644 index 00000000..106a52fe --- /dev/null +++ b/R/emuR-validate.R @@ -0,0 +1,78 @@ +## Validates the DBI representation of bundle +## +validate_bundleDBI <- function(emuDBhandle, + session, + bundle){ + + DBconfig = load_DBconfig(emuDBhandle) + + dbLevelDefs = list_levelDefinitions(emuDBhandle) + + # check that levels with same name are present + levelNames <- DBI::dbGetQuery(emuDBhandle$connection, + paste0("SELECT DISTINCT level ", + "FROM items ", + "WHERE db_uuid = '", emuDBhandle$UUID, "' ", + " AND session = '", session, "' ", + " AND bundle ='", bundle, "'"))$level + + levelDefNames = sapply(DBconfig$levelDefinitions, function(l) l$name) + delta1 = setdiff(levelNames, levelDefNames) + delta2 = setdiff(levelDefNames, levelNames) + + if(length(delta1) != 0 || length(delta2) != 0){ + if(length(delta1) != 0){ + return(list(type = 'ERROR', + message = paste('Following levels where found that do not ", + "match any levelDefinition:', paste(delta1), ';', + 'in bundle:', bundle))) + }else{ + warning("No items for levelDefinition where found for level:'", + paste(delta2), "';", "in bundle:'", bundle , "'") + } + } + + # check that levels have same types + bundleLevels <- DBI::dbGetQuery(emuDBhandle$connection, paste0("SELECT DISTINCT ", + " level AS name, ", + " type ", + "FROM items ", + "WHERE db_uuid = '", emuDBhandle$UUID, "' ", + " AND session = '", session, "' ", + " AND bundle ='", bundle, "' ")) + + joinedLevelDefs = bundleLevels %>% + dplyr::left_join(dbLevelDefs, by = "name") %>% + dplyr::select("name", + DBconfigType = "type.x", + bundleType = "type.y") + + if(!all(joinedLevelDefs$DBconfigType == joinedLevelDefs$bundleType)){ + return(list(type = 'ERROR', + message = paste0('There are level types that differ from those defined:\n', + paste(utils::capture.output(print(joinedLevelDefs)), + collapse = "\n")))) + } + + # validate sequence and overlaps in items of type SEGMENTS + tmp <- DBI::dbGetQuery(emuDBhandle$connection, paste0("SELECT DISTINCT * ", + "FROM items ", + "WHERE session = '", session,"' ", + " AND bundle ='", bundle, "' ", + " AND type = 'SEGMENT'")) + + #TODO: VALIDATE: SEQUENCE + OVERLAPS / LINKS' + + + + return(list(type = 'SUCCESS', + message = '')) + +} + + + + +## FOR DEVELOPMENT +# library('testthat') +# test_file('tests/testthat/test_validate.R') diff --git a/R/emuRtestsuite.R b/R/emuRtestsuite.R deleted file mode 100644 index f4c0f08a..00000000 --- a/R/emuRtestsuite.R +++ /dev/null @@ -1,83 +0,0 @@ -#-- emuRtestsuite.R -#-- -# -#-- Created by Tina John on 14.02.12. -#-- Copyright 2012 IPS LMU Munich. All rights reserved. - -emu.testsuite = function ( ) { - # emu.query - message("\n Available Databases:") - cat("dbinfo() \n\n") - print(dbinfo()) - - if(!any(dbinfo()=="demo")) { - message("No database demo found in the local EMU System - function can not be run") - return(invisible()) - } - par(mfrow=c(2,2)) - - # emu.query - message("\n An EMU query") - cat("seg = emu.query(\"demo\",\"*\",\"Phonetic = @: | e: | ei | A | E | @u | \'4\'\")\n\n") - - seg = emu.query("demo","*","Phonetic = @: | e: | ei | A | E | @u | '4'") - print(seg) - - message("\n Extract Labels from segment list") - cat("seg.lab = label(seg) \n") - seg.lab = label(seg) - print(seg.lab) - - # emu.track - message("\n Extraction of the tracks for the segment list") - cat("seg.sample = emu.track(seg,\"samples\")\n\n") - seg.sample = emu.track(seg,"samples") - message("\n Plot of first signal in track objekt and segment list [@]") - cat("plot(seg.sample[1],label=seg.lab[1], type=\"l\", main=\"waveforms\")\n\n") - plot(seg.sample[1],label=seg.lab[1], type="l", main="waveforms") - - # dplot, eplot - message("\n Extract track values at point in time") - cat("seg.fm = emu.track(seg,\"fm\"); # get formants\n") - seg.fm = emu.track(seg,"fm") - cat("seg.fm.5 = dcut(seg.fm, .5, prop =TRUE); # cut data at 50% segment duration\n") - seg.fm.5 = dcut(seg.fm, .5, prop =TRUE) - message("\n Plot the data as time signal and formant card\n") - cat("dplot(seg.fm[,1:2], seg.lab, normalise=TRUE, main = \"Formants over vowel duration\"); #time signal\n") - dplot(seg.fm[,1:2], seg.lab, normalise=TRUE, main = "Formants over vowel duration") - cat("eplot(seg.fm.5[,1:2], seg.lab, dopoints=TRUE, doellipse=FALSE, main = \"F1/F2 of vowel midpoint\", formant=TRUE, xlab = \"F2 in Hz\", ylab = \"F1 in Hz\"); # F1/F2 plane\n") - eplot(seg.fm.5[,1:2], seg.lab, dopoints=TRUE, doellipse=FALSE, main = "F1/F2 of vowel midpoint", formant=TRUE, xlab = "F2 in Hz", ylab = "F1 in Hz") - - # emu.requery - message("\n An EMU query and ...") - cat("segH = emu.query(\"demo\",\"*\",\"Phonetic = H\")\n\n") - segH = emu.query("demo","*","Phonetic = H") - print(segH) - - message("\n ... and requery") - cat("segHseql1 = emu.requery(segH,\"Phonetic\",\"Phonetic\",sequence=-1)\n\n") - segHseql1 = emu.requery(segH,"Phonetic","Phonetic",sequence=-1) - print(segHseql1) - segH.lab = label(segHseql1) - - message("\n Plot of spectral data from 1% of aspiration duration (burst) ") - cat("segH.dft = emu.track(segH,\"dft\"); #spectral data\n") - segH.dft = emu.track(segH, "dft") - cat("segH.dft.01 = dcut(segH.dft, .01,prop=TRUE); #Extract mid point\n") - segH.dft.01 = dcut(segH.dft, 0.01, prop = TRUE) - cat("plot(segH.dft.01,segH.lab, main = \"Spectral data of aspiration\"); #Plot data\n") - cat("... ... #Create label vektor with alv and bil to separate stops\n") - alv = segH.lab - alv = segH.lab == "d" - bil = segH.lab %in% c("p","b") - alvbi = segH.lab - alvbi[segH.lab == "d"] = "alv" - alvbi[segH.lab %in% c("p","b")] = "bil" - - cat("segH.dft.01.smooth = fapply(segH.dft.01,dct,5,fit=TRUE); #Smooth data with 5 dct coefficients\n") - segH.dft.01.smooth = fapply(segH.dft.01,dct,5,fit=TRUE) - - cat("plot(segH.dft.01.smooth[alv | bil,], alvbi[alv | bil], fun=mean, power=TRUE, main = \"Spectral data of burst\"); #Plot data\n") - plot(segH.dft.01.smooth[alv | bil,], alvbi[alv | bil], fun=mean, power=TRUE,main = "DCT smoothed spectral data of burst") - -} diff --git a/R/emusegs.R b/R/emusegs.R index 77e66830..45aef9a6 100644 --- a/R/emusegs.R +++ b/R/emusegs.R @@ -1,30 +1,45 @@ -############################################################################# -## # -## copyright : (C) 2000 SHLRC, Macquarie University # -## email : Steve.Cassidy@mq.edu.au # -## url : http://www.shlrc.mq.edu.au/emu # -## # -## This program is free software; you can redistribute it and/or modify # -## it under the terms of the GNU General Public License as published by # -## the Free Software Foundation; either version 2 of the License, or # -## (at your option) any later version. # -## # -############################################################################# - - - -read.emusegs<- function(file) +##' Create an Emu segment list from a file +##' +##' Create an Emu segment list from a file saved by the Emu query tools. +##' +##' Reads segment lists created by programs external to R/Splus and stored in +##' text files on disk. +##' +##' @param file The name of the file to read +##' @return An Emu segment list. +##' @author Steve Cassidy +##' @seealso \code{\link{query}} +##' @keywords IO +##' @examples +##' +##' ## create a segment list file and write it out +##' # seglist.txt <- "database:demo"\ +##' # query:Phonetic=vowel\ +##' # type:segment\ +##' #\ +##' # @@: 3059.65 3343.65 msdjc001\ +##' # e: 5958.55 6244.55 msdjc002\ +##' # @@u 8984.75 9288.75 msdjc003\ +##' # A 11880.8 12184.8 msdjc004\ +##' # E 17188.3 17366.4 msdjc005\ +##' # ei 20315.2 20655.2 msdjc006" +##' +##' \dontrun{cat(seglist.txt, file="seglist.txt")} +##' +##' # now read it back as a segment list +##' \dontrun{segs <- read.emusegs("seglist.txt")} +##' \dontrun{segs} +##' ## and clean up +##' \dontrun{unlink("seglist.txt")} +##' +##' +##' @export read.emusegs +read.emusegs <- function(file) { ## scan the lines of the file into a vector - - ## R 1.4 introduced comment.char="#" arg to scan, grrr - if( is.R() && as.numeric(version$minor) > 3.0 ) { - ## in R, we need to avoid skipping the # as a comment line - lines <- scan(file, what = "", sep="\n", comment.char="") - } else { - lines <- scan(file, what = "", sep="\n") - } - + + lines <- scan(file, what = "", sep="\n", comment.char="") + ## first three lines are header followed by a hash line inheader <- 1 i <- 1 @@ -42,17 +57,17 @@ read.emusegs<- function(file) i <- i + 1 } } - + if (inheader) { stop( "End of header (#) not found in segment file" ) } - + ## now slurp the body of the segment list mat <- matscan( file, 4, what="", sk=i ) segs <- make.seglist(mat[,1], mat[,2], mat[,3], mat[,4], - query, type, database ) - + query, type, database ) + segs } @@ -60,44 +75,192 @@ if( version$major >= 5 ) { setOldClass(c("emusegs", "data.frame")) } + + + + + + + + +##' Make an Emu segment list from the various components +##' +##' This is the appropriate way to make an Emu segment list and ensure that it +##' has all of the required components. +##' +##' An Emu segment list is the result of a query to a speech database (see +##' \code{\link{query}}) and has one row per matching segment or event from +##' the query. Each row lists the label, start and end times (in milliseconds) +##' and utterance name for the segment. This information is used by +##' \code{\link{get_trackdata}} and other functions to extract data corresponding +##' to these segments. +##' +##' In order to ensure the proper format for segment lists and to ensure +##' against future changes to the format, \code{make.seglist} should be used +##' whenever you wish to create a segment list. Another function, +##' \code{\link{modify.seglist}} can be used to change some part of an existing +##' segment list. The functions \code{\link{label.emusegs}}, +##' \code{\link{start.emusegs}}, \code{\link{end.emusegs}} and +##' \code{\link{utt.emusegs}} can be used to access the different columns of +##' the segment list. +##' +##' @param labels A character vector of labels for each segment +##' @param start A vector of start times +##' @param end A vector of end times +##' @param utts A character vector of utterance names +##' @param query A query string +##' @param type \code{segment} or \code{event} +##' @param database The database name associated with the segment list +##' @return An Emu segment list. +##' @author Steve Cassidy +##' @seealso \code{\link{modify.seglist}}, \code{\link{label.emusegs}} +##' @keywords misc +##' @examples +##' +##' +##' l <- c("A", "B", "C") +##' s <- 1:3 +##' e <- 2:4 +##' u <- c("u1", "u1", "u1") +##' segs <- make.seglist(l, s, e, u, "Fake Query", "segment", "fake") +##' segs +##' ## summary gives an overview of the data in the segment list +##' summary(segs) +##' +##' +##' # The following should be TRUE +##' label(segs) == l +##' dur(segs) == s +##' end(segs) == e +##' utt(segs) == u +##' emusegs.database(segs) == "fake" +##' emusegs.type(segs) == "segment" +##' emusegs.query(segs) == "Fake Query" +##' +##' # segment durations should all be 1 +##' dur(segs) == c(1,1,1) +##' +##' +##' @export make.seglist make.seglist <- function(labels, start, end, utts, query, type, database) { seglist <- data.frame(labels=I(as.character(labels)), - start=as.numeric(start), - end=as.numeric(end), - utts=I(as.character(utts))) - + start=as.numeric(start), + end=as.numeric(end), + utts=I(as.character(utts))) + if( version$major >= 5 ) { oldClass(seglist) <- "emusegs" } else { class(seglist) <- c("emusegs", "data.frame") } - + attr(seglist, "query") <- query attr(seglist, "type") <- type attr(seglist, "database") <- database - + seglist } + + + + + + + + +##' is seglist +##' +##' see function +##' +##' +##' @keywords internal +##' @export is.seglist is.seglist <- function(object) { return( inherits(object, "emusegs") ) } ## modify a segment list by changing one or more of the fields + + + + + + + + +##' Modify one of the components of an Emu segment list +##' +##' This function can be used to modify one of the parts of an Emu segment list +##' while leaving the other parts unchanged. +##' +##' An Emu segment list has a number of components and is stored as an R object +##' of class \code{emusegs}. This function can be used to modify a segment +##' list while retaining all of the proper structures. +##' +##' Any new vectors passed to the function must have the same length as the +##' segment list itself for this call to succeed. +##' +##' All arguments are optional and default to not modifying the segment list if +##' not supplied. +##' +##' The original segment list is not modified, instead, a modified copy is +##' returned. +##' +##' @param segs A segment list to modify, a modified copy is returned +##' @param labels A new label vector +##' @param start A new start time vector +##' @param end A new end time vector +##' @param utts A new vector of utterance labels +##' @param query A new query string to associate with the segment list +##' @param type A new type string +##' @param database A new database name +##' @return An Emu segment list. +##' @author Steve Cassidy +##' @seealso \code{\link{query}} +##' @keywords misc +##' @examples +##' +##' data(vowlax) +##' segs = vowlax +##' # extend the start times by 10ms +##' newsegs <- modify.seglist( segs, start=start(segs)+10 ) +##' +##' # change the associated database name +##' # this will affect where emu.track looks to find data +##' newsegs <- modify.seglist( segs, database="notdemo" ) +##' +##' +##' @export modify.seglist "modify.seglist" <- function( segs, - labels=label.emusegs(segs), - start=start.emusegs(segs), - end=end.emusegs(segs), - utts=utt.emusegs(segs), - query=emusegs.query(segs), - type=emusegs.type(segs), - database=emusegs.database(segs)) + labels=label.emusegs(segs), + start=start.emusegs(segs), + end=end.emusegs(segs), + utts=utt.emusegs(segs), + query=emusegs.query(segs), + type=emusegs.type(segs), + database=emusegs.database(segs)) { make.seglist( labels, start, end, utts, - query, type, database ) + query, type, database ) } + + + + + + + + +##' emusegs database +##' +##' Returns the database attribute from a segmentlist +##' +##' +##' @keywords internal +##' @export emusegs.database "emusegs.database" <- function(sl) { if(is.seglist(sl)) @@ -106,6 +269,21 @@ is.seglist <- function(object) { stop( "not an emu segment list" ) } + + + + + + + + +##' segment list type +##' +##' Gives SEGMENT or EVENT +##' +##' +##' @keywords internal +##' @export emusegs.type "emusegs.type" <- function(sl) { if(is.seglist(sl)) @@ -114,6 +292,21 @@ is.seglist <- function(object) { stop( "not an emu segment list" ) } + + + + + + + + +##' emusegs query +##' +##' sends a emu query to EMU +##' +##' +##' @keywords internal +##' @export emusegs.query "emusegs.query" <- function(sl) { if(is.seglist(sl)) @@ -122,7 +315,22 @@ is.seglist <- function(object) { stop( "not an emu segment list" ) } -"print.emusegs" <- function(x, ...) + + + + + + + + +##' print emusegs +##' +##' see function +##' +##' +##' @keywords internal +##' @export +"print.emusegs" <- function(x, ...) { cat(attributes(x)$type, " list from database: ", attributes(x)$database, "\n") cat("query was: ", attributes(x)$query, "\n" ) @@ -134,24 +342,18 @@ is.seglist <- function(object) { print.data.frame(x, ...) } -"[.emusegs"<- function(segs,i,j) -{ - NextMethod("[",drop=FALSE) -} -if( version$major >= 5 ) { -setMethod("[", "emusegs", - function(x, i, j=1:ncol(x), drop = T) - { - if(missing(drop)) - "[.emusegs"(x, i,j) - else - "[.emusegs"(x, i,j) - } - ) -} - -"summary.emusegs" <- function(object, ...) +##' summary emusegs +##' +##' summarizes data in emu segment lists +##' +##' +##' @param object the segmentlist +##' @param \dots nothing special +##' @keywords internal +##' @method summary emusegs +##' @export +summary.emusegs <- function(object, ...) { cat(attributes(object)$type, " list from database: ", attributes(object)$database, "\n") cat("query was: ", attributes(object)$query, "\n" ) @@ -161,22 +363,109 @@ setMethod("[", "emusegs", invisible() } + + + + + + + + +##' Get labels / utterances from segment list +##' +##' label: extracts the labels from the segment list. utt: extracts the +##' utterances from the segment list. +##' +##' +##' @aliases label.emusegs label utt.emusegs utt +##' @param segs segment list +##' @return label / utterance vector +##' @author Jonathan Harrington +##' @seealso \code{\link{segmentlist} \link{start} \link{end}} +##' @keywords methods +##' @examples +##' +##' data(dip) +##' #dip is a segment list - first ten segments only +##' dip[1:10,] +##' +##' +##' #extract labels from the segment list +##' dips.labs = label(dip) +##' dips.labs +##' +##' +##' @export label "label" <- function(segs) { UseMethod("label") } + + + +##' @export "label.emusegs" <- function(segs) { as.character(segs$label) } + + + + + + + + +##' as.matrix.emusegs +##' +##' see function +##' +##' +##' @keywords internal +##' @export "as.matrix.emusegs" <- function(x, ...) { cbind( as.character(x$label), x$start, x$end, as.character(x$utt) ) } + + + + + + + + +##' Write an Emu segment list to a file +##' +##' Writes an Emu segment list to a file +##' +##' +##' @param seglist An Emu segment list +##' @param file The name of a file to write the segment list into. +##' @return None. +##' @section Side Effects: The segment list is written to a file in the +##' standard format, suitable for input to \code{gettrack} or other Emu utility +##' programs. +##' @seealso \code{\link{query}} +##' @keywords misc +##' @examples +##' +##' data(dip) +##' #dip a segment list - first 10 segments only +##' dip[1:10,] +##' \dontrun{write.emusegs(dip, "write.emusegs.example.txt")} +##' +##' #The file write.emusegs.example.txt would have been written to R_HOME +##' \dontrun{unlink("write.emusegs.example.txt")} +##' +##' @export write.emusegs "write.emusegs" <- function(seglist, file) { + if(inherits(seglist,"emuRsegs")){ + warning("You are using the write function of the legacy class emusegs for an emuRsegs object. The persisted object cannot be read back as emuRsegs object. It is recommended to use standard R function save() instead to persist an emuRsegs object.") + } cat(paste("database:", attributes(seglist)$database, "\n", sep=""), file=file) cat(paste("query:", attributes(seglist)$query, "\n", sep=""), file=file, append=TRUE) cat(paste("type:", attributes(seglist)$type, "\n", sep=""), file=file, append=TRUE) @@ -185,37 +474,61 @@ setMethod("[", "emusegs", } -"start.emusegs" <- -function(x, ...) +##' @export +"start.emusegs" <- function(x, ...) { -as.numeric(x$start) + as.numeric(x$start) } -"end.emusegs" <- -function(x, ...) +##' @export +"end.emusegs" <- function(x, ...) { -as.numeric(x$end) + as.numeric(x$end) } -"utt" <- -function(x) { + +##' @export +"utt" <- function(x) { UseMethod("utt") } -"utt.emusegs" <- - function(x) +##' @export +"utt.emusegs" <- function(x) { as.character(x$utts) } -"dur" <- - function(x) { - UseMethod("dur") - } -"dur.emusegs" <- - function (x) +##' duration +##' +##' calculates durations +##' +##' @param x ??? +##' @export +"dur" <- function(x) { + UseMethod("dur") +} + + + + + + + + + +##' Duration of segments (NOTE: does not work for new default resultType = "tibble" of \code{query()}) +##' +##' duration of segments is calculated for each segment in the segment list +##' +##' +##' @param x a segment list +##' @return a vector of durations +##' @author Jonathan Harrington +##' @keywords internal +##' @export +"dur.emusegs" <- function (x) { if(all(end(x)==0)) d <- end(x) @@ -223,11 +536,3 @@ function(x) { d <- end(x) - start(x) d } - - -# Local Variables: -# mode:S -# S-temp-buffer-p:t -# End: - - diff --git a/R/end.emusegs.R b/R/end.emusegs.R index cc54a3cf..24c45134 100644 --- a/R/end.emusegs.R +++ b/R/end.emusegs.R @@ -1,6 +1,4 @@ -"end.emusegs" <- -function(x, ...) -{ -as.numeric(x$end) -} - +# "end.emusegs" <- function(x, ...) +# { +# as.numeric(x$end) +# } diff --git a/R/epg.class.R b/R/epg.class.R index ccc569f3..0db6e28f 100644 --- a/R/epg.class.R +++ b/R/epg.class.R @@ -1,83 +1,158 @@ -"epgai" <- -function(epgdata, weights = c(1, 9, 81, 729, 4921)) +##' Electropalatographic contact indices +##' +##' epgai(), epgci(), epgdi() return the anteriority index, the centrality +##' index, the dorsopalatal index respectively as a trackdata object or a +##' vector +##' +##' These are exact implementations of the formulae for calculating the EPG +##' anteriority, EPG centrality, and EPG dorsopalatal indices as described in +##' Recasens & Pallares (2001). +##' +##' @aliases epgai epgci epgdi +##' @param epgdata An eight-columned EPG-compressed trackdata object, or an +##' eight columned matrix of EPG-compressed trackdata, or a 3D palatographic +##' array that is the output of palate() +##' @param weights A vector of five values that are applied to EPG rows 1-5 +##' respectively in epgai(). A vector of four values that are applied to +##' columns 1 and 8, to columns 2 and 7, columns 3 and 6, columns 4 and 5 +##' respectively. Defaults to the values given in Recasens & Pallares (2001). +##' @return These functions return a trackdata object if they are applied to an +##' eight-columned EPG-compressed trackdata object, otherwise a one-columned +##' matrix. +##' @author Jonathan Harrington +##' @seealso \code{\link{epgcog}} \code{\link{epggs}} \code{\link{palate}} +##' @references GIBBON, F. AND NICOLAIDIS, K. (1999). Palatography. In W.J. +##' Hardcastle & N. Hewlett (eds). Coarticulation. (pp. 229-245). Cambridge +##' University Press: Cambridge. +##' +##' RECASENS, D. & PALLARES, M. (2001) Coarticulation, assimilation and +##' blending in Catalan consonant clusters. Journal of Phonetics, 29, 273-301. +##' @keywords math +##' @examples +##' +##' # Anteriority index: trackdata +##' ai <- epgai(coutts.epg) +##' # Dorsopalatal index, one-columned matrix +##' di <- epgdi(dcut(coutts.epg, 0.5, prop=TRUE)) +##' # Next to examples: Centrality index, one-columed matrix +##' ci <- epgci(palate(coutts.epg)) +##' ci <- epgci(palate(dcut(coutts.epg, 0.5, prop=TRUE))) +##' +##' +##' @export +"epgai" <- function(epgdata, weights = c(1, 9, 81, 729, 4921)) { -# function to calculate the anteriority index per palate -# as in Recasens & Pallares, 2001, 29, Jphon, p. 283, -# epgdata: either a trackdata object or an array of type EPG -# or an 8 columned matrix or 8-element vector that's -# the output of dcut() applied to an EPG-trackdata object. - -# weights: apply weights to rows 5, 4, 3, 2, 1. - -# -# returns: if p is a trackdata object, then -# the function returns trackdata of the -# same length as p with ant.index values. -# Otherwise, if p is an array of palates, -# one value (the ant.index) per palate) is returned -# -if(!inherits(epgdata, "EPG")) p <- palate(epgdata) -else p <- epgdata -# in case there is only one palate -if(length(dim(p) )==2) -{ -p <- array(p, c(8, 8, 1)) -class(p) <- "EPG" + # function to calculate the anteriority index per palate + # as in Recasens & Pallares, 2001, 29, Jphon, p. 283, + # epgdata: either a trackdata object or an array of type EPG + # or an 8 columned matrix or 8-element vector that's + # the output of dcut() applied to an EPG-trackdata object. + + # weights: apply weights to rows 5, 4, 3, 2, 1. + + # + # returns: if p is a trackdata object, then + # the function returns trackdata of the + # same length as p with ant.index values. + # Otherwise, if p is an array of palates, + # one value (the ant.index) per palate) is returned + # + if(!inherits(epgdata, "EPG")) p <- palate(epgdata) + else p <- epgdata + # in case there is only one palate + if(length(dim(p) )==2) + { + p <- array(p, c(8, 8, 1)) + class(p) <- "EPG" + } + N <- dim(p)[3] + o <- epgsum(p, 1, rows=5:1) + w <- matrix(weights, nrow=N, ncol=5, byrow=TRUE) + divisor <- matrix(c(rep(8, 4), 6), nrow=N, ncol=5, byrow=TRUE) + num <- log(apply(w * o/divisor, 1, sum) + 1) + den <- log(sum(weights) + 1) + result <- cbind(num/den) + if(is.trackdata(epgdata)) { + epgdata$data <- result + epgdata$trackname <- "anteriority" + } + else epgdata <- result + epgdata } -N <- dim(p)[3] -o <- epgsum(p, 1, rows=5:1) -w <- matrix(weights, nrow=N, ncol=5, byrow=TRUE) -divisor <- matrix(c(rep(8, 4), 6), nrow=N, ncol=5, byrow=TRUE) -num <- log(apply(w * o/divisor, 1, sum) + 1) -den <- log(sum(weights) + 1) -result <- cbind(num/den) - if(is.trackdata(epgdata)) { - epgdata$data <- result - epgdata$trackname <- "anteriority" - } - else epgdata <- result - epgdata + + +##' @export +"epgci" <- function (epgdata, weights = c(1, 17, 289, 4913)) +{ + # function to calculate the centrality index per palate + # as in the CCa formula in Recasens & Pallares, 2001, 29, Jphon, p. 283, + # p: either a list of epg track + # data returned by track () or a three-dimensionsal array of palates + # weights: apply weights to columns 1 and 8; + # columns 2 and 7, columns 3 and 6, columns 4 and 5. + + # + # returns: if p is a list, then + # the function returns trackdata of the + # same length as p with ant.index values. + # Otherwise, if p is an array of palates, + # one value (the ant.index) per palate) is returned + # + if (!inherits(epgdata, "EPG")) + p <- palate(epgdata) + else p <- epgdata + if (length(dim(p)) == 2) { + p <- array(p, c(8, 8, 1)) + class(p) <- "EPG" + } + N <- dim(p)[3] + num <- log((weights[1] * epgsum(p, columns = c(1, 8))/14 + + weights[2] * epgsum(p, columns = c(2, 7))/16 + weights[3] * + epgsum(p, columns = c(3, 6))/16 + weights[4] * epgsum(p, + columns = c(4, 5))/16) + 1) + den <- log(sum(weights) + 1) + result <- cbind(num/den) + if (is.trackdata(epgdata)) { + epgdata$data <- result + epgdata$trackname <- "centrality" + } + else epgdata <- result + epgdata } -`epgci` <- -function (epgdata, weights = c(1, 17, 289, 4913)) +##' @export +"epgdi" <- function(epgdata) { -# function to calculate the centrality index per palate -# as in the CCa formula in Recasens & Pallares, 2001, 29, Jphon, p. 283, -# p: either a list of epg track -# data returned by track () or a three-dimensionsal array of palates -# weights: apply weights to columns 1 and 8; -# columns 2 and 7, columns 3 and 6, columns 4 and 5. - -# -# returns: if p is a list, then -# the function returns trackdata of the -# same length as p with ant.index values. -# Otherwise, if p is an array of palates, -# one value (the ant.index) per palate) is returned -# - if (!inherits(epgdata, "EPG")) - p <- palate(epgdata) - else p <- epgdata - if (length(dim(p)) == 2) { - p <- array(p, c(8, 8, 1)) - class(p) <- "EPG" - } - N <- dim(p)[3] - num <- log((weights[1] * epgsum(p, columns = c(1, 8))/14 + - weights[2] * epgsum(p, columns = c(2, 7))/16 + weights[3] * - epgsum(p, columns = c(3, 6))/16 + weights[4] * epgsum(p, - columns = c(4, 5))/16) + 1) - den <- log(sum(weights) + 1) - result <- cbind(num/den) - if (is.trackdata(epgdata)) { - epgdata$data <- result - epgdata$trackname <- "centrality" - } - else epgdata <- result - epgdata + # function to calculate the Qp, or + # dorsopalatal index per palate + # as in Recasens & Pallares, 2001, 29, Jphon, p. 283, + # p: either a list of epg track + # data returned by track () or a three-dimensionsal array of palates + + # + # returns: if p is a list, then + # the function returns trackdata of the + # same length as p with ant.index values. + # Otherwise, if p is an array of palates, + # one value (the ant.index) per palate) is returned + # + if(!inherits(epgdata, "EPG")) p <- palate(epgdata) + else p <- epgdata + # in case there is only one palate + if(length(dim(p) )==2) + { + p <- array(p, c(8, 8, 1)) + class(p) <- "EPG" + } + result <- cbind(epgsum(p, rows=6:8)/24) + if(is.trackdata(epgdata)) { + epgdata$data <- result + epgdata$trackname <- "dorsopalatal" + } + else epgdata <- result + epgdata } @@ -86,289 +161,536 @@ function (epgdata, weights = c(1, 17, 289, 4913)) -`epgcog` <- -function (epgdata, weights = seq(7.5, 0.5, by = -1), rows = 1:8, - columns = 1:8, row1 = NULL) + +##' Electropalatographic centre of gravity +##' +##' Calculate the centre of gravity in palatographic data. +##' +##' The centre of gravity is a key function in palatographic research and gives +##' an value per palate that is indicative of the overall location of contacts +##' along the anterior-posterior dimension. The formula is an implementation of +##' the ones discussed in Hardcastle et al. (1991), Gibbon et al (1993), and +##' Gibbon & Nicolaidis (1999). +##' +##' @param epgdata An eight-columned EPG-compressed trackdata object, or an +##' eight columned matrix of EPG-compressed trackdata, or a 3D palatographic +##' array that is the output of palate() +##' @param weights A vector of 8 values that are applied to EPG rows 1-8 +##' respectively. Defaults to 7.5, 7.0, 6.5...0.5. +##' @param rows Calculate EPG-COG over selected row number(s). rows = 5:8, +##' columns = 3:6 is an implementation of posterior centre of gravity, as +##' defined by Gibbon & Nicolaidis (1999,p. 239). See examples below. +##' @param columns Calculate EPG-COG over selected column number(s). +##' @param row1 an optional single valued numeric vector to allow a separate +##' weighting of the electrodes in row1. For example, if row1=4/3, then all the +##' electrodes in row1 are multiplied by that value, before EPG-COG is +##' calculated. Defaults to NULL (no weighting). +##' @return These functions return a trackdata object if they are applied to an +##' eight-columned EPG-compressed trackdata object, otherwise a one-columned +##' matrix. +##' @author Jonathan Harrington +##' @seealso \code{\link{epgai}} \code{\link{epgsum}} \code{\link{palate}} +##' @references GIBBON, F., HARDCASTLE, W. and NICOLAIDIS, K. (1993) Temporal +##' and spatial aspects of lingual coarticulation in /kl/ sequences: a +##' cross-linguistic investigation. Language & Speech, 36, 26t1-277. +##' +##' GIBBON, F. AND NICOLAIDIS, K. (1999). Palatography. In W.J. Hardcastle & +##' N. Hewlett (eds). Coarticulation. (pp. 229-245). Cambridge University +##' Press: Cambridge. +##' +##' HARDCASTLE, W, GIBBON, F. and NICOLAIDIS, K. (1991) EPG data reduction +##' methods and their implications for studies of lingual coarticulation. +##' Journal of Phonetics, 19, 251-266. +##' @keywords math +##' @examples +##' +##' # COG: trackdata +##' cog <- epgcog(coutts.epg) +##' # cog, one-columned matrix +##' cog <- epgcog(dcut(coutts.epg, 0.5, prop=TRUE)) +##' # posterior cog for Fig. 10.5, p. 239 in Gibbon & Nicolaidis (1999) +##' r = array(0, c(8, 8, 2)) +##' r[6,c(1, 8),1] <- 1 +##' r[7,c(1, 2, 7, 8), 1] <- 1 +##' r[8, ,1] <- 1 +##' r[4, c(1, 2, 8), 2] <- 1 +##' r[5, c(1, 2, 7, 8), 2] <- 1 +##' r[6, c(1, 2, 3, 7, 8), 2] <- 1 +##' r[7:8, , 2] = 1 +##' class(r) <- "EPG" +##' epgcog(r, rows=5:8, columns=3:6) +##' +##' @export epgcog +`epgcog` <- function (epgdata, weights = seq(7.5, 0.5, by = -1), + rows = 1:8, columns = 1:8, row1 = NULL) { -# function to calculate the centre of gravity per palate -# p: either a list of epg track -# data returned by track () or a three-dimensionsal array of palates -# weights: apply weights to rows 1..8. -# (defaults to 7.5, 6.5...0.5) -# row1: an optional numeric argument -# to allow a separate weighting of -# the electrodes in row1. For example, if row1=4/3, -# then all the electrodes in row1 are multiplied by -# that value, before the COG is calculated. -# Defaults to NULL (no weighting). -# -# returns: if p is a list, then -# the function returns trackdata of the -# same length as p with COG values. -# Otherwise, if p is an array of palates, -# one value (the COG) per palate) is returned -# -# gives the same result (0.5 and 1.17) as the -# posterior COG measure in Fig. 10.5, -# Gibbon & Nicolaidis, 1999, p. 239, -# in Hardcastle & Hewlett Eds, 'Coarticulation'. CUP -# r = array(0, c(8, 8, 2)) -# r[6,c(1, 8),1] = 1 -# r[7,c(1, 2, 7, 8), 1] = 1 -# r[8, ,1] = 1 -# r[4, c(1, 2, 8), 2] = 1 -# r[5, c(1, 2, 7, 8), 2] = 1 -# r[6, c(1, 2, 3, 7, 8), 2] = 1 -# r[7, , 2] = 1 -# r[8, , 2] = 1 -# epgcog(r, rows=5:8, columns=3:6) - - if (!inherits(epgdata, "EPG")) - p <- palate(epgdata) - else p <- epgdata - if (length(dim(p)) == 2) { - p <- array(p, c(8, 8, 1)) - class(p) <- "EPG" - } - N <- dim(p)[3] - times <- dimnames(p)[[3]] - if (!is.null(row1)) - p[1, , ] <- p[1, , ] * row1 - rowsum <- epgsum(p, 1, columns = columns) - w <- matrix(weights, nrow = N, ncol = 8, byrow = TRUE) - prodsum <- rowsum * w - prodsum <- rbind(prodsum[, rows]) - sumval <- apply(prodsum, 1, sum) - psum <- epgsum(p, rows = rows, columns = columns) - result <- rep(0, length(psum)) - temp <- psum == 0 - result[!temp] <- sumval[!temp]/psum[!temp] - result <- cbind(result) - rownames(result) <- times - if (is.trackdata(epgdata)) { - epgdata$data <- result - epgdata$trackname <- "centre of gravity" - } - else epgdata <- result - epgdata + # function to calculate the centre of gravity per palate + # p: either a list of epg track + # data returned by track () or a three-dimensionsal array of palates + # weights: apply weights to rows 1..8. + # (defaults to 7.5, 6.5...0.5) + # row1: an optional numeric argument + # to allow a separate weighting of + # the electrodes in row1. For example, if row1=4/3, + # then all the electrodes in row1 are multiplied by + # that value, before the COG is calculated. + # Defaults to NULL (no weighting). + # + # returns: if p is a list, then + # the function returns trackdata of the + # same length as p with COG values. + # Otherwise, if p is an array of palates, + # one value (the COG) per palate) is returned + # + # gives the same result (0.5 and 1.17) as the + # posterior COG measure in Fig. 10.5, + # Gibbon & Nicolaidis, 1999, p. 239, + # in Hardcastle & Hewlett Eds, 'Coarticulation'. CUP + # r = array(0, c(8, 8, 2)) + # r[6,c(1, 8),1] = 1 + # r[7,c(1, 2, 7, 8), 1] = 1 + # r[8, ,1] = 1 + # r[4, c(1, 2, 8), 2] = 1 + # r[5, c(1, 2, 7, 8), 2] = 1 + # r[6, c(1, 2, 3, 7, 8), 2] = 1 + # r[7, , 2] = 1 + # r[8, , 2] = 1 + # epgcog(r, rows=5:8, columns=3:6) + + if (!inherits(epgdata, "EPG")) + p <- palate(epgdata) + else p <- epgdata + if (length(dim(p)) == 2) { + p <- array(p, c(8, 8, 1)) + class(p) <- "EPG" + } + N <- dim(p)[3] + times <- dimnames(p)[[3]] + if (!is.null(row1)) + p[1, , ] <- p[1, , ] * row1 + rowsum <- epgsum(p, 1, columns = columns) + w <- matrix(weights, nrow = N, ncol = 8, byrow = TRUE) + prodsum <- rowsum * w + prodsum <- rbind(prodsum[, rows]) + sumval <- apply(prodsum, 1, sum) + psum <- epgsum(p, rows = rows, columns = columns) + result <- rep(0, length(psum)) + temp <- psum == 0 + result[!temp] <- sumval[!temp]/psum[!temp] + result <- cbind(result) + rownames(result) <- times + if (is.trackdata(epgdata)) { + epgdata$data <- result + epgdata$trackname <- "centre of gravity" + } + else epgdata <- result + epgdata } -"epgdi" <- -function(epgdata) -{ -# function to calculate the Qp, or -# dorsopalatal index per palate -# as in Recasens & Pallares, 2001, 29, Jphon, p. 283, -# p: either a list of epg track -# data returned by track () or a three-dimensionsal array of palates - -# -# returns: if p is a list, then -# the function returns trackdata of the -# same length as p with ant.index values. -# Otherwise, if p is an array of palates, -# one value (the ant.index) per palate) is returned -# -if(!inherits(epgdata, "EPG")) p <- palate(epgdata) -else p <- epgdata -# in case there is only one palate -if(length(dim(p) )==2) -{ -p <- array(p, c(8, 8, 1)) -class(p) <- "EPG" -} -result <- cbind(epgsum(p, rows=6:8)/24) -if(is.trackdata(epgdata)) { - epgdata$data <- result - epgdata$trackname <- "dorsopalatal" - } - else epgdata <- result - epgdata -} -"epggs" <- -function(epgdata, gscale=100, gridlines=TRUE, gridcol="gray", gridlty=1, axes=TRUE, xlab="", ylab="", ...) -{ -# function to plot a 3D greyscale EPG imageb -# p is palate data, returned by palate() or EPG-trackdata -# plots greyscale image of contacts -# such that -# the darker the square, the greater the -# proportion of contacts. Thus a black square -# means that a contact was always on -# for all palatograms in p; a white -# square means that it was always off. - -if(!inherits(epgdata, "EPG")) p <- palate(epgdata) -else p <- epgdata -# in case there is only one palate -if(length(dim(p) )==2) -{ -p <- array(p, c(8, 8, 1)) -class(p) <- "EPG" -} -n = dim(p)[3] -sump = (apply(p, c(1,2), sum))/n -image(1:8, 1:8, t(1-sump[8:1,]), col = gray(0:gscale/gscale), axes=FALSE, xlab=xlab, ylab=ylab, ...) -if(axes) + +##' Plot a grey-scale image of palatographic data. +##' +##' The function plots a grey-scale image of palatographic data such that the +##' greyness in cell r, c is in proportion to the frequency of contacts in +##' cells of row r and columns c of all palatograms in the object passed to +##' this function. +##' +##' The function plots a grey-scale image of up to 62 values arranged over an 8 +##' x 8 grid with columns 1 and 8 unfilled for row 1. If cell row r column c +##' is contacted for all palatograms in the object that is passed to this +##' function, the corresponding cell is black; if none of of the cells in row r +##' column c are contacted, then the cell is white (unfilled). +##' +##' @param epgdata An eight-columned EPG-compressed trackdata object, or an +##' eight columned matrix of EPG-compressed trackdata, or a 3D palatographic +##' array that is the output of palate() +##' @param gscale a single valued numeric vector that defines the granularity +##' of the greyscale. Defaults to 100. +##' @param gridlines if TRUE (default) grid lines over the palatographic image are +##' drawn are drawn. +##' @param gridlty A single-valued numeric vector that defines the linetype for +##' plotting the grid. +##' @param gridcol color of grid +##' @param axes TRUE for show axes, FALSE for no axes +##' @param xlab A character vector for the x-axis label. +##' @param ylab A character vector for the y-axis label. +##' @param ... graphical parameters can be given as arguments to 'epggs'. +##' @author Jonathan Harrington +##' @seealso \code{\link{epgai}} \code{\link{epgcog}} \code{\link{epgplot}} +##' \code{\link{palate}} +##' @keywords dplot +##' @examples +##' +##' # greyscale image across the first two segments 'just relax' +##' # with title +##' epggs(coutts.epg[1:2,], main="just relax") +##' +##' # as above but with dotted gridlines in blue +##' epggs(coutts.epg[1:2,], main="just relax", gridlty=2, gridcol="blue") +##' +##' # as the first example, but with greyscale set to 2 +##' epggs(coutts.epg[1:2,], 2, main="just relax") +##' +##' # get palatograms for "S" from the polhom.epg database +##' temp = polhom.l == "S" +##' # greyscale image of all "S" segments at their temporal midpoint +##' epggs(dcut(polhom.epg[temp,], 0.5, prop=TRUE)) +##' +##' # greyscale image of all "S" segments from their onset to offset +##' epggs(polhom.epg[temp,]) +##' +##' # the same but derived from palates +##' p <- palate(polhom.epg[temp,]) +##' epggs(p) +##' +##' @export epggs +"epggs" <- function(epgdata, gscale = 100, gridlines = TRUE, + gridcol = "gray", gridlty = 1, axes = TRUE, + xlab = "", ylab = "", ...) { -axis(side=1) -axis(side=2, at=c(1, 3, 5, 7), labels=as.character(c(8, 6, 4, 2))) -} -if(gridlines) -grid(8, 8, col = gridcol, lty=gridlty) + # function to plot a 3D greyscale EPG imageb + # p is palate data, returned by palate() or EPG-trackdata + # plots greyscale image of contacts + # such that + # the darker the square, the greater the + # proportion of contacts. Thus a black square + # means that a contact was always on + # for all palatograms in p; a white + # square means that it was always off. + + if (!requireNamespace("grDevices", quietly = TRUE)){ + stop("'grDevices' package required to run 'grDevices::gray()'") + } + + if(!inherits(epgdata, "EPG")) p <- palate(epgdata) + else p <- epgdata + # in case there is only one palate + if(length(dim(p) )==2) + { + p <- array(p, c(8, 8, 1)) + class(p) <- "EPG" + } + n = dim(p)[3] + sump = (apply(p, c(1,2), sum))/n + graphics::image(1:8, 1:8, t(1-sump[8:1,]), col = grDevices::gray(0:gscale/gscale), axes=FALSE, xlab=xlab, ylab=ylab, ...) + if(axes) + { + graphics::axis(side=1) + graphics::axis(side=2, at=c(1, 3, 5, 7), labels=as.character(c(8, 6, 4, 2))) + } + if(gridlines) + graphics::grid(8, 8, col = gridcol, lty=gridlty) } -"epgplot" <- -function(epgdata, select=NULL, numbering = "times", gridlines = TRUE, mfrow = NULL, col = 1, mar=c(.8, .1, .8, .1), xlim=NULL) + + + + + + + + +##' Plot palatographic data +##' +##' Function to plot palatograms from EPG compressed objects or from a +##' 3D-palatographic array that is output from palate(). +##' +##' The function plots 62 values arranged over an 8 x 8 grid with columns 1 and +##' 8 unfilled for row 1. When there is a contact (1), the corresponding +##' rectangle of the grid is filled otherwise the rectangle is empty. +##' +##' @param epgdata An eight-columned EPG-compressed trackdata object, or an +##' eight columned matrix of EPG-compressed trackdata, or a 3D palatographic +##' array that is the output of palate() +##' @param select A vector of times. Palatograms are plotted at these times +##' only. Note: this argument should only be used if epgdata is temporally +##' contiguous, i.e. the entire trackdata object contains palatograms at +##' successive multiple times of the EPG sampling frequency. (as in +##' coutts.epg$ftime). Defaults to NULL, in which case palatograms are plotted +##' for all times available in epgdata. +##' @param numbering Either "times" (default), or logical TRUE, or a character +##' vector of the same length as the number of segments in epgdata. In the +##' default case, the times at which the palatograms occur are printed above +##' the palatograms. If logical TRUE, then the palatograms are numbered 1, 2, ... +##' number of segments and this value is printed above the palatograms. If a +##' character vector, then this must be the same length as the number of +##' segments in epgdata. +##' @param gridlines if TRUE (default) grid lines over the palatogram are drawn. +##' @param mfrow By default, the function tries to work out a sensible number +##' of rows and columns for plotting the palatograms. Otherwise, this can be +##' user-specified, in which case mfrow is a vector of two integer numeric +##' values. +##' @param xlim A numeric vector of two time values over which the epgdata +##' should be plotted. Note: this argument should only be used if epgdata is +##' temporally contiguous, i.e. the entire trackdata object contains +##' palatograms at successive multiple times of the EPG sampling frequency. (as +##' in coutts.epg$ftime). Defaults to NULL (plot all time values). +##' @param col specify a colour for plotting the filled EPG cells. +##' @param mar A numerical vector of the form 'c(bottom, left, top, right)' +##' which gives the number of lines of margin to be specified on the four sides +##' of the plot. The default in this function is c(0.8, 0.1, 0.8, 0.1). (The +##' default in the R plot() function is c(5, 4, 4, 2) + 0.1. +##' @author Jonathan Harrington +##' @seealso \code{\link{epgai}} \code{\link{epgcog}} \code{\link{epggs}} +##' \code{\link{palate}} +##' @keywords dplot +##' @examples +##' +##' epgplot(polhom.epg[10,]) +##' +##' # as above but between times 1295 ms and 1330 ms +##' epgplot(polhom.epg[10,], xlim=c(1295, 1330)) +##' +##' # the same as above, but the data is first +##' # converted to a 3D palatographic array +##' p <- palate(polhom.epg[10,]) +##' epgplot(p, xlim=c(1295, 1330)) +##' +##' # plot palatograms 2 and 8 +##' epgplot(p[,,c(2, 8)]) +##' +##' # as above but +##' # no gridlines, different colour, numbering rather than times +##' epgplot(p[,,c(2, 8)], gridlines=FALSE, col="pink", numbering=TRUE) +##' +##' # as above but with a user-specified title +##' +##' epgplot(p[,,c(2, 8)], gridlines=FALSE, col="pink", numbering=c("s1", "s2")) +##' +##' # plot the palatograms in the second +##' # segment of coutts.epg that are closest in time +##' # to 16377 ms and 16633 ms +##' epgplot(coutts.epg[2,], c(16377, 16633)) +##' +##' +##' @export epgplot +"epgplot" <- function(epgdata, select = NULL, numbering = "times", + gridlines = TRUE, mfrow = NULL, col = 1, + mar = c(.8, .1, .8, .1), xlim = NULL) { -# epgdata: a list as returned by emu.track() -# or else an array of palates. -# numbering can be T or F or else a numeric or character vector -# which is equal in length to the number of palates) -# xlim: can only be used if epgdata are contiguous! - - oldpar = par(no.readonly=TRUE) - on.exit(par(oldpar)) - par(mar = mar) - epggrid <- function() { - xgrid <- NULL - for (j in 0:8) { - vec <- c(j, j, NA) - xgrid <- c(xgrid, vec) - } - ygrid <- rep(c(0, 8, NA), 9) - ygrid[c(2, 26)] <- 7 - lines(xgrid, ygrid) - ygrid[25] <- 1 - ygrid[2] <- 8 - lines(ygrid, xgrid) - } - if (!inherits(epgdata, "EPG")) - epgdata <- palate(epgdata) - if (!is.null(select)) { - times <- dimnames(epgdata)[[3]] - smat <- NULL - for (j in select) { - cl <- closest(as.numeric(times), j)[1] - smat <- c(smat, cl) - } - epgdata <- epgdata[, , smat] + # epgdata: a list as returned by emu.track() + # or else an array of palates. + # numbering can be TRUE or FALSE or else a numeric or character vector + # which is equal in length to the number of palates) + # xlim: can only be used if epgdata are contiguous! + + oldpar = graphics::par(no.readonly=TRUE) + on.exit(graphics::par(oldpar)) + graphics::par(mar = mar) + epggrid <- function() { + xgrid <- NULL + for (j in 0:8) { + vec <- c(j, j, NA) + xgrid <- c(xgrid, vec) } - N <- dim(epgdata) - if (length(N) == 2) { - N <- 1 - epgdata <- array(epgdata, c(8, 8, 1)) - } - else N <- N[3] + ygrid <- rep(c(0, 8, NA), 9) + ygrid[c(2, 26)] <- 7 + graphics::lines(xgrid, ygrid) + ygrid[25] <- 1 + ygrid[2] <- 8 + graphics::lines(ygrid, xgrid) + } + if (!inherits(epgdata, "EPG")) + epgdata <- palate(epgdata) + if (!is.null(select)) { times <- dimnames(epgdata)[[3]] - if (!is.null(xlim)) { - temp <- as.numeric(times) > xlim[1] & as.numeric(times) < - xlim[2] - epgdata <- epgdata[, , temp] - times <- times[temp] - N <- sum(temp) - } - if (is.logical(numbering)) { - if (numbering) - main <- as.character(1:N) - else main <- rep("", N) - } - else if (length(numbering) == 1) { - if (numbering == "times") - main <- times - } - else main <- as.character(numbering) - x <- rep(0:7, rep(8, 8)) - xpoly <- cbind(x, x + 1, x + 1, x) - y <- rep(7:0, 8) - ypoly <- cbind(y, y, y + 1, y + 1) - if (is.null(mfrow)) { - foo <- ceiling(sqrt(N)) - bar <- ceiling(N/foo) - mfrow <- c(foo, bar) - } - epgplot.sub <- function(pgram, xpoly, ypoly, col = 1, main = "") { - which <- c(pgram) == 1 - if (any(which)) { - xpoly <- xpoly[which, ] - ypoly <- ypoly[which, ] - xpoly <- rbind(xpoly) - ypoly <- rbind(ypoly) - mat <- NULL - for (j in 1:sum(which)) { - mat$x <- c(mat$x, c(xpoly[j, ], NA)) - mat$y <- c(mat$y, c(ypoly[j, ], NA)) - } - mat$x <- mat$x[-length(mat$x)] - mat$y <- mat$y[-length(mat$y)] - } - plot(0:8, 0:8, type = "n", axes = FALSE, xlab = "", ylab = "", - main = main) - if (any(which)) - polygon(mat$x, mat$y, col = col) + smat <- NULL + for (j in select) { + cl <- closest(as.numeric(times), j)[1] + smat <- c(smat, cl) } - par(mfrow = mfrow) - if (N > 1) { - for (j in 1:N) { - epgplot.sub(epgdata[, , j], xpoly, ypoly, col = col, - main = main[j]) - if (gridlines) - epggrid() - } + epgdata <- epgdata[, , smat] + } + N <- dim(epgdata) + if (length(N) == 2) { + N <- 1 + epgdata <- array(epgdata, c(8, 8, 1)) + } + else N <- N[3] + times <- dimnames(epgdata)[[3]] + if (!is.null(xlim)) { + temp <- as.numeric(times) > xlim[1] & as.numeric(times) < + xlim[2] + epgdata <- epgdata[, , temp] + times <- times[temp] + N <- sum(temp) + } + if (is.logical(numbering)) { + if (numbering) + main <- as.character(1:N) + else main <- rep("", N) + } + else if (length(numbering) == 1) { + if (numbering == "times") + main <- times + } + else main <- as.character(numbering) + x <- rep(0:7, rep(8, 8)) + xpoly <- cbind(x, x + 1, x + 1, x) + y <- rep(7:0, 8) + ypoly <- cbind(y, y, y + 1, y + 1) + if (is.null(mfrow)) { + foo <- ceiling(sqrt(N)) + bar <- ceiling(N/foo) + mfrow <- c(foo, bar) + } + epgplot.sub <- function(pgram, xpoly, ypoly, col = 1, main = "") { + which <- c(pgram) == 1 + if (any(which)) { + xpoly <- xpoly[which, ] + ypoly <- ypoly[which, ] + xpoly <- rbind(xpoly) + ypoly <- rbind(ypoly) + mat <- NULL + for (j in 1:sum(which)) { + mat$x <- c(mat$x, c(xpoly[j, ], NA)) + mat$y <- c(mat$y, c(ypoly[j, ], NA)) + } + mat$x <- mat$x[-length(mat$x)] + mat$y <- mat$y[-length(mat$y)] } - else { - epgplot.sub(epgdata, xpoly, ypoly, col = col, main = main) - if (gridlines) - epggrid() + graphics::plot(0:8, 0:8, type = "n", axes = FALSE, xlab = "", ylab = "", + main = main) + if (any(which)) + graphics::polygon(mat$x, mat$y, col = col) + } + graphics::par(mfrow = mfrow) + if (N > 1) { + for (j in 1:N) { + epgplot.sub(epgdata[, , j], xpoly, ypoly, col = col, + main = main[j]) + if (gridlines) + epggrid() } - par(mar = oldpar$mar) + } + else { + epgplot.sub(epgdata, xpoly, ypoly, col = col, main = main) + if (gridlines) + epggrid() + } + graphics::par(mar = oldpar$mar) } -"epgsum" <- -function(epgdata, profile=c(1,3), inactive = FALSE, rows=1:8, columns=1:8, trackname="EPG-sum") + + + + + + + + +##' Sum contacts in palatograms. +##' +##' The function calculates EPG contact profiles, i.e. sums active or inactive +##' electrodes optionally by row and/or column in palatographic data. +##' +##' Contact profiles are standard tools in electropalatographic analysis. See +##' e.g., Byrd (1996) for details. +##' +##' @param epgdata An eight-columned EPG-compressed trackdata object, or an +##' eight columned matrix of EPG-compressed trackdata, or a 3D palatographic +##' array that is the output of palate() +##' @param profile A numeric vector of one or two values. The options are as +##' follows. c(1,3) and c(1) sum the contacts by row, but the latter outputs +##' the summation in the rows. c(2,3) and c(2) sum the contacts by column, but +##' the latter outputs the summation in the columns. (see also rows and columns +##' arguments and the examples below for further details). +##' @param inactive a single element logical vector. If FALSE (the default), then +##' the active electrodes (i.e, 1s) are summed, otherwise the inactive +##' electrodes (i.e., 0s) are summed. +##' @param rows vector of rows to sum +##' @param columns vector of columns to sum +##' @param trackname single element character vector of the name of the track +##' (defaults to "EPG-sum") +##' @return These functions return a trackdata object if they are applied to an +##' eight-columned EPG-compressed trackdata object, otherwise a one-columned +##' matrix. +##' @author Jonathan Harrington +##' @seealso \code{\link{epgai}} \code{\link{epgcog}} \code{\link{epggs}} +##' \code{\link{palate}} +##' @references BYRD, D. (1996). Influences on articulatory timing in consonant +##' sequences. Journal of Phonetics, 24, 209-244. +##' +##' GIBBON, F. AND NICOLAIDIS, K. (1999). Palatography. In W.J. Hardcastle & +##' N. Hewlett (eds). Coarticulation. (pp. 229-245). Cambridge University +##' Press: Cambridge. +##' @keywords math +##' @examples +##' +##' # Trackdata object of the sum of contacts in the 1st segment of polhom.epg +##' epgsum(polhom.epg[1,]) +##' # as above, but the summation is in rows 1-3 only. +##' epgsum(polhom.epg[1,], rows=c(1:3)) +##' # as epgsum(polhom.epg[1,]), except sum the inactive electrodes in columns 3-6. +##' epgsum(polhom.epg[1,], columns=3:6, inactive=TRUE) +##' # Obtain compressed EPG-trackdata object for the 1st four segments of polhom.epg +##' # at the temporal midpoint +##' mid <- dcut(polhom.epg[1:4,], .5, prop=TRUE) +##' # sum of contacts in these four palatograms. +##' epgsum(mid) +##' # gives the same result as the previous command. +##' p <- palate(mid) +##' # sum the contacts in the palatograms. +##' epgsum(p) +##' # as above, but show the separate row summmations. +##' epgsum(p, 1) +##' # as above, but show the separate column summmations. +##' epgsum(p, 2) +##' # sum of the contacts in rows 1-4 showing the separate row summations. +##' epgsum(p, 1, rows=1:4) +##' # sum of the contacts in rows 1-4 showing the separate column summations. +##' epgsum(p, 2, rows=1:4) +##' # sum of the contacts in columns 3-6 showing the separate row summations. +##' epgsum(p, 1, columns=3:6) +##' # sum of the contacts in columns 3-6 showing the separate column summations. +##' epgsum(p, 2, columns=3:6) +##' +##' +##' @export epgsum +"epgsum" <- function(epgdata, profile=c(1,3), inactive = FALSE, + rows=1:8, columns=1:8, trackname="EPG-sum") { -# function that sums by row or by column -# either the active or inactive electrodes of EPG-data. -# returns trackdata of the summed result. -# epgdata: epg data as returned by emu.track() -# allcontacts: if T, then all the contacts per palate are summed -# column: if T, then the summation is applied to -# columns, rather than to rows -# inactive: if T, then the summation is applied -# to the inactive (zero) electrodes, rather than to -# the active ones. -# -k <- profile[1] -if(!inherits(epgdata, "EPG")) p<- palate(epgdata) -else p <- epgdata -# in case there is only one palate -if(length(dim(p) )==2) -p <- array(p, c(8, 8, 1)) -if(length(rows) > 1 & length(columns) > 1) -p <- (p[rows, columns, ]) -else -p <- array(p[rows,columns,], c(length(rows), length(columns), dim(p)[3])) - - -# in case there is only one palate -if(length(dim(p) )==2) -p <- array(p, c(length(rows), length(columns), 1)) -summation <- apply(p, c(k, 3), sum) -summation <- t(summation) -if(inactive) { -mat <- matrix(ncol(p), nrow = nrow(summation), ncol = ncol(summation) -) -summation <- mat - summation + # function that sums by row or by column + # either the active or inactive electrodes of EPG-data. + # returns trackdata of the summed result. + # epgdata: epg data as returned by emu.track() + # allcontacts: if TRUE, then all the contacts per palate are summed + # column: if TRUE, then the summation is applied to + # columns, rather than to rows + # inactive: if TRUE, then the summation is applied + # to the inactive (zero) electrodes, rather than to + # the active ones. + # + k <- profile[1] + if(!inherits(epgdata, "EPG")) p<- palate(epgdata) + else p <- epgdata + # in case there is only one palate + if(length(dim(p) )==2) + p <- array(p, c(8, 8, 1)) + if(length(rows) > 1 & length(columns) > 1) + p <- (p[rows, columns, ]) + else + p <- array(p[rows,columns,], c(length(rows), length(columns), dim(p)[3])) + + + # in case there is only one palate + if(length(dim(p) )==2) + p <- array(p, c(length(rows), length(columns), 1)) + summation <- apply(p, c(k, 3), sum) + summation <- t(summation) + if(inactive) { + mat <- matrix(ncol(p), nrow = nrow(summation), ncol = ncol(summation) + ) + summation <- mat - summation + } + if(length(profile)==2 & profile[2]==3) + summation <- apply(summation, 1, sum) + if(is.trackdata(epgdata)) + result <- as.trackdata(summation, epgdata$index, epgdata$ftime, trackname) + else result <- summation + result } -if(length(profile)==2 & profile[2]==3) -summation <- apply(summation, 1, sum) -if(is.trackdata(epgdata)) -result <- as.trackdata(summation, epgdata$index, epgdata$ftime, trackname) -else result <- summation -result -} - diff --git a/R/eplot.R b/R/eplot.R index 330171b7..f3f1e84f 100644 --- a/R/eplot.R +++ b/R/eplot.R @@ -1,220 +1,337 @@ -############################################################################# -# # -# copyright : (C) 2000 SHLRC, Macquarie University # -# email : Steve.Cassidy@mq.edu.au # -# url : http://www.shlrc.mq.edu.au/emu # -# # -# This program is free software; you can redistribute it and/or modify # -# it under the terms of the GNU General Public License as published by # -# the Free Software Foundation; either version 2 of the License, or # -# (at your option) any later version. # -# # -############################################################################# - -########################################################################## -`eplot` <- -function (x, labs, chars, formant = FALSE, scaling = "linear", - prob = 0.95, nsdev = NULL, dopoints = FALSE, doellipse = TRUE, - centroid = FALSE, axes = TRUE, - xlim, ylim, col = TRUE, lty = FALSE, - lwd = NULL, ...) +##' Plot ellipses for two-dimensional data (DEPRECATED see below) +##' +##' The function plots ellipses for different categories from two-dimensional +##' data. DEPRECATED as this function does not play well with with the new +##' resultType = "tibble" of \code{get_trackdata()}. See \url{https://ips-lmu.github.io/The-EMU-SDMS-Manual/recipe-plottingSnippets.html} +##' for an alternative plotting routines using ggplot2. +##' +##' +##' @param x A two-columned matrix of data +##' @param labs An optional vector of labels, parallel to 'data' +##' @param chars An optional vector of labels, parallel to 'data'. If this +##' argument is specified these labels will be plotted rather than the labels +##' in 'labs'. +##' @param formant If TRUE) then the data is negated and the axes are switched +##' so that, for formant data, the plot is made with decreasing F2 on the +##' x-axis and decreasing F1 on the y-axis. +##' @param scaling Either "mel" or "bark" for mel or bark scaling of the data +##' @param prob A single numeric vector greater than zero and less than 1 +##' representing the confidence interval of the ellipse contours. Defaults to +##' 0.95 +##' @param nsdev Defines the length of the major and minor axes of the ellipses +##' in terms of the standard deviation of the data and overrides the prob +##' argument. +##' @param dopoints If TRUE) character labels (from 'labs' or 'chars') are +##' plotted for each data point +##' @param doellipse If TRUE, ellipses are drawn on the plot. If FALSE, no +##' ellipses are drawn and, if 'dopoints' is also FALSE, 'centroids' is set to +##' TRUE +##' @param centroid One label for each ellipse is drawn +##' @param axes If TRUE axes are drawn on the plot. +##' @param xlim A vector of two numeric values giving the range of the x-axis. +##' @param ylim A vector of two numeric values giving the range of the y-axis. +##' @param col If colour is TRUE) the ellipses and labels will be plotted in +##' different colours +##' @param lty If linetype is TRUE) the ellipses will be plotted with different +##' linetypes. This is useful for plots that will be printed. +##' @param lwd A code passed to the lwd argument in plotting functions. 'lwd' +##' can be either a single element numeric vector, or its length must be equal +##' to the number of unique types in labs. For example, if lwd=3 and if labs = +##' c("a", "b", "a", "c"), then the output is c(3, 3, 3, 3). Alternatively, if +##' lwd = c(2,3,1), then the output is c(2, 3, 2, 1) for the same example. The +##' default is NULL in which case all lines are drawn with lwd=1 +##' @param ... graphical options \link{par} +##' @return NULL +##' @author Jonathan Harrington, Steve Cassidy +##' @seealso \code{\link{dcut}} +##' @keywords dplot +##' @examples +##' +##' +##' data(vowlax) +##' data <- cbind(vowlax.df$F1,vowlax.df$F2) +##' phonetic = vowlax.l +##' word = vowlax.word +##' +##' eplot(data, phonetic) +##' +##' +##' eplot(data, phonetic, form=TRUE, main="F1 x F2 plane", centroid=TRUE) +##' eplot(data, phonetic, form=TRUE, main="F1 x F2 plane", dopoints=TRUE) +##' eplot(data, phonetic, form=TRUE, main="F1 x F2 plane in Bark", +##' dopoints=TRUE, scaling="bark") +##' eplot(data, phonetic, form=TRUE, main="F1 x F2 plane in Bark b/w with linetype", +##' col=FALSE, lty=TRUE, dopoints=TRUE, scaling="bark") +##' eplot(data, phonetic, form=TRUE, main="F1 x F2 plane", +##' doellipse=FALSE, dopoints=TRUE) +##' eplot(data, phonetic, form=TRUE, dopoints=TRUE, +##' prob=0.5, main="F1 x F2 plane, 50% confidence intervals") +##' eplot(data, phonetic, form=TRUE, dopoints=TRUE, +##' nsdev=2, main="F1 x F2 plane, 2 standard deviations") +##' +##' +##' temp <- phonetic %in% c("a", "O") +##' eplot(data[temp,], phonetic[temp], form=TRUE, main="F1 x F2 [A] and [O] only", centroid=TRUE) +##' +##' +##' temp <- phonetic=="O" +##' eplot(data[temp,], phonetic[temp], word[temp], form=TRUE, +##' dopoints=TRUE, main="[O] only showing word labels") +##' +##' +##' +##' +##' +##' @export eplot +`eplot` <- function (x, labs, chars, formant = FALSE, scaling = "linear", + prob = 0.95, nsdev = NULL, dopoints = FALSE, + doellipse = TRUE, centroid = FALSE, axes = TRUE, + xlim, ylim, col = TRUE, lty = FALSE, lwd = NULL, ...) { - ocall <- match.call() - if (is.null(nsdev)) - nsdev <- sqrt(qchisq(prob, 2)) - - if (missing(labs)) - labs <- rep(".", nrow(x)) - if (!doellipse & !dopoints) - centroid <- TRUE - if (nrow(x) != length(labs)) - stop("x and labels don't match") - if (ncol(x) != 2) - stop("Eplot needs 2 dimensional x") - if (!missing(chars)) - if (length(labs) != length(chars)) - stop("Length of chars must match that of labs") - if (scaling == "mel") - x <- mel(x) - if (scaling == "bark") - x <- bark(x) - if (formant) { - x <- cbind(-x[, 2], -x[, 1]) - if (!missing(xlim)) - xlim <- -rev(xlim) - if (!missing(ylim)) - ylim <- -rev(ylim) + oldpar = graphics::par(no.readonly=TRUE) + on.exit(graphics::par(oldpar)) + + ocall <- match.call() + if (is.null(nsdev)) + nsdev <- sqrt(qchisq(prob, 2)) + + if (missing(labs)) + labs <- rep(".", nrow(x)) + if (!doellipse & !dopoints) + centroid <- TRUE + if (nrow(x) != length(labs)) + stop("x and labels don't match") + if (ncol(x) != 2) + stop("Eplot needs 2 dimensional x") + if (!missing(chars)) + if (length(labs) != length(chars)) + stop("Length of chars must match that of labs") + if (scaling == "mel") + x <- mel(x) + if (scaling == "bark") + x <- bark(x) + if (formant) { + x <- cbind(-x[, 2], -x[, 1]) + if (!missing(xlim)) + xlim <- -rev(xlim) + if (!missing(ylim)) + ylim <- -rev(ylim) + } + col.lty <- mu.colour(labs, col, lty, lwd) + lty <- col.lty$linetype + linewidth <- col.lty$lwd + uniqlabels <- unique(labs) + emat <- nums <- cen <- k <- l <- NULL + for (j in uniqlabels) { + temp <- labs == j + mat <- x[temp, , drop = FALSE] + if (nrow(mat) > 2) { + evals <- eigen(var(mat)) + m1 <- mean(mat[, 1]) + m2 <- mean(mat[, 2]) + e <- ellipse(m1, m2, sqrt(evals$values[1]) * nsdev, + sqrt(evals$values[2]) * nsdev, aperm(evals$vectors, + c(2, 1))) } - col.lty <- mu.colour(labs, col, lty, lwd) - lty <- col.lty$linetype - linewidth <- col.lty$lwd - uniqlabels <- unique(labs) - emat <- nums <- cen <- k <- l <- NULL - for (j in uniqlabels) { - temp <- labs == j - mat <- x[temp, , drop = FALSE] - if (nrow(mat) > 2) { - evals <- eigen(var(mat)) - m1 <- mean(mat[, 1]) - m2 <- mean(mat[, 2]) - e <- ellipse(m1, m2, sqrt(evals$values[1]) * nsdev, - sqrt(evals$values[2]) * nsdev, aperm(evals$vectors, - c(2, 1))) - } - else { - cat("Too few x points for label ", j, " will plot a point or a line\n") - m1 <- mean(mat[, 1]) - m2 <- mean(mat[, 2]) - e <- mat - } - nums <- c(nums, nrow(e)) - emat <- rbind(emat, e) - k <- c(k, col.lty$legend$col[match(j, col.lty$legend$lab)]) - l <- c(l, col.lty$legend$lty[match(j, col.lty$legend$lab)]) - linewidth <- c(linewidth, col.lty$legend$lwd[match(j, - col.lty$legend$lab)]) - if (centroid) - cen <- rbind(cen, cbind(m1, m2)) + else { + warning("Too few x points for label ", j, " will plot a point or a line\n") + m1 <- mean(mat[, 1]) + m2 <- mean(mat[, 2]) + e <- mat } + nums <- c(nums, nrow(e)) + emat <- rbind(emat, e) + k <- c(k, col.lty$legend$col[match(j, col.lty$legend$lab)]) + l <- c(l, col.lty$legend$lty[match(j, col.lty$legend$lab)]) + linewidth <- c(linewidth, col.lty$legend$lwd[match(j, + col.lty$legend$lab)]) + if (centroid) + cen <- rbind(cen, cbind(m1, m2)) + } + if (doellipse) { + if (missing(xlim)) + xlim <- range(c(emat[, 1], x[, 1])) + if (missing(ylim)) + ylim <- range(c(emat[, 2], x[, 2])) + } + else { + if (missing(xlim)) + xlim <- range(x[, 1]) + if (missing(ylim)) + ylim <- range(x[, 2]) + } + rightlim <- cumsum(nums) + leftlim <- cumsum(nums) - (nums - 1) + rowmarker <- cbind(leftlim, rightlim) + for (j in 1:nrow(rowmarker)) { + lowerlim <- rowmarker[j, 1] + upperlim <- rowmarker[j, 2] if (doellipse) { - if (missing(xlim)) - xlim <- range(c(emat[, 1], x[, 1])) - if (missing(ylim)) - ylim <- range(c(emat[, 2], x[, 2])) + graphics::plot(emat[lowerlim:upperlim, ], type = "l", axes = FALSE, + xlim = xlim, ylim = ylim, col = k[j], + lty = as.numeric(l[j]), lwd = as.numeric(linewidth[j]), xlab="", ylab="", main="") } else { - if (missing(xlim)) - xlim <- range(x[, 1]) - if (missing(ylim)) - ylim <- range(x[, 2]) + graphics::plot(emat[lowerlim:upperlim, ], type = "n", axes = FALSE, + xlim = xlim, ylim = ylim, col = k[j], + lty = as.numeric(l[j]), lwd = as.numeric(linewidth[j]), xlab="", ylab="", main="") } - rightlim <- cumsum(nums) - leftlim <- cumsum(nums) - (nums - 1) - rowmarker <- cbind(leftlim, rightlim) - for (j in 1:nrow(rowmarker)) { - lowerlim <- rowmarker[j, 1] - upperlim <- rowmarker[j, 2] - if (doellipse) { - plot(emat[lowerlim:upperlim, ], type = "l", axes = FALSE, - xlim = xlim, ylim = ylim, col = k[j], - lty = as.numeric(l[j]), lwd = as.numeric(linewidth[j]), xlab="", ylab="", main="") - } - else { - plot(emat[lowerlim:upperlim, ], type = "n", axes = FALSE, - xlim = xlim, ylim = ylim, col = k[j], - lty = as.numeric(l[j]), lwd = as.numeric(linewidth[j]), xlab="", ylab="", main="") - } - if (dopoints) { - centroid <- FALSE - singlelab <- uniqlabels[j] - temp <- labs == singlelab - if (!missing(chars)) - { - if(is.numeric(chars)) - points(x[temp, 1], x[temp, 2], pch=chars[temp], - col = k[j]) - else - text(x[temp, 1], x[temp, 2], chars[temp], - col = k[j]) - } - else text(x[temp, 1], x[temp, 2], labs[temp], + if (dopoints) { + centroid <- FALSE + singlelab <- uniqlabels[j] + temp <- labs == singlelab + if (!missing(chars)) + { + if(is.numeric(chars)) + graphics::points(x[temp, 1], x[temp, 2], pch=chars[temp], + col = k[j]) + else + graphics::text(x[temp, 1], x[temp, 2], chars[temp], + col = k[j]) + } + else graphics::text(x[temp, 1], x[temp, 2], labs[temp], col = k[j]) - } - if (centroid) { - singlelab <- uniqlabels[j] - text(cen[j, 1], cen[j, 2], singlelab, col = k[j]) - } - if (j < nrow(rowmarker)) - par(new = TRUE) } - par(col = 1) - - if (axes) { - if (formant) { - xaxp <- par("xaxp") - yaxp <- par("yaxp") - xat <- seq(xaxp[1], xaxp[2], length.out = xaxp[3] + - 1) - yat <- seq(yaxp[1], yaxp[2], length.out = yaxp[3] + - 1) - axis(1, at = xat, labels = -xat) - axis(2, at = yat, labels = -yat, srt = 90) - } - else { - axis(1) - axis(2) - } + if (centroid) { + singlelab <- uniqlabels[j] + graphics::text(cen[j, 1], cen[j, 2], singlelab, col = k[j]) + } + if (j < nrow(rowmarker)) + graphics::par(new = TRUE) + } + graphics::par(col = 1) + + if (axes) { + if (formant) { + xaxp <- graphics::par("xaxp") + yaxp <- graphics::par("yaxp") + xat <- seq(xaxp[1], xaxp[2], length.out = xaxp[3] + + 1) + yat <- seq(yaxp[1], yaxp[2], length.out = yaxp[3] + + 1) + graphics::axis(1, at = xat, labels = -xat) + graphics::axis(2, at = yat, labels = -yat, srt = 90) + } + else { + graphics::axis(1) + graphics::axis(2) } - title(...) - box(...) + } + graphics::title(...) + graphics::box(...) } + + + + + + + + +##' Calculate ellipse coordinates +##' +##' Calculates ellipse coordinates for eplot +##' +##' +##' @param x X coordinate of center +##' @param y y coordinate of center +##' @param rx Radius in the x direction +##' @param ry Radius in the y direction +##' @param orient Orientation, in radians. The angle of the major axis to the x +##' axis. +##' @param incr The increment between points, in degrees. +##' @return A matrix of x and y coordinates for the ellipse. +##' @seealso \code{\link{eplot}} +##' @keywords misc +##' @export ellipse "ellipse"<- function(x, y, rx, ry, orient, incr = 360/100) { - rincr <- radians(incr) - theta <- seq(0, 2 * pi, rincr) - xcoord <- rx * cos(theta) - ycoord <- ry * sin(theta) - mat <- cbind(xcoord, ycoord) - mat <- mat %*% orient - mat[, 1] <- mat[, 1] + x - mat[, 2] <- mat[, 2] + y - mat + rincr <- radians(incr) + theta <- seq(0, 2 * pi, rincr) + xcoord <- rx * cos(theta) + ycoord <- ry * sin(theta) + mat <- cbind(xcoord, ycoord) + mat <- mat %*% orient + mat[, 1] <- mat[, 1] + x + mat[, 2] <- mat[, 2] + y + mat } + + + + + + + + +##' polygonplot +##' +##' plots a polygon +##' +##' +##' @param data data matrix +##' @param labels labels +##' @param order order +##' @param formant formant TRUE or FALSE transposes the axes +##' @param axes axes +##' @param xlab xlab +##' @param ylab ylab +##' @param main main +##' @param xlim xlim +##' @param ylim ylim +##' @keywords internal +##' @export polygonplot "polygonplot" <- function(data, labels, order, formant=TRUE, axes=TRUE, xlab="", ylab="", main = "", xlim, ylim) { - + if( ncol(data) > 2 ) { data <- data[,1:2] } if( ncol(data) != 2 ) { stop( "polygonplot() requires two columns of data" ) } - + if(formant) data <- cbind(-data[, 2], -data[, 1]) - - + + points <- NULL for( l in order ) { tmp <- matrix(data[labels==l],ncol=2) points <- rbind( points, apply(tmp, 2, mean) ) } + + graphics::plot( points, type="b", pch=" ", axes=FALSE, xlab="", ylab="" ) + graphics::text( points, order, axes=FALSE, , xlab="", ylab="" ) + + oldpar = graphics::par(col = 1) + on.exit(graphics::par(oldpar)) - plot( points, type="b", pch=" ", axes=FALSE, xlab="", ylab="" ) - text( points, order, axes=FALSE, , xlab="", ylab="" ) - - par(col = 1) - box() + graphics::box() if(axes) { if(formant) { if(missing(xlab)) - xlab <- "F2" + xlab <- "F2" if(missing(ylab)) - ylab <- "F1" - xaxp <- par("xaxp") - yaxp <- par("yaxp") + ylab <- "F1" + xaxp <- graphics::par("xaxp") + yaxp <- graphics::par("yaxp") xat <- seq(xaxp[1], xaxp[2], length.out = xaxp[3] + 1) yat <- seq(yaxp[1], yaxp[2], length.out = yaxp[3] + 1) - axis(1, at = xat, labels = - xat) - axis(2, at = yat, labels = - yat, srt = 90) + graphics::axis(1, at = xat, labels = - xat) + graphics::axis(2, at = yat, labels = - yat, srt = 90) } else { - axis(1) - axis(2) + graphics::axis(1) + graphics::axis(2) } } - title(main = main, xlab = xlab, ylab = ylab) + graphics::title(main = main, xlab = xlab, ylab = ylab) } - - -# Local Variables: -# mode:S -# S-temp-buffer-p:t -# End: diff --git a/R/euclidean.R b/R/euclidean.R index f76629d4..9ea26aea 100644 --- a/R/euclidean.R +++ b/R/euclidean.R @@ -1,19 +1,21 @@ -############################################################################# -# # -# copyright : (C) 2000 SHLRC, Macquarie University # -# email : Steve.Cassidy@mq.edu.au # -# url : http://www.shlrc.mq.edu.au/emu # -# # -# This program is free software; you can redistribute it and/or modify # -# it under the terms of the GNU General Public License as published by # -# the Free Software Foundation; either version 2 of the License, or # -# (at your option) any later version. # -# # -############################################################################# - - -library(stats) - +##' Find the inter-euclidean distance for a data matrix +##' +##' Finds the inter-euclidean distance for a data matrix +##' +##' +##' @aliases euclidean euclidean.metric +##' @param data A vector or matrix of numerical data. +##' @param m The first column of data to be used in the distance calculation. +##' @param n The last column of data to be used in the distance calculation. +##' @return Calculates the euclidean distance between successive rows of the +##' matrix based on columns m:n. +##' @seealso steady +##' @keywords misc +##' @examples +##' +##' euclidean(cbind(c(1,2,3,4), c(2,3,2,2))) +##' @import stats +##' @export euclidean "euclidean"<- function(data, m = 1, n = ncol(data)) { ## returns a vector of Euclidean distances between adjacent @@ -23,15 +25,12 @@ library(stats) ## It makes use of the Splus program dist ## m and n are the columns of data over which the euclidean ## distances are to be calculated (defaults to all the columns) + if (!requireNamespace("stats", quietly = TRUE)){ + stop("'stats' package required to run 'stats::dist()'") + } data <- data[, m:n] lengths <- nrow(data) downstep <- seq((lengths - 1), 2, -1) values <- c(1, 1 + cumsum(downstep)) - dist(data)[values] + stats::dist(data)[values] } - - -# Local Variables: -# mode:S -# S-temp-buffer-p:t -# End: diff --git a/R/fft.fns.R b/R/fft.fns.R new file mode 100644 index 00000000..682d89b5 --- /dev/null +++ b/R/fft.fns.R @@ -0,0 +1,361 @@ +# +# +# +# "fcalc"<- function(fftdata, byrow = TRUE, samfreq = 20000, nyq = samfreq/2, +# low = 0, high = nyq, fun = sum, ...) +# { +# ## fftdata: a matrix of fft values as returned by muspec() and muslice() +# ## low: starting frequency over which the function in fun is to be applied +# ## high: ending frequency in Hz over which the function is to eb applied +# ## fun: apply which function? Can be any S-PLUS arithmetic function +# ## e.g. fft.calc(fftdata, low=1000, high=4000, fun=var) calculates +# ## the variance of each spectrum in the 1-4 kHz range +# ## written by Jonathan Harrington, 1992 +# mat <- NULL +# +# for(j in 1:length(low)) { +# values <- fft.extract(fftdata, samfreq, nyq, low[j], high[j]) +# newdata <- 10^(values/10) +# +# if(!is.matrix(newdata)) +# resdata <- fun(newdata, ...) +# else if(byrow) +# resdata <- apply(newdata, 1, fun, ...) +# else +# resdata <- apply(newdata, 2, fun, ...) +# +# mat$vals <- cbind(mat$vals, resdata) +# olab <- paste(paste(low[j], high[j], sep = "-"), "Hz", sep = "") +# mat$dimlabs <- c(mat$dimlabs, olab) +# } +# +# mat$vals <- 10 * log(mat$vals, base = 10) +# +# if(ncol(mat$vals) > 1) +# dimnames(mat$vals) <- list(NULL, mat$dimlabs) +# else +# mat$vals <- c(mat$vals) +# +# mat$vals +# } +# +# +# "fft.complex"<- function(data, normlen = TRUE) +# { +# ## data: usally a matrix of sampled data values with successive columns +# ## corresponding to successive segments +# ## data can also be a vector corresponding to a single segment +# ## calculates the real and imaginary parts of the fft for each column +# ## if normlen is True, the fft is normalised by dividing each +# ## value by the square root of its length +# if(is.matrix(data) == FALSE) { +# n <- length(data) +# fftres <- fft(data) +# } +# else { +# fftres <- apply(data, 2, fft) +# n <- nrow(data) +# } +# if(normlen) +# fftres <- fftres/sqrt(n) +# fftres +# } +# +# +# "fft.extract"<- function(fftdata, samfreq = 20000, nyq = samfreq/2, +# low = 0, high = nyq) +# { +# ## assume a sampling frequency of n, i.e. 20 kHz +# ## extract fft components in the frequency range low to high, where low +# ## and high are in Hertz +# +# if(!is.matrix(fftdata)) fftdata <- rbind(fftdata) +# fftlen <- ncol(fftdata) +# +# if(low != 0) +# left <- round(((low/nyq) * (fftlen - 1)) + 1) +# else +# left <- 1 +# +# if(high != nyq) +# right <- round(((high/nyq) * (fftlen - 1)) + 1) +# else +# right <- fftlen +# +# fftdata[, left:right] +# } +# +# +# "fft.mod"<- function(fftvals, unreflected = FALSE, transpose = TRUE) +# { +# ## calculate the Mod values from fftvals. fftvals consists +# ## of real and imag. parts and is usually the output of +# ## Fft.complex. Successive columns of fftvals relate +# ## to successive segments. fftvals may also be a vector +# ## that relates to a single segment. +# ## If unreflected is true, only the first n/2 +# ## points are returned per column. If transpose is true +# ## the resulting matrix is transposed such that the nth row +# ## that is derived corresponds to the nth row of the segment +# ## list that was used to produce fftvals +# if(is.matrix(fftvals) == FALSE) fftvals <- cbind(fftvals) +# n <- nrow(fftvals) +# fft.mod <- apply(fftvals, 2, Mod) +# +# if(unreflected) fft.mod <- fft.mod[1:(n/2), ] +# +# if(transpose) fft.mod <- t(fft.mod) +# +# fft.mod +# } +# +# +# "fft.ratio"<- function(fftdata, samfreq = 20000, nyq = samfreq/2, +# low.a = 0, high.a = nyq, +# low.b = 0, high.b = nyq) +# { +# ## finds energy ratios +# ## fftdata: a matrix of spectral values, as returned by muspec, muslice +# ## low.ahigh.a, low.b, high.b: values in Hertz +# ## calculates the ratio of energy in the range low.a to high.a : low.b to high.b +# ## e.g. to calculate the ratio of energy from 0-300 Hz to 2000-4000 Hz, +# ## low.a=0, low.b=300, high.a=2000, high.b=4000. The default for low.a, low.b +# ## is 0 Hz, for high.a, high.b 10000 Hz. Therefore to calculat the ratio +# ## of energy from 2-4 kHz to the rest of the spectrum, enter low.a=2000, +# ## high.a=4000 leaving low.b, high.b unspecified +# ## written by Jonathan Harrington, 1992 +# values.a <- fft.extract(fftdata, samfreq, nyq, low.a, high.a) +# newdata <- 10^(values.a/20) +# if(!is.matrix(newdata)) +# resdata.a <- sum(newdata) +# else resdata.a <- apply(newdata, 1, sum) +# values.b <- fft.extract(fftdata, samfreq, nyq, low.b, high.b) +# newdata.b <- 10^(values.b/20) +# if(!is.matrix(newdata.b)) +# resdata.b <- sum(newdata.b) +# else resdata.b <- apply(newdata.b, 1, sum) +# resdata.a/resdata.b +# } +# +# "fplot"<- function(fftdata, labs = NULL, which = NULL, colour = TRUE, +# linetype = FALSE, samfreq = 20000, nyq = samfreq/2, +# xlab = "Frequency (Hz)", ylab = "Intensity (dB)", +# low = 0, high = nyq, dbrange = NULL, axes = TRUE, +# main = "", average = FALSE, smoothing = FALSE, points = 20, +# coeff = FALSE, type = "l", super = FALSE, legn="tl", cex=1) +# { +# ## a matrix of fft values, as returned from muspec() or muslice() +# ## low: plot from this frequency in Hz +# ## high: plot up to this frequency in Hz (default range is 0-10000) +# ## dbrange: specify a range for the y-axis in db +# ## axes: if FALSE, no axes will be plotted +# ## main: provide a main axis title +# ## super: superimpose FFTs that occur in successive rows of fftdata +# ## on the same plot +# ## labs: provide a label file which is parallel to fftdata; +# ## the resulting plot will be color-coded (use super=TRUE to superimpose the ffts +# ## colmain: specify a color for the axes +# ## returns: spectral plots, db against Hz +# ## examples: mu.sub37 +# ## written by Jonathan Harrington, 1992 +# ## assume a sampling frequency of n, i.e. 20 kHz +# flag <- FALSE +# colour.flag <- colour +# linetype.flag <- linetype +# +# if(!is.logical(colour)) stop("colour must be TRUE or FALSE") +# if(!is.logical(linetype)) stop("linetype must be TRUE or FALSE") +# +# mat <- NULL +# if(!is.matrix(fftdata)) +# fftdata <- rbind(fftdata) +# if(is.null(labs)) +# {legn <- FALSE +# flag <- TRUE +# labs <- rep(".", nrow(fftdata))} +# +# if(!is.null(which)) { +# temp <- muclass(labs, which) +# fftdata <- fftdata[temp, ] +# labs <- labs[temp] +# } +# if(average) +# { +# av.data <- fplot.mean(fftdata, labs) +# fftdata <- av.data$mspec +# labs <- av.data$lab +# } +# if(smoothing) { +# if(super) { +# dbrange <- range(fftdata) +# zdat <- fftdata +# } +# cdat <- cepstrum(fftdata, points = points, spectrum = TRUE) +# fftdata <- -cdat$cep +# if(!is.matrix(fftdata)) +# fftdata <- rbind(fftdata) +# } +# +# col.lty <- mu.colour(labs, colour, linetype) +# colour <- col.lty$colour +# lty <- col.lty$linetype +# +# fftlen <- ncol(fftdata) +# if(is.null(dbrange)) dbrange <- range(fftdata) +# if(low != 0) +# left <- round(((low/nyq) * (fftlen - 1)) + 1) +# else +# left <- 1 +# +# if(high != nyq) +# right <- round(((high/nyq) * (fftlen - 1)) + 1) +# else +# right <- fftlen +# +# nums <- seq(low, high, length = (right - left) + 1) +# +# for(i in 1:nrow(fftdata)) { +# plot(nums, fftdata[i, left:right], type = type, xlab = "", +# ylab = "", col = colour[i], ylim = dbrange, +# , lty = lty[i]) +# par(new = TRUE) +# } +# +# if(axes) +# title(main = main, xlab = xlab, ylab = ylab, col = 1, cex=cex) +# +# if(legn != FALSE){ +# legn <- mu.legend(legn, c(low, high), dbrange) +# legend(legn$x, legn$y, col.lty$legend$lab, col = col.lty$legend$col, +# lty = col.lty$legend$lty, cex = cex) +# } +# if(smoothing) { +# if(super) { +# par(new = TRUE) +# if(flag) labs <- NULL +# +# fplot(zdat, labs = labs, samfreq = samfreq, nyq = nyq, +# xlab = "", ylab = "", low = low, high = high, +# dbrange = dbrange, main = "", cex=cex, +# colour=colour.flag, linetype=linetype.flag) +# par(new = FALSE) +# } +# if(coeff) +# mat$coeff <- cdat$coeff +# } +# mat +# } +# +# +# "fplot.mean"<- function(fftdata, labs) +# { +# ## fftdata: spectral values +# ## labs: a parallel label file +# ## returns: $mspec an averaged spectrum, per label-type in labs +# ## returns: $lab: the label corresponding to the row of mspec +# mat <- NULL +# fftdata <- 10^(fftdata/20) +# +# for(j in unique(labs)) { +# temp <- labs == j +# vals <- fftdata[temp, ] +# mvals <- apply(vals, 2, mean) +# mat$mspec <- rbind(mat$mspec, mvals) +# mat$lab <- c(mat$lab, j) +# } +# +# mat$mspec <- 20 * log(mat$mspec, base=10) +# mat +# } +# +# +# "moment"<- function(specvals, least = TRUE, nyq = 10000, low = 0, high = nyq) +# { +# ## specvals: the output of muspec; dB-FFT values +# ## least: this normalises each spectrum so that its minimum +# ## db value is set to 0 +# ## low, high: (in Hz). Over which spectral range do you +# ## want to calculate the first two spectral moments +# ## returns: $first: the first spectral moment (spectral centre +# ## of gravity; +# ## $second: the second spectral moment (variance, or moment +# ## of inertia +# matout <- NULL +# fftlen <- ncol(specvals) +# if(!is.matrix(specvals)) +# specvals <- rbind(specvals) +# if(least) { +# minv <- apply(specvals, 1, min) +# minv <- rep(minv, times = rep(fftlen, length(minv))) +# minv <- matrix(t(minv), ncol = fftlen, byrow = TRUE) +# specvals <- specvals - minv +# } +# x <- seq(0, nyq, length = fftlen) +# x <- rep(x, nrow(specvals)) +# x <- matrix(t(x), ncol = fftlen, byrow = TRUE) +# if((low != 0) | (high != nyq)) { +# x <- fft.extract(x, low = low, high = high) +# specvals <- fft.extract(specvals, low = low, high = high) +# } +# prodvals <- x * specvals +# sumvals <- apply(prodvals, 1, sum) +# sumspecvals <- apply(specvals, 1, sum) +# matout$first <- sumvals/sumspecvals +# sqprodvals <- specvals * (x^2) +# sumsqprodvals <- apply(sqprodvals, 1, sum) +# matout$second <- sqrt((sumsqprodvals/sumspecvals) - (matout$first^2)) +# matout +# } +# +# +# +# "moments" <- +# function(count, x, minval=FALSE) +# +# { +# # compute moments. x is a numeric class +# # count is the frequency with which that +# # particular class occurs +# # This function gives exactly the same +# # results as those for the mean, variance +# # skewness and kurtosis in example Table 3.13.1 +# # p. 87, Snedecor & Cochran, 'Statistical Methods' +# # 6th Edition, 1975. Let the arguments count and x +# # equal f and U respectively in their example +# # the centre of gravity with minval = FALSE. +# # the first two moments in this function +# # also give the same results as in Harrington & Cassidy. +# if(minval) +# count <- count - min(count) +# if(missing(x)) +# { +# if(is.spectral(count)) +# x <- trackfreq(count) +# else +# x <- 0:(length(count)-1) +# } +# k <- 1 +# mom1 <- sum((x - 0)^k * count) / sum(count) +# # the variance +# k <- 2 +# mom2 <- sum((x - mom1)^k * count) / sum(count) +# +# # third moment +# k <- 3 +# mom3 <- (sum((x - mom1)^k * count) / sum(count)) / (mom2 * sqrt(mom2)) +# +# # fourth moment +# k <- 4 +# # peaked distributions show positive kurtosis +# # flat-topped distributions show negative kurtosis +# mom4 <- (sum((x - mom1)^k * count) / sum(count)) / mom2^2 - 3 +# c(mom1, mom2, mom3, mom4) +# } +# +# +# +# +# # Local Variables: +# # mode:S +# # S-temp-buffer-p:t +# # End: diff --git a/R/fft.fns.Sx b/R/fft.fns.Sx deleted file mode 100644 index 3a523d17..00000000 --- a/R/fft.fns.Sx +++ /dev/null @@ -1,373 +0,0 @@ -############################################################################# -# # -# copyright : (C) 2000 SHLRC, Macquarie University # -# email : Steve.Cassidy@mq.edu.au # -# url : http://www.shlrc.mq.edu.au/emu # -# # -# This program is free software; you can redistribute it and/or modify # -# it under the terms of the GNU General Public License as published by # -# the Free Software Foundation; either version 2 of the License, or # -# (at your option) any later version. # -# # -############################################################################# - - - -"fcalc"<- function(fftdata, byrow = T, samfreq = 20000, nyq = samfreq/2, - low = 0, high = nyq, fun = sum, ...) -{ - ## fftdata: a matrix of fft values as returned by muspec() and muslice() - ## low: starting frequency over which the function in fun is to be applied - ## high: ending frequency in Hz over which the function is to eb applied - ## fun: apply which function? Can be any S-PLUS arithmetic function - ## e.g. fft.calc(fftdata, low=1000, high=4000, fun=var) calculates - ## the variance of each spectrum in the 1-4 kHz range - ## written by Jonathan Harrington, 1992 - mat <- NULL - - for(j in 1:length(low)) { - values <- fft.extract(fftdata, samfreq, nyq, low[j], high[j]) - newdata <- 10^(values/10) - - if(!is.matrix(newdata)) - resdata <- fun(newdata, ...) - else if(byrow) - resdata <- apply(newdata, 1, fun, ...) - else - resdata <- apply(newdata, 2, fun, ...) - - mat$vals <- cbind(mat$vals, resdata) - olab <- paste(paste(low[j], high[j], sep = "-"), "Hz", sep = "") - mat$dimlabs <- c(mat$dimlabs, olab) - } - - mat$vals <- 10 * log(mat$vals, base = 10) - - if(ncol(mat$vals) > 1) - dimnames(mat$vals) <- list(NULL, mat$dimlabs) - else - mat$vals <- c(mat$vals) - - mat$vals -} - - -"fft.complex"<- function(data, normlen = T) -{ - ## data: usally a matrix of sampled data values with successive columns - ## corresponding to successive segments - ## data can also be a vector corresponding to a single segment - ## calculates the real and imaginary parts of the fft for each column - ## if normlen is True, the fft is normalised by dividing each - ## value by the square root of its length - if(is.matrix(data) == F) { - n <- length(data) - fftres <- fft(data) - } - else { - fftres <- apply(data, 2, fft) - n <- nrow(data) - } - if(normlen) - fftres <- fftres/sqrt(n) - fftres -} - - -"fft.extract"<- function(fftdata, samfreq = 20000, nyq = samfreq/2, - low = 0, high = nyq) -{ - ## assume a sampling frequency of n, i.e. 20 kHz - ## extract fft components in the frequency range low to high, where low - ## and high are in Hertz - - if(!is.matrix(fftdata)) fftdata <- rbind(fftdata) - fftlen <- ncol(fftdata) - - if(low != 0) - left <- round(((low/nyq) * (fftlen - 1)) + 1) - else - left <- 1 - - if(high != nyq) - right <- round(((high/nyq) * (fftlen - 1)) + 1) - else - right <- fftlen - - fftdata[, left:right] -} - - -"fft.mod"<- function(fftvals, unreflected = F, transpose = T) -{ - ## calculate the Mod values from fftvals. fftvals consists - ## of real and imag. parts and is usually the output of - ## Fft.complex. Successive columns of fftvals relate - ## to successive segments. fftvals may also be a vector - ## that relates to a single segment. - ## If unreflected is true, only the first n/2 - ## points are returned per column. If transpose is true - ## the resulting matrix is transposed such that the nth row - ## that is derived corresponds to the nth row of the segment - ## list that was used to produce fftvals - if(is.matrix(fftvals) == F) fftvals <- cbind(fftvals) - n <- nrow(fftvals) - fft.mod <- apply(fftvals, 2, Mod) - - if(unreflected) fft.mod <- fft.mod[1:(n/2), ] - - if(transpose) fft.mod <- t(fft.mod) - - fft.mod -} - - -"fft.ratio"<- function(fftdata, samfreq = 20000, nyq = samfreq/2, - low.a = 0, high.a = nyq, - low.b = 0, high.b = nyq) -{ - ## finds energy ratios - ## fftdata: a matrix of spectral values, as returned by muspec, muslice - ## low.ahigh.a, low.b, high.b: values in Hertz - ## calculates the ratio of energy in the range low.a to high.a : low.b to high.b - ## e.g. to calculate the ratio of energy from 0-300 Hz to 2000-4000 Hz, - ## low.a=0, low.b=300, high.a=2000, high.b=4000. The default for low.a, low.b - ## is 0 Hz, for high.a, high.b 10000 Hz. Therefore to calculat the ratio - ## of energy from 2-4 kHz to the rest of the spectrum, enter low.a=2000, - ## high.a=4000 leaving low.b, high.b unspecified - ## written by Jonathan Harrington, 1992 - values.a <- fft.extract(fftdata, samfreq, nyq, low.a, high.a) - newdata <- 10^(values.a/20) - if(!is.matrix(newdata)) - resdata.a <- sum(newdata) - else resdata.a <- apply(newdata, 1, sum) - values.b <- fft.extract(fftdata, samfreq, nyq, low.b, high.b) - newdata.b <- 10^(values.b/20) - if(!is.matrix(newdata.b)) - resdata.b <- sum(newdata.b) - else resdata.b <- apply(newdata.b, 1, sum) - resdata.a/resdata.b -} - -"fplot"<- function(fftdata, labs = NULL, which = NULL, colour = T, - linetype = F, samfreq = 20000, nyq = samfreq/2, - xlab = "Frequency (Hz)", ylab = "Intensity (dB)", - low = 0, high = nyq, dbrange = NULL, axes = T, - main = "", average = F, smoothing = F, points = 20, - coeff = F, type = "l", super = F, legn="tl", cex=1) -{ - ## a matrix of fft values, as returned from muspec() or muslice() - ## low: plot from this frequency in Hz - ## high: plot up to this frequency in Hz (default range is 0-10000) - ## dbrange: specify a range for the y-axis in db - ## axes: if F, no axes will be plotted - ## main: provide a main axis title - ## super: superimpose FFTs that occur in successive rows of fftdata - ## on the same plot - ## labs: provide a label file which is parallel to fftdata; - ## the resulting plot will be color-coded (use super=T to superimpose the ffts - ## colmain: specify a color for the axes - ## returns: spectral plots, db against Hz - ## examples: mu.sub37 - ## written by Jonathan Harrington, 1992 - ## assume a sampling frequency of n, i.e. 20 kHz - flag <- F - colour.flag <- colour - linetype.flag <- linetype - - if(!is.logical(colour)) stop("colour must be T or F") - if(!is.logical(linetype)) stop("linetype must be T or F") - - mat <- NULL - if(!is.matrix(fftdata)) - fftdata <- rbind(fftdata) - if(is.null(labs)) - {legn <- F - flag <- T - labs <- rep(".", nrow(fftdata))} - - if(!is.null(which)) { - temp <- muclass(labs, which) - fftdata <- fftdata[temp, ] - labs <- labs[temp] - } - if(average) - { - av.data <- fplot.mean(fftdata, labs) - fftdata <- av.data$mspec - labs <- av.data$lab - } - if(smoothing) { - if(super) { - dbrange <- range(fftdata) - zdat <- fftdata - } - cdat <- cepstrum(fftdata, points = points, spectrum = T) - fftdata <- -cdat$cep - if(!is.matrix(fftdata)) - fftdata <- rbind(fftdata) - } - - col.lty <- mu.colour(labs, colour, linetype) - colour <- col.lty$colour - lty <- col.lty$linetype - - fftlen <- ncol(fftdata) - if(is.null(dbrange)) dbrange <- range(fftdata) - if(low != 0) - left <- round(((low/nyq) * (fftlen - 1)) + 1) - else - left <- 1 - - if(high != nyq) - right <- round(((high/nyq) * (fftlen - 1)) + 1) - else - right <- fftlen - - nums <- seq(low, high, length = (right - left) + 1) - - for(i in 1:nrow(fftdata)) { - plot(nums, fftdata[i, left:right], type = type, xlab = "", - ylab = "", col = colour[i], ylim = dbrange, - , lty = lty[i]) - par(new = T) - } - - if(axes) - title(main = main, xlab = xlab, ylab = ylab, col = 1, cex=cex) - - if(legn != F){ - legn <- mu.legend(legn, c(low, high), dbrange) - legend(legn$x, legn$y, col.lty$legend$lab, col = col.lty$legend$col, - lty = col.lty$legend$lty, cex = cex) - } - if(smoothing) { - if(super) { - par(new = T) - if(flag) labs <- NULL - - fplot(zdat, labs = labs, samfreq = samfreq, nyq = nyq, - xlab = "", ylab = "", low = low, high = high, - dbrange = dbrange, main = "", cex=cex, - colour=colour.flag, linetype=linetype.flag) - par(new = F) - } - if(coeff) - mat$coeff <- cdat$coeff - } - mat -} - - -"fplot.mean"<- function(fftdata, labs) -{ - ## fftdata: spectral values - ## labs: a parallel label file - ## returns: $mspec an averaged spectrum, per label-type in labs - ## returns: $lab: the label corresponding to the row of mspec - mat <- NULL - fftdata <- 10^(fftdata/20) - - for(j in unique(labs)) { - temp <- labs == j - vals <- fftdata[temp, ] - mvals <- apply(vals, 2, mean) - mat$mspec <- rbind(mat$mspec, mvals) - mat$lab <- c(mat$lab, j) - } - - mat$mspec <- 20 * log(mat$mspec, base=10) - mat -} - - -"moment"<- function(specvals, least = T, nyq = 10000, low = 0, high = nyq) -{ - ## specvals: the output of muspec; dB-FFT values - ## least: this normalises each spectrum so that its minimum - ## db value is set to 0 - ## low, high: (in Hz). Over which spectral range do you - ## want to calculate the first two spectral moments - ## returns: $first: the first spectral moment (spectral centre - ## of gravity; - ## $second: the second spectral moment (variance, or moment - ## of inertia - matout <- NULL - fftlen <- ncol(specvals) - if(!is.matrix(specvals)) - specvals <- rbind(specvals) - if(least) { - minv <- apply(specvals, 1, min) - minv <- rep(minv, times = rep(fftlen, length(minv))) - minv <- matrix(t(minv), ncol = fftlen, byrow = T) - specvals <- specvals - minv - } - x <- seq(0, nyq, length = fftlen) - x <- rep(x, nrow(specvals)) - x <- matrix(t(x), ncol = fftlen, byrow = T) - if((low != 0) | (high != nyq)) { - x <- fft.extract(x, low = low, high = high) - specvals <- fft.extract(specvals, low = low, high = high) - } - prodvals <- x * specvals - sumvals <- apply(prodvals, 1, sum) - sumspecvals <- apply(specvals, 1, sum) - matout$first <- sumvals/sumspecvals - sqprodvals <- specvals * (x^2) - sumsqprodvals <- apply(sqprodvals, 1, sum) - matout$second <- sqrt((sumsqprodvals/sumspecvals) - (matout$first^2)) - matout -} - - - -"moments" <- -function(count, x, minval=F) - -{ -# compute moments. x is a numeric class -# count is the frequency with which that -# particular class occurs -# This function gives exactly the same -# results as those for the mean, variance -# skewness and kurtosis in example Table 3.13.1 -# p. 87, Snedecor & Cochran, 'Statistical Methods' -# 6th Edition, 1975. Let the arguments count and x -# equal f and U respectively in their example -# the centre of gravity with minval = F. -# the first two moments in this function -# also give the same results as in Harrington & Cassidy. -if(minval) -count <- count - min(count) -if(missing(x)) -{ -if(is.spectral(count)) -x <- trackfreq(count) -else -x <- 0:(length(count)-1) -} -k <- 1 -mom1 <- sum((x - 0)^k * count) / sum(count) -# the variance -k <- 2 -mom2 <- sum((x - mom1)^k * count) / sum(count) - -# third moment -k <- 3 -mom3 <- (sum((x - mom1)^k * count) / sum(count)) / (mom2 * sqrt(mom2)) - -# fourth moment -k <- 4 -# peaked distributions show positive kurtosis -# flat-topped distributions show negative kurtosis -mom4 <- (sum((x - mom1)^k * count) / sum(count)) / mom2^2 - 3 -c(mom1, mom2, mom3, mom4) -} - - - - -# Local Variables: -# mode:S -# S-temp-buffer-p:t -# End: diff --git a/R/generics.R b/R/generics.R index 0341ccad..204a5f79 100644 --- a/R/generics.R +++ b/R/generics.R @@ -1,6 +1,6 @@ -`Math.trackdata` <- - function (x,...) +##' @export +`Math.trackdata` <- function (x,...) { ftime <- x$ftime @@ -10,26 +10,28 @@ as.trackdata(o, inds, ftime, nm) } -`Math2.trackdata` <- - function (x) + +##' @export +`Math2.trackdata` <- function (x, digits) { ftime <- x$ftime inds <- x$index nm <- x$name - o <- get(.Generic)(x$data) + o <- get(.Generic)(x$data, digits) as.trackdata(o, inds, ftime, nm) } -`Ops.trackdata` <- - function (e1, e2) + +##' @export +`Ops.trackdata` <- function (e1, e2) { x = e1 y = e2 arithmetic = c("+", "-", "*", "^", "%%", "%/%", "/") compare = c("==", ">", "<", "!=", "<=", ">=") - - if (class(x) == "trackdata") { + + if (inherits(x, "trackdata")) { ftime <- x$ftime inds <- x$index nm <- x$name @@ -39,11 +41,11 @@ inds <- y$index nm <- y$name } - if (class(x) == "trackdata" & class(y) == "trackdata") + if (inherits(x, "trackdata") & inherits(y, "trackdata")) o <- get(.Generic)(x$data, y$data) - else if (class(x) == "trackdata" & class(y) != "trackdata") + else if (inherits(x, "trackdata") & (!inherits(y, "trackdata"))) o <- get(.Generic)(x$data, y) - else if (class(x) != "trackdata" & class(y) == "trackdata") + else if ((!inherits(x, "trackdata")) & inherits(y, "trackdata")) o <- get(.Generic)(x, y$data) if (.Generic %in% arithmetic) result <- as.trackdata(o, inds, ftime, nm) @@ -53,10 +55,10 @@ } -`Summary.trackdata` <- - function (x,..., na.rm=TRUE) + +##' @export +`Summary.trackdata` <- function (x,..., na.rm=TRUE) { get(.Generic)(x$data) } - diff --git a/R/linear.R b/R/linear.R index 0a5fba96..e3e16b9f 100644 --- a/R/linear.R +++ b/R/linear.R @@ -1,17 +1,16 @@ -############################################################################# -# # -# copyright : (C) 2000 SHLRC, Macquarie University # -# email : Steve.Cassidy@mq.edu.au # -# url : http://www.shlrc.mq.edu.au/emu # -# # -# This program is free software; you can redistribute it and/or modify # -# it under the terms of the GNU General Public License as published by # -# the Free Software Foundation; either version 2 of the License, or # -# (at your option) any later version. # -# # -############################################################################# - - +##' Perform linear time normalisation on trackdata. +##' +##' Performs linear time normalisation on trackdata. +##' +##' The data for each segment is normalised using the \code{approx} function. +##' +##' @param dataset A trackdata object as returned by \code{track}. +##' @param n The number of points (samples) required for each segment. +##' @return A new trackdata object where the data for each segment has the same +##' number (\code{n}) of samples. +##' @seealso approx +##' @keywords misc +##' @export linear "linear"<- function(dataset, n = 20) { ## perform linear time normalisation of a data set as returned @@ -41,6 +40,21 @@ } + + + + + + + + +##' linear av +##' +##' see function +##' +##' +##' @keywords internal +##' @export linear.av "linear.av"<- function(dataset, labs) { finmat <- NULL @@ -68,8 +82,3 @@ finmat$lab <- unique(labs) finmat } - -# Local Variables: -# mode:S -# S-temp-buffer-p:t -# End: diff --git a/R/locus.R b/R/locus.R index d92cb0bb..7f007039 100644 --- a/R/locus.R +++ b/R/locus.R @@ -1,41 +1,94 @@ - - -`locus` <- -function (target, onset, labels.vow = NULL, yxline = TRUE, - plotgraph = TRUE, axes=TRUE, ...) +##' Calculate locus equations for two-dimensional data +##' +##' The function plots a locus equation and returns associated statistical +##' information. +##' +##' A locus equation is a straight line regression fitted with lm() in which +##' the F2- values typically at the vowel onset are regressed on those of the +##' target. The slope can be used to give an indication of target-on-onset +##' coarticulatory influences. +##' +##' The best estimate of the locus frequency is where the locus equation +##' bisects the line target = onset. +##' +##' @param target a numerical vector typically of F2 values at the vowel target +##' @param onset a numerical vector typically of the same length as target of +##' F2 values at the vowel onset +##' @param labels.vow an optionally character vector for plotting labels at the +##' points (target, onset) of the same length as target +##' @param yxline optionally plot the line target = onset. Defaults to True. +##' @param plotgraph a logical vector for specifying whether the data should be +##' plotted. Defaults to True. +##' @param axes A logical vector indicating whether the axes should be plotted +##' @param ... graphical options \link{par} +##' @return A list containing regression diagnostics of the function lm() that +##' can be accessed with summary() and the estimated locus frequency in +##' $locus. A plot of values in the onset x target plane with superimposed +##' locus equation and line onset=target. +##' @author Jonathan Harrington +##' @keywords math +##' @examples +##' +##' +##' # calculate an F2-locus equation for initial [d] +##' # preceding lax vowels produced by female speaker "68". +##' # the onset is taken at the vowel onset; the +##' # vowel target is taken at the vowel's temporal midpoint. +##' +##' # identify initial "d" of speaker "68" +##' temp <- vowlax.left == "d" & vowlax.spkr == "68" +##' # get the F2 value at the vowel's temporal midpoint +##' targ <- dcut(vowlax.fdat[temp,2], .5, prop=TRUE) +##' # F2 value at the vowel's acoustic onset. +##' on <- dcut(vowlax.fdat[temp,2], 0, prop=TRUE) +##' +##' # locus equation plot +##' result <- locus(targ, on, vowlax.l[temp]) +##' # statistical diagnostics of the regression line (locus equation) +##' summary(result) +##' # intercept and slope +##' result$coeff +##' # best estimate of the locus frequency, i.e. the +##' # point of bisection of on = TRUEarg with the regression line +##' result$locus +##' +##' +##' @export locus +`locus` <- function (target, onset, labels.vow = NULL, + yxline = TRUE, plotgraph = TRUE, + axes=TRUE, ...) { -# target: vector of target freqs -# onset: vector of onset freqs -# labels.vow: optional vowel labels for plotting -# xlim, ylim: optional range for x and y-axes -# xlab, ylab: optional label for axes -# plot: if T, produces a plot of target x onset -# with a superimposed regression line through -# the scatter with superimposed line target = onset -# returns: statistical results of the fitting -# the regression line and the locus frequency in $locus -regr <- lm(onset ~ target) - mat <- rbind(cbind(1, -1), cbind(1, -regr$coef[2])) - vec <- c(0, regr$coef[1]) - regr$locus <- ((solve(mat) %*% vec)[1]) - if (plotgraph) { - if (is.null(labels.vow)) - labels.vow <- rep("x", length(target)) - plot(target, onset, type = "n", axes=FALSE, ...) - if(axes) - { - axis(side=1) - axis(side=2) - } - if(is.character(labels.vow)) - text(target, onset, labels.vow, ...) - else if(is.numeric(labels.vow)) - points(target, onset, pch=labels.vow, ...) - abline(regr, ...) - if (yxline) - abline(0, 1, lty = 2) + # target: vector of target freqs + # onset: vector of onset freqs + # labels.vow: optional vowel labels for plotting + # xlim, ylim: optional range for x and y-axes + # xlab, ylab: optional label for axes + # plot: if TRUE, produces a plot of target x onset + # with a superimposed regression line through + # the scatter with superimposed line target = onset + # returns: statistical results of the fitting + # the regression line and the locus frequency in $locus + regr <- lm(onset ~ target) + mat <- rbind(cbind(1, -1), cbind(1, -regr$coef[2])) + vec <- c(0, regr$coef[1]) + regr$locus <- ((solve(mat) %*% vec)[1]) + if (plotgraph) { + if (is.null(labels.vow)) + labels.vow <- rep("x", length(target)) + graphics::plot(target, onset, type = "n", axes=FALSE, ...) + if(axes) + { + graphics::axis(side=1) + graphics::axis(side=2) } - - regr + if(is.character(labels.vow)) + graphics::text(target, onset, labels.vow, ...) + else if(is.numeric(labels.vow)) + graphics::points(target, onset, pch=labels.vow, ...) + graphics::abline(regr, ...) + if (yxline) + graphics::abline(0, 1, lty = 2) + } + + regr } - diff --git a/R/makelab.R b/R/makelab.R index d93029a3..407a6470 100644 --- a/R/makelab.R +++ b/R/makelab.R @@ -1,61 +1,107 @@ -"makelab" <- -function(vectimes, uttname, dir, extn="xlab", labels=NULL) -# Function to write out ESPS label files -# One label file is written per element in uttname -# The resulting file is uttname.extn -# and it is written to the directory given by dir. -# vectimes: a vector of times -# uttname: a character vector of the same length as vectimes -# giving the utterance name associated with each -# element of vectimes -# dir: a character specifying the directory -# extn: a character specifying the extension of the -# resulting files. Defaults to xlab -# labels: if missing, each label written out -# has the label "x". Otherwise it can be a single -# element character vector such as "T" (each label -# then has the label "T") or else a vector of -# the same length as vectimes. -# Example: - -# s.vk <- emu.query("epg-demo", "*", "[Phoneme!=x -> Phoneme=k]") -# l.vk <- label(s.vk) -# e.vk <- emu.track(s.vk, "epg") -# e.dp <- dp(e.vk) -# maxzeit <- dmax(e.dp) -# labelfile(maxzeit[,1], utt(s.vk), "c:/d/test", "T") - +##' Write out ESPS-style label files +##' +##' Writes out separate ESPS-label files for each utterance to a specified +##' directory. +##' +##' +##' @param vectimes a vector of times +##' @param uttname a character vector of the same length as vectimes giving the +##' utterance name associated with each element of vectimes +##' @param dir a character specifying the directory +##' @param extn a character specifying the extension of the resulting files. +##' Defaults to xlab +##' @param labels either a single character vector or a character vector the +##' same length as vectimes. Defaults to "T" +##' @return ESPS-style label files are written out to the directory of the +##' user's choice. One ESPS-label file is created for each utterance containing +##' all time values for that utterance. +##' @author Jonathan Harrington +##' @keywords IO +##' @examples +##' +##' #first two segments (for the whole example) of segmentlist vowlax +##' vowlax[1:2,] +##' +##' #format track of vowlax +##' vowlax.fdat[1:2,] +##' +##' #Formant values of the midpoint of the segment +##' vowlax.fdat.5 = dcut(vowlax.fdat,0.5,prop=TRUE) +##' +##' #the time marks of the midpoint of the segment +##' times = vowlax.fdat.5[1:2,1] +##' times +##' +##' #utterance names to the segments +##' utts = utt(vowlax[1:2,]) +##' utts +##' +##' #the path to save the label files to "." is the RHOME Directory +##' path = "." +##' +##' #write the label files to path +##' \dontrun{makelab(times, utts, path, labels="T")} +##' +##' #the first two segments are from the same utterance, +##' #thus one label file was created in the R_HOME directory +##' +##' @export makelab +"makelab" <- function(vectimes, uttname, dir, extn="xlab", labels=NULL) { - -if(is.null(labels)) -labels <- rep("x", length(vectimes)) -if(length(labels)==1) -labels <- rep(labels, length(vectimes)) - -ufun <- function(vectimes, uttname, labels, extn, dir) -{ -a1 <- paste("signal", uttname) -a2 <- "nfields 1" -a3 <- "#" -omat <- cbind(vectimes/1000, rep(125, length(vectimes)), -labels) -psort <- sort.list(vectimes/1000) -omat <- omat[psort,] - - -dirloc <- paste(paste(dir, uttname, sep="/"), extn, sep=".") -write(t(a1), dirloc) -write(t(a2), dirloc, append=TRUE) -write(t(a3), dirloc, append=TRUE) -write(t(omat), dirloc, ncolumns=3, append=TRUE) - -} - - -for(j in unique(uttname)){ -temp <- uttname==j -ufun(vectimes[temp], j, labels[temp], extn, dir) -} - + # Function to write out ESPS label files + # One label file is written per element in uttname + # The resulting file is uttname.extn + # and it is written to the directory given by dir. + # vectimes: a vector of times + # uttname: a character vector of the same length as vectimes + # giving the utterance name associated with each + # element of vectimes + # dir: a character specifying the directory + # extn: a character specifying the extension of the + # resulting files. Defaults to xlab + # labels: if missing, each label written out + # has the label "x". Otherwise it can be a single + # element character vector such as "T" (each label + # then has the label "T") or else a vector of + # the same length as vectimes. + # Example: + + # s.vk <- emu.query("epg-demo", "*", "[Phoneme!=x -> Phoneme=k]") + # l.vk <- label(s.vk) + # e.vk <- emu.track(s.vk, "epg") + # e.dp <- dp(e.vk) + # maxzeit <- dmax(e.dp) + # labelfile(maxzeit[,1], utt(s.vk), "c:/d/test", "T") + + if(is.null(labels)) + labels <- rep("x", length(vectimes)) + if(length(labels)==1) + labels <- rep(labels, length(vectimes)) + + ufun <- function(vectimes, uttname, labels, extn, dir) + { + a1 <- paste("signal", uttname) + a2 <- "nfields 1" + a3 <- "#" + omat <- cbind(vectimes/1000, rep(125, length(vectimes)), + labels) + psort <- sort.list(vectimes/1000) + omat <- omat[psort,] + + + dirloc <- paste(paste(dir, uttname, sep="/"), extn, sep=".") + write(t(a1), dirloc) + write(t(a2), dirloc, append=TRUE) + write(t(a3), dirloc, append=TRUE) + write(t(omat), dirloc, ncolumns=3, append=TRUE) + + } + + + for(j in unique(uttname)){ + temp <- uttname==j + ufun(vectimes[temp], j, labels[temp], extn, dir) + } + } diff --git a/R/mel.R b/R/mel.R index 47324b25..615456a8 100644 --- a/R/mel.R +++ b/R/mel.R @@ -1,14 +1,92 @@ -"mel" <- -function(a) +##' Convert Hz to the mel scale +##' +##' The calculation is done using the formulae mel = 1/log(2) * (log(1 + +##' (Hz/1000))) * 1000 where Hz is the frequency in Hz. +##' +##' If 'data' is a spectral object, then the frequencies are changed so that +##' they are proportional to the mel scale and such that the mel intervals +##' between frequencies are constant between the lowest and highest +##' frequencies. More specifically, suppose that a spectral object has +##' frequencies at 0, 1000, 2000, 3000, 4000 Hz. Then the corresponding +##' frequencies extend in mel between 0 and 2321.928 mel (=4000 Hz in mels) in +##' four equal intervals, and linear interpolation is used with the 'approx' +##' function to obtain the dB values at those frequencies. +##' +##' @aliases mel mel.trackdata mel.spectral +##' @param a A vector or matrix of data or a spectral object. +##' @return A vector or matrix or spectral object of the same length and +##' dimensions as data. +##' @author Jonathan Harrington +##' @seealso \code{\link{bark}}, \code{\link{plot.spectral}} +##' @references Traunmueller, H. (1990) "Analytical expressions for the +##' tonotopic sensory scale" J. Acoust. Soc. Am. 88: 97-100. +##' @keywords math +##' @examples +##' +##' +##' #convert Hertz values to mel +##' +##' vec <- c(500, 1500, 2500) +##' vec +##' mel(vec) +##' +##' +##' # convert Hertz values to mel +##' +##' mel(vec) +##' +##' +##' # convert the $data values in a trackdata object to mel +##' # create a new track data object +##' +##' t1 <- dip.fdat +##' t1[1] +##' +##' # convert Hertz to mel +##' +##' t1$data <- mel(t1$data) +##' t1[1] +##' +##' # warp the frequency axis of a spectral object such +##' # that it is proportional to the mel scale. +##' +##' w = mel(e.dft) +##' oldpar = par(mfrow=c(1,2)) +##' plot(w, type="l") +##' +##' +##' # The values of w are at equal mel intervals. Compare +##' # with +##' +##' plot(e.dft, freq=mel(trackfreq(e.dft))) +##' +##' # the latter has a greater concentration of values +##' # in a higher frequency range. +##' +##' par(oldpar) +##' +##' @export mel +"mel" <- function(a) { -UseMethod("mel") + UseMethod("mel") } -"mel.default" <- -function (a) -{ - 1/log(2) * (log(1 + (a/1000))) * 1000 -} + + + + + +##' mel default +##' +##' see function +##' +##' +##' @keywords internal +##' @export +"mel.default" <- function (a) +{ + 1/log(2) * (log(1 + (a/1000))) * 1000 +} diff --git a/R/misc.R b/R/misc.R index cf2dd700..8d57939e 100644 --- a/R/misc.R +++ b/R/misc.R @@ -1,30 +1,39 @@ -############################################################################# -# # -# copyright : (C) 2000 SHLRC, Macquarie University # -# email : Steve.Cassidy@mq.edu.au # -# url : http://www.shlrc.mq.edu.au/emu # -# # -# This program is free software; you can redistribute it and/or modify # -# it under the terms of the GNU General Public License as published by # -# the Free Software Foundation; either version 2 of the License, or # -# (at your option) any later version. # -# # -############################################################################# - - - -## return the index of the value in vec which is closest to val +##' closest +##' +##' see function +##' +##' +##' @keywords internal +##' @export closest "closest" <- function(vec, val) { - newval <- min(abs((vec - val))) - z <- abs(vec - val) - temp <- z == newval - nums <- c(1:length(vec)) - nums[temp] + ## return the index of the value in vec which is closest to val + newval <- min(abs((vec - val))) + z <- abs(vec - val) + temp <- z == newval + nums <- c(1:length(vec)) + nums[temp] } + + + + + + + + +##' num label +##' +##' see function +##' +##' depricated function of the legacy EMU system +##' still available for backward compatibility +##' +##' @keywords internal +##' @export label_num "label_num" <- function(labs) { ## labs: a vector of labels @@ -39,12 +48,27 @@ } -## label_convert -- -## map one set of labels to another in a segment -## list or label vector -## + + + + + + + + +##' convert label +##' +##' see function +##' +##' +##' @keywords internal +##' @export label_convert "label_convert" <- function(segs.or.labels, match, replace) { + ## label_convert -- + ## map one set of labels to another in a segment + ## list or label vector + ## if (is.seglist( segs.or.labels ) ) { labs <- label(segs.or.labels) } else { @@ -57,7 +81,7 @@ stop("Lengths of match and replace vectors differ in label_convert") } } - + if (length(replace) == 1) { temp <- muclass(labs, match) labs[temp] <- replace @@ -74,7 +98,34 @@ } -"matscan"<- function(file, num.cols=count.fields(file)[1], what = 0, sk = 0) + + + + + + + + +##' Read matrix data from a file +##' +##' Reads matrix data from a file +##' +##' This function has been partially superseded by the introduction of data +##' frames and the read.table function. It is still useful however for reading +##' data into Splus matrix objects. +##' +##' @param file A filename. +##' @param num.cols The number of columns of data in the file. +##' @param what A template for the data elements in the file, it should be a +##' number for numeric data (the default) or a string for string data. Note +##' that an Splus matrix can only hold one type of data (string or numeric), +##' for mixed types use data tables and the \code{read.table} function. +##' @param sk The number of leading lines of the file to skip. +##' @return A matrix corresponding to the data in \code{file}. +##' @seealso read.table +##' @keywords misc +##' @export matscan +"matscan"<- function(file, num.cols=utils::count.fields(file)[1], what = 0, sk = 0) { ## first make a template, a list of num.cols what's template <- as.list(rep(what, num.cols)) @@ -89,11 +140,71 @@ + + + + + + + + +##' Find common elements in vectors +##' +##' Finds common elements in vectors +##' +##' +##' @param labels A vector of labels. +##' @param class A label or vector of labels. +##' @return A logical vector which is TRUE for each element in \code{labels} which +##' matches \code{class} or an element of \code{class}. +##' @seealso match +##' @keywords misc +##' @examples +##' +##' muclass(c("a", "b", "c"), c("a", "c")) +##' #[1] TRUE FALSE TRUE +##' +##' @export muclass "muclass"<- function(labels, class) { !(is.na(match(labels, class))) } + + + + + + + + +##' Randomise or Reverse items in a segment list +##' +##' Randomises or Reverses items in a segment list +##' +##' +##' @param segs An Emu segment list. +##' @param bwd If TRUE, reverse the order of the segment list. +##' @param rand If TRUE, randomise the order of the segment lists (default). +##' @return A segment list containing the original elements in random or +##' reversed order. This is useful if the segment list is to be used as the +##' source for a set of stimuli in a perception experiment. +##' @seealso \code{\link{query}} +##' @keywords misc +##' @examples +##' +##' data(vowlax) +##' ## assumes a database called demo is available on your system and that +##' ## the Emu system is installed. +##' +##' # all Phonetic vowels in the database +##' segs <- vowlax +##' +##' # randomise the segment list +##' rsegs <- randomise.segs( segs ) +##' +##' +##' @export randomise.segs "randomise.segs" <- function( segs, rand = TRUE, bwd=FALSE ) { if( bwd ){ @@ -105,8 +216,46 @@ } } + + + + + + + + +##' Converts degrees to radians +##' +##' Converts degrees to radians +##' +##' There are 360 degrees or 2 * PI radians in one full rotation. +##' +##' @param degrees Angular measurement for conversion. +##' @return Angular measurement in radians. +##' @keywords misc +##' @export radians "radians"<- function(degrees) (degrees * 2 * pi)/360 + + + + + + + + +##' Sort matrix by label +##' +##' Sorts matrix by label +##' +##' +##' @param mat A mu+ segment matrix. +##' @param labs A label vector which has the same number of columns as +##' \code{mat}. +##' @return Returns a sorted matrix by label, created from \code{mat}. +##' @seealso label, phon +##' @keywords misc +##' @export sortmatrix "sortmatrix" <- function(mat, labs = dimnames(mat)[[2]]) { ## labs can also be a vector of labels, which has @@ -114,124 +263,321 @@ ## if labs is "p" "t" "k", then mat ## will be sorted with these three labels in the first ## three columns - b1 <- labs - b2 <- dimnames(mat)[[1]] - c1 <- match(b2, b1) - d1 <- cbind(c1, mat) - newmat <- d1[sort.list(d1[, 1]), ] - newmat <- newmat[, -1] - b1 <- dimnames(newmat)[[1]] - b2 <- dimnames(newmat)[[2]] - c1 <- match(b2, b1) - d1 <- rbind(c1, newmat) - newmat2 <- d1[, (sort.list(d1[1, ], ))] - newmat2[-1, ] + b1 <- labs + b2 <- dimnames(mat)[[1]] + c1 <- match(b2, b1) + d1 <- cbind(c1, mat) + newmat <- d1[sort.list(d1[, 1]), ] + newmat <- newmat[, -1] + b1 <- dimnames(newmat)[[1]] + b2 <- dimnames(newmat)[[2]] + c1 <- match(b2, b1) + d1 <- rbind(c1, newmat) + newmat2 <- d1[, (sort.list(d1[1, ], ))] + newmat2[-1, ] } -"rad" <- -function(vec, samfreq = 20000, hz = TRUE) + + + + + + + + +##' Function to convert between Hertz and Radians +##' +##' convert between Hertz and Radians +##' +##' +##' @param vec A numerical vector of frequencies in Hz or radians +##' @param samfreq A single element numerical vector of the sampling frequency. +##' Defaults to 20000 Hz +##' @param hz Logical. If TRUE, convert from Hz to radians otherwise from radians +##' to hz +##' @author Jonathan Harrington +##' @seealso \code{\link{help}} +##' @keywords math +##' @examples +##' +##' # 4000 Hz in radians at a sampling frequency of 8000 Hz +##' rad(4000, 8000) +##' # pi/2 and pi/4 radians in Hz at a sampling frequency of 10000 Hz +##' rad(c(pi/2, pi/4), 10000, FALSE) +##' +##' +##' @export rad +"rad" <- function(vec, samfreq = 20000, hz = TRUE) { -# hz: if T, vec is a vector in Hertz, otherwise it's radians -# convert from radians to Hz, or Hz to radians - if(hz) vals <- (vec * 2 * pi)/samfreq else vals <- (vec * samfreq)/(2 * - pi) - vals + # hz: if TRUE, vec is a vector in Hertz, otherwise it's radians + # convert from radians to Hz, or Hz to radians + if(hz) vals <- (vec * 2 * pi)/samfreq else vals <- (vec * samfreq)/(2 * + pi) + vals } -"freqtoint" <- -function(trackdata, j){ -# note to remove the dc offset, set j to -1 -sg <- sign(j) -zerowhich <- sg==0 -if(all(sg[!zerowhich] > 0)) -sg[zerowhich] = 1 -else sg[zerowhich] = -1 -j <- abs(j) -fs <- trackfreq(trackdata) -N <- length(fs) -res <- 1+ (((j - fs[1]) * (N-1))/(fs[N] - fs[1])) -res[res < 1] = 1 -res[res > N] = N -res <- sg * res -unique(round(res)) -} -"dbnorm" <- -function(specdata, f=0, db=0) -{ -if(is.trackdata(specdata)) -dat <- specdata$data -else -dat <- specdata -# normalise to dbnorm -minfun <- function(specvals, f, db) -{ -specvals <- specvals - specvals[f]+db + + +##' Function to find the column number corresponding to frequencies of a +##' spectral object +##' +##' Find the column number corresponding to frequencies of a spectral object. +##' +##' This function is used in conjunction with object oriented programming of +##' EMU spectral objects. It should not in general be called from inside a +##' function. Its principal use is to determine the column number(s) +##' corresponding to frequencies for spectral trackdata objects or spectral +##' matrices or the element number for spectral vectors. +##' +##' @param trackdata A spectral object +##' @param j A vector of frequencies +##' @author Jonathan Harrington +##' @keywords math +##' @examples +##' +##' freqtoint(fric.dft,1000:2000) +##' # all frequencies except 1000-2000 +##' freqtoint(vowlax.dft.5, -(1000:2000)) +##' # all frequencies except 1000 Hz +##' freqtoint(e.dft, -1000) +##' # the d.c. offset - i.e. column 1 +##' freqtoint(vowlax.dft.5, 0) +##' # all freqs except the d.c. offset - i.e. not column 1 +##' freqtoint(vowlax.dft.5, -1) +##' +##' +##' +##' +##' @export freqtoint +"freqtoint" <- function(trackdata, j){ + # note to remove the dc offset, set j to -1 + sg <- sign(j) + zerowhich <- sg==0 + if(all(sg[!zerowhich] > 0)) + sg[zerowhich] = 1 + else sg[zerowhich] = -1 + j <- abs(j) + fs <- trackfreq(trackdata) + N <- length(fs) + res <- 1+ (((j - fs[1]) * (N-1))/(fs[N] - fs[1])) + res[res < 1] = 1 + res[res > N] = N + res <- sg * res + unique(round(res)) } -if(is.matrix(dat)) -dat <- fapply(dat, minfun, f, db) -else -dat = dat-dat[f]+db -if(is.trackdata(specdata)) + + + + + + + + + +##' Function to dB-normalise spectral objects +##' +##' The function can be used to rescale a spectrum to a dB value at a +##' particular frequency - for example, to rescale the spectrum so that 3000 Hz +##' has 0 dB and all other values are shifted in relation to this. +##' +##' +##' @param specdata An object of class 'spectral' +##' @param f A single element vector specifying the frequency. Defaults to 0 +##' @param db A single element vector specifying the dB value to which the +##' spectrum is to be rescaled. Defaults to zero +##' @return An object of the same class with rescaled dB values. The default is +##' to rescale the dB-values of the spectrum to 0 dB at 0 Hz. +##' @author Jonathan Harrington +##' @seealso \code{\link{dbtopower}} \code{\link{plot.spectral}} +##' @keywords manip +##' @examples +##' +##' # normalise to - 40 dB at 1500 Hz +##' res = dbnorm(e.dft, 1500, 0) +##' # compare the two +##' ylim = range(c(res, e.dft)) +##' plot(e.dft, ylim=ylim, type="l") +##' oldpar = par(new=TRUE) +##' plot(res, ylim=ylim, type="l", col=2) +##' +##' par(oldpar) +##' +##' @export dbnorm +"dbnorm" <- function(specdata, f=0, db=0) { -specdata$data <- dat -return(specdata) + + + if(is.trackdata(specdata)) + dat <- specdata$data + else + dat <- specdata + + + # normalise to dbnorm + minfun <- function(specvals, f, db) + { + specvals <- specvals - specvals[f]+db + } + if(is.matrix(dat)) + dat <- fapply(dat, minfun, f, db) + else + dat = dat-dat[f]+db + + if(is.trackdata(specdata)) + { + specdata$data <- dat + return(specdata) + } + else return(dat) + } -else return(dat) -} -"dbtopower" <- -function(specdata, const = 10, base=10, inv=FALSE) -{ -# function for converting from db to power and back -if(is.trackdata(specdata)) -dat <- specdata$data -else dat <- specdata -if(!inv) -result <- base^(dat/const) -else -result <- const * log(dat, base=base) -if(is.trackdata(specdata)) + + + + + + + +##' Function for inter-converting between decibels and a linear scale +##' +##' The function converts from decibels to a linear scale +##' +##' The function returns base\eqn{\mbox{\textasciicircum}}{^}(specdata/const) +##' if inv=FALSE, otherwise, const * log(dat, base=base). If the object to which +##' this function is applied is of class 'trackdata' then this function is +##' applied to $data. +##' +##' @param specdata A numeric object or an object of class trackdata +##' @param const A single element numeric vector. Defaults to 10 +##' @param base A single element numeric vector. Defaults to 10 +##' @param inv Logical. If TRUE, then the conversion is from a logarithmic to an +##' anti-logarithmic form, otherwise the other way round +##' @return An object of the same class. +##' @author Jonathan Harrington +##' @seealso \code{\link{dbtopower}} \code{\link{plot.spectral}} +##' @keywords math +##' @examples +##' +##' +##' # convert 10 dB to a power ratio +##' vec = dbtopower(10) +##' # convert dB-data to a power ratio and back to decibels +##' res = dbtopower(vowlax.dft.5) +##' res = dbtopower(res, inv=TRUE) +##' +##' @export dbtopower +"dbtopower" <- function(specdata, const = 10, base=10, inv=FALSE) { -specdata$data <- result -return(specdata) -} -else return(result) + # function for converting from db to power and back + if(is.trackdata(specdata)) + dat <- specdata$data + else dat <- specdata + if(!inv) + result <- base^(dat/const) + else + result <- const * log(dat, base=base) + if(is.trackdata(specdata)) + { + specdata$data <- result + return(specdata) + } + else return(result) } -"shift" <- -function(x, delta = 1, circular = TRUE) + + + + + + + + +##' Function to shift the elements of a vector. +##' +##' The function makes use of the function 'filter' to delay or advance a +##' signal by k points. +##' +##' The function makes use of the function 'filter' for linear filtering to +##' carry out the shifting. +##' +##' @param x A numeric vector +##' @param delta A single element numeric vector. Defines the number of points +##' by which the signal should be shifted. +##' @param circular Logical. If TRUE, the signal is wrapped around itself so that +##' if delta = 1, x[n] becomes x[1]. Otherwise, if delta is positive, the same +##' number of zeros are prepended to the signal +##' @return The signal shifted by a certain number of points. ... +##' @author Jonathan Harrington +##' @seealso filter +##' @keywords manip +##' @examples +##' +##' vec = 1:10 +##' shift(vec, 2) +##' shift(vec, -2) +##' shift(vec, 2, circular=FALSE) +##' +##' +##' +##' @export shift +"shift" <- function(x, delta = 1, circular = TRUE) { -## converts x[n] into x[n-1] by multiplying the Fourier -## transform on x[n] by z^-1 i.e. by e^-iw -N <- length(x) -if(delta < 0) -delta <- N + delta -h <- c(rep(0, delta), 1) -if(!circular) { -N <- length(x) + length(h) - 1 -x <- c(x, rep(0, N - length(x))) + ## converts x[n] into x[n-1] by multiplying the Fourier + ## transform on x[n] by z^-1 i.e. by e^-iw + N <- length(x) + if(delta < 0) + delta <- N + delta + h <- c(rep(0, delta), 1) + if(!circular) { + N <- length(x) + length(h) - 1 + x <- c(x, rep(0, N - length(x))) + } + filter(x, h, sides = 1, circular = TRUE)[1:N] + } -filter(x, h, sides = 1, circular = TRUE)[1:N] -} + + + + + + + +##' Split a string into words. +##' +##' Splits a string into words. +##' +##' +##' @param str A string. +##' @param char A character to split on +##' @return A vector of strings. The original \code{str} is split at ever +##' occurrence of \code{char} to generate a vector of strings. +##' @keywords misc +##' @examples +##' +##' splitstring("/home/recog/steve/foo", "/") +##' #[1] "home" "recog" "steve" "foo" +##' +##' @export splitstring splitstring <- function(str,char) { if(str == "") mat <- c(str) @@ -243,34 +589,24 @@ splitstring <- function(str,char) { ministr <- NULL length <- 0 while(TRUE) { - ch <- substring(str, ind, ind) - if(ch == char) { - ind <- ind + 1 - break - } - if(ch == "") { - break - } - ministr <- c(ministr, ch) - ind <- ind + 1 - length <- length + 1 + ch <- substring(str, ind, ind) + if(ch == char) { + ind <- ind + 1 + break + } + if(ch == "") { + break + } + ministr <- c(ministr, ch) + ind <- ind + 1 + length <- length + 1 } ## now concatenate string if(length > 0) - mat <- c(mat, paste(ministr, collapse = "")) + mat <- c(mat, paste(ministr, collapse = "")) if(ch == "") - break + break } } mat } - - - - - - -# Local Variables: -# mode:S -# S-temp-buffer-p:t -# End: diff --git a/R/moments.R b/R/moments.R index 324b441a..2dc76ad3 100644 --- a/R/moments.R +++ b/R/moments.R @@ -1,43 +1,77 @@ -"moments" <- -function(count, x, minval=FALSE) - +##' Function to calculate statistical moments +##' +##' The function calculates the first 4 moments, i.e. the mean, variance, skew, +##' kurtosis. +##' +##' The units of the first moment are the same as x, the units of the second +##' moment are x\eqn{\mbox{\textasciicircum}}{^}2, and the third and fourth +##' moments are dimensionless. +##' +##' @param count A vector of the observed instances per class +##' @param x A vector of the same length as count defining the class. If +##' missing, and if count is of class spectral, then x is equal to +##' trackfreq(count). If x is missing and is not of class spectral, then x +##' default to 0:(length(count)-1) +##' @param minval If TRUE, subtract min(count) from count so that the minimum +##' value of count is zero. This is principally used in calculating spectral +##' moments where count is in decibels, and more generally if count contains +##' negative values. +##' @author Jonathan Harrington +##' @references Snedecor, G & Cochran, W. 'Statistical Methods' Iowa State +##' Press. Wuensch,K., 2005 +##' @keywords math +##' @examples +##' +##' # first four moments of a vector +##' mom <- moments(bridge[,2]) +##' # the above is the same as moments(bridge[,2], 0:12) +##' # first four moments of a spectral vector with the dB values +##' # reset so that the minimum dB value is 0. The d.c. offset is also +##' # excluded in the calculation +##' mom <- moments(e.dft[-1], minval=TRUE) +##' # the temporal skew of F1 for the 10th segment. Use +##' m <- moments(vowlax.fdat[10,1]$data)[3] +##' +##' +##' @export moments +"moments" <- function(count, x, minval = FALSE) + { -# compute moments. x is a numeric class -# count is the frequency with which that -# particular class occurs -# This function gives exactly the same -# results as those for the mean, variance -# skewness and kurtosis in example Table 3.13.1 -# p. 87, Snedecor & Cochran, 'Statistical Methods' -# 6th Edition, 1975. Let the arguments count and x -# equal f and U respectively in their example -# the centre of gravity with minval = F. -# the first two moments in this function -# also give the same results as in Harrington & Cassidy. -if(minval) -count <- count - min(count) -if(missing(x)) -{ -if(is.spectral(count)) -x <- trackfreq(count) -else -x <- 0:(length(count)-1) -} -k <- 1 -mom1 <- sum((x - 0)^k * count) / sum(count) -# the variance -k <- 2 -mom2 <- sum((x - mom1)^k * count) / sum(count) - -# third moment -k <- 3 -mom3 <- (sum((x - mom1)^k * count) / sum(count)) / (mom2 * sqrt(mom2)) - -# fourth moment -k <- 4 -# peaked distributions show positive kurtosis -# flat-topped distributions show negative kurtosis -mom4 <- (sum((x - mom1)^k * count) / sum(count)) / mom2^2 - 3 -c(mom1, mom2, mom3, mom4) + # compute moments. x is a numeric class + # count is the frequency with which that + # particular class occurs + # This function gives exactly the same + # results as those for the mean, variance + # skewness and kurtosis in example Table 3.13.1 + # p. 87, Snedecor & Cochran, 'Statistical Methods' + # 6th Edition, 1975. Let the arguments count and x + # equal f and U respectively in their example + # the centre of gravity with minval = FALSE. + # the first two moments in this function + # also give the same results as in Harrington & Cassidy. + if(minval) + count <- count - min(count) + if(missing(x)) + { + if(is.spectral(count)) + x <- trackfreq(count) + else + x <- 0:(length(count)-1) + } + k <- 1 + mom1 <- sum((x - 0)^k * count) / sum(count) + # the variance + k <- 2 + mom2 <- sum((x - mom1)^k * count) / sum(count) + + # third moment + k <- 3 + mom3 <- (sum((x - mom1)^k * count) / sum(count)) / (mom2 * sqrt(mom2)) + + # fourth moment + k <- 4 + # peaked distributions show positive kurtosis + # flat-topped distributions show negative kurtosis + mom4 <- (sum((x - mom1)^k * count) / sum(count)) / mom2^2 - 3 + c(mom1, mom2, mom3, mom4) } - diff --git a/R/mu.colour.R b/R/mu.colour.R index 1d758d66..95b63554 100644 --- a/R/mu.colour.R +++ b/R/mu.colour.R @@ -1,98 +1,172 @@ -############################################################################# -# # -# copyright : (C) 2000 SHLRC, Macquarie University # -# email : Steve.Cassidy@mq.edu.au # -# url : http://www.shlrc.mq.edu.au/emu # -# # -# This program is free software; you can redistribute it and/or modify # -# it under the terms of the GNU General Public License as published by # -# the Free Software Foundation; either version 2 of the License, or # -# (at your option) any later version. # -# # -############################################################################# - - -`mu.colour` <- -function (labs, col = TRUE, linetype = FALSE, lwd = NULL, pch=NULL) +##' Function for specifying color, linetype, and line-widths in EMU plotting +##' functions. +##' +##' The function specifies color, linetype and linewidths in EMU plotting +##' functions as is used mostly in calls from within plot.trackdata, +##' plot.spectral, eplot, and dplot +##' +##' Parameters are also supplied for use with the function 'legend' +##' +##' @param labs A vector of character labels +##' @param col A code passed to the 'col' argument in plotting functions. There +##' are four possibilities. Either logical, a character vector, or a numeric +##' vector. In the first case, if TRUE, then a different numeric code is given +##' for each unique label type. For example, if labs is c("a", "b", "a", "c"), +##' then the output is c(1, 2, 1, 3). If FALSE, then for this example, the output +##' is c(1, 1, 1, 1). In the second case, the character vector can be either a +##' single element specifying a character, or there can be as many elements as +##' there are unique colors. Thus if col = "red", then for the example c("a", +##' "b", "a", "c"), the output is c("red", "red", "red", "red"). Alternatively, +##' since there are three unique labels for this example, then the user could +##' specify col = c("green", "red", "blue") and the output is c("green", "red", +##' "green", "blue") if labs is c("a", "b", "a", "c"). In the third case, +##' 'col'. can be either a single element numeric vector, or its length must be +##' equal to the number of unique types in labs. For example, if col=3 and if +##' labs = c("a", "b", "a", "c"), then the output is c(3, 3, 3, 3). +##' Alternatively, if col = c(2,3,1), then the output is c(2, 3, 2, 1) for the +##' same example. Finally, col can be specified as a character or numeric +##' vector that is the same length as labs, allowing the user to choose the +##' color in which each line should be drawn. The default is col = TRUE. +##' @param linetype A code specifying linetypes, i.e. the values passed to lty +##' in plotting functions.There are 2 possibilities. Either logical, a +##' character vector, or a numeric vector. In the first case, if TRUE, then a +##' different numeric code is given for each unique label type. For example, if +##' labs is c("a", "b", "a", "c"), then the output is c(1, 2, 1, 3). If FALSE, then +##' for this example, the output is c(1, 1, 1, 1). In the second case, +##' 'linetype' can be either a single element numeric vector, or its length +##' must be equal to the number of unique types in labs. For example, if +##' linetype=3 and if labs = c("a", "b", "a", "c"), then the output is c(3, 3, +##' 3, 3). Alternatively, if linetype = c(2,3,1), then the output is c(2, 3, 2, +##' 1) for the same example. Finally, linetype can be specified as a numeric +##' vector that is the same length as labs, allowing the user to choose the +##' linetype in which each line should be drawn. The default is linetype=FALSE +##' @param lwd A code passed to the lwd argument in plotting functions. 'lwd' +##' can be either a single element numeric vector, or its length must be equal +##' to the number of unique types in labs. For example, if lwd=3 and if labs = +##' c("a", "b", "a", "c"), then the output is c(3, 3, 3, 3). Alternatively, if +##' lwd = c(2,3,1), then the output is c(2, 3, 2, 1) for the same example. The +##' default is NULL in which case all lines are drawn with lwd=1 +##' @param pch A code passed to the pch argument in plotting functions. +##' Functions in the same way as lwd above +##' @return If it is a LISTRUE, use \item{colour}{A code for the color'} +##' \item{linetype}{A code for the linetype} \item{lwd}{A code for the line +##' width} \item{legend}{A list consisting of $legend$lab, $legend$lty and +##' $legend$lwd that specify the parameters for the 'legend' function. +##' +##' ... } +##' @author Steve Cassidy, modified by Jonathan Harrington +##' @seealso \code{\link{plot.trackdata}} \code{\link{dplot}} +##' \code{\link{eplot}} \code{\link{plot.spectral}} +##' @keywords utilities +##' @examples +##' +##' # examples will be given using the above functions +##' # b/w but with different linetypes +##' eplot(vowlax.fdat.5[,1:2], vowlax.l, col=FALSE, lty=TRUE) +##' +##' # user-defined colors +##' eplot(vowlax.fdat.5[,1:2], vowlax.l, col=c("green", "blue", "red", "orange")) +##' +##' # spectral plot, user-defined colors, the last one is dotted +##' # and with a line-thickness of 2 +##' plot(vowlax.dft.5[1:20,], vowlax.l[1:20], +##' col=c("green", "blue", "red", "orange"), +##' fun=mean, lty=c(1, 1, 1, 2), lwd=c(1, 1, 1, 2)) +##' +##' # similar but using dplot() +##' dplot(vowlax.fdat[1:20,2], vowlax.l, +##' col=c("green", "blue", "red", "orange"), +##' lwd=c(1, 1, 1, 2), lty=c(1, 1, 1, 2)) +##' +##' # the default except plot everything with a dotted line and plotting symbol 4 +##' dplot(vowlax.fdat[,2], vowlax.l, average=TRUE, lty=2, pch=4, type="b", xlim=c(40, 60)) +##' +##' # the default except plot everything with a dotted line and +##' # with double line thickness +##' eplot(vowlax.fdat.5[,1:2], vowlax.l, lty=2, lwd=2) +##' +##' @export mu.colour +`mu.colour` <- function (labs, col = TRUE, linetype = FALSE, + lwd = NULL, pch=NULL) { - result <- NULL - if (is.logical(col)) { - if (col) - result$colour <- label_num(labs) - else result$colour <- rep(1, length(labs)) + result <- NULL + if (is.logical(col)) { + if (col) + result$colour <- label_num(labs) + else result$colour <- rep(1, length(labs)) + } + else if (length(col) == length(labs)) + result$colour <- col + else if (length(col) == length(unique(labs))) { + k <- 1 + result$colour <- labs + for (j in unique(labs)) { + temp <- labs == j + result$colour[temp] = col[k] + k <- k + 1 } - else if (length(col) == length(labs)) - result$colour <- col - else if (length(col) == length(unique(labs))) { - k <- 1 - result$colour <- labs - for (j in unique(labs)) { - temp <- labs == j - result$colour[temp] = col[k] - k <- k + 1 - } + } + else if (length(col) == 1) + result$colour <- rep(col, length(labs)) + if (is.logical(linetype)) { + if (linetype) + result$linetype <- label_num(labs) + else result$linetype <- rep(1, length(labs)) + } + else if (length(linetype) == length(labs)) + result$linetype <- linetype + else if (length(linetype) == length(unique(labs))) { + k <- 1 + result$linetype <- labs + for (j in unique(labs)) { + temp <- labs == j + result$linetype[temp] = linetype[k] + k <- k + 1 } - else if (length(col) == 1) - result$colour <- rep(col, length(labs)) - if (is.logical(linetype)) { - if (linetype) - result$linetype <- label_num(labs) - else result$linetype <- rep(1, length(labs)) + } + else if (length(linetype) == 1) + result$linetype <- rep(linetype, length(labs)) + if (is.null(lwd)) + result$lwd <- rep(1, length(labs)) + else if (length(lwd) == length(labs)) + result$lwd <- lwd + else if (length(lwd) == length(unique(labs))) { + k <- 1 + result$lwd <- labs + for (j in unique(labs)) { + temp <- labs == j + result$lwd[temp] = lwd[k] + k <- k + 1 } - else if (length(linetype) == length(labs)) - result$linetype <- linetype - else if (length(linetype) == length(unique(labs))) { - k <- 1 - result$linetype <- labs - for (j in unique(labs)) { - temp <- labs == j - result$linetype[temp] = linetype[k] - k <- k + 1 - } + } + else if (length(lwd) == 1) + result$lwd <- rep(lwd, length(labs)) + + if (is.null(pch)) + result$pch <- rep(1, length(labs)) + else if (length(pch) == length(labs)) + result$pch <- pch + else if (length(pch) == length(unique(labs))) { + k <- 1 + result$pch <- labs + for (j in unique(labs)) { + temp <- labs == j + result$pch[temp] = pch[k] + k <- k + 1 } - else if (length(linetype) == 1) - result$linetype <- rep(linetype, length(labs)) - if (is.null(lwd)) - result$lwd <- rep(1, length(labs)) - else if (length(lwd) == length(labs)) - result$lwd <- lwd - else if (length(lwd) == length(unique(labs))) { - k <- 1 - result$lwd <- labs - for (j in unique(labs)) { - temp <- labs == j - result$lwd[temp] = lwd[k] - k <- k + 1 - } - } - else if (length(lwd) == 1) - result$lwd <- rep(lwd, length(labs)) - - if (is.null(pch)) - result$pch <- rep(1, length(labs)) - else if (length(pch) == length(labs)) - result$pch <- pch - else if (length(pch) == length(unique(labs))) { - k <- 1 - result$pch <- labs - for (j in unique(labs)) { - temp <- labs == j - result$pch[temp] = pch[k] - k <- k + 1 - } - } - else if (length(pch) == 1) - result$pch <- rep(pch, length(labs)) - - - p1 <- paste(labs, result$colour, result$linetype, result$lwd, result$pch) - p1.temp <- duplicated(p1) - result$legend$lab <- labs[!p1.temp] - result$legend$col <- result$colour[!p1.temp] - result$legend$lty <- result$linetype[!p1.temp] - result$legend$lwd <- result$lwd[!p1.temp] - result$legend$pch <- result$pch[!p1.temp] - result + } + else if (length(pch) == 1) + result$pch <- rep(pch, length(labs)) + + + p1 <- paste(labs, result$colour, result$linetype, result$lwd, result$pch) + p1.temp <- duplicated(p1) + result$legend$lab <- labs[!p1.temp] + result$legend$col <- result$colour[!p1.temp] + result$legend$lty <- result$linetype[!p1.temp] + result$legend$lwd <- result$lwd[!p1.temp] + result$legend$pch <- result$pch[!p1.temp] + result } @@ -100,22 +174,46 @@ function (labs, col = TRUE, linetype = FALSE, lwd = NULL, pch=NULL) ## return the colour for a given label via the colour object -mu.colour.get <- function(col.lty, label) { + + + + + + + +##' get a EMU color +##' +##' see function +##' +##' +##' @keywords internal +##' @export mu.colour.get +mu.colour.get <- function(col.lty, label) { + colour <- col.lty$legend$col[match(label, col.lty$legend$lab)] return( colour ) - + } -mu.linetype.get <- function(col.lty, label) { - lty <- col.lty$legend$lty[match(label, col.lty$legend$lab)] - return( lty ) -} -# Local Variables: -# mode:S -# S-temp-buffer-p:t -# End: + + + + +##' mu linetype get +##' +##' see function +##' +##' +##' @keywords internal +##' @export mu.linetype.get +mu.linetype.get <- function(col.lty, label) { + + lty <- col.lty$legend$lty[match(label, col.lty$legend$lab)] + return( lty ) + +} diff --git a/R/mu.legend.R b/R/mu.legend.R index cc569725..c67dc254 100644 --- a/R/mu.legend.R +++ b/R/mu.legend.R @@ -1,16 +1,10 @@ -############################################################################# -# # -# copyright : (C) 2000 SHLRC, Macquarie University # -# email : Steve.Cassidy@mq.edu.au # -# url : http://www.shlrc.mq.edu.au/emu # -# # -# This program is free software; you can redistribute it and/or modify # -# it under the terms of the GNU General Public License as published by # -# the Free Software Foundation; either version 2 of the License, or # -# (at your option) any later version. # -# # -############################################################################# - +##' make a EMU legend +##' +##' see function +##' +##' +##' @keywords internal +##' @export mu.legend "mu.legend"<- function(legn, xlim, ylim) { fudge.x <- (xlim[2]-xlim[1])/5 @@ -24,12 +18,6 @@ if(legn=="bl") return(list(x=xlim[1], y=ylim[1]+fudge.y)) if(legn=="loc") - return(locator(1)) + return(graphics::locator(1)) stop("Unknown legend locator in mu.legend") } - - -# Local Variables: -# mode:S -# S-temp-buffer-p:t -# End: diff --git a/R/norm.R b/R/norm.R index a28953e5..115d6224 100644 --- a/R/norm.R +++ b/R/norm.R @@ -1,17 +1,32 @@ -############################################################################# -# # -# copyright : (C) 2000 SHLRC, Macquarie University # -# email : Steve.Cassidy@mq.edu.au # -# url : http://www.shlrc.mq.edu.au/emu # -# # -# This program is free software; you can redistribute it and/or modify # -# it under the terms of the GNU General Public License as published by # -# the Free Software Foundation; either version 2 of the License, or # -# (at your option) any later version. # -# # -############################################################################# - - +##' Normalise speech data +##' +##' Normalises speech data +##' +##' Types of normalisation: \code{"nearey"}, Nearey : Find the log of each data +##' element and subtract from each the mean of the logarithmic data. +##' \code{"cen"}, centroid: Find the mean of the data column and subtract it +##' from each data element in that column. \code{"lob"}, Lobanov: Find the +##' mean and standard deviation of the data. Subtract the mean from each data +##' element and divide each result by the standard deviation. "gerst", +##' Gerstman: Subtract from the data the minimum formant value then divide by +##' the formant range. +##' +##' @param data A matrix of data. Can be either an n-columned matrix or a +##' trackdata object as returned by \code{track}. +##' @param speakerlabs A parallel vector of speaker labels. +##' @param type The type of extrinsic normalisation to be performed on data. +##' type can be \code{"nearey"}, \code{"cen"}, \code{"lob"}, \code{"gerst"} +##' (default), for normalisation according to Nearey, centroid method, Lobanov, +##' or Gerstman. +##' @param rescale Currently only works for Lobanov normalisation. The +##' normalised values are multiplied by the standard deviation and then the +##' mean is added, where the standard deviation and mean are across all +##' original speakers' unnormalised data. +##' @return Normalised values of data are returned, having the same structure as +##' data. +##' @seealso track +##' @keywords misc +##' @export norm "norm"<- function(data, speakerlabs, type = "gerst", rescale = FALSE) { ## data: a matrix of data. Can be @@ -63,12 +78,12 @@ vals <- data[temp, ] if(type == "gerst") nvals <- gerst.sub(vals) - else if(type == "lob") - nvals <- lob.sub(vals) - else if(type == "nearey") - nvals <- nearey.sub(vals) - else if(type == "cen") - nvals <- cen.sub(vals) + else if(type == "lob") + nvals <- lob.sub(vals) + else if(type == "nearey") + nvals <- nearey.sub(vals) + else if(type == "cen") + nvals <- cen.sub(vals) data[temp, ] <- nvals } if(rescale) { @@ -91,6 +106,21 @@ data } + + + + + + + + +##' gerst sub +##' +##' see function +##' +##' +##' @keywords internal +##' @export gerst.sub "gerst.sub"<- function(data) { minvals <- apply(data, 2, min) @@ -103,6 +133,21 @@ (data - vecmat)/rvecmat } + + + + + + + + +##' lob sub +##' +##' see function +##' +##' +##' @keywords internal +##' @export lob.sub "lob.sub"<- function(data) { if(!is.matrix(data)) @@ -117,6 +162,21 @@ } + + + + + + + + +##' nearey sub +##' +##' see function +##' +##' +##' @keywords internal +##' @export nearey.sub "nearey.sub"<- function(data) { if(!is.matrix(data)) @@ -127,6 +187,21 @@ ldata - tmean } + + + + + + + + +##' Subfunction of cen +##' +##' see function +##' +##' +##' @keywords internal +##' @export cen.sub "cen.sub"<- function(data) { if(!is.matrix(data)) @@ -140,6 +215,27 @@ mat } + + + + + + + + +##' Label each data sample +##' +##' Labels each data sample +##' +##' +##' @param indvals Index component of a trackdata object as returned by +##' \code{frames}, or \code{track}. +##' @param labs A label vector parallel to \code{indvals}. +##' @return Returns a vector of labels, one for each row in the data matrix +##' that corresponds to \code{indvals}. +##' @seealso frames, track +##' @keywords misc +##' @export expand_labels "expand_labels"<- function(indvals, labs) { ## indvals is the index component returned by frames/track @@ -158,50 +254,89 @@ } + + + + + + + + +##' rescale lob +##' +##' see function +##' +##' +##' @keywords internal +##' @export rescale.lob "rescale.lob"<- function(data, mvals, sdvals) { -# rescales the Lobanov normalised data. mvals is the -# mean of the raw data, sdvals the standard dev. of -# the raw data - if(!is.matrix(data)) data <- mvals + (data * sdvals) else mat <- NULL - { - for(j in 1:ncol(data)) { - vec <- data[, j] * sdvals[j] - mvec <- vec + mvals[j] - mat <- cbind(mat, mvec) - } - } - mat + # rescales the Lobanov normalised data. mvals is the + # mean of the raw data, sdvals the standard dev. of + # the raw data + if(!is.matrix(data)) data <- mvals + (data * sdvals) else mat <- NULL +{ + for(j in 1:ncol(data)) { + vec <- data[, j] * sdvals[j] + mvec <- vec + mvals[j] + mat <- cbind(mat, mvec) + } } +mat +} + + + + + + + + +##' rescale gerst +##' +##' see function +##' +##' +##' @keywords internal +##' @export rescale.gerst "rescale.gerst"<- function(data, mind, ranged) { - for(j in 1:ncol(data)) { - data[, j] <- data[, j] * ranged[j] + mind[j] - } - data + for(j in 1:ncol(data)) { + data[, j] <- data[, j] * ranged[j] + mind[j] + } + data } + + + + + + + + +##' rescale nearey +##' +##' see function +##' +##' +##' @keywords internal +##' @export rescale.nearey "rescale.nearey"<- function(data, neardata) { - if(!is.matrix(data)) - data <- rbind(data) - for(j in 1:ncol(data)) { - rval <- max(neardata[, j]) - min(neardata[, j]) - mindata <- min(data[, j]) - maxdata <- max(data[, j]) - rangedata <- maxdata - mindata - data[, j] <- min(neardata[, j]) + - (((data[, j] - mindata)/rangedata) * rval) - } - data + if(!is.matrix(data)) + data <- rbind(data) + for(j in 1:ncol(data)) { + rval <- max(neardata[, j]) - min(neardata[, j]) + mindata <- min(data[, j]) + maxdata <- max(data[, j]) + rangedata <- maxdata - mindata + data[, j] <- min(neardata[, j]) + + (((data[, j] - mindata)/rangedata) * rval) + } + data } - - -# Local Variables: -# mode:S -# S-temp-buffer-p:t -# End: diff --git a/R/onAttach.R b/R/onAttach.R deleted file mode 100644 index 6c606c28..00000000 --- a/R/onAttach.R +++ /dev/null @@ -1,55 +0,0 @@ -############################################################################## -## # -## copyright : (C) 2012 IPS, LMU Munich # -## email : Steve.Cassidy@mq.edu.au # -## url : http://www.emu.sourceforge.net # -## # -## This program is free software; you can redistribute it and/or modify # -## it under the terms of the GNU General Public License as published by # -## the Free Software Foundation; either version 2 of the License, or # -## (at your option) any later version. # -## # -############################################################################## - - - -".onAttach"<- function(libname, pkgname) -{ - - if (interactive()) { - - packpath = path.package(package = "emuR", quiet = FALSE) - sepa = .Platform$file.sep - nfile = "emudirectory" - filepath = paste(packpath,sepa,nfile,sep = "") - mess = paste("\n \n _ _________ _______ \n | || ______ \\ / _______| \n | || | _____| || |_______ \n | || ||______/ \\_______ \\ \n | || | _ . . . _ ___| | \n |_||_||_||.:.:.:.||_|____/ \n : : : \n INSTITUTE OF PHONETICS \n AND SPEECH PROCESSING \n \n(C) IPS University of Munich\n\nR Package of the EMU Speech Database system - Version",packageVersion("emuR")," \nFor support requests contact http://emu.sourceforge.net \n") - packageStartupMessage(mess, appendLF = FALSE) - packageStartupMessage( "\nLibrary successfully loaded." ) - } -} - - - - - -emudata.init <- function() { - messemudatamore = "" - if(require(emudata)) { - - messemudata="V Additional datasets successfully loaded from package emudata." - } else { - messemudata="X NO additional databases loaded." - messemudatamore=" To have access to all datasets used in \n Harrington, J. (2010). The Phonetic Analysis of Speech Corpora. Blackwell,\n install package emudata." - } - if (interactive()) { - message(messemudata) - cat(messemudatamore) - } -} - - - - -## Local Variables: -## mode:S -## End: diff --git a/R/outliers.R b/R/outliers.R index 94d98b92..49a2037d 100644 --- a/R/outliers.R +++ b/R/outliers.R @@ -1,37 +1,31 @@ -############################################################################# -# # -# copyright : (C) 2000 SHLRC, Macquarie University # -# email : Steve.Cassidy@mq.edu.au # -# url : http://www.shlrc.mq.edu.au/emu # -# # -# This program is free software; you can redistribute it and/or modify # -# it under the terms of the GNU General Public License as published by # -# the Free Software Foundation; either version 2 of the License, or # -# (at your option) any later version. # -# # -############################################################################# - +##' outliers +##' +##' see function +##' +##' +##' @keywords internal +##' @export outliers "outliers" <- function(data, labels, threshold) { - model <- train(data,labels) - dist <- distance(data, model, labels, metric="mahal") - good <- NULL - bad <- NULL - ddd <- NULL - for(n in 1:length(model$label)){ - l <- model$label[n] - which <- labels==l - dd <- dist[which, n] - meandd <- mean(dd) - sdevdd <- sqrt(var(dd)) - - cutoff <- sdevdd*threshold - - idx <- (1:length(labels))[which] - tmp <- idx[dd=cutoff] - bad <- c(bad, tmp ) - ddd <- c(ddd, dist[which,n] ) - } - list(good=good[!is.na(good)], bad=bad[!is.na(bad)], dist=ddd) + model <- train(data,labels) + dist <- distance(data, model, labels, metric="mahal") + good <- NULL + bad <- NULL + ddd <- NULL + for(n in 1:length(model$label)){ + l <- model$label[n] + which <- labels==l + dd <- dist[which, n] + meandd <- mean(dd) + sdevdd <- sqrt(var(dd)) + + cutoff <- sdevdd*threshold + + idx <- (1:length(labels))[which] + tmp <- idx[dd=cutoff] + bad <- c(bad, tmp ) + ddd <- c(ddd, dist[which,n] ) } + list(good=good[!is.na(good)], bad=bad[!is.na(bad)], dist=ddd) +} diff --git a/R/palate.R b/R/palate.R index 92b07f36..dd224972 100644 --- a/R/palate.R +++ b/R/palate.R @@ -1,50 +1,76 @@ -"palate" <- -function(epgdata) +##' Obtain a three-dimensional palatographic array +##' +##' Function to calculate a three-dimensional palatographic array from. +##' +##' An EPG compressed trackdata object that is output from the Reading system +##' contains eight columns of data and each row value when converted to binary +##' numbers (after adding 1) gives the corresponding EPG contact patterns. This +##' function does the conversion to binary values. +##' +##' @param epgdata An eight-columned EPG-compressed trackdata object or an +##' eight columned matrix of EPG-compressed trackdata. +##' @return An array of three dimensions of 8 rows x 8 columns x n segments +##' where n is the number of segments in the trackdata object or matrix. The +##' rows and columns are given dimension names, the dimension names of the +##' third dimension contains the times at which the palatograms occur. +##' @author Jonathan Harrington +##' @seealso \code{\link{epgcog}} \code{\link{epggs}} \code{\link{epgai}} +##' \code{\link{epgplot}} +##' @keywords datagen +##' @examples +##' +##' # convert an EPG-compressed trackdata object to palatograms +##' p <- palate(coutts.epg) +##' +##' # convert an EPG-compressed matrix to palatograms +##' p <- palate(dcut(coutts.epg, 0, prop=TRUE)) +##' +##' +##' @export palate +"palate" <- function(epgdata) { -# epgdata: either a vector of length 8 or -# a matrix of ncol = 8 with 1 row per segment -# or trackdata. If it's trackdata, then the -# result returned is applied to epgdata$data -if(is.trackdata(epgdata)) -epgdata <- epgdata$data -times <- dimnames(epgdata)[[1]] - if(!is.matrix(epgdata)) epgdata <- rbind(epgdata) - if(ncol(epgdata) != 8) { - print("input must have 8 columns or be a vector of length 8") - stop() - } - bingen <- function(n = 8) - { -# n is the number of columns in the result - mat <- NULL - x <- 2^(0:(n - 1)) - vec <- rev(x) - for(j in length(vec):1) { - res <- rep(c(rep(0, x[j]), rep(1, x[j])), vec[j]) - mat <- cbind(mat, res) - } - mat - } - nsegs <- nrow(epgdata) - epgdata <- c(t(epgdata)) - epgdata <- epgdata + 1 - p <- bingen() - p <- p[epgdata, ] - amat <- array(t(p), c(8, 8, nsegs)) -if(nsegs > 1) - p <- aperm(amat[8:1, 8:1, ], c(2, 1, 3)) -# usual silly annoying hack in case there's only one palatogram -else -{ -v <- amat[8:1, 8:1, ] -v <- array(v, c(8, 8, 1)) -p <- aperm(v, c(2, 1, 3)) -} - -charrow <- paste("R", 1:8, sep="") -charcol <- paste("C", 1:8, sep="") -dimnames(p) <- list(charrow, charcol, times) -class(p) <- c("array", "EPG") -p + # epgdata: either a vector of length 8 or + # a matrix of ncol = 8 with 1 row per segment + # or trackdata. If it's trackdata, then the + # result returned is applied to epgdata$data + if(is.trackdata(epgdata)) + epgdata <- epgdata$data + times <- dimnames(epgdata)[[1]] + if(!is.matrix(epgdata)) epgdata <- rbind(epgdata) + if(ncol(epgdata) != 8) { + stop("input must have 8 columns or be a vector of length 8") + } + bingen <- function(n = 8) + { + # n is the number of columns in the result + mat <- NULL + x <- 2^(0:(n - 1)) + vec <- rev(x) + for(j in length(vec):1) { + res <- rep(c(rep(0, x[j]), rep(1, x[j])), vec[j]) + mat <- cbind(mat, res) + } + mat + } + nsegs <- nrow(epgdata) + epgdata <- c(t(epgdata)) + epgdata <- epgdata + 1 + p <- bingen() + p <- p[epgdata, ] + amat <- array(t(p), c(8, 8, nsegs)) + if(nsegs > 1) + p <- aperm(amat[8:1, 8:1, ], c(2, 1, 3)) + # usual silly annoying hack in case there's only one palatogram + else + { + v <- amat[8:1, 8:1, ] + v <- array(v, c(8, 8, 1)) + p <- aperm(v, c(2, 1, 3)) + } + + charrow <- paste("R", 1:8, sep="") + charcol <- paste("C", 1:8, sep="") + dimnames(p) <- list(charrow, charcol, times) + class(p) <- c("array", "EPG") + p } - diff --git a/R/perform.R b/R/perform.R index 4700e722..07bc1cea 100644 --- a/R/perform.R +++ b/R/perform.R @@ -1,19 +1,14 @@ -############################################################################# -# # -# copyright : (C) 2000 SHLRC, Macquarie University # -# email : Steve.Cassidy@mq.edu.au # -# url : http://www.shlrc.mq.edu.au/emu # -# # -# This program is free software; you can redistribute it and/or modify # -# it under the terms of the GNU General Public License as published by # -# the Free Software Foundation; either version 2 of the License, or # -# (at your option) any later version. # -# # -############################################################################# - - - - +##' Performance (hit rate) of a confusion matrix +##' +##' Performs (hit rate) of a confusion matrix +##' +##' +##' @param data A confusion matrix. +##' @return Calculates the accuracy (total score) of the confusion matrix, +##' returning percentage of correct, and incorrect matches. +##' @seealso confusion +##' @keywords misc +##' @export perform "perform" <- function(data) { ## calculates total score in a confusion matrix, data @@ -30,9 +25,3 @@ dimnames(m) <- list(NULL, labcol) m } - -# Local Variables: -# mode:S -# S-temp-buffer-p:t -# End: - diff --git a/R/plafit.R b/R/plafit.R index a11b625c..2559f0c1 100644 --- a/R/plafit.R +++ b/R/plafit.R @@ -1,36 +1,69 @@ -"plafit" <- -function(wav,fit=FALSE, n=101) +##' Calculate the coefficients of a parabola +##' +##' Fit a second ordered polynomial to a vector of values +##' +##' The function fits a parabola (2nd order polynomial) following the method of +##' van Bergem, Speech Communication, 14, 1994, 143-162. The algorithm fixes +##' the parabola at the onset, midpoint, and offset of the vector i.e. such +##' htat the fitted parabola and original vector have the same values at these +##' points. +##' +##' @param wav a vector or single column matrix of numeric values to which the +##' 2nd order polynomial is to be fitted. +##' @param fit if FALSE, return the coefficients of the polynomial; if TRUE, the +##' values of the polynomial are returned to the same length as the vector wav. +##' @param n in fitting the polynomial, linear time normalisation is first +##' applied to the input vector wav to 101 points. The polynomial is fitted +##' under the assumption that these points extend linearly in time between t = +##' -1 and t = 1 with t = 0 occurring at the temporal midpoint. +##' @return The function returns the coefficients of c0, c1, c2 in the parabola +##' y = c0 + c1t + c2t\eqn{\mbox{\textasciicircum}}{^}2 where t extends between +##' -1 and 1. The function can also be used to derive the values of the +##' parabola as a function of time from the coefficients. +##' @author Jonathan Harrington +##' @seealso \code{\link{dct}} +##' @keywords math +##' @examples +##' +##' # fit a polynomial to a segment of fundamental frequency data +##' plafit(vowlax.fund[1,]$data) +##' +##' # return the fitted values of the polynomial +##' plafit(vowlax.fund[1,]$data, fit=TRUE) +##' +##' +##' @export plafit +"plafit" <- function(wav, fit = FALSE, n = 101) { -if(!is.vector(wav) & !is.matrix(wav) ) -stop("input signal must be a vector or a one-columned matrix") -if(is.matrix(wav) ) -{ -if(ncol(wav)!=1) -stop("input signal must be a vector or a one-columned matrix") -} -if(is.vector(wav)) -nz <- names(wav) -if(is.matrix(wav)) -nz <- dimnames(wav)[[1]] -if(n %% 2 != 1) -n <- n + 1 -N <- length(wav) -a <- approx(wav, n=n)$y -times <- seq(-1, 1, length=n) -c0 <- a[times==0] -c1 <- 0.5 * (a[n] - a[1]) -c2 <- 0.5 * (a[n] + a[1]) - c0 -if(fit) -{ -y <- c0 + c1 * times + c2 * (times^2) -result <- approx(y, n=N)$y -names(result) <- nz -} -else -{ -result <- c(c0, c1, c2) -names(result) <- c("c0", "c1", "c2") -} -result + if(!is.vector(wav) & !is.matrix(wav) ) + stop("input signal must be a vector or a one-columned matrix") + if(is.matrix(wav) ) + { + if(ncol(wav)!=1) + stop("input signal must be a vector or a one-columned matrix") + } + if(is.vector(wav)) + nz <- names(wav) + if(is.matrix(wav)) + nz <- dimnames(wav)[[1]] + if(n %% 2 != 1) + n <- n + 1 + N <- length(wav) + a <- approx(wav, n=n)$y + times <- seq(-1, 1, length=n) + c0 <- a[times==0] + c1 <- 0.5 * (a[n] - a[1]) + c2 <- 0.5 * (a[n] + a[1]) - c0 + if(fit) + { + y <- c0 + c1 * times + c2 * (times^2) + result <- approx(y, n=N)$y + names(result) <- nz + } + else + { + result <- c(c0, c1, c2) + names(result) <- c("c0", "c1", "c2") + } + result } - diff --git a/R/rbind.trackdata.R b/R/rbind.trackdata.R index 0e950e58..f4a95b2f 100644 --- a/R/rbind.trackdata.R +++ b/R/rbind.trackdata.R @@ -1,26 +1,69 @@ -"rbind.trackdata" <- -function (...) +##' A method of the generic function rbind for objects of class trackdata +##' +##' Different track data objects from one segment list are bound by combining +##' the $data columns of the track data object by rows. Track data objects +##' are created by \code{\link{get_trackdata}}. +##' +##' All track data objects have to be track data of the same segment list. +##' Thus $index and $ftime values have to be identically for all track data +##' objects. The number of columns of the track data objects must match. Thus +##' a track data object of more than one formant and single columned F0 track +##' data object can not be rbind()ed. +##' +##' @aliases rbind.trackdata rbind +##' @param \dots track data objects +##' @return A track data object with the same $index and $ftime values of the +##' source track data objects and with $data that includes all columns of +##' $data of the source track data objects. +##' @author Jonathan Harrington +##' @seealso \code{\link{rbind}} \code{\link{cbind.trackdata}} +##' \code{\link{trackdata}} \code{\link{get_trackdata}} +##' @keywords methods +##' @examples +##' +##' data(vowlax) +##' +##' #segment list vowlax - first segment only +##' vowlax[1,] +##' +##' #F0 track data object for vowlax - first segment only +##' vowlax.fund[1] +##' +##' #rms track data object for vowlax - first segment only +##' vowlax.rms[1] +##' +##' #now combine both track data objects +##' fund.rms.lax = rbind(vowlax.fund[1:10,], vowlax.rms[1:10,]) +##' +##' #the combined track data object +##' #The first ten rows in $data keep vowlax.fund data, the 11th to last row keeps vowlax.rms data +##' fund.rms.lax +##' +##' +##' +##' @export +"rbind.trackdata" <- function (...) { - mat <- NULL - for (j in list(...)) { - if (is.matrix(j$data)) - mat$data <- rbind(mat$data, j$data) - else mat$data <- c(mat$data, j$data) - mat$index <- rbind(mat$index, j$index) - if (!is.null(j$ftime)) - mat$ftime <- rbind(mat$ftime, j$ftime) - } - diffinds <- mat$index[, 2] - mat$index[, 1] + 1 - right <- cumsum(diffinds) - first.left <- diffinds - 1 - left <- right - first.left - mat$index <- cbind(left, right) - if (version$major >= 5) { - oldClass(mat) <- "trackdata" - } - else { - class(mat) <- "trackdata" - } - mat + mat <- NULL + for (j in list(...)) { + if (is.matrix(j$data)) + mat$data <- rbind(mat$data, j$data) + else mat$data <- c(mat$data, j$data) + mat$index <- rbind(mat$index, j$index) + if (!is.null(j$ftime)) + mat$ftime <- rbind(mat$ftime, j$ftime) + } + diffinds <- mat$index[, 2] - mat$index[, 1] + 1 + right <- cumsum(diffinds) + first.left <- diffinds - 1 + left <- right - first.left + mat$index <- cbind(left, right) + if (version$major >= 5) { + oldClass(mat) <- "trackdata" + } + else { + class(mat) <- "trackdata" + } + mat } diff --git a/R/slope.test.R b/R/slope.test.R index b614f90f..81b92d79 100644 --- a/R/slope.test.R +++ b/R/slope.test.R @@ -1,16 +1,31 @@ -############################################################################# -# # -# copyright : (C) 2000 SHLRC, Macquarie University # -# email : Steve.Cassidy@mq.edu.au # -# url : http://www.shlrc.mq.edu.au/emu # -# # -# This program is free software; you can redistribute it and/or modify # -# it under the terms of the GNU General Public License as published by # -# the Free Software Foundation; either version 2 of the License, or # -# (at your option) any later version. # -# # -############################################################################# - +##' Slope Test +##' +##' Tests whether the difference between two or more regression lines is +##' significant +##' +##' +##' @param ... this function takes any number of two column matrices. The +##' first column is the y-data (in the case of locus equations, this is the +##' vowel onset) and the second column is the x-data (in the case of locus +##' equations, vowel target). +##' @return The return value consists of the following components: +##' +##' \item{separate}{ slope, intercept, r-squared, F-ratio, "d(egrees of) +##' f(reedom)" and "prob(ability that) line fits data" for the separate data +##' matrices entered. } \item{combined}{ F-ratio, "d(egrees of) f(reedom)", and +##' "Probability of them being DIFFERENT" for the slope and for the intercept +##' of the combined data. } \item{x}{ the combined x-data for all the +##' matrices. } \item{y}{ the combined y-data for all the matrices. } +##' \item{mat}{ the category vectors for the combined data (consists of 1, 0 +##' and -1). } \item{numrows}{ the number of rows in each matrix. } +##' \item{numcats}{ the sum number of matrices entered. +##' +##' } +##' @seealso lm(), summary.lm(), pf() +##' @references see E. Pedhazur, Multiple Regression in Behavioral Research +##' p.436-450, 496-507. +##' @keywords misc +##' @export Slope.test "Slope.test" <- function(...) { ## compiled by Jonathan Harrington and Marija Tabain (October 1997) @@ -22,56 +37,56 @@ ## see E. Pedhazur, Multiple Regression in Behavioral Research ## p.436-450, 496-507. Slope.sub <- function(...) - { - ## combine the matrices, and find out how many rows there are altogether - omat <- NULL - omat$numcats <- length(list(...)) - for(j in list(...)) { - numrows <- nrow(j) - omat$y <- c(omat$y, j[, 1]) - omat$x <- c(omat$x, j[, 2]) - omat$numrows <- c(omat$numrows, numrows) - } - ## set up category vectors of 1, 0, 0, .... -1 - vec <- rep(0, omat$numcats - 1) - omat$mat <- NULL - for(j in 1:length(vec)) { - zeros <- vec - zeros[j] <- 1 - zeros <- c(zeros, -1) - zeros <- rep(zeros, omat$numrows) - omat$mat <- cbind(omat$mat, zeros) - } - omat + { + ## combine the matrices, and find out how many rows there are altogether + omat <- NULL + omat$numcats <- length(list(...)) + for(j in list(...)) { + numrows <- nrow(j) + omat$y <- c(omat$y, j[, 1]) + omat$x <- c(omat$x, j[, 2]) + omat$numrows <- c(omat$numrows, numrows) } - + ## set up category vectors of 1, 0, 0, .... -1 + vec <- rep(0, omat$numcats - 1) + omat$mat <- NULL + for(j in 1:length(vec)) { + zeros <- vec + zeros[j] <- 1 + zeros <- c(zeros, -1) + zeros <- rep(zeros, omat$numrows) + omat$mat <- cbind(omat$mat, zeros) + } + omat + } + ## main function begins here omat <- Slope.sub(...) ## number of category vectors and the (1) continuous vector for intercept k1 <- omat$numcats # the (1) continuous vector for intercept k2 <- 1 # number of category vectors, product - # vectors and (1)continuous vector - + # vectors and (1)continuous vector + ## for slope k3 <- 1 + ((omat$numcats - 1) * 2) ## number of category vectors and (1) continuous vector for slope k4 <- omat$numcats ## length of y and of x N <- sum(omat$numrows) - + for(j in list(...)) { ## find the F-ratio, degrees of freedom, r-squared values, slope and intercept ## for the separate matrices firstvals <- summary.lm(lm(j[, 1] ~ j[, 2])) first.pf <- pf(firstvals$fstatistic[1], firstvals$fstatistic[2], - firstvals$fstatistic[3]) + firstvals$fstatistic[3]) first.out <- c(firstvals$r.squared, firstvals$fstatistic, - first.pf, firstvals$coefficients[, 1]) + first.pf, firstvals$coefficients[, 1]) omat$separate <- rbind(omat$separate, first.out) } - + dimnames(omat$separate)[[2]] <- c("r-sq", "F ratio", "df", "df", - "prob. line fits data", "intercept", "slope") - + "prob. line fits data", "intercept", "slope") + ## multiply the category vectors by the x-values prodvals <- omat$x * omat$mat z123 <- lm(omat$y ~ omat$x + omat$mat + prodvals) @@ -84,7 +99,7 @@ fratio.in <- fval.in.num/fval.in.den s123 <- summary.aov(z123) fratio.slope <- s123$"F Value"[3] - + ## calculate probabilities and degrees of freedom prob.in <- pf(fratio.in, k1 - k2, N - k1 - 1) prob.slope <- pf(fratio.slope, k3 - k4, N - k3 - 1) @@ -93,7 +108,7 @@ outtemp <- rbind(first, second) col.lab <- c("intercept", "slope") row.lab <- c("F ratio", "Probability of them being DIFFERENT", "df", - "df") + "df") dimnames(outtemp) <- list(col.lab, row.lab) omat$combined <- outtemp omat diff --git a/R/spectralclass.R b/R/spectralclass.R index a78ca902..ca0e06ed 100644 --- a/R/spectralclass.R +++ b/R/spectralclass.R @@ -1,5 +1,26 @@ -"is.spectral" <- - function(dat) +##' Function to test whether the object is of class "spectral" +##' +##' Returns TRUE or FALSE depending on whether the object is of class "spectral" +##' +##' +##' @param dat An R object +##' @return A single element logical vector: TRUE or FALSE +##' @author Jonathan Harrington +##' @seealso \code{\link{as.spectral}} +##' @keywords attribute +##' @examples +##' +##' +##' is.spectral(vowlax.dft.5) +##' is.spectral(fric.dft) +##' is.spectral(fric.dft$data) +##' is.spectral(vowlax.dft.5[1,]) +##' is.spectral(fric.dft[1,1]) +##' +##' +##' +##' @export is.spectral +"is.spectral" <- function(dat) { if(!is.trackdata(dat)) return(any(class(dat) %in% "spectral")) @@ -9,78 +30,196 @@ -"as.spectral" <- - function(trackdata, fs) + + + + + + + + +##' Function to convert an object into an object of class 'spectral'. +##' +##' The function converts a vector, matrix, or EMU-trackdata object into an +##' object of the same class and of class 'spectral' +##' +##' If fs is a single element numeric vector, then the frequencies of trackdata +##' are defined to extend to fs/2. If fs is missing, then the frequencies are +##' 0:(N-1) where N is the length of trackdata. +##' +##' @param trackdata A vector, matrix, or EMU-trackdata object. +##' @param fs Either a single element numeric vector, or a numeric vector of +##' the same length as the length of trackdata if trackdata is a vector, or of +##' the same number of rows as trackdata +##' @return The same object but of class 'spectral'. +##' @author Jonathan Harrington +##' @seealso \code{\link{is.spectral}} \code{\link{plot.spectral}} +##' @keywords attribute +##' @examples +##' +##' vec = 1:10 +##' as.spectral(vec, 2000) +##' mat = rbind(1:10, 1:10) +##' as.spectral(mat) +##' # turn a spectral trackdata object into a trackdata object +##' tr = as.trackdata(rbind(fric.dft$data), fric.dft$index, fric.dft$ftime) +##' # turn it into a spectral trackdata object with sampling freq 16 kHz +##' tr = as.spectral(tr, 16000) +##' # list the frequencies +##' trackfreq(tr) +##' # Notice that only the $data is made into a spectral matrix, +##' # not the entire trackdata object +##' # so this is trackdata +##' class(tr) +##' # this is a spectral matrix +##' class(tr$data) +##' +##' +##' +##' +##' @export as.spectral +"as.spectral" <- function(trackdata, fs) { if(is.trackdata(trackdata)){ - + if(is.spectral(trackdata$data)) { warning("matrix is already of class spectral") return(trackdata) - } + } N <- ncol(trackdata$data) if(missing(fs)) fs <- 0: (ncol(trackdata$data)-1) else{ if(length(fs)==1) - { - fs <- fs/2 - fs <- seq(0, fs, length=N) - } + { + fs <- fs/2 + fs <- seq(0, fs, length=N) + } } attr(trackdata$data, "fs") <- fs class(trackdata$data) <- c(class(trackdata$data), "spectral") } - + else if (is.matrix(trackdata)){ if(is.spectral(trackdata)) { warning("matrix is already of class spectral") return(trackdata) - } + } N <- ncol(trackdata) if(missing(fs)) fs <- 0: (ncol(trackdata)-1) else{ if(length(fs)==1) - { - fs <- fs/2 - fs <- seq(0, fs, length=N) - } + { + fs <- fs/2 + fs <- seq(0, fs, length=N) + } } attr(trackdata, "fs") <- fs class(trackdata) <- c(class(trackdata), "spectral") } else - { - - if(is.spectral(trackdata)){ - warning("matrix is already of class spectral") - return(trackdata) - } - N <- length(trackdata) - if(missing(fs)) - fs <- 0: (length(trackdata)-1) - else{ - if(length(fs)==1) - { - fs <- fs/2 - fs <- seq(0, fs, length=N) - } + { + + if(is.spectral(trackdata)){ + warning("matrix is already of class spectral") + return(trackdata) + } + N <- length(trackdata) + if(missing(fs)) + fs <- 0: (length(trackdata)-1) + else{ + if(length(fs)==1) + { + fs <- fs/2 + fs <- seq(0, fs, length=N) } - attr(trackdata, "fs") <- fs - class(trackdata) <- c(class(trackdata), "spectral") } + attr(trackdata, "fs") <- fs + class(trackdata) <- c(class(trackdata), "spectral") + } trackdata } -`plot.spectral` <- - function (x, labs, ylim, xlim, col, lty, lwd, fun, - freq, type = "l", power = FALSE, powcoeffs = c(10, 10), dbnorm = FALSE, - dbcoeffs = c(0, 0), legend = TRUE, axes=TRUE, ...) + + + + + + + + +##' Plot spectra from EMU spectral objects +##' +##' The function plots spectrum of any EMU spectral object. +##' +##' This function is implemented when a spectral trackdata object is called +##' with the 'plot' function. +##' +##' @param x An EMU object of class 'spectral' +##' @param labs An optional vector character labels. Must be the same length as +##' specdata +##' @param ylim A two-element numeric vector for the y-axis range (see 'par') +##' @param xlim A two-element numeric vector for the x-axis range (see 'par') +##' @param col Specify a color - see 'mu.colour') +##' @param lty Specify a linetype - see 'mu.colour' +##' @param lwd Specify line thickness - see 'mu.colour' +##' @param fun An R function name e.g., mean, var, sum, etc. The function is +##' applied separately to each category type specified in labs +##' @param freq A numeric vector the same length as the number of columns in +##' specdata specifying the frequencies at which the spectral data is to be +##' plotted. If not supplied, defaults to trackfreq(specdata) +##' @param type A single element character vector for the linetype +##' @param power Logical. If TRUE, then specdata (or specdata$data if specdata is +##' a trackdata object, is converted to a * +##' specdata\eqn{\mbox{\textasciicircum}}{^}b, where a and b have the values +##' given in powcoeffs. This operation is applied before b +##' @param powcoeffs A two-element numeric vector. Defaults to c(10, 10) +##' @param dbnorm Logical. If TRUE, apply dB-level normalization per spectrum as +##' defined by dbcoeffs below. Defaults to FALSE. +##' @param dbcoeffs A two element numeric vector (x, y). The spectra are +##' normalised in such a way that the values of each spectrum at a frequency of +##' y are set to a dB level of x. For example, to normalise the spectrum to 10 +##' dB at 2000 Hz, set dbnorm to TRUE and dbcoeffs to c(2000, 10) +##' @param legend Parameters for defining the legend. See 'mu.legend' for +##' further details +##' @param axes A logical vector indicating whether the axes should be plotted +##' @param \dots Further graphical parameters may be supplied. +##' @note To plot spectral data from a spectral trackdata object, then call the +##' function explicitly with 'plot/spectral' rather than with just 'plot' +##' @export +##' @author Jonathan Harrington +##' @seealso \code{\link{plot}} \code{\link{plot.trackdata}} +##' \code{\link{as.spectral}} +##' @keywords dplot +##' @examples +##' \dontrun{ +##' +##' plot(vowlax.dft.5[1,]) +##' +##' # with label types +##' plot(vowlax.dft.5[1:20,], vowlax.l[1:20]) +##' +##' # As above but averaged after converting to power ratios. +##' plot(vowlax.dft.5[1:20,], vowlax.l[1:20], fun=mean, power=TRUE) +##' +##' # All the spectra of one segment in a trackdata object +##' plot(fric.dft[1,]) +##' +##' } +##' +"plot.spectral" <- function (x, labs, ylim, xlim, col, lty, + lwd, fun, freq, type = "l", + power = FALSE, powcoeffs = c(10, 10), + dbnorm = FALSE, dbcoeffs = c(0, 0), + legend = TRUE, axes=TRUE, ...) { + oldpar = graphics::par(no.readonly=TRUE) + on.exit(graphics::par(oldpar)) + specdata = x if (is.trackdata(specdata)) specdata <- specdata$data @@ -133,10 +272,10 @@ lwd <- NULL cols <- mu.colour(labs, col, lty, lwd) for (j in 1:nrow(specdata)) { - plot(f, specdata[j, ], type = type, col = cols$colour[j], + graphics::plot(f, specdata[j, ], type = type, col = cols$colour[j], lty = cols$linetype[j], lwd = cols$lwd[j], xlim = xlim, ylim = ylim, xlab = "", ylab = "", axes = FALSE) - par(new = TRUE) + graphics::par(new = TRUE) } if (is.logical(legend)) { if (legend & length(unique(labs)) > 1) { @@ -148,18 +287,17 @@ else legend(legend, NULL, cols$legend$lab, col = cols$legend$col, lty = as.numeric(cols$legend$lty), lwd = as.numeric(cols$legend$lwd)) if(axes) - { - axis(side = 1) - axis(side = 2) - } - title(...) - box(...) + { + graphics::axis(side = 1) + graphics::axis(side = 2) + } + graphics::title(...) + graphics::box(...) } - -`bark.spectral` <- - function (f, ...) +##' @export +"bark.spectral" <- function (f, ...) { specobject = f if (!is.trackdata(specobject)) { @@ -180,12 +318,14 @@ spec <- specobject$data else if (is.matrix(specobject)) spec <- specobject - else spec <- as.spectral(rbind(specobject), attr(specobject, - "fs")) + else spec <- as.spectral(rbind(specobject), attr(specobject,"fs")) res <- NULL for (j in 1:nrow(spec)) { v = approx(b, c(spec[j, ]), ba) - res <- rbind(res, v$y) + if(j == 1){ # preallocate result matrix + res = matrix(nrow = nrow(spec), ncol = length(v$y)) + } + res[j, ] <- v$y } if (is.trackdata(specobject)) { specobject$data <- res @@ -200,15 +340,13 @@ specobject } - -`mel.spectral` <- - function (a) +##' @export +"mel.spectral" <- function (a) { specobject = a if (!is.trackdata(specobject)) { if (!is.matrix(specobject)) - specobject <- as.spectral(rbind(specobject), attr(specobject, - "fs")) + specobject <- as.spectral(rbind(specobject), attr(specobject, "fs")) } f <- trackfreq(specobject) b <- mel(f) @@ -223,7 +361,10 @@ res <- NULL for (j in 1:nrow(spec)) { v = approx(b, c(spec[j, ]), ba) - res <- rbind(res, v$y) + if(j == 1){ # preallocate result matrix + res = matrix(nrow = nrow(spec), ncol = length(v$y)) + } + res[j, ] <- v$y } if (is.trackdata(specobject)) { specobject$data <- res @@ -237,7 +378,3 @@ } specobject } - - - - diff --git a/R/start.trackdata.R b/R/start.trackdata.R index f61f4665..c55a9385 100644 --- a/R/start.trackdata.R +++ b/R/start.trackdata.R @@ -1,12 +1,14 @@ -"start.trackdata" <- -function(x, ...) + +##' @export +"start.trackdata" <- function(x, ...) { -x$ftime[,1] + x$ftime[,1] } -"end.trackdata" <- -function(x, ...) + +##' @export +"end.trackdata" <- function(x, ...) { -x$ftime[,2] + x$ftime[,2] } diff --git a/R/track.gradinfo.R b/R/track.gradinfo.R index 87b31473..94360773 100644 --- a/R/track.gradinfo.R +++ b/R/track.gradinfo.R @@ -1,29 +1,71 @@ -############################################################################# -# # -# copyright : (C) 2002 LTG, Macquarie University # -# email : Steve.Cassidy@mq.edu.au # -# url : http://www.shlrc.mq.edu.au/emu # -# # -# This program is free software; you can redistribute it and/or modify # -# it under the terms of the GNU General Public License as published by # -# the Free Software Foundation; either version 2 of the License, or # -# (at your option) any later version. # -# # -############################################################################# - -## track.gradinfo -- -## generate various bits of information about a trackdata -## object: -## - duration -## - start, end: data values at the start and end of the segment -## - delta: the difference between start and end data points -## - slope: the slope of the data (delta/duration) -## +##' Calculate gradient summary information for trackdata +##' +##' Calculates a number of summary measures for a trackdata object: duration, +##' start and end data points, delta values and slope. +##' +##' \code{track.gradinfo} calculates a number of summary measure for the +##' segments within a trackdata object. These are useful for data such as +##' kinematic measures where segments might correspond to articulatory +##' movements etc. +##' +##' Measures returned are: duration, start and end data values (ie. the first +##' and last rows of data for each segment), delta (the difference between the +##' first and last rows of data) and slope (delta divided by the duration). +##' +##' @param trackdata An Emu trackdata object as returned by +##' \code{\link{get_trackdata}} +##' @return A data frame with one row per segment and columns: +##' \item{duration}{Segment} \item{startN }{The starting value for each segment +##' (start1 is the starting value for the first column) } \item{endN }{The +##' ending value for each segment } \item{deltaN }{The delta value for each +##' segment} \item{slopeN }{The slope value for each segment} +##' +##' Since the result is a data frame, the columns can be referred to by name +##' (\code{result$duration}) or as matrix columns (\code{result[,1]}). +##' @author Steve Cassidy +##' @seealso \code{\link{get_trackdata}}, \code{\link{dapply}} +##' @keywords misc +##' @examples +##' +##' data(vowlax) +##' segs = vowlax +##' ## fm has 4 columns +##' data.fm <-vowlax.fdat +##' ## F0 has one +##' data.F0 <- vowlax.fund +##' ## info.fm will have duration, 4xstart, 4xend, 4xdelta, 4xslope +##' info.fm <- track.gradinfo(data.fm) +##' ## this should be true +##' ncol(info.fm) == 1+4+4+4+4 +##' +##' ## info.F0 will have one of each +##' info.F0 <- track.gradinfo(data.F0) +##' ## this should be true +##' ncol(info.F0) == 1+1+1+1+1 +##' +##' ## plot the durations vs delta of the first formant +##' plot(info.F0$duration, info.fm$delta1, type="n", xlab="Duration", ylab="Delta") +##' text(info.fm$duration, info.fm$delta1, labels=label(segs)) +##' +##' ## extract just the delta values from the formant info +##' ## You need to eyeball the data to work out which columns to select +##' delta.fm <- info.fm[,10:13] +##' +##' @export track.gradinfo track.gradinfo <- function( trackdata ) { + ## track.gradinfo -- + ## generate various bits of information about a trackdata + ## object: + ## - duration + ## - start, end: data values at the start and end of the segment + ## - delta: the difference between start and end data points + ## - slope: the slope of the data (delta/duration) + ## + result <- dapply(trackdata, track.gradinfo.sub) ## all we want is the data which will be one row per segment result <- data.frame( result$data ) - + ## Put appropriate column headers on the data frame ## ## this would be better off in track.gradinfo.sub but @@ -37,10 +79,25 @@ track.gradinfo <- function( trackdata ) { return( result ) } -## track.gradinfo.sub -- -## do the work of track.gradinfo, return the various -## measures in the right form for dapply + + + + + + + + +##' track gradinfo sub +##' +##' see function +##' +##' +##' @keywords internal +##' @export track.gradinfo.sub track.gradinfo.sub <- function( data, ftime ) { + ## track.gradinfo.sub -- + ## do the work of track.gradinfo, return the various + ## measures in the right form for dapply n <- nrow(data) dur <- ftime[2]-ftime[1] ## delta is the difference between the start and end data points @@ -48,7 +105,7 @@ track.gradinfo.sub <- function( data, ftime ) { ## slope is the delta/duration slope <- delta/dur data <- matrix( c( dur, data[1,], data[n,], delta, slope ), nrow=1) - + ## ftime will be discarded anyway but let's do the right thing ## and set the start and end to the segment mid point mid <- ftime[1]+dur/2 diff --git a/R/trackdata.class.R b/R/trackdata.class.R index 1d67b432..bf8557f0 100644 --- a/R/trackdata.class.R +++ b/R/trackdata.class.R @@ -1,27 +1,23 @@ -############################################################################# -# # -# copyright : (C) 2000 SHLRC, Macquarie University # -# email : Steve.Cassidy@mq.edu.au # -# url : http://www.shlrc.mq.edu.au/emu # -# # -# This program is free software; you can redistribute it and/or modify # -# it under the terms of the GNU General Public License as published by # -# the Free Software Foundation; either version 2 of the License, or # -# (at your option) any later version. # -# # -############################################################################# - - +######################################################### ## Methods that define operations on the class "trackdata" ## see also track and frames +######################################################### + +##' print trackdata +##' +##' see function +##' +##' +##' @keywords internal +##' @export "print.trackdata"<- function(x, ...) { if(is.null(x$trackname)) cat("trackdata from unknown track.\n") else cat("trackdata from track:", x$trackname,"\n") - + cat("index:\n") print(x$index, ...) cat("ftime:\n") @@ -32,25 +28,31 @@ - -"[.trackdata" <- - function (dataset, i, j, ...) +##' Expand trackdata +##' +##' see function +##' +##' +##' @aliases [.trackdata +##' @keywords internal +##' @export +"[.trackdata" <- function (dataset, i, j, ...) { - - + + if (missing(i)) { i <- 1:nrow(dataset$index) } - - + + ftime <- dataset$ftime[i, , drop = FALSE] index <- dataset$index[i, , drop = FALSE] - - + + datarows <- NULL for (ind in 1:nrow(index)) { datarows <- c(datarows, seq(from = index[ind, 1], to = index[ind, - 2])) + 2])) } if (is.matrix(dataset$data)) { if (missing(j)) @@ -78,6 +80,24 @@ + + + + + + + + +##' summary trackdata +##' +##' summarizes trackdata objects +##' +##' +##' @param object track data object +##' @param \dots see summary +##' @keywords internal +##' @method summary trackdata +##' @export "summary.trackdata" <- function(object, ...) { if( is.matrix(object$data)){ @@ -96,12 +116,65 @@ } + + + + + + + + +##' Create an Emu trackdata object +##' +##' Create an Emu trackdata object from a raw data matrix. +##' +##' Emu trackdata objects contain possibly multi-column numerical data +##' corresponding to a set of segments from a database. Data for each segment +##' takes up a number of rows in the main \code{data} matrix, the start and end +##' rows are stored in the \code{index} component. The \code{ftime} component +##' contains the start and end times of the segment data. +##' +##' Trackdata objects are returned by the \code{\link{get_trackdata}} function. +##' +##' @param data A two dimensional matrix of numerical data. +##' @param index Segment index, one row per segment, two columns give the start +##' and end rows in the \code{data} matrix for each segment. +##' @param ftime A two column matrix with one row per segment, gives the start +##' and end times in milliseconds for each segment. +##' @param trackname The name of the track. +##' @return The components are bound into a trackdata object. +##' @seealso \code{\link{get_trackdata}} \code{\link{dplot}} +##' @keywords misc +##' @examples +##' +##' +##' # make a trackdata object of two data segments +##' data1 <- matrix( 1:10, ncol=2 ) +##' data2 <- matrix( 11:20, ncol=2 ) +##' +##' nd1 <- nrow(data1) +##' nd2 <- nrow(data2) +##' index <- rbind( c( 1, nd1 ), c(nd1+1,nd1+nd2) ) +##' +##' times <- rbind( c( 100.0, 110.0 ), c( 200.0, 210.0 ) ) +##' +##' tdata <- as.trackdata( rbind( data1, data2 ), index, times, trackname="fake") +##' +##' # describe the data +##' summary(tdata) +##' # get the data for the first segment +##' tdata[1] +##' # and the second +##' tdata[2] +##' +##' +##' @export as.trackdata "as.trackdata" <- function( data, index, ftime, trackname="" ) { mat <- list( data=as.matrix(data), - index=index, - ftime=ftime, - trackname=trackname) + index=index, + ftime=ftime, + trackname=trackname) if( version$major >= 5 ) { oldClass(mat) <- "trackdata" } else { @@ -110,51 +183,182 @@ mat } -"is.trackdata" <- - function (object) + + + + + + + + +##' Test whether an object is an Emu trackdata object +##' +##' Test whether an object is an Emu trackdata object +##' +##' +##' @param object A data object to be tested +##' @return Returns TRUE if the argument is a trackdata object. +##' @seealso \code{\link{get_trackdata}} +##' @keywords misc +##' @export is.trackdata +"is.trackdata" <- function (object) { return(inherits(object, "trackdata")) } -`plot.trackdata` <- - function (x, timestart = NULL, xlim = NULL, ylim = NULL, labels = NULL, - col = TRUE, lty = FALSE, type="p", pch=NULL, contig = TRUE, ...) + + + + + + + + +##' Produces time-series plots from trackdata +##' +##' The function produces a plot as a function of time for a single segment or +##' multiple plots as a function of time for several segments. +##' +##' The function plots a single segment of trackdata as a function of time. If +##' the segment contains multiple tracks, then these will be overlaid. If there +##' are several temporally non-contiguous segments in the trackdata object, +##' each segment is plotted in a different panel by specifying contig=FALSE. This +##' function is not suitable for overlaying trackdata from more than one +##' segments on the same plot as a function of time: for this use dplot(). +##' +##' @param x A trackdata object. +##' @param timestart A single valued numeric vector for setting the time at +##' which the trackdata should start. Defaults to NULL which means that the +##' start time is taken from start(trackdata), i.e. the time at which the +##' trackdata object starts. +##' @param xlim A numeric vector of two values for specifying the time interval +##' over which the trackdata is to be plotted. Defaults to NULL which means +##' that the trackdata object is plotted between between the start time of the +##' first segment and the end time of the last segment. +##' @param ylim Specify a yaxis range. +##' @param labels A character vector the same length as the number of segments +##' in the trackdata object. Each label is plotted at side = 3 on the plotted +##' at the temporal midpoint of each segment in the trackdata object. Defaults +##' to NULL (plot no labels). Labels will only be plotted if xlim=NULL. +##' @param col A single element logical vector. Defaults to TRUE to plot each +##' label type in a different colour +##' @param lty A single element logical vector. Defaults to FALSE. If TRUE, plot +##' each label type in a different linetype +##' @param type Specify the type of plot. See \link{plot} for the various +##' possibilities +##' @param pch The symbol types to be used for plotting. Should be specified as +##' a numeric vector of the same length as there are unique label classes +##' @param contig A single valued logical vector TRUE or FALSE. If TRUE, then all the +##' segments of the trackdata object are assumed to be temporally contiguous, +##' i.e. the boundaries of the segments are abutting in time and the start time +##' of segment[j,] is the end time of segment[j-1,]. In this case, all the +##' segments of the trackdata object are plotted on the same plot as a function +##' of time. An example of a contiguous trackdata object is coutts.sam. contig +##' = FALSE is when a trackdata object is non-contiguous e.g. all "i:" vowels +##' in a database. An example of a non-contiguous trackdata object is +##' vowlax.fdat. If contig=FALSE then each segment of the trackdata object is +##' plotted separately. +##' @param ... the same graphical parameters can be supplied to this function +##' as for plot e.g type="l", lty=2 etc. +##' @author Jonathan Harrington +##' @seealso \code{\link{plot}}, \code{\link{dplot}} +##' @keywords dplot +##' @examples +##' +##' +##' # a single segment of trackdata (F1) plotted as a function of time. +##' plot(vowlax.fdat[1,1]) +##' +##' # as above, but limits are set for the time axis. +##' plot(vowlax.fdat[1,1], xlim=c(880, 920)) +##' +##' # the the start-time of the x-axis is set to 0 ms, plot F1 and F3, lineplot +##' plot(vowlax.fdat[1,c(1,3)], timestart=0, type="l") +##' +##' +##' # plot F1-F4, same colour, same plotting symbol, between 900 +##' # and 920 ms, type is line and points plot, different linetype per track, no box +##' plot(vowlax.fdat[1,], col="blue", pch=20, xlim=c(900, 920), type="b", lty=TRUE, bty="n") +##' +##' +##' # F1 and F2 of six vowels with labels, separate windows +##' oldpar = par(mfrow=c(2,3)) +##' plot(vowlax.fdat[1:6,1:2], contig=FALSE, labels=vowlax.l[1:6], ylab="F1 and F2", +##' xlab="Time (ms)", type="b", ylim=c(300, 2400)) +##' +##' # As above, timestart set to zero, colour set to blue, different plotting +##' # symbols for the two tracks +##' plot(vowlax.fdat[1:6,1:2], contig=FALSE, labels=vowlax.l[1:6], ylab="F1 and F2", +##' xlab="Time (ms)", type="b", col="blue", pch=c(1,2), ylim=c(300, 2400), timestart=0) +##' +##' # RMS energy for the utterance 'just relax said Coutts' +##' plot(coutts.rms, type="l") +##' # as above a different colour +##' plot(coutts.rms, type="l", col="pink") +##' # as above, linetype 2, double line thickness, no box, times reset to 0 ms +##' plot(coutts.rms, type="l", col="pink", lty=2, lwd=2, bty="n", timestart=0) +##' # as above but plotted as non-contiguous segments, i.e one segment per panel +##' par(mfrow=c(2,3)) +##' plot(coutts.rms, type="l", col="pink", lty=2, lwd=2, bty="n", timestart=0, contig=FALSE) +##' # plot with labels +##' labels = label(coutts) +##' par(mfrow=c(1,1)) +##' plot(coutts.rms, labels=labels, type="l", bty="n") +##' # as above, double line-thickness, green, line type 3, no box, +##' # time start 0 ms with x and y axis labels +##' plot(coutts.rms, labels=labels, type="l", lwd=2, +##' col="green", lty=3, bty="n", timestart=0, xlab="Time (ms)", ylab="Amplitude") +##' # as above with a different plotting symbol for the points +##' par(mfrow=c(2,3)) +##' plot(coutts.rms, labels=labels, type="b", lwd=2, col="green", +##' timestart=0, bty="n", contig=FALSE, pch=20) +##' +##' par(oldpar) +##' +##' @export +`plot.trackdata` <- function (x, timestart = NULL, xlim = NULL, + ylim = NULL, labels = NULL, col = TRUE, + lty = FALSE, type="p", pch=NULL, + contig = TRUE, ...) { + oldpar = graphics::par(no.readonly=TRUE) + on.exit(graphics::par(oldpar)) + trackdata <- x N <- nrow(trackdata$data) if(is.logical(col)) - { - if (col) - col <- 1:ncol(trackdata) - else - col <- rep(1, ncol(trackdata)) - } + { + if (col) + col <- 1:ncol(trackdata) + else + col <- rep(1, ncol(trackdata)) + } else - { - if(length(col)!=ncol(trackdata)) - col <- rep(col[1], ncol(trackdata)) - } - + { + if(length(col)!=ncol(trackdata)) + col <- rep(col[1], ncol(trackdata)) + } + if(is.logical(lty)) - { - if (lty) - lty <- 1:ncol(trackdata) - else - lty <- rep(1, ncol(trackdata)) - } + { + if (lty) + lty <- 1:ncol(trackdata) + else + lty <- rep(1, ncol(trackdata)) + } else - { - if(length(lty)!=ncol(trackdata)) - lty <- rep(lty[1], ncol(trackdata)) - } + { + if(length(lty)!=ncol(trackdata)) + lty <- rep(lty[1], ncol(trackdata)) + } if(is.null(pch)) pch <- rep(1, ncol(trackdata)) else - { - if(length(pch)!=ncol(trackdata)) - pch <- rep(pch[1], ncol(trackdata)) - } - + { + if(length(pch)!=ncol(trackdata)) + pch <- rep(pch[1], ncol(trackdata)) + } + n <- nrow(trackdata) if (!is.null(xlim)) @@ -193,25 +397,25 @@ ylim <- range(data) for (k in 1:ncol(data)) { if(k==ncol(data)) - plot(times, data[, k], xlim = xlim, ylim = ylim, + graphics::plot(times, data[, k], xlim = xlim, ylim = ylim, col = col[k], lty = lty[k], pch=pch[k], type=type, ...) else - plot(times, data[, k], xlim = xlim, ylim = ylim, + graphics::plot(times, data[, k], xlim = xlim, ylim = ylim, col = col[k], lty = lty[k], pch=pch[k], xlab="", ylab="", main="", axes=FALSE, bty="n", type=type) - par(new = TRUE) + graphics::par(new = TRUE) } - par(new = FALSE) + graphics::par(new = FALSE) if (!is.null(labels)) { if (length(boundary.times) > 2) - abline(v = boundary.times) - mtext(labels, at = label.times) + graphics::abline(v = boundary.times) + graphics::mtext(labels, at = label.times) } } else { if (is.null(labels)) labels <- rep("", nrow(trackdata)) for (j in 1:nrow(trackdata)) { - plot(trackdata[j, ], timestart = timestart, xlim = xlim, + graphics::plot(trackdata[j, ], timestart = timestart, xlim = xlim, ylim = ylim, labels = labels[j], col=col, lty=lty, type=type, pch=pch,contig = TRUE, ...) } } @@ -219,50 +423,96 @@ - -"bark.trackdata" <- - function(f, ...) +##' @export +"bark.trackdata" <- function(f, ...) { trackdata = f if(is.spectral(trackdata$data)) return(bark.spectral(trackdata)) else - { - trackdata$data <- bark(trackdata$data) - return(trackdata) - } + { + trackdata$data <- bark(trackdata$data) + return(trackdata) + } } - -"mel.trackdata" <- - function(a) +##' @export +"mel.trackdata" <- function(a) { trackdata = a if(is.spectral(trackdata$data)) return(mel.spectral(trackdata)) else - { - trackdata$data <- mel(trackdata$data) - return(trackdata) - } + { + trackdata$data <- mel(trackdata$data) + return(trackdata) + } } -"trackfreq" <- - function(specdata){ - if(is.trackdata(specdata)) - return(attr(specdata$data, "fs")) - else - return(attr(specdata, "fs")) - } -"get.trackkeywrd" <- - function (fname) + + + + + + +##' function to find the frequencies of a spectral object +##' +##' Find the frequencies of a spectral object. +##' +##' +##' @param specdata A spectral object +##' @return A vector of the frequencies at which the columns of a spectral +##' matrix occur. +##' @author Jonathan Harrington +##' @keywords attribute +##' @examples +##' +##' trackfreq(vowlax.dft.5) +##' # Frequency components between 1000 and 2000 Hz +##' trackfreq(vowlax.dft.5[,1000:2000]) +##' # All frequency components of a trackdata object except the d.c. offset +##' trackfreq(fric.dft[,-1]) +##' # All frequency components except the d.c. offset +##' # and except frequencies above 5000 Hz +##' trackfreq(fric.dft[,-c(1, 5000:20000)]) +##' # Note the following syntax if the spectral object is a vector +##' # Frequencies 1000-3000 Hz +##' trackfreq(e.dft[1000:3000]) +##' +##' +##' +##' @export trackfreq +"trackfreq" <- function(specdata){ + if(is.trackdata(specdata)) + return(attr(specdata$data, "fs")) + else + return(attr(specdata, "fs")) +} + + + + + + + + + + +##' get trackkeywrd +##' +##' see function +##' +##' +##' @keywords internal +##' @export get.trackkeywrd +"get.trackkeywrd" <- function (fname) { line <- readLines(fname, n = 2) if (length(line) < 2) { @@ -285,22 +535,51 @@ } -"dur.trackdata" <- - function (x) + + + + + + + + +##' Duration of trackdata elements +##' +##' Duration of segments is calculated for each element in the trackdata object +##' +##' +##' @param x a trackdata object +##' @return a vector of durations +##' @author Jonathan Harrington +##' @keywords internal +##' @export +"dur.trackdata" <- function (x) { x$ftime[,2] - x$ftime[,1] } + + + + + + + + +##' frames +##' +##' Get frames from trackdata objects +##' +##' +##' @param trackdata an object of class trackdata +##' @return Data frames from the input object. +##' @author Jonathan Harrington +##' @seealso \code{\link{trackdata}} +##' @keywords utilities +##' @export frames "frames" <- function(trackdata) { if(!(is.trackdata(trackdata))) stop ("Object must be of class trackdata") trackdata$data } - - - -# Local Variables: -# mode:S -# S-temp-buffer-p:t -# End: diff --git a/R/tracktimes.R b/R/tracktimes.R index 93a106e3..4b5f1fab 100644 --- a/R/tracktimes.R +++ b/R/tracktimes.R @@ -1,21 +1,35 @@ -"tracktimes" <- -function(trackdata) +##' Get the track times from EMU trackdata objects +##' +##' The function obtains the times at which track values occur. +##' +##' Every $data value in a trackdata object is associated with a time at which +##' it occurs in the utterance. This function returns those times. +##' +##' @param trackdata An EMU trackdata object, or a matrix of track values +##' obtained at a single time point using dcut() +##' @author Jonathan Harrington +##' @seealso \code{\link{start.trackdata}} \code{\link{end.trackdata}} +##' \code{\link{start.emusegs}} \code{\link{end.emusegs}} +##' @keywords datagen +##' @examples +##' +##' # track time values for a trackdata object +##' times <- tracktimes(vowlax.fdat) +##' # track time values for a matrix of trackdata values +##' # at the temporal midpoint +##' tracktimes(dcut(vowlax.fdat[1:3,], 0.5, prop=TRUE)) +##' +##' @export tracktimes +"tracktimes" <- function(trackdata) { -if(is.trackdata(trackdata)) -# return the times at which the frames -# of trackdata occur as a numerical vector -times <- as.numeric(dimnames(trackdata$data)[[1]]) -else if(is.vector(trackdata)) -times <- as.numeric(names(trackdata)) -else if(is.matrix(trackdata)) -times <- as.numeric(dimnames(trackdata)[[1]]) -else times <- NULL -times + if(is.trackdata(trackdata)) + # return the times at which the frames + # of trackdata occur as a numerical vector + times <- as.numeric(dimnames(trackdata$data)[[1]]) + else if(is.vector(trackdata)) + times <- as.numeric(names(trackdata)) + else if(is.matrix(trackdata)) + times <- as.numeric(dimnames(trackdata)[[1]]) + else times <- NULL + times } - - - - - - - diff --git a/R/trapply.R b/R/trapply.R index 5d36df44..e1ee91da 100644 --- a/R/trapply.R +++ b/R/trapply.R @@ -1,18 +1,88 @@ -`trapply` <- - function (trackdata, fun, ..., simplify = FALSE, returntrack = FALSE) +##' A method of the generic function by for objects of class 'trackdata' +##' +##' A given function 'FUN' is applied to the data corresponding to each segment +##' of data. +##' +##' trapply() applies a function iteratively to each segment of a trackdata +##' object without the need for using a for-loop. It can be used to calculate, +##' for example, the mean value of the data values of each segment separately. +##' Any function that can be applied sensibly to trackdata[j]$data where j is +##' a segment number can be used as the fun argument to trapply(). It is also +##' possible to write your own function and use trapply() to apply it +##' separately to each segment. Care needs to be taken in using trapply() in +##' the following two ways. Firstly, the argument simplify=TRUE should only be set +##' if it can be guaranteed that a vector of the same length or matrix of the +##' same number of rows as the number of segments in the trackdata object is +##' returned. For example, simplify=TRUE can be used in calculating the mean per +##' segment of a trackdata object, because there will only be one value (the +##' mean) per segment. However, simplify should be set to FALSE in calculating the +##' range because here two values are returned per segment. Similarly use +##' simplify=FALSE n smoothing the data in which the number of values returned per +##' segment is different. Secondly, trapply() only applies a function to a +##' single parameter; the function can be used to apply to a function to +##' multi-parameter trackdata such as F1-F4, but then the function needs to be +##' put inside apply() - see examples below. +##' +##' @param trackdata a track data object +##' @param fun a function that is applied to each segment +##' @param \dots arguments of the function fun +##' @param simplify simplify = TRUE , output is a matrix; simplify = FALSE a +##' list is returned +##' @param returntrack returntrack = FALSE , return a trackdata object +##' @return list or vector or matrix +##' @author Jonathan Harrington +##' @seealso \code{\link{apply}} +##' @keywords methods +##' @examples +##' +##' # mean f0 one value per segment +##' m = trapply(vowlax.fund, mean, simplify=TRUE) +##' # mean F1 - F4 +##' m = trapply(vowlax.fdat, apply, 2, mean, simplify=TRUE) +##' # make a logical vector of any segments that have an F1 value +##' # between their start time and end time greater than n Hz +##' pfun <- function(x, n=1000) any(x > n) +##' # greater than 1100 Hz +##' temp = trapply(vowlax.fdat[,1], pfun, 1100, simplify=TRUE) +##' # get the F2-range per segment +##' r = trapply(vowlax.fdat[,2], range) +##' # F2-range of 20th segment +##' r[[20]] +##' # DCT-smooth F2 with 10 coeffs +##' # get the first 4 DCT coefficients +##' f2.dct = trapply(vowlax.fdat[,2], dct, 3, simplify=TRUE) +##' # dct-smooth F2 with the first 5 DCT coeffs +##' f2sm = trapply(vowlax.fdat[,2], dct, 4, TRUE, returntrack=TRUE) +##' # Make new F2 trackdata such that each segment has +##' # F2 divided by its F2 range +##' pfun <- function(x) x/(diff(abs(range(x)))) +##' newf2 = trapply(vowlax.fdat[,2], pfun, returntrack=TRUE) +##' +##' @export trapply +`trapply` <- function (trackdata, fun, ..., simplify = FALSE, returntrack = FALSE) { if(returntrack) simplify <- FALSE - # if simplify is F or if returntrack is T, store as a list + # if simplify is FALSE or if returntrack is TRUE, store as a list if (!simplify) result <- list(NULL) else result <- NULL - for (j in 1:nrow(trackdata)) { - if (!simplify) - result[[j]] <- fun(trackdata[j, ]$data, ...) - else - result <- rbind(result, fun(trackdata[j, ]$data, - ...)) + if (!simplify) + for (j in 1:nrow(trackdata)) { + curRes = fun(trackdata[j, ]$data, ...) + if(j == 1){ + lapply(1:nrow(trackdata), function(i) vector('numeric', length(curRes))) + } + result[[j]] <- curRes + } + else{ + for (j in 1:nrow(trackdata)) { + curRes = fun(trackdata[j, ]$data, ...) + if(j == 1){ # preallocate matrix + result = matrix(nrow = nrow(trackdata), ncol = length(curRes)) + } + result[j,] <- curRes + } } if (simplify) { if (ncol(result) == 1) @@ -22,4 +92,3 @@ result <- buildtrack(result) result } - diff --git a/R/zzz.R b/R/zzz.R new file mode 100644 index 00000000..f3be02a2 --- /dev/null +++ b/R/zzz.R @@ -0,0 +1,12 @@ +.onLoad <- function(libname, pkgname) { + op <- options() + op.emuR <- list( + emuR.emuWebApp.dir = file.path(tempdir(), "EMU-webApp") + # emuR.emuWebApp.dir = file.path("~/Developer/EMU-webApp/dist/") # for devel + ) + # set package options + toset <- !(names(op.emuR) %in% names(op)) + if(any(toset)) options(op.emuR[toset]) + + invisible() +} \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 00000000..4bd9540e --- /dev/null +++ b/README.md @@ -0,0 +1,65 @@ +# emuR - Main package of the EMU Speech Database Management System + +[![Coverage Status](https://coveralls.io/repos/IPS-LMU/emuR/badge.svg)](https://coveralls.io/github/IPS-LMU/emuR) +[![CRAN_Status_Badge](http://www.r-pkg.org/badges/version/emuR)](https://cran.r-project.org/package=emuR) + +## Out of funding + +Unfortunately, the EMU-SDMS is currently out of funding. + +We at the IPS will do what we can to fix bugs, security issues or necessary adjustments to new versions of R; but we cannot currently work on new features or performance improvements. + +We would be very glad if funding in academia allowed for more technical staff to maintain software used by the research community. + +## Introduction + +The emuR package provides the next iteration of the EMU Speech +Database Management System (EMU-SDMS) with database management, data +extraction, data preparation and data visualization facilities. +It also contains a server that +is intended to host databases in the emuDB format +(see `vignette('emuDB_intro')`) to the EMU-webApp +([http://ips-lmu.github.io/EMU-webApp/](http://ips-lmu.github.io/EMU-webApp/)). The querying of annotations is +performed using EMU's own EQL2 (EMU Query Language Version 2). + +This package is part of the next iteration of the EMU Speech Database Management System +which aims to be as close to an all-in-one solution for generating, manipulating, querying, +analyzing and managing speech databases as possible. +For an overview of the system please see [http://ips-lmu.github.io/EMU.html](http://ips-lmu.github.io/EMU.html) and/or [https://doi.org/10.1016/j.csl.2017.01.002](https://doi.org/10.1016/j.csl.2017.01.002) + . + + + +## Installation + +* to install the current [CRAN release](https://cran.r-project.org/package=emuR) simply call: +```r +install.packages("emuR") +``` + +As this also installs all of the dependencies (incl. +the [wrassp](https://cran.r-project.org/package=wrassp) package) this is +the only installation step necessary to install the EMU-SDMS on your system. +The only other requirement of the EMU-SDMS is a modern web browser (Chrome (recommended!) / Firefox / ...) which +most people should already have on their systems. + + +## Quick start + +For more information see the [The EMU-SDMS Manual](https://ips-lmu.github.io/The-EMU-SDMS-Manual/) + +## For Developers / Beta-Testers + +### Installation (two alternative methods) + +* either download & extract the package from GitHub. Then install it with the following command: +```r +install.packages("path/to/emuR", repos = NULL, type="source") +``` + +* or install the latest development version from GitHub (**preferred method**): +```r +library(devtools) +install_github("IPS-LMU/emuR", build_vignettes = TRUE) +``` + diff --git a/cran-comments.md b/cran-comments.md new file mode 100644 index 00000000..86926d01 --- /dev/null +++ b/cran-comments.md @@ -0,0 +1,17 @@ +## R CMD check results + +0 errors | 0 warnings | 0 notes + + +## revdepcheck results + +We checked 1 reverse dependencies, comparing R CMD check results across CRAN and dev versions of this package. + + * We saw 0 new problems + * We failed to check 0 packages + + +## Package anchors in \links + +Added package anchors to \links as appropriate. +(CRAN wants this fixed by 2025-09-03). diff --git a/data/coutts.rda b/data/coutts.rda index 09c29db3..5f0c9eed 100644 Binary files a/data/coutts.rda and b/data/coutts.rda differ diff --git a/data/coutts2.rda b/data/coutts2.rda index 1e5a2352..64926f1b 100644 Binary files a/data/coutts2.rda and b/data/coutts2.rda differ diff --git a/data/demo.vowels.rda b/data/demo.vowels.rda index a5b247de..306b0964 100644 Binary files a/data/demo.vowels.rda and b/data/demo.vowels.rda differ diff --git a/data/dip.rda b/data/dip.rda index d8d1ed81..6e621f29 100644 Binary files a/data/dip.rda and b/data/dip.rda differ diff --git a/data/isol.rda b/data/isol.rda index cefe7a80..3004c52b 100644 Binary files a/data/isol.rda and b/data/isol.rda differ diff --git a/data/vowlax.rda b/data/vowlax.rda index 00b1f0fd..1591dd70 100644 Binary files a/data/vowlax.rda and b/data/vowlax.rda differ diff --git a/demo/demo.all.R b/demo/demo.all.R deleted file mode 100644 index 97136eaa..00000000 --- a/demo/demo.all.R +++ /dev/null @@ -1,7 +0,0 @@ -packpath = .path.package(package = "emuR", quiet = FALSE) - sepa = .Platform$file.sep - nfile = "demo-all.seg" - filepath = paste(packpath,sepa,"demo",sepa,nfile,sep = "") - - -demo.all <- read.segs(filepath) diff --git a/demo/demo.all.rms.R b/demo/demo.all.rms.R deleted file mode 100644 index b59953bf..00000000 --- a/demo/demo.all.rms.R +++ /dev/null @@ -1,6 +0,0 @@ -packpath = .path.package(package = "emuR", quiet = FALSE) - sepa = .Platform$file.sep - nfile = "demo-all-rms.dat" - filepath = paste(packpath,sepa,"demo",sepa,nfile,sep = "") - - demo.all.rms <- read.trackdata(filepath) diff --git a/demo/demo.vowels.R b/demo/demo.vowels.R deleted file mode 100644 index 4c3e35e8..00000000 --- a/demo/demo.vowels.R +++ /dev/null @@ -1,6 +0,0 @@ -packpath = .path.package(package = "emuR", quiet = FALSE) - sepa = .Platform$file.sep - nfile = "demo-vowels.seg" - filepath = paste(packpath,sepa,"demo",sepa,nfile,sep = "") - - demo.vowels <- read.segs(filepath) diff --git a/demo/demo.vowels.fm.R b/demo/demo.vowels.fm.R deleted file mode 100644 index 49059d13..00000000 --- a/demo/demo.vowels.fm.R +++ /dev/null @@ -1,6 +0,0 @@ -packpath = .path.package(package = "emuR", quiet = FALSE) - sepa = .Platform$file.sep - nfile = "demo-vowel-fm.dat" - filepath = paste(packpath,sepa,"demo",sepa,nfile,sep = "") - - demo.vowels.fm <- read.trackdata(filepath) diff --git a/emuR.Rproj b/emuR.Rproj new file mode 100644 index 00000000..1fe768dc --- /dev/null +++ b/emuR.Rproj @@ -0,0 +1,18 @@ +Version: 1.0 +ProjectId: d574d633-8020-48bb-a104-e505b2c076d9 + +RestoreWorkspace: Default +SaveWorkspace: Default +AlwaysSaveHistory: Default + +EnableCodeIndexing: Yes +UseSpacesForTab: Yes +NumSpacesForTab: 2 +Encoding: UTF-8 + +RnwWeave: Sweave +LaTeX: pdfLaTeX + +BuildType: Package +PackageUseDevtools: Yes +PackageInstallArgs: --no-multiarch --with-keep.source diff --git a/inst/CITATION b/inst/CITATION new file mode 100644 index 00000000..27848387 --- /dev/null +++ b/inst/CITATION @@ -0,0 +1,2 @@ +## R package reference generated from DESCRIPTION metadata +citation(auto = meta) diff --git a/copyright b/inst/COPYRIGHTS similarity index 58% rename from copyright rename to inst/COPYRIGHTS index 38ffd77b..80635994 100644 --- a/copyright +++ b/inst/COPYRIGHTS @@ -1,13 +1,13 @@ ############################################################################# # # -# copyright : (C) 2008 IPS, University Munich # -# email : jmh@phonetic.uni-muenchen.de # -# url : http://emu.sourceforge.net # -# # +# copyright : (C) 2008 IPS, University Munich # +# email : raphael|jmh@phonetik.uni-muenchen.de # +# url : https://github.com/IPS-LMU/emuR # +# # # This program is free software; you can redistribute it and/or modify # # it under the terms of the GNU General Public License as published by # # the Free Software Foundation; either version 2 of the License, or # # (at your option) any later version. # -# # +# # ############################################################################# diff --git a/inst/extdata/ae-hStar.seg b/inst/extdata/ae-hStar.seg new file mode 100644 index 00000000..b02c3ace --- /dev/null +++ b/inst/extdata/ae-hStar.seg @@ -0,0 +1,24 @@ +database:ae +query:Tone=H* +type:event +# +H* 419.082 0 0000:msajc003 +H* 931.588 0 0000:msajc003 +H* 1912.75 0 0000:msajc003 +H* 2230.668 0 0000:msajc003 +H* 761.206 0 0000:msajc010 +H* 1807.928 0 0000:msajc010 +H* 2205.734 0 0000:msajc010 +H* 592.768 0 0000:msajc012 +H* 1260.704 0 0000:msajc012 +H* 1822.494 0 0000:msajc012 +H* 1486.76 0 0000:msajc015 +H* 2445.22 0 0000:msajc015 +H* 2910.929 0 0000:msajc015 +H* 327.306 0 0000:msajc022 +H* 855.687 0 0000:msajc022 +H* 1306.299 0 0000:msajc022 +H* 662.897 0 0000:msajc023 +H* 1208.136 0 0000:msajc023 +H* 1889.685 0 0000:msajc023 +H* 611.83 0 0000:msajc057 diff --git a/inst/extdata/ae-n.seg b/inst/extdata/ae-n.seg new file mode 100644 index 00000000..4bef3179 --- /dev/null +++ b/inst/extdata/ae-n.seg @@ -0,0 +1,16 @@ +database:ae +query:Phonetic=n +type:segment +# +n 1031.989 1195.988 0000:msajc003 +n 1741.497 1791.494 0000:msajc003 +n 1515.5 1554.5 0000:msajc010 +n 2431 2528.5 0000:msajc010 +n 895.002 1023.007 0000:msajc012 +n 2402.311 2474.922 0000:msajc012 +n 2226.601 2271.132 0000:msajc015 +n 3046.168 3067.703 0000:msajc015 +n 1434.822 1495.318 0000:msajc023 +n 1774.989 1833.989 0000:msajc023 +n 508.744 544 0000:msajc057 +n 2447.748 2480.496 0000:msajc057 diff --git a/demo/demo-all-rms.dat b/inst/extdata/demo-all-rms.dat similarity index 100% rename from demo/demo-all-rms.dat rename to inst/extdata/demo-all-rms.dat diff --git a/demo/demo-all.seg b/inst/extdata/demo-all.seg similarity index 100% rename from demo/demo-all.seg rename to inst/extdata/demo-all.seg diff --git a/demo/demo-vowel-fm.dat b/inst/extdata/demo-vowel-fm.dat similarity index 100% rename from demo/demo-vowel-fm.dat rename to inst/extdata/demo-vowel-fm.dat diff --git a/demo/demo-vowels.seg b/inst/extdata/demo-vowels.seg similarity index 100% rename from demo/demo-vowels.seg rename to inst/extdata/demo-vowels.seg diff --git a/inst/extdata/legacy_emu_ae_phonetic_seglist.RData b/inst/extdata/legacy_emu_ae_phonetic_seglist.RData new file mode 100644 index 00000000..51f67de7 Binary files /dev/null and b/inst/extdata/legacy_emu_ae_phonetic_seglist.RData differ diff --git a/inst/extdata/rawDemoData/annotationFiles/ae_DBconfig.json b/inst/extdata/rawDemoData/annotationFiles/ae_DBconfig.json new file mode 100644 index 00000000..e7a1d28d --- /dev/null +++ b/inst/extdata/rawDemoData/annotationFiles/ae_DBconfig.json @@ -0,0 +1,411 @@ +{ + "name": "ae", + "UUID": "0fc618dc-8980-414d-8c7a-144a649ce199", + "mediafileExtension": "wav", + "ssffTrackDefinitions": [ + { + "name": "dft", + "columnName": "dft", + "fileExtension": "dft" + }, + { + "name": "fm", + "columnName": "fm", + "fileExtension": "fms" + } + ], + "levelDefinitions": [ + { + "name": "Utterance", + "type": "ITEM", + "attributeDefinitions": [ + { + "name": "Utterance", + "type": "STRING" + } + ] + }, + { + "name": "Intonational", + "type": "ITEM", + "attributeDefinitions": [ + { + "name": "Intonational", + "type": "STRING" + } + ] + }, + { + "name": "Intermediate", + "type": "ITEM", + "attributeDefinitions": [ + { + "name": "Intermediate", + "type": "STRING" + } + ] + }, + { + "name": "Word", + "type": "ITEM", + "attributeDefinitions": [ + { + "name": "Word", + "type": "STRING" + }, + { + "name": "Accent", + "type": "STRING" + }, + { + "name": "Text", + "type": "STRING" + } + ] + }, + { + "name": "Syllable", + "type": "ITEM", + "attributeDefinitions": [ + { + "name": "Syllable", + "type": "STRING" + } + ] + }, + { + "name": "Phoneme", + "type": "ITEM", + "attributeDefinitions": [ + { + "name": "Phoneme", + "type": "STRING", + "labelGroups": [ + { + "name": "vowel", + "values": [ + "A", + "E", + "I", + "O", + "V", + "U", + "ai", + "ei", + "oi", + "i@", + "u@", + "au", + "@u", + "@:", + "@", + "a:", + "e:", + "i:", + "o:", + "u:" + ] + }, + { + "name": "stop", + "values": [ + "p", + "tS", + "dZ", + "t", + "k", + "b", + "d", + "g" + ] + }, + { + "name": "nasal", + "values": [ + "m", + "n", + "N" + ] + }, + { + "name": "fricative", + "values": [ + "f", + "v", + "s", + "z", + "S", + "Z", + "h", + "D", + "T" + ] + }, + { + "name": "approximant", + "values": [ + "w", + "j", + "l", + "r" + ] + }, + { + "name": "other", + "values": [ + "H" + ] + } + ] + } + ] + }, + { + "name": "Phonetic", + "type": "SEGMENT", + "attributeDefinitions": [ + { + "name": "Phonetic", + "type": "STRING", + "labelGroups": [ + { + "name": "vowel", + "values": [ + "A", + "E", + "EC", + "I", + "O", + "V", + "U", + "ai", + "ei", + "oi", + "i@", + "u@", + "au", + "@u", + "@:", + "@", + "=", + "a:", + "e:", + "i:", + "o:", + "u:" + ] + }, + { + "name": "stop", + "values": [ + "p", + "tS", + "dZ", + "t", + "k", + "b", + "d", + "g" + ] + }, + { + "name": "nasal", + "values": [ + "m", + "n" + ] + }, + { + "name": "fricative", + "values": [ + "f", + "v", + "s", + "z", + "S", + "Z", + "h", + "D", + "D-", + "T" + ] + }, + { + "name": "approximant", + "values": [ + "w", + "j", + "l", + "r", + "rr", + "Or" + ] + }, + { + "name": "other", + "values": [ + "H" + ] + } + ] + } + ] + }, + { + "name": "Tone", + "type": "EVENT", + "attributeDefinitions": [ + { + "name": "Tone", + "type": "STRING" + } + ] + }, + { + "name": "Foot", + "type": "ITEM", + "attributeDefinitions": [ + { + "name": "Foot", + "type": "STRING" + } + ] + } + ], + "linkDefinitions": [ + { + "type": "ONE_TO_MANY", + "superlevelName": "Utterance", + "sublevelName": "Intonational" + }, + { + "type": "ONE_TO_MANY", + "superlevelName": "Intonational", + "sublevelName": "Intermediate" + }, + { + "type": "ONE_TO_MANY", + "superlevelName": "Intermediate", + "sublevelName": "Word" + }, + { + "type": "ONE_TO_MANY", + "superlevelName": "Word", + "sublevelName": "Syllable" + }, + { + "type": "ONE_TO_MANY", + "superlevelName": "Syllable", + "sublevelName": "Phoneme" + }, + { + "type": "MANY_TO_MANY", + "superlevelName": "Phoneme", + "sublevelName": "Phonetic" + }, + { + "type": "ONE_TO_MANY", + "superlevelName": "Syllable", + "sublevelName": "Tone" + }, + { + "type": "ONE_TO_MANY", + "superlevelName": "Intonational", + "sublevelName": "Foot" + }, + { + "type": "ONE_TO_MANY", + "superlevelName": "Foot", + "sublevelName": "Syllable" + } + ], + "EMUwebAppConfig": { + "perspectives": [ + { + "name": "default", + "signalCanvases": { + "order": [ + "OSCI", + "SPEC" + ], + "assign": [ + + ], + "contourLims": [ + + ] + }, + "levelCanvases": { + "order": [ + "Phonetic", + "Tone" + ] + }, + "twoDimCanvases": { + "order": [ + + ] + } + }, + { + "name": "Phonetic-only", + "signalCanvases": { + "order": [ + "OSCI", + "SPEC" + ], + "assign": [ + + ], + "contourLims": [ + + ] + }, + "levelCanvases": { + "order": [ + "Phonetic" + ] + }, + "twoDimCanvases": { + "order": [ + + ] + } + }, + { + "name": "Tone-only", + "signalCanvases": { + "order": [ + "OSCI", + "SPEC" + ], + "assign": [ + + ], + "contourLims": [ + + ] + }, + "levelCanvases": { + "order": [ + "Tone" + ] + }, + "twoDimCanvases": { + "order": [ + + ] + } + } + + ], + "restrictions": { + "showPerspectivesSidebar": true + }, + "activeButtons": { + "saveBundle": true, + "showHierarchy": true + } + } +} + diff --git a/inst/extdata/rawDemoData/annotationFiles/msadb089.lab b/inst/extdata/rawDemoData/annotationFiles/msadb089.lab new file mode 100644 index 00000000..d74921b4 --- /dev/null +++ b/inst/extdata/rawDemoData/annotationFiles/msadb089.lab @@ -0,0 +1,34 @@ +signal msadb089 +nfields 1 +# + 0.289510 125 H# + 0.327505 125 D + 0.355756 125 @ + 0.391008 125 k + 0.403009 125 H + 0.432011 125 w + 0.530267 125 E + 0.574511 125 s + 0.606513 125 t + 0.663507 125 S + 0.696509 125 @ + 0.722261 125 n + 0.889264 125 e: + 0.960010 125 w + 0.999762 125 @ + 1.139263 125 zS + 1.311014 125 o: + 1.328015 125 t + 1.402009 125 H + 1.435261 125 @ + 1.498515 125 n + 1.529258 125 t + 1.606755 125 H + 1.665759 125 @ + 1.683260 125 D + 1.718012 125 @ + 1.746001 125 p + 1.789261 125 H + 1.964764 125 oi + 1.987851 125 n + 2.058762 125 tH diff --git a/inst/extdata/rawDemoData/annotationFiles/msajc003.TextGrid b/inst/extdata/rawDemoData/annotationFiles/msajc003.TextGrid new file mode 100644 index 00000000..675d8aef --- /dev/null +++ b/inst/extdata/rawDemoData/annotationFiles/msajc003.TextGrid @@ -0,0 +1,607 @@ +File type = "ooTextFile" +Object class = "TextGrid" + +xmin = 0 +xmax = 2.90445 +tiers? +size = 11 +item []: + item [1]: + class = "IntervalTier" + name = "Utterance" + xmin = 0 + xmax = 2.90445 + intervals: size = 3 + intervals [1]: + xmin = 0 + xmax = 0.187498 + text = "" + intervals [2]: + xmin = 0.187498 + xmax = 2.604489 + text = "" + intervals [3]: + xmin = 2.604489 + xmax = 2.90445 + text = "" + item [2]: + class = "IntervalTier" + name = "Intonational" + xmin = 0 + xmax = 2.90445 + intervals: size = 3 + intervals [1]: + xmin = 0 + xmax = 0.187498 + text = "" + intervals [2]: + xmin = 0.187498 + xmax = 2.604489 + text = "L%" + intervals [3]: + xmin = 2.604489 + xmax = 2.90445 + text = "" + item [3]: + class = "IntervalTier" + name = "Intermediate" + xmin = 0 + xmax = 2.90445 + intervals: size = 4 + intervals [1]: + xmin = 0 + xmax = 0.187498 + text = "" + intervals [2]: + xmin = 0.187498 + xmax = 1.289494 + text = "L-" + intervals [3]: + xmin = 1.289494 + xmax = 2.604489 + text = "L-" + intervals [4]: + xmin = 2.604489 + xmax = 2.90445 + text = "" + item [4]: + class = "IntervalTier" + name = "Word" + xmin = 0 + xmax = 2.90445 + intervals: size = 9 + intervals [1]: + xmin = 0 + xmax = 0.187498 + text = "" + intervals [2]: + xmin = 0.187498 + xmax = 0.674237 + text = "C" + intervals [3]: + xmin = 0.674237 + xmax = 0.739994 + text = "F" + intervals [4]: + xmin = 0.739994 + xmax = 1.289494 + text = "C" + intervals [5]: + xmin = 1.289494 + xmax = 1.463242 + text = "F" + intervals [6]: + xmin = 1.463242 + xmax = 1.634493 + text = "F" + intervals [7]: + xmin = 1.634493 + xmax = 2.033739 + text = "C" + intervals [8]: + xmin = 2.033739 + xmax = 2.604489 + text = "C" + intervals [9]: + xmin = 2.604489 + xmax = 2.90445 + text = "" + item [5]: + class = "IntervalTier" + name = "Accent" + xmin = 0 + xmax = 2.90445 + intervals: size = 9 + intervals [1]: + xmin = 0 + xmax = 0.187498 + text = "" + intervals [2]: + xmin = 0.187498 + xmax = 0.674237 + text = "S" + intervals [3]: + xmin = 0.674237 + xmax = 0.739994 + text = "W" + intervals [4]: + xmin = 0.739994 + xmax = 1.289494 + text = "S" + intervals [5]: + xmin = 1.289494 + xmax = 1.463242 + text = "W" + intervals [6]: + xmin = 1.463242 + xmax = 1.634493 + text = "W" + intervals [7]: + xmin = 1.634493 + xmax = 2.033739 + text = "W" + intervals [8]: + xmin = 2.033739 + xmax = 2.604489 + text = "S" + intervals [9]: + xmin = 2.604489 + xmax = 2.90445 + text = "" + item [6]: + class = "IntervalTier" + name = "Text" + xmin = 0 + xmax = 2.90445 + intervals: size = 9 + intervals [1]: + xmin = 0 + xmax = 0.187498 + text = "" + intervals [2]: + xmin = 0.187498 + xmax = 0.674237 + text = "amongst" + intervals [3]: + xmin = 0.674237 + xmax = 0.739994 + text = "her" + intervals [4]: + xmin = 0.739994 + xmax = 1.289494 + text = "friends" + intervals [5]: + xmin = 1.289494 + xmax = 1.463242 + text = "she" + intervals [6]: + xmin = 1.463242 + xmax = 1.634493 + text = "was" + intervals [7]: + xmin = 1.634493 + xmax = 2.033739 + text = "considered" + intervals [8]: + xmin = 2.033739 + xmax = 2.604489 + text = "beautiful" + intervals [9]: + xmin = 2.604489 + xmax = 2.90445 + text = "" + item [7]: + class = "IntervalTier" + name = "Syllable" + xmin = 0 + xmax = 2.90445 + intervals: size = 14 + intervals [1]: + xmin = 0 + xmax = 0.187498 + text = "" + intervals [2]: + xmin = 0.187498 + xmax = 0.256994 + text = "W" + intervals [3]: + xmin = 0.256994 + xmax = 0.674237 + text = "S" + intervals [4]: + xmin = 0.674237 + xmax = 0.739994 + text = "S" + intervals [5]: + xmin = 0.739994 + xmax = 1.289494 + text = "S" + intervals [6]: + xmin = 1.289494 + xmax = 1.463242 + text = "W" + intervals [7]: + xmin = 1.463242 + xmax = 1.634493 + text = "W" + intervals [8]: + xmin = 1.634493 + xmax = 1.791494 + text = "W" + intervals [9]: + xmin = 1.791494 + xmax = 1.945495 + text = "S" + intervals [10]: + xmin = 1.945495 + xmax = 2.033739 + text = "W" + intervals [11]: + xmin = 2.033739 + xmax = 2.283744 + text = "S" + intervals [12]: + xmin = 2.283744 + xmax = 2.361989 + text = "W" + intervals [13]: + xmin = 2.361989 + xmax = 2.604489 + text = "W" + intervals [14]: + xmin = 2.604489 + xmax = 2.90445 + text = "" + item [8]: + class = "IntervalTier" + name = "Phoneme" + xmin = 0 + xmax = 2.90445 + intervals: size = 34 + intervals [1]: + xmin = 0 + xmax = 0.187498 + text = "" + intervals [2]: + xmin = 0.187498 + xmax = 0.256994 + text = "V" + intervals [3]: + xmin = 0.256994 + xmax = 0.340238 + text = "m" + intervals [4]: + xmin = 0.340238 + xmax = 0.426743 + text = "V" + intervals [5]: + xmin = 0.426743 + xmax = 0.48349 + text = "N" + intervals [6]: + xmin = 0.48349 + xmax = 0.566994 + text = "s" + intervals [7]: + xmin = 0.566994 + xmax = 0.674237 + text = "t" + intervals [8]: + xmin = 0.674237 + xmax = 0.739994 + text = "@:" + intervals [9]: + xmin = 0.739994 + xmax = 0.892734 + text = "f" + intervals [10]: + xmin = 0.892734 + xmax = 0.949994 + text = "r" + intervals [11]: + xmin = 0.949994 + xmax = 1.031989 + text = "E" + intervals [12]: + xmin = 1.031989 + xmax = 1.195988 + text = "n" + intervals [13]: + xmin = 1.195988 + xmax = 1.289494 + text = "z" + intervals [14]: + xmin = 1.289494 + xmax = 1.419986 + text = "S" + intervals [15]: + xmin = 1.419986 + xmax = 1.463242 + text = "i:" + intervals [16]: + xmin = 1.463242 + xmax = 1.506239 + text = "w" + intervals [17]: + xmin = 1.506239 + xmax = 1.548486 + text = "@" + intervals [18]: + xmin = 1.548486 + xmax = 1.634493 + text = "z" + intervals [19]: + xmin = 1.634493 + xmax = 1.715488 + text = "k" + intervals [20]: + xmin = 1.715488 + xmax = 1.741497 + text = "@" + intervals [21]: + xmin = 1.741497 + xmax = 1.791494 + text = "n" + intervals [22]: + xmin = 1.791494 + xmax = 1.893237 + text = "s" + intervals [23]: + xmin = 1.893237 + xmax = 1.945495 + text = "I" + intervals [24]: + xmin = 1.945495 + xmax = 1.966743 + text = "d" + intervals [25]: + xmin = 1.966743 + xmax = 2.033739 + text = "@" + intervals [26]: + xmin = 2.033739 + xmax = 2.150242 + text = "d_b" + intervals [27]: + xmin = 2.150242 + xmax = 2.211239 + text = "j" + intervals [28]: + xmin = 2.211239 + xmax = 2.283744 + text = "u:" + intervals [29]: + xmin = 2.283744 + xmax = 2.302993 + text = "d" + intervals [30]: + xmin = 2.302993 + xmax = 2.361989 + text = "@" + intervals [31]: + xmin = 2.361989 + xmax = 2.447484 + text = "f" + intervals [32]: + xmin = 2.447484 + xmax = 2.506316 + text = "@" + intervals [33]: + xmin = 2.506316 + xmax = 2.604489 + text = "l" + intervals [34]: + xmin = 2.604489 + xmax = 2.90445 + text = "" + item [9]: + class = "IntervalTier" + name = "Phonetic" + xmin = 0 + xmax = 2.90445 + intervals: size = 36 + intervals [1]: + xmin = 0 + xmax = 0.187498 + text = "" + intervals [2]: + xmin = 0.187498 + xmax = 0.256994 + text = "V" + intervals [3]: + xmin = 0.256994 + xmax = 0.340238 + text = "m" + intervals [4]: + xmin = 0.340238 + xmax = 0.426743 + text = "V" + intervals [5]: + xmin = 0.426743 + xmax = 0.48349 + text = "N" + intervals [6]: + xmin = 0.48349 + xmax = 0.566994 + text = "s" + intervals [7]: + xmin = 0.566994 + xmax = 0.596742 + text = "t" + intervals [8]: + xmin = 0.596742 + xmax = 0.674237 + text = "H" + intervals [9]: + xmin = 0.674237 + xmax = 0.739994 + text = "@:" + intervals [10]: + xmin = 0.739994 + xmax = 0.892734 + text = "f" + intervals [11]: + xmin = 0.892734 + xmax = 0.949994 + text = "r" + intervals [12]: + xmin = 0.949994 + xmax = 1.031989 + text = "E" + intervals [13]: + xmin = 1.031989 + xmax = 1.195988 + text = "n" + intervals [14]: + xmin = 1.195988 + xmax = 1.289494 + text = "z" + intervals [15]: + xmin = 1.289494 + xmax = 1.419986 + text = "S" + intervals [16]: + xmin = 1.419986 + xmax = 1.463242 + text = "i:" + intervals [17]: + xmin = 1.463242 + xmax = 1.506239 + text = "w" + intervals [18]: + xmin = 1.506239 + xmax = 1.548486 + text = "@" + intervals [19]: + xmin = 1.548486 + xmax = 1.634493 + text = "z" + intervals [20]: + xmin = 1.634493 + xmax = 1.675991 + text = "k" + intervals [21]: + xmin = 1.675991 + xmax = 1.715488 + text = "H" + intervals [22]: + xmin = 1.715488 + xmax = 1.741497 + text = "@" + intervals [23]: + xmin = 1.741497 + xmax = 1.791494 + text = "n" + intervals [24]: + xmin = 1.791494 + xmax = 1.893237 + text = "s" + intervals [25]: + xmin = 1.893237 + xmax = 1.945495 + text = "I" + intervals [26]: + xmin = 1.945495 + xmax = 1.966743 + text = "d" + intervals [27]: + xmin = 1.966743 + xmax = 2.033739 + text = "@" + intervals [28]: + xmin = 2.033739 + xmax = 2.150242 + text = "db" + intervals [29]: + xmin = 2.150242 + xmax = 2.211239 + text = "j" + intervals [30]: + xmin = 2.211239 + xmax = 2.283744 + text = "u:" + intervals [31]: + xmin = 2.283744 + xmax = 2.302993 + text = "dH" + intervals [32]: + xmin = 2.302993 + xmax = 2.361989 + text = "@" + intervals [33]: + xmin = 2.361989 + xmax = 2.447484 + text = "f" + intervals [34]: + xmin = 2.447484 + xmax = 2.506316 + text = "@" + intervals [35]: + xmin = 2.506316 + xmax = 2.604489 + text = "l" + intervals [36]: + xmin = 2.604489 + xmax = 2.90445 + text = "" + item [10]: + class = "TextTier" + name = "Tone" + xmin = 0 + xmax = 2.90445 + points: size = 7 + points [1]: + number = 0.419082 + mark = "H*" + points [2]: + number = 0.931588 + mark = "H*" + points [3]: + number = 1.106992 + mark = "L-" + points [4]: + number = 1.912750 + mark = "H*" + points [5]: + number = 2.230668 + mark = "H*" + points [6]: + number = 2.543105 + mark = "L-" + points [7]: + number = 2.577642 + mark = "L%" + item [11]: + class = "IntervalTier" + name = "Foot" + xmin = 0 + xmax = 2.90445 + intervals: size = 7 + intervals [1]: + xmin = 0 + xmax = 0.256994 + text = "" + intervals [2]: + xmin = 0.256994 + xmax = 0.674237 + text = "F" + intervals [3]: + xmin = 0.674237 + xmax = 0.739994 + text = "F" + intervals [4]: + xmin = 0.739994 + xmax = 1.791494 + text = "F" + intervals [5]: + xmin = 1.791494 + xmax = 2.033739 + text = "F" + intervals [6]: + xmin = 2.033739 + xmax = 2.604489 + text = "F" + intervals [7]: + xmin = 2.604489 + xmax = 2.90445 + text = "" diff --git a/inst/extdata/rawDemoData/annotationFiles/msajc003.hlb b/inst/extdata/rawDemoData/annotationFiles/msajc003.hlb new file mode 100644 index 00000000..a38cf78b --- /dev/null +++ b/inst/extdata/rawDemoData/annotationFiles/msajc003.hlb @@ -0,0 +1,228 @@ +**EMU hierarchical labels** +187 +Syllable Syllable +102 W +103 S +104 S +105 S +106 W +107 W +108 W +109 S +110 W +111 S +112 W +113 W + +Word Word Accent Text +2 C S amongst +24 F W her +30 C S friends +43 F W she +52 F W was +61 C W considered +83 C S beautiful + +Foot Foot +47 F +53 F +62 F +71 F +84 F + +Phoneme Phoneme +114 V +115 m +116 V +117 N +118 s +119 t +120 @: +121 f +122 r +123 E +124 n +125 z +126 S +127 i: +128 w +129 @ +130 z +131 k +132 @ +133 n +134 s +135 I +136 d +137 @ +138 d +139 b +140 j +141 u: +142 d +143 @ +144 f +145 @ +146 l + +Phonetic Phonetic +147 V +148 m +149 V +150 N +151 s +152 t +153 H +154 @: +155 f +156 r +157 E +158 n +159 z +160 S +161 i: +162 w +163 @ +164 z +165 k +166 H +167 @ +168 n +169 s +170 I +171 d +172 @ +173 db +174 j +175 u: +176 dH +177 @ +178 f +179 @ +180 l + +Tone Tone +181 H* +182 H* +183 L- +184 H* +185 H* +186 L- +187 L% + +Utterance Utterance +8 + +Intonational Intonational +7 L% + +Intermediate Intermediate +5 L- +46 L- + + +2 102 103 114 115 116 117 118 119 147 148 149 150 151 152 153 +5 2 24 30 102 103 104 105 114 115 116 117 118 119 120 121 122 123 124 125 147 148 149 150 151 152 153 154 155 156 157 158 159 181 182 +7 2 5 24 30 43 46 47 52 53 61 62 71 83 84 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 184 185 +8 2 5 7 24 30 43 46 47 52 53 61 62 71 83 84 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 184 185 +24 104 120 154 181 +30 105 121 122 123 124 125 155 156 157 158 159 182 +43 106 126 127 160 161 +46 43 52 61 83 106 107 108 109 110 111 112 113 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 184 185 +47 103 115 116 117 118 119 148 149 150 151 152 153 +52 107 128 129 130 162 163 164 +53 104 120 154 181 +61 108 109 110 131 132 133 134 135 136 137 138 165 166 167 168 169 170 171 172 173 184 +62 105 106 107 108 121 122 123 124 125 126 127 128 129 130 131 132 133 155 156 157 158 159 160 161 162 163 164 165 166 167 168 182 +71 109 110 134 135 136 137 138 169 170 171 172 173 184 +83 111 112 113 139 140 141 142 143 144 145 146 173 174 175 176 177 178 179 180 185 +84 111 112 113 139 140 141 142 143 144 145 146 173 174 175 176 177 178 179 180 185 +102 114 147 +103 115 116 117 118 119 148 149 150 151 152 153 +104 120 154 181 +105 121 122 123 124 125 155 156 157 158 159 182 +106 126 127 160 161 +107 128 129 130 162 163 164 +108 131 132 133 165 166 167 168 +109 134 135 169 170 184 +110 136 137 138 171 172 173 +111 139 140 141 173 174 175 185 +112 142 143 176 177 +113 144 145 146 178 179 180 +114 147 +115 148 +116 149 +117 150 +118 151 +119 152 153 +120 154 +121 155 +122 156 +123 157 +124 158 +125 159 +126 160 +127 161 +128 162 +129 163 +130 164 +131 165 166 +132 167 +133 168 +134 169 +135 170 +136 171 +137 172 +138 173 +139 173 +140 174 +141 175 +142 176 +143 177 +144 178 +145 179 +146 180 +147 +148 +149 +150 +151 +152 +153 +154 +155 +156 +157 +158 +159 +160 +161 +162 +163 +164 +165 +166 +167 +168 +169 +170 +171 +172 +173 +174 +175 +176 +177 +178 +179 +180 +181 +182 +183 +184 +185 +186 +187 + +0 + diff --git a/inst/extdata/rawDemoData/annotationFiles/msajc003.lab b/inst/extdata/rawDemoData/annotationFiles/msajc003.lab new file mode 100644 index 00000000..33efc722 --- /dev/null +++ b/inst/extdata/rawDemoData/annotationFiles/msajc003.lab @@ -0,0 +1,38 @@ +signal msajc003 +nfields 1 +# + 0.187498 125 H# + 0.256994 125 V + 0.340238 125 m + 0.426743 125 V + 0.483490 125 N + 0.566994 125 s + 0.596742 125 t + 0.674237 125 H + 0.739994 125 @: + 0.892734 125 f + 0.949994 125 r + 1.031989 125 E + 1.195988 125 n + 1.289494 125 z + 1.419986 125 S + 1.463242 125 i: + 1.506239 125 w + 1.548486 125 @ + 1.634493 125 z + 1.675991 125 k + 1.715488 125 H + 1.741497 125 @ + 1.791494 125 n + 1.893237 125 s + 1.945495 125 I + 1.966743 125 d + 2.033739 125 @ + 2.150242 125 db + 2.211239 125 j + 2.283744 125 u: + 2.302993 125 dH + 2.361989 125 @ + 2.447484 125 f + 2.506316 125 @ + 2.604489 125 l diff --git a/inst/extdata/rawDemoData/annotationFiles/msajc003.par b/inst/extdata/rawDemoData/annotationFiles/msajc003.par new file mode 100644 index 00000000..280767bb --- /dev/null +++ b/inst/extdata/rawDemoData/annotationFiles/msajc003.par @@ -0,0 +1,59 @@ +LHD: Partitur 1.2.16 +REP: unknown +SNB: 2 +SAM: 20000 +SBF: 01 +SSB: 16 +NCH: 1 +SPN: unknown +LBD: +KAN: 0 @mVNkst +KAN: 1 h@ +KAN: 2 frendz +KAN: 3 Si: +KAN: 4 wQz +KAN: 5 k@nsId@d +KAN: 6 bju:tIf@l +ORT: 0 amongst +ORT: 1 her +ORT: 2 friends +ORT: 3 she +ORT: 4 was +ORT: 5 considered +ORT: 6 beautiful +TRN: 3800 48199 0,1,2,3,4,5,6 amongst her friends she was considered beautiful +MAU: 0 3799 -1 +MAU: 3800 999 0 @ +MAU: 4800 1999 0 m +MAU: 6800 1599 0 V +MAU: 8400 1199 0 N +MAU: 9600 599 0 k +MAU: 10200 1799 0 s +MAU: 12000 1799 0 t +MAU: 13800 1399 1 @ +MAU: 15200 2599 2 f +MAU: 17800 1999 2 r +MAU: 19800 799 2 e +MAU: 20600 2599 2 n +MAU: 23200 599 2 d +MAU: 23800 1799 2 z +MAU: 25600 2999 3 S +MAU: 28600 799 3 I +MAU: 29400 799 4 w +MAU: 30200 599 4 @ +MAU: 30800 2799 4 z +MAU: 33600 999 5 k +MAU: 34600 1199 5 n +MAU: 35800 2199 5 s +MAU: 38000 1199 5 I +MAU: 39200 599 5 d +MAU: 39800 1399 5 @ +MAU: 41200 2399 6 b +MAU: 43600 1199 6 j +MAU: 44800 1199 6 u: +MAU: 46000 599 6 t +MAU: 46600 599 6 I +MAU: 47200 2599 6 f +MAU: 49800 599 6 @ +MAU: 50400 1599 6 l +MAU: 52000 5799 -1 diff --git a/inst/extdata/rawDemoData/annotationFiles/msajc003.parmanipulated b/inst/extdata/rawDemoData/annotationFiles/msajc003.parmanipulated new file mode 100644 index 00000000..ef872288 --- /dev/null +++ b/inst/extdata/rawDemoData/annotationFiles/msajc003.parmanipulated @@ -0,0 +1,70 @@ +LHD: Partitur 1.2.16 +REP: unknown +SNB: 2 +SBF: 01 +SSB: 16 +NCH: 1 + + +SPN: unknown +LBD: +KAN: 0 @mVNkst +KAN: 1 h@ +KAN: 2 frendz +KAN: 3;4 Si: +KAN: 4 wQz +KAN: 5 k@nsId@d +KAN: 6 bju:tIf@l +ORT: 0 amongst +ORT: 1 ABC: ABC_label; XYZ: XYZ_label +ORT: 2 friends +ORT: 3 she +ORT: 4 was +ORT: 5 considered +ORT: 6 beautiful + + + + +TRN: 3800 48199 0,1,2,3,4,5,6 amongst her friends she was considered beautiful +MAU: 0 3799 -1 +MAU: 3800 999 0 @ +MAU: 4800 1999 0 m +MAU: 6800 1599 0 V +MAU: 8400 1199 0 N + + + +MAU: 9600 601 0 k +MAU: 10200 1799 0 s +MAU: 12000 1799 0 t +MAU: 13800 1399 1 @ +MAU: 15200 2599 2 f +MAU: 17800 1999 2 r +MAU: 19800 799 2 e +MAU: 20600 2599 2 n +MAU: 23200 599 2 d +MAU: 23800 1799 2 z +MAU: 25600 2999 3 S +MAU: 28600 799 3 I +MAU: 29400 799 4 w +MAU: 30200 599 4 @ +MAU: 30800 2799 4 z +MAU: 33600 999 5 k +MAU: 34600 1199 5 n +MAU: 35800 2199 5 s +MAU: 38000 1199 5 I +MAU: 39200 599 5 d +MAU: 39800 1399 5 @ +MAU: 41200 2399 6 b +MAU: 43600 1199 6 j +MAU: 44800 1199 6 u: +MAU: 46000 599 6 t +MAU: 46600 599 6 I +MAU: 47200 2599 6 f +MAU: 49800 599 6 @ +MAU: 50400 1599 6 l +MAU: 52000 5799 -1 + + +XYZ: 1 XYZ_label diff --git a/inst/extdata/rawDemoData/annotationFiles/msajc003.tone b/inst/extdata/rawDemoData/annotationFiles/msajc003.tone new file mode 100644 index 00000000..fb570878 --- /dev/null +++ b/inst/extdata/rawDemoData/annotationFiles/msajc003.tone @@ -0,0 +1,10 @@ +signal msajc003 +nfields 1 +# + 0.419082 125 H* + 0.931588 125 H* + 1.106992 125 L- + 1.912750 125 H* + 2.230668 125 H* + 2.543105 125 L- + 2.577642 125 L% diff --git a/inst/extdata/rawDemoData/annotationFiles/msajc003_annot.json b/inst/extdata/rawDemoData/annotationFiles/msajc003_annot.json new file mode 100644 index 00000000..8e880842 --- /dev/null +++ b/inst/extdata/rawDemoData/annotationFiles/msajc003_annot.json @@ -0,0 +1,1553 @@ +{ + "name": "msajc003", + "annotates": "msajc003.wav", + "sampleRate": 20000, + "levels": [ + { + "name": "Utterance", + "type": "ITEM", + "items": [ + { + "id": 8, + "labels": [ + { + "name": "Utterance", + "value": "" + } + ] + } + ] + }, + { + "name": "Intonational", + "type": "ITEM", + "items": [ + { + "id": 7, + "labels": [ + { + "name": "Intonational", + "value": "L%" + } + ] + } + ] + }, + { + "name": "Intermediate", + "type": "ITEM", + "items": [ + { + "id": 5, + "labels": [ + { + "name": "Intermediate", + "value": "L-" + } + ] + }, + { + "id": 46, + "labels": [ + { + "name": "Intermediate", + "value": "L-" + } + ] + } + ] + }, + { + "name": "Word", + "type": "ITEM", + "items": [ + { + "id": 2, + "labels": [ + { + "name": "Word", + "value": "C" + }, + { + "name": "Accent", + "value": "S" + }, + { + "name": "Text", + "value": "amongst" + } + ] + }, + { + "id": 24, + "labels": [ + { + "name": "Word", + "value": "F" + }, + { + "name": "Accent", + "value": "W" + }, + { + "name": "Text", + "value": "her" + } + ] + }, + { + "id": 30, + "labels": [ + { + "name": "Word", + "value": "C" + }, + { + "name": "Accent", + "value": "S" + }, + { + "name": "Text", + "value": "friends" + } + ] + }, + { + "id": 43, + "labels": [ + { + "name": "Word", + "value": "F" + }, + { + "name": "Accent", + "value": "W" + }, + { + "name": "Text", + "value": "she" + } + ] + }, + { + "id": 52, + "labels": [ + { + "name": "Word", + "value": "F" + }, + { + "name": "Accent", + "value": "W" + }, + { + "name": "Text", + "value": "was" + } + ] + }, + { + "id": 61, + "labels": [ + { + "name": "Word", + "value": "C" + }, + { + "name": "Accent", + "value": "W" + }, + { + "name": "Text", + "value": "considered" + } + ] + }, + { + "id": 83, + "labels": [ + { + "name": "Word", + "value": "C" + }, + { + "name": "Accent", + "value": "S" + }, + { + "name": "Text", + "value": "beautiful" + } + ] + } + ] + }, + { + "name": "Syllable", + "type": "ITEM", + "items": [ + { + "id": 102, + "labels": [ + { + "name": "Syllable", + "value": "W" + } + ] + }, + { + "id": 103, + "labels": [ + { + "name": "Syllable", + "value": "S" + } + ] + }, + { + "id": 104, + "labels": [ + { + "name": "Syllable", + "value": "S" + } + ] + }, + { + "id": 105, + "labels": [ + { + "name": "Syllable", + "value": "S" + } + ] + }, + { + "id": 106, + "labels": [ + { + "name": "Syllable", + "value": "W" + } + ] + }, + { + "id": 107, + "labels": [ + { + "name": "Syllable", + "value": "W" + } + ] + }, + { + "id": 108, + "labels": [ + { + "name": "Syllable", + "value": "W" + } + ] + }, + { + "id": 109, + "labels": [ + { + "name": "Syllable", + "value": "S" + } + ] + }, + { + "id": 110, + "labels": [ + { + "name": "Syllable", + "value": "W" + } + ] + }, + { + "id": 111, + "labels": [ + { + "name": "Syllable", + "value": "S" + } + ] + }, + { + "id": 112, + "labels": [ + { + "name": "Syllable", + "value": "W" + } + ] + }, + { + "id": 113, + "labels": [ + { + "name": "Syllable", + "value": "W" + } + ] + } + ] + }, + { + "name": "Phoneme", + "type": "ITEM", + "items": [ + { + "id": 114, + "labels": [ + { + "name": "Phoneme", + "value": "V" + } + ] + }, + { + "id": 115, + "labels": [ + { + "name": "Phoneme", + "value": "m" + } + ] + }, + { + "id": 116, + "labels": [ + { + "name": "Phoneme", + "value": "V" + } + ] + }, + { + "id": 117, + "labels": [ + { + "name": "Phoneme", + "value": "N" + } + ] + }, + { + "id": 118, + "labels": [ + { + "name": "Phoneme", + "value": "s" + } + ] + }, + { + "id": 119, + "labels": [ + { + "name": "Phoneme", + "value": "t" + } + ] + }, + { + "id": 120, + "labels": [ + { + "name": "Phoneme", + "value": "@:" + } + ] + }, + { + "id": 121, + "labels": [ + { + "name": "Phoneme", + "value": "f" + } + ] + }, + { + "id": 122, + "labels": [ + { + "name": "Phoneme", + "value": "r" + } + ] + }, + { + "id": 123, + "labels": [ + { + "name": "Phoneme", + "value": "E" + } + ] + }, + { + "id": 124, + "labels": [ + { + "name": "Phoneme", + "value": "n" + } + ] + }, + { + "id": 125, + "labels": [ + { + "name": "Phoneme", + "value": "z" + } + ] + }, + { + "id": 126, + "labels": [ + { + "name": "Phoneme", + "value": "S" + } + ] + }, + { + "id": 127, + "labels": [ + { + "name": "Phoneme", + "value": "i:" + } + ] + }, + { + "id": 128, + "labels": [ + { + "name": "Phoneme", + "value": "w" + } + ] + }, + { + "id": 129, + "labels": [ + { + "name": "Phoneme", + "value": "@" + } + ] + }, + { + "id": 130, + "labels": [ + { + "name": "Phoneme", + "value": "z" + } + ] + }, + { + "id": 131, + "labels": [ + { + "name": "Phoneme", + "value": "k" + } + ] + }, + { + "id": 132, + "labels": [ + { + "name": "Phoneme", + "value": "@" + } + ] + }, + { + "id": 133, + "labels": [ + { + "name": "Phoneme", + "value": "n" + } + ] + }, + { + "id": 134, + "labels": [ + { + "name": "Phoneme", + "value": "s" + } + ] + }, + { + "id": 135, + "labels": [ + { + "name": "Phoneme", + "value": "I" + } + ] + }, + { + "id": 136, + "labels": [ + { + "name": "Phoneme", + "value": "d" + } + ] + }, + { + "id": 137, + "labels": [ + { + "name": "Phoneme", + "value": "@" + } + ] + }, + { + "id": 138, + "labels": [ + { + "name": "Phoneme", + "value": "d" + } + ] + }, + { + "id": 139, + "labels": [ + { + "name": "Phoneme", + "value": "b" + } + ] + }, + { + "id": 140, + "labels": [ + { + "name": "Phoneme", + "value": "j" + } + ] + }, + { + "id": 141, + "labels": [ + { + "name": "Phoneme", + "value": "u:" + } + ] + }, + { + "id": 142, + "labels": [ + { + "name": "Phoneme", + "value": "d" + } + ] + }, + { + "id": 143, + "labels": [ + { + "name": "Phoneme", + "value": "@" + } + ] + }, + { + "id": 144, + "labels": [ + { + "name": "Phoneme", + "value": "f" + } + ] + }, + { + "id": 145, + "labels": [ + { + "name": "Phoneme", + "value": "@" + } + ] + }, + { + "id": 146, + "labels": [ + { + "name": "Phoneme", + "value": "l" + } + ] + } + ] + }, + { + "name": "Phonetic", + "type": "SEGMENT", + "items": [ + { + "id": 147, + "sampleStart": 3749, + "sampleDur": 1389, + "labels": [ + { + "name": "Phonetic", + "value": "V" + } + ] + }, + { + "id": 148, + "sampleStart": 5139, + "sampleDur": 1664, + "labels": [ + { + "name": "Phonetic", + "value": "m" + } + ] + }, + { + "id": 149, + "sampleStart": 6804, + "sampleDur": 1729, + "labels": [ + { + "name": "Phonetic", + "value": "V" + } + ] + }, + { + "id": 150, + "sampleStart": 8534, + "sampleDur": 1134, + "labels": [ + { + "name": "Phonetic", + "value": "N" + } + ] + }, + { + "id": 151, + "sampleStart": 9669, + "sampleDur": 1669, + "labels": [ + { + "name": "Phonetic", + "value": "s" + } + ] + }, + { + "id": 152, + "sampleStart": 11339, + "sampleDur": 594, + "labels": [ + { + "name": "Phonetic", + "value": "t" + } + ] + }, + { + "id": 153, + "sampleStart": 11934, + "sampleDur": 1549, + "labels": [ + { + "name": "Phonetic", + "value": "H" + } + ] + }, + { + "id": 154, + "sampleStart": 13484, + "sampleDur": 1314, + "labels": [ + { + "name": "Phonetic", + "value": "@:" + } + ] + }, + { + "id": 155, + "sampleStart": 14799, + "sampleDur": 3054, + "labels": [ + { + "name": "Phonetic", + "value": "f" + } + ] + }, + { + "id": 156, + "sampleStart": 17854, + "sampleDur": 1144, + "labels": [ + { + "name": "Phonetic", + "value": "r" + } + ] + }, + { + "id": 157, + "sampleStart": 18999, + "sampleDur": 1639, + "labels": [ + { + "name": "Phonetic", + "value": "E" + } + ] + }, + { + "id": 158, + "sampleStart": 20639, + "sampleDur": 3279, + "labels": [ + { + "name": "Phonetic", + "value": "n" + } + ] + }, + { + "id": 159, + "sampleStart": 23919, + "sampleDur": 1869, + "labels": [ + { + "name": "Phonetic", + "value": "z" + } + ] + }, + { + "id": 160, + "sampleStart": 25789, + "sampleDur": 2609, + "labels": [ + { + "name": "Phonetic", + "value": "S" + } + ] + }, + { + "id": 161, + "sampleStart": 28399, + "sampleDur": 864, + "labels": [ + { + "name": "Phonetic", + "value": "i:" + } + ] + }, + { + "id": 162, + "sampleStart": 29264, + "sampleDur": 859, + "labels": [ + { + "name": "Phonetic", + "value": "w" + } + ] + }, + { + "id": 163, + "sampleStart": 30124, + "sampleDur": 844, + "labels": [ + { + "name": "Phonetic", + "value": "@" + } + ] + }, + { + "id": 164, + "sampleStart": 30969, + "sampleDur": 1719, + "labels": [ + { + "name": "Phonetic", + "value": "z" + } + ] + }, + { + "id": 165, + "sampleStart": 32689, + "sampleDur": 829, + "labels": [ + { + "name": "Phonetic", + "value": "k" + } + ] + }, + { + "id": 166, + "sampleStart": 33519, + "sampleDur": 789, + "labels": [ + { + "name": "Phonetic", + "value": "H" + } + ] + }, + { + "id": 167, + "sampleStart": 34309, + "sampleDur": 519, + "labels": [ + { + "name": "Phonetic", + "value": "@" + } + ] + }, + { + "id": 168, + "sampleStart": 34829, + "sampleDur": 999, + "labels": [ + { + "name": "Phonetic", + "value": "n" + } + ] + }, + { + "id": 169, + "sampleStart": 35829, + "sampleDur": 2034, + "labels": [ + { + "name": "Phonetic", + "value": "s" + } + ] + }, + { + "id": 170, + "sampleStart": 37864, + "sampleDur": 1044, + "labels": [ + { + "name": "Phonetic", + "value": "I" + } + ] + }, + { + "id": 171, + "sampleStart": 38909, + "sampleDur": 424, + "labels": [ + { + "name": "Phonetic", + "value": "d" + } + ] + }, + { + "id": 172, + "sampleStart": 39334, + "sampleDur": 1339, + "labels": [ + { + "name": "Phonetic", + "value": "@" + } + ] + }, + { + "id": 173, + "sampleStart": 40674, + "sampleDur": 2329, + "labels": [ + { + "name": "Phonetic", + "value": "db" + } + ] + }, + { + "id": 174, + "sampleStart": 43004, + "sampleDur": 1219, + "labels": [ + { + "name": "Phonetic", + "value": "j" + } + ] + }, + { + "id": 175, + "sampleStart": 44224, + "sampleDur": 1449, + "labels": [ + { + "name": "Phonetic", + "value": "u:" + } + ] + }, + { + "id": 176, + "sampleStart": 45674, + "sampleDur": 384, + "labels": [ + { + "name": "Phonetic", + "value": "dH" + } + ] + }, + { + "id": 177, + "sampleStart": 46059, + "sampleDur": 1179, + "labels": [ + { + "name": "Phonetic", + "value": "@" + } + ] + }, + { + "id": 178, + "sampleStart": 47239, + "sampleDur": 1709, + "labels": [ + { + "name": "Phonetic", + "value": "f" + } + ] + }, + { + "id": 179, + "sampleStart": 48949, + "sampleDur": 1176, + "labels": [ + { + "name": "Phonetic", + "value": "@" + } + ] + }, + { + "id": 180, + "sampleStart": 50126, + "sampleDur": 1962, + "labels": [ + { + "name": "Phonetic", + "value": "l" + } + ] + } + ] + }, + { + "name": "Tone", + "type": "EVENT", + "items": [ + { + "id": 181, + "samplePoint": 8382, + "labels": [ + { + "name": "Tone", + "value": "H*" + } + ] + }, + { + "id": 182, + "samplePoint": 18632, + "labels": [ + { + "name": "Tone", + "value": "H*" + } + ] + }, + { + "id": 183, + "samplePoint": 22140, + "labels": [ + { + "name": "Tone", + "value": "L-" + } + ] + }, + { + "id": 184, + "samplePoint": 38255, + "labels": [ + { + "name": "Tone", + "value": "H*" + } + ] + }, + { + "id": 185, + "samplePoint": 44613, + "labels": [ + { + "name": "Tone", + "value": "H*" + } + ] + }, + { + "id": 186, + "samplePoint": 50862, + "labels": [ + { + "name": "Tone", + "value": "L-" + } + ] + }, + { + "id": 187, + "samplePoint": 51553, + "labels": [ + { + "name": "Tone", + "value": "L%" + } + ] + } + ] + }, + { + "name": "Foot", + "type": "ITEM", + "items": [ + { + "id": 47, + "labels": [ + { + "name": "Foot", + "value": "F" + } + ] + }, + { + "id": 53, + "labels": [ + { + "name": "Foot", + "value": "F" + } + ] + }, + { + "id": 62, + "labels": [ + { + "name": "Foot", + "value": "F" + } + ] + }, + { + "id": 71, + "labels": [ + { + "name": "Foot", + "value": "F" + } + ] + }, + { + "id": 84, + "labels": [ + { + "name": "Foot", + "value": "F" + } + ] + } + ] + } + ], + "links": [ + { + "fromID": 8, + "toID": 7 + }, + { + "fromID": 7, + "toID": 47 + }, + { + "fromID": 7, + "toID": 53 + }, + { + "fromID": 7, + "toID": 62 + }, + { + "fromID": 7, + "toID": 71 + }, + { + "fromID": 7, + "toID": 84 + }, + { + "fromID": 7, + "toID": 5 + }, + { + "fromID": 7, + "toID": 46 + }, + { + "fromID": 5, + "toID": 2 + }, + { + "fromID": 5, + "toID": 24 + }, + { + "fromID": 5, + "toID": 30 + }, + { + "fromID": 46, + "toID": 43 + }, + { + "fromID": 46, + "toID": 52 + }, + { + "fromID": 46, + "toID": 61 + }, + { + "fromID": 46, + "toID": 83 + }, + { + "fromID": 2, + "toID": 102 + }, + { + "fromID": 2, + "toID": 103 + }, + { + "fromID": 24, + "toID": 104 + }, + { + "fromID": 30, + "toID": 105 + }, + { + "fromID": 43, + "toID": 106 + }, + { + "fromID": 52, + "toID": 107 + }, + { + "fromID": 61, + "toID": 108 + }, + { + "fromID": 61, + "toID": 109 + }, + { + "fromID": 61, + "toID": 110 + }, + { + "fromID": 83, + "toID": 111 + }, + { + "fromID": 83, + "toID": 112 + }, + { + "fromID": 83, + "toID": 113 + }, + { + "fromID": 102, + "toID": 114 + }, + { + "fromID": 103, + "toID": 115 + }, + { + "fromID": 103, + "toID": 116 + }, + { + "fromID": 103, + "toID": 117 + }, + { + "fromID": 103, + "toID": 118 + }, + { + "fromID": 103, + "toID": 119 + }, + { + "fromID": 104, + "toID": 120 + }, + { + "fromID": 104, + "toID": 181 + }, + { + "fromID": 105, + "toID": 121 + }, + { + "fromID": 105, + "toID": 122 + }, + { + "fromID": 105, + "toID": 123 + }, + { + "fromID": 105, + "toID": 124 + }, + { + "fromID": 105, + "toID": 125 + }, + { + "fromID": 105, + "toID": 182 + }, + { + "fromID": 106, + "toID": 126 + }, + { + "fromID": 106, + "toID": 127 + }, + { + "fromID": 107, + "toID": 128 + }, + { + "fromID": 107, + "toID": 129 + }, + { + "fromID": 107, + "toID": 130 + }, + { + "fromID": 108, + "toID": 131 + }, + { + "fromID": 108, + "toID": 132 + }, + { + "fromID": 108, + "toID": 133 + }, + { + "fromID": 109, + "toID": 134 + }, + { + "fromID": 109, + "toID": 135 + }, + { + "fromID": 109, + "toID": 184 + }, + { + "fromID": 110, + "toID": 136 + }, + { + "fromID": 110, + "toID": 137 + }, + { + "fromID": 110, + "toID": 138 + }, + { + "fromID": 111, + "toID": 139 + }, + { + "fromID": 111, + "toID": 140 + }, + { + "fromID": 111, + "toID": 141 + }, + { + "fromID": 111, + "toID": 185 + }, + { + "fromID": 112, + "toID": 142 + }, + { + "fromID": 112, + "toID": 143 + }, + { + "fromID": 113, + "toID": 144 + }, + { + "fromID": 113, + "toID": 145 + }, + { + "fromID": 113, + "toID": 146 + }, + { + "fromID": 114, + "toID": 147 + }, + { + "fromID": 115, + "toID": 148 + }, + { + "fromID": 116, + "toID": 149 + }, + { + "fromID": 117, + "toID": 150 + }, + { + "fromID": 118, + "toID": 151 + }, + { + "fromID": 119, + "toID": 152 + }, + { + "fromID": 119, + "toID": 153 + }, + { + "fromID": 120, + "toID": 154 + }, + { + "fromID": 121, + "toID": 155 + }, + { + "fromID": 122, + "toID": 156 + }, + { + "fromID": 123, + "toID": 157 + }, + { + "fromID": 124, + "toID": 158 + }, + { + "fromID": 125, + "toID": 159 + }, + { + "fromID": 126, + "toID": 160 + }, + { + "fromID": 127, + "toID": 161 + }, + { + "fromID": 128, + "toID": 162 + }, + { + "fromID": 129, + "toID": 163 + }, + { + "fromID": 130, + "toID": 164 + }, + { + "fromID": 131, + "toID": 165 + }, + { + "fromID": 131, + "toID": 166 + }, + { + "fromID": 132, + "toID": 167 + }, + { + "fromID": 133, + "toID": 168 + }, + { + "fromID": 134, + "toID": 169 + }, + { + "fromID": 135, + "toID": 170 + }, + { + "fromID": 136, + "toID": 171 + }, + { + "fromID": 137, + "toID": 172 + }, + { + "fromID": 138, + "toID": 173 + }, + { + "fromID": 139, + "toID": 173 + }, + { + "fromID": 140, + "toID": 174 + }, + { + "fromID": 141, + "toID": 175 + }, + { + "fromID": 142, + "toID": 176 + }, + { + "fromID": 143, + "toID": 177 + }, + { + "fromID": 144, + "toID": 178 + }, + { + "fromID": 145, + "toID": 179 + }, + { + "fromID": 146, + "toID": 180 + }, + { + "fromID": 47, + "toID": 103 + }, + { + "fromID": 53, + "toID": 104 + }, + { + "fromID": 62, + "toID": 105 + }, + { + "fromID": 62, + "toID": 106 + }, + { + "fromID": 62, + "toID": 107 + }, + { + "fromID": 62, + "toID": 108 + }, + { + "fromID": 71, + "toID": 109 + }, + { + "fromID": 71, + "toID": 110 + }, + { + "fromID": 84, + "toID": 111 + }, + { + "fromID": 84, + "toID": 112 + }, + { + "fromID": 84, + "toID": 113 + } + ] +} + diff --git a/inst/extdata/rawDemoData/annotationFiles/msajc010.TextGrid b/inst/extdata/rawDemoData/annotationFiles/msajc010.TextGrid new file mode 100644 index 00000000..6582798a --- /dev/null +++ b/inst/extdata/rawDemoData/annotationFiles/msajc010.TextGrid @@ -0,0 +1,640 @@ +File type = "ooTextFile" +Object class = "TextGrid" + +xmin = 0 +xmax = 3.054 +tiers? +size = 11 +item []: + item [1]: + class = "IntervalTier" + name = "Utterance" + xmin = 0 + xmax = 3.054 + intervals: size = 3 + intervals [1]: + xmin = 0 + xmax = 0.3 + text = "" + intervals [2]: + xmin = 0.3 + xmax = 2.754 + text = "" + intervals [3]: + xmin = 2.754 + xmax = 3.054 + text = "" + item [2]: + class = "IntervalTier" + name = "Intonational" + xmin = 0 + xmax = 3.054 + intervals: size = 3 + intervals [1]: + xmin = 0 + xmax = 0.3 + text = "" + intervals [2]: + xmin = 0.3 + xmax = 2.754 + text = "L%" + intervals [3]: + xmin = 2.754 + xmax = 3.054 + text = "" + item [3]: + class = "IntervalTier" + name = "Intermediate" + xmin = 0 + xmax = 3.054 + intervals: size = 4 + intervals [1]: + xmin = 0 + xmax = 0.3 + text = "" + intervals [2]: + xmin = 0.3 + xmax = 1.091 + text = "L-" + intervals [3]: + xmin = 1.091 + xmax = 2.754 + text = "L-" + intervals [4]: + xmin = 2.754 + xmax = 3.054 + text = "" + item [4]: + class = "IntervalTier" + name = "Word" + xmin = 0 + xmax = 3.054 + intervals: size = 11 + intervals [1]: + xmin = 0 + xmax = 0.3 + text = "" + intervals [2]: + xmin = 0.3 + xmax = 0.411739 + text = "F" + intervals [3]: + xmin = 0.411739 + xmax = 0.571999 + text = "F" + intervals [4]: + xmin = 0.571999 + xmax = 1.091 + text = "C" + intervals [5]: + xmin = 1.091 + xmax = 1.222389 + text = "F" + intervals [6]: + xmin = 1.222389 + xmax = 1.391057 + text = "C" + intervals [7]: + xmin = 1.391057 + xmax = 1.436791 + text = "*" + intervals [8]: + xmin = 1.436791 + xmax = 1.6285 + text = "F" + intervals [9]: + xmin = 1.6285 + xmax = 1.9578 + text = "C" + intervals [10]: + xmin = 1.9578 + xmax = 2.754 + text = "C" + intervals [11]: + xmin = 2.754 + xmax = 3.054 + text = "" + item [5]: + class = "IntervalTier" + name = "Accent" + xmin = 0 + xmax = 3.054 + intervals: size = 11 + intervals [1]: + xmin = 0 + xmax = 0.3 + text = "" + intervals [2]: + xmin = 0.3 + xmax = 0.411739 + text = "W" + intervals [3]: + xmin = 0.411739 + xmax = 0.571999 + text = "W" + intervals [4]: + xmin = 0.571999 + xmax = 1.091 + text = "S" + intervals [5]: + xmin = 1.091 + xmax = 1.222389 + text = "W" + intervals [6]: + xmin = 1.222389 + xmax = 1.391057 + text = "W" + intervals [7]: + xmin = 1.391057 + xmax = 1.436791 + text = "*" + intervals [8]: + xmin = 1.436791 + xmax = 1.6285 + text = "W" + intervals [9]: + xmin = 1.6285 + xmax = 1.9578 + text = "S" + intervals [10]: + xmin = 1.9578 + xmax = 2.754 + text = "S" + intervals [11]: + xmin = 2.754 + xmax = 3.054 + text = "" + item [6]: + class = "IntervalTier" + name = "Text" + xmin = 0 + xmax = 3.054 + intervals: size = 11 + intervals [1]: + xmin = 0 + xmax = 0.3 + text = "" + intervals [2]: + xmin = 0.3 + xmax = 0.411739 + text = "it" + intervals [3]: + xmin = 0.411739 + xmax = 0.571999 + text = "is" + intervals [4]: + xmin = 0.571999 + xmax = 1.091 + text = "futile" + intervals [5]: + xmin = 1.091 + xmax = 1.222389 + text = "to" + intervals [6]: + xmin = 1.222389 + xmax = 1.391057 + text = "offer" + intervals [7]: + xmin = 1.391057 + xmax = 1.436791 + text = "*" + intervals [8]: + xmin = 1.436791 + xmax = 1.6285 + text = "any" + intervals [9]: + xmin = 1.6285 + xmax = 1.9578 + text = "further" + intervals [10]: + xmin = 1.9578 + xmax = 2.754 + text = "resistance" + intervals [11]: + xmin = 2.754 + xmax = 3.054 + text = "" + item [7]: + class = "IntervalTier" + name = "Syllable" + xmin = 0 + xmax = 3.054 + intervals: size = 16 + intervals [1]: + xmin = 0 + xmax = 0.3 + text = "" + intervals [2]: + xmin = 0.3 + xmax = 0.411739 + text = "W" + intervals [3]: + xmin = 0.411739 + xmax = 0.571999 + text = "W" + intervals [4]: + xmin = 0.571999 + xmax = 0.7985 + text = "S" + intervals [5]: + xmin = 0.7985 + xmax = 1.091 + text = "S" + intervals [6]: + xmin = 1.091 + xmax = 1.222389 + text = "W" + intervals [7]: + xmin = 1.222389 + xmax = 1.391057 + text = "S" + intervals [8]: + xmin = 1.391057 + xmax = 1.436791 + text = "W" + intervals [9]: + xmin = 1.436791 + xmax = 1.5155 + text = "S" + intervals [10]: + xmin = 1.5155 + xmax = 1.6285 + text = "W" + intervals [11]: + xmin = 1.6285 + xmax = 1.864 + text = "S" + intervals [12]: + xmin = 1.864 + xmax = 1.9578 + text = "W" + intervals [13]: + xmin = 1.9578 + xmax = 2.0785 + text = "W" + intervals [14]: + xmin = 2.0785 + xmax = 2.2285 + text = "S" + intervals [15]: + xmin = 2.2285 + xmax = 2.754 + text = "W" + intervals [16]: + xmin = 2.754 + xmax = 3.054 + text = "" + item [8]: + class = "IntervalTier" + name = "Phoneme" + xmin = 0 + xmax = 3.054 + intervals: size = 33 + intervals [1]: + xmin = 0 + xmax = 0.3 + text = "" + intervals [2]: + xmin = 0.3 + xmax = 0.373 + text = "I" + intervals [3]: + xmin = 0.373 + xmax = 0.411739 + text = "t" + intervals [4]: + xmin = 0.411739 + xmax = 0.4765 + text = "I" + intervals [5]: + xmin = 0.4765 + xmax = 0.571999 + text = "z" + intervals [6]: + xmin = 0.571999 + xmax = 0.6745 + text = "f" + intervals [7]: + xmin = 0.6745 + xmax = 0.737 + text = "j" + intervals [8]: + xmin = 0.737 + xmax = 0.7985 + text = "u:" + intervals [9]: + xmin = 0.7985 + xmax = 0.862947 + text = "t" + intervals [10]: + xmin = 0.862947 + xmax = 1.015885 + text = "ai" + intervals [11]: + xmin = 1.015885 + xmax = 1.091 + text = "l" + intervals [12]: + xmin = 1.091 + xmax = 1.13 + text = "t" + intervals [13]: + xmin = 1.13 + xmax = 1.222389 + text = "u:" + intervals [14]: + xmin = 1.222389 + xmax = 1.326995 + text = "O" + intervals [15]: + xmin = 1.326995 + xmax = 1.391057 + text = "f" + intervals [16]: + xmin = 1.391057 + xmax = 1.436791 + text = "@_r" + intervals [17]: + xmin = 1.436791 + xmax = 1.5155 + text = "E" + intervals [18]: + xmin = 1.5155 + xmax = 1.5545 + text = "n" + intervals [19]: + xmin = 1.5545 + xmax = 1.6285 + text = "i:" + intervals [20]: + xmin = 1.6285 + xmax = 1.741 + text = "f" + intervals [21]: + xmin = 1.741 + xmax = 1.864 + text = "@:" + intervals [22]: + xmin = 1.864 + xmax = 1.917 + text = "D" + intervals [23]: + xmin = 1.917 + xmax = 1.9578 + text = "@" + intervals [24]: + xmin = 1.9578 + xmax = 2.022 + text = "r" + intervals [25]: + xmin = 2.022 + xmax = 2.0785 + text = "@" + intervals [26]: + xmin = 2.0785 + xmax = 2.1695 + text = "z" + intervals [27]: + xmin = 2.1695 + xmax = 2.2285 + text = "I" + intervals [28]: + xmin = 2.2285 + xmax = 2.3195 + text = "s" + intervals [29]: + xmin = 2.3195 + xmax = 2.3825 + text = "t" + intervals [30]: + xmin = 2.3825 + xmax = 2.431 + text = "@" + intervals [31]: + xmin = 2.431 + xmax = 2.5285 + text = "n" + intervals [32]: + xmin = 2.5285 + xmax = 2.754 + text = "s" + intervals [33]: + xmin = 2.754 + xmax = 3.054 + text = "" + item [9]: + class = "IntervalTier" + name = "Phonetic" + xmin = 0 + xmax = 3.054 + intervals: size = 37 + intervals [1]: + xmin = 0 + xmax = 0.3 + text = "" + intervals [2]: + xmin = 0.3 + xmax = 0.373 + text = "I" + intervals [3]: + xmin = 0.373 + xmax = 0.397329 + text = "t" + intervals [4]: + xmin = 0.397329 + xmax = 0.411739 + text = "H" + intervals [5]: + xmin = 0.411739 + xmax = 0.4765 + text = "I" + intervals [6]: + xmin = 0.4765 + xmax = 0.571999 + text = "z" + intervals [7]: + xmin = 0.571999 + xmax = 0.6745 + text = "f" + intervals [8]: + xmin = 0.6745 + xmax = 0.737 + text = "j" + intervals [9]: + xmin = 0.737 + xmax = 0.7985 + text = "u:" + intervals [10]: + xmin = 0.7985 + xmax = 0.8125 + text = "t" + intervals [11]: + xmin = 0.8125 + xmax = 0.862947 + text = "H" + intervals [12]: + xmin = 0.862947 + xmax = 1.015885 + text = "ai" + intervals [13]: + xmin = 1.015885 + xmax = 1.091 + text = "l" + intervals [14]: + xmin = 1.091 + xmax = 1.1135 + text = "t" + intervals [15]: + xmin = 1.1135 + xmax = 1.13 + text = "H" + intervals [16]: + xmin = 1.13 + xmax = 1.222389 + text = "u:" + intervals [17]: + xmin = 1.222389 + xmax = 1.326995 + text = "O" + intervals [18]: + xmin = 1.326995 + xmax = 1.391057 + text = "f" + intervals [19]: + xmin = 1.391057 + xmax = 1.436791 + text = "r" + intervals [20]: + xmin = 1.436791 + xmax = 1.5155 + text = "E" + intervals [21]: + xmin = 1.5155 + xmax = 1.5545 + text = "n" + intervals [22]: + xmin = 1.5545 + xmax = 1.6285 + text = "i:" + intervals [23]: + xmin = 1.6285 + xmax = 1.741 + text = "f" + intervals [24]: + xmin = 1.741 + xmax = 1.864 + text = "@:" + intervals [25]: + xmin = 1.864 + xmax = 1.917 + text = "D" + intervals [26]: + xmin = 1.917 + xmax = 1.9578 + text = "@" + intervals [27]: + xmin = 1.9578 + xmax = 2.022 + text = "r" + intervals [28]: + xmin = 2.022 + xmax = 2.0785 + text = "@" + intervals [29]: + xmin = 2.0785 + xmax = 2.1695 + text = "z" + intervals [30]: + xmin = 2.1695 + xmax = 2.2285 + text = "I" + intervals [31]: + xmin = 2.2285 + xmax = 2.3195 + text = "s" + intervals [32]: + xmin = 2.3195 + xmax = 2.3455 + text = "t" + intervals [33]: + xmin = 2.3455 + xmax = 2.3825 + text = "H" + intervals [34]: + xmin = 2.3825 + xmax = 2.431 + text = "@" + intervals [35]: + xmin = 2.431 + xmax = 2.5285 + text = "n" + intervals [36]: + xmin = 2.5285 + xmax = 2.754 + text = "s" + intervals [37]: + xmin = 2.754 + xmax = 3.054 + text = "" + item [10]: + class = "TextTier" + name = "Tone" + xmin = 0 + xmax = 3.054 + points: size = 6 + points [1]: + number = 0.760206 + mark = "H*" + points [2]: + number = 1.011316 + mark = "L-" + points [3]: + number = 1.806928 + mark = "H*" + points [4]: + number = 2.204734 + mark = "H*" + points [5]: + number = 2.45839 + mark = "L-" + points [6]: + number = 2.488588 + mark = "L%" + item [11]: + class = "IntervalTier" + name = "Foot" + xmin = 0 + xmax = 3.054 + intervals: size = 8 + intervals [1]: + xmin = 0 + xmax = 0.571999 + text = "" + intervals [2]: + xmin = 0.571999 + xmax = 0.7985 + text = "F" + intervals [3]: + xmin = 0.7985 + xmax = 1.222389 + text = "F" + intervals [4]: + xmin = 1.222389 + xmax = 1.436791 + text = "F" + intervals [5]: + xmin = 1.436791 + xmax = 1.6285 + text = "F" + intervals [6]: + xmin = 1.6285 + xmax = 2.0785 + text = "F" + intervals [7]: + xmin = 2.0785 + xmax = 2.754 + text = "F" + intervals [8]: + xmin = 2.754 + xmax = 3.054 + text = "" diff --git a/inst/extdata/rawDemoData/annotationFiles/msajc010.hlb b/inst/extdata/rawDemoData/annotationFiles/msajc010.hlb new file mode 100644 index 00000000..f3c35db0 --- /dev/null +++ b/inst/extdata/rawDemoData/annotationFiles/msajc010.hlb @@ -0,0 +1,234 @@ +**EMU hierarchical labels** +191 +Syllable Syllable +105 W +106 W +107 S +108 S +109 W +110 S +111 W +112 S +113 W +114 S +115 W +116 W +117 S +118 W + +Word Word Accent Text +2 F W it +14 F W is +21 C S futile +38 F W to +48 C W offer +58 F W any +68 C S further +80 C S resistance + +Foot Foot +49 F +59 F +69 F +81 F +83 F +104 F + +Phoneme Phoneme +119 I +120 t +121 I +122 z +123 f +124 j +125 u: +126 t +127 ai +128 l +129 t +130 u: +131 O +132 f +133 @ +134 r +135 E +136 n +137 i: +138 f +139 @: +140 D +141 @ +142 r +143 @ +144 z +145 I +146 s +147 t +148 @ +149 n +150 s + +Phonetic Phonetic +151 I +152 t +153 H +154 I +155 z +156 f +157 j +158 u: +159 t +160 H +161 ai +162 l +163 t +164 H +165 u: +166 O +167 f +168 r +169 E +170 n +171 i: +172 f +173 @: +174 D +175 @ +176 r +177 @ +178 z +179 I +180 s +181 t +182 H +183 @ +184 n +185 s + +Tone Tone +186 H* +187 L- +188 H* +189 H* +190 L- +191 L% + +Utterance Utterance +8 + +Intonational Intonational +7 L% + +Intermediate Intermediate +5 L- +41 L- + + +2 105 119 120 151 152 153 +5 2 14 21 105 106 107 108 119 120 121 122 123 124 125 126 127 128 151 152 153 154 155 156 157 158 159 160 161 162 186 +7 2 5 14 21 38 41 48 49 58 59 68 69 80 81 83 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 188 189 +8 2 5 7 14 21 38 41 48 49 58 59 68 69 80 81 83 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 188 189 +14 106 121 122 154 155 +21 107 108 123 124 125 126 127 128 156 157 158 159 160 161 162 186 +38 109 129 130 163 164 165 +41 38 48 58 68 80 109 110 112 113 114 115 116 117 118 129 130 131 132 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 163 164 165 166 167 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 188 189 +48 110 131 132 166 167 +49 107 123 124 125 156 157 158 186 +58 112 113 135 136 137 169 170 171 +59 108 109 126 127 128 129 130 159 160 161 162 163 164 165 +68 114 115 138 139 140 141 172 173 174 175 188 +69 110 111 131 132 133 134 166 167 168 +80 116 117 118 142 143 144 145 146 147 148 149 150 176 177 178 179 180 181 182 183 184 185 189 +81 112 113 135 136 137 169 170 171 +83 114 115 116 138 139 140 141 142 143 172 173 174 175 176 177 188 +104 117 118 144 145 146 147 148 149 150 178 179 180 181 182 183 184 185 189 +105 119 120 151 152 153 +106 121 122 154 155 +107 123 124 125 156 157 158 186 +108 126 127 128 159 160 161 162 +109 129 130 163 164 165 +110 131 132 166 167 +111 133 134 168 +112 135 169 +113 136 137 170 171 +114 138 139 172 173 188 +115 140 141 174 175 +116 142 143 176 177 +117 144 145 178 179 189 +118 146 147 148 149 150 180 181 182 183 184 185 +119 151 +120 152 153 +121 154 +122 155 +123 156 +124 157 +125 158 +126 159 160 +127 161 +128 162 +129 163 164 +130 165 +131 166 +132 167 +133 168 +134 168 +135 169 +136 170 +137 171 +138 172 +139 173 +140 174 +141 175 +142 176 +143 177 +144 178 +145 179 +146 180 +147 181 182 +148 183 +149 184 +150 185 +151 +152 +153 +154 +155 +156 +157 +158 +159 +160 +161 +162 +163 +164 +165 +166 +167 +168 +169 +170 +171 +172 +173 +174 +175 +176 +177 +178 +179 +180 +181 +182 +183 +184 +185 +186 +187 +188 +189 +190 +191 + +0 + diff --git a/inst/extdata/rawDemoData/annotationFiles/msajc010.lab b/inst/extdata/rawDemoData/annotationFiles/msajc010.lab new file mode 100644 index 00000000..59f99365 --- /dev/null +++ b/inst/extdata/rawDemoData/annotationFiles/msajc010.lab @@ -0,0 +1,39 @@ +signal msajc010 +nfields 1 +# + 0.300000 125 H# + 0.373000 125 I + 0.397329 125 t + 0.411739 125 H + 0.476500 125 I + 0.571999 125 z + 0.674500 125 f + 0.737000 125 j + 0.798500 125 u: + 0.812500 125 t + 0.862947 125 H + 1.015885 125 ai + 1.091000 125 l + 1.113500 125 t + 1.130000 125 H + 1.222389 125 u: + 1.326995 125 O + 1.391057 125 f + 1.436791 125 r + 1.515500 125 E + 1.554500 125 n + 1.628500 125 i: + 1.741000 125 f + 1.864000 125 @: + 1.917000 125 D + 1.957800 125 @ + 2.022000 125 r + 2.078500 125 @ + 2.169500 125 z + 2.228500 125 I + 2.319500 125 s + 2.345500 125 t + 2.382500 125 H + 2.431000 125 @ + 2.528500 125 n + 2.754000 125 s diff --git a/inst/extdata/rawDemoData/annotationFiles/msajc010.par b/inst/extdata/rawDemoData/annotationFiles/msajc010.par new file mode 100644 index 00000000..34083ba0 --- /dev/null +++ b/inst/extdata/rawDemoData/annotationFiles/msajc010.par @@ -0,0 +1,59 @@ +LHD: Partitur 1.2.16 +REP: unknown +SNB: 2 +SAM: 20000 +SBF: 01 +SSB: 16 +NCH: 1 +SPN: unknown +LBD: +KAN: 0 It +KAN: 1 Iz +KAN: 2 fju:taIl +KAN: 3 t@ +KAN: 4 Qf@ +KAN: 5 enI +KAN: 6 f3:D@ +KAN: 7 rIzIst@ns +ORT: 0 it +ORT: 1 is +ORT: 2 futile +ORT: 3 to +ORT: 4 offer +ORT: 5 any +ORT: 6 further +ORT: 7 resistance +TRN: 6000 49199 0,1,2,3,4,5,6,7 it is futile to offer any further resistance +MAU: 0 5999 -1 +MAU: 6000 1999 0 I +MAU: 8000 599 0 t +MAU: 8600 999 1 I +MAU: 9600 1799 1 z +MAU: 11400 1999 2 f +MAU: 13400 1399 2 j +MAU: 14800 1799 2 u: +MAU: 16600 999 2 t +MAU: 17600 1999 2 aI +MAU: 19600 2599 2 l +MAU: 22200 599 3 t +MAU: 22800 599 3 @ +MAU: 23400 3199 4 Q +MAU: 26600 1599 4 f +MAU: 28200 999 4 @ +MAU: 29200 1199 5 I +MAU: 30400 1199 5 n +MAU: 31600 1199 5 I +MAU: 32800 2199 6 f +MAU: 35000 1999 6 3: +MAU: 37000 1399 6 D +MAU: 38400 799 6 @ +MAU: 39200 1399 7 r +MAU: 40600 799 7 I +MAU: 41400 2399 7 z +MAU: 43800 799 7 I +MAU: 44600 2399 7 s +MAU: 47000 599 7 t +MAU: 47600 799 7 @ +MAU: 48400 2199 7 n +MAU: 50600 4599 7 s +MAU: 55200 5599 -1 diff --git a/inst/extdata/rawDemoData/annotationFiles/msajc010.parmanipulated b/inst/extdata/rawDemoData/annotationFiles/msajc010.parmanipulated new file mode 100644 index 00000000..61fe3b93 --- /dev/null +++ b/inst/extdata/rawDemoData/annotationFiles/msajc010.parmanipulated @@ -0,0 +1 @@ +LBD: diff --git a/inst/extdata/rawDemoData/annotationFiles/msajc010.tone b/inst/extdata/rawDemoData/annotationFiles/msajc010.tone new file mode 100644 index 00000000..829252cf --- /dev/null +++ b/inst/extdata/rawDemoData/annotationFiles/msajc010.tone @@ -0,0 +1,9 @@ +signal msajc010 +nfields 1 +# + 0.761206 125 H* + 1.012316 125 L- + 1.807928 125 H* + 2.205734 125 H* + 2.459390 125 L- + 2.489588 125 L% diff --git a/inst/extdata/rawDemoData/annotationFiles/msajc010_annot.json b/inst/extdata/rawDemoData/annotationFiles/msajc010_annot.json new file mode 100644 index 00000000..af754dc2 --- /dev/null +++ b/inst/extdata/rawDemoData/annotationFiles/msajc010_annot.json @@ -0,0 +1,1601 @@ +{ + "name": "msajc010", + "annotates": "msajc010.wav", + "sampleRate": 20000, + "levels": [ + { + "name": "Utterance", + "type": "ITEM", + "items": [ + { + "id": 8, + "labels": [ + { + "name": "Utterance", + "value": "" + } + ] + } + ] + }, + { + "name": "Intonational", + "type": "ITEM", + "items": [ + { + "id": 7, + "labels": [ + { + "name": "Intonational", + "value": "L%" + } + ] + } + ] + }, + { + "name": "Intermediate", + "type": "ITEM", + "items": [ + { + "id": 5, + "labels": [ + { + "name": "Intermediate", + "value": "L-" + } + ] + }, + { + "id": 41, + "labels": [ + { + "name": "Intermediate", + "value": "L-" + } + ] + } + ] + }, + { + "name": "Word", + "type": "ITEM", + "items": [ + { + "id": 2, + "labels": [ + { + "name": "Word", + "value": "F" + }, + { + "name": "Accent", + "value": "W" + }, + { + "name": "Text", + "value": "it" + } + ] + }, + { + "id": 14, + "labels": [ + { + "name": "Word", + "value": "F" + }, + { + "name": "Accent", + "value": "W" + }, + { + "name": "Text", + "value": "is" + } + ] + }, + { + "id": 21, + "labels": [ + { + "name": "Word", + "value": "C" + }, + { + "name": "Accent", + "value": "S" + }, + { + "name": "Text", + "value": "futile" + } + ] + }, + { + "id": 38, + "labels": [ + { + "name": "Word", + "value": "F" + }, + { + "name": "Accent", + "value": "W" + }, + { + "name": "Text", + "value": "to" + } + ] + }, + { + "id": 48, + "labels": [ + { + "name": "Word", + "value": "C" + }, + { + "name": "Accent", + "value": "W" + }, + { + "name": "Text", + "value": "offer" + } + ] + }, + { + "id": 58, + "labels": [ + { + "name": "Word", + "value": "F" + }, + { + "name": "Accent", + "value": "W" + }, + { + "name": "Text", + "value": "any" + } + ] + }, + { + "id": 68, + "labels": [ + { + "name": "Word", + "value": "C" + }, + { + "name": "Accent", + "value": "S" + }, + { + "name": "Text", + "value": "further" + } + ] + }, + { + "id": 80, + "labels": [ + { + "name": "Word", + "value": "C" + }, + { + "name": "Accent", + "value": "S" + }, + { + "name": "Text", + "value": "resistance" + } + ] + } + ] + }, + { + "name": "Syllable", + "type": "ITEM", + "items": [ + { + "id": 105, + "labels": [ + { + "name": "Syllable", + "value": "W" + } + ] + }, + { + "id": 106, + "labels": [ + { + "name": "Syllable", + "value": "W" + } + ] + }, + { + "id": 107, + "labels": [ + { + "name": "Syllable", + "value": "S" + } + ] + }, + { + "id": 108, + "labels": [ + { + "name": "Syllable", + "value": "S" + } + ] + }, + { + "id": 109, + "labels": [ + { + "name": "Syllable", + "value": "W" + } + ] + }, + { + "id": 110, + "labels": [ + { + "name": "Syllable", + "value": "S" + } + ] + }, + { + "id": 111, + "labels": [ + { + "name": "Syllable", + "value": "W" + } + ] + }, + { + "id": 112, + "labels": [ + { + "name": "Syllable", + "value": "S" + } + ] + }, + { + "id": 113, + "labels": [ + { + "name": "Syllable", + "value": "W" + } + ] + }, + { + "id": 114, + "labels": [ + { + "name": "Syllable", + "value": "S" + } + ] + }, + { + "id": 115, + "labels": [ + { + "name": "Syllable", + "value": "W" + } + ] + }, + { + "id": 116, + "labels": [ + { + "name": "Syllable", + "value": "W" + } + ] + }, + { + "id": 117, + "labels": [ + { + "name": "Syllable", + "value": "S" + } + ] + }, + { + "id": 118, + "labels": [ + { + "name": "Syllable", + "value": "W" + } + ] + } + ] + }, + { + "name": "Phoneme", + "type": "ITEM", + "items": [ + { + "id": 119, + "labels": [ + { + "name": "Phoneme", + "value": "I" + } + ] + }, + { + "id": 120, + "labels": [ + { + "name": "Phoneme", + "value": "t" + } + ] + }, + { + "id": 121, + "labels": [ + { + "name": "Phoneme", + "value": "I" + } + ] + }, + { + "id": 122, + "labels": [ + { + "name": "Phoneme", + "value": "z" + } + ] + }, + { + "id": 123, + "labels": [ + { + "name": "Phoneme", + "value": "f" + } + ] + }, + { + "id": 124, + "labels": [ + { + "name": "Phoneme", + "value": "j" + } + ] + }, + { + "id": 125, + "labels": [ + { + "name": "Phoneme", + "value": "u:" + } + ] + }, + { + "id": 126, + "labels": [ + { + "name": "Phoneme", + "value": "t" + } + ] + }, + { + "id": 127, + "labels": [ + { + "name": "Phoneme", + "value": "ai" + } + ] + }, + { + "id": 128, + "labels": [ + { + "name": "Phoneme", + "value": "l" + } + ] + }, + { + "id": 129, + "labels": [ + { + "name": "Phoneme", + "value": "t" + } + ] + }, + { + "id": 130, + "labels": [ + { + "name": "Phoneme", + "value": "u:" + } + ] + }, + { + "id": 131, + "labels": [ + { + "name": "Phoneme", + "value": "O" + } + ] + }, + { + "id": 132, + "labels": [ + { + "name": "Phoneme", + "value": "f" + } + ] + }, + { + "id": 133, + "labels": [ + { + "name": "Phoneme", + "value": "@" + } + ] + }, + { + "id": 134, + "labels": [ + { + "name": "Phoneme", + "value": "r" + } + ] + }, + { + "id": 135, + "labels": [ + { + "name": "Phoneme", + "value": "E" + } + ] + }, + { + "id": 136, + "labels": [ + { + "name": "Phoneme", + "value": "n" + } + ] + }, + { + "id": 137, + "labels": [ + { + "name": "Phoneme", + "value": "i:" + } + ] + }, + { + "id": 138, + "labels": [ + { + "name": "Phoneme", + "value": "f" + } + ] + }, + { + "id": 139, + "labels": [ + { + "name": "Phoneme", + "value": "@:" + } + ] + }, + { + "id": 140, + "labels": [ + { + "name": "Phoneme", + "value": "D" + } + ] + }, + { + "id": 141, + "labels": [ + { + "name": "Phoneme", + "value": "@" + } + ] + }, + { + "id": 142, + "labels": [ + { + "name": "Phoneme", + "value": "r" + } + ] + }, + { + "id": 143, + "labels": [ + { + "name": "Phoneme", + "value": "@" + } + ] + }, + { + "id": 144, + "labels": [ + { + "name": "Phoneme", + "value": "z" + } + ] + }, + { + "id": 145, + "labels": [ + { + "name": "Phoneme", + "value": "I" + } + ] + }, + { + "id": 146, + "labels": [ + { + "name": "Phoneme", + "value": "s" + } + ] + }, + { + "id": 147, + "labels": [ + { + "name": "Phoneme", + "value": "t" + } + ] + }, + { + "id": 148, + "labels": [ + { + "name": "Phoneme", + "value": "@" + } + ] + }, + { + "id": 149, + "labels": [ + { + "name": "Phoneme", + "value": "n" + } + ] + }, + { + "id": 150, + "labels": [ + { + "name": "Phoneme", + "value": "s" + } + ] + } + ] + }, + { + "name": "Phonetic", + "type": "SEGMENT", + "items": [ + { + "id": 151, + "sampleStart": 6000, + "sampleDur": 1459, + "labels": [ + { + "name": "Phonetic", + "value": "I" + } + ] + }, + { + "id": 152, + "sampleStart": 7460, + "sampleDur": 485, + "labels": [ + { + "name": "Phonetic", + "value": "t" + } + ] + }, + { + "id": 153, + "sampleStart": 7946, + "sampleDur": 287, + "labels": [ + { + "name": "Phonetic", + "value": "H" + } + ] + }, + { + "id": 154, + "sampleStart": 8234, + "sampleDur": 1295, + "labels": [ + { + "name": "Phonetic", + "value": "I" + } + ] + }, + { + "id": 155, + "sampleStart": 9530, + "sampleDur": 1908, + "labels": [ + { + "name": "Phonetic", + "value": "z" + } + ] + }, + { + "id": 156, + "sampleStart": 11439, + "sampleDur": 2050, + "labels": [ + { + "name": "Phonetic", + "value": "f" + } + ] + }, + { + "id": 157, + "sampleStart": 13490, + "sampleDur": 1249, + "labels": [ + { + "name": "Phonetic", + "value": "j" + } + ] + }, + { + "id": 158, + "sampleStart": 14740, + "sampleDur": 1229, + "labels": [ + { + "name": "Phonetic", + "value": "u:" + } + ] + }, + { + "id": 159, + "sampleStart": 15970, + "sampleDur": 279, + "labels": [ + { + "name": "Phonetic", + "value": "t" + } + ] + }, + { + "id": 160, + "sampleStart": 16250, + "sampleDur": 1007, + "labels": [ + { + "name": "Phonetic", + "value": "H" + } + ] + }, + { + "id": 161, + "sampleStart": 17258, + "sampleDur": 3058, + "labels": [ + { + "name": "Phonetic", + "value": "ai" + } + ] + }, + { + "id": 162, + "sampleStart": 20317, + "sampleDur": 1502, + "labels": [ + { + "name": "Phonetic", + "value": "l" + } + ] + }, + { + "id": 163, + "sampleStart": 21820, + "sampleDur": 449, + "labels": [ + { + "name": "Phonetic", + "value": "t" + } + ] + }, + { + "id": 164, + "sampleStart": 22270, + "sampleDur": 328, + "labels": [ + { + "name": "Phonetic", + "value": "H" + } + ] + }, + { + "id": 165, + "sampleStart": 22599, + "sampleDur": 1847, + "labels": [ + { + "name": "Phonetic", + "value": "u:" + } + ] + }, + { + "id": 166, + "sampleStart": 24447, + "sampleDur": 2091, + "labels": [ + { + "name": "Phonetic", + "value": "O" + } + ] + }, + { + "id": 167, + "sampleStart": 26539, + "sampleDur": 1281, + "labels": [ + { + "name": "Phonetic", + "value": "f" + } + ] + }, + { + "id": 168, + "sampleStart": 27821, + "sampleDur": 913, + "labels": [ + { + "name": "Phonetic", + "value": "r" + } + ] + }, + { + "id": 169, + "sampleStart": 28735, + "sampleDur": 1574, + "labels": [ + { + "name": "Phonetic", + "value": "E" + } + ] + }, + { + "id": 170, + "sampleStart": 30310, + "sampleDur": 779, + "labels": [ + { + "name": "Phonetic", + "value": "n" + } + ] + }, + { + "id": 171, + "sampleStart": 31090, + "sampleDur": 1479, + "labels": [ + { + "name": "Phonetic", + "value": "i:" + } + ] + }, + { + "id": 172, + "sampleStart": 32570, + "sampleDur": 2249, + "labels": [ + { + "name": "Phonetic", + "value": "f" + } + ] + }, + { + "id": 173, + "sampleStart": 34820, + "sampleDur": 2459, + "labels": [ + { + "name": "Phonetic", + "value": "@:" + } + ] + }, + { + "id": 174, + "sampleStart": 37280, + "sampleDur": 1059, + "labels": [ + { + "name": "Phonetic", + "value": "D" + } + ] + }, + { + "id": 175, + "sampleStart": 38340, + "sampleDur": 815, + "labels": [ + { + "name": "Phonetic", + "value": "@" + } + ] + }, + { + "id": 176, + "sampleStart": 39156, + "sampleDur": 1282, + "labels": [ + { + "name": "Phonetic", + "value": "r" + } + ] + }, + { + "id": 177, + "sampleStart": 40439, + "sampleDur": 1130, + "labels": [ + { + "name": "Phonetic", + "value": "@" + } + ] + }, + { + "id": 178, + "sampleStart": 41570, + "sampleDur": 1819, + "labels": [ + { + "name": "Phonetic", + "value": "z" + } + ] + }, + { + "id": 179, + "sampleStart": 43390, + "sampleDur": 1179, + "labels": [ + { + "name": "Phonetic", + "value": "I" + } + ] + }, + { + "id": 180, + "sampleStart": 44570, + "sampleDur": 1819, + "labels": [ + { + "name": "Phonetic", + "value": "s" + } + ] + }, + { + "id": 181, + "sampleStart": 46390, + "sampleDur": 519, + "labels": [ + { + "name": "Phonetic", + "value": "t" + } + ] + }, + { + "id": 182, + "sampleStart": 46910, + "sampleDur": 739, + "labels": [ + { + "name": "Phonetic", + "value": "H" + } + ] + }, + { + "id": 183, + "sampleStart": 47650, + "sampleDur": 969, + "labels": [ + { + "name": "Phonetic", + "value": "@" + } + ] + }, + { + "id": 184, + "sampleStart": 48620, + "sampleDur": 1949, + "labels": [ + { + "name": "Phonetic", + "value": "n" + } + ] + }, + { + "id": 185, + "sampleStart": 50570, + "sampleDur": 4509, + "labels": [ + { + "name": "Phonetic", + "value": "s" + } + ] + } + ] + }, + { + "name": "Tone", + "type": "EVENT", + "items": [ + { + "id": 186, + "samplePoint": 15224, + "labels": [ + { + "name": "Tone", + "value": "H*" + } + ] + }, + { + "id": 187, + "samplePoint": 20246, + "labels": [ + { + "name": "Tone", + "value": "L-" + } + ] + }, + { + "id": 188, + "samplePoint": 36159, + "labels": [ + { + "name": "Tone", + "value": "H*" + } + ] + }, + { + "id": 189, + "samplePoint": 44115, + "labels": [ + { + "name": "Tone", + "value": "H*" + } + ] + }, + { + "id": 190, + "samplePoint": 49188, + "labels": [ + { + "name": "Tone", + "value": "L-" + } + ] + }, + { + "id": 191, + "samplePoint": 49792, + "labels": [ + { + "name": "Tone", + "value": "L%" + } + ] + } + ] + }, + { + "name": "Foot", + "type": "ITEM", + "items": [ + { + "id": 49, + "labels": [ + { + "name": "Foot", + "value": "F" + } + ] + }, + { + "id": 59, + "labels": [ + { + "name": "Foot", + "value": "F" + } + ] + }, + { + "id": 69, + "labels": [ + { + "name": "Foot", + "value": "F" + } + ] + }, + { + "id": 81, + "labels": [ + { + "name": "Foot", + "value": "F" + } + ] + }, + { + "id": 83, + "labels": [ + { + "name": "Foot", + "value": "F" + } + ] + }, + { + "id": 104, + "labels": [ + { + "name": "Foot", + "value": "F" + } + ] + } + ] + } + ], + "links": [ + { + "fromID": 8, + "toID": 7 + }, + { + "fromID": 7, + "toID": 49 + }, + { + "fromID": 7, + "toID": 59 + }, + { + "fromID": 7, + "toID": 69 + }, + { + "fromID": 7, + "toID": 81 + }, + { + "fromID": 7, + "toID": 83 + }, + { + "fromID": 7, + "toID": 104 + }, + { + "fromID": 7, + "toID": 5 + }, + { + "fromID": 7, + "toID": 41 + }, + { + "fromID": 5, + "toID": 2 + }, + { + "fromID": 5, + "toID": 14 + }, + { + "fromID": 5, + "toID": 21 + }, + { + "fromID": 41, + "toID": 38 + }, + { + "fromID": 41, + "toID": 48 + }, + { + "fromID": 41, + "toID": 58 + }, + { + "fromID": 41, + "toID": 68 + }, + { + "fromID": 41, + "toID": 80 + }, + { + "fromID": 2, + "toID": 105 + }, + { + "fromID": 14, + "toID": 106 + }, + { + "fromID": 21, + "toID": 107 + }, + { + "fromID": 21, + "toID": 108 + }, + { + "fromID": 38, + "toID": 109 + }, + { + "fromID": 48, + "toID": 110 + }, + { + "fromID": 58, + "toID": 112 + }, + { + "fromID": 58, + "toID": 113 + }, + { + "fromID": 68, + "toID": 114 + }, + { + "fromID": 68, + "toID": 115 + }, + { + "fromID": 80, + "toID": 116 + }, + { + "fromID": 80, + "toID": 117 + }, + { + "fromID": 80, + "toID": 118 + }, + { + "fromID": 105, + "toID": 119 + }, + { + "fromID": 105, + "toID": 120 + }, + { + "fromID": 106, + "toID": 121 + }, + { + "fromID": 106, + "toID": 122 + }, + { + "fromID": 107, + "toID": 123 + }, + { + "fromID": 107, + "toID": 124 + }, + { + "fromID": 107, + "toID": 125 + }, + { + "fromID": 107, + "toID": 186 + }, + { + "fromID": 108, + "toID": 126 + }, + { + "fromID": 108, + "toID": 127 + }, + { + "fromID": 108, + "toID": 128 + }, + { + "fromID": 109, + "toID": 129 + }, + { + "fromID": 109, + "toID": 130 + }, + { + "fromID": 110, + "toID": 131 + }, + { + "fromID": 110, + "toID": 132 + }, + { + "fromID": 111, + "toID": 133 + }, + { + "fromID": 111, + "toID": 134 + }, + { + "fromID": 112, + "toID": 135 + }, + { + "fromID": 113, + "toID": 136 + }, + { + "fromID": 113, + "toID": 137 + }, + { + "fromID": 114, + "toID": 138 + }, + { + "fromID": 114, + "toID": 139 + }, + { + "fromID": 114, + "toID": 188 + }, + { + "fromID": 115, + "toID": 140 + }, + { + "fromID": 115, + "toID": 141 + }, + { + "fromID": 116, + "toID": 142 + }, + { + "fromID": 116, + "toID": 143 + }, + { + "fromID": 117, + "toID": 144 + }, + { + "fromID": 117, + "toID": 145 + }, + { + "fromID": 117, + "toID": 189 + }, + { + "fromID": 118, + "toID": 146 + }, + { + "fromID": 118, + "toID": 147 + }, + { + "fromID": 118, + "toID": 148 + }, + { + "fromID": 118, + "toID": 149 + }, + { + "fromID": 118, + "toID": 150 + }, + { + "fromID": 119, + "toID": 151 + }, + { + "fromID": 120, + "toID": 152 + }, + { + "fromID": 120, + "toID": 153 + }, + { + "fromID": 121, + "toID": 154 + }, + { + "fromID": 122, + "toID": 155 + }, + { + "fromID": 123, + "toID": 156 + }, + { + "fromID": 124, + "toID": 157 + }, + { + "fromID": 125, + "toID": 158 + }, + { + "fromID": 126, + "toID": 159 + }, + { + "fromID": 126, + "toID": 160 + }, + { + "fromID": 127, + "toID": 161 + }, + { + "fromID": 128, + "toID": 162 + }, + { + "fromID": 129, + "toID": 163 + }, + { + "fromID": 129, + "toID": 164 + }, + { + "fromID": 130, + "toID": 165 + }, + { + "fromID": 131, + "toID": 166 + }, + { + "fromID": 132, + "toID": 167 + }, + { + "fromID": 133, + "toID": 168 + }, + { + "fromID": 134, + "toID": 168 + }, + { + "fromID": 135, + "toID": 169 + }, + { + "fromID": 136, + "toID": 170 + }, + { + "fromID": 137, + "toID": 171 + }, + { + "fromID": 138, + "toID": 172 + }, + { + "fromID": 139, + "toID": 173 + }, + { + "fromID": 140, + "toID": 174 + }, + { + "fromID": 141, + "toID": 175 + }, + { + "fromID": 142, + "toID": 176 + }, + { + "fromID": 143, + "toID": 177 + }, + { + "fromID": 144, + "toID": 178 + }, + { + "fromID": 145, + "toID": 179 + }, + { + "fromID": 146, + "toID": 180 + }, + { + "fromID": 147, + "toID": 181 + }, + { + "fromID": 147, + "toID": 182 + }, + { + "fromID": 148, + "toID": 183 + }, + { + "fromID": 149, + "toID": 184 + }, + { + "fromID": 150, + "toID": 185 + }, + { + "fromID": 49, + "toID": 107 + }, + { + "fromID": 59, + "toID": 108 + }, + { + "fromID": 59, + "toID": 109 + }, + { + "fromID": 69, + "toID": 110 + }, + { + "fromID": 69, + "toID": 111 + }, + { + "fromID": 81, + "toID": 112 + }, + { + "fromID": 81, + "toID": 113 + }, + { + "fromID": 83, + "toID": 114 + }, + { + "fromID": 83, + "toID": 115 + }, + { + "fromID": 83, + "toID": 116 + }, + { + "fromID": 104, + "toID": 117 + }, + { + "fromID": 104, + "toID": 118 + } + ] +} + diff --git a/inst/extdata/rawDemoData/annotationFiles/msajc012.TextGrid b/inst/extdata/rawDemoData/annotationFiles/msajc012.TextGrid new file mode 100644 index 00000000..d100de16 --- /dev/null +++ b/inst/extdata/rawDemoData/annotationFiles/msajc012.TextGrid @@ -0,0 +1,637 @@ +File type = "ooTextFile" +Object class = "TextGrid" + +xmin = 0 +xmax = 2.99235 +tiers? +size = 11 +item []: + item [1]: + class = "IntervalTier" + name = "Utterance" + xmin = 0 + xmax = 2.99235 + intervals: size = 3 + intervals [1]: + xmin = 0 + xmax = 0.3 + text = "" + intervals [2]: + xmin = 0.3 + xmax = 2.692363 + text = "" + intervals [3]: + xmin = 2.692363 + xmax = 2.99235 + text = "" + item [2]: + class = "IntervalTier" + name = "Intonational" + xmin = 0 + xmax = 2.99235 + intervals: size = 3 + intervals [1]: + xmin = 0 + xmax = 0.3 + text = "" + intervals [2]: + xmin = 0.3 + xmax = 2.692363 + text = "L%" + intervals [3]: + xmin = 2.692363 + xmax = 2.99235 + text = "" + item [3]: + class = "IntervalTier" + name = "Intermediate" + xmin = 0 + xmax = 2.99235 + intervals: size = 5 + intervals [1]: + xmin = 0 + xmax = 0.3 + text = "" + intervals [2]: + xmin = 0.3 + xmax = 1.083007 + text = "L-" + intervals [3]: + xmin = 1.083007 + xmax = 1.565007 + text = "L-" + intervals [4]: + xmin = 1.565007 + xmax = 2.692363 + text = "L-" + intervals [5]: + xmin = 2.692363 + xmax = 2.99235 + text = "" + item [4]: + class = "IntervalTier" + name = "Word" + xmin = 0 + xmax = 2.99235 + intervals: size = 10 + intervals [1]: + xmin = 0 + xmax = 0.3 + text = "" + intervals [2]: + xmin = 0.3 + xmax = 0.379597 + text = "F" + intervals [3]: + xmin = 0.379597 + xmax = 0.744565 + text = "C" + intervals [4]: + xmin = 0.744565 + xmax = 1.083007 + text = "C" + intervals [5]: + xmin = 1.083007 + xmax = 1.456512 + text = "C" + intervals [6]: + xmin = 1.456512 + xmax = 1.565007 + text = "F" + intervals [7]: + xmin = 1.565007 + xmax = 1.651007 + text = "F" + intervals [8]: + xmin = 1.651007 + xmax = 1.995007 + text = "C" + intervals [9]: + xmin = 1.995007 + xmax = 2.692363 + text = "C" + intervals [10]: + xmin = 2.692363 + xmax = 2.99235 + text = "" + item [5]: + class = "IntervalTier" + name = "Accent" + xmin = 0 + xmax = 2.99235 + intervals: size = 10 + intervals [1]: + xmin = 0 + xmax = 0.3 + text = "" + intervals [2]: + xmin = 0.3 + xmax = 0.379597 + text = "W" + intervals [3]: + xmin = 0.379597 + xmax = 0.744565 + text = "S" + intervals [4]: + xmin = 0.744565 + xmax = 1.083007 + text = "S" + intervals [5]: + xmin = 1.083007 + xmax = 1.456512 + text = "S" + intervals [6]: + xmin = 1.456512 + xmax = 1.565007 + text = "W" + intervals [7]: + xmin = 1.565007 + xmax = 1.651007 + text = "W" + intervals [8]: + xmin = 1.651007 + xmax = 1.995007 + text = "S" + intervals [9]: + xmin = 1.995007 + xmax = 2.692363 + text = "S" + intervals [10]: + xmin = 2.692363 + xmax = 2.99235 + text = "" + item [6]: + class = "IntervalTier" + name = "Text" + xmin = 0 + xmax = 2.99235 + intervals: size = 10 + intervals [1]: + xmin = 0 + xmax = 0.3 + text = "" + intervals [2]: + xmin = 0.3 + xmax = 0.379597 + text = "the" + intervals [3]: + xmin = 0.379597 + xmax = 0.744565 + text = "chill" + intervals [4]: + xmin = 0.744565 + xmax = 1.083007 + text = "wind" + intervals [5]: + xmin = 1.083007 + xmax = 1.456512 + text = "caused" + intervals [6]: + xmin = 1.456512 + xmax = 1.565007 + text = "them" + intervals [7]: + xmin = 1.565007 + xmax = 1.651007 + text = "to" + intervals [8]: + xmin = 1.651007 + xmax = 1.995007 + text = "shiver" + intervals [9]: + xmin = 1.995007 + xmax = 2.692363 + text = "violently" + intervals [10]: + xmin = 2.692363 + xmax = 2.99235 + text = "" + item [7]: + class = "IntervalTier" + name = "Syllable" + xmin = 0 + xmax = 2.99235 + intervals: size = 14 + intervals [1]: + xmin = 0 + xmax = 0.3 + text = "" + intervals [2]: + xmin = 0.3 + xmax = 0.379597 + text = "W" + intervals [3]: + xmin = 0.379597 + xmax = 0.744565 + text = "S" + intervals [4]: + xmin = 0.744565 + xmax = 1.083007 + text = "S" + intervals [5]: + xmin = 1.083007 + xmax = 1.456512 + text = "S" + intervals [6]: + xmin = 1.456512 + xmax = 1.565007 + text = "W" + intervals [7]: + xmin = 1.565007 + xmax = 1.651007 + text = "W" + intervals [8]: + xmin = 1.651007 + xmax = 1.863007 + text = "S" + intervals [9]: + xmin = 1.863007 + xmax = 1.995007 + text = "W" + intervals [10]: + xmin = 1.995007 + xmax = 2.220007 + text = "S" + intervals [11]: + xmin = 2.220007 + xmax = 2.304838 + text = "W" + intervals [12]: + xmin = 2.304838 + xmax = 2.534007 + text = "W" + intervals [13]: + xmin = 2.534007 + xmax = 2.692363 + text = "W" + intervals [14]: + xmin = 2.692363 + xmax = 2.99235 + text = "" + item [8]: + class = "IntervalTier" + name = "Phoneme" + xmin = 0 + xmax = 2.99235 + intervals: size = 33 + intervals [1]: + xmin = 0 + xmax = 0.3 + text = "" + intervals [2]: + xmin = 0.3 + xmax = 0.330499 + text = "D" + intervals [3]: + xmin = 0.330499 + xmax = 0.379597 + text = "@" + intervals [4]: + xmin = 0.379597 + xmax = 0.546362 + text = "tS" + intervals [5]: + xmin = 0.546362 + xmax = 0.615469 + text = "I" + intervals [6]: + xmin = 0.615469 + xmax = 0.744565 + text = "l" + intervals [7]: + xmin = 0.744565 + xmax = 0.812468 + text = "w" + intervals [8]: + xmin = 0.812468 + xmax = 0.895002 + text = "I" + intervals [9]: + xmin = 0.895002 + xmax = 1.023007 + text = "n" + intervals [10]: + xmin = 1.023007 + xmax = 1.083007 + text = "d" + intervals [11]: + xmin = 1.083007 + xmax = 1.184007 + text = "k" + intervals [12]: + xmin = 1.184007 + xmax = 1.322488 + text = "o:" + intervals [13]: + xmin = 1.322488 + xmax = 1.387007 + text = "z" + intervals [14]: + xmin = 1.387007 + xmax = 1.456512 + text = "d" + intervals [15]: + xmin = 1.456512 + xmax = 1.47208 + text = "D" + intervals [16]: + xmin = 1.47208 + xmax = 1.490499 + text = "@" + intervals [17]: + xmin = 1.490499 + xmax = 1.565007 + text = "m" + intervals [18]: + xmin = 1.565007 + xmax = 1.621007 + text = "t" + intervals [19]: + xmin = 1.621007 + xmax = 1.651007 + text = "@" + intervals [20]: + xmin = 1.651007 + xmax = 1.800998 + text = "S" + intervals [21]: + xmin = 1.800998 + xmax = 1.863007 + text = "I" + intervals [22]: + xmin = 1.863007 + xmax = 1.926007 + text = "v" + intervals [23]: + xmin = 1.926007 + xmax = 1.995007 + text = "@" + intervals [24]: + xmin = 1.995007 + xmax = 2.090007 + text = "v" + intervals [25]: + xmin = 2.090007 + xmax = 2.220007 + text = "ai" + intervals [26]: + xmin = 2.220007 + xmax = 2.304838 + text = "@" + intervals [27]: + xmin = 2.304838 + xmax = 2.356002 + text = "l" + intervals [28]: + xmin = 2.356002 + xmax = 2.402311 + text = "@" + intervals [29]: + xmin = 2.402311 + xmax = 2.474922 + text = "n" + intervals [30]: + xmin = 2.474922 + xmax = 2.534007 + text = "t" + intervals [31]: + xmin = 2.534007 + xmax = 2.56928 + text = "l" + intervals [32]: + xmin = 2.56928 + xmax = 2.692363 + text = "i:" + intervals [33]: + xmin = 2.692363 + xmax = 2.99235 + text = "" + item [9]: + class = "IntervalTier" + name = "Phonetic" + xmin = 0 + xmax = 2.99235 + intervals: size = 39 + intervals [1]: + xmin = 0 + xmax = 0.3 + text = "" + intervals [2]: + xmin = 0.3 + xmax = 0.330499 + text = "D" + intervals [3]: + xmin = 0.330499 + xmax = 0.379597 + text = "@" + intervals [4]: + xmin = 0.379597 + xmax = 0.427007 + text = "t" + intervals [5]: + xmin = 0.427007 + xmax = 0.546362 + text = "S" + intervals [6]: + xmin = 0.546362 + xmax = 0.615469 + text = "I" + intervals [7]: + xmin = 0.615469 + xmax = 0.744565 + text = "l" + intervals [8]: + xmin = 0.744565 + xmax = 0.812468 + text = "w" + intervals [9]: + xmin = 0.812468 + xmax = 0.895002 + text = "I" + intervals [10]: + xmin = 0.895002 + xmax = 1.023007 + text = "n" + intervals [11]: + xmin = 1.023007 + xmax = 1.047999 + text = "d" + intervals [12]: + xmin = 1.047999 + xmax = 1.083007 + text = "H" + intervals [13]: + xmin = 1.083007 + xmax = 1.126219 + text = "k" + intervals [14]: + xmin = 1.126219 + xmax = 1.184007 + text = "H" + intervals [15]: + xmin = 1.184007 + xmax = 1.322488 + text = "o:" + intervals [16]: + xmin = 1.322488 + xmax = 1.387007 + text = "z" + intervals [17]: + xmin = 1.387007 + xmax = 1.439007 + text = "d" + intervals [18]: + xmin = 1.439007 + xmax = 1.456512 + text = "H" + intervals [19]: + xmin = 1.456512 + xmax = 1.47208 + text = "D" + intervals [20]: + xmin = 1.47208 + xmax = 1.490499 + text = "@" + intervals [21]: + xmin = 1.490499 + xmax = 1.565007 + text = "m" + intervals [22]: + xmin = 1.565007 + xmax = 1.597007 + text = "t" + intervals [23]: + xmin = 1.597007 + xmax = 1.621007 + text = "H" + intervals [24]: + xmin = 1.621007 + xmax = 1.651007 + text = "@" + intervals [25]: + xmin = 1.651007 + xmax = 1.800998 + text = "S" + intervals [26]: + xmin = 1.800998 + xmax = 1.863007 + text = "I" + intervals [27]: + xmin = 1.863007 + xmax = 1.926007 + text = "v" + intervals [28]: + xmin = 1.926007 + xmax = 1.995007 + text = "@" + intervals [29]: + xmin = 1.995007 + xmax = 2.090007 + text = "v" + intervals [30]: + xmin = 2.090007 + xmax = 2.220007 + text = "ai" + intervals [31]: + xmin = 2.220007 + xmax = 2.304838 + text = "@" + intervals [32]: + xmin = 2.304838 + xmax = 2.356002 + text = "l" + intervals [33]: + xmin = 2.356002 + xmax = 2.402311 + text = "@" + intervals [34]: + xmin = 2.402311 + xmax = 2.474922 + text = "n" + intervals [35]: + xmin = 2.474922 + xmax = 2.507007 + text = "t" + intervals [36]: + xmin = 2.507007 + xmax = 2.534007 + text = "H" + intervals [37]: + xmin = 2.534007 + xmax = 2.56928 + text = "l" + intervals [38]: + xmin = 2.56928 + xmax = 2.692363 + text = "i:" + intervals [39]: + xmin = 2.692363 + xmax = 2.99235 + text = "" + item [10]: + class = "TextTier" + name = "Tone" + xmin = 0 + xmax = 2.99235 + points: size = 9 + points [1]: + number = 0.591768 + mark = "H*" + points [2]: + number = 0.835125 + mark = "!H*" + points [3]: + number = 0.972336 + mark = "L-" + points [4]: + number = 1.259704 + mark = "H*" + points [5]: + number = 1.557427 + mark = "L-" + points [6]: + number = 1.821494 + mark = "H*" + points [7]: + number = 2.150284 + mark = "!H*" + points [8]: + number = 2.639586 + mark = "L-" + points [9]: + number = 2.669647 + mark = "L%" + item [11]: + class = "IntervalTier" + name = "Foot" + xmin = 0 + xmax = 2.99235 + intervals: size = 7 + intervals [1]: + xmin = 0 + xmax = 0.379597 + text = "" + intervals [2]: + xmin = 0.379597 + xmax = 0.744565 + text = "F" + intervals [3]: + xmin = 0.744565 + xmax = 1.083007 + text = "F" + intervals [4]: + xmin = 1.083007 + xmax = 1.651007 + text = "F" + intervals [5]: + xmin = 1.651007 + xmax = 1.995007 + text = "F" + intervals [6]: + xmin = 1.995007 + xmax = 2.692363 + text = "F" + intervals [7]: + xmin = 2.692363 + xmax = 2.99235 + text = "" diff --git a/inst/extdata/rawDemoData/annotationFiles/msajc012.hlb b/inst/extdata/rawDemoData/annotationFiles/msajc012.hlb new file mode 100644 index 00000000..6cce28d4 --- /dev/null +++ b/inst/extdata/rawDemoData/annotationFiles/msajc012.hlb @@ -0,0 +1,238 @@ +**EMU hierarchical labels** +195 +Syllable Syllable +107 W +108 S +109 S +110 S +111 W +112 W +113 S +114 W +115 S +116 W +117 W +118 W + +Word Word Accent Text +2 F W the +13 C S chill +23 C S wind +36 C S caused +51 F W them +60 F W to +70 C S shiver +82 C S violently + +Foot Foot +64 F +71 F +83 F +85 F +106 F + +Phoneme Phoneme +119 D +120 @ +121 tS +122 I +123 l +124 w +125 I +126 n +127 d +128 k +129 o: +130 z +131 d +132 D +133 @ +134 m +135 t +136 @ +137 S +138 I +139 v +140 @ +141 v +142 ai +143 @ +144 l +145 @ +146 n +147 t +148 l +149 i: + +Phonetic Phonetic +150 D +151 @ +152 t +153 S +154 I +155 l +156 w +157 I +158 n +159 d +160 H +161 k +162 H +163 o: +164 z +165 d +166 H +167 D +168 @ +169 m +170 t +171 H +172 @ +173 S +174 I +175 v +176 @ +177 v +178 ai +179 @ +180 l +181 @ +182 n +183 t +184 H +185 l +186 i: + +Tone Tone +187 H* +188 !H* +189 L- +190 H* +191 L- +192 H* +193 !H* +194 L- +195 L% + +Utterance Utterance +8 + +Intonational Intonational +7 L% + +Intermediate Intermediate +5 L- +39 L- +63 L- + + +2 107 119 120 150 151 +5 2 13 23 107 108 109 119 120 121 122 123 124 125 126 127 150 151 152 153 154 155 156 157 158 159 160 187 188 +7 2 5 13 23 36 39 51 60 63 64 70 71 82 83 85 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 190 192 193 +8 2 5 7 13 23 36 39 51 60 63 64 70 71 82 83 85 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 190 192 193 +13 108 121 122 123 152 153 154 155 187 +23 109 124 125 126 127 156 157 158 159 160 188 +36 110 128 129 130 131 161 162 163 164 165 166 190 +39 36 51 110 111 128 129 130 131 132 133 134 161 162 163 164 165 166 167 168 169 190 +51 111 132 133 134 167 168 169 +60 112 135 136 170 171 172 +63 60 70 82 112 113 114 115 116 117 118 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 192 193 +64 108 121 122 123 152 153 154 155 187 +70 113 114 137 138 139 140 173 174 175 176 192 +71 109 124 125 126 127 156 157 158 159 160 188 +82 115 116 117 118 141 142 143 144 145 146 147 148 149 177 178 179 180 181 182 183 184 185 186 193 +83 110 111 112 128 129 130 131 132 133 134 135 136 161 162 163 164 165 166 167 168 169 170 171 172 190 +85 113 114 137 138 139 140 173 174 175 176 192 +106 115 116 117 118 141 142 143 144 145 146 147 148 149 177 178 179 180 181 182 183 184 185 186 193 +107 119 120 150 151 +108 121 122 123 152 153 154 155 187 +109 124 125 126 127 156 157 158 159 160 188 +110 128 129 130 131 161 162 163 164 165 166 190 +111 132 133 134 167 168 169 +112 135 136 170 171 172 +113 137 138 173 174 192 +114 139 140 175 176 +115 141 142 177 178 193 +116 143 179 +117 144 145 146 147 180 181 182 183 184 +118 148 149 185 186 +119 150 +120 151 +121 152 153 +122 154 +123 155 +124 156 +125 157 +126 158 +127 159 160 +128 161 162 +129 163 +130 164 +131 165 166 +132 167 +133 168 +134 169 +135 170 171 +136 172 +137 173 +138 174 +139 175 +140 176 +141 177 +142 178 +143 179 +144 180 +145 181 +146 182 +147 183 184 +148 185 +149 186 +150 +151 +152 +153 +154 +155 +156 +157 +158 +159 +160 +161 +162 +163 +164 +165 +166 +167 +168 +169 +170 +171 +172 +173 +174 +175 +176 +177 +178 +179 +180 +181 +182 +183 +184 +185 +186 +187 +188 +189 +190 +191 +192 +193 +194 +195 + +0 + diff --git a/inst/extdata/rawDemoData/annotationFiles/msajc012.lab b/inst/extdata/rawDemoData/annotationFiles/msajc012.lab new file mode 100644 index 00000000..893c213b --- /dev/null +++ b/inst/extdata/rawDemoData/annotationFiles/msajc012.lab @@ -0,0 +1,41 @@ +signal msajc012 +nfields 1 +# + 0.300000 125 H# + 0.330499 125 D + 0.379597 125 @ + 0.427007 125 t + 0.546362 125 S + 0.615469 125 I + 0.744565 125 l + 0.812468 125 w + 0.895002 125 I + 1.023007 125 n + 1.047999 125 d + 1.083007 125 H + 1.126219 125 k + 1.184007 125 H + 1.322488 125 o: + 1.387007 125 z + 1.439007 125 d + 1.456512 125 H + 1.472080 125 D + 1.490499 125 @ + 1.565007 125 m + 1.597007 125 t + 1.621007 125 H + 1.651007 125 @ + 1.800998 125 S + 1.863007 125 I + 1.926007 125 v + 1.995007 125 @ + 2.090007 125 v + 2.220007 125 ai + 2.304838 125 @ + 2.356002 125 l + 2.402311 125 @ + 2.474922 125 n + 2.507007 125 t + 2.534007 125 H + 2.569280 125 l + 2.692363 125 i: diff --git a/inst/extdata/rawDemoData/annotationFiles/msajc012.par b/inst/extdata/rawDemoData/annotationFiles/msajc012.par new file mode 100644 index 00000000..18c61564 --- /dev/null +++ b/inst/extdata/rawDemoData/annotationFiles/msajc012.par @@ -0,0 +1,57 @@ +LHD: Partitur 1.2.16 +REP: unknown +SNB: 2 +SAM: 20000 +SBF: 01 +SSB: 16 +NCH: 1 +SPN: unknown +LBD: +KAN: 0 D@ +KAN: 1 tSIl +KAN: 2 waInd +KAN: 3 kO:zd +KAN: 4 Dem +KAN: 5 t@ +KAN: 6 SIv@ +KAN: 7 vaI@l@ntlI +ORT: 0 the +ORT: 1 chill +ORT: 2 wind +ORT: 3 caused +ORT: 4 them +ORT: 5 to +ORT: 6 shiver +ORT: 7 violently +TRN: 5800 48399 0,1,2,3,4,5,6,7 the chill wind caused them to shiver violently +MAU: 0 5799 -1 +MAU: 5800 599 0 D +MAU: 6400 1799 0 @ +MAU: 8200 2999 1 tS +MAU: 11200 599 1 I +MAU: 11800 2999 1 l +MAU: 14800 2199 2 w +MAU: 17000 999 2 I +MAU: 18000 2999 2 n +MAU: 21000 2999 3 k +MAU: 24000 2799 3 O: +MAU: 26800 1599 3 z +MAU: 28400 599 4 D +MAU: 29000 1199 4 @ +MAU: 30200 1599 4 m +MAU: 31800 599 5 t +MAU: 32400 599 5 @ +MAU: 33000 3199 6 S +MAU: 36200 999 6 I +MAU: 37200 1399 6 v +MAU: 38600 1399 6 @ +MAU: 40000 1999 7 v +MAU: 42000 3799 7 aI +MAU: 45800 599 7 @ +MAU: 46400 599 7 l +MAU: 47000 799 7 @ +MAU: 47800 2199 7 n +MAU: 50000 599 7 t +MAU: 50600 1399 7 l +MAU: 52000 2199 7 I +MAU: 54200 5399 -1 diff --git a/inst/extdata/rawDemoData/annotationFiles/msajc012.parmanipulated b/inst/extdata/rawDemoData/annotationFiles/msajc012.parmanipulated new file mode 100644 index 00000000..1a006599 --- /dev/null +++ b/inst/extdata/rawDemoData/annotationFiles/msajc012.parmanipulated @@ -0,0 +1,28 @@ +LHD: Partitur 1.2.16 +REP: unknown +SNB: 2 +SAM: 20000 +SBF: 01 +SSB: 16 +NCH: 1 +SPN: unknown +LBD: +KAN: 0 D@ +KAN: 1 tSIl +KAN: 2 waInd +KAN: 3 kO:zd +KAN: 4 Dem +KAN: 5 t@ +KAN: 6 SIv@ +KAN: 7 vaI@l@ntlI +ORT: 0 the +ORT: 1 chill +ORT: 2 wind +ORT: 3 caused +ORT: 4 them +ORT: 5 to +ORT: 6 shiver +ORT: 7 violently +TRN: 5800 48399 0,1,2,3,4,5,6,7 the chill wind caused them to shiver violently +MAU: 0 5799 -1 +MAU: 5800 599 0,1,2 D diff --git a/inst/extdata/rawDemoData/annotationFiles/msajc012.tone b/inst/extdata/rawDemoData/annotationFiles/msajc012.tone new file mode 100644 index 00000000..32ec8f42 --- /dev/null +++ b/inst/extdata/rawDemoData/annotationFiles/msajc012.tone @@ -0,0 +1,12 @@ +signal msajc012 +nfields 1 +# + 0.592768 125 H* + 0.836125 125 !H* + 0.973336 125 L- + 1.260704 125 H* + 1.558427 125 L- + 1.822494 125 H* + 2.151284 125 !H* + 2.640586 125 L- + 2.670647 125 L% diff --git a/inst/extdata/rawDemoData/annotationFiles/msajc012_annot.json b/inst/extdata/rawDemoData/annotationFiles/msajc012_annot.json new file mode 100644 index 00000000..6d4f44a1 --- /dev/null +++ b/inst/extdata/rawDemoData/annotationFiles/msajc012_annot.json @@ -0,0 +1,1626 @@ +{ + "name": "msajc012", + "annotates": "msajc012.wav", + "sampleRate": 20000, + "levels": [ + { + "name": "Utterance", + "type": "ITEM", + "items": [ + { + "id": 8, + "labels": [ + { + "name": "Utterance", + "value": "" + } + ] + } + ] + }, + { + "name": "Intonational", + "type": "ITEM", + "items": [ + { + "id": 7, + "labels": [ + { + "name": "Intonational", + "value": "L%" + } + ] + } + ] + }, + { + "name": "Intermediate", + "type": "ITEM", + "items": [ + { + "id": 5, + "labels": [ + { + "name": "Intermediate", + "value": "L-" + } + ] + }, + { + "id": 39, + "labels": [ + { + "name": "Intermediate", + "value": "L-" + } + ] + }, + { + "id": 63, + "labels": [ + { + "name": "Intermediate", + "value": "L-" + } + ] + } + ] + }, + { + "name": "Word", + "type": "ITEM", + "items": [ + { + "id": 2, + "labels": [ + { + "name": "Word", + "value": "F" + }, + { + "name": "Accent", + "value": "W" + }, + { + "name": "Text", + "value": "the" + } + ] + }, + { + "id": 13, + "labels": [ + { + "name": "Word", + "value": "C" + }, + { + "name": "Accent", + "value": "S" + }, + { + "name": "Text", + "value": "chill" + } + ] + }, + { + "id": 23, + "labels": [ + { + "name": "Word", + "value": "C" + }, + { + "name": "Accent", + "value": "S" + }, + { + "name": "Text", + "value": "wind" + } + ] + }, + { + "id": 36, + "labels": [ + { + "name": "Word", + "value": "C" + }, + { + "name": "Accent", + "value": "S" + }, + { + "name": "Text", + "value": "caused" + } + ] + }, + { + "id": 51, + "labels": [ + { + "name": "Word", + "value": "F" + }, + { + "name": "Accent", + "value": "W" + }, + { + "name": "Text", + "value": "them" + } + ] + }, + { + "id": 60, + "labels": [ + { + "name": "Word", + "value": "F" + }, + { + "name": "Accent", + "value": "W" + }, + { + "name": "Text", + "value": "to" + } + ] + }, + { + "id": 70, + "labels": [ + { + "name": "Word", + "value": "C" + }, + { + "name": "Accent", + "value": "S" + }, + { + "name": "Text", + "value": "shiver" + } + ] + }, + { + "id": 82, + "labels": [ + { + "name": "Word", + "value": "C" + }, + { + "name": "Accent", + "value": "S" + }, + { + "name": "Text", + "value": "violently" + } + ] + } + ] + }, + { + "name": "Syllable", + "type": "ITEM", + "items": [ + { + "id": 107, + "labels": [ + { + "name": "Syllable", + "value": "W" + } + ] + }, + { + "id": 108, + "labels": [ + { + "name": "Syllable", + "value": "S" + } + ] + }, + { + "id": 109, + "labels": [ + { + "name": "Syllable", + "value": "S" + } + ] + }, + { + "id": 110, + "labels": [ + { + "name": "Syllable", + "value": "S" + } + ] + }, + { + "id": 111, + "labels": [ + { + "name": "Syllable", + "value": "W" + } + ] + }, + { + "id": 112, + "labels": [ + { + "name": "Syllable", + "value": "W" + } + ] + }, + { + "id": 113, + "labels": [ + { + "name": "Syllable", + "value": "S" + } + ] + }, + { + "id": 114, + "labels": [ + { + "name": "Syllable", + "value": "W" + } + ] + }, + { + "id": 115, + "labels": [ + { + "name": "Syllable", + "value": "S" + } + ] + }, + { + "id": 116, + "labels": [ + { + "name": "Syllable", + "value": "W" + } + ] + }, + { + "id": 117, + "labels": [ + { + "name": "Syllable", + "value": "W" + } + ] + }, + { + "id": 118, + "labels": [ + { + "name": "Syllable", + "value": "W" + } + ] + } + ] + }, + { + "name": "Phoneme", + "type": "ITEM", + "items": [ + { + "id": 119, + "labels": [ + { + "name": "Phoneme", + "value": "D" + } + ] + }, + { + "id": 120, + "labels": [ + { + "name": "Phoneme", + "value": "@" + } + ] + }, + { + "id": 121, + "labels": [ + { + "name": "Phoneme", + "value": "tS" + } + ] + }, + { + "id": 122, + "labels": [ + { + "name": "Phoneme", + "value": "I" + } + ] + }, + { + "id": 123, + "labels": [ + { + "name": "Phoneme", + "value": "l" + } + ] + }, + { + "id": 124, + "labels": [ + { + "name": "Phoneme", + "value": "w" + } + ] + }, + { + "id": 125, + "labels": [ + { + "name": "Phoneme", + "value": "I" + } + ] + }, + { + "id": 126, + "labels": [ + { + "name": "Phoneme", + "value": "n" + } + ] + }, + { + "id": 127, + "labels": [ + { + "name": "Phoneme", + "value": "d" + } + ] + }, + { + "id": 128, + "labels": [ + { + "name": "Phoneme", + "value": "k" + } + ] + }, + { + "id": 129, + "labels": [ + { + "name": "Phoneme", + "value": "o:" + } + ] + }, + { + "id": 130, + "labels": [ + { + "name": "Phoneme", + "value": "z" + } + ] + }, + { + "id": 131, + "labels": [ + { + "name": "Phoneme", + "value": "d" + } + ] + }, + { + "id": 132, + "labels": [ + { + "name": "Phoneme", + "value": "D" + } + ] + }, + { + "id": 133, + "labels": [ + { + "name": "Phoneme", + "value": "@" + } + ] + }, + { + "id": 134, + "labels": [ + { + "name": "Phoneme", + "value": "m" + } + ] + }, + { + "id": 135, + "labels": [ + { + "name": "Phoneme", + "value": "t" + } + ] + }, + { + "id": 136, + "labels": [ + { + "name": "Phoneme", + "value": "@" + } + ] + }, + { + "id": 137, + "labels": [ + { + "name": "Phoneme", + "value": "S" + } + ] + }, + { + "id": 138, + "labels": [ + { + "name": "Phoneme", + "value": "I" + } + ] + }, + { + "id": 139, + "labels": [ + { + "name": "Phoneme", + "value": "v" + } + ] + }, + { + "id": 140, + "labels": [ + { + "name": "Phoneme", + "value": "@" + } + ] + }, + { + "id": 141, + "labels": [ + { + "name": "Phoneme", + "value": "v" + } + ] + }, + { + "id": 142, + "labels": [ + { + "name": "Phoneme", + "value": "ai" + } + ] + }, + { + "id": 143, + "labels": [ + { + "name": "Phoneme", + "value": "@" + } + ] + }, + { + "id": 144, + "labels": [ + { + "name": "Phoneme", + "value": "l" + } + ] + }, + { + "id": 145, + "labels": [ + { + "name": "Phoneme", + "value": "@" + } + ] + }, + { + "id": 146, + "labels": [ + { + "name": "Phoneme", + "value": "n" + } + ] + }, + { + "id": 147, + "labels": [ + { + "name": "Phoneme", + "value": "t" + } + ] + }, + { + "id": 148, + "labels": [ + { + "name": "Phoneme", + "value": "l" + } + ] + }, + { + "id": 149, + "labels": [ + { + "name": "Phoneme", + "value": "i:" + } + ] + } + ] + }, + { + "name": "Phonetic", + "type": "SEGMENT", + "items": [ + { + "id": 150, + "sampleStart": 6000, + "sampleDur": 608, + "labels": [ + { + "name": "Phonetic", + "value": "D" + } + ] + }, + { + "id": 151, + "sampleStart": 6609, + "sampleDur": 981, + "labels": [ + { + "name": "Phonetic", + "value": "@" + } + ] + }, + { + "id": 152, + "sampleStart": 7591, + "sampleDur": 948, + "labels": [ + { + "name": "Phonetic", + "value": "t" + } + ] + }, + { + "id": 153, + "sampleStart": 8540, + "sampleDur": 2386, + "labels": [ + { + "name": "Phonetic", + "value": "S" + } + ] + }, + { + "id": 154, + "sampleStart": 10927, + "sampleDur": 1381, + "labels": [ + { + "name": "Phonetic", + "value": "I" + } + ] + }, + { + "id": 155, + "sampleStart": 12309, + "sampleDur": 2581, + "labels": [ + { + "name": "Phonetic", + "value": "l" + } + ] + }, + { + "id": 156, + "sampleStart": 14891, + "sampleDur": 1357, + "labels": [ + { + "name": "Phonetic", + "value": "w" + } + ] + }, + { + "id": 157, + "sampleStart": 16249, + "sampleDur": 1650, + "labels": [ + { + "name": "Phonetic", + "value": "I" + } + ] + }, + { + "id": 158, + "sampleStart": 17900, + "sampleDur": 2559, + "labels": [ + { + "name": "Phonetic", + "value": "n" + } + ] + }, + { + "id": 159, + "sampleStart": 20460, + "sampleDur": 498, + "labels": [ + { + "name": "Phonetic", + "value": "d" + } + ] + }, + { + "id": 160, + "sampleStart": 20959, + "sampleDur": 700, + "labels": [ + { + "name": "Phonetic", + "value": "H" + } + ] + }, + { + "id": 161, + "sampleStart": 21660, + "sampleDur": 863, + "labels": [ + { + "name": "Phonetic", + "value": "k" + } + ] + }, + { + "id": 162, + "sampleStart": 22524, + "sampleDur": 1155, + "labels": [ + { + "name": "Phonetic", + "value": "H" + } + ] + }, + { + "id": 163, + "sampleStart": 23680, + "sampleDur": 2768, + "labels": [ + { + "name": "Phonetic", + "value": "o:" + } + ] + }, + { + "id": 164, + "sampleStart": 26449, + "sampleDur": 1290, + "labels": [ + { + "name": "Phonetic", + "value": "z" + } + ] + }, + { + "id": 165, + "sampleStart": 27740, + "sampleDur": 1039, + "labels": [ + { + "name": "Phonetic", + "value": "d" + } + ] + }, + { + "id": 166, + "sampleStart": 28780, + "sampleDur": 349, + "labels": [ + { + "name": "Phonetic", + "value": "H" + } + ] + }, + { + "id": 167, + "sampleStart": 29130, + "sampleDur": 310, + "labels": [ + { + "name": "Phonetic", + "value": "D" + } + ] + }, + { + "id": 168, + "sampleStart": 29441, + "sampleDur": 367, + "labels": [ + { + "name": "Phonetic", + "value": "@" + } + ] + }, + { + "id": 169, + "sampleStart": 29809, + "sampleDur": 1490, + "labels": [ + { + "name": "Phonetic", + "value": "m" + } + ] + }, + { + "id": 170, + "sampleStart": 31300, + "sampleDur": 639, + "labels": [ + { + "name": "Phonetic", + "value": "t" + } + ] + }, + { + "id": 171, + "sampleStart": 31940, + "sampleDur": 479, + "labels": [ + { + "name": "Phonetic", + "value": "H" + } + ] + }, + { + "id": 172, + "sampleStart": 32420, + "sampleDur": 599, + "labels": [ + { + "name": "Phonetic", + "value": "@" + } + ] + }, + { + "id": 173, + "sampleStart": 33020, + "sampleDur": 2998, + "labels": [ + { + "name": "Phonetic", + "value": "S" + } + ] + }, + { + "id": 174, + "sampleStart": 36019, + "sampleDur": 1240, + "labels": [ + { + "name": "Phonetic", + "value": "I" + } + ] + }, + { + "id": 175, + "sampleStart": 37260, + "sampleDur": 1259, + "labels": [ + { + "name": "Phonetic", + "value": "v" + } + ] + }, + { + "id": 176, + "sampleStart": 38520, + "sampleDur": 1379, + "labels": [ + { + "name": "Phonetic", + "value": "@" + } + ] + }, + { + "id": 177, + "sampleStart": 39900, + "sampleDur": 1899, + "labels": [ + { + "name": "Phonetic", + "value": "v" + } + ] + }, + { + "id": 178, + "sampleStart": 41800, + "sampleDur": 2599, + "labels": [ + { + "name": "Phonetic", + "value": "ai" + } + ] + }, + { + "id": 179, + "sampleStart": 44400, + "sampleDur": 1695, + "labels": [ + { + "name": "Phonetic", + "value": "@" + } + ] + }, + { + "id": 180, + "sampleStart": 46096, + "sampleDur": 1023, + "labels": [ + { + "name": "Phonetic", + "value": "l" + } + ] + }, + { + "id": 181, + "sampleStart": 47120, + "sampleDur": 925, + "labels": [ + { + "name": "Phonetic", + "value": "@" + } + ] + }, + { + "id": 182, + "sampleStart": 48046, + "sampleDur": 1451, + "labels": [ + { + "name": "Phonetic", + "value": "n" + } + ] + }, + { + "id": 183, + "sampleStart": 49498, + "sampleDur": 641, + "labels": [ + { + "name": "Phonetic", + "value": "t" + } + ] + }, + { + "id": 184, + "sampleStart": 50140, + "sampleDur": 539, + "labels": [ + { + "name": "Phonetic", + "value": "H" + } + ] + }, + { + "id": 185, + "sampleStart": 50680, + "sampleDur": 704, + "labels": [ + { + "name": "Phonetic", + "value": "l" + } + ] + }, + { + "id": 186, + "sampleStart": 51385, + "sampleDur": 2461, + "labels": [ + { + "name": "Phonetic", + "value": "i:" + } + ] + } + ] + }, + { + "name": "Tone", + "type": "EVENT", + "items": [ + { + "id": 187, + "samplePoint": 11855, + "labels": [ + { + "name": "Tone", + "value": "H*" + } + ] + }, + { + "id": 188, + "samplePoint": 16722, + "labels": [ + { + "name": "Tone", + "value": "!H*" + } + ] + }, + { + "id": 189, + "samplePoint": 19467, + "labels": [ + { + "name": "Tone", + "value": "L-" + } + ] + }, + { + "id": 190, + "samplePoint": 25214, + "labels": [ + { + "name": "Tone", + "value": "H*" + } + ] + }, + { + "id": 191, + "samplePoint": 31169, + "labels": [ + { + "name": "Tone", + "value": "L-" + } + ] + }, + { + "id": 192, + "samplePoint": 36450, + "labels": [ + { + "name": "Tone", + "value": "H*" + } + ] + }, + { + "id": 193, + "samplePoint": 43026, + "labels": [ + { + "name": "Tone", + "value": "!H*" + } + ] + }, + { + "id": 194, + "samplePoint": 52812, + "labels": [ + { + "name": "Tone", + "value": "L-" + } + ] + }, + { + "id": 195, + "samplePoint": 53413, + "labels": [ + { + "name": "Tone", + "value": "L%" + } + ] + } + ] + }, + { + "name": "Foot", + "type": "ITEM", + "items": [ + { + "id": 64, + "labels": [ + { + "name": "Foot", + "value": "F" + } + ] + }, + { + "id": 71, + "labels": [ + { + "name": "Foot", + "value": "F" + } + ] + }, + { + "id": 83, + "labels": [ + { + "name": "Foot", + "value": "F" + } + ] + }, + { + "id": 85, + "labels": [ + { + "name": "Foot", + "value": "F" + } + ] + }, + { + "id": 106, + "labels": [ + { + "name": "Foot", + "value": "F" + } + ] + } + ] + } + ], + "links": [ + { + "fromID": 8, + "toID": 7 + }, + { + "fromID": 7, + "toID": 64 + }, + { + "fromID": 7, + "toID": 71 + }, + { + "fromID": 7, + "toID": 83 + }, + { + "fromID": 7, + "toID": 85 + }, + { + "fromID": 7, + "toID": 106 + }, + { + "fromID": 7, + "toID": 5 + }, + { + "fromID": 7, + "toID": 39 + }, + { + "fromID": 7, + "toID": 63 + }, + { + "fromID": 5, + "toID": 2 + }, + { + "fromID": 5, + "toID": 13 + }, + { + "fromID": 5, + "toID": 23 + }, + { + "fromID": 39, + "toID": 36 + }, + { + "fromID": 39, + "toID": 51 + }, + { + "fromID": 63, + "toID": 60 + }, + { + "fromID": 63, + "toID": 70 + }, + { + "fromID": 63, + "toID": 82 + }, + { + "fromID": 2, + "toID": 107 + }, + { + "fromID": 13, + "toID": 108 + }, + { + "fromID": 23, + "toID": 109 + }, + { + "fromID": 36, + "toID": 110 + }, + { + "fromID": 51, + "toID": 111 + }, + { + "fromID": 60, + "toID": 112 + }, + { + "fromID": 70, + "toID": 113 + }, + { + "fromID": 70, + "toID": 114 + }, + { + "fromID": 82, + "toID": 115 + }, + { + "fromID": 82, + "toID": 116 + }, + { + "fromID": 82, + "toID": 117 + }, + { + "fromID": 82, + "toID": 118 + }, + { + "fromID": 107, + "toID": 119 + }, + { + "fromID": 107, + "toID": 120 + }, + { + "fromID": 108, + "toID": 121 + }, + { + "fromID": 108, + "toID": 122 + }, + { + "fromID": 108, + "toID": 123 + }, + { + "fromID": 108, + "toID": 187 + }, + { + "fromID": 109, + "toID": 124 + }, + { + "fromID": 109, + "toID": 125 + }, + { + "fromID": 109, + "toID": 126 + }, + { + "fromID": 109, + "toID": 127 + }, + { + "fromID": 109, + "toID": 188 + }, + { + "fromID": 110, + "toID": 128 + }, + { + "fromID": 110, + "toID": 129 + }, + { + "fromID": 110, + "toID": 130 + }, + { + "fromID": 110, + "toID": 131 + }, + { + "fromID": 110, + "toID": 190 + }, + { + "fromID": 111, + "toID": 132 + }, + { + "fromID": 111, + "toID": 133 + }, + { + "fromID": 111, + "toID": 134 + }, + { + "fromID": 112, + "toID": 135 + }, + { + "fromID": 112, + "toID": 136 + }, + { + "fromID": 113, + "toID": 137 + }, + { + "fromID": 113, + "toID": 138 + }, + { + "fromID": 113, + "toID": 192 + }, + { + "fromID": 114, + "toID": 139 + }, + { + "fromID": 114, + "toID": 140 + }, + { + "fromID": 115, + "toID": 141 + }, + { + "fromID": 115, + "toID": 142 + }, + { + "fromID": 115, + "toID": 193 + }, + { + "fromID": 116, + "toID": 143 + }, + { + "fromID": 117, + "toID": 144 + }, + { + "fromID": 117, + "toID": 145 + }, + { + "fromID": 117, + "toID": 146 + }, + { + "fromID": 117, + "toID": 147 + }, + { + "fromID": 118, + "toID": 148 + }, + { + "fromID": 118, + "toID": 149 + }, + { + "fromID": 119, + "toID": 150 + }, + { + "fromID": 120, + "toID": 151 + }, + { + "fromID": 121, + "toID": 152 + }, + { + "fromID": 121, + "toID": 153 + }, + { + "fromID": 122, + "toID": 154 + }, + { + "fromID": 123, + "toID": 155 + }, + { + "fromID": 124, + "toID": 156 + }, + { + "fromID": 125, + "toID": 157 + }, + { + "fromID": 126, + "toID": 158 + }, + { + "fromID": 127, + "toID": 159 + }, + { + "fromID": 127, + "toID": 160 + }, + { + "fromID": 128, + "toID": 161 + }, + { + "fromID": 128, + "toID": 162 + }, + { + "fromID": 129, + "toID": 163 + }, + { + "fromID": 130, + "toID": 164 + }, + { + "fromID": 131, + "toID": 165 + }, + { + "fromID": 131, + "toID": 166 + }, + { + "fromID": 132, + "toID": 167 + }, + { + "fromID": 133, + "toID": 168 + }, + { + "fromID": 134, + "toID": 169 + }, + { + "fromID": 135, + "toID": 170 + }, + { + "fromID": 135, + "toID": 171 + }, + { + "fromID": 136, + "toID": 172 + }, + { + "fromID": 137, + "toID": 173 + }, + { + "fromID": 138, + "toID": 174 + }, + { + "fromID": 139, + "toID": 175 + }, + { + "fromID": 140, + "toID": 176 + }, + { + "fromID": 141, + "toID": 177 + }, + { + "fromID": 142, + "toID": 178 + }, + { + "fromID": 143, + "toID": 179 + }, + { + "fromID": 144, + "toID": 180 + }, + { + "fromID": 145, + "toID": 181 + }, + { + "fromID": 146, + "toID": 182 + }, + { + "fromID": 147, + "toID": 183 + }, + { + "fromID": 147, + "toID": 184 + }, + { + "fromID": 148, + "toID": 185 + }, + { + "fromID": 149, + "toID": 186 + }, + { + "fromID": 64, + "toID": 108 + }, + { + "fromID": 71, + "toID": 109 + }, + { + "fromID": 83, + "toID": 110 + }, + { + "fromID": 83, + "toID": 111 + }, + { + "fromID": 83, + "toID": 112 + }, + { + "fromID": 85, + "toID": 113 + }, + { + "fromID": 85, + "toID": 114 + }, + { + "fromID": 106, + "toID": 115 + }, + { + "fromID": 106, + "toID": 116 + }, + { + "fromID": 106, + "toID": 117 + }, + { + "fromID": 106, + "toID": 118 + } + ] +} + diff --git a/inst/extdata/rawDemoData/annotationFiles/msajc015.TextGrid b/inst/extdata/rawDemoData/annotationFiles/msajc015.TextGrid new file mode 100644 index 00000000..46997cff --- /dev/null +++ b/inst/extdata/rawDemoData/annotationFiles/msajc015.TextGrid @@ -0,0 +1,723 @@ +File type = "ooTextFile" +Object class = "TextGrid" + +xmin = 0 +xmax = 3.75685 +tiers? +size = 11 +item []: + item [1]: + class = "IntervalTier" + name = "Utterance" + xmin = 0 + xmax = 3.75685 + intervals: size = 3 + intervals [1]: + xmin = 0 + xmax = 0.3 + text = "" + intervals [2]: + xmin = 0.3 + xmax = 3.456899 + text = "" + intervals [3]: + xmin = 3.456899 + xmax = 3.75685 + text = "" + item [2]: + class = "IntervalTier" + name = "Intonational" + xmin = 0 + xmax = 3.75685 + intervals: size = 3 + intervals [1]: + xmin = 0 + xmax = 0.3 + text = "" + intervals [2]: + xmin = 0.3 + xmax = 3.456899 + text = "L%" + intervals [3]: + xmin = 3.456899 + xmax = 3.75685 + text = "" + item [3]: + class = "IntervalTier" + name = "Intermediate" + xmin = 0 + xmax = 3.75685 + intervals: size = 4 + intervals [1]: + xmin = 0 + xmax = 0.3 + text = "" + intervals [2]: + xmin = 0.3 + xmax = 1.797463 + text = "L-" + intervals [3]: + xmin = 1.797463 + xmax = 3.456899 + text = "L-" + intervals [4]: + xmin = 3.456899 + xmax = 3.75685 + text = "" + item [4]: + class = "IntervalTier" + name = "Word" + xmin = 0 + xmax = 3.75685 + intervals: size = 10 + intervals [1]: + xmin = 0 + xmax = 0.3 + text = "" + intervals [2]: + xmin = 0.3 + xmax = 0.425417 + text = "F" + intervals [3]: + xmin = 0.425417 + xmax = 1.129101 + text = "C" + intervals [4]: + xmin = 1.129101 + xmax = 1.213101 + text = "F" + intervals [5]: + xmin = 1.213101 + xmax = 1.797463 + text = "C" + intervals [6]: + xmin = 1.797463 + xmax = 2.104101 + text = "C" + intervals [7]: + xmin = 2.104101 + xmax = 2.693704 + text = "C" + intervals [8]: + xmin = 2.693704 + xmax = 2.780766 + text = "F" + intervals [9]: + xmin = 2.780766 + xmax = 3.456899 + text = "C" + intervals [10]: + xmin = 3.456899 + xmax = 3.75685 + text = "" + item [5]: + class = "IntervalTier" + name = "Accent" + xmin = 0 + xmax = 3.75685 + intervals: size = 10 + intervals [1]: + xmin = 0 + xmax = 0.3 + text = "" + intervals [2]: + xmin = 0.3 + xmax = 0.425417 + text = "W" + intervals [3]: + xmin = 0.425417 + xmax = 1.129101 + text = "S" + intervals [4]: + xmin = 1.129101 + xmax = 1.213101 + text = "W" + intervals [5]: + xmin = 1.213101 + xmax = 1.797463 + text = "S" + intervals [6]: + xmin = 1.797463 + xmax = 2.104101 + text = "W" + intervals [7]: + xmin = 2.104101 + xmax = 2.693704 + text = "S" + intervals [8]: + xmin = 2.693704 + xmax = 2.780766 + text = "W" + intervals [9]: + xmin = 2.780766 + xmax = 3.456899 + text = "S" + intervals [10]: + xmin = 3.456899 + xmax = 3.75685 + text = "" + item [6]: + class = "IntervalTier" + name = "Text" + xmin = 0 + xmax = 3.75685 + intervals: size = 10 + intervals [1]: + xmin = 0 + xmax = 0.3 + text = "" + intervals [2]: + xmin = 0.3 + xmax = 0.425417 + text = "he" + intervals [3]: + xmin = 0.425417 + xmax = 1.129101 + text = "emphasized" + intervals [4]: + xmin = 1.129101 + xmax = 1.213101 + text = "his" + intervals [5]: + xmin = 1.213101 + xmax = 1.797463 + text = "strengths" + intervals [6]: + xmin = 1.797463 + xmax = 2.104101 + text = "while" + intervals [7]: + xmin = 2.104101 + xmax = 2.693704 + text = "concealing" + intervals [8]: + xmin = 2.693704 + xmax = 2.780766 + text = "his" + intervals [9]: + xmin = 2.780766 + xmax = 3.456899 + text = "weaknesses" + intervals [10]: + xmin = 3.456899 + xmax = 3.75685 + text = "" + item [7]: + class = "IntervalTier" + name = "Syllable" + xmin = 0 + xmax = 3.75685 + intervals: size = 16 + intervals [1]: + xmin = 0 + xmax = 0.3 + text = "" + intervals [2]: + xmin = 0.3 + xmax = 0.425417 + text = "W" + intervals [3]: + xmin = 0.425417 + xmax = 0.639601 + text = "S" + intervals [4]: + xmin = 0.639601 + xmax = 0.706601 + text = "W" + intervals [5]: + xmin = 0.706601 + xmax = 1.129101 + text = "W" + intervals [6]: + xmin = 1.129101 + xmax = 1.213101 + text = "W" + intervals [7]: + xmin = 1.213101 + xmax = 1.797463 + text = "S" + intervals [8]: + xmin = 1.797463 + xmax = 2.104101 + text = "S" + intervals [9]: + xmin = 2.104101 + xmax = 2.271132 + text = "W" + intervals [10]: + xmin = 2.271132 + xmax = 2.502214 + text = "S" + intervals [11]: + xmin = 2.502214 + xmax = 2.693704 + text = "W" + intervals [12]: + xmin = 2.693704 + xmax = 2.780766 + text = "W" + intervals [13]: + xmin = 2.780766 + xmax = 3.046168 + text = "S" + intervals [14]: + xmin = 3.046168 + xmax = 3.123168 + text = "W" + intervals [15]: + xmin = 3.123168 + xmax = 3.456899 + text = "W" + intervals [16]: + xmin = 3.456899 + xmax = 3.75685 + text = "" + item [8]: + class = "IntervalTier" + name = "Phoneme" + xmin = 0 + xmax = 3.75685 + intervals: size = 43 + intervals [1]: + xmin = 0 + xmax = 0.3 + text = "" + intervals [2]: + xmin = 0.3 + xmax = 0.350276 + text = "h" + intervals [3]: + xmin = 0.350276 + xmax = 0.425417 + text = "i:" + intervals [4]: + xmin = 0.425417 + xmax = 0.496601 + text = "E" + intervals [5]: + xmin = 0.496601 + xmax = 0.558601 + text = "m" + intervals [6]: + xmin = 0.558601 + xmax = 0.639601 + text = "p" + intervals [7]: + xmin = 0.639601 + xmax = 0.663601 + text = "f" + intervals [8]: + xmin = 0.663601 + xmax = 0.706601 + text = "@" + intervals [9]: + xmin = 0.706601 + xmax = 0.806601 + text = "s" + intervals [10]: + xmin = 0.806601 + xmax = 1.006101 + text = "ai" + intervals [11]: + xmin = 1.006101 + xmax = 1.085101 + text = "z" + intervals [12]: + xmin = 1.085101 + xmax = 1.129101 + text = "d" + intervals [13]: + xmin = 1.129101 + xmax = 1.160101 + text = "h" + intervals [14]: + xmin = 1.160101 + xmax = 1.213101 + text = "I" + intervals [15]: + xmin = 1.213101 + xmax = 1.368101 + text = "z_s" + intervals [16]: + xmin = 1.368101 + xmax = 1.44955 + text = "t" + intervals [17]: + xmin = 1.44955 + xmax = 1.500731 + text = "r" + intervals [18]: + xmin = 1.500731 + xmax = 1.578583 + text = "E" + intervals [19]: + xmin = 1.578583 + xmax = 1.653718 + text = "N" + intervals [20]: + xmin = 1.653718 + xmax = 1.717601 + text = "T" + intervals [21]: + xmin = 1.717601 + xmax = 1.797463 + text = "s" + intervals [22]: + xmin = 1.797463 + xmax = 1.903635 + text = "w" + intervals [23]: + xmin = 1.903635 + xmax = 2.070101 + text = "ai" + intervals [24]: + xmin = 2.070101 + xmax = 2.104101 + text = "l" + intervals [25]: + xmin = 2.104101 + xmax = 2.200911 + text = "k" + intervals [26]: + xmin = 2.200911 + xmax = 2.226601 + text = "@" + intervals [27]: + xmin = 2.226601 + xmax = 2.271132 + text = "n" + intervals [28]: + xmin = 2.271132 + xmax = 2.408601 + text = "s" + intervals [29]: + xmin = 2.408601 + xmax = 2.502214 + text = "i:" + intervals [30]: + xmin = 2.502214 + xmax = 2.576618 + text = "l" + intervals [31]: + xmin = 2.576618 + xmax = 2.606558 + text = "I" + intervals [32]: + xmin = 2.606558 + xmax = 2.693704 + text = "N" + intervals [33]: + xmin = 2.693704 + xmax = 2.749004 + text = "I" + intervals [34]: + xmin = 2.749004 + xmax = 2.780766 + text = "z" + intervals [35]: + xmin = 2.780766 + xmax = 2.876593 + text = "w" + intervals [36]: + xmin = 2.876593 + xmax = 2.958101 + text = "i:" + intervals [37]: + xmin = 2.958101 + xmax = 3.046168 + text = "k" + intervals [38]: + xmin = 3.046168 + xmax = 3.067703 + text = "n" + intervals [39]: + xmin = 3.067703 + xmax = 3.123168 + text = "@" + intervals [40]: + xmin = 3.123168 + xmax = 3.238668 + text = "s" + intervals [41]: + xmin = 3.238668 + xmax = 3.297668 + text = "@" + intervals [42]: + xmin = 3.297668 + xmax = 3.456899 + text = "z" + intervals [43]: + xmin = 3.456899 + xmax = 3.75685 + text = "" + item [9]: + class = "IntervalTier" + name = "Phonetic" + xmin = 0 + xmax = 3.75685 + intervals: size = 51 + intervals [1]: + xmin = 0 + xmax = 0.3 + text = "" + intervals [2]: + xmin = 0.3 + xmax = 0.350276 + text = "h" + intervals [3]: + xmin = 0.350276 + xmax = 0.425417 + text = "i:" + intervals [4]: + xmin = 0.425417 + xmax = 0.496601 + text = "E" + intervals [5]: + xmin = 0.496601 + xmax = 0.558601 + text = "m" + intervals [6]: + xmin = 0.558601 + xmax = 0.639601 + text = "p" + intervals [7]: + xmin = 0.639601 + xmax = 0.663601 + text = "f" + intervals [8]: + xmin = 0.663601 + xmax = 0.706601 + text = "@" + intervals [9]: + xmin = 0.706601 + xmax = 0.806601 + text = "s" + intervals [10]: + xmin = 0.806601 + xmax = 1.006101 + text = "ai" + intervals [11]: + xmin = 1.006101 + xmax = 1.085101 + text = "z" + intervals [12]: + xmin = 1.085101 + xmax = 1.097601 + text = "d" + intervals [13]: + xmin = 1.097601 + xmax = 1.129101 + text = "H" + intervals [14]: + xmin = 1.129101 + xmax = 1.160101 + text = "h" + intervals [15]: + xmin = 1.160101 + xmax = 1.213101 + text = "I" + intervals [16]: + xmin = 1.213101 + xmax = 1.368101 + text = "zs" + intervals [17]: + xmin = 1.368101 + xmax = 1.413095 + text = "t" + intervals [18]: + xmin = 1.413095 + xmax = 1.44955 + text = "H" + intervals [19]: + xmin = 1.44955 + xmax = 1.464601 + text = "Or" + intervals [20]: + xmin = 1.464601 + xmax = 1.500731 + text = "r" + intervals [21]: + xmin = 1.500731 + xmax = 1.578583 + text = "E" + intervals [22]: + xmin = 1.578583 + xmax = 1.623228 + text = "N" + intervals [23]: + xmin = 1.623228 + xmax = 1.653718 + text = "NH" + intervals [24]: + xmin = 1.653718 + xmax = 1.717601 + text = "T" + intervals [25]: + xmin = 1.717601 + xmax = 1.797463 + text = "s" + intervals [26]: + xmin = 1.797463 + xmax = 1.828601 + text = "Ow" + intervals [27]: + xmin = 1.828601 + xmax = 1.903635 + text = "w" + intervals [28]: + xmin = 1.903635 + xmax = 2.070101 + text = "ai" + intervals [29]: + xmin = 2.070101 + xmax = 2.104101 + text = "l" + intervals [30]: + xmin = 2.104101 + xmax = 2.154601 + text = "k" + intervals [31]: + xmin = 2.154601 + xmax = 2.200911 + text = "H" + intervals [32]: + xmin = 2.200911 + xmax = 2.226601 + text = "@" + intervals [33]: + xmin = 2.226601 + xmax = 2.271132 + text = "n" + intervals [34]: + xmin = 2.271132 + xmax = 2.408601 + text = "s" + intervals [35]: + xmin = 2.408601 + xmax = 2.502214 + text = "i:" + intervals [36]: + xmin = 2.502214 + xmax = 2.576618 + text = "l" + intervals [37]: + xmin = 2.576618 + xmax = 2.606558 + text = "I" + intervals [38]: + xmin = 2.606558 + xmax = 2.693704 + text = "N" + intervals [39]: + xmin = 2.693704 + xmax = 2.749004 + text = "I" + intervals [40]: + xmin = 2.749004 + xmax = 2.780766 + text = "z" + intervals [41]: + xmin = 2.780766 + xmax = 2.798504 + text = "Ow" + intervals [42]: + xmin = 2.798504 + xmax = 2.876593 + text = "w" + intervals [43]: + xmin = 2.876593 + xmax = 2.958101 + text = "i:" + intervals [44]: + xmin = 2.958101 + xmax = 3.026668 + text = "k" + intervals [45]: + xmin = 3.026668 + xmax = 3.046168 + text = "H" + intervals [46]: + xmin = 3.046168 + xmax = 3.067703 + text = "n" + intervals [47]: + xmin = 3.067703 + xmax = 3.123168 + text = "@" + intervals [48]: + xmin = 3.123168 + xmax = 3.238668 + text = "s" + intervals [49]: + xmin = 3.238668 + xmax = 3.297668 + text = "@" + intervals [50]: + xmin = 3.297668 + xmax = 3.456899 + text = "z" + intervals [51]: + xmin = 3.456899 + xmax = 3.75685 + text = "" + item [10]: + class = "TextTier" + name = "Tone" + xmin = 0 + xmax = 3.75685 + points: size = 7 + points [1]: + number = 0.530305 + mark = "L+H*" + points [2]: + number = 1.48576 + mark = "H*" + points [3]: + number = 1.608948 + mark = "L-" + points [4]: + number = 2.44422 + mark = "H*" + points [5]: + number = 2.909929 + mark = "H*" + points [6]: + number = 3.109782 + mark = "L-" + points [7]: + number = 3.261078 + mark = "L%" + item [11]: + class = "IntervalTier" + name = "Foot" + xmin = 0 + xmax = 3.75685 + intervals: size = 7 + intervals [1]: + xmin = 0 + xmax = 0.425417 + text = "" + intervals [2]: + xmin = 0.425417 + xmax = 1.213101 + text = "F" + intervals [3]: + xmin = 1.213101 + xmax = 1.797463 + text = "F" + intervals [4]: + xmin = 1.797463 + xmax = 2.271132 + text = "F" + intervals [5]: + xmin = 2.271132 + xmax = 2.780766 + text = "F" + intervals [6]: + xmin = 2.780766 + xmax = 3.456899 + text = "F" + intervals [7]: + xmin = 3.456899 + xmax = 3.75685 + text = "" diff --git a/inst/extdata/rawDemoData/annotationFiles/msajc015.hlb b/inst/extdata/rawDemoData/annotationFiles/msajc015.hlb new file mode 100644 index 00000000..84827f75 --- /dev/null +++ b/inst/extdata/rawDemoData/annotationFiles/msajc015.hlb @@ -0,0 +1,282 @@ +**EMU hierarchical labels** +240 +Syllable Syllable +110 W +130 S +131 W +132 W +133 W +134 S +135 S +136 W +137 S +138 W +139 W +140 S +141 W +142 W + +Word Word Accent Text +2 F W he +13 C S emphasized +37 F W his +46 C S strengths +66 C W while +78 C S concealing +101 F W his +109 C S weaknesses + +Foot Foot +70 F +79 F +88 F +102 F +104 F + +Phoneme Phoneme +143 h +144 i: +145 E +146 m +147 p +148 f +149 @ +150 s +151 ai +152 z +153 d +154 h +155 I +156 z +157 s +158 t +159 r +160 E +161 N +162 T +163 s +164 w +165 ai +166 l +167 k +168 @ +169 n +170 s +171 i: +172 l +173 I +174 N +175 I +176 z +177 w +178 i: +179 k +180 n +181 @ +182 s +183 @ +184 z + +Phonetic Phonetic +185 h +186 i: +187 E +188 m +189 p +190 f +191 @ +192 s +193 ai +194 z +195 d +196 H +197 h +198 I +199 zs +200 t +201 H +202 Or +203 r +204 E +205 N +206 NH +207 T +208 s +209 Ow +210 w +211 ai +212 l +213 k +214 H +215 @ +216 n +217 s +218 i: +219 l +220 I +221 N +222 I +223 z +224 Ow +225 w +226 i: +227 k +228 H +229 n +230 @ +231 s +232 @ +233 z + +Tone Tone +234 L+H* +235 H* +236 L- +237 H* +238 H* +239 L- +240 L% + +Utterance Utterance +8 + +Intonational Intonational +7 L% + +Intermediate Intermediate +5 L- +69 L- + + +2 110 143 144 185 186 +5 2 13 37 46 110 130 131 132 133 134 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 234 235 +7 2 5 13 37 46 66 69 70 78 79 88 101 102 104 109 110 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 237 238 +8 2 5 7 13 37 46 66 69 70 78 79 88 101 102 104 109 110 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 237 238 +13 130 131 132 145 146 147 148 149 150 151 152 153 187 188 189 190 191 192 193 194 195 196 234 +37 133 154 155 156 197 198 199 +46 134 157 158 159 160 161 162 163 199 200 201 202 203 204 205 206 207 208 235 +66 135 164 165 166 209 210 211 212 +69 66 78 101 109 135 136 137 138 139 140 141 142 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 237 238 +70 130 131 132 133 145 146 147 148 149 150 151 152 153 154 155 156 187 188 189 190 191 192 193 194 195 196 197 198 199 234 +78 136 137 138 167 168 169 170 171 172 173 174 213 214 215 216 217 218 219 220 221 237 +79 134 157 158 159 160 161 162 163 199 200 201 202 203 204 205 206 207 208 235 +88 135 136 164 165 166 167 168 169 209 210 211 212 213 214 215 216 +101 139 175 176 222 223 +102 137 138 139 170 171 172 173 174 175 176 217 218 219 220 221 222 223 237 +104 140 141 142 177 178 179 180 181 182 183 184 224 225 226 227 228 229 230 231 232 233 238 +109 140 141 142 177 178 179 180 181 182 183 184 224 225 226 227 228 229 230 231 232 233 238 +110 143 144 185 186 +130 145 146 147 187 188 189 234 +131 148 149 190 191 +132 150 151 152 153 192 193 194 195 196 +133 154 155 156 197 198 199 +134 157 158 159 160 161 162 163 199 200 201 202 203 204 205 206 207 208 235 +135 164 165 166 209 210 211 212 +136 167 168 169 213 214 215 216 +137 170 171 217 218 237 +138 172 173 174 219 220 221 +139 175 176 222 223 +140 177 178 179 224 225 226 227 228 238 +141 180 181 229 230 +142 182 183 184 231 232 233 +143 185 +144 186 +145 187 +146 188 +147 189 +148 190 +149 191 +150 192 +151 193 +152 194 +153 195 196 +154 197 +155 198 +156 199 +157 199 +158 200 201 +159 202 203 +160 204 +161 205 206 +162 207 +163 208 +164 209 210 +165 211 +166 212 +167 213 214 +168 215 +169 216 +170 217 +171 218 +172 219 +173 220 +174 221 +175 222 +176 223 +177 224 225 +178 226 +179 227 228 +180 229 +181 230 +182 231 +183 232 +184 233 +185 +186 +187 +188 +189 +190 +191 +192 +193 +194 +195 +196 +197 +198 +199 +200 +201 +202 +203 +204 +205 +206 +207 +208 +209 +210 +211 +212 +213 +214 +215 +216 +217 +218 +219 +220 +221 +222 +223 +224 +225 +226 +227 +228 +229 +230 +231 +232 +233 +234 +235 +236 +237 +238 +239 +240 + +0 + diff --git a/inst/extdata/rawDemoData/annotationFiles/msajc015.lab b/inst/extdata/rawDemoData/annotationFiles/msajc015.lab new file mode 100644 index 00000000..3f4945cf --- /dev/null +++ b/inst/extdata/rawDemoData/annotationFiles/msajc015.lab @@ -0,0 +1,53 @@ +signal msajc015 +nfields 1 +# + 0.300000 125 H# + 0.350276 125 h + 0.425417 125 i: + 0.496601 125 E + 0.558601 125 m + 0.639601 125 p + 0.663601 125 f + 0.706601 125 @ + 0.806601 125 s + 1.006101 125 ai + 1.085101 125 z + 1.097601 125 d + 1.129101 125 H + 1.160101 125 h + 1.213101 125 I + 1.368101 125 zs + 1.413095 125 t + 1.449550 125 H + 1.464601 125 Or + 1.500731 125 r + 1.578583 125 E + 1.623228 125 N + 1.653718 125 NH + 1.717601 125 T + 1.797463 125 s + 1.828601 125 Ow + 1.903635 125 w + 2.070101 125 ai + 2.104101 125 l + 2.154601 125 k + 2.200911 125 H + 2.226601 125 @ + 2.271132 125 n + 2.408601 125 s + 2.502214 125 i: + 2.576618 125 l + 2.606558 125 I + 2.693704 125 N + 2.749004 125 I + 2.780766 125 z + 2.798504 125 Ow + 2.876593 125 w + 2.958101 125 i: + 3.026668 125 k + 3.046168 125 H + 3.067703 125 n + 3.123168 125 @ + 3.238668 125 s + 3.297668 125 @ + 3.456899 125 z diff --git a/inst/extdata/rawDemoData/annotationFiles/msajc015.par b/inst/extdata/rawDemoData/annotationFiles/msajc015.par new file mode 100644 index 00000000..cfbed777 --- /dev/null +++ b/inst/extdata/rawDemoData/annotationFiles/msajc015.par @@ -0,0 +1,66 @@ +LHD: Partitur 1.2.16 +REP: unknown +SNB: 2 +SAM: 20000 +SBF: 01 +SSB: 16 +NCH: 1 +SPN: unknown +LBD: +KAN: 0 hi: +KAN: 1 emf@saIzd +KAN: 2 hIz +KAN: 3 streNTs +KAN: 4 waIl +KAN: 5 k@nsi:lIN +KAN: 6 hIz +KAN: 7 wi:knIsIz +ORT: 0 he +ORT: 1 emphasized +ORT: 2 his +ORT: 3 strengths +ORT: 4 while +ORT: 5 concealing +ORT: 6 his +ORT: 7 weaknesses +TRN: 6000 63399 0,1,2,3,4,5,6,7 he emphasized his strengths while concealing his weaknesses +MAU: 0 5999 -1 +MAU: 6000 1199 0 h +MAU: 7200 2199 0 i: +MAU: 9400 799 1 e +MAU: 10200 1799 1 m +MAU: 12000 1399 1 f +MAU: 13400 599 1 @ +MAU: 14000 2399 1 s +MAU: 16400 3599 1 aI +MAU: 20000 3199 1 z +MAU: 23200 1199 2 I +MAU: 24400 799 2 z +MAU: 25200 2999 3 s +MAU: 28200 999 3 t +MAU: 29200 999 3 r +MAU: 30200 599 3 e +MAU: 30800 2399 3 N +MAU: 33200 1199 3 T +MAU: 34400 1999 3 s +MAU: 36400 2399 4 w +MAU: 38800 1599 4 aI +MAU: 40400 2399 4 l +MAU: 42800 1799 5 k +MAU: 44600 799 5 n +MAU: 45400 3199 5 s +MAU: 48600 1599 5 i: +MAU: 50200 999 5 l +MAU: 51200 799 5 I +MAU: 52000 2199 5 N +MAU: 54200 599 6 I +MAU: 54800 1199 6 z +MAU: 56000 2399 7 w +MAU: 58400 1999 7 i: +MAU: 60400 599 7 k +MAU: 61000 599 7 n +MAU: 61600 799 7 @ +MAU: 62400 2599 7 s +MAU: 65000 599 7 I +MAU: 65600 3799 7 z +MAU: 69400 5399 -1 diff --git a/inst/extdata/rawDemoData/annotationFiles/msajc015.parmanipulated b/inst/extdata/rawDemoData/annotationFiles/msajc015.parmanipulated new file mode 100644 index 00000000..cfbed777 --- /dev/null +++ b/inst/extdata/rawDemoData/annotationFiles/msajc015.parmanipulated @@ -0,0 +1,66 @@ +LHD: Partitur 1.2.16 +REP: unknown +SNB: 2 +SAM: 20000 +SBF: 01 +SSB: 16 +NCH: 1 +SPN: unknown +LBD: +KAN: 0 hi: +KAN: 1 emf@saIzd +KAN: 2 hIz +KAN: 3 streNTs +KAN: 4 waIl +KAN: 5 k@nsi:lIN +KAN: 6 hIz +KAN: 7 wi:knIsIz +ORT: 0 he +ORT: 1 emphasized +ORT: 2 his +ORT: 3 strengths +ORT: 4 while +ORT: 5 concealing +ORT: 6 his +ORT: 7 weaknesses +TRN: 6000 63399 0,1,2,3,4,5,6,7 he emphasized his strengths while concealing his weaknesses +MAU: 0 5999 -1 +MAU: 6000 1199 0 h +MAU: 7200 2199 0 i: +MAU: 9400 799 1 e +MAU: 10200 1799 1 m +MAU: 12000 1399 1 f +MAU: 13400 599 1 @ +MAU: 14000 2399 1 s +MAU: 16400 3599 1 aI +MAU: 20000 3199 1 z +MAU: 23200 1199 2 I +MAU: 24400 799 2 z +MAU: 25200 2999 3 s +MAU: 28200 999 3 t +MAU: 29200 999 3 r +MAU: 30200 599 3 e +MAU: 30800 2399 3 N +MAU: 33200 1199 3 T +MAU: 34400 1999 3 s +MAU: 36400 2399 4 w +MAU: 38800 1599 4 aI +MAU: 40400 2399 4 l +MAU: 42800 1799 5 k +MAU: 44600 799 5 n +MAU: 45400 3199 5 s +MAU: 48600 1599 5 i: +MAU: 50200 999 5 l +MAU: 51200 799 5 I +MAU: 52000 2199 5 N +MAU: 54200 599 6 I +MAU: 54800 1199 6 z +MAU: 56000 2399 7 w +MAU: 58400 1999 7 i: +MAU: 60400 599 7 k +MAU: 61000 599 7 n +MAU: 61600 799 7 @ +MAU: 62400 2599 7 s +MAU: 65000 599 7 I +MAU: 65600 3799 7 z +MAU: 69400 5399 -1 diff --git a/inst/extdata/rawDemoData/annotationFiles/msajc015.tone b/inst/extdata/rawDemoData/annotationFiles/msajc015.tone new file mode 100644 index 00000000..d5ac109e --- /dev/null +++ b/inst/extdata/rawDemoData/annotationFiles/msajc015.tone @@ -0,0 +1,10 @@ +signal msajc015 +nfields 1 +# + 0.531305 125 L+H* + 1.486760 125 H* + 1.609948 125 L- + 2.445220 125 H* + 2.910929 125 H* + 3.110782 125 L- + 3.262078 125 L% diff --git a/inst/extdata/rawDemoData/annotationFiles/msajc015_annot.json b/inst/extdata/rawDemoData/annotationFiles/msajc015_annot.json new file mode 100644 index 00000000..57d3306d --- /dev/null +++ b/inst/extdata/rawDemoData/annotationFiles/msajc015_annot.json @@ -0,0 +1,1950 @@ +{ + "name": "msajc015", + "annotates": "msajc015.wav", + "sampleRate": 20000, + "levels": [ + { + "name": "Utterance", + "type": "ITEM", + "items": [ + { + "id": 8, + "labels": [ + { + "name": "Utterance", + "value": "" + } + ] + } + ] + }, + { + "name": "Intonational", + "type": "ITEM", + "items": [ + { + "id": 7, + "labels": [ + { + "name": "Intonational", + "value": "L%" + } + ] + } + ] + }, + { + "name": "Intermediate", + "type": "ITEM", + "items": [ + { + "id": 5, + "labels": [ + { + "name": "Intermediate", + "value": "L-" + } + ] + }, + { + "id": 69, + "labels": [ + { + "name": "Intermediate", + "value": "L-" + } + ] + } + ] + }, + { + "name": "Word", + "type": "ITEM", + "items": [ + { + "id": 2, + "labels": [ + { + "name": "Word", + "value": "F" + }, + { + "name": "Accent", + "value": "W" + }, + { + "name": "Text", + "value": "he" + } + ] + }, + { + "id": 13, + "labels": [ + { + "name": "Word", + "value": "C" + }, + { + "name": "Accent", + "value": "S" + }, + { + "name": "Text", + "value": "emphasized" + } + ] + }, + { + "id": 37, + "labels": [ + { + "name": "Word", + "value": "F" + }, + { + "name": "Accent", + "value": "W" + }, + { + "name": "Text", + "value": "his" + } + ] + }, + { + "id": 46, + "labels": [ + { + "name": "Word", + "value": "C" + }, + { + "name": "Accent", + "value": "S" + }, + { + "name": "Text", + "value": "strengths" + } + ] + }, + { + "id": 66, + "labels": [ + { + "name": "Word", + "value": "C" + }, + { + "name": "Accent", + "value": "W" + }, + { + "name": "Text", + "value": "while" + } + ] + }, + { + "id": 78, + "labels": [ + { + "name": "Word", + "value": "C" + }, + { + "name": "Accent", + "value": "S" + }, + { + "name": "Text", + "value": "concealing" + } + ] + }, + { + "id": 101, + "labels": [ + { + "name": "Word", + "value": "F" + }, + { + "name": "Accent", + "value": "W" + }, + { + "name": "Text", + "value": "his" + } + ] + }, + { + "id": 109, + "labels": [ + { + "name": "Word", + "value": "C" + }, + { + "name": "Accent", + "value": "S" + }, + { + "name": "Text", + "value": "weaknesses" + } + ] + } + ] + }, + { + "name": "Syllable", + "type": "ITEM", + "items": [ + { + "id": 110, + "labels": [ + { + "name": "Syllable", + "value": "W" + } + ] + }, + { + "id": 130, + "labels": [ + { + "name": "Syllable", + "value": "S" + } + ] + }, + { + "id": 131, + "labels": [ + { + "name": "Syllable", + "value": "W" + } + ] + }, + { + "id": 132, + "labels": [ + { + "name": "Syllable", + "value": "W" + } + ] + }, + { + "id": 133, + "labels": [ + { + "name": "Syllable", + "value": "W" + } + ] + }, + { + "id": 134, + "labels": [ + { + "name": "Syllable", + "value": "S" + } + ] + }, + { + "id": 135, + "labels": [ + { + "name": "Syllable", + "value": "S" + } + ] + }, + { + "id": 136, + "labels": [ + { + "name": "Syllable", + "value": "W" + } + ] + }, + { + "id": 137, + "labels": [ + { + "name": "Syllable", + "value": "S" + } + ] + }, + { + "id": 138, + "labels": [ + { + "name": "Syllable", + "value": "W" + } + ] + }, + { + "id": 139, + "labels": [ + { + "name": "Syllable", + "value": "W" + } + ] + }, + { + "id": 140, + "labels": [ + { + "name": "Syllable", + "value": "S" + } + ] + }, + { + "id": 141, + "labels": [ + { + "name": "Syllable", + "value": "W" + } + ] + }, + { + "id": 142, + "labels": [ + { + "name": "Syllable", + "value": "W" + } + ] + } + ] + }, + { + "name": "Phoneme", + "type": "ITEM", + "items": [ + { + "id": 143, + "labels": [ + { + "name": "Phoneme", + "value": "h" + } + ] + }, + { + "id": 144, + "labels": [ + { + "name": "Phoneme", + "value": "i:" + } + ] + }, + { + "id": 145, + "labels": [ + { + "name": "Phoneme", + "value": "E" + } + ] + }, + { + "id": 146, + "labels": [ + { + "name": "Phoneme", + "value": "m" + } + ] + }, + { + "id": 147, + "labels": [ + { + "name": "Phoneme", + "value": "p" + } + ] + }, + { + "id": 148, + "labels": [ + { + "name": "Phoneme", + "value": "f" + } + ] + }, + { + "id": 149, + "labels": [ + { + "name": "Phoneme", + "value": "@" + } + ] + }, + { + "id": 150, + "labels": [ + { + "name": "Phoneme", + "value": "s" + } + ] + }, + { + "id": 151, + "labels": [ + { + "name": "Phoneme", + "value": "ai" + } + ] + }, + { + "id": 152, + "labels": [ + { + "name": "Phoneme", + "value": "z" + } + ] + }, + { + "id": 153, + "labels": [ + { + "name": "Phoneme", + "value": "d" + } + ] + }, + { + "id": 154, + "labels": [ + { + "name": "Phoneme", + "value": "h" + } + ] + }, + { + "id": 155, + "labels": [ + { + "name": "Phoneme", + "value": "I" + } + ] + }, + { + "id": 156, + "labels": [ + { + "name": "Phoneme", + "value": "z" + } + ] + }, + { + "id": 157, + "labels": [ + { + "name": "Phoneme", + "value": "s" + } + ] + }, + { + "id": 158, + "labels": [ + { + "name": "Phoneme", + "value": "t" + } + ] + }, + { + "id": 159, + "labels": [ + { + "name": "Phoneme", + "value": "r" + } + ] + }, + { + "id": 160, + "labels": [ + { + "name": "Phoneme", + "value": "E" + } + ] + }, + { + "id": 161, + "labels": [ + { + "name": "Phoneme", + "value": "N" + } + ] + }, + { + "id": 162, + "labels": [ + { + "name": "Phoneme", + "value": "T" + } + ] + }, + { + "id": 163, + "labels": [ + { + "name": "Phoneme", + "value": "s" + } + ] + }, + { + "id": 164, + "labels": [ + { + "name": "Phoneme", + "value": "w" + } + ] + }, + { + "id": 165, + "labels": [ + { + "name": "Phoneme", + "value": "ai" + } + ] + }, + { + "id": 166, + "labels": [ + { + "name": "Phoneme", + "value": "l" + } + ] + }, + { + "id": 167, + "labels": [ + { + "name": "Phoneme", + "value": "k" + } + ] + }, + { + "id": 168, + "labels": [ + { + "name": "Phoneme", + "value": "@" + } + ] + }, + { + "id": 169, + "labels": [ + { + "name": "Phoneme", + "value": "n" + } + ] + }, + { + "id": 170, + "labels": [ + { + "name": "Phoneme", + "value": "s" + } + ] + }, + { + "id": 171, + "labels": [ + { + "name": "Phoneme", + "value": "i:" + } + ] + }, + { + "id": 172, + "labels": [ + { + "name": "Phoneme", + "value": "l" + } + ] + }, + { + "id": 173, + "labels": [ + { + "name": "Phoneme", + "value": "I" + } + ] + }, + { + "id": 174, + "labels": [ + { + "name": "Phoneme", + "value": "N" + } + ] + }, + { + "id": 175, + "labels": [ + { + "name": "Phoneme", + "value": "I" + } + ] + }, + { + "id": 176, + "labels": [ + { + "name": "Phoneme", + "value": "z" + } + ] + }, + { + "id": 177, + "labels": [ + { + "name": "Phoneme", + "value": "w" + } + ] + }, + { + "id": 178, + "labels": [ + { + "name": "Phoneme", + "value": "i:" + } + ] + }, + { + "id": 179, + "labels": [ + { + "name": "Phoneme", + "value": "k" + } + ] + }, + { + "id": 180, + "labels": [ + { + "name": "Phoneme", + "value": "n" + } + ] + }, + { + "id": 181, + "labels": [ + { + "name": "Phoneme", + "value": "@" + } + ] + }, + { + "id": 182, + "labels": [ + { + "name": "Phoneme", + "value": "s" + } + ] + }, + { + "id": 183, + "labels": [ + { + "name": "Phoneme", + "value": "@" + } + ] + }, + { + "id": 184, + "labels": [ + { + "name": "Phoneme", + "value": "z" + } + ] + } + ] + }, + { + "name": "Phonetic", + "type": "SEGMENT", + "items": [ + { + "id": 185, + "sampleStart": 6000, + "sampleDur": 1004, + "labels": [ + { + "name": "Phonetic", + "value": "h" + } + ] + }, + { + "id": 186, + "sampleStart": 7005, + "sampleDur": 1502, + "labels": [ + { + "name": "Phonetic", + "value": "i:" + } + ] + }, + { + "id": 187, + "sampleStart": 8508, + "sampleDur": 1423, + "labels": [ + { + "name": "Phonetic", + "value": "E" + } + ] + }, + { + "id": 188, + "sampleStart": 9932, + "sampleDur": 1239, + "labels": [ + { + "name": "Phonetic", + "value": "m" + } + ] + }, + { + "id": 189, + "sampleStart": 11172, + "sampleDur": 1619, + "labels": [ + { + "name": "Phonetic", + "value": "p" + } + ] + }, + { + "id": 190, + "sampleStart": 12792, + "sampleDur": 479, + "labels": [ + { + "name": "Phonetic", + "value": "f" + } + ] + }, + { + "id": 191, + "sampleStart": 13272, + "sampleDur": 859, + "labels": [ + { + "name": "Phonetic", + "value": "@" + } + ] + }, + { + "id": 192, + "sampleStart": 14132, + "sampleDur": 1999, + "labels": [ + { + "name": "Phonetic", + "value": "s" + } + ] + }, + { + "id": 193, + "sampleStart": 16132, + "sampleDur": 3989, + "labels": [ + { + "name": "Phonetic", + "value": "ai" + } + ] + }, + { + "id": 194, + "sampleStart": 20122, + "sampleDur": 1579, + "labels": [ + { + "name": "Phonetic", + "value": "z" + } + ] + }, + { + "id": 195, + "sampleStart": 21702, + "sampleDur": 249, + "labels": [ + { + "name": "Phonetic", + "value": "d" + } + ] + }, + { + "id": 196, + "sampleStart": 21952, + "sampleDur": 629, + "labels": [ + { + "name": "Phonetic", + "value": "H" + } + ] + }, + { + "id": 197, + "sampleStart": 22582, + "sampleDur": 619, + "labels": [ + { + "name": "Phonetic", + "value": "h" + } + ] + }, + { + "id": 198, + "sampleStart": 23202, + "sampleDur": 1059, + "labels": [ + { + "name": "Phonetic", + "value": "I" + } + ] + }, + { + "id": 199, + "sampleStart": 24262, + "sampleDur": 3099, + "labels": [ + { + "name": "Phonetic", + "value": "zs" + } + ] + }, + { + "id": 200, + "sampleStart": 27362, + "sampleDur": 898, + "labels": [ + { + "name": "Phonetic", + "value": "t" + } + ] + }, + { + "id": 201, + "sampleStart": 28261, + "sampleDur": 728, + "labels": [ + { + "name": "Phonetic", + "value": "H" + } + ] + }, + { + "id": 202, + "sampleStart": 28990, + "sampleDur": 301, + "labels": [ + { + "name": "Phonetic", + "value": "Or" + } + ] + }, + { + "id": 203, + "sampleStart": 29292, + "sampleDur": 721, + "labels": [ + { + "name": "Phonetic", + "value": "r" + } + ] + }, + { + "id": 204, + "sampleStart": 30014, + "sampleDur": 1556, + "labels": [ + { + "name": "Phonetic", + "value": "E" + } + ] + }, + { + "id": 205, + "sampleStart": 31571, + "sampleDur": 892, + "labels": [ + { + "name": "Phonetic", + "value": "N" + } + ] + }, + { + "id": 206, + "sampleStart": 32464, + "sampleDur": 609, + "labels": [ + { + "name": "Phonetic", + "value": "NH" + } + ] + }, + { + "id": 207, + "sampleStart": 33074, + "sampleDur": 1277, + "labels": [ + { + "name": "Phonetic", + "value": "T" + } + ] + }, + { + "id": 208, + "sampleStart": 34352, + "sampleDur": 1596, + "labels": [ + { + "name": "Phonetic", + "value": "s" + } + ] + }, + { + "id": 209, + "sampleStart": 35949, + "sampleDur": 622, + "labels": [ + { + "name": "Phonetic", + "value": "Ow" + } + ] + }, + { + "id": 210, + "sampleStart": 36572, + "sampleDur": 1499, + "labels": [ + { + "name": "Phonetic", + "value": "w" + } + ] + }, + { + "id": 211, + "sampleStart": 38072, + "sampleDur": 3329, + "labels": [ + { + "name": "Phonetic", + "value": "ai" + } + ] + }, + { + "id": 212, + "sampleStart": 41402, + "sampleDur": 679, + "labels": [ + { + "name": "Phonetic", + "value": "l" + } + ] + }, + { + "id": 213, + "sampleStart": 42082, + "sampleDur": 1009, + "labels": [ + { + "name": "Phonetic", + "value": "k" + } + ] + }, + { + "id": 214, + "sampleStart": 43092, + "sampleDur": 925, + "labels": [ + { + "name": "Phonetic", + "value": "H" + } + ] + }, + { + "id": 215, + "sampleStart": 44018, + "sampleDur": 513, + "labels": [ + { + "name": "Phonetic", + "value": "@" + } + ] + }, + { + "id": 216, + "sampleStart": 44532, + "sampleDur": 889, + "labels": [ + { + "name": "Phonetic", + "value": "n" + } + ] + }, + { + "id": 217, + "sampleStart": 45422, + "sampleDur": 2749, + "labels": [ + { + "name": "Phonetic", + "value": "s" + } + ] + }, + { + "id": 218, + "sampleStart": 48172, + "sampleDur": 1871, + "labels": [ + { + "name": "Phonetic", + "value": "i:" + } + ] + }, + { + "id": 219, + "sampleStart": 50044, + "sampleDur": 1487, + "labels": [ + { + "name": "Phonetic", + "value": "l" + } + ] + }, + { + "id": 220, + "sampleStart": 51532, + "sampleDur": 598, + "labels": [ + { + "name": "Phonetic", + "value": "I" + } + ] + }, + { + "id": 221, + "sampleStart": 52131, + "sampleDur": 1742, + "labels": [ + { + "name": "Phonetic", + "value": "N" + } + ] + }, + { + "id": 222, + "sampleStart": 53874, + "sampleDur": 1105, + "labels": [ + { + "name": "Phonetic", + "value": "I" + } + ] + }, + { + "id": 223, + "sampleStart": 54980, + "sampleDur": 634, + "labels": [ + { + "name": "Phonetic", + "value": "z" + } + ] + }, + { + "id": 224, + "sampleStart": 55615, + "sampleDur": 354, + "labels": [ + { + "name": "Phonetic", + "value": "Ow" + } + ] + }, + { + "id": 225, + "sampleStart": 55970, + "sampleDur": 1560, + "labels": [ + { + "name": "Phonetic", + "value": "w" + } + ] + }, + { + "id": 226, + "sampleStart": 57531, + "sampleDur": 1630, + "labels": [ + { + "name": "Phonetic", + "value": "i:" + } + ] + }, + { + "id": 227, + "sampleStart": 59162, + "sampleDur": 1370, + "labels": [ + { + "name": "Phonetic", + "value": "k" + } + ] + }, + { + "id": 228, + "sampleStart": 60533, + "sampleDur": 389, + "labels": [ + { + "name": "Phonetic", + "value": "H" + } + ] + }, + { + "id": 229, + "sampleStart": 60923, + "sampleDur": 430, + "labels": [ + { + "name": "Phonetic", + "value": "n" + } + ] + }, + { + "id": 230, + "sampleStart": 61354, + "sampleDur": 1108, + "labels": [ + { + "name": "Phonetic", + "value": "@" + } + ] + }, + { + "id": 231, + "sampleStart": 62463, + "sampleDur": 2309, + "labels": [ + { + "name": "Phonetic", + "value": "s" + } + ] + }, + { + "id": 232, + "sampleStart": 64773, + "sampleDur": 1179, + "labels": [ + { + "name": "Phonetic", + "value": "@" + } + ] + }, + { + "id": 233, + "sampleStart": 65953, + "sampleDur": 3183, + "labels": [ + { + "name": "Phonetic", + "value": "z" + } + ] + } + ] + }, + { + "name": "Tone", + "type": "EVENT", + "items": [ + { + "id": 234, + "samplePoint": 10626, + "labels": [ + { + "name": "Tone", + "value": "L+H*" + } + ] + }, + { + "id": 235, + "samplePoint": 29735, + "labels": [ + { + "name": "Tone", + "value": "H*" + } + ] + }, + { + "id": 236, + "samplePoint": 32199, + "labels": [ + { + "name": "Tone", + "value": "L-" + } + ] + }, + { + "id": 237, + "samplePoint": 48904, + "labels": [ + { + "name": "Tone", + "value": "H*" + } + ] + }, + { + "id": 238, + "samplePoint": 58219, + "labels": [ + { + "name": "Tone", + "value": "H*" + } + ] + }, + { + "id": 239, + "samplePoint": 62216, + "labels": [ + { + "name": "Tone", + "value": "L-" + } + ] + }, + { + "id": 240, + "samplePoint": 65242, + "labels": [ + { + "name": "Tone", + "value": "L%" + } + ] + } + ] + }, + { + "name": "Foot", + "type": "ITEM", + "items": [ + { + "id": 70, + "labels": [ + { + "name": "Foot", + "value": "F" + } + ] + }, + { + "id": 79, + "labels": [ + { + "name": "Foot", + "value": "F" + } + ] + }, + { + "id": 88, + "labels": [ + { + "name": "Foot", + "value": "F" + } + ] + }, + { + "id": 102, + "labels": [ + { + "name": "Foot", + "value": "F" + } + ] + }, + { + "id": 104, + "labels": [ + { + "name": "Foot", + "value": "F" + } + ] + } + ] + } + ], + "links": [ + { + "fromID": 8, + "toID": 7 + }, + { + "fromID": 7, + "toID": 70 + }, + { + "fromID": 7, + "toID": 79 + }, + { + "fromID": 7, + "toID": 88 + }, + { + "fromID": 7, + "toID": 102 + }, + { + "fromID": 7, + "toID": 104 + }, + { + "fromID": 7, + "toID": 5 + }, + { + "fromID": 7, + "toID": 69 + }, + { + "fromID": 5, + "toID": 2 + }, + { + "fromID": 5, + "toID": 13 + }, + { + "fromID": 5, + "toID": 37 + }, + { + "fromID": 5, + "toID": 46 + }, + { + "fromID": 69, + "toID": 66 + }, + { + "fromID": 69, + "toID": 78 + }, + { + "fromID": 69, + "toID": 101 + }, + { + "fromID": 69, + "toID": 109 + }, + { + "fromID": 2, + "toID": 110 + }, + { + "fromID": 13, + "toID": 130 + }, + { + "fromID": 13, + "toID": 131 + }, + { + "fromID": 13, + "toID": 132 + }, + { + "fromID": 37, + "toID": 133 + }, + { + "fromID": 46, + "toID": 134 + }, + { + "fromID": 66, + "toID": 135 + }, + { + "fromID": 78, + "toID": 136 + }, + { + "fromID": 78, + "toID": 137 + }, + { + "fromID": 78, + "toID": 138 + }, + { + "fromID": 101, + "toID": 139 + }, + { + "fromID": 109, + "toID": 140 + }, + { + "fromID": 109, + "toID": 141 + }, + { + "fromID": 109, + "toID": 142 + }, + { + "fromID": 110, + "toID": 143 + }, + { + "fromID": 110, + "toID": 144 + }, + { + "fromID": 130, + "toID": 145 + }, + { + "fromID": 130, + "toID": 146 + }, + { + "fromID": 130, + "toID": 147 + }, + { + "fromID": 130, + "toID": 234 + }, + { + "fromID": 131, + "toID": 148 + }, + { + "fromID": 131, + "toID": 149 + }, + { + "fromID": 132, + "toID": 150 + }, + { + "fromID": 132, + "toID": 151 + }, + { + "fromID": 132, + "toID": 152 + }, + { + "fromID": 132, + "toID": 153 + }, + { + "fromID": 133, + "toID": 154 + }, + { + "fromID": 133, + "toID": 155 + }, + { + "fromID": 133, + "toID": 156 + }, + { + "fromID": 134, + "toID": 157 + }, + { + "fromID": 134, + "toID": 158 + }, + { + "fromID": 134, + "toID": 159 + }, + { + "fromID": 134, + "toID": 160 + }, + { + "fromID": 134, + "toID": 161 + }, + { + "fromID": 134, + "toID": 162 + }, + { + "fromID": 134, + "toID": 163 + }, + { + "fromID": 134, + "toID": 235 + }, + { + "fromID": 135, + "toID": 164 + }, + { + "fromID": 135, + "toID": 165 + }, + { + "fromID": 135, + "toID": 166 + }, + { + "fromID": 136, + "toID": 167 + }, + { + "fromID": 136, + "toID": 168 + }, + { + "fromID": 136, + "toID": 169 + }, + { + "fromID": 137, + "toID": 170 + }, + { + "fromID": 137, + "toID": 171 + }, + { + "fromID": 137, + "toID": 237 + }, + { + "fromID": 138, + "toID": 172 + }, + { + "fromID": 138, + "toID": 173 + }, + { + "fromID": 138, + "toID": 174 + }, + { + "fromID": 139, + "toID": 175 + }, + { + "fromID": 139, + "toID": 176 + }, + { + "fromID": 140, + "toID": 177 + }, + { + "fromID": 140, + "toID": 178 + }, + { + "fromID": 140, + "toID": 179 + }, + { + "fromID": 140, + "toID": 238 + }, + { + "fromID": 141, + "toID": 180 + }, + { + "fromID": 141, + "toID": 181 + }, + { + "fromID": 142, + "toID": 182 + }, + { + "fromID": 142, + "toID": 183 + }, + { + "fromID": 142, + "toID": 184 + }, + { + "fromID": 143, + "toID": 185 + }, + { + "fromID": 144, + "toID": 186 + }, + { + "fromID": 145, + "toID": 187 + }, + { + "fromID": 146, + "toID": 188 + }, + { + "fromID": 147, + "toID": 189 + }, + { + "fromID": 148, + "toID": 190 + }, + { + "fromID": 149, + "toID": 191 + }, + { + "fromID": 150, + "toID": 192 + }, + { + "fromID": 151, + "toID": 193 + }, + { + "fromID": 152, + "toID": 194 + }, + { + "fromID": 153, + "toID": 195 + }, + { + "fromID": 153, + "toID": 196 + }, + { + "fromID": 154, + "toID": 197 + }, + { + "fromID": 155, + "toID": 198 + }, + { + "fromID": 156, + "toID": 199 + }, + { + "fromID": 157, + "toID": 199 + }, + { + "fromID": 158, + "toID": 200 + }, + { + "fromID": 158, + "toID": 201 + }, + { + "fromID": 159, + "toID": 202 + }, + { + "fromID": 159, + "toID": 203 + }, + { + "fromID": 160, + "toID": 204 + }, + { + "fromID": 161, + "toID": 205 + }, + { + "fromID": 161, + "toID": 206 + }, + { + "fromID": 162, + "toID": 207 + }, + { + "fromID": 163, + "toID": 208 + }, + { + "fromID": 164, + "toID": 209 + }, + { + "fromID": 164, + "toID": 210 + }, + { + "fromID": 165, + "toID": 211 + }, + { + "fromID": 166, + "toID": 212 + }, + { + "fromID": 167, + "toID": 213 + }, + { + "fromID": 167, + "toID": 214 + }, + { + "fromID": 168, + "toID": 215 + }, + { + "fromID": 169, + "toID": 216 + }, + { + "fromID": 170, + "toID": 217 + }, + { + "fromID": 171, + "toID": 218 + }, + { + "fromID": 172, + "toID": 219 + }, + { + "fromID": 173, + "toID": 220 + }, + { + "fromID": 174, + "toID": 221 + }, + { + "fromID": 175, + "toID": 222 + }, + { + "fromID": 176, + "toID": 223 + }, + { + "fromID": 177, + "toID": 224 + }, + { + "fromID": 177, + "toID": 225 + }, + { + "fromID": 178, + "toID": 226 + }, + { + "fromID": 179, + "toID": 227 + }, + { + "fromID": 179, + "toID": 228 + }, + { + "fromID": 180, + "toID": 229 + }, + { + "fromID": 181, + "toID": 230 + }, + { + "fromID": 182, + "toID": 231 + }, + { + "fromID": 183, + "toID": 232 + }, + { + "fromID": 184, + "toID": 233 + }, + { + "fromID": 70, + "toID": 130 + }, + { + "fromID": 70, + "toID": 131 + }, + { + "fromID": 70, + "toID": 132 + }, + { + "fromID": 70, + "toID": 133 + }, + { + "fromID": 79, + "toID": 134 + }, + { + "fromID": 88, + "toID": 135 + }, + { + "fromID": 88, + "toID": 136 + }, + { + "fromID": 102, + "toID": 137 + }, + { + "fromID": 102, + "toID": 138 + }, + { + "fromID": 102, + "toID": 139 + }, + { + "fromID": 104, + "toID": 140 + }, + { + "fromID": 104, + "toID": 141 + }, + { + "fromID": 104, + "toID": 142 + } + ] +} + diff --git a/inst/extdata/rawDemoData/annotationFiles/msajc022.TextGrid b/inst/extdata/rawDemoData/annotationFiles/msajc022.TextGrid new file mode 100644 index 00000000..fad7d343 --- /dev/null +++ b/inst/extdata/rawDemoData/annotationFiles/msajc022.TextGrid @@ -0,0 +1,576 @@ +File type = "ooTextFile" +Object class = "TextGrid" + +xmin = 0 +xmax = 2.76955 +tiers? +size = 11 +item []: + item [1]: + class = "IntervalTier" + name = "Utterance" + xmin = 0 + xmax = 2.76955 + intervals: size = 3 + intervals [1]: + xmin = 0 + xmax = 0.3 + text = "" + intervals [2]: + xmin = 0.3 + xmax = 2.469588 + text = "" + intervals [3]: + xmin = 2.469588 + xmax = 2.76955 + text = "" + item [2]: + class = "IntervalTier" + name = "Intonational" + xmin = 0 + xmax = 2.76955 + intervals: size = 3 + intervals [1]: + xmin = 0 + xmax = 0.3 + text = "" + intervals [2]: + xmin = 0.3 + xmax = 2.469588 + text = "L%" + intervals [3]: + xmin = 2.469588 + xmax = 2.76955 + text = "" + item [3]: + class = "IntervalTier" + name = "Intermediate" + xmin = 0 + xmax = 2.76955 + intervals: size = 6 + intervals [1]: + xmin = 0 + xmax = 0.3 + text = "" + intervals [2]: + xmin = 0.3 + xmax = 0.662486 + text = "L-" + intervals [3]: + xmin = 0.662486 + xmax = 1.113746 + text = "H-" + intervals [4]: + xmin = 1.113746 + xmax = 1.80634 + text = "L-" + intervals [5]: + xmin = 1.80634 + xmax = 2.469588 + text = "L-" + intervals [6]: + xmin = 2.469588 + xmax = 2.76955 + text = "" + item [4]: + class = "IntervalTier" + name = "Word" + xmin = 0 + xmax = 2.76955 + intervals: size = 9 + intervals [1]: + xmin = 0 + xmax = 0.3 + text = "" + intervals [2]: + xmin = 0.3 + xmax = 0.662486 + text = "C" + intervals [3]: + xmin = 0.662486 + xmax = 0.775546 + text = "F" + intervals [4]: + xmin = 0.775546 + xmax = 1.113746 + text = "C" + intervals [5]: + xmin = 1.113746 + xmax = 1.400706 + text = "C" + intervals [6]: + xmin = 1.400706 + xmax = 1.80634 + text = "C" + intervals [7]: + xmin = 1.80634 + xmax = 1.89034 + text = "F" + intervals [8]: + xmin = 1.89034 + xmax = 2.469588 + text = "C" + intervals [9]: + xmin = 2.469588 + xmax = 2.76955 + text = "" + item [5]: + class = "IntervalTier" + name = "Accent" + xmin = 0 + xmax = 2.76955 + intervals: size = 9 + intervals [1]: + xmin = 0 + xmax = 0.3 + text = "" + intervals [2]: + xmin = 0.3 + xmax = 0.662486 + text = "S" + intervals [3]: + xmin = 0.662486 + xmax = 0.775546 + text = "W" + intervals [4]: + xmin = 0.775546 + xmax = 1.113746 + text = "S" + intervals [5]: + xmin = 1.113746 + xmax = 1.400706 + text = "S" + intervals [6]: + xmin = 1.400706 + xmax = 1.80634 + text = "S" + intervals [7]: + xmin = 1.80634 + xmax = 1.89034 + text = "W" + intervals [8]: + xmin = 1.89034 + xmax = 2.469588 + text = "S" + intervals [9]: + xmin = 2.469588 + xmax = 2.76955 + text = "" + item [6]: + class = "IntervalTier" + name = "Text" + xmin = 0 + xmax = 2.76955 + intervals: size = 9 + intervals [1]: + xmin = 0 + xmax = 0.3 + text = "" + intervals [2]: + xmin = 0.3 + xmax = 0.662486 + text = "itches" + intervals [3]: + xmin = 0.662486 + xmax = 0.775546 + text = "are" + intervals [4]: + xmin = 0.775546 + xmax = 1.113746 + text = "always" + intervals [5]: + xmin = 1.113746 + xmax = 1.400706 + text = "so" + intervals [6]: + xmin = 1.400706 + xmax = 1.80634 + text = "tempting" + intervals [7]: + xmin = 1.80634 + xmax = 1.89034 + text = "to" + intervals [8]: + xmin = 1.89034 + xmax = 2.469588 + text = "scratch" + intervals [9]: + xmin = 2.469588 + xmax = 2.76955 + text = "" + item [7]: + class = "IntervalTier" + name = "Syllable" + xmin = 0 + xmax = 2.76955 + intervals: size = 12 + intervals [1]: + xmin = 0 + xmax = 0.3 + text = "" + intervals [2]: + xmin = 0.3 + xmax = 0.372505 + text = "S" + intervals [3]: + xmin = 0.372505 + xmax = 0.662486 + text = "W" + intervals [4]: + xmin = 0.662486 + xmax = 0.775546 + text = "W" + intervals [5]: + xmin = 0.775546 + xmax = 0.947598 + text = "S" + intervals [6]: + xmin = 0.947598 + xmax = 1.113746 + text = "W" + intervals [7]: + xmin = 1.113746 + xmax = 1.400706 + text = "S" + intervals [8]: + xmin = 1.400706 + xmax = 1.655706 + text = "S" + intervals [9]: + xmin = 1.655706 + xmax = 1.80634 + text = "W" + intervals [10]: + xmin = 1.80634 + xmax = 1.89034 + text = "W" + intervals [11]: + xmin = 1.89034 + xmax = 2.469588 + text = "S" + intervals [12]: + xmin = 2.469588 + xmax = 2.76955 + text = "" + item [8]: + class = "IntervalTier" + name = "Phoneme" + xmin = 0 + xmax = 2.76955 + intervals: size = 27 + intervals [1]: + xmin = 0 + xmax = 0.3 + text = "" + intervals [2]: + xmin = 0.3 + xmax = 0.372505 + text = "I" + intervals [3]: + xmin = 0.372505 + xmax = 0.506007 + text = "tS" + intervals [4]: + xmin = 0.506007 + xmax = 0.596009 + text = "@" + intervals [5]: + xmin = 0.596009 + xmax = 0.662486 + text = "z" + intervals [6]: + xmin = 0.662486 + xmax = 0.713478 + text = "@" + intervals [7]: + xmin = 0.713478 + xmax = 0.775546 + text = "r" + intervals [8]: + xmin = 0.775546 + xmax = 0.891517 + text = "o:" + intervals [9]: + xmin = 0.891517 + xmax = 0.947598 + text = "l" + intervals [10]: + xmin = 0.947598 + xmax = 0.989522 + text = "w" + intervals [11]: + xmin = 0.989522 + xmax = 1.113746 + text = "ei" + intervals [12]: + xmin = 1.113746 + xmax = 1.280206 + text = "z_s" + intervals [13]: + xmin = 1.280206 + xmax = 1.400706 + text = "@u" + intervals [14]: + xmin = 1.400706 + xmax = 1.521206 + text = "t" + intervals [15]: + xmin = 1.521206 + xmax = 1.587206 + text = "E" + intervals [16]: + xmin = 1.587206 + xmax = 1.655706 + text = "m" + intervals [17]: + xmin = 1.655706 + xmax = 1.698706 + text = "p" + intervals [18]: + xmin = 1.718206 + xmax = 1.751843 + text = "I" + intervals [19]: + xmin = 1.751843 + xmax = 1.80634 + text = "N" + intervals [20]: + xmin = 1.80634 + xmax = 1.872591 + text = "t" + intervals [21]: + xmin = 1.872591 + xmax = 1.89034 + text = "@" + intervals [22]: + xmin = 1.89034 + xmax = 1.996338 + text = "s" + intervals [23]: + xmin = 1.996338 + xmax = 2.090588 + text = "k" + intervals [24]: + xmin = 2.090588 + xmax = 2.146346 + text = "r" + intervals [25]: + xmin = 2.146346 + xmax = 2.25409 + text = "A" + intervals [26]: + xmin = 2.25409 + xmax = 2.469588 + text = "tS" + intervals [27]: + xmin = 2.469588 + xmax = 2.76955 + text = "" + item [9]: + class = "IntervalTier" + name = "Phonetic" + xmin = 0 + xmax = 2.76955 + intervals: size = 33 + intervals [1]: + xmin = 0 + xmax = 0.3 + text = "" + intervals [2]: + xmin = 0.3 + xmax = 0.372505 + text = "I" + intervals [3]: + xmin = 0.372505 + xmax = 0.418007 + text = "t" + intervals [4]: + xmin = 0.418007 + xmax = 0.506007 + text = "S" + intervals [5]: + xmin = 0.506007 + xmax = 0.596009 + text = "@" + intervals [6]: + xmin = 0.596009 + xmax = 0.662486 + text = "z" + intervals [7]: + xmin = 0.662486 + xmax = 0.713478 + text = "@" + intervals [8]: + xmin = 0.713478 + xmax = 0.775546 + text = "r" + intervals [9]: + xmin = 0.775546 + xmax = 0.891517 + text = "o:" + intervals [10]: + xmin = 0.891517 + xmax = 0.947598 + text = "l" + intervals [11]: + xmin = 0.947598 + xmax = 0.989522 + text = "w" + intervals [12]: + xmin = 0.989522 + xmax = 1.113746 + text = "ei" + intervals [13]: + xmin = 1.113746 + xmax = 1.280206 + text = "zs" + intervals [14]: + xmin = 1.280206 + xmax = 1.400706 + text = "@u" + intervals [15]: + xmin = 1.400706 + xmax = 1.466706 + text = "t" + intervals [16]: + xmin = 1.466706 + xmax = 1.521206 + text = "H" + intervals [17]: + xmin = 1.521206 + xmax = 1.587206 + text = "E" + intervals [18]: + xmin = 1.587206 + xmax = 1.655706 + text = "m" + intervals [19]: + xmin = 1.655706 + xmax = 1.698706 + text = "pt" + intervals [20]: + xmin = 1.698706 + xmax = 1.718206 + text = "H" + intervals [21]: + xmin = 1.718206 + xmax = 1.751843 + text = "I" + intervals [22]: + xmin = 1.751843 + xmax = 1.80634 + text = "N" + intervals [23]: + xmin = 1.80634 + xmax = 1.841588 + text = "t" + intervals [24]: + xmin = 1.841588 + xmax = 1.872591 + text = "H" + intervals [25]: + xmin = 1.872591 + xmax = 1.89034 + text = "@" + intervals [26]: + xmin = 1.89034 + xmax = 1.996338 + text = "s" + intervals [27]: + xmin = 1.996338 + xmax = 2.071424 + text = "k" + intervals [28]: + xmin = 2.071424 + xmax = 2.090588 + text = "H" + intervals [29]: + xmin = 2.090588 + xmax = 2.146346 + text = "r" + intervals [30]: + xmin = 2.146346 + xmax = 2.25409 + text = "A" + intervals [31]: + xmin = 2.25409 + xmax = 2.305598 + text = "t" + intervals [32]: + xmin = 2.305598 + xmax = 2.469588 + text = "S" + intervals [33]: + xmin = 2.469588 + xmax = 2.76955 + text = "" + item [10]: + class = "TextTier" + name = "Tone" + xmin = 0 + xmax = 2.76955 + points: size = 10 + points [1]: + number = 0.326306 + mark = "H*" + points [2]: + number = 0.574676 + mark = "L-" + points [3]: + number = 0.854687 + mark = "H*" + points [4]: + number = 1.044539 + mark = "H-" + points [5]: + number = 1.305299 + mark = "H*" + points [6]: + number = 1.556909 + mark = "!H*" + points [7]: + number = 1.771921 + mark = "L-" + points [8]: + number = 2.131038 + mark = "H+L*" + points [9]: + number = 2.220245 + mark = "L-" + points [10]: + number = 2.270567 + mark = "L%" + item [11]: + class = "IntervalTier" + name = "Foot" + xmin = 0 + xmax = 2.76955 + intervals: size = 7 + intervals [1]: + xmin = 0 + xmax = 0.3 + text = "" + intervals [2]: + xmin = 0.3 + xmax = 0.775546 + text = "F" + intervals [3]: + xmin = 0.775546 + xmax = 1.113746 + text = "F" + intervals [4]: + xmin = 1.113746 + xmax = 1.400706 + text = "F" + intervals [5]: + xmin = 1.400706 + xmax = 1.89034 + text = "F" + intervals [6]: + xmin = 1.89034 + xmax = 2.469588 + text = "F" + intervals [7]: + xmin = 2.469588 + xmax = 2.76955 + text = "" diff --git a/inst/extdata/rawDemoData/annotationFiles/msajc022.hlb b/inst/extdata/rawDemoData/annotationFiles/msajc022.hlb new file mode 100644 index 00000000..64757f69 --- /dev/null +++ b/inst/extdata/rawDemoData/annotationFiles/msajc022.hlb @@ -0,0 +1,216 @@ +**EMU hierarchical labels** +173 +Syllable Syllable +96 S +97 W +98 W +99 S +100 W +101 S +102 S +103 W +104 W +105 S + +Word Word Accent Text +2 C S itches +19 F W are +28 C S always +42 C S so +51 C S tempting +71 F W to +81 C S scratch + +Foot Foot +72 F +75 F +82 F +94 F +95 F + +Phoneme Phoneme +106 I +107 tS +108 @ +109 z +110 @ +111 r +112 o: +113 l +114 w +115 ei +116 z +117 s +118 @u +119 t +120 E +121 m +122 p +123 t +124 I +125 N +126 t +127 @ +128 s +129 k +130 r +131 A +132 tS + +Phonetic Phonetic +133 I +134 t +135 S +136 @ +137 z +138 @ +139 r +140 o: +141 l +142 w +143 ei +144 zs +145 @u +146 t +147 H +148 E +149 m +150 pt +151 H +152 I +153 N +154 t +155 H +156 @ +157 s +158 k +159 H +160 r +161 A +162 t +163 S + +Tone Tone +164 H* +165 L- +166 H* +167 H- +168 H* +169 !H* +170 L- +171 H+L* +172 L- +173 L% + +Utterance Utterance +8 + +Intonational Intonational +7 L% + +Intermediate Intermediate +5 L- +22 H- +45 L- +74 L- + + +2 96 97 106 107 108 109 133 134 135 136 137 164 +5 2 96 97 106 107 108 109 133 134 135 136 137 164 +7 2 5 19 22 28 42 45 51 71 72 74 75 81 82 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 166 168 169 171 +8 2 5 7 19 22 28 42 45 51 71 72 74 75 81 82 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 166 168 169 171 +19 98 110 111 138 139 +22 19 28 98 99 100 110 111 112 113 114 115 116 138 139 140 141 142 143 144 166 +28 99 100 112 113 114 115 116 140 141 142 143 144 166 +42 101 117 118 144 145 168 +45 42 51 101 102 103 117 118 119 120 121 122 123 124 125 144 145 146 147 148 149 150 151 152 153 168 169 +51 102 103 119 120 121 122 123 124 125 146 147 148 149 150 151 152 153 169 +71 104 126 127 154 155 156 +72 96 97 98 106 107 108 109 110 111 133 134 135 136 137 138 139 164 +74 71 81 104 105 126 127 128 129 130 131 132 154 155 156 157 158 159 160 161 162 163 171 +75 99 100 112 113 114 115 116 140 141 142 143 144 166 +81 105 128 129 130 131 132 157 158 159 160 161 162 163 171 +82 101 117 118 144 145 168 +94 102 103 104 119 120 121 122 123 124 125 126 127 146 147 148 149 150 151 152 153 154 155 156 169 +95 105 128 129 130 131 132 157 158 159 160 161 162 163 171 +96 106 133 164 +97 107 108 109 134 135 136 137 +98 110 111 138 139 +99 112 113 140 141 166 +100 114 115 116 142 143 144 +101 117 118 144 145 168 +102 119 120 121 122 146 147 148 149 150 169 +103 123 124 125 150 151 152 153 +104 126 127 154 155 156 +105 128 129 130 131 132 157 158 159 160 161 162 163 171 +106 133 +107 134 135 +108 136 +109 137 +110 138 +111 139 +112 140 +113 141 +114 142 +115 143 +116 144 +117 144 +118 145 +119 146 147 +120 148 +121 149 +122 150 +123 150 151 +124 152 +125 153 +126 154 155 +127 156 +128 157 +129 158 159 +130 160 +131 161 +132 162 163 +133 +134 +135 +136 +137 +138 +139 +140 +141 +142 +143 +144 +145 +146 +147 +148 +149 +150 +151 +152 +153 +154 +155 +156 +157 +158 +159 +160 +161 +162 +163 +164 +165 +166 +167 +168 +169 +170 +171 +172 +173 + +0 + diff --git a/inst/extdata/rawDemoData/annotationFiles/msajc022.lab b/inst/extdata/rawDemoData/annotationFiles/msajc022.lab new file mode 100644 index 00000000..b5b1b123 --- /dev/null +++ b/inst/extdata/rawDemoData/annotationFiles/msajc022.lab @@ -0,0 +1,35 @@ +signal msajc022 +nfields 1 +# + 0.300000 125 H# + 0.372505 125 I + 0.418007 125 t + 0.506007 125 S + 0.596009 125 @ + 0.662486 125 z + 0.713478 125 @ + 0.775546 125 r + 0.891517 125 o: + 0.947598 125 l + 0.989522 125 w + 1.113746 125 ei + 1.280206 125 zs + 1.400706 125 @u + 1.466706 125 t + 1.521206 125 H + 1.587206 125 E + 1.655706 125 m + 1.698706 125 pt + 1.718206 125 H + 1.751843 125 I + 1.806340 125 N + 1.841588 125 t + 1.872591 125 H + 1.890340 125 @ + 1.996338 125 s + 2.071424 125 k + 2.090588 125 H + 2.146346 125 r + 2.254090 125 A + 2.305598 125 t + 2.469588 125 S diff --git a/inst/extdata/rawDemoData/annotationFiles/msajc022.par b/inst/extdata/rawDemoData/annotationFiles/msajc022.par new file mode 100644 index 00000000..f5481392 --- /dev/null +++ b/inst/extdata/rawDemoData/annotationFiles/msajc022.par @@ -0,0 +1,50 @@ +LHD: Partitur 1.2.16 +REP: unknown +SNB: 2 +SAM: 20000 +SBF: 01 +SSB: 16 +NCH: 1 +SPN: unknown +LBD: +KAN: 0 ItSIz +KAN: 1 A: +KAN: 2 O:lweIz +KAN: 3 s@U +KAN: 4 temptIN +KAN: 5 t@ +KAN: 6 skr{tS +ORT: 0 itches +ORT: 1 are +ORT: 2 always +ORT: 3 so +ORT: 4 tempting +ORT: 5 to +ORT: 6 scratch +TRN: 5800 43599 0,1,2,3,4,5,6 itches are always so tempting to scratch +MAU: 0 5799 -1 +MAU: 5800 2399 0 I +MAU: 8200 2199 0 tS +MAU: 10400 1399 0 I +MAU: 11800 1799 0 z +MAU: 13600 1199 1 @ +MAU: 14800 3999 2 O: +MAU: 18800 1999 2 w +MAU: 20800 1399 2 eI +MAU: 22200 999 2 z +MAU: 23200 2399 3 s +MAU: 25600 3399 3 @U +MAU: 29000 1599 4 t +MAU: 30600 1399 4 e +MAU: 32000 1799 4 m +MAU: 33800 799 4 t +MAU: 34600 599 4 I +MAU: 35200 1599 4 N +MAU: 36800 599 5 t +MAU: 37400 599 5 @ +MAU: 38000 3199 6 s +MAU: 41200 599 6 k +MAU: 41800 1399 6 r +MAU: 43200 2999 6 { +MAU: 46200 3199 6 tS +MAU: 49400 5599 -1 diff --git a/inst/extdata/rawDemoData/annotationFiles/msajc022.parmanipulated b/inst/extdata/rawDemoData/annotationFiles/msajc022.parmanipulated new file mode 100644 index 00000000..f5481392 --- /dev/null +++ b/inst/extdata/rawDemoData/annotationFiles/msajc022.parmanipulated @@ -0,0 +1,50 @@ +LHD: Partitur 1.2.16 +REP: unknown +SNB: 2 +SAM: 20000 +SBF: 01 +SSB: 16 +NCH: 1 +SPN: unknown +LBD: +KAN: 0 ItSIz +KAN: 1 A: +KAN: 2 O:lweIz +KAN: 3 s@U +KAN: 4 temptIN +KAN: 5 t@ +KAN: 6 skr{tS +ORT: 0 itches +ORT: 1 are +ORT: 2 always +ORT: 3 so +ORT: 4 tempting +ORT: 5 to +ORT: 6 scratch +TRN: 5800 43599 0,1,2,3,4,5,6 itches are always so tempting to scratch +MAU: 0 5799 -1 +MAU: 5800 2399 0 I +MAU: 8200 2199 0 tS +MAU: 10400 1399 0 I +MAU: 11800 1799 0 z +MAU: 13600 1199 1 @ +MAU: 14800 3999 2 O: +MAU: 18800 1999 2 w +MAU: 20800 1399 2 eI +MAU: 22200 999 2 z +MAU: 23200 2399 3 s +MAU: 25600 3399 3 @U +MAU: 29000 1599 4 t +MAU: 30600 1399 4 e +MAU: 32000 1799 4 m +MAU: 33800 799 4 t +MAU: 34600 599 4 I +MAU: 35200 1599 4 N +MAU: 36800 599 5 t +MAU: 37400 599 5 @ +MAU: 38000 3199 6 s +MAU: 41200 599 6 k +MAU: 41800 1399 6 r +MAU: 43200 2999 6 { +MAU: 46200 3199 6 tS +MAU: 49400 5599 -1 diff --git a/inst/extdata/rawDemoData/annotationFiles/msajc022.tone b/inst/extdata/rawDemoData/annotationFiles/msajc022.tone new file mode 100644 index 00000000..04db5cb1 --- /dev/null +++ b/inst/extdata/rawDemoData/annotationFiles/msajc022.tone @@ -0,0 +1,13 @@ +signal msajc022 +nfields 1 +# + 0.327306 125 H* + 0.575676 125 L- + 0.855687 125 H* + 1.045539 125 H- + 1.306299 125 H* + 1.557909 125 !H* + 1.772921 125 L- + 2.132038 125 H+L* + 2.221245 125 L- + 2.271567 125 L% diff --git a/inst/extdata/rawDemoData/annotationFiles/msajc022_annot.json b/inst/extdata/rawDemoData/annotationFiles/msajc022_annot.json new file mode 100644 index 00000000..b1b0cfd7 --- /dev/null +++ b/inst/extdata/rawDemoData/annotationFiles/msajc022_annot.json @@ -0,0 +1,1464 @@ +{ + "name": "msajc022", + "annotates": "msajc022.wav", + "sampleRate": 20000, + "levels": [ + { + "name": "Utterance", + "type": "ITEM", + "items": [ + { + "id": 8, + "labels": [ + { + "name": "Utterance", + "value": "" + } + ] + } + ] + }, + { + "name": "Intonational", + "type": "ITEM", + "items": [ + { + "id": 7, + "labels": [ + { + "name": "Intonational", + "value": "L%" + } + ] + } + ] + }, + { + "name": "Intermediate", + "type": "ITEM", + "items": [ + { + "id": 5, + "labels": [ + { + "name": "Intermediate", + "value": "L-" + } + ] + }, + { + "id": 22, + "labels": [ + { + "name": "Intermediate", + "value": "H-" + } + ] + }, + { + "id": 45, + "labels": [ + { + "name": "Intermediate", + "value": "L-" + } + ] + }, + { + "id": 74, + "labels": [ + { + "name": "Intermediate", + "value": "L-" + } + ] + } + ] + }, + { + "name": "Word", + "type": "ITEM", + "items": [ + { + "id": 2, + "labels": [ + { + "name": "Word", + "value": "C" + }, + { + "name": "Accent", + "value": "S" + }, + { + "name": "Text", + "value": "itches" + } + ] + }, + { + "id": 19, + "labels": [ + { + "name": "Word", + "value": "F" + }, + { + "name": "Accent", + "value": "W" + }, + { + "name": "Text", + "value": "are" + } + ] + }, + { + "id": 28, + "labels": [ + { + "name": "Word", + "value": "C" + }, + { + "name": "Accent", + "value": "S" + }, + { + "name": "Text", + "value": "always" + } + ] + }, + { + "id": 42, + "labels": [ + { + "name": "Word", + "value": "C" + }, + { + "name": "Accent", + "value": "S" + }, + { + "name": "Text", + "value": "so" + } + ] + }, + { + "id": 51, + "labels": [ + { + "name": "Word", + "value": "C" + }, + { + "name": "Accent", + "value": "S" + }, + { + "name": "Text", + "value": "tempting" + } + ] + }, + { + "id": 71, + "labels": [ + { + "name": "Word", + "value": "F" + }, + { + "name": "Accent", + "value": "W" + }, + { + "name": "Text", + "value": "to" + } + ] + }, + { + "id": 81, + "labels": [ + { + "name": "Word", + "value": "C" + }, + { + "name": "Accent", + "value": "S" + }, + { + "name": "Text", + "value": "scratch" + } + ] + } + ] + }, + { + "name": "Syllable", + "type": "ITEM", + "items": [ + { + "id": 96, + "labels": [ + { + "name": "Syllable", + "value": "S" + } + ] + }, + { + "id": 97, + "labels": [ + { + "name": "Syllable", + "value": "W" + } + ] + }, + { + "id": 98, + "labels": [ + { + "name": "Syllable", + "value": "W" + } + ] + }, + { + "id": 99, + "labels": [ + { + "name": "Syllable", + "value": "S" + } + ] + }, + { + "id": 100, + "labels": [ + { + "name": "Syllable", + "value": "W" + } + ] + }, + { + "id": 101, + "labels": [ + { + "name": "Syllable", + "value": "S" + } + ] + }, + { + "id": 102, + "labels": [ + { + "name": "Syllable", + "value": "S" + } + ] + }, + { + "id": 103, + "labels": [ + { + "name": "Syllable", + "value": "W" + } + ] + }, + { + "id": 104, + "labels": [ + { + "name": "Syllable", + "value": "W" + } + ] + }, + { + "id": 105, + "labels": [ + { + "name": "Syllable", + "value": "S" + } + ] + } + ] + }, + { + "name": "Phoneme", + "type": "ITEM", + "items": [ + { + "id": 106, + "labels": [ + { + "name": "Phoneme", + "value": "I" + } + ] + }, + { + "id": 107, + "labels": [ + { + "name": "Phoneme", + "value": "tS" + } + ] + }, + { + "id": 108, + "labels": [ + { + "name": "Phoneme", + "value": "@" + } + ] + }, + { + "id": 109, + "labels": [ + { + "name": "Phoneme", + "value": "z" + } + ] + }, + { + "id": 110, + "labels": [ + { + "name": "Phoneme", + "value": "@" + } + ] + }, + { + "id": 111, + "labels": [ + { + "name": "Phoneme", + "value": "r" + } + ] + }, + { + "id": 112, + "labels": [ + { + "name": "Phoneme", + "value": "o:" + } + ] + }, + { + "id": 113, + "labels": [ + { + "name": "Phoneme", + "value": "l" + } + ] + }, + { + "id": 114, + "labels": [ + { + "name": "Phoneme", + "value": "w" + } + ] + }, + { + "id": 115, + "labels": [ + { + "name": "Phoneme", + "value": "ei" + } + ] + }, + { + "id": 116, + "labels": [ + { + "name": "Phoneme", + "value": "z" + } + ] + }, + { + "id": 117, + "labels": [ + { + "name": "Phoneme", + "value": "s" + } + ] + }, + { + "id": 118, + "labels": [ + { + "name": "Phoneme", + "value": "@u" + } + ] + }, + { + "id": 119, + "labels": [ + { + "name": "Phoneme", + "value": "t" + } + ] + }, + { + "id": 120, + "labels": [ + { + "name": "Phoneme", + "value": "E" + } + ] + }, + { + "id": 121, + "labels": [ + { + "name": "Phoneme", + "value": "m" + } + ] + }, + { + "id": 122, + "labels": [ + { + "name": "Phoneme", + "value": "p" + } + ] + }, + { + "id": 123, + "labels": [ + { + "name": "Phoneme", + "value": "t" + } + ] + }, + { + "id": 124, + "labels": [ + { + "name": "Phoneme", + "value": "I" + } + ] + }, + { + "id": 125, + "labels": [ + { + "name": "Phoneme", + "value": "N" + } + ] + }, + { + "id": 126, + "labels": [ + { + "name": "Phoneme", + "value": "t" + } + ] + }, + { + "id": 127, + "labels": [ + { + "name": "Phoneme", + "value": "@" + } + ] + }, + { + "id": 128, + "labels": [ + { + "name": "Phoneme", + "value": "s" + } + ] + }, + { + "id": 129, + "labels": [ + { + "name": "Phoneme", + "value": "k" + } + ] + }, + { + "id": 130, + "labels": [ + { + "name": "Phoneme", + "value": "r" + } + ] + }, + { + "id": 131, + "labels": [ + { + "name": "Phoneme", + "value": "A" + } + ] + }, + { + "id": 132, + "labels": [ + { + "name": "Phoneme", + "value": "tS" + } + ] + } + ] + }, + { + "name": "Phonetic", + "type": "SEGMENT", + "items": [ + { + "id": 133, + "sampleStart": 6000, + "sampleDur": 1449, + "labels": [ + { + "name": "Phonetic", + "value": "I" + } + ] + }, + { + "id": 134, + "sampleStart": 7450, + "sampleDur": 909, + "labels": [ + { + "name": "Phonetic", + "value": "t" + } + ] + }, + { + "id": 135, + "sampleStart": 8360, + "sampleDur": 1759, + "labels": [ + { + "name": "Phonetic", + "value": "S" + } + ] + }, + { + "id": 136, + "sampleStart": 10120, + "sampleDur": 1799, + "labels": [ + { + "name": "Phonetic", + "value": "@" + } + ] + }, + { + "id": 137, + "sampleStart": 11920, + "sampleDur": 1328, + "labels": [ + { + "name": "Phonetic", + "value": "z" + } + ] + }, + { + "id": 138, + "sampleStart": 13249, + "sampleDur": 1019, + "labels": [ + { + "name": "Phonetic", + "value": "@" + } + ] + }, + { + "id": 139, + "sampleStart": 14269, + "sampleDur": 1240, + "labels": [ + { + "name": "Phonetic", + "value": "r" + } + ] + }, + { + "id": 140, + "sampleStart": 15510, + "sampleDur": 2319, + "labels": [ + { + "name": "Phonetic", + "value": "o:" + } + ] + }, + { + "id": 141, + "sampleStart": 17830, + "sampleDur": 1120, + "labels": [ + { + "name": "Phonetic", + "value": "l" + } + ] + }, + { + "id": 142, + "sampleStart": 18951, + "sampleDur": 838, + "labels": [ + { + "name": "Phonetic", + "value": "w" + } + ] + }, + { + "id": 143, + "sampleStart": 19790, + "sampleDur": 2483, + "labels": [ + { + "name": "Phonetic", + "value": "ei" + } + ] + }, + { + "id": 144, + "sampleStart": 22274, + "sampleDur": 3329, + "labels": [ + { + "name": "Phonetic", + "value": "zs" + } + ] + }, + { + "id": 145, + "sampleStart": 25604, + "sampleDur": 2409, + "labels": [ + { + "name": "Phonetic", + "value": "@u" + } + ] + }, + { + "id": 146, + "sampleStart": 28014, + "sampleDur": 1319, + "labels": [ + { + "name": "Phonetic", + "value": "t" + } + ] + }, + { + "id": 147, + "sampleStart": 29334, + "sampleDur": 1089, + "labels": [ + { + "name": "Phonetic", + "value": "H" + } + ] + }, + { + "id": 148, + "sampleStart": 30424, + "sampleDur": 1319, + "labels": [ + { + "name": "Phonetic", + "value": "E" + } + ] + }, + { + "id": 149, + "sampleStart": 31744, + "sampleDur": 1369, + "labels": [ + { + "name": "Phonetic", + "value": "m" + } + ] + }, + { + "id": 150, + "sampleStart": 33114, + "sampleDur": 859, + "labels": [ + { + "name": "Phonetic", + "value": "pt" + } + ] + }, + { + "id": 151, + "sampleStart": 33974, + "sampleDur": 389, + "labels": [ + { + "name": "Phonetic", + "value": "H" + } + ] + }, + { + "id": 152, + "sampleStart": 34364, + "sampleDur": 671, + "labels": [ + { + "name": "Phonetic", + "value": "I" + } + ] + }, + { + "id": 153, + "sampleStart": 35036, + "sampleDur": 1089, + "labels": [ + { + "name": "Phonetic", + "value": "N" + } + ] + }, + { + "id": 154, + "sampleStart": 36126, + "sampleDur": 704, + "labels": [ + { + "name": "Phonetic", + "value": "t" + } + ] + }, + { + "id": 155, + "sampleStart": 36831, + "sampleDur": 619, + "labels": [ + { + "name": "Phonetic", + "value": "H" + } + ] + }, + { + "id": 156, + "sampleStart": 37451, + "sampleDur": 354, + "labels": [ + { + "name": "Phonetic", + "value": "@" + } + ] + }, + { + "id": 157, + "sampleStart": 37806, + "sampleDur": 2119, + "labels": [ + { + "name": "Phonetic", + "value": "s" + } + ] + }, + { + "id": 158, + "sampleStart": 39926, + "sampleDur": 1501, + "labels": [ + { + "name": "Phonetic", + "value": "k" + } + ] + }, + { + "id": 159, + "sampleStart": 41428, + "sampleDur": 382, + "labels": [ + { + "name": "Phonetic", + "value": "H" + } + ] + }, + { + "id": 160, + "sampleStart": 41811, + "sampleDur": 1114, + "labels": [ + { + "name": "Phonetic", + "value": "r" + } + ] + }, + { + "id": 161, + "sampleStart": 42926, + "sampleDur": 2154, + "labels": [ + { + "name": "Phonetic", + "value": "A" + } + ] + }, + { + "id": 162, + "sampleStart": 45081, + "sampleDur": 1029, + "labels": [ + { + "name": "Phonetic", + "value": "t" + } + ] + }, + { + "id": 163, + "sampleStart": 46111, + "sampleDur": 3279, + "labels": [ + { + "name": "Phonetic", + "value": "S" + } + ] + } + ] + }, + { + "name": "Tone", + "type": "EVENT", + "items": [ + { + "id": 164, + "samplePoint": 6546, + "labels": [ + { + "name": "Tone", + "value": "H*" + } + ] + }, + { + "id": 165, + "samplePoint": 11514, + "labels": [ + { + "name": "Tone", + "value": "L-" + } + ] + }, + { + "id": 166, + "samplePoint": 17114, + "labels": [ + { + "name": "Tone", + "value": "H*" + } + ] + }, + { + "id": 167, + "samplePoint": 20911, + "labels": [ + { + "name": "Tone", + "value": "H-" + } + ] + }, + { + "id": 168, + "samplePoint": 26126, + "labels": [ + { + "name": "Tone", + "value": "H*" + } + ] + }, + { + "id": 169, + "samplePoint": 31158, + "labels": [ + { + "name": "Tone", + "value": "!H*" + } + ] + }, + { + "id": 170, + "samplePoint": 35458, + "labels": [ + { + "name": "Tone", + "value": "L-" + } + ] + }, + { + "id": 171, + "samplePoint": 42641, + "labels": [ + { + "name": "Tone", + "value": "H+L*" + } + ] + }, + { + "id": 172, + "samplePoint": 44425, + "labels": [ + { + "name": "Tone", + "value": "L-" + } + ] + }, + { + "id": 173, + "samplePoint": 45431, + "labels": [ + { + "name": "Tone", + "value": "L%" + } + ] + } + ] + }, + { + "name": "Foot", + "type": "ITEM", + "items": [ + { + "id": 72, + "labels": [ + { + "name": "Foot", + "value": "F" + } + ] + }, + { + "id": 75, + "labels": [ + { + "name": "Foot", + "value": "F" + } + ] + }, + { + "id": 82, + "labels": [ + { + "name": "Foot", + "value": "F" + } + ] + }, + { + "id": 94, + "labels": [ + { + "name": "Foot", + "value": "F" + } + ] + }, + { + "id": 95, + "labels": [ + { + "name": "Foot", + "value": "F" + } + ] + } + ] + } + ], + "links": [ + { + "fromID": 8, + "toID": 7 + }, + { + "fromID": 7, + "toID": 72 + }, + { + "fromID": 7, + "toID": 75 + }, + { + "fromID": 7, + "toID": 82 + }, + { + "fromID": 7, + "toID": 94 + }, + { + "fromID": 7, + "toID": 95 + }, + { + "fromID": 7, + "toID": 5 + }, + { + "fromID": 7, + "toID": 22 + }, + { + "fromID": 7, + "toID": 45 + }, + { + "fromID": 7, + "toID": 74 + }, + { + "fromID": 5, + "toID": 2 + }, + { + "fromID": 22, + "toID": 19 + }, + { + "fromID": 22, + "toID": 28 + }, + { + "fromID": 45, + "toID": 42 + }, + { + "fromID": 45, + "toID": 51 + }, + { + "fromID": 74, + "toID": 71 + }, + { + "fromID": 74, + "toID": 81 + }, + { + "fromID": 2, + "toID": 96 + }, + { + "fromID": 2, + "toID": 97 + }, + { + "fromID": 19, + "toID": 98 + }, + { + "fromID": 28, + "toID": 99 + }, + { + "fromID": 28, + "toID": 100 + }, + { + "fromID": 42, + "toID": 101 + }, + { + "fromID": 51, + "toID": 102 + }, + { + "fromID": 51, + "toID": 103 + }, + { + "fromID": 71, + "toID": 104 + }, + { + "fromID": 81, + "toID": 105 + }, + { + "fromID": 96, + "toID": 106 + }, + { + "fromID": 96, + "toID": 164 + }, + { + "fromID": 97, + "toID": 107 + }, + { + "fromID": 97, + "toID": 108 + }, + { + "fromID": 97, + "toID": 109 + }, + { + "fromID": 98, + "toID": 110 + }, + { + "fromID": 98, + "toID": 111 + }, + { + "fromID": 99, + "toID": 112 + }, + { + "fromID": 99, + "toID": 113 + }, + { + "fromID": 99, + "toID": 166 + }, + { + "fromID": 100, + "toID": 114 + }, + { + "fromID": 100, + "toID": 115 + }, + { + "fromID": 100, + "toID": 116 + }, + { + "fromID": 101, + "toID": 117 + }, + { + "fromID": 101, + "toID": 118 + }, + { + "fromID": 101, + "toID": 168 + }, + { + "fromID": 102, + "toID": 119 + }, + { + "fromID": 102, + "toID": 120 + }, + { + "fromID": 102, + "toID": 121 + }, + { + "fromID": 102, + "toID": 122 + }, + { + "fromID": 102, + "toID": 169 + }, + { + "fromID": 103, + "toID": 123 + }, + { + "fromID": 103, + "toID": 124 + }, + { + "fromID": 103, + "toID": 125 + }, + { + "fromID": 104, + "toID": 126 + }, + { + "fromID": 104, + "toID": 127 + }, + { + "fromID": 105, + "toID": 128 + }, + { + "fromID": 105, + "toID": 129 + }, + { + "fromID": 105, + "toID": 130 + }, + { + "fromID": 105, + "toID": 131 + }, + { + "fromID": 105, + "toID": 132 + }, + { + "fromID": 105, + "toID": 171 + }, + { + "fromID": 106, + "toID": 133 + }, + { + "fromID": 107, + "toID": 134 + }, + { + "fromID": 107, + "toID": 135 + }, + { + "fromID": 108, + "toID": 136 + }, + { + "fromID": 109, + "toID": 137 + }, + { + "fromID": 110, + "toID": 138 + }, + { + "fromID": 111, + "toID": 139 + }, + { + "fromID": 112, + "toID": 140 + }, + { + "fromID": 113, + "toID": 141 + }, + { + "fromID": 114, + "toID": 142 + }, + { + "fromID": 115, + "toID": 143 + }, + { + "fromID": 116, + "toID": 144 + }, + { + "fromID": 117, + "toID": 144 + }, + { + "fromID": 118, + "toID": 145 + }, + { + "fromID": 119, + "toID": 146 + }, + { + "fromID": 119, + "toID": 147 + }, + { + "fromID": 120, + "toID": 148 + }, + { + "fromID": 121, + "toID": 149 + }, + { + "fromID": 122, + "toID": 150 + }, + { + "fromID": 123, + "toID": 150 + }, + { + "fromID": 123, + "toID": 151 + }, + { + "fromID": 124, + "toID": 152 + }, + { + "fromID": 125, + "toID": 153 + }, + { + "fromID": 126, + "toID": 154 + }, + { + "fromID": 126, + "toID": 155 + }, + { + "fromID": 127, + "toID": 156 + }, + { + "fromID": 128, + "toID": 157 + }, + { + "fromID": 129, + "toID": 158 + }, + { + "fromID": 129, + "toID": 159 + }, + { + "fromID": 130, + "toID": 160 + }, + { + "fromID": 131, + "toID": 161 + }, + { + "fromID": 132, + "toID": 162 + }, + { + "fromID": 132, + "toID": 163 + }, + { + "fromID": 72, + "toID": 96 + }, + { + "fromID": 72, + "toID": 97 + }, + { + "fromID": 72, + "toID": 98 + }, + { + "fromID": 75, + "toID": 99 + }, + { + "fromID": 75, + "toID": 100 + }, + { + "fromID": 82, + "toID": 101 + }, + { + "fromID": 94, + "toID": 102 + }, + { + "fromID": 94, + "toID": 103 + }, + { + "fromID": 94, + "toID": 104 + }, + { + "fromID": 95, + "toID": 105 + } + ] +} + diff --git a/inst/extdata/rawDemoData/annotationFiles/msajc023.TextGrid b/inst/extdata/rawDemoData/annotationFiles/msajc023.TextGrid new file mode 100644 index 00000000..589074e8 --- /dev/null +++ b/inst/extdata/rawDemoData/annotationFiles/msajc023.TextGrid @@ -0,0 +1,542 @@ +File type = "ooTextFile" +Object class = "TextGrid" + +xmin = 0 +xmax = 2.8542 +tiers? +size = 11 +item []: + item [1]: + class = "IntervalTier" + name = "Utterance" + xmin = 0 + xmax = 2.8542 + intervals: size = 3 + intervals [1]: + xmin = 0 + xmax = 0.3 + text = "" + intervals [2]: + xmin = 0.3 + xmax = 2.554222 + text = "" + intervals [3]: + xmin = 2.554222 + xmax = 2.8542 + text = "" + item [2]: + class = "IntervalTier" + name = "Intonational" + xmin = 0 + xmax = 2.8542 + intervals: size = 3 + intervals [1]: + xmin = 0 + xmax = 0.3 + text = "" + intervals [2]: + xmin = 0.3 + xmax = 2.554222 + text = "L%" + intervals [3]: + xmin = 2.554222 + xmax = 2.8542 + text = "" + item [3]: + class = "IntervalTier" + name = "Intermediate" + xmin = 0 + xmax = 2.8542 + intervals: size = 5 + intervals [1]: + xmin = 0 + xmax = 0.3 + text = "" + intervals [2]: + xmin = 0.3 + xmax = 0.819068 + text = "L-" + intervals [3]: + xmin = 0.819068 + xmax = 1.421989 + text = "L-" + intervals [4]: + xmin = 1.421989 + xmax = 2.554222 + text = "L-" + intervals [5]: + xmin = 2.554222 + xmax = 2.8542 + text = "" + item [4]: + class = "IntervalTier" + name = "Word" + xmin = 0 + xmax = 2.8542 + intervals: size = 10 + intervals [1]: + xmin = 0 + xmax = 0.3 + text = "" + intervals [2]: + xmin = 0.3 + xmax = 0.513989 + text = "F" + intervals [3]: + xmin = 0.513989 + xmax = 0.819068 + text = "C" + intervals [4]: + xmin = 0.819068 + xmax = 1.038817 + text = "F" + intervals [5]: + xmin = 1.038817 + xmax = 1.421989 + text = "C" + intervals [6]: + xmin = 1.421989 + xmax = 1.495318 + text = "F" + intervals [7]: + xmin = 1.495318 + xmax = 1.774989 + text = "C" + intervals [8]: + xmin = 1.774989 + xmax = 1.964482 + text = "C" + intervals [9]: + xmin = 1.964482 + xmax = 2.554222 + text = "C" + intervals [10]: + xmin = 2.554222 + xmax = 2.8542 + text = "" + item [5]: + class = "IntervalTier" + name = "Accent" + xmin = 0 + xmax = 2.8542 + intervals: size = 10 + intervals [1]: + xmin = 0 + xmax = 0.3 + text = "" + intervals [2]: + xmin = 0.3 + xmax = 0.513989 + text = "W" + intervals [3]: + xmin = 0.513989 + xmax = 0.819068 + text = "W" + intervals [4]: + xmin = 0.819068 + xmax = 1.038817 + text = "W" + intervals [5]: + xmin = 1.038817 + xmax = 1.421989 + text = "W" + intervals [6]: + xmin = 1.421989 + xmax = 1.495318 + text = "W" + intervals [7]: + xmin = 1.495318 + xmax = 1.774989 + text = "W" + intervals [8]: + xmin = 1.774989 + xmax = 1.964482 + text = "S" + intervals [9]: + xmin = 1.964482 + xmax = 2.554222 + text = "S" + intervals [10]: + xmin = 2.554222 + xmax = 2.8542 + text = "" + item [6]: + class = "IntervalTier" + name = "Text" + xmin = 0 + xmax = 2.8542 + intervals: size = 10 + intervals [1]: + xmin = 0 + xmax = 0.3 + text = "" + intervals [2]: + xmin = 0.3 + xmax = 0.513989 + text = "I'll" + intervals [3]: + xmin = 0.513989 + xmax = 0.819068 + text = "hedge" + intervals [4]: + xmin = 0.819068 + xmax = 1.038817 + text = "my" + intervals [5]: + xmin = 1.038817 + xmax = 1.421989 + text = "bets" + intervals [6]: + xmin = 1.421989 + xmax = 1.495318 + text = "and" + intervals [7]: + xmin = 1.495318 + xmax = 1.774989 + text = "take" + intervals [8]: + xmin = 1.774989 + xmax = 1.964482 + text = "no" + intervals [9]: + xmin = 1.964482 + xmax = 2.554222 + text = "risks" + intervals [10]: + xmin = 2.554222 + xmax = 2.8542 + text = "" + item [7]: + class = "IntervalTier" + name = "Syllable" + xmin = 0 + xmax = 2.8542 + intervals: size = 10 + intervals [1]: + xmin = 0 + xmax = 0.3 + text = "" + intervals [2]: + xmin = 0.3 + xmax = 0.513989 + text = "W" + intervals [3]: + xmin = 0.513989 + xmax = 0.819068 + text = "S" + intervals [4]: + xmin = 0.819068 + xmax = 1.038817 + text = "W" + intervals [5]: + xmin = 1.038817 + xmax = 1.421989 + text = "S" + intervals [6]: + xmin = 1.421989 + xmax = 1.495318 + text = "W" + intervals [7]: + xmin = 1.495318 + xmax = 1.774989 + text = "S" + intervals [8]: + xmin = 1.774989 + xmax = 1.964482 + text = "S" + intervals [9]: + xmin = 1.964482 + xmax = 2.554222 + text = "S" + intervals [10]: + xmin = 2.554222 + xmax = 2.8542 + text = "" + item [8]: + class = "IntervalTier" + name = "Phoneme" + xmin = 0 + xmax = 2.8542 + intervals: size = 25 + intervals [1]: + xmin = 0 + xmax = 0.3 + text = "" + intervals [2]: + xmin = 0.3 + xmax = 0.434541 + text = "ai" + intervals [3]: + xmin = 0.434541 + xmax = 0.513989 + text = "l" + intervals [4]: + xmin = 0.513989 + xmax = 0.59559 + text = "h" + intervals [5]: + xmin = 0.59559 + xmax = 0.708989 + text = "E" + intervals [6]: + xmin = 0.708989 + xmax = 0.819068 + text = "dZ" + intervals [7]: + xmin = 0.819068 + xmax = 0.902989 + text = "m" + intervals [8]: + xmin = 0.902989 + xmax = 1.038817 + text = "ai" + intervals [9]: + xmin = 1.038817 + xmax = 1.132571 + text = "b" + intervals [10]: + xmin = 1.132571 + xmax = 1.230989 + text = "E" + intervals [11]: + xmin = 1.230989 + xmax = 1.297989 + text = "t" + intervals [12]: + xmin = 1.297989 + xmax = 1.421989 + text = "s" + intervals [13]: + xmin = 1.421989 + xmax = 1.434822 + text = "@" + intervals [14]: + xmin = 1.434822 + xmax = 1.495318 + text = "n" + intervals [15]: + xmin = 1.495318 + xmax = 1.584989 + text = "t" + intervals [16]: + xmin = 1.584989 + xmax = 1.704989 + text = "ei" + intervals [17]: + xmin = 1.704989 + xmax = 1.774989 + text = "k" + intervals [18]: + xmin = 1.774989 + xmax = 1.833989 + text = "n" + intervals [19]: + xmin = 1.833989 + xmax = 1.964482 + text = "@u" + intervals [20]: + xmin = 1.964482 + xmax = 2.063989 + text = "r" + intervals [21]: + xmin = 2.063989 + xmax = 2.144989 + text = "I" + intervals [22]: + xmin = 2.144989 + xmax = 2.279222 + text = "s" + intervals [23]: + xmin = 2.279222 + xmax = 2.366291 + text = "k" + intervals [24]: + xmin = 2.366291 + xmax = 2.554222 + text = "s" + intervals [25]: + xmin = 2.554222 + xmax = 2.8542 + text = "" + item [9]: + class = "IntervalTier" + name = "Phonetic" + xmin = 0 + xmax = 2.8542 + intervals: size = 28 + intervals [1]: + xmin = 0 + xmax = 0.3 + text = "" + intervals [2]: + xmin = 0.3 + xmax = 0.434541 + text = "ai" + intervals [3]: + xmin = 0.434541 + xmax = 0.513989 + text = "l" + intervals [4]: + xmin = 0.513989 + xmax = 0.59559 + text = "h" + intervals [5]: + xmin = 0.59559 + xmax = 0.708989 + text = "E" + intervals [6]: + xmin = 0.708989 + xmax = 0.757989 + text = "d" + intervals [7]: + xmin = 0.757989 + xmax = 0.819068 + text = "Z" + intervals [8]: + xmin = 0.819068 + xmax = 0.902989 + text = "m" + intervals [9]: + xmin = 0.902989 + xmax = 1.038817 + text = "ai" + intervals [10]: + xmin = 1.038817 + xmax = 1.132571 + text = "b" + intervals [11]: + xmin = 1.132571 + xmax = 1.230989 + text = "E" + intervals [12]: + xmin = 1.230989 + xmax = 1.297989 + text = "t" + intervals [13]: + xmin = 1.297989 + xmax = 1.421989 + text = "s" + intervals [14]: + xmin = 1.421989 + xmax = 1.434822 + text = "@" + intervals [15]: + xmin = 1.434822 + xmax = 1.495318 + text = "n" + intervals [16]: + xmin = 1.495318 + xmax = 1.522989 + text = "t" + intervals [17]: + xmin = 1.522989 + xmax = 1.584989 + text = "H" + intervals [18]: + xmin = 1.584989 + xmax = 1.704989 + text = "ei" + intervals [19]: + xmin = 1.704989 + xmax = 1.774989 + text = "k" + intervals [20]: + xmin = 1.774989 + xmax = 1.833989 + text = "n" + intervals [21]: + xmin = 1.833989 + xmax = 1.964482 + text = "@u" + intervals [22]: + xmin = 1.964482 + xmax = 2.063989 + text = "r" + intervals [23]: + xmin = 2.063989 + xmax = 2.144989 + text = "I" + intervals [24]: + xmin = 2.144989 + xmax = 2.279222 + text = "s" + intervals [25]: + xmin = 2.279222 + xmax = 2.352222 + text = "k" + intervals [26]: + xmin = 2.352222 + xmax = 2.366291 + text = "H" + intervals [27]: + xmin = 2.366291 + xmax = 2.554222 + text = "s" + intervals [28]: + xmin = 2.554222 + xmax = 2.8542 + text = "" + item [10]: + class = "TextTier" + name = "Tone" + xmin = 0 + xmax = 2.8542 + points: size = 8 + points [1]: + number = 0.661897 + mark = "H*" + points [2]: + number = 0.793507 + mark = "L-" + points [3]: + number = 1.207136 + mark = "H*" + points [4]: + number = 1.251789 + mark = "L-" + points [5]: + number = 1.888685 + mark = "H*" + points [6]: + number = 2.050847 + mark = "!H*" + points [7]: + number = 2.10255 + mark = "L-" + points [8]: + number = 2.137803 + mark = "L%" + item [11]: + class = "IntervalTier" + name = "Foot" + xmin = 0 + xmax = 2.8542 + intervals: size = 7 + intervals [1]: + xmin = 0 + xmax = 0.513989 + text = "" + intervals [2]: + xmin = 0.513989 + xmax = 1.038817 + text = "F" + intervals [3]: + xmin = 1.038817 + xmax = 1.495318 + text = "F" + intervals [4]: + xmin = 1.495318 + xmax = 1.774989 + text = "F" + intervals [5]: + xmin = 1.774989 + xmax = 1.964482 + text = "F" + intervals [6]: + xmin = 1.964482 + xmax = 2.554222 + text = "F" + intervals [7]: + xmin = 2.554222 + xmax = 2.8542 + text = "" diff --git a/inst/extdata/rawDemoData/annotationFiles/msajc023.hlb b/inst/extdata/rawDemoData/annotationFiles/msajc023.hlb new file mode 100644 index 00000000..657193d0 --- /dev/null +++ b/inst/extdata/rawDemoData/annotationFiles/msajc023.hlb @@ -0,0 +1,190 @@ +**EMU hierarchical labels** +147 +Syllable Syllable +83 W +84 S +85 W +86 S +87 W +88 S +89 S +90 S + +Word Word Accent Text +2 F W I'll +13 C W hedge +23 F W my +32 C W bets +43 F W and +52 C W take +62 C S no +69 C S risks + +Foot Foot +53 F +63 F +70 F +72 F +82 F + +Phoneme Phoneme +91 ai +92 l +93 h +94 E +95 dZ +96 m +97 ai +98 b +99 E +100 t +101 s +102 @ +103 n +104 t +105 ei +106 k +107 n +108 @u +109 r +110 I +111 s +112 k +113 s + +Phonetic Phonetic +114 ai +115 l +116 h +117 E +118 d +119 Z +120 m +121 ai +122 b +123 E +124 t +125 s +126 @ +127 n +128 t +129 H +130 ei +131 k +132 n +133 @u +134 r +135 I +136 s +137 k +138 H +139 s + +Tone Tone +140 H* +141 L- +142 H* +143 L- +144 H* +145 !H* +146 L- +147 L% + +Utterance Utterance +8 + +Intonational Intonational +7 L% + +Intermediate Intermediate +5 L- +26 L- +46 L- + + +2 83 91 92 114 115 +5 2 13 83 84 91 92 93 94 95 114 115 116 117 118 119 140 +7 2 5 13 23 26 32 43 46 52 53 62 63 69 70 72 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 142 144 145 +8 2 5 7 13 23 26 32 43 46 52 53 62 63 69 70 72 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 142 144 145 +13 84 93 94 95 116 117 118 119 140 +23 85 96 97 120 121 +26 23 32 85 86 96 97 98 99 100 101 120 121 122 123 124 125 142 +32 86 98 99 100 101 122 123 124 125 142 +43 87 102 103 126 127 +46 43 52 62 69 87 88 89 90 102 103 104 105 106 107 108 109 110 111 112 113 126 127 128 129 130 131 132 133 134 135 136 137 138 139 144 145 +52 88 104 105 106 128 129 130 131 +53 84 85 93 94 95 96 97 116 117 118 119 120 121 140 +62 89 107 108 132 133 144 +63 86 87 98 99 100 101 102 103 122 123 124 125 126 127 142 +69 90 109 110 111 112 113 134 135 136 137 138 139 145 +70 88 104 105 106 128 129 130 131 +72 89 107 108 132 133 144 +82 90 109 110 111 112 113 134 135 136 137 138 139 145 +83 91 92 114 115 +84 93 94 95 116 117 118 119 140 +85 96 97 120 121 +86 98 99 100 101 122 123 124 125 142 +87 102 103 126 127 +88 104 105 106 128 129 130 131 +89 107 108 132 133 144 +90 109 110 111 112 113 134 135 136 137 138 139 145 +91 114 +92 115 +93 116 +94 117 +95 118 119 +96 120 +97 121 +98 122 +99 123 +100 124 +101 125 +102 126 +103 127 +104 128 129 +105 130 +106 131 +107 132 +108 133 +109 134 +110 135 +111 136 +112 137 138 +113 139 +114 +115 +116 +117 +118 +119 +120 +121 +122 +123 +124 +125 +126 +127 +128 +129 +130 +131 +132 +133 +134 +135 +136 +137 +138 +139 +140 +141 +142 +143 +144 +145 +146 +147 + +0 + diff --git a/inst/extdata/rawDemoData/annotationFiles/msajc023.lab b/inst/extdata/rawDemoData/annotationFiles/msajc023.lab new file mode 100644 index 00000000..280a92e4 --- /dev/null +++ b/inst/extdata/rawDemoData/annotationFiles/msajc023.lab @@ -0,0 +1,30 @@ +signal msajc023 +nfields 1 +# + 0.300000 125 H# + 0.434541 125 ai + 0.513989 125 l + 0.595590 125 h + 0.708989 125 E + 0.757989 125 d + 0.819068 125 Z + 0.902989 125 m + 1.038817 125 ai + 1.132571 125 b + 1.230989 125 E + 1.297989 125 t + 1.421989 125 s + 1.434822 125 @ + 1.495318 125 n + 1.522989 125 t + 1.584989 125 H + 1.704989 125 ei + 1.774989 125 k + 1.833989 125 n + 1.964482 125 @u + 2.063989 125 r + 2.144989 125 I + 2.279222 125 s + 2.352222 125 k + 2.366291 125 H + 2.554222 125 s diff --git a/inst/extdata/rawDemoData/annotationFiles/msajc023.par b/inst/extdata/rawDemoData/annotationFiles/msajc023.par new file mode 100644 index 00000000..f970c21f --- /dev/null +++ b/inst/extdata/rawDemoData/annotationFiles/msajc023.par @@ -0,0 +1,54 @@ +LHD: Partitur 1.2.16 +REP: unknown +SNB: 2 +SAM: 20000 +SBF: 01 +SSB: 16 +NCH: 1 +SPN: unknown +LBD: +KAN: 0 Il +KAN: 1 hedZ +KAN: 2 maI +KAN: 3 bets +KAN: 4 @nd +KAN: 5 teIk +KAN: 6 nVmb@ +KAN: 7 rIsks +ORT: 0 I'll +ORT: 1 hedge +ORT: 2 my +ORT: 3 bets +ORT: 4 and +ORT: 5 take +ORT: 6 no +ORT: 7 risks +TRN: 6400 44599 0,1,2,3,4,5,6,7 I'll hedge my bets and take no risks +MAU: 0 6399 -1 +MAU: 6400 599 0 I +MAU: 7000 3199 0 l +MAU: 10200 2399 1 h +MAU: 12600 1999 1 e +MAU: 14600 1799 1 dZ +MAU: 16400 1799 2 m +MAU: 18200 2999 2 aI +MAU: 21200 1999 3 b +MAU: 23200 2399 3 e +MAU: 25600 599 3 t +MAU: 26200 1999 3 s +MAU: 28200 599 4 @ +MAU: 28800 1799 4 n +MAU: 30600 1399 5 t +MAU: 32000 2599 5 eI +MAU: 34600 799 5 k +MAU: 35400 1599 6 n +MAU: 37000 999 6 V +MAU: 38000 599 6 m +MAU: 38600 599 6 b +MAU: 39200 599 6 @ +MAU: 39800 1999 7 r +MAU: 41800 999 7 I +MAU: 42800 4199 7 s +MAU: 47000 599 7 k +MAU: 47600 3399 7 s +MAU: 51000 5799 -1 diff --git a/inst/extdata/rawDemoData/annotationFiles/msajc023.parmanipulated b/inst/extdata/rawDemoData/annotationFiles/msajc023.parmanipulated new file mode 100644 index 00000000..f970c21f --- /dev/null +++ b/inst/extdata/rawDemoData/annotationFiles/msajc023.parmanipulated @@ -0,0 +1,54 @@ +LHD: Partitur 1.2.16 +REP: unknown +SNB: 2 +SAM: 20000 +SBF: 01 +SSB: 16 +NCH: 1 +SPN: unknown +LBD: +KAN: 0 Il +KAN: 1 hedZ +KAN: 2 maI +KAN: 3 bets +KAN: 4 @nd +KAN: 5 teIk +KAN: 6 nVmb@ +KAN: 7 rIsks +ORT: 0 I'll +ORT: 1 hedge +ORT: 2 my +ORT: 3 bets +ORT: 4 and +ORT: 5 take +ORT: 6 no +ORT: 7 risks +TRN: 6400 44599 0,1,2,3,4,5,6,7 I'll hedge my bets and take no risks +MAU: 0 6399 -1 +MAU: 6400 599 0 I +MAU: 7000 3199 0 l +MAU: 10200 2399 1 h +MAU: 12600 1999 1 e +MAU: 14600 1799 1 dZ +MAU: 16400 1799 2 m +MAU: 18200 2999 2 aI +MAU: 21200 1999 3 b +MAU: 23200 2399 3 e +MAU: 25600 599 3 t +MAU: 26200 1999 3 s +MAU: 28200 599 4 @ +MAU: 28800 1799 4 n +MAU: 30600 1399 5 t +MAU: 32000 2599 5 eI +MAU: 34600 799 5 k +MAU: 35400 1599 6 n +MAU: 37000 999 6 V +MAU: 38000 599 6 m +MAU: 38600 599 6 b +MAU: 39200 599 6 @ +MAU: 39800 1999 7 r +MAU: 41800 999 7 I +MAU: 42800 4199 7 s +MAU: 47000 599 7 k +MAU: 47600 3399 7 s +MAU: 51000 5799 -1 diff --git a/inst/extdata/rawDemoData/annotationFiles/msajc023.tone b/inst/extdata/rawDemoData/annotationFiles/msajc023.tone new file mode 100644 index 00000000..c1dca44e --- /dev/null +++ b/inst/extdata/rawDemoData/annotationFiles/msajc023.tone @@ -0,0 +1,11 @@ +signal msajc023 +nfields 1 +# + 0.662897 125 H* + 0.794507 125 L- + 1.208136 125 H* + 1.252789 125 L- + 1.889685 125 H* + 2.051847 125 !H* + 2.103550 125 L- + 2.138803 125 L% diff --git a/inst/extdata/rawDemoData/annotationFiles/msajc023_annot.json b/inst/extdata/rawDemoData/annotationFiles/msajc023_annot.json new file mode 100644 index 00000000..1eeabd85 --- /dev/null +++ b/inst/extdata/rawDemoData/annotationFiles/msajc023_annot.json @@ -0,0 +1,1275 @@ +{ + "name": "msajc023", + "annotates": "msajc023.wav", + "sampleRate": 20000, + "levels": [ + { + "name": "Utterance", + "type": "ITEM", + "items": [ + { + "id": 8, + "labels": [ + { + "name": "Utterance", + "value": "" + } + ] + } + ] + }, + { + "name": "Intonational", + "type": "ITEM", + "items": [ + { + "id": 7, + "labels": [ + { + "name": "Intonational", + "value": "L%" + } + ] + } + ] + }, + { + "name": "Intermediate", + "type": "ITEM", + "items": [ + { + "id": 5, + "labels": [ + { + "name": "Intermediate", + "value": "L-" + } + ] + }, + { + "id": 26, + "labels": [ + { + "name": "Intermediate", + "value": "L-" + } + ] + }, + { + "id": 46, + "labels": [ + { + "name": "Intermediate", + "value": "L-" + } + ] + } + ] + }, + { + "name": "Word", + "type": "ITEM", + "items": [ + { + "id": 2, + "labels": [ + { + "name": "Word", + "value": "F" + }, + { + "name": "Accent", + "value": "W" + }, + { + "name": "Text", + "value": "I'll" + } + ] + }, + { + "id": 13, + "labels": [ + { + "name": "Word", + "value": "C" + }, + { + "name": "Accent", + "value": "W" + }, + { + "name": "Text", + "value": "hedge" + } + ] + }, + { + "id": 23, + "labels": [ + { + "name": "Word", + "value": "F" + }, + { + "name": "Accent", + "value": "W" + }, + { + "name": "Text", + "value": "my" + } + ] + }, + { + "id": 32, + "labels": [ + { + "name": "Word", + "value": "C" + }, + { + "name": "Accent", + "value": "W" + }, + { + "name": "Text", + "value": "bets" + } + ] + }, + { + "id": 43, + "labels": [ + { + "name": "Word", + "value": "F" + }, + { + "name": "Accent", + "value": "W" + }, + { + "name": "Text", + "value": "and" + } + ] + }, + { + "id": 52, + "labels": [ + { + "name": "Word", + "value": "C" + }, + { + "name": "Accent", + "value": "W" + }, + { + "name": "Text", + "value": "take" + } + ] + }, + { + "id": 62, + "labels": [ + { + "name": "Word", + "value": "C" + }, + { + "name": "Accent", + "value": "S" + }, + { + "name": "Text", + "value": "no" + } + ] + }, + { + "id": 69, + "labels": [ + { + "name": "Word", + "value": "C" + }, + { + "name": "Accent", + "value": "S" + }, + { + "name": "Text", + "value": "risks" + } + ] + } + ] + }, + { + "name": "Syllable", + "type": "ITEM", + "items": [ + { + "id": 83, + "labels": [ + { + "name": "Syllable", + "value": "W" + } + ] + }, + { + "id": 84, + "labels": [ + { + "name": "Syllable", + "value": "S" + } + ] + }, + { + "id": 85, + "labels": [ + { + "name": "Syllable", + "value": "W" + } + ] + }, + { + "id": 86, + "labels": [ + { + "name": "Syllable", + "value": "S" + } + ] + }, + { + "id": 87, + "labels": [ + { + "name": "Syllable", + "value": "W" + } + ] + }, + { + "id": 88, + "labels": [ + { + "name": "Syllable", + "value": "S" + } + ] + }, + { + "id": 89, + "labels": [ + { + "name": "Syllable", + "value": "S" + } + ] + }, + { + "id": 90, + "labels": [ + { + "name": "Syllable", + "value": "S" + } + ] + } + ] + }, + { + "name": "Phoneme", + "type": "ITEM", + "items": [ + { + "id": 91, + "labels": [ + { + "name": "Phoneme", + "value": "ai" + } + ] + }, + { + "id": 92, + "labels": [ + { + "name": "Phoneme", + "value": "l" + } + ] + }, + { + "id": 93, + "labels": [ + { + "name": "Phoneme", + "value": "h" + } + ] + }, + { + "id": 94, + "labels": [ + { + "name": "Phoneme", + "value": "E" + } + ] + }, + { + "id": 95, + "labels": [ + { + "name": "Phoneme", + "value": "dZ" + } + ] + }, + { + "id": 96, + "labels": [ + { + "name": "Phoneme", + "value": "m" + } + ] + }, + { + "id": 97, + "labels": [ + { + "name": "Phoneme", + "value": "ai" + } + ] + }, + { + "id": 98, + "labels": [ + { + "name": "Phoneme", + "value": "b" + } + ] + }, + { + "id": 99, + "labels": [ + { + "name": "Phoneme", + "value": "E" + } + ] + }, + { + "id": 100, + "labels": [ + { + "name": "Phoneme", + "value": "t" + } + ] + }, + { + "id": 101, + "labels": [ + { + "name": "Phoneme", + "value": "s" + } + ] + }, + { + "id": 102, + "labels": [ + { + "name": "Phoneme", + "value": "@" + } + ] + }, + { + "id": 103, + "labels": [ + { + "name": "Phoneme", + "value": "n" + } + ] + }, + { + "id": 104, + "labels": [ + { + "name": "Phoneme", + "value": "t" + } + ] + }, + { + "id": 105, + "labels": [ + { + "name": "Phoneme", + "value": "ei" + } + ] + }, + { + "id": 106, + "labels": [ + { + "name": "Phoneme", + "value": "k" + } + ] + }, + { + "id": 107, + "labels": [ + { + "name": "Phoneme", + "value": "n" + } + ] + }, + { + "id": 108, + "labels": [ + { + "name": "Phoneme", + "value": "@u" + } + ] + }, + { + "id": 109, + "labels": [ + { + "name": "Phoneme", + "value": "r" + } + ] + }, + { + "id": 110, + "labels": [ + { + "name": "Phoneme", + "value": "I" + } + ] + }, + { + "id": 111, + "labels": [ + { + "name": "Phoneme", + "value": "s" + } + ] + }, + { + "id": 112, + "labels": [ + { + "name": "Phoneme", + "value": "k" + } + ] + }, + { + "id": 113, + "labels": [ + { + "name": "Phoneme", + "value": "s" + } + ] + } + ] + }, + { + "name": "Phonetic", + "type": "SEGMENT", + "items": [ + { + "id": 114, + "sampleStart": 6000, + "sampleDur": 2689, + "labels": [ + { + "name": "Phonetic", + "value": "ai" + } + ] + }, + { + "id": 115, + "sampleStart": 8690, + "sampleDur": 1588, + "labels": [ + { + "name": "Phonetic", + "value": "l" + } + ] + }, + { + "id": 116, + "sampleStart": 10279, + "sampleDur": 1631, + "labels": [ + { + "name": "Phonetic", + "value": "h" + } + ] + }, + { + "id": 117, + "sampleStart": 11911, + "sampleDur": 2267, + "labels": [ + { + "name": "Phonetic", + "value": "E" + } + ] + }, + { + "id": 118, + "sampleStart": 14179, + "sampleDur": 979, + "labels": [ + { + "name": "Phonetic", + "value": "d" + } + ] + }, + { + "id": 119, + "sampleStart": 15159, + "sampleDur": 1221, + "labels": [ + { + "name": "Phonetic", + "value": "Z" + } + ] + }, + { + "id": 120, + "sampleStart": 16381, + "sampleDur": 1677, + "labels": [ + { + "name": "Phonetic", + "value": "m" + } + ] + }, + { + "id": 121, + "sampleStart": 18059, + "sampleDur": 2716, + "labels": [ + { + "name": "Phonetic", + "value": "ai" + } + ] + }, + { + "id": 122, + "sampleStart": 20776, + "sampleDur": 1874, + "labels": [ + { + "name": "Phonetic", + "value": "b" + } + ] + }, + { + "id": 123, + "sampleStart": 22651, + "sampleDur": 1967, + "labels": [ + { + "name": "Phonetic", + "value": "E" + } + ] + }, + { + "id": 124, + "sampleStart": 24619, + "sampleDur": 1339, + "labels": [ + { + "name": "Phonetic", + "value": "t" + } + ] + }, + { + "id": 125, + "sampleStart": 25959, + "sampleDur": 2479, + "labels": [ + { + "name": "Phonetic", + "value": "s" + } + ] + }, + { + "id": 126, + "sampleStart": 28439, + "sampleDur": 256, + "labels": [ + { + "name": "Phonetic", + "value": "@" + } + ] + }, + { + "id": 127, + "sampleStart": 28696, + "sampleDur": 1209, + "labels": [ + { + "name": "Phonetic", + "value": "n" + } + ] + }, + { + "id": 128, + "sampleStart": 29906, + "sampleDur": 552, + "labels": [ + { + "name": "Phonetic", + "value": "t" + } + ] + }, + { + "id": 129, + "sampleStart": 30459, + "sampleDur": 1239, + "labels": [ + { + "name": "Phonetic", + "value": "H" + } + ] + }, + { + "id": 130, + "sampleStart": 31699, + "sampleDur": 2399, + "labels": [ + { + "name": "Phonetic", + "value": "ei" + } + ] + }, + { + "id": 131, + "sampleStart": 34099, + "sampleDur": 1399, + "labels": [ + { + "name": "Phonetic", + "value": "k" + } + ] + }, + { + "id": 132, + "sampleStart": 35499, + "sampleDur": 1179, + "labels": [ + { + "name": "Phonetic", + "value": "n" + } + ] + }, + { + "id": 133, + "sampleStart": 36679, + "sampleDur": 2609, + "labels": [ + { + "name": "Phonetic", + "value": "@u" + } + ] + }, + { + "id": 134, + "sampleStart": 39289, + "sampleDur": 1989, + "labels": [ + { + "name": "Phonetic", + "value": "r" + } + ] + }, + { + "id": 135, + "sampleStart": 41279, + "sampleDur": 1619, + "labels": [ + { + "name": "Phonetic", + "value": "I" + } + ] + }, + { + "id": 136, + "sampleStart": 42899, + "sampleDur": 2684, + "labels": [ + { + "name": "Phonetic", + "value": "s" + } + ] + }, + { + "id": 137, + "sampleStart": 45584, + "sampleDur": 1459, + "labels": [ + { + "name": "Phonetic", + "value": "k" + } + ] + }, + { + "id": 138, + "sampleStart": 47044, + "sampleDur": 280, + "labels": [ + { + "name": "Phonetic", + "value": "H" + } + ] + }, + { + "id": 139, + "sampleStart": 47325, + "sampleDur": 3758, + "labels": [ + { + "name": "Phonetic", + "value": "s" + } + ] + } + ] + }, + { + "name": "Tone", + "type": "EVENT", + "items": [ + { + "id": 140, + "samplePoint": 13258, + "labels": [ + { + "name": "Tone", + "value": "H*" + } + ] + }, + { + "id": 141, + "samplePoint": 15890, + "labels": [ + { + "name": "Tone", + "value": "L-" + } + ] + }, + { + "id": 142, + "samplePoint": 24163, + "labels": [ + { + "name": "Tone", + "value": "H*" + } + ] + }, + { + "id": 143, + "samplePoint": 25056, + "labels": [ + { + "name": "Tone", + "value": "L-" + } + ] + }, + { + "id": 144, + "samplePoint": 37794, + "labels": [ + { + "name": "Tone", + "value": "H*" + } + ] + }, + { + "id": 145, + "samplePoint": 41037, + "labels": [ + { + "name": "Tone", + "value": "!H*" + } + ] + }, + { + "id": 146, + "samplePoint": 42071, + "labels": [ + { + "name": "Tone", + "value": "L-" + } + ] + }, + { + "id": 147, + "samplePoint": 42776, + "labels": [ + { + "name": "Tone", + "value": "L%" + } + ] + } + ] + }, + { + "name": "Foot", + "type": "ITEM", + "items": [ + { + "id": 53, + "labels": [ + { + "name": "Foot", + "value": "F" + } + ] + }, + { + "id": 63, + "labels": [ + { + "name": "Foot", + "value": "F" + } + ] + }, + { + "id": 70, + "labels": [ + { + "name": "Foot", + "value": "F" + } + ] + }, + { + "id": 72, + "labels": [ + { + "name": "Foot", + "value": "F" + } + ] + }, + { + "id": 82, + "labels": [ + { + "name": "Foot", + "value": "F" + } + ] + } + ] + } + ], + "links": [ + { + "fromID": 8, + "toID": 7 + }, + { + "fromID": 7, + "toID": 53 + }, + { + "fromID": 7, + "toID": 63 + }, + { + "fromID": 7, + "toID": 70 + }, + { + "fromID": 7, + "toID": 72 + }, + { + "fromID": 7, + "toID": 82 + }, + { + "fromID": 7, + "toID": 5 + }, + { + "fromID": 7, + "toID": 26 + }, + { + "fromID": 7, + "toID": 46 + }, + { + "fromID": 5, + "toID": 2 + }, + { + "fromID": 5, + "toID": 13 + }, + { + "fromID": 26, + "toID": 23 + }, + { + "fromID": 26, + "toID": 32 + }, + { + "fromID": 46, + "toID": 43 + }, + { + "fromID": 46, + "toID": 52 + }, + { + "fromID": 46, + "toID": 62 + }, + { + "fromID": 46, + "toID": 69 + }, + { + "fromID": 2, + "toID": 83 + }, + { + "fromID": 13, + "toID": 84 + }, + { + "fromID": 23, + "toID": 85 + }, + { + "fromID": 32, + "toID": 86 + }, + { + "fromID": 43, + "toID": 87 + }, + { + "fromID": 52, + "toID": 88 + }, + { + "fromID": 62, + "toID": 89 + }, + { + "fromID": 69, + "toID": 90 + }, + { + "fromID": 83, + "toID": 91 + }, + { + "fromID": 83, + "toID": 92 + }, + { + "fromID": 84, + "toID": 93 + }, + { + "fromID": 84, + "toID": 94 + }, + { + "fromID": 84, + "toID": 95 + }, + { + "fromID": 84, + "toID": 140 + }, + { + "fromID": 85, + "toID": 96 + }, + { + "fromID": 85, + "toID": 97 + }, + { + "fromID": 86, + "toID": 98 + }, + { + "fromID": 86, + "toID": 99 + }, + { + "fromID": 86, + "toID": 100 + }, + { + "fromID": 86, + "toID": 101 + }, + { + "fromID": 86, + "toID": 142 + }, + { + "fromID": 87, + "toID": 102 + }, + { + "fromID": 87, + "toID": 103 + }, + { + "fromID": 88, + "toID": 104 + }, + { + "fromID": 88, + "toID": 105 + }, + { + "fromID": 88, + "toID": 106 + }, + { + "fromID": 89, + "toID": 107 + }, + { + "fromID": 89, + "toID": 108 + }, + { + "fromID": 89, + "toID": 144 + }, + { + "fromID": 90, + "toID": 109 + }, + { + "fromID": 90, + "toID": 110 + }, + { + "fromID": 90, + "toID": 111 + }, + { + "fromID": 90, + "toID": 112 + }, + { + "fromID": 90, + "toID": 113 + }, + { + "fromID": 90, + "toID": 145 + }, + { + "fromID": 91, + "toID": 114 + }, + { + "fromID": 92, + "toID": 115 + }, + { + "fromID": 93, + "toID": 116 + }, + { + "fromID": 94, + "toID": 117 + }, + { + "fromID": 95, + "toID": 118 + }, + { + "fromID": 95, + "toID": 119 + }, + { + "fromID": 96, + "toID": 120 + }, + { + "fromID": 97, + "toID": 121 + }, + { + "fromID": 98, + "toID": 122 + }, + { + "fromID": 99, + "toID": 123 + }, + { + "fromID": 100, + "toID": 124 + }, + { + "fromID": 101, + "toID": 125 + }, + { + "fromID": 102, + "toID": 126 + }, + { + "fromID": 103, + "toID": 127 + }, + { + "fromID": 104, + "toID": 128 + }, + { + "fromID": 104, + "toID": 129 + }, + { + "fromID": 105, + "toID": 130 + }, + { + "fromID": 106, + "toID": 131 + }, + { + "fromID": 107, + "toID": 132 + }, + { + "fromID": 108, + "toID": 133 + }, + { + "fromID": 109, + "toID": 134 + }, + { + "fromID": 110, + "toID": 135 + }, + { + "fromID": 111, + "toID": 136 + }, + { + "fromID": 112, + "toID": 137 + }, + { + "fromID": 112, + "toID": 138 + }, + { + "fromID": 113, + "toID": 139 + }, + { + "fromID": 53, + "toID": 84 + }, + { + "fromID": 53, + "toID": 85 + }, + { + "fromID": 63, + "toID": 86 + }, + { + "fromID": 63, + "toID": 87 + }, + { + "fromID": 70, + "toID": 88 + }, + { + "fromID": 72, + "toID": 89 + }, + { + "fromID": 82, + "toID": 90 + } + ] +} + diff --git a/inst/extdata/rawDemoData/annotationFiles/msajc057.TextGrid b/inst/extdata/rawDemoData/annotationFiles/msajc057.TextGrid new file mode 100644 index 00000000..ded5df42 --- /dev/null +++ b/inst/extdata/rawDemoData/annotationFiles/msajc057.TextGrid @@ -0,0 +1,663 @@ +File type = "ooTextFile" +Object class = "TextGrid" + +xmin = 0 +xmax = 3.09495 +tiers? +size = 11 +item []: + item [1]: + class = "IntervalTier" + name = "Utterance" + xmin = 0 + xmax = 3.09495 + intervals: size = 3 + intervals [1]: + xmin = 0 + xmax = 0.3 + text = "" + intervals [2]: + xmin = 0.3 + xmax = 2.794988 + text = "" + intervals [3]: + xmin = 2.794988 + xmax = 3.09495 + text = "" + item [2]: + class = "IntervalTier" + name = "Intonational" + xmin = 0 + xmax = 3.09495 + intervals: size = 3 + intervals [1]: + xmin = 0 + xmax = 0.3 + text = "" + intervals [2]: + xmin = 0.3 + xmax = 2.794988 + text = "L%" + intervals [3]: + xmin = 2.794988 + xmax = 3.09495 + text = "" + item [3]: + class = "IntervalTier" + name = "Intermediate" + xmin = 0 + xmax = 3.09495 + intervals: size = 4 + intervals [1]: + xmin = 0 + xmax = 0.3 + text = "" + intervals [2]: + xmin = 0.3 + xmax = 1.211242 + text = "L-" + intervals [3]: + xmin = 1.211242 + xmax = 2.794988 + text = "L-" + intervals [4]: + xmin = 2.794988 + xmax = 3.09495 + text = "" + item [4]: + class = "IntervalTier" + name = "Word" + xmin = 0 + xmax = 3.09495 + intervals: size = 10 + intervals [1]: + xmin = 0 + xmax = 0.3 + text = "" + intervals [2]: + xmin = 0.3 + xmax = 0.475802 + text = "F" + intervals [3]: + xmin = 0.475802 + xmax = 0.666743 + text = "C" + intervals [4]: + xmin = 0.666743 + xmax = 1.211242 + text = "C" + intervals [5]: + xmin = 1.211242 + xmax = 1.578745 + text = "C" + intervals [6]: + xmin = 1.578745 + xmax = 1.824488 + text = "C" + intervals [7]: + xmin = 1.824488 + xmax = 2.367811 + text = "C" + intervals [8]: + xmin = 2.367811 + xmax = 2.480496 + text = "F" + intervals [9]: + xmin = 2.480496 + xmax = 2.794988 + text = "C" + intervals [10]: + xmin = 2.794988 + xmax = 3.09495 + text = "" + item [5]: + class = "IntervalTier" + name = "Accent" + xmin = 0 + xmax = 3.09495 + intervals: size = 10 + intervals [1]: + xmin = 0 + xmax = 0.3 + text = "" + intervals [2]: + xmin = 0.3 + xmax = 0.475802 + text = "W" + intervals [3]: + xmin = 0.475802 + xmax = 0.666743 + text = "W" + intervals [4]: + xmin = 0.666743 + xmax = 1.211242 + text = "S" + intervals [5]: + xmin = 1.211242 + xmax = 1.578745 + text = "W" + intervals [6]: + xmin = 1.578745 + xmax = 1.824488 + text = "S" + intervals [7]: + xmin = 1.824488 + xmax = 2.367811 + text = "W" + intervals [8]: + xmin = 2.367811 + xmax = 2.480496 + text = "W" + intervals [9]: + xmin = 2.480496 + xmax = 2.794988 + text = "S" + intervals [10]: + xmin = 2.794988 + xmax = 3.09495 + text = "" + item [6]: + class = "IntervalTier" + name = "Text" + xmin = 0 + xmax = 3.09495 + intervals: size = 10 + intervals [1]: + xmin = 0 + xmax = 0.3 + text = "" + intervals [2]: + xmin = 0.3 + xmax = 0.475802 + text = "this" + intervals [3]: + xmin = 0.475802 + xmax = 0.666743 + text = "new" + intervals [4]: + xmin = 0.666743 + xmax = 1.211242 + text = "display" + intervals [5]: + xmin = 1.211242 + xmax = 1.578745 + text = "attracts" + intervals [6]: + xmin = 1.578745 + xmax = 1.824488 + text = "more" + intervals [7]: + xmin = 1.824488 + xmax = 2.367811 + text = "customers" + intervals [8]: + xmin = 2.367811 + xmax = 2.480496 + text = "than" + intervals [9]: + xmin = 2.480496 + xmax = 2.794988 + text = "ever" + intervals [10]: + xmin = 2.794988 + xmax = 3.09495 + text = "" + item [7]: + class = "IntervalTier" + name = "Syllable" + xmin = 0 + xmax = 3.09495 + intervals: size = 15 + intervals [1]: + xmin = 0 + xmax = 0.3 + text = "" + intervals [2]: + xmin = 0.3 + xmax = 0.475802 + text = "W" + intervals [3]: + xmin = 0.475802 + xmax = 0.666743 + text = "S" + intervals [4]: + xmin = 0.666743 + xmax = 0.773996 + text = "W" + intervals [5]: + xmin = 0.773996 + xmax = 1.211242 + text = "S" + intervals [6]: + xmin = 1.211242 + xmax = 1.247997 + text = "W" + intervals [7]: + xmin = 1.247997 + xmax = 1.578745 + text = "S" + intervals [8]: + xmin = 1.578745 + xmax = 1.824488 + text = "S" + intervals [9]: + xmin = 1.824488 + xmax = 2.037495 + text = "S" + intervals [10]: + xmin = 2.037495 + xmax = 2.173498 + text = "W" + intervals [11]: + xmin = 2.173498 + xmax = 2.367811 + text = "W" + intervals [12]: + xmin = 2.367811 + xmax = 2.480496 + text = "W" + intervals [13]: + xmin = 2.480496 + xmax = 2.587739 + text = "S" + intervals [14]: + xmin = 2.587739 + xmax = 2.794988 + text = "W" + intervals [15]: + xmin = 2.794988 + xmax = 3.09495 + text = "" + item [8]: + class = "IntervalTier" + name = "Phoneme" + xmin = 0 + xmax = 3.09495 + intervals: size = 36 + intervals [1]: + xmin = 0 + xmax = 0.3 + text = "" + intervals [2]: + xmin = 0.3 + xmax = 0.335872 + text = "D" + intervals [3]: + xmin = 0.335872 + xmax = 0.392995 + text = "I" + intervals [4]: + xmin = 0.392995 + xmax = 0.475802 + text = "s" + intervals [5]: + xmin = 0.475802 + xmax = 0.544 + text = "n" + intervals [6]: + xmin = 0.544 + xmax = 0.585748 + text = "j" + intervals [7]: + xmin = 0.585748 + xmax = 0.666743 + text = "u:" + intervals [8]: + xmin = 0.666743 + xmax = 0.729499 + text = "d" + intervals [9]: + xmin = 0.729499 + xmax = 0.773996 + text = "@" + intervals [10]: + xmin = 0.773996 + xmax = 0.86374 + text = "s" + intervals [11]: + xmin = 0.86374 + xmax = 0.970495 + text = "p" + intervals [12]: + xmin = 0.970495 + xmax = 1.021742 + text = "l" + intervals [13]: + xmin = 1.021742 + xmax = 1.211242 + text = "ei" + intervals [14]: + xmin = 1.211242 + xmax = 1.247997 + text = "@" + intervals [15]: + xmin = 1.247997 + xmax = 1.363996 + text = "t" + intervals [16]: + xmin = 1.363996 + xmax = 1.382495 + text = "r" + intervals [17]: + xmin = 1.382495 + xmax = 1.47099 + text = "A" + intervals [18]: + xmin = 1.47099 + xmax = 1.540248 + text = "k_t" + intervals [19]: + xmin = 1.540248 + xmax = 1.578745 + text = "s" + intervals [20]: + xmin = 1.578745 + xmax = 1.709245 + text = "m" + intervals [21]: + xmin = 1.709245 + xmax = 1.824488 + text = "o:" + intervals [22]: + xmin = 1.824488 + xmax = 1.943241 + text = "k" + intervals [23]: + xmin = 1.943241 + xmax = 2.037495 + text = "V" + intervals [24]: + xmin = 2.037495 + xmax = 2.085242 + text = "s" + intervals [25]: + xmin = 2.085242 + xmax = 2.1485 + text = "t" + intervals [26]: + xmin = 2.1485 + xmax = 2.173498 + text = "@" + intervals [27]: + xmin = 2.173498 + xmax = 2.233495 + text = "m" + intervals [28]: + xmin = 2.233495 + xmax = 2.302248 + text = "@" + intervals [29]: + xmin = 2.302248 + xmax = 2.367811 + text = "z" + intervals [30]: + xmin = 2.367811 + xmax = 2.407001 + text = "D" + intervals [31]: + xmin = 2.407001 + xmax = 2.447748 + text = "@" + intervals [32]: + xmin = 2.447748 + xmax = 2.480496 + text = "n" + intervals [33]: + xmin = 2.480496 + xmax = 2.587739 + text = "E" + intervals [34]: + xmin = 2.587739 + xmax = 2.645747 + text = "v" + intervals [35]: + xmin = 2.645747 + xmax = 2.794988 + text = "@" + intervals [36]: + xmin = 2.794988 + xmax = 3.09495 + text = "" + item [9]: + class = "IntervalTier" + name = "Phonetic" + xmin = 0 + xmax = 3.09495 + intervals: size = 43 + intervals [1]: + xmin = 0 + xmax = 0.3 + text = "" + intervals [2]: + xmin = 0.3 + xmax = 0.335872 + text = "D" + intervals [3]: + xmin = 0.335872 + xmax = 0.392995 + text = "I" + intervals [4]: + xmin = 0.392995 + xmax = 0.475802 + text = "s" + intervals [5]: + xmin = 0.475802 + xmax = 0.508744 + text = "On" + intervals [6]: + xmin = 0.508744 + xmax = 0.544 + text = "n" + intervals [7]: + xmin = 0.544 + xmax = 0.585748 + text = "j" + intervals [8]: + xmin = 0.585748 + xmax = 0.666743 + text = "u:" + intervals [9]: + xmin = 0.666743 + xmax = 0.718499 + text = "d" + intervals [10]: + xmin = 0.718499 + xmax = 0.729499 + text = "H" + intervals [11]: + xmin = 0.729499 + xmax = 0.773996 + text = "@" + intervals [12]: + xmin = 0.773996 + xmax = 0.86374 + text = "s" + intervals [13]: + xmin = 0.86374 + xmax = 0.956497 + text = "p" + intervals [14]: + xmin = 0.956497 + xmax = 0.970495 + text = "H" + intervals [15]: + xmin = 0.970495 + xmax = 1.021742 + text = "l" + intervals [16]: + xmin = 1.021742 + xmax = 1.211242 + text = "ei" + intervals [17]: + xmin = 1.211242 + xmax = 1.247997 + text = "@" + intervals [18]: + xmin = 1.247997 + xmax = 1.289737 + text = "t" + intervals [19]: + xmin = 1.289737 + xmax = 1.363996 + text = "H" + intervals [20]: + xmin = 1.363996 + xmax = 1.382495 + text = "r" + intervals [21]: + xmin = 1.382495 + xmax = 1.47099 + text = "A" + intervals [22]: + xmin = 1.47099 + xmax = 1.540248 + text = "kt" + intervals [23]: + xmin = 1.540248 + xmax = 1.578745 + text = "s" + intervals [24]: + xmin = 1.578745 + xmax = 1.629742 + text = "Om" + intervals [25]: + xmin = 1.629742 + xmax = 1.709245 + text = "m" + intervals [26]: + xmin = 1.709245 + xmax = 1.824488 + text = "o:" + intervals [27]: + xmin = 1.824488 + xmax = 1.877995 + text = "k" + intervals [28]: + xmin = 1.877995 + xmax = 1.943241 + text = "H" + intervals [29]: + xmin = 1.943241 + xmax = 2.037495 + text = "V" + intervals [30]: + xmin = 2.037495 + xmax = 2.085242 + text = "s" + intervals [31]: + xmin = 2.085242 + xmax = 2.130251 + text = "t" + intervals [32]: + xmin = 2.130251 + xmax = 2.1485 + text = "H" + intervals [33]: + xmin = 2.1485 + xmax = 2.173498 + text = "@" + intervals [34]: + xmin = 2.173498 + xmax = 2.233495 + text = "m" + intervals [35]: + xmin = 2.233495 + xmax = 2.302248 + text = "@" + intervals [36]: + xmin = 2.302248 + xmax = 2.367811 + text = "z" + intervals [37]: + xmin = 2.367811 + xmax = 2.407001 + text = "D" + intervals [38]: + xmin = 2.407001 + xmax = 2.447748 + text = "@" + intervals [39]: + xmin = 2.447748 + xmax = 2.480496 + text = "n" + intervals [40]: + xmin = 2.480496 + xmax = 2.587739 + text = "E" + intervals [41]: + xmin = 2.587739 + xmax = 2.645747 + text = "v" + intervals [42]: + xmin = 2.645747 + xmax = 2.794988 + text = "@" + intervals [43]: + xmin = 2.794988 + xmax = 3.09495 + text = "" + item [10]: + class = "TextTier" + name = "Tone" + xmin = 0 + xmax = 3.09495 + points: size = 7 + points [1]: + number = 0.61083 + mark = "H*" + points [2]: + number = 1.020081 + mark = "!H*" + points [3]: + number = 1.179234 + mark = "L-" + points [4]: + number = 1.788564 + mark = "L+H*" + points [5]: + number = 2.525217 + mark = "!H*" + points [6]: + number = 2.70938 + mark = "L-" + points [7]: + number = 2.763947 + mark = "L%" + item [11]: + class = "IntervalTier" + name = "Foot" + xmin = 0 + xmax = 3.09495 + intervals: size = 8 + intervals [1]: + xmin = 0 + xmax = 0.475802 + text = "" + intervals [2]: + xmin = 0.475802 + xmax = 0.773996 + text = "F" + intervals [3]: + xmin = 0.773996 + xmax = 1.247997 + text = "F" + intervals [4]: + xmin = 1.247997 + xmax = 1.578745 + text = "F" + intervals [5]: + xmin = 1.578745 + xmax = 1.824488 + text = "F" + intervals [6]: + xmin = 1.824488 + xmax = 2.480496 + text = "F" + intervals [7]: + xmin = 2.480496 + xmax = 2.794988 + text = "F" + intervals [8]: + xmin = 2.794988 + xmax = 3.09495 + text = "" diff --git a/inst/extdata/rawDemoData/annotationFiles/msajc057.hlb b/inst/extdata/rawDemoData/annotationFiles/msajc057.hlb new file mode 100644 index 00000000..5504da4b --- /dev/null +++ b/inst/extdata/rawDemoData/annotationFiles/msajc057.hlb @@ -0,0 +1,252 @@ +**EMU hierarchical labels** +209 +Syllable Syllable +114 W +115 S +116 W +117 S +118 W +119 S +120 S +121 S +122 W +123 W +124 W +125 S +126 W + +Word Word Accent Text +2 F W this +15 C W new +25 C S display +44 C W attracts +65 C S more +73 C W customers +97 F W than +106 C S ever + +Foot Foot +51 F +66 F +74 F +76 F +98 F +107 F + +Phoneme Phoneme +127 D +128 I +129 s +130 n +131 j +132 u: +133 d +134 @ +135 s +136 p +137 l +138 ei +139 @ +140 t +141 r +142 A +143 k +144 t +145 s +146 m +147 o: +148 k +149 V +150 s +151 t +152 @ +153 m +154 @ +155 z +156 D +157 @ +158 n +159 E +160 v +161 @ + +Phonetic Phonetic +162 D +163 I +164 s +165 On +166 n +167 j +168 u: +169 d +170 H +171 @ +172 s +173 p +174 H +175 l +176 ei +177 @ +178 t +179 H +180 r +181 A +182 kt +183 s +184 Om +185 m +186 o: +187 k +188 H +189 V +190 s +191 t +192 H +193 @ +194 m +195 @ +196 z +197 D +198 @ +199 n +200 E +201 v +202 @ + +Tone Tone +203 H* +204 !H* +205 L- +206 L+H* +207 !H* +208 L- +209 L% + +Utterance Utterance +8 + +Intonational Intonational +7 L% + +Intermediate Intermediate +5 L- +47 L- + + +2 114 127 128 129 162 163 164 +5 2 15 25 114 115 116 117 127 128 129 130 131 132 133 134 135 136 137 138 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 203 204 +7 2 5 15 25 44 47 51 65 66 73 74 76 97 98 106 107 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 206 207 +8 2 5 7 15 25 44 47 51 65 66 73 74 76 97 98 106 107 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 206 207 +15 115 130 131 132 165 166 167 168 203 +25 116 117 133 134 135 136 137 138 169 170 171 172 173 174 175 176 204 +44 118 119 139 140 141 142 143 144 145 177 178 179 180 181 182 183 +47 44 65 73 97 106 118 119 120 121 122 123 124 125 126 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 206 207 +51 115 116 130 131 132 133 134 165 166 167 168 169 170 171 203 +65 120 146 147 184 185 186 206 +66 117 118 135 136 137 138 139 172 173 174 175 176 177 204 +73 121 122 123 148 149 150 151 152 153 154 155 187 188 189 190 191 192 193 194 195 196 +74 119 140 141 142 143 144 145 178 179 180 181 182 183 +76 120 146 147 184 185 186 206 +97 124 156 157 158 197 198 199 +98 121 122 123 124 148 149 150 151 152 153 154 155 156 157 158 187 188 189 190 191 192 193 194 195 196 197 198 199 +106 125 126 159 160 161 200 201 202 207 +107 125 126 159 160 161 200 201 202 207 +114 127 128 129 162 163 164 +115 130 131 132 165 166 167 168 203 +116 133 134 169 170 171 +117 135 136 137 138 172 173 174 175 176 204 +118 139 177 +119 140 141 142 143 144 145 178 179 180 181 182 183 +120 146 147 184 185 186 206 +121 148 149 187 188 189 +122 150 151 152 190 191 192 193 +123 153 154 155 194 195 196 +124 156 157 158 197 198 199 +125 159 200 207 +126 160 161 201 202 +127 162 +128 163 +129 164 +130 165 166 +131 167 +132 168 +133 169 170 +134 171 +135 172 +136 173 174 +137 175 +138 176 +139 177 +140 178 179 +141 180 +142 181 +143 182 +144 182 +145 183 +146 184 185 +147 186 +148 187 188 +149 189 +150 190 +151 191 192 +152 193 +153 194 +154 195 +155 196 +156 197 +157 198 +158 199 +159 200 +160 201 +161 202 +162 +163 +164 +165 +166 +167 +168 +169 +170 +171 +172 +173 +174 +175 +176 +177 +178 +179 +180 +181 +182 +183 +184 +185 +186 +187 +188 +189 +190 +191 +192 +193 +194 +195 +196 +197 +198 +199 +200 +201 +202 +203 +204 +205 +206 +207 +208 +209 + +0 + diff --git a/inst/extdata/rawDemoData/annotationFiles/msajc057.lab b/inst/extdata/rawDemoData/annotationFiles/msajc057.lab new file mode 100644 index 00000000..bc6fb696 --- /dev/null +++ b/inst/extdata/rawDemoData/annotationFiles/msajc057.lab @@ -0,0 +1,45 @@ +signal msajc057 +nfields 1 +# + 0.300000 125 H# + 0.335872 125 D + 0.392995 125 I + 0.475802 125 s + 0.508744 125 On + 0.544000 125 n + 0.585748 125 j + 0.666743 125 u: + 0.718499 125 d + 0.729499 125 H + 0.773996 125 @ + 0.863740 125 s + 0.956497 125 p + 0.970495 125 H + 1.021742 125 l + 1.211242 125 ei + 1.247997 125 @ + 1.289737 125 t + 1.363996 125 H + 1.382495 125 r + 1.470990 125 A + 1.540248 125 kt + 1.578745 125 s + 1.629742 125 Om + 1.709245 125 m + 1.824488 125 o: + 1.877995 125 k + 1.943241 125 H + 2.037495 125 V + 2.085242 125 s + 2.130251 125 t + 2.148500 125 H + 2.173498 125 @ + 2.233495 125 m + 2.302248 125 @ + 2.367811 125 z + 2.407001 125 D + 2.447748 125 @ + 2.480496 125 n + 2.587739 125 E + 2.645747 125 v + 2.794988 125 @ diff --git a/inst/extdata/rawDemoData/annotationFiles/msajc057.par b/inst/extdata/rawDemoData/annotationFiles/msajc057.par new file mode 100644 index 00000000..10d4fc33 --- /dev/null +++ b/inst/extdata/rawDemoData/annotationFiles/msajc057.par @@ -0,0 +1,63 @@ +LHD: Partitur 1.2.16 +REP: unknown +SNB: 2 +SAM: 20000 +SBF: 01 +SSB: 16 +NCH: 1 +SPN: unknown +LBD: +KAN: 0 DIs +KAN: 1 nju: +KAN: 2 dIspleI +KAN: 3 @tr{kts +KAN: 4 mO: +KAN: 5 kVst@m@z +KAN: 6 D@n +KAN: 7 ev@ +ORT: 0 this +ORT: 1 new +ORT: 2 display +ORT: 3 attracts +ORT: 4 more +ORT: 5 customers +ORT: 6 than +ORT: 7 ever +TRN: 6000 49199 0,1,2,3,4,5,6,7 this new display attracts more customers than ever +MAU: 0 5999 -1 +MAU: 6000 999 0 D +MAU: 7000 799 0 I +MAU: 7800 2399 0 s +MAU: 10200 599 1 n +MAU: 10800 1799 1 j +MAU: 12600 1599 1 u: +MAU: 14200 599 2 d +MAU: 14800 799 2 I +MAU: 15600 3199 2 s +MAU: 18800 599 2 p +MAU: 19400 1599 2 l +MAU: 21000 3999 2 eI +MAU: 25000 1199 3 @ +MAU: 26200 999 3 t +MAU: 27200 599 3 r +MAU: 27800 1799 3 { +MAU: 29600 599 3 k +MAU: 30200 599 3 t +MAU: 30800 1399 3 s +MAU: 32200 1999 4 m +MAU: 34200 3199 4 O: +MAU: 37400 1799 5 k +MAU: 39200 1399 5 V +MAU: 40600 1799 5 s +MAU: 42400 599 5 t +MAU: 43000 799 5 @ +MAU: 43800 1199 5 m +MAU: 45000 999 5 @ +MAU: 46000 1599 5 z +MAU: 47600 799 6 D +MAU: 48400 599 6 @ +MAU: 49000 799 6 n +MAU: 49800 1999 7 e +MAU: 51800 1399 7 v +MAU: 53200 1999 7 @ +MAU: 55200 6399 -1 diff --git a/inst/extdata/rawDemoData/annotationFiles/msajc057.parmanipulated b/inst/extdata/rawDemoData/annotationFiles/msajc057.parmanipulated new file mode 100644 index 00000000..10d4fc33 --- /dev/null +++ b/inst/extdata/rawDemoData/annotationFiles/msajc057.parmanipulated @@ -0,0 +1,63 @@ +LHD: Partitur 1.2.16 +REP: unknown +SNB: 2 +SAM: 20000 +SBF: 01 +SSB: 16 +NCH: 1 +SPN: unknown +LBD: +KAN: 0 DIs +KAN: 1 nju: +KAN: 2 dIspleI +KAN: 3 @tr{kts +KAN: 4 mO: +KAN: 5 kVst@m@z +KAN: 6 D@n +KAN: 7 ev@ +ORT: 0 this +ORT: 1 new +ORT: 2 display +ORT: 3 attracts +ORT: 4 more +ORT: 5 customers +ORT: 6 than +ORT: 7 ever +TRN: 6000 49199 0,1,2,3,4,5,6,7 this new display attracts more customers than ever +MAU: 0 5999 -1 +MAU: 6000 999 0 D +MAU: 7000 799 0 I +MAU: 7800 2399 0 s +MAU: 10200 599 1 n +MAU: 10800 1799 1 j +MAU: 12600 1599 1 u: +MAU: 14200 599 2 d +MAU: 14800 799 2 I +MAU: 15600 3199 2 s +MAU: 18800 599 2 p +MAU: 19400 1599 2 l +MAU: 21000 3999 2 eI +MAU: 25000 1199 3 @ +MAU: 26200 999 3 t +MAU: 27200 599 3 r +MAU: 27800 1799 3 { +MAU: 29600 599 3 k +MAU: 30200 599 3 t +MAU: 30800 1399 3 s +MAU: 32200 1999 4 m +MAU: 34200 3199 4 O: +MAU: 37400 1799 5 k +MAU: 39200 1399 5 V +MAU: 40600 1799 5 s +MAU: 42400 599 5 t +MAU: 43000 799 5 @ +MAU: 43800 1199 5 m +MAU: 45000 999 5 @ +MAU: 46000 1599 5 z +MAU: 47600 799 6 D +MAU: 48400 599 6 @ +MAU: 49000 799 6 n +MAU: 49800 1999 7 e +MAU: 51800 1399 7 v +MAU: 53200 1999 7 @ +MAU: 55200 6399 -1 diff --git a/inst/extdata/rawDemoData/annotationFiles/msajc057.tone b/inst/extdata/rawDemoData/annotationFiles/msajc057.tone new file mode 100644 index 00000000..e5ff0f16 --- /dev/null +++ b/inst/extdata/rawDemoData/annotationFiles/msajc057.tone @@ -0,0 +1,10 @@ +signal msajc057 +nfields 1 +# + 0.611830 125 H* + 1.021081 125 !H* + 1.180234 125 L- + 1.789564 125 L+H* + 2.526217 125 !H* + 2.710380 125 L- + 2.764947 125 L% diff --git a/inst/extdata/rawDemoData/annotationFiles/msajc057_annot.json b/inst/extdata/rawDemoData/annotationFiles/msajc057_annot.json new file mode 100644 index 00000000..7a8f6c4b --- /dev/null +++ b/inst/extdata/rawDemoData/annotationFiles/msajc057_annot.json @@ -0,0 +1,1735 @@ +{ + "name": "msajc057", + "annotates": "msajc057.wav", + "sampleRate": 20000, + "levels": [ + { + "name": "Utterance", + "type": "ITEM", + "items": [ + { + "id": 8, + "labels": [ + { + "name": "Utterance", + "value": "" + } + ] + } + ] + }, + { + "name": "Intonational", + "type": "ITEM", + "items": [ + { + "id": 7, + "labels": [ + { + "name": "Intonational", + "value": "L%" + } + ] + } + ] + }, + { + "name": "Intermediate", + "type": "ITEM", + "items": [ + { + "id": 5, + "labels": [ + { + "name": "Intermediate", + "value": "L-" + } + ] + }, + { + "id": 47, + "labels": [ + { + "name": "Intermediate", + "value": "L-" + } + ] + } + ] + }, + { + "name": "Word", + "type": "ITEM", + "items": [ + { + "id": 2, + "labels": [ + { + "name": "Word", + "value": "F" + }, + { + "name": "Accent", + "value": "W" + }, + { + "name": "Text", + "value": "this" + } + ] + }, + { + "id": 15, + "labels": [ + { + "name": "Word", + "value": "C" + }, + { + "name": "Accent", + "value": "W" + }, + { + "name": "Text", + "value": "new" + } + ] + }, + { + "id": 25, + "labels": [ + { + "name": "Word", + "value": "C" + }, + { + "name": "Accent", + "value": "S" + }, + { + "name": "Text", + "value": "display" + } + ] + }, + { + "id": 44, + "labels": [ + { + "name": "Word", + "value": "C" + }, + { + "name": "Accent", + "value": "W" + }, + { + "name": "Text", + "value": "attracts" + } + ] + }, + { + "id": 65, + "labels": [ + { + "name": "Word", + "value": "C" + }, + { + "name": "Accent", + "value": "S" + }, + { + "name": "Text", + "value": "more" + } + ] + }, + { + "id": 73, + "labels": [ + { + "name": "Word", + "value": "C" + }, + { + "name": "Accent", + "value": "W" + }, + { + "name": "Text", + "value": "customers" + } + ] + }, + { + "id": 97, + "labels": [ + { + "name": "Word", + "value": "F" + }, + { + "name": "Accent", + "value": "W" + }, + { + "name": "Text", + "value": "than" + } + ] + }, + { + "id": 106, + "labels": [ + { + "name": "Word", + "value": "C" + }, + { + "name": "Accent", + "value": "S" + }, + { + "name": "Text", + "value": "ever" + } + ] + } + ] + }, + { + "name": "Syllable", + "type": "ITEM", + "items": [ + { + "id": 114, + "labels": [ + { + "name": "Syllable", + "value": "W" + } + ] + }, + { + "id": 115, + "labels": [ + { + "name": "Syllable", + "value": "S" + } + ] + }, + { + "id": 116, + "labels": [ + { + "name": "Syllable", + "value": "W" + } + ] + }, + { + "id": 117, + "labels": [ + { + "name": "Syllable", + "value": "S" + } + ] + }, + { + "id": 118, + "labels": [ + { + "name": "Syllable", + "value": "W" + } + ] + }, + { + "id": 119, + "labels": [ + { + "name": "Syllable", + "value": "S" + } + ] + }, + { + "id": 120, + "labels": [ + { + "name": "Syllable", + "value": "S" + } + ] + }, + { + "id": 121, + "labels": [ + { + "name": "Syllable", + "value": "S" + } + ] + }, + { + "id": 122, + "labels": [ + { + "name": "Syllable", + "value": "W" + } + ] + }, + { + "id": 123, + "labels": [ + { + "name": "Syllable", + "value": "W" + } + ] + }, + { + "id": 124, + "labels": [ + { + "name": "Syllable", + "value": "W" + } + ] + }, + { + "id": 125, + "labels": [ + { + "name": "Syllable", + "value": "S" + } + ] + }, + { + "id": 126, + "labels": [ + { + "name": "Syllable", + "value": "W" + } + ] + } + ] + }, + { + "name": "Phoneme", + "type": "ITEM", + "items": [ + { + "id": 127, + "labels": [ + { + "name": "Phoneme", + "value": "D" + } + ] + }, + { + "id": 128, + "labels": [ + { + "name": "Phoneme", + "value": "I" + } + ] + }, + { + "id": 129, + "labels": [ + { + "name": "Phoneme", + "value": "s" + } + ] + }, + { + "id": 130, + "labels": [ + { + "name": "Phoneme", + "value": "n" + } + ] + }, + { + "id": 131, + "labels": [ + { + "name": "Phoneme", + "value": "j" + } + ] + }, + { + "id": 132, + "labels": [ + { + "name": "Phoneme", + "value": "u:" + } + ] + }, + { + "id": 133, + "labels": [ + { + "name": "Phoneme", + "value": "d" + } + ] + }, + { + "id": 134, + "labels": [ + { + "name": "Phoneme", + "value": "@" + } + ] + }, + { + "id": 135, + "labels": [ + { + "name": "Phoneme", + "value": "s" + } + ] + }, + { + "id": 136, + "labels": [ + { + "name": "Phoneme", + "value": "p" + } + ] + }, + { + "id": 137, + "labels": [ + { + "name": "Phoneme", + "value": "l" + } + ] + }, + { + "id": 138, + "labels": [ + { + "name": "Phoneme", + "value": "ei" + } + ] + }, + { + "id": 139, + "labels": [ + { + "name": "Phoneme", + "value": "@" + } + ] + }, + { + "id": 140, + "labels": [ + { + "name": "Phoneme", + "value": "t" + } + ] + }, + { + "id": 141, + "labels": [ + { + "name": "Phoneme", + "value": "r" + } + ] + }, + { + "id": 142, + "labels": [ + { + "name": "Phoneme", + "value": "A" + } + ] + }, + { + "id": 143, + "labels": [ + { + "name": "Phoneme", + "value": "k" + } + ] + }, + { + "id": 144, + "labels": [ + { + "name": "Phoneme", + "value": "t" + } + ] + }, + { + "id": 145, + "labels": [ + { + "name": "Phoneme", + "value": "s" + } + ] + }, + { + "id": 146, + "labels": [ + { + "name": "Phoneme", + "value": "m" + } + ] + }, + { + "id": 147, + "labels": [ + { + "name": "Phoneme", + "value": "o:" + } + ] + }, + { + "id": 148, + "labels": [ + { + "name": "Phoneme", + "value": "k" + } + ] + }, + { + "id": 149, + "labels": [ + { + "name": "Phoneme", + "value": "V" + } + ] + }, + { + "id": 150, + "labels": [ + { + "name": "Phoneme", + "value": "s" + } + ] + }, + { + "id": 151, + "labels": [ + { + "name": "Phoneme", + "value": "t" + } + ] + }, + { + "id": 152, + "labels": [ + { + "name": "Phoneme", + "value": "@" + } + ] + }, + { + "id": 153, + "labels": [ + { + "name": "Phoneme", + "value": "m" + } + ] + }, + { + "id": 154, + "labels": [ + { + "name": "Phoneme", + "value": "@" + } + ] + }, + { + "id": 155, + "labels": [ + { + "name": "Phoneme", + "value": "z" + } + ] + }, + { + "id": 156, + "labels": [ + { + "name": "Phoneme", + "value": "D" + } + ] + }, + { + "id": 157, + "labels": [ + { + "name": "Phoneme", + "value": "@" + } + ] + }, + { + "id": 158, + "labels": [ + { + "name": "Phoneme", + "value": "n" + } + ] + }, + { + "id": 159, + "labels": [ + { + "name": "Phoneme", + "value": "E" + } + ] + }, + { + "id": 160, + "labels": [ + { + "name": "Phoneme", + "value": "v" + } + ] + }, + { + "id": 161, + "labels": [ + { + "name": "Phoneme", + "value": "@" + } + ] + } + ] + }, + { + "name": "Phonetic", + "type": "SEGMENT", + "items": [ + { + "id": 162, + "sampleStart": 6000, + "sampleDur": 716, + "labels": [ + { + "name": "Phonetic", + "value": "D" + } + ] + }, + { + "id": 163, + "sampleStart": 6717, + "sampleDur": 1141, + "labels": [ + { + "name": "Phonetic", + "value": "I" + } + ] + }, + { + "id": 164, + "sampleStart": 7859, + "sampleDur": 1656, + "labels": [ + { + "name": "Phonetic", + "value": "s" + } + ] + }, + { + "id": 165, + "sampleStart": 9516, + "sampleDur": 657, + "labels": [ + { + "name": "Phonetic", + "value": "On" + } + ] + }, + { + "id": 166, + "sampleStart": 10174, + "sampleDur": 705, + "labels": [ + { + "name": "Phonetic", + "value": "n" + } + ] + }, + { + "id": 167, + "sampleStart": 10880, + "sampleDur": 833, + "labels": [ + { + "name": "Phonetic", + "value": "j" + } + ] + }, + { + "id": 168, + "sampleStart": 11714, + "sampleDur": 1619, + "labels": [ + { + "name": "Phonetic", + "value": "u:" + } + ] + }, + { + "id": 169, + "sampleStart": 13334, + "sampleDur": 1034, + "labels": [ + { + "name": "Phonetic", + "value": "d" + } + ] + }, + { + "id": 170, + "sampleStart": 14369, + "sampleDur": 219, + "labels": [ + { + "name": "Phonetic", + "value": "H" + } + ] + }, + { + "id": 171, + "sampleStart": 14589, + "sampleDur": 889, + "labels": [ + { + "name": "Phonetic", + "value": "@" + } + ] + }, + { + "id": 172, + "sampleStart": 15479, + "sampleDur": 1794, + "labels": [ + { + "name": "Phonetic", + "value": "s" + } + ] + }, + { + "id": 173, + "sampleStart": 17274, + "sampleDur": 1854, + "labels": [ + { + "name": "Phonetic", + "value": "p" + } + ] + }, + { + "id": 174, + "sampleStart": 19129, + "sampleDur": 279, + "labels": [ + { + "name": "Phonetic", + "value": "H" + } + ] + }, + { + "id": 175, + "sampleStart": 19409, + "sampleDur": 1024, + "labels": [ + { + "name": "Phonetic", + "value": "l" + } + ] + }, + { + "id": 176, + "sampleStart": 20434, + "sampleDur": 3789, + "labels": [ + { + "name": "Phonetic", + "value": "ei" + } + ] + }, + { + "id": 177, + "sampleStart": 24224, + "sampleDur": 734, + "labels": [ + { + "name": "Phonetic", + "value": "@" + } + ] + }, + { + "id": 178, + "sampleStart": 24959, + "sampleDur": 834, + "labels": [ + { + "name": "Phonetic", + "value": "t" + } + ] + }, + { + "id": 179, + "sampleStart": 25794, + "sampleDur": 1484, + "labels": [ + { + "name": "Phonetic", + "value": "H" + } + ] + }, + { + "id": 180, + "sampleStart": 27279, + "sampleDur": 369, + "labels": [ + { + "name": "Phonetic", + "value": "r" + } + ] + }, + { + "id": 181, + "sampleStart": 27649, + "sampleDur": 1769, + "labels": [ + { + "name": "Phonetic", + "value": "A" + } + ] + }, + { + "id": 182, + "sampleStart": 29419, + "sampleDur": 1384, + "labels": [ + { + "name": "Phonetic", + "value": "kt" + } + ] + }, + { + "id": 183, + "sampleStart": 30804, + "sampleDur": 769, + "labels": [ + { + "name": "Phonetic", + "value": "s" + } + ] + }, + { + "id": 184, + "sampleStart": 31574, + "sampleDur": 1019, + "labels": [ + { + "name": "Phonetic", + "value": "Om" + } + ] + }, + { + "id": 185, + "sampleStart": 32594, + "sampleDur": 1589, + "labels": [ + { + "name": "Phonetic", + "value": "m" + } + ] + }, + { + "id": 186, + "sampleStart": 34184, + "sampleDur": 2304, + "labels": [ + { + "name": "Phonetic", + "value": "o:" + } + ] + }, + { + "id": 187, + "sampleStart": 36489, + "sampleDur": 1069, + "labels": [ + { + "name": "Phonetic", + "value": "k" + } + ] + }, + { + "id": 188, + "sampleStart": 37559, + "sampleDur": 1304, + "labels": [ + { + "name": "Phonetic", + "value": "H" + } + ] + }, + { + "id": 189, + "sampleStart": 38864, + "sampleDur": 1884, + "labels": [ + { + "name": "Phonetic", + "value": "V" + } + ] + }, + { + "id": 190, + "sampleStart": 40749, + "sampleDur": 954, + "labels": [ + { + "name": "Phonetic", + "value": "s" + } + ] + }, + { + "id": 191, + "sampleStart": 41704, + "sampleDur": 900, + "labels": [ + { + "name": "Phonetic", + "value": "t" + } + ] + }, + { + "id": 192, + "sampleStart": 42605, + "sampleDur": 364, + "labels": [ + { + "name": "Phonetic", + "value": "H" + } + ] + }, + { + "id": 193, + "sampleStart": 42970, + "sampleDur": 498, + "labels": [ + { + "name": "Phonetic", + "value": "@" + } + ] + }, + { + "id": 194, + "sampleStart": 43469, + "sampleDur": 1199, + "labels": [ + { + "name": "Phonetic", + "value": "m" + } + ] + }, + { + "id": 195, + "sampleStart": 44669, + "sampleDur": 1374, + "labels": [ + { + "name": "Phonetic", + "value": "@" + } + ] + }, + { + "id": 196, + "sampleStart": 46044, + "sampleDur": 1311, + "labels": [ + { + "name": "Phonetic", + "value": "z" + } + ] + }, + { + "id": 197, + "sampleStart": 47356, + "sampleDur": 783, + "labels": [ + { + "name": "Phonetic", + "value": "D" + } + ] + }, + { + "id": 198, + "sampleStart": 48140, + "sampleDur": 813, + "labels": [ + { + "name": "Phonetic", + "value": "@" + } + ] + }, + { + "id": 199, + "sampleStart": 48954, + "sampleDur": 654, + "labels": [ + { + "name": "Phonetic", + "value": "n" + } + ] + }, + { + "id": 200, + "sampleStart": 49609, + "sampleDur": 2144, + "labels": [ + { + "name": "Phonetic", + "value": "E" + } + ] + }, + { + "id": 201, + "sampleStart": 51754, + "sampleDur": 1159, + "labels": [ + { + "name": "Phonetic", + "value": "v" + } + ] + }, + { + "id": 202, + "sampleStart": 52914, + "sampleDur": 2984, + "labels": [ + { + "name": "Phonetic", + "value": "@" + } + ] + } + ] + }, + { + "name": "Tone", + "type": "EVENT", + "items": [ + { + "id": 203, + "samplePoint": 12237, + "labels": [ + { + "name": "Tone", + "value": "H*" + } + ] + }, + { + "id": 204, + "samplePoint": 20422, + "labels": [ + { + "name": "Tone", + "value": "!H*" + } + ] + }, + { + "id": 205, + "samplePoint": 23605, + "labels": [ + { + "name": "Tone", + "value": "L-" + } + ] + }, + { + "id": 206, + "samplePoint": 35791, + "labels": [ + { + "name": "Tone", + "value": "L+H*" + } + ] + }, + { + "id": 207, + "samplePoint": 50524, + "labels": [ + { + "name": "Tone", + "value": "!H*" + } + ] + }, + { + "id": 208, + "samplePoint": 54208, + "labels": [ + { + "name": "Tone", + "value": "L-" + } + ] + }, + { + "id": 209, + "samplePoint": 55299, + "labels": [ + { + "name": "Tone", + "value": "L%" + } + ] + } + ] + }, + { + "name": "Foot", + "type": "ITEM", + "items": [ + { + "id": 51, + "labels": [ + { + "name": "Foot", + "value": "F" + } + ] + }, + { + "id": 66, + "labels": [ + { + "name": "Foot", + "value": "F" + } + ] + }, + { + "id": 74, + "labels": [ + { + "name": "Foot", + "value": "F" + } + ] + }, + { + "id": 76, + "labels": [ + { + "name": "Foot", + "value": "F" + } + ] + }, + { + "id": 98, + "labels": [ + { + "name": "Foot", + "value": "F" + } + ] + }, + { + "id": 107, + "labels": [ + { + "name": "Foot", + "value": "F" + } + ] + } + ] + } + ], + "links": [ + { + "fromID": 8, + "toID": 7 + }, + { + "fromID": 7, + "toID": 51 + }, + { + "fromID": 7, + "toID": 66 + }, + { + "fromID": 7, + "toID": 74 + }, + { + "fromID": 7, + "toID": 76 + }, + { + "fromID": 7, + "toID": 98 + }, + { + "fromID": 7, + "toID": 107 + }, + { + "fromID": 7, + "toID": 5 + }, + { + "fromID": 7, + "toID": 47 + }, + { + "fromID": 5, + "toID": 2 + }, + { + "fromID": 5, + "toID": 15 + }, + { + "fromID": 5, + "toID": 25 + }, + { + "fromID": 47, + "toID": 44 + }, + { + "fromID": 47, + "toID": 65 + }, + { + "fromID": 47, + "toID": 73 + }, + { + "fromID": 47, + "toID": 97 + }, + { + "fromID": 47, + "toID": 106 + }, + { + "fromID": 2, + "toID": 114 + }, + { + "fromID": 15, + "toID": 115 + }, + { + "fromID": 25, + "toID": 116 + }, + { + "fromID": 25, + "toID": 117 + }, + { + "fromID": 44, + "toID": 118 + }, + { + "fromID": 44, + "toID": 119 + }, + { + "fromID": 65, + "toID": 120 + }, + { + "fromID": 73, + "toID": 121 + }, + { + "fromID": 73, + "toID": 122 + }, + { + "fromID": 73, + "toID": 123 + }, + { + "fromID": 97, + "toID": 124 + }, + { + "fromID": 106, + "toID": 125 + }, + { + "fromID": 106, + "toID": 126 + }, + { + "fromID": 114, + "toID": 127 + }, + { + "fromID": 114, + "toID": 128 + }, + { + "fromID": 114, + "toID": 129 + }, + { + "fromID": 115, + "toID": 130 + }, + { + "fromID": 115, + "toID": 131 + }, + { + "fromID": 115, + "toID": 132 + }, + { + "fromID": 115, + "toID": 203 + }, + { + "fromID": 116, + "toID": 133 + }, + { + "fromID": 116, + "toID": 134 + }, + { + "fromID": 117, + "toID": 135 + }, + { + "fromID": 117, + "toID": 136 + }, + { + "fromID": 117, + "toID": 137 + }, + { + "fromID": 117, + "toID": 138 + }, + { + "fromID": 117, + "toID": 204 + }, + { + "fromID": 118, + "toID": 139 + }, + { + "fromID": 119, + "toID": 140 + }, + { + "fromID": 119, + "toID": 141 + }, + { + "fromID": 119, + "toID": 142 + }, + { + "fromID": 119, + "toID": 143 + }, + { + "fromID": 119, + "toID": 144 + }, + { + "fromID": 119, + "toID": 145 + }, + { + "fromID": 120, + "toID": 146 + }, + { + "fromID": 120, + "toID": 147 + }, + { + "fromID": 120, + "toID": 206 + }, + { + "fromID": 121, + "toID": 148 + }, + { + "fromID": 121, + "toID": 149 + }, + { + "fromID": 122, + "toID": 150 + }, + { + "fromID": 122, + "toID": 151 + }, + { + "fromID": 122, + "toID": 152 + }, + { + "fromID": 123, + "toID": 153 + }, + { + "fromID": 123, + "toID": 154 + }, + { + "fromID": 123, + "toID": 155 + }, + { + "fromID": 124, + "toID": 156 + }, + { + "fromID": 124, + "toID": 157 + }, + { + "fromID": 124, + "toID": 158 + }, + { + "fromID": 125, + "toID": 159 + }, + { + "fromID": 125, + "toID": 207 + }, + { + "fromID": 126, + "toID": 160 + }, + { + "fromID": 126, + "toID": 161 + }, + { + "fromID": 127, + "toID": 162 + }, + { + "fromID": 128, + "toID": 163 + }, + { + "fromID": 129, + "toID": 164 + }, + { + "fromID": 130, + "toID": 165 + }, + { + "fromID": 130, + "toID": 166 + }, + { + "fromID": 131, + "toID": 167 + }, + { + "fromID": 132, + "toID": 168 + }, + { + "fromID": 133, + "toID": 169 + }, + { + "fromID": 133, + "toID": 170 + }, + { + "fromID": 134, + "toID": 171 + }, + { + "fromID": 135, + "toID": 172 + }, + { + "fromID": 136, + "toID": 173 + }, + { + "fromID": 136, + "toID": 174 + }, + { + "fromID": 137, + "toID": 175 + }, + { + "fromID": 138, + "toID": 176 + }, + { + "fromID": 139, + "toID": 177 + }, + { + "fromID": 140, + "toID": 178 + }, + { + "fromID": 140, + "toID": 179 + }, + { + "fromID": 141, + "toID": 180 + }, + { + "fromID": 142, + "toID": 181 + }, + { + "fromID": 143, + "toID": 182 + }, + { + "fromID": 144, + "toID": 182 + }, + { + "fromID": 145, + "toID": 183 + }, + { + "fromID": 146, + "toID": 184 + }, + { + "fromID": 146, + "toID": 185 + }, + { + "fromID": 147, + "toID": 186 + }, + { + "fromID": 148, + "toID": 187 + }, + { + "fromID": 148, + "toID": 188 + }, + { + "fromID": 149, + "toID": 189 + }, + { + "fromID": 150, + "toID": 190 + }, + { + "fromID": 151, + "toID": 191 + }, + { + "fromID": 151, + "toID": 192 + }, + { + "fromID": 152, + "toID": 193 + }, + { + "fromID": 153, + "toID": 194 + }, + { + "fromID": 154, + "toID": 195 + }, + { + "fromID": 155, + "toID": 196 + }, + { + "fromID": 156, + "toID": 197 + }, + { + "fromID": 157, + "toID": 198 + }, + { + "fromID": 158, + "toID": 199 + }, + { + "fromID": 159, + "toID": 200 + }, + { + "fromID": 160, + "toID": 201 + }, + { + "fromID": 161, + "toID": 202 + }, + { + "fromID": 51, + "toID": 115 + }, + { + "fromID": 51, + "toID": 116 + }, + { + "fromID": 66, + "toID": 117 + }, + { + "fromID": 66, + "toID": 118 + }, + { + "fromID": 74, + "toID": 119 + }, + { + "fromID": 76, + "toID": 120 + }, + { + "fromID": 98, + "toID": 121 + }, + { + "fromID": 98, + "toID": 122 + }, + { + "fromID": 98, + "toID": 123 + }, + { + "fromID": 98, + "toID": 124 + }, + { + "fromID": 107, + "toID": 125 + }, + { + "fromID": 107, + "toID": 126 + } + ] +} + diff --git a/inst/extdata/rawDemoData/configFiles/ae.tpl b/inst/extdata/rawDemoData/configFiles/ae.tpl new file mode 100644 index 00000000..7cd9405c --- /dev/null +++ b/inst/extdata/rawDemoData/configFiles/ae.tpl @@ -0,0 +1,70 @@ +! this file was generated by tpled + +level Utterance +level Intonational Utterance +level Intermediate Intonational +level Word Intermediate +level Syllable Word +level Phoneme Syllable +level Phonetic Phoneme many-to-many +level Tone Syllable +level Foot Intonational +level Syllable Foot + + +label Word Accent +label Word Text + + + +labfile Phonetic :type SEGMENT :extension lab :time-factor 1000 +labfile Tone :type EVENT :extension tone :time-factor 1000 + + +legal Phoneme vowel A E I O V U ai ei oi i@ u@ au @u @: @ a: e: i: o: u: + +legal Phoneme stop p tS dZ t k b d g + +legal Phoneme nasal m n N + +legal Phoneme fricative f v s z S Z h D T + +legal Phoneme approximant w j l r + +legal Phoneme other H + + +legal Phonetic vowel A E EC I O V U ai ei oi i@ u@ au @u @: @ = a: e: i: o: u: + +legal Phonetic stop p tS dZ t k b d g + +legal Phonetic nasal m n + +legal Phonetic fricative f v s z S Z h D D- T + +legal Phonetic approximant w j l r rr Or + +legal Phonetic other H + + + + +path lab labels +path tone labels +path hlb labels +path wav signals +path dft signals +path fms signals + + +track samples wav +track dft dft +track fm fms + + +set PrimaryExtension wav +set LabelTracks spectrogram +set HierarchyViewLevels Intonational Intermediate Word Syllable Phoneme +set SignalViewLevels Phonetic Tone + + diff --git a/inst/extdata/rawDemoData/scriptFiles/demoSignalScalerForManyFiles.m b/inst/extdata/rawDemoData/scriptFiles/demoSignalScalerForManyFiles.m new file mode 100644 index 00000000..e46c4baa --- /dev/null +++ b/inst/extdata/rawDemoData/scriptFiles/demoSignalScalerForManyFiles.m @@ -0,0 +1,35 @@ +function [] = demoSignalScalerForManyFiles(options) + % Demo function for the emuR/Matlab interface. + % + % This file reads in any number of WAV files and multiplies their samples with + % the scaling factor provided in the arguments. It then stores many .mat files + % that contain the fields required by emuR: data, sampleRate, startTime, + % units, comment. + % + % To use this function in emuR, pass oneMatlabFunctionCallPerFile = FALSE to + % add_signalViaMatlab(). + arguments + options.inputFilename (1, :) string + options.outputFilename (1, :) string + options.scalingFactor (1, :) double + end + + for index = 1:size(options.inputFilename, 2) + % Metadata that will be stored in the results file: + % This comment should succinctly describe the signal processing your Matlab script runs. + comment = "Demo Signal Scaler for many files"; + % Digital audio signals do not really have a unit, so we leave this empty. + units = ""; + % If the signal this function produces does not start at time=0, provide an alternative start time here. + startTime = 0; + + % Read input WAV file + [data, sampleRate] = audioread(options.inputFilename(1, index)); + data = data * options.scalingFactor(1, index); + + % Save result + units = convertStringsToChars(units); + comment = convertStringsToChars(comment); + save(options.outputFilename(1, index), "data", "sampleRate", "startTime", "units", "comment"); + end +end diff --git a/inst/extdata/rawDemoData/scriptFiles/demoSignalScalerForOneFile.m b/inst/extdata/rawDemoData/scriptFiles/demoSignalScalerForOneFile.m new file mode 100644 index 00000000..6a8001b9 --- /dev/null +++ b/inst/extdata/rawDemoData/scriptFiles/demoSignalScalerForOneFile.m @@ -0,0 +1,33 @@ +function [] = demoSignalScalerForOneFile(options) + % Demo function for the emuR/Matlab interface. + % + % This file reads in one WAV file and multiplies its samples with + % the scaling factor provided in the arguments. It then stores a .mat file + % that contains the fields required by emuR: data, sampleRate, startTime, + % units, comment. + % + % To use this function in emuR, pass oneMatlabFunctionCallPerFile = TRUE to + % add_signalViaMatlab(). + arguments + options.inputFilename (1, 1) string + options.outputFilename (1, 1) string + options.scalingFactor (1, 1) double = 1 + end + + % Metadata that will be stored in the results file: + % This comment should succinctly describe the signal processing your Matlab script runs. + comment = "Demo Signal Scaler for one file"; + % Digital audio signals do not really have a unit, so we leave this empty. + units = ""; + % If the signal this function produces does not start at time=0, provide an alternative start time here. + startTime = 0; + + % Read input WAV file + [data, sampleRate] = audioread(options.inputFilename); + data = data * options.scalingFactor; + + % Save result + units = convertStringsToChars(units); + comment = convertStringsToChars(comment); + save(options.outputFilename, "data", "sampleRate", "startTime", "units", "comment"); +end diff --git a/inst/extdata/rawDemoData/txtFiles/msajc003.txt b/inst/extdata/rawDemoData/txtFiles/msajc003.txt new file mode 100644 index 00000000..203a7991 --- /dev/null +++ b/inst/extdata/rawDemoData/txtFiles/msajc003.txt @@ -0,0 +1 @@ +amongst her friends she was considered beautiful \ No newline at end of file diff --git a/inst/extdata/rawDemoData/txtFiles/msajc010.txt b/inst/extdata/rawDemoData/txtFiles/msajc010.txt new file mode 100644 index 00000000..31d31888 --- /dev/null +++ b/inst/extdata/rawDemoData/txtFiles/msajc010.txt @@ -0,0 +1 @@ +it is futile to offer any further resistance \ No newline at end of file diff --git a/inst/extdata/rawDemoData/txtFiles/msajc012.txt b/inst/extdata/rawDemoData/txtFiles/msajc012.txt new file mode 100644 index 00000000..8fa00572 --- /dev/null +++ b/inst/extdata/rawDemoData/txtFiles/msajc012.txt @@ -0,0 +1 @@ +the chill wind caused them to shiver violently \ No newline at end of file diff --git a/inst/extdata/rawDemoData/txtFiles/msajc015.txt b/inst/extdata/rawDemoData/txtFiles/msajc015.txt new file mode 100644 index 00000000..a1b04e62 --- /dev/null +++ b/inst/extdata/rawDemoData/txtFiles/msajc015.txt @@ -0,0 +1 @@ +he emphasized his strengths while concealing his weaknesses \ No newline at end of file diff --git a/inst/extdata/rawDemoData/txtFiles/msajc022.txt b/inst/extdata/rawDemoData/txtFiles/msajc022.txt new file mode 100644 index 00000000..1e541c1e --- /dev/null +++ b/inst/extdata/rawDemoData/txtFiles/msajc022.txt @@ -0,0 +1 @@ +itches are always so tempting to scratch \ No newline at end of file diff --git a/inst/extdata/rawDemoData/txtFiles/msajc023.txt b/inst/extdata/rawDemoData/txtFiles/msajc023.txt new file mode 100644 index 00000000..78b93aa2 --- /dev/null +++ b/inst/extdata/rawDemoData/txtFiles/msajc023.txt @@ -0,0 +1 @@ +I'll hedge my bets and take no risks \ No newline at end of file diff --git a/inst/extdata/rawDemoData/txtFiles/msajc057.txt b/inst/extdata/rawDemoData/txtFiles/msajc057.txt new file mode 100644 index 00000000..e3b6acfc --- /dev/null +++ b/inst/extdata/rawDemoData/txtFiles/msajc057.txt @@ -0,0 +1 @@ +this new display attracts more customers than ever \ No newline at end of file diff --git a/inst/extdata/rawDemoData/wavFiles/msajc003.wav b/inst/extdata/rawDemoData/wavFiles/msajc003.wav new file mode 100644 index 00000000..2a98570c Binary files /dev/null and b/inst/extdata/rawDemoData/wavFiles/msajc003.wav differ diff --git a/inst/extdata/rawDemoData/wavFiles/msajc010.wav b/inst/extdata/rawDemoData/wavFiles/msajc010.wav new file mode 100644 index 00000000..22f47abd Binary files /dev/null and b/inst/extdata/rawDemoData/wavFiles/msajc010.wav differ diff --git a/inst/extdata/rawDemoData/wavFiles/msajc012.wav b/inst/extdata/rawDemoData/wavFiles/msajc012.wav new file mode 100644 index 00000000..0c5dc453 Binary files /dev/null and b/inst/extdata/rawDemoData/wavFiles/msajc012.wav differ diff --git a/inst/extdata/rawDemoData/wavFiles/msajc015.wav b/inst/extdata/rawDemoData/wavFiles/msajc015.wav new file mode 100644 index 00000000..791bc23e Binary files /dev/null and b/inst/extdata/rawDemoData/wavFiles/msajc015.wav differ diff --git a/inst/extdata/rawDemoData/wavFiles/msajc022.wav b/inst/extdata/rawDemoData/wavFiles/msajc022.wav new file mode 100644 index 00000000..35f00caa Binary files /dev/null and b/inst/extdata/rawDemoData/wavFiles/msajc022.wav differ diff --git a/inst/extdata/rawDemoData/wavFiles/msajc023.wav b/inst/extdata/rawDemoData/wavFiles/msajc023.wav new file mode 100644 index 00000000..de341de0 Binary files /dev/null and b/inst/extdata/rawDemoData/wavFiles/msajc023.wav differ diff --git a/inst/extdata/rawDemoData/wavFiles/msajc057.wav b/inst/extdata/rawDemoData/wavFiles/msajc057.wav new file mode 100644 index 00000000..fb3c940a Binary files /dev/null and b/inst/extdata/rawDemoData/wavFiles/msajc057.wav differ diff --git a/inst/js/spectro-drawing.class.js b/inst/js/spectro-drawing.class.js new file mode 100644 index 00000000..8b83cfb8 --- /dev/null +++ b/inst/js/spectro-drawing.class.js @@ -0,0 +1,761 @@ + + +class SpectroDrawingClass { + + /** + * A handy class to draw a spectrogram + */ + + /////////////////////////////////// + // start: class vars + executed = false; + PI = 3.141592653589793; // value : Math.PI + TWO_PI = 6.283185307179586; // value : 2 * Math.PI + totalMax = 0; + dynRangeInDB = 50; + myWindow = { + BARTLETT: 1, + BARTLETTHANN: 2, + BLACKMAN: 3, + COSINE: 4, + GAUSS: 5, + HAMMING: 6, + HANN: 7, + LANCZOS: 8, + RECTANGULAR: 9, + TRIANGULAR: 10 + }; + imgWidth = 0; + imgHeight = 0; + upperFreq = 0; + lowerFreq = 0; + pixelRatio = 1; + heatMapColorAnchors = [ + [255, 0, 0], + [0, 255, 0], + [0, 0, 0] + ]; + samplesPerPxl = 0; + sampleRate = 0; + preEmphasisFilterFactor = 0.97; + transparency = 0; + drawHeatMapColors = false; + N = 0; + windowSizeInSecs = 0.01; + audioBuffer = undefined; + audioBufferChannels = 0; + wFunction = 0; + myFFT = undefined; + pixelHeight = 1; + internalalpha = 1; + maxPsd = 0; + HzStep = 0; + paint = []; + sin = undefined; + cos = undefined; + ceilingFreqFftIdx = 0; + floorFreqFftIdx = 0; + resultImgArr = undefined; + m = undefined; + + // end: class vars + ////////////////////////////////// + + ///////////////////////////////// + // start: math helper functions + + // used by FFT + toLinearLevel(dbLevel) { + return Math.pow(10, (dbLevel / 10)); + }; + + // calculate decadic logarithm + log10(arg) { + return Math.log(arg) / 2.302585092994046; // Math.log(x) / Math.LN10 + }; + + // calculate magintude + magnitude(real, imag) { + return Math.sqrt((real * real) + (imag * imag)); + }; + + + /** + * calculate the closest power of two that is + * greater than the passed in number + * @param num + * @returns power of two value + */ + calcClosestPowerOf2Gt(num) { + var curExp = 0; + + while (Math.pow(2, curExp) < num) { + curExp = curExp + 1; + } + + return (Math.pow(2, curExp)); + + }; + + + + // end: math helper functions + //////////////////////////////// + + /////////////////////////////////////////////////// + // start: FFT functions + + createSinAndCosTables (){ + var i, x; + var n = this.N; + this.m = parseInt((Math.log(n) / 0.6931471805599453), 10); + if (n !== (1 << this.m)) { // Make sure n is a power of 2 + // console.log('ERROR : FFT length must be power of 2'); + } + if (this.cos === undefined || n !== this.N) { + + // this means that the following is only executed + // when no COS table exists + // or n changes + + this.cos = new Float32Array(n / 2); // precompute cos table + for (x = 0; x < n / 2; x++) { + this.cos[x] = Math.cos(-2 * this.PI * x / n); + } + } + if (this.sin === undefined || n !== this.N) { + + // this means that the following is only executed + // when no COS table exists + // or n changes + + this.sin = new Float32Array(n / 2); // precompute sin table + for (x = 0; x < n / 2; x++) { + this.sin[x] = Math.sin(-2 * this.PI * x / n); + } + } + } + + /** + * apply window function and pre-emphasis from idx 0 to length + * in buffer given + * + * @param type is the chosen window Function as enum + * @param alpha is the alpha value for Window Functions (default 0.16) + * @param buffer is the zero padded magnitude spectrum + * @param length is the length to in the buffer (starting at idx 0) + * to which to apply the window to. If the buffer is [x0, x1, x2, x4] and + * length is 2 the window will be applied to [x0, x1, x2] this is needed + * to only apply function to non-zero-padded values of magnitude spectrum. + * @return the windowed/pre-emphasised buffer + */ + applyWindowFuncAndPreemph(type, alpha, buffer, length) { + // var length = buffer.length; + let i = 0; + this.alpha = alpha; + switch (type) { + case this.myWindow.BARTLETT: + for (i = 0; i < length; i++) { + if (i > 0) { + buffer[i] = this.applyPreEmph(buffer[i], buffer[i - 1]); + } + buffer[i] *= this.wFunctionBartlett(length, i); + } + break; + case this.myWindow.BARTLETTHANN: + for (i = 0; i < length; i++) { + if (i > 0) { + buffer[i] = this.applyPreEmph(buffer[i], buffer[i - 1]); + } + buffer[i] *= this.wFunctionBartlettHann(length, i); + } + break; + case this.myWindow.BLACKMAN: + this.alpha = this.alpha || 0.16; + for (i = 0; i < length; i++) { + if (i > 0) { + buffer[i] = this.applyPreEmph(buffer[i], buffer[i - 1]); + } + buffer[i] *= this.wFunctionBlackman(length, i, alpha); + } + break; + case this.myWindow.COSINE: + for (i = 0; i < length; i++) { + if (i > 0) { + buffer[i] = this.applyPreEmph(buffer[i], buffer[i - 1]); + } + buffer[i] *= this.wFunctionCosine(length, i); + } + break; + case this.myWindow.GAUSS: + this.alpha = this.alpha || 0.25; + for (i = 0; i < length; i++) { + if (i > 0) { + buffer[i] = this.applyPreEmph(buffer[i], buffer[i - 1]); + } + buffer[i] *= this.wFunctionGauss(length, i, alpha); + } + break; + case this.myWindow.HAMMING: + for (i = 0; i < length; i++) { + if (i > 0) { + buffer[i] = this.applyPreEmph(buffer[i], buffer[i - 1]); + } + buffer[i] *= this.wFunctionHamming(length, i); + } + break; + case this.myWindow.HANN: + for (i = 0; i < length; i++) { + if (i > 0) { + buffer[i] = this.applyPreEmph(buffer[i], buffer[i - 1]); + } + buffer[i] *= this.wFunctionHann(length, i); + } + break; + case this.myWindow.LANCZOS: + for (i = 0; i < length; i++) { + if (i > 0) { + buffer[i] = this.applyPreEmph(buffer[i], buffer[i - 1]); + } + buffer[i] *= this.wFunctionLanczos(length, i); + } + break; + case this.myWindow.RECTANGULAR: + for (i = 0; i < length; i++) { + if (i > 0) { + buffer[i] = this.applyPreEmph(buffer[i], buffer[i - 1]); + } + buffer[i] *= this.wFunctionRectangular(length, i); + } + break; + case this.myWindow.TRIANGULAR: + for (i = 0; i < length; i++) { + if (i > 0) { + buffer[i] = this.applyPreEmph(buffer[i], buffer[i - 1]); + } + buffer[i] *= this.wFunctionTriangular(length, i); + } + break; + } + return buffer; + }; + //////////////////////////////////// + // start: the windowing functions + + wFunctionBartlett (length, index) { + return 2 / (length - 1) * ((length - 1) / 2 - Math.abs(index - (length - 1) / 2)); + }; + + wFunctionBartlettHann (length, index) { + return 0.62 - 0.48 * Math.abs(index / (length - 1) - 0.5) - 0.38 * Math.cos(this.TWO_PI * index / (length - 1)); + }; + + wFunctionBlackman(length, index, alpha) { + var a0 = (1 - alpha) / 2; + var a1 = 0.5; + var a2 = alpha / 2; + return a0 - a1 * Math.cos(this.TWO_PI * index / (length - 1)) + a2 * Math.cos(4 * this.PI * index / (length - 1)); + }; + + wFunctionCosine(length, index) { + return Math.cos(this.PI * index / (length - 1) - this.PI / 2); + }; + + wFunctionGauss(length, index, alpha) { + return Math.pow(Math.E, -0.5 * Math.pow((index - (length - 1) / 2) / (alpha * (length - 1) / 2), 2)); + }; + + wFunctionHamming(length, index) { + return 0.54 - 0.46 * Math.cos(this.TWO_PI * index / (length - 1)); + }; + + wFunctionHann(length, index) { + return 0.5 * (1 - Math.cos(this.TWO_PI * index / (length - 1))); + }; + + wFunctionLanczos(length, index) { + var x = 2 * index / (length - 1) - 1; + return Math.sin(this.PI * x) / (this.PI * x); + }; + + wFunctionRectangular() { + return 1; + }; + + wFunctionTriangular(length, index) { + return 2 / length * (length / 2 - Math.abs(index - (length - 1) / 2)); + }; + // end: the windowing functions + /////////////////////////////////// + + /** + * calculate and apply according pre-emphasis on sample + */ + applyPreEmph(curSample, prevSample) { + return curSample - this.preEmphasisFilterFactor * prevSample; + }; + + // the FFT calculation + fft (x, y) { + // Bit-reverse + var i, j, k, n1, n2, a, c, s, t1, t2; + // Bit-reverse + j = 0; + n2 = this.N / 2; + for (i = 1; i < this.N - 1; i++) { + n1 = n2; + while (j >= n1) { + j = j - n1; + n1 = n1 / 2; + } + j = j + n1; + + if (i < j) { + t1 = x[i]; + x[i] = x[j]; + x[j] = t1; + t1 = y[i]; + y[i] = y[j]; + y[j] = t1; + } + } + + // FFT + n1 = 0; + n2 = 1; + + for (i = 0; i < this.m; i++) { + n1 = n2; + n2 = n2 + n2; + a = 0; + + for (j = 0; j < n1; j++) { + c = this.cos[a]; + s = this.sin[a]; + a += 1 << (this.m - i - 1); + + for (k = j; k < this.N; k = k + n2) { + t1 = c * x[k + n1] - s * y[k + n1]; + t2 = s * x[k + n1] + c * y[k + n1]; + x[k + n1] = x[k] - t1; + y[k + n1] = y[k] - t2; + x[k] = x[k] + t1; + y[k] = y[k] + t2; + } + } + } + }; + + // end: FFT functions + /////////////////// + +// ///////////////////////////////// +// // start: rendering function + + /** + * interpolates a 3D color space and calculate accoring + * value on that plane + * + * @param minval is the minimum value to map to (number) + * @param maxval is the maximum value to map to (number) + * @param val is the value itself (number) + * @param colors is an array of arrays containing the colors + * to interpol. against (of the form: [[255, 0, 0],[0, 255, 0],[0, 0, 255]]) + */ + convertToHeatmap = function (minval, maxval, val, colors) { + let maxIndex = colors.length - 1; + let v = (val - minval) / (maxval - minval) * maxIndex; + let i1 = Math.floor(v); + let i2 = Math.min.apply(null, [Math.floor(v) + 1, maxIndex]); + let rgb1 = colors[i1]; + let rgb2 = colors[i2]; + let f = v - i1; + let res = { + 'r': Math.floor(rgb1[0] + f * (rgb2[0] - rgb1[0])), + 'g': Math.floor(rgb1[1] + f * (rgb2[1] - rgb1[1])), + 'b': Math.floor(rgb1[2] + f * (rgb2[2] - rgb1[2])) + }; + return (res); + }; + + + /** + * draws a single line of the spectrogram into the imageResult array. + * by calculating the RGB value of the current pixel with: + * 255-(255*scaled) + * @param xIdx in the this.paint array + */ + drawVerticalLineOfSpectogram (xIdx) { + + // set upper boundary for linear interpolation + var x1 = this.pixelHeight; + var rgb, index, px, py; + + // value for first interpolation at lower boundry (height=0) + + // calculate the one sided power spectral density PSD (f, t) in Pa2/Hz + // PSD(f) proportional to 2|X(f)|2 / (t2 - t1) + var psd = (2 * Math.pow(this.paint[xIdx][1], 2)) / this.N; + var psdLog = 10 * this.log10(psd / this.maxPsd); + var scaledVal = ((psdLog + this.dynRangeInDB) / this.dynRangeInDB); + if (scaledVal > 1) { + scaledVal = 1; + } else if (scaledVal < 0) { + scaledVal = 0; + } + + for (var i = 0; i < this.paint[xIdx].length; i++) { + + var y0 = scaledVal; // !!!! set y0 to previous scaled value + + // for each value in paint[] calculate pixelHeight interpolation points + // x0=0 + // x1=pixelHeight + // if(paint[i-1]<0) paint[i-1] = 1 + // y0=paint[i-1] + // y1=paint[i] + + + // !!!! calculate next scaledValue [0...1] + psd = (2 * Math.pow(this.paint[xIdx][i], 2)) / this.N; + psdLog = 10 * this.log10(psd / this.maxPsd); + scaledVal = ((psdLog + this.dynRangeInDB) / this.dynRangeInDB); + if (scaledVal > 1) { + scaledVal = 1; + } + if (scaledVal < 0) { + scaledVal = 0; + } + + // !!!! set y1 to this scaled value + var y1 = scaledVal; + + if (this.pixelHeight >= 1) { + // do interpolation between y0 (previous scaledValue) and y1 (scaledValue now) + for (var b = 0; b < this.pixelHeight; b++) { + var y2 = y0 + (y1 - y0) / x1 * b; + + // calculate corresponding color value for interpolation point [0...255] + // console.log(this.invert); + if (this.invert) { + rgb = Math.round(255 * y2); + } else { + rgb = 255 - Math.round(255 * y2); + } + + // set internal image buffer to calculated & interpolated value + px = Math.floor(xIdx); + py = Math.floor(this.imgHeight - (this.pixelHeight * (i - 2) + b)); + + index = (px + (py * this.imgWidth)) * 4; + if (this.drawHeatMapColors) { + if (!isNaN(rgb)) { + var hmVals = this.convertToHeatmap(0, 255, rgb, this.heatMapColorAnchors); + this.resultImgArr[index + 0] = hmVals.r; + this.resultImgArr[index + 1] = hmVals.g; + this.resultImgArr[index + 2] = hmVals.b; + this.resultImgArr[index + 3] = this.transparency; + + } else { + this.resultImgArr[index + 0] = rgb; + this.resultImgArr[index + 1] = rgb; + this.resultImgArr[index + 2] = rgb; + this.resultImgArr[index + 3] = this.transparency; + } + + } else { + this.resultImgArr[index + 0] = rgb; + this.resultImgArr[index + 1] = rgb; + this.resultImgArr[index + 2] = rgb; + this.resultImgArr[index + 3] = this.transparency; + } + } + } else { + rgb = 255 - Math.round(255 * y1); + // set internal image buffer to calculated & interpolated value + px = Math.floor(xIdx); + py = Math.floor(this.imgHeight - (this.pixelHeight * (i - 2))); + + index = (px + (py * this.imgWidth)) * 4; + this.resultImgArr[index + 0] = rgb; + this.resultImgArr[index + 1] = rgb; + this.resultImgArr[index + 2] = rgb; + this.resultImgArr[index + 3] = global.transparency; + } + } + }; + + + /** + * calculates Magnitude by + * - reading the current (defined with offset) data from localSoundBuffer + * - applying the current Window Function to the selected data + * - calculating the actual FFT + * - (and saving the biggest value in totalMax) + * + * @param offset calculated offset in PCM Stream + * @param windowSizeInSamples size of window in samples (actual samples -> not FFT length; rest zero-padded) + * @return magnitude spectrum as Float32Array + */ + calcMagnitudeSpectrum(offset, windowSizeInSamples) { + // imaginary array of length N + var imag = new Float32Array(this.N); + + // real array of length N + var real = new Float32Array(this.N); + + // result array of length c - d + var result = new Float32Array(this.ceilingFreqFftIdx - this.floorFreqFftIdx); + + // set real values by reading local sound buffer (this auto zeropads everything > windowSizeInSamples) + for (var j = 0; j < windowSizeInSamples; j++) { + real[j] = this.audioBuffer[offset + j]; + } + + // apply window function and pre-emphasis to non zero padded real + this.applyWindowFuncAndPreemph(this.wFunction, this.internalalpha, real, windowSizeInSamples); + + // calculate FFT over real and save to result + this.fft(real, imag); + + // calculate magnitude for each spectral component + for (var low = 0; low <= this.ceilingFreqFftIdx - this.floorFreqFftIdx; low++) { + result[low] = this.magnitude(real[low + this.floorFreqFftIdx], imag[low + this.floorFreqFftIdx]); + if (this.totalMax < result[low]) { + this.totalMax = result[low]; + } + } + return result; + }; + + /** + * initial function call for calculating and drawing Spectrogram + * input sample data comes from the buffer this.audioBuffer + * - first loop calculates magnitude spectra to draw (calcMagnitudeSpectrum()) + * - second loop draws these values into the this.resultImgArr (drawVerticalLineOfSpectogram()) + */ + _renderSpectrogram () { + + var windowSizeInSamples = this.sampleRate * this.windowSizeInSecs; + + // instance of FFT with windowSize N + // this.myFFT = new this.FFT(); + + // array holding FFT results paint[canvas width][canvas height] + this.paint = new Array(this.imgWidth); + + // Hz per pixel height + this.HzStep = (this.sampleRate / 2) / (this.N / 2); + + // upper Hz boundary to display + this.ceilingFreqFftIdx = Math.ceil(this.upperFreq / this.HzStep); + + // lower Hz boundary to display + this.floorFreqFftIdx = Math.floor(this.lowerFreq / this.HzStep); // -1 for value below display when lower>0 + + // height between two interpolation points + this.pixelHeight = this.imgHeight / (this.ceilingFreqFftIdx - this.floorFreqFftIdx - 2); + + // ugly hack in order to support PhantomJS < 2.0 testing + if (typeof Uint8ClampedArray === 'undefined') { + Uint8ClampedArray = Uint8Array; + } + // create new picture + this.resultImgArr = new Uint8ClampedArray(Math.ceil(this.imgWidth * this.imgHeight * 4)); + // create sin & cos tables + m var (prob. better place that some place else!) + this.createSinAndCosTables(); + + // calculate i FFT runs, save result into paint and set maxPsd while doing so + for (var i = 0; i < this.imgWidth; i++) { + this.paint[i] = this.calcMagnitudeSpectrum(Math.round(i * this.samplesPerPxl), windowSizeInSamples); + this.maxPsd = (2 * Math.pow(this.totalMax, 2)) / this.N; + } + + // draw spectrogram on png image with canvas width + // one column is drawn per drawVerticalLineOfSpectogram + for (var j = 0; j < this.imgWidth; j++) { + this.drawVerticalLineOfSpectogram(j); + } + + return(this.resultImgArr.buffer); + + + } + + // end: rendering function + ////////////////////////////// + + ////////////////////////// + // communication functions + + + /** + * function to handle messages events if used ad web worker + * @param e message event + */ + renderSpectrogram (data) { + if (data !== undefined) { + var render = true; + var renderError = ''; + if (data.windowSizeInSecs !== undefined) { + this.windowSizeInSecs = data.windowSizeInSecs; + } else { + renderError = 'windowSizeInSecs'; + render = false; + } + if (data.fftN !== undefined) { + this.N = data.fftN; + } else { + renderError = 'fftN'; + render = false; + } + if (data.alpha !== undefined) { + this.internalalpha = data.alpha; + } else { + renderError = 'alpha'; + render = false; + } + if (data.upperFreq !== undefined) { + this.upperFreq = data.upperFreq; + } else { + renderError = 'upperFreq'; + render = false; + } + if (data.lowerFreq !== undefined) { + this.lowerFreq = data.lowerFreq; + } else { + renderError = 'lowerFreq'; + render = false; + } + if (data.samplesPerPxl !== undefined) { + this.samplesPerPxl = data.samplesPerPxl; + } else { + renderError = 'samplesPerPxl'; + render = false; + } + if (data.window !== undefined) { + switch (data.window) { + case 1: + this.wFunction = this.myWindow.BARTLETT; + break; + case 2: + this.wFunction = this.myWindow.BARTLETTHANN; + break; + case 3: + this.wFunction =this.myWindow.BLACKMAN; + break; + case 4: + this.wFunction = this.myWindow.COSINE; + break; + case 5: + this.wFunction = this.myWindow.GAUSS; + break; + case 6: + this.wFunction = this.myWindow.HAMMING; + break; + case 7: + this.wFunction = this.myWindow.HANN; + break; + case 8: + this.wFunction = this.myWindow.LANCZOS; + break; + case 9: + this.wFunction = this.myWindow.RECTANGULAR; + break; + case 10: + this.wFunction = this.myWindow.TRIANGULAR; + break; + } + } else { + renderError = 'window'; + render = false; + } + if (data.imgWidth !== undefined) { + this.imgWidth = data.imgWidth; + } else { + renderError = 'imgWidth'; + render = false; + } + if (data.imgHeight !== undefined) { + this.imgHeight = data.imgHeight; + } else { + renderError = 'imgHeight'; + render = false; + } + if (data.dynRangeInDB !== undefined) { + this.dynRangeInDB = data.dynRangeInDB; + } else { + renderError = 'dynRangeInDB'; + render = false; + } + if (data.pixelRatio !== undefined) { + this.pixelRatio = data.pixelRatio; + } else { + renderError = 'pixelRatio'; + render = false; + } + if (data.sampleRate !== undefined) { + this.sampleRate = data.sampleRate; + } else { + renderError = 'sampleRate'; + render = false; + } + if (data.audioBufferChannels !== undefined) { + this.audioBufferChannels = data.audioBufferChannels; + } else { + renderError = 'audioBufferChannels'; + render = false; + } + if (data.transparency !== undefined) { + this.transparency = data.transparency; + } else { + renderError = 'transparency'; + render = false; + } + if (data.audioBuffer !== undefined) { + this.audioBuffer = data.audioBuffer; + } else { + renderError = 'audioBuffer'; + render = false; + } + if (data.drawHeatMapColors !== undefined) { + this.drawHeatMapColors = data.drawHeatMapColors; + } else { + renderError = 'drawHeatMapColors'; + render = false; + } + if (data.preEmphasisFilterFactor !== undefined) { + this.preEmphasisFilterFactor = data.preEmphasisFilterFactor; + } else { + renderError = 'preEmphasisFilterFactor'; + render = false; + } + if (data.heatMapColorAnchors !== undefined) { + console.log(data.heatMapColorAnchors[0]); + this.heatMapColorAnchors = [ + [data.heatMapColorAnchors.slice(0, 3)], + [data.heatMapColorAnchors.slice(3, 6)], + [data.heatMapColorAnchors.slice(6, 9)], + ]; + } else { + renderError = 'heatMapColorAnchors'; + render = false; + } + if (data.invert !== undefined) { + this.invert = data.invert; + } else { + renderError = 'invert'; + render = false; + } + if (render) { + return(this._renderSpectrogram()); + } else { + throw new Error(renderError + ' is undefined'); + } + } else { + throw new Error("data object has to be passed in to renderSpectrogram function"); + } + }; + // + ///////////////////// +} \ No newline at end of file diff --git a/man/.Rhistory b/man/.Rhistory deleted file mode 100644 index 905c6273..00000000 --- a/man/.Rhistory +++ /dev/null @@ -1,31 +0,0 @@ -library(emu) -args(matrix) -data <- matrix(1:10, nrow=5 ) -data -data <- matrix(1:10, ncol=2 ) -data -data1 <- matrix( 1:10, ncol=2 ) data2 <- matrix( 10:20, ncol=2 ) -data2 -data1 <- matrix( 1:10, ncol=2 ) data2 <- matrix( 11:20, ncol=2 ) -data2 <- matrix( 11:20, ncol=2 ) -data2 -nd1 <- nrow(data1) nd1 <- nrow(data2) index <- rbind( c( 1, nd1 ), c(nd1+1,nd1+nd2) ) -nd1 <- nrow(data1) nd2 <- nrow(data2) index <- rbind( c( 1, nd1 ), c(nd1+1,nd1+nd2) ) -index -data1 -summary(data) -segs <- emu.query( "demo", "msajc003", "Phoneme=vowel") -segs -segs <- emu.query( "demo", "msajc003", "Phoneme=V") -segs -trackdata <- emu.track( segs, "fm" ) -trackdata -args(as.trackdata) -data1 <- matrix( 1:10, ncol=2 ) data2 <- matrix( 11:20, ncol=2 ) nd1 <- nrow(data1) nd2 <- nrow(data2) index <- rbind( c( 1, nd1 ), c(nd1+1,nd1+nd2) ) times <- rbind( c( 100.0, 110.0 ), c(200.0, 210,0) ) tdata <- as.trackdata( rbind( data1, data2 ), index, times, trackname="fake") -index -times -times <- rbind( c( 100.0, 110.0 ), c( 200.0, 210.0 ) ) tdata <- as.trackdata( rbind( data1, data2 ), index, times, trackname="fake") -tdata -summary(tdata) -tdata[1] -q() diff --git a/man/AddListRemoveAttrDefLabelGroup.Rd b/man/AddListRemoveAttrDefLabelGroup.Rd new file mode 100644 index 00000000..8f35e4a1 --- /dev/null +++ b/man/AddListRemoveAttrDefLabelGroup.Rd @@ -0,0 +1,93 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-database.DBconfig.R +\name{AddListRemoveAttrDefLabelGroup} +\alias{AddListRemoveAttrDefLabelGroup} +\alias{add_attrDefLabelGroup} +\alias{list_attrDefLabelGroups} +\alias{remove_attrDefLabelGroup} +\title{Add / List / Remove labelGroup to / of / from attributeDefinition of emuDB} +\usage{ +add_attrDefLabelGroup( + emuDBhandle, + levelName, + attributeDefinitionName, + labelGroupName, + labelGroupValues +) + +list_attrDefLabelGroups(emuDBhandle, levelName, attributeDefinitionName) + +remove_attrDefLabelGroup( + emuDBhandle, + levelName, + attributeDefinitionName, + labelGroupName +) +} +\arguments{ +\item{emuDBhandle}{emuDB handle as returned by \code{\link{load_emuDB}}} + +\item{levelName}{name of level} + +\item{attributeDefinitionName}{name of attributeDefinition} + +\item{labelGroupName}{name of label group} + +\item{labelGroupValues}{character vector of labels} +} +\description{ +Add / List / Remove label group to / of / from a specific attribute definition. +This label group can be used as a short hand +to reference groups of labels specific +to an attribute definition (compared to global label groups that +are added by \code{\link{add_labelGroup}}) in a +\code{\link{query}}. A common example would be to +add a label group for something like the phonetic +category of nasals to be able reference them +as "nasals" in a \code{\link{query}}. For more information +on the structural elements of an emuDB see \code{vignette(emuDB)}. +} +\examples{ +\dontrun{ + +################################## +# prerequisite: loaded ae emuDB +# (see ?load_emuDB for more information) + +sampaNasals = c("m", "F", "n", "J", "N") + +# add these values to the default Phonetic attribute +# definition of the Phonetic level of the ae emuDB +add_attrDefLabelGroup(emuDBhandle = ae, + levelName = "Phonetic", + attributeDefinitionName = "Phonetic", + labelGroupName = "sampaNasals", + labelGroupValues = sampaNasals) + +# query the labelGroup +query(ae, "Phonetic=sampaNasals") + + +# list attribute definition label groups +# of attributeDefinition "Phonetic" of the level "Phonetic" +# of the ae emuDB +list_attrDefLabelGroups(emuDBhandle = ae, + levelName = "Phonetic" , + attributeDefinitionName = "Phonetic") + +# remove the newly added attrDefLabelGroup +remove_attrDefLabelGroup(emuDBhandle = ae, + levelName = "Phonetic", + attributeDefinitionName = "Phonetic", + labelGroupName = "sampaNasals") + +} + +} +\seealso{ +add_labelGroup +} +\keyword{Emu} +\keyword{database} +\keyword{emuDB} +\keyword{schema} diff --git a/man/AddListRemoveLabelGroup.Rd b/man/AddListRemoveLabelGroup.Rd new file mode 100644 index 00000000..32b70253 --- /dev/null +++ b/man/AddListRemoveLabelGroup.Rd @@ -0,0 +1,79 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-database.DBconfig.R +\name{AddListRemoveLabelGroup} +\alias{AddListRemoveLabelGroup} +\alias{add_labelGroup} +\alias{list_labelGroups} +\alias{remove_labelGroup} +\title{Add / List / Remove global labelGroup to / of / from emuDB} +\usage{ +add_labelGroup(emuDBhandle, name, values) + +list_labelGroups(emuDBhandle) + +remove_labelGroup(emuDBhandle, name) +} +\arguments{ +\item{emuDBhandle}{emuDB handle as returned by \code{\link{load_emuDB}}} + +\item{name}{name of label group} + +\item{values}{character vector of labels} +} +\description{ +Add / List / Remove label group that can be used as a short hand +to reference groups of labels that are globally defined +for the entire database (compared to attribute definition +specific label groups that +are added by \code{\link{add_attrDefLabelGroup}}) in a +\code{\link{query}}. A common example would be to +add a label group for something like the phonetic +category of nasals to be able to reference them +as "nasals" in a \code{\link{query}}. +In theory you could use a labelGroupName as a label instance within the +level, but since this could lead to serious confusion, it is better avoided. +For users transitioning from the legacy EMU system: Do not confuse a +labelGroup with legal labels: a labelGroup +had the unfortunate name 'legal labels' in the legacy EMU system. +For more information on the structural elements of an emuDB +see \code{vignette{emuDB}}. +} +\examples{ +\dontrun{ + +################################## +# prerequisite: loaded ae emuDB +# (see ?load_emuDB for more information) + +sampaNasals = c("m", "F", "n", "J", "N") + +# add these values to the ae emuDB +# as a globally available labelGroup +add_labelGroup(emuDBhandle = ae, + name = "sampaNasals", + values = sampaNasals) + +# query the labelGroup in the "Phonetic" level +query(emuDBhandle = ae, + query = "Phonetic == sampaNasals") + +# query the labelGroup in the "Phoneme" level +query(emuDBhandle = ae, + query = "Phoneme == sampaNasals") + +# list global label groups of ae emuDB +list_labelGroups(emuDBhandle = ae) + +# remove the newly added labelGroup +remove_labelGroup(emuDBhandle = ae, + name = "sampaNasals") +} + +} +\seealso{ +add_attrDefLabelGroup +} +\keyword{Emu} +\keyword{database} +\keyword{emuDB} +\keyword{schema} diff --git a/man/AddListRemoveLevelDefinitions.Rd b/man/AddListRemoveLevelDefinitions.Rd new file mode 100644 index 00000000..cda0481d --- /dev/null +++ b/man/AddListRemoveLevelDefinitions.Rd @@ -0,0 +1,79 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-database.DBconfig.R +\name{AddListRemoveLevelDefinitions} +\alias{AddListRemoveLevelDefinitions} +\alias{add_levelDefinition} +\alias{list_levelDefinitions} +\alias{remove_levelDefinition} +\title{Add / List / Remove level definition to / of / from emuDB} +\usage{ +add_levelDefinition( + emuDBhandle, + name, + type, + rewriteAllAnnots = TRUE, + verbose = TRUE +) + +list_levelDefinitions(emuDBhandle) + +remove_levelDefinition( + emuDBhandle, + name, + rewriteAllAnnots = TRUE, + force = FALSE, + verbose = TRUE +) +} +\arguments{ +\item{emuDBhandle}{emuDB handle as returned by \code{\link{load_emuDB}}} + +\item{name}{name of level definition} + +\item{type}{type of level definition ("SEGMENT","EVENT","ITEM")} + +\item{rewriteAllAnnots}{should changes be written to file system (_annot.json files) (intended for expert use only)} + +\item{verbose}{Show progress bars and further information} + +\item{force}{delete all items incl. links pointing to those items from the levels} +} +\description{ +Add / List / Remove database operation functions for level definitions. +A level is a more general term for what is often referred to as a "tier". +It is more general in the sense that people usually +expect tiers to contain time information. Levels +can either contain time information if they are of the +type "EVENT" or of the type "SEGMENT" but are timeless +if they are of the type "ITEM". For more information +on the structural elements of an emuDB see \code{vignette(emuDB)}. +Note that a level cannot be removed, if it contains instances of annotation items +or if it is linked to another level. Further note, renaming a level definition +can be done using \code{\link{rename_attributeDefinition}}. +} +\examples{ +\dontrun{ + +################################## +# prerequisite: loaded ae emuDB +# (see ?load_emuDB for more information) + +# add level called "Phonetic2" to the ae emuDB +# that could for example contain the transcriptions of a second annotator +add_levelDefinition(emuDBhandle = ae, + name = "Phonetic2", + type = "SEGMENT") + +# list level definition of ae emuDB +list_levelDefinitions(emuDBhandle = ae) + +# remove newly added level definition +remove_levelDefinitions(emuDBhandle = ae, + name = "Phonetic2") +} + +} +\keyword{Emu} +\keyword{database} +\keyword{emuDB} +\keyword{schema} diff --git a/man/AddListRemoveLinkDefinition.Rd b/man/AddListRemoveLinkDefinition.Rd new file mode 100644 index 00000000..37c068df --- /dev/null +++ b/man/AddListRemoveLinkDefinition.Rd @@ -0,0 +1,80 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-database.DBconfig.R +\name{AddListRemoveLinkDefinition} +\alias{AddListRemoveLinkDefinition} +\alias{add_linkDefinition} +\alias{list_linkDefinitions} +\alias{remove_linkDefinition} +\title{Add / List / Remove linkDefinition to / of / from emuDB} +\usage{ +add_linkDefinition(emuDBhandle, type, superlevelName, sublevelName) + +list_linkDefinitions(emuDBhandle) + +remove_linkDefinition( + emuDBhandle, + superlevelName, + sublevelName, + force = FALSE, + verbose = TRUE +) +} +\arguments{ +\item{emuDBhandle}{emuDB handle as returned by \code{\link{load_emuDB}}} + +\item{type}{type of linkDefinition (either \code{"ONE_TO_MANY"}, \code{"MANY_TO_MANY"} or \code{"ONE_TO_ONE"})} + +\item{superlevelName}{name of super-level of linkDefinition} + +\item{sublevelName}{name of sub-level of linkDefinition} + +\item{force}{delete all links belonging to the linkDefinition (\strong{USE WITH CAUTION! VERY INVASIVE AKTION!})} + +\item{verbose}{be verbose. Ask to delete links if \code{force} is \code{TRUE}.} +} +\description{ +Add / List / Remove new link definition to / of / from emuDB. A link definition +specifies the relationship between two levels, the +super-level and the sub-level. The entirety of all link +definitions of a emuDB specifies the +hierarchical structure of the database. For more information +on the structural elements of an emuDB see \code{vignette(emuDB)}. +} +\details{ +Link type descriptions: +\itemize{ +\item{\code{"ONE_TO_MANY"}: A single ITEM of the super-level can be linked to multiple ITEMs of the sub-level} +\item{\code{"MANY_TO_MANY"}: Multiple ITEMs of the super-level can be linked to multiple ITEMs of the sub-level} +\item{\code{"ONE_TO_ONE"}: A single ITEM of the super-level can be linked to a single ITEM of the sub-level} +} + +For all link types the rule applies that no links are allowed to cross any other links. +Further, a linkDefinition can not be removed, if there are links present in the emuDB. +} +\examples{ +\dontrun{ + +################################## +# prerequisite: loaded emuDB that was converted +# using the convert_TextGridCollection function called myTGcolDB +# (see ?load_emuDB and ?convert_TextGridCollection for more information) + +# add link definition from super-level "Phoneme" +# to sub-level "Phonetic" of type "ONE_TO_MANY" +# for myTGcolDB emuDB +add_linkDefinition(emuDBhandle = myTGcolDB, + type = "ONE_TO_MANY", + superlevelName = "Phoneme", + sublevelName = "Phonetic") + +# list link definitions for myTGcolDB emuDB +list_linkDefinitions(emuDBhandle = myTGcolDB) + +# remove newly added link definition +remove_linkDefinition(emuDBhandle = myTGcolDB, + superlevelName = "Phoneme", + sublevelName = "Phonetic") + + +} +} diff --git a/man/AddListRemovePerspective.Rd b/man/AddListRemovePerspective.Rd new file mode 100644 index 00000000..e7528a60 --- /dev/null +++ b/man/AddListRemovePerspective.Rd @@ -0,0 +1,58 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-database.DBconfig.EMUwebAppConfig.R +\name{AddListRemovePerspective} +\alias{AddListRemovePerspective} +\alias{add_perspective} +\alias{list_perspectives} +\alias{remove_perspective} +\title{Add / List / Remove perspective to / of / from emuDB} +\usage{ +add_perspective(emuDBhandle, name) + +list_perspectives(emuDBhandle) + +remove_perspective(emuDBhandle, name) +} +\arguments{ +\item{emuDBhandle}{emuDB handle as returned by \code{\link{load_emuDB}}} + +\item{name}{name of perspective} +} +\description{ +Add / List / Remove perspective to / of / from emuDB. The EMU-webApp subdivides different ways +to look at an emuDB into so called perspectives. These perspectives, +between which you can switch in the web application, contain +information on what levels are displayed, which ssffTracks are drawn, +and so on. For more information on the structural elements of an emuDB +see \code{vignette{emuDB}}. +} +\examples{ +\dontrun{ + +################################## +# prerequisite: loaded ae emuDB +# (see ?load_emuDB for more information) + +# add perspective called "justTones" to the ae emuDB +add_perspective(emuDBhandle = ae, + name = "justTones") + +# add levelCanvasOrder so only the "Tone" level is displayed +set_levelCanvasesOrder(emuDBhandle = ae, + perspectiveName = "justTones", + order = c("Tone")) + +# list perspectives of ae emuDB +list_perspectives(emuDBhandle = ae) + +# remove newly added perspective +remove_perspective(emuDBhandle = ae, + name = "justTones") + +} + +} +\keyword{DBconfig} +\keyword{Emu} +\keyword{database} +\keyword{emuDB} diff --git a/man/AddListRemoveSsffTrackDefinition.Rd b/man/AddListRemoveSsffTrackDefinition.Rd new file mode 100644 index 00000000..5de94dba --- /dev/null +++ b/man/AddListRemoveSsffTrackDefinition.Rd @@ -0,0 +1,111 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-database.DBconfig.R +\name{AddListRemoveSsffTrackDefinition} +\alias{AddListRemoveSsffTrackDefinition} +\alias{add_ssffTrackDefinition} +\alias{list_ssffTrackDefinitions} +\alias{remove_ssffTrackDefinition} +\title{Add / List / Remove ssffTrackDefinition to / from / of emuDB} +\usage{ +add_ssffTrackDefinition( + emuDBhandle, + name, + columnName = NULL, + fileExtension = NULL, + fileFormat = NULL, + onTheFlyFunctionName = NULL, + onTheFlyParams = NULL, + onTheFlyOptLogFilePath = NULL, + verbose = TRUE, + interactive = TRUE +) + +list_ssffTrackDefinitions(emuDBhandle) + +remove_ssffTrackDefinition(emuDBhandle, name, deleteFiles = FALSE) +} +\arguments{ +\item{emuDBhandle}{emuDB handle as returned by \code{\link{load_emuDB}}} + +\item{name}{name of ssffTrackDefinition} + +\item{columnName}{columnName of ssffTrackDefinition. +If the \code{onTheFlyFunctionName} parameter is set and columnName isn't, the +\code{columnName} will default to the first entry in \code{wrasspOutputInfos[[onTheFlyFunctionName]]$tracks}.} + +\item{fileExtension}{fileExtension of ssffTrackDefinitions. +If the \code{onTheFlyFunctionName} parameter is set and fileExtension isn't, the +\code{fileExtension} will default to the first entry in \code{wrasspOutputInfos[[onTheFlyFunctionName]]$ext}.} + +\item{fileFormat}{(optional) file format of ssffTrackDefinition. This is currently in test phase. Can be ssff, +Rda or NULL. Defaults to ssff.} + +\item{onTheFlyFunctionName}{name of wrassp function to do on-the-fly calculation. If set to the name of a wrassp +signal processing function, not only the emuDB schema is extended by the ssffTrackDefintion but also +the track itself is calculated from the signal file and stored in the emuDB. See \code{names(wrasspOutputInfos)} +for a list of all the signal processing functions provided by the wrassp package.} + +\item{onTheFlyParams}{a list of parameters that will be given to the function +passed in by the onTheFlyFunctionName parameter. This list can easily be +generated using the \code{\link{formals}} function on the according signal processing function +provided by the wrassp package and then setting the +parameter one wishes to change.} + +\item{onTheFlyOptLogFilePath}{path to optional log file for on-the-fly function} + +\item{verbose}{Show progress bars and further information} + +\item{interactive}{ask user for confirmation} + +\item{deleteFiles}{delete files that belong to ssffTrackDefinition on removal} +} +\description{ +Add / List / Remove ssffTrackDefinition to / from / of emuDB. +An ssffTrack (often simply referred to as a track) references +data that is stored in the Simple Signal File Format (SSFF) +in the according bundle folders. The two most common types of data are: +\itemize{ +\item{complementary data that was acquired during the recording +such as data acquired during electromagnetic +articulographic (EMA) or electropalatography (EPG) recordings;} +\item{derived data, i.e. data that was calculated from the original audio signal +such as formant values and their bandwidths or the short-term Root Mean Square amplitude of the signal.} +} +For more information on the structural elements of an emuDB see \code{vignette(emuDB)}. +} +\examples{ +\dontrun{ + +################################## +# prerequisite: loaded ae emuDB +# (see ?load_emuDB for more information) + +# add ssff track definition to ae emuDB +# calculating the according SSFF files (.zcr) on-the-fly +# using the wrassp function "zcrana" (zero-crossing-rate analysis) +add_ssffTrackDefinition(emuDBhandle = ae, + name = "ZCRtrack", + onTheFlyFunctionName = "zcrana") + +# add ssff track definition to ae emuDB +# for SSFF files that will be added later (either +# by adding files to the emuDB using +# the add_files() function or by calculating +# them using the according function provided +# by the wrassp package) +add_ssffTrackDefinition(emuDBhandle = ae, + name = "formants", + columnName = "fm", + fileExtension = "fms") + +# list ssff track definitions for ae emuDB +list_ssffTrackDefinitions(emuDBhandle = ae) + +# remove newly added ssff track definition (does not delete +# the actual .zcr files) +remove_ssffTrackDefinition(emuDBhandle = ae, + name = "ZCRtrack") + +} + +} diff --git a/man/AddListRenameRemoveAttributeDefinitions.Rd b/man/AddListRenameRemoveAttributeDefinitions.Rd new file mode 100644 index 00000000..b8cc0394 --- /dev/null +++ b/man/AddListRenameRemoveAttributeDefinitions.Rd @@ -0,0 +1,108 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-database.DBconfig.R +\name{AddListRenameRemoveAttributeDefinitions} +\alias{AddListRenameRemoveAttributeDefinitions} +\alias{add_attributeDefinition} +\alias{list_attributeDefinitions} +\alias{rename_attributeDefinition} +\alias{remove_attributeDefinition} +\title{Add / List / Rename / Remove attribute definition to / of / from emuDB} +\usage{ +add_attributeDefinition( + emuDBhandle, + levelName, + name, + type = "STRING", + rewriteAllAnnots = TRUE, + verbose = TRUE +) + +list_attributeDefinitions(emuDBhandle, levelName) + +rename_attributeDefinition( + emuDBhandle, + origAttrDef, + newAttrDef, + verbose = TRUE +) + +remove_attributeDefinition( + emuDBhandle, + levelName, + name, + force = FALSE, + rewriteAllAnnots = TRUE, + verbose = TRUE +) +} +\arguments{ +\item{emuDBhandle}{emuDB handle as returned by \code{\link{load_emuDB}}} + +\item{levelName}{name of level} + +\item{name}{name of attributeDefinition} + +\item{type}{type of attributeDefinition (currently only "STRING")} + +\item{rewriteAllAnnots}{should changes be written to file system (_annot.json files) (intended for expert use only)} + +\item{verbose}{if set to \code{TRUE}, more status messages are printed} + +\item{origAttrDef}{name of level/attribute definition in emuDB that is to be changed} + +\item{newAttrDef}{new name that shall be assigned to the level/attribute definition} + +\item{force}{delete all attribute definitions in annotations (== label entries)} +} +\description{ +Add / List / Rename / Remove database operation functions for attribute +definition to / of / from an existing level definition of an emuDB. +Attribute definitions can be viewed as definitions of +parallel labels for the annotational units (ITEMs) of the emuDB. +Each level definition is required to have at least one +default attribute definition that has the same name as the level definition +(automatically created by \code{\link{add_levelDefinition}}). For more +information on the structural elements of an emuDB see \code{vignette(emuDB)}. +Note that as with level definitions, an attribute definition to a level cannot be removed, +if it contains labels in the emuDB. + +As the only one of these operations, \code{rename_attributeDefinition} can +also be used to manipulate (i.e. rename) a level definition. It is therefore +not necessary to specify the name of the level that the attribute definition +belongs to. While renaming a level or attribute definition, emuR will +(1) rewrite the levelDefinitions in DBconfig, (2) rewrite the +linkDefinitions in DBconfig, (3) rewrite the perspectives in DBconfig, +(4) rewrite the anagestConfig in DBconfig, and (5) rewrite all _annot.json +files. (5) May take quite a while, depending on the number of bundles in the +database. +} +\examples{ +\dontrun{ + +################################## +# prerequisite: loaded ae emuDB +# (see ?load_emuDB for more information) + +# add additional attribute definition to the "Phonetic" level +# of the ae emuDB that will contain the UTF8 IPA +# symbols of the phonetic transcriptions +add_attributeDefinition(emuDBhandle = ae, + levelName = "Phonetic", + name = "IPA-UTF8") + +# list attribute definitions for level "Word" +# of the ae emuDB +list_attributeDefinitions(emuDBhandle = ae, + levelName = "Word") + +# remove newly added attributeDefinition +remove_attributeDefinition(emuDBhandle = ae, + levelName = "Phonetic", + name = "IPA-UTF8") +} + +} +\keyword{DBconfig} +\keyword{Emu} +\keyword{database} +\keyword{emuDB} diff --git a/man/Expand.EPG.Rd b/man/Expand.EPG.Rd deleted file mode 100644 index bc2e525d..00000000 --- a/man/Expand.EPG.Rd +++ /dev/null @@ -1,8 +0,0 @@ -\name{Expand.EPG} -\alias{[.EPG} -\title{ expand EPG } -\description{ -see function -} -\keyword{ internal } - diff --git a/man/Expand.emusegs.Rd b/man/Expand.emusegs.Rd deleted file mode 100644 index 7480f3af..00000000 --- a/man/Expand.emusegs.Rd +++ /dev/null @@ -1,7 +0,0 @@ -\name{Expand.emusegs} -\alias{[.emusegs} -\title{ Expand emusegs} -\description{ -see function} -\keyword{ internal } - diff --git a/man/Expand.spectral.Rd b/man/Expand.spectral.Rd deleted file mode 100644 index e3f4dd93..00000000 --- a/man/Expand.spectral.Rd +++ /dev/null @@ -1,8 +0,0 @@ -\name{Expand.spectral} -\alias{[.spectral} -\title{ Expand spectral } -\description{ -see function -} -\keyword{ internal } - diff --git a/man/Expand.trackdata.Rd b/man/Expand.trackdata.Rd deleted file mode 100644 index 8adb6e3c..00000000 --- a/man/Expand.trackdata.Rd +++ /dev/null @@ -1,8 +0,0 @@ -\name{Expand.trackdata} -\alias{[.trackdata} -\title{ Expand trackdata } -\description{ -see function -} -\keyword{ internal } - diff --git a/man/SetGetRemoveLegalLabels.Rd b/man/SetGetRemoveLegalLabels.Rd new file mode 100644 index 00000000..06d3c5ba --- /dev/null +++ b/man/SetGetRemoveLegalLabels.Rd @@ -0,0 +1,76 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-database.DBconfig.R +\name{SetGetRemoveLegalLabels} +\alias{SetGetRemoveLegalLabels} +\alias{set_legalLabels} +\alias{get_legalLabels} +\alias{remove_legalLabels} +\title{Set / Get / Remove legal labels of attributeDefinition of emuDB} +\usage{ +set_legalLabels(emuDBhandle, levelName, attributeDefinitionName, legalLabels) + +get_legalLabels(emuDBhandle, levelName, attributeDefinitionName) + +remove_legalLabels(emuDBhandle, levelName, attributeDefinitionName) +} +\arguments{ +\item{emuDBhandle}{emuDB handle as returned by \code{\link{load_emuDB}}} + +\item{levelName}{name of level} + +\item{attributeDefinitionName}{name of attributeDefinition (can be and often is the level name)} + +\item{legalLabels}{character vector of labels} +} +\description{ +Set / Get / Remove legal labels of a specific attributeDefinition of a emuDB. +The legal labels are a character vector of strings +that specifies the labels that are legal (i.e. allowed / valid) for the given attribute. +As the EMU-webApp won't allow the annotator to enter any labels that are not +specified in this array, this is a simple way of assuring that a level +has a consistent label set. For more information +on the structural elements of an emuDB see \code{vignette(emuDB)}. +Note that defining legal labels for an attributeDefinition does not imply that the +existing labels are checked for being 'legal' in the emuDB. +} +\examples{ +\dontrun{ + +################################## +# prerequisite: loaded ae emuDB +# (see ?load_emuDB for more information) + +legalPhoneticLabels = c("V", "m", "N", "s", "t", "H", "@:", "f", "r", + "E", "n", "z", "S", "i:", "w", "@", "k", "I", "d", + "db", "j", "u:", "dH", "l", "ai", "O", "D", "o:", "v") + +# set legal labels of the +# default "Phonetic" attributeDefinition of +# the "Phonetic" level of ae emuDB +set_legalLabels(emuDBhandle = ae, + levelName = "Phonetic", + attributeDefinitionName = "Phonetic", + legalLabels = legalPhoneticLabels) + +# get legal labels of the +# default "Phonetic" attributeDefinition of +# the "Phonetic" level of ae emuDB +get_legalLabels(emuDBhandle = ae, + levelName = "Phonetic", + attributeDefinitionName = "Phonetic") + + +# remove legal labels of the +# default "Phonetic" attributeDefinition of +# the "Phonetic" level of ae emuDB +remove_legalLabels(emuDBhandle = ae, + levelName = "Phonetic", + attributeDefinitionName = "Phonetic") + +} + +} +\keyword{Emu} +\keyword{database} +\keyword{emuDB} +\keyword{schema} diff --git a/man/SetGetSignalCanvasesOrder.Rd b/man/SetGetSignalCanvasesOrder.Rd new file mode 100644 index 00000000..a577ed8a --- /dev/null +++ b/man/SetGetSignalCanvasesOrder.Rd @@ -0,0 +1,46 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-database.DBconfig.EMUwebAppConfig.R +\name{SetGetSignalCanvasesOrder} +\alias{SetGetSignalCanvasesOrder} +\alias{set_signalCanvasesOrder} +\alias{get_signalCanvasesOrder} +\title{Set / Get signalCanvasesOrder of / to / from emuDB} +\usage{ +set_signalCanvasesOrder(emuDBhandle, perspectiveName, order) + +get_signalCanvasesOrder(emuDBhandle, perspectiveName) +} +\arguments{ +\item{emuDBhandle}{emuDB handle as returned by \code{\link{load_emuDB}}} + +\item{perspectiveName}{name of perspective} + +\item{order}{character vector containing names of ssffTrackDefinitions or "OSCI" / "SPEC"} +} +\description{ +Set / Get signalCanvasesOrder array that specifies which signals are +displayed in the according perspective by the EMU-webApp. An entry in this character vector +refers to either the name of an ssffTrackDefinition or a predefined string: \code{"OSCI"} which +represents the oscillogram or \code{"SPEC"} which represents the +spectrogram. For more information on the structural elements of an emuDB +see \code{vignette{emuDB}}. +} +\examples{ +\dontrun{ + +################################## +# prerequisite: loaded ae emuDB +# (see ?load_emuDB for more information) + +# get signal canvas order of the "default" +# perspective of the ae emuDB +get_signalCanvasesOrder(emuDBhandle = ae, + perspectiveName = "default") + +} + +} +\keyword{DBconfig} +\keyword{Emu} +\keyword{database} +\keyword{emuDB} diff --git a/man/SetGetlevelCanvasesOrder.Rd b/man/SetGetlevelCanvasesOrder.Rd new file mode 100644 index 00000000..01d6ebc0 --- /dev/null +++ b/man/SetGetlevelCanvasesOrder.Rd @@ -0,0 +1,55 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-database.DBconfig.EMUwebAppConfig.R +\name{SetGetlevelCanvasesOrder} +\alias{SetGetlevelCanvasesOrder} +\alias{set_levelCanvasesOrder} +\alias{get_levelCanvasesOrder} +\title{Set / Get level canvases order of emuDB} +\usage{ +set_levelCanvasesOrder(emuDBhandle, perspectiveName, order) + +get_levelCanvasesOrder(emuDBhandle, perspectiveName) +} +\arguments{ +\item{emuDBhandle}{emuDB handle as returned by \code{\link{load_emuDB}}} + +\item{perspectiveName}{name of perspective} + +\item{order}{character vector containing names of levelDefinitions} +} +\description{ +Set / Get which levels of an emuDB to display as level canvases (in a +given perspective of the EMU-webApp), +and in what order. Level canvases refer to levels of +the type "SEGMENT" or "EVENT" that are displayed by the EMU-webApp. Levels +of type "ITEM" can always be displayed using the hierarchy view of the +web application but can not be displayed as level canvases. +For more information on the structural elements of an emuDB +see \code{vignette{emuDB}}. +} +\examples{ +\dontrun{ + +################################## +# prerequisite: loaded ae emuDB +# (see ?load_emuDB for more information) + +# get level canvases order of ae emuDB +order = get_levelCanvasesOrder(emuDBhandle = ae, + perspectiveName = "default") + +# reverse the level canvases order of ae emuDB +set_levelCanvasesOrder(emuDBhandle = ae, + perspectiveName = "default", + order = rev(order)) + +# get level canvases order of ae emuDB +get_levelCanvasesOrder(emuDBhandle = ae, + perspectiveName = "default") +} + +} +\keyword{DBconfig} +\keyword{Emu} +\keyword{database} +\keyword{emuDB} diff --git a/man/Slope.test.Rd b/man/Slope.test.Rd index 28a385e4..bd3baee6 100644 --- a/man/Slope.test.Rd +++ b/man/Slope.test.Rd @@ -1,58 +1,41 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/slope.test.R \name{Slope.test} \alias{Slope.test} \title{Slope Test} -\description{ -Tests whether the difference between two or more regression lines is -significant -} \usage{ -Slope.test(\dots) +Slope.test(...) } \arguments{ -\item{...}{ -this function takes any number of two column matrices. -The first column is the y-data (in the case of locus equations, this -is the vowel onset) and the second column is the x-data (in the case of -locus equations, vowel target). -}} +\item{...}{this function takes any number of two column matrices. The +first column is the y-data (in the case of locus equations, this is the +vowel onset) and the second column is the x-data (in the case of locus +equations, vowel target).} +} \value{ -The return value consists of the following componenets: +The return value consists of the following components: + +\item{separate}{ slope, intercept, r-squared, F-ratio, "d(egrees of) +f(reedom)" and "prob(ability that) line fits data" for the separate data +matrices entered. } \item{combined}{ F-ratio, "d(egrees of) f(reedom)", and +"Probability of them being DIFFERENT" for the slope and for the intercept +of the combined data. } \item{x}{ the combined x-data for all the +matrices. } \item{y}{ the combined y-data for all the matrices. } +\item{mat}{ the category vectors for the combined data (consists of 1, 0 +and -1). } \item{numrows}{ the number of rows in each matrix. } +\item{numcats}{ the sum number of matrices entered. -\item{separate}{ -slope, intercept, r-squared, F-ratio, "d(egrees of) f(reedom)" -and "prob(ability that) line fits data" for the separate data matrices entered. -} -\item{combined}{ -F-ratio, "d(egrees of) f(reedom)", and "Probability of -them being DIFFERENT" for the slope and for the intercept of the combined -data. -} -\item{x}{ -the combined x-data for all the matrices. -} -\item{y}{ -the combined y-data for all the matrices. } -\item{mat}{ -the category vectors for the combined data -(consists of 1, 0 and -1). } -\item{numrows}{ -the number of rows in each matrix. +\description{ +Tests whether the difference between two or more regression lines is +significant } -\item{numcats}{ -the sum number of matrices entered. - - -}} \references{ see E. Pedhazur, Multiple Regression in Behavioral Research -p.436-450, 496-507. +p.436-450, 496-507. } \seealso{ lm(), summary.lm(), pf() } \keyword{misc} -% Converted by Sd2Rd version 0.3-3. - - diff --git a/man/add_files.Rd b/man/add_files.Rd new file mode 100644 index 00000000..cfbab575 --- /dev/null +++ b/man/add_files.Rd @@ -0,0 +1,60 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-database.files.R +\name{add_files} +\alias{add_files} +\title{Add files to emuDB} +\usage{ +add_files(emuDBhandle, dir, fileExtension, targetSessionName = "0000") +} +\arguments{ +\item{emuDBhandle}{emuDB handle as returned by \code{\link{load_emuDB}}} + +\item{dir}{directory containing files to be added} + +\item{fileExtension}{file extension of files to be added. If no . (dot) is found +in this string (e.g. "zcr") then the bundle name matching is performed by removing +\code{paste0(".", fileExtension)} from the files ("/path/to/msajc003.zcr" will become "msajc003") +and the according bundle name will be searched. If a . (dot) if found within this string +(e.g. "_annot.json") then the entire string is remove without prepending a . (dot) ("/path/to/msajc003_annot.json" will then become "msajc003")} + +\item{targetSessionName}{name of sessions containing +bundles that the files will be added to} +} +\description{ +Add files to existing bundles of specified session of emuDB. +Do not use this function to import new recordings (media files) and create bundles; +see \code{?import_mediaFiles} to import new recordings. +The files that are found in \code{dir} that have the extension +\code{fileExtension} will be copied into the according bundle +folder that have the same basename as the file. Note that the +same bundle name may appear in different sessions, therefore you must +specify the session in \code{targetSessionName}. For +more information on the structural elements of an emuDB +see \code{vignette{emuDB}}. +Note that adding files does not mean the emuDB is automatically using these, unless +you have defined the usage of these files (e.g. by ssffTrackDefinitions). +} +\examples{ +\dontrun{ + +################################## +# prerequisite: loaded ae emuDB +# (see ?load_emuDB for more information) + +# specify path to folder containing the following +# files we wish to add to: +# msajc003.zcr, msajc010.zcr, msajc012.zcr, msajc015.zcr, +# msajc022.zcr, msajc023.zcr and msajc057.zcr +path2dir = "/path/to/dir/" + +# add the files to session "0000" of the "ae" emuDB +add_files(emuDBhandle = ae, + dir = path2dir, + fileExtension = "zcr", + targetSessionName = "0000") + +} +} +\keyword{Emu} +\keyword{database} +\keyword{emuDB} diff --git a/man/add_signalViaMatlab.Rd b/man/add_signalViaMatlab.Rd new file mode 100644 index 00000000..293ef8e4 --- /dev/null +++ b/man/add_signalViaMatlab.Rd @@ -0,0 +1,227 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-add_signal.R +\name{add_signalViaMatlab} +\alias{add_signalViaMatlab} +\title{add_signalViaMatlab} +\usage{ +add_signalViaMatlab( + emuDBhandle, + matlabFunctionName, + outputFileExtension, + trackName, + trackColumn, + oneMatlabFunctionCallPerFile = TRUE, + inputFileExtension = NULL, + matlabFunctionParameters = NULL, + paths_to_add = NULL, + ... +) +} +\arguments{ +\item{emuDBhandle}{The Emu database to work on.} + +\item{matlabFunctionName}{Name of a Matlab function to use for signal processing. +Must be available on Matlab’s search path; see \code{paths_to_add}.} + +\item{outputFileExtension}{The file extension for the new derived signal file +to be created within each bundle.} + +\item{trackName}{The name of the new track that will be created automatically.} + +\item{trackColumn}{The column of data to be used from the result files generated +by Matlab. Should usually start with \verb{data[} or \verb{data$}.} + +\item{oneMatlabFunctionCallPerFile}{Whether to call \code{matlabFunctionName} once +per file (TRUE) or once for the entire database (FALSE). \code{FALSE} will +be necessary if you want Matlab to process bundles in parallel.} + +\item{inputFileExtension}{The file extension of the files to operate on. Defaults +to the standard media file extension of the current Emu database.} + +\item{matlabFunctionParameters}{Data frame with parameters for \code{matlabFunctionName}. +Needs to contain the columns \code{session} and \code{bundle} plus one column for +each function parameter. The column names will be used as parameter names. +Must contain \emph{one row for every bundle, without exception}.} + +\item{paths_to_add}{List of paths where Matlab will look for functions. This +is usually handled by \link[matlabr:run_matlab_code]{matlabr::run_matlab_code}, but it adds the paths +\emph{after} the code, so we need to handle it in \code{emuR}.} + +\item{...}{Other parameters are passed on to \link[matlabr:run_matlab_code]{matlabr::run_matlab_code}.} +} +\description{ +Use a Matlab function to derive an extra signal file for each +bundle of the Emu database. A new track definition will be added to the +database automatically. +} +\details{ +This function enables EMU-SDMS users you take advantage of tool boxes +and signal processing functions written in Matlab. The Matlab function must meet +certain requirements as detailed below, and it will always be run against the +entire database (either one bundle at a time or the whole database at a time, +but never only a part of the database). + +The Matlab function must: +\itemize{ +\item Be defined in a file of its own. +\item Accept named parameters. +\item Accept at least the parameters \code{inputFilename} and \code{outputFilename}, both +strings. +\item Use the file at \code{inputFilename} and produce a new file \code{outputFilename}; +the new file must be a \code{.mat} file containing the variables \code{data}, +\code{sampleRate}, \code{startTime}, \code{units}, and \code{comment}. +} + +You can find examples of Matlab functions that meet these requirements by running +\code{\link[=create_emuRdemoData]{create_emuRdemoData()}} and then looking at the subdirectory \verb{add_signal_files/matlab/}. + +The Matlab function can accept more parameters to influence the signal +processing. These parameters need not be the same values for the entire +database. They can be used, for example, to modify the signal processing +algorithms in a speaker-specific way. + +If \code{oneMatlabFunctionCallPerFile} is \code{TRUE}, the function will be called once +for every bundle of the database; in that case, all parameters +to the Matlab function will be 1x1 matrices. If \code{oneMatlabFunctionCallPerFile} +is \code{FALSE}, the Matlab function will only be called once for the entire database; +in that case, all parameters will be 1xN matrices with N equal to the number +of bundles in the database. \code{add_signalViaMatlab} will create a temporary \code{.m} +script. That script may, for example, contain code like this: + +\if{html}{\out{
}}\preformatted{demoSignalScalerForOneFile(inputFilename="msajc003.wav", outputFilename="/tmp/RtmpRwjz5Q/add_signalViaMatlab/0fc618dc-8980-414d-8c7a-144a649ce199/0000_ses/msajc003.mat", scalingFactor=1); +demoSignalScalerForOneFile(inputFilename="msajc010.wav", outputFilename="/tmp/RtmpRwjz5Q/add_signalViaMatlab/0fc618dc-8980-414d-8c7a-144a649ce199/0000_ses/msajc010.mat", scalingFactor=4); +}\if{html}{\out{
}} + +Or like this: + +\if{html}{\out{
}}\preformatted{demoSignalScalerForManyFiles(inputFilename=["msajc003.wav", "msajc010.wav",], outputFilename=["/tmp/RtmpRwjz5Q/add_signalViaMatlab/0fc618dc-8980-414d-8c7a-144a649ce199/0000_ses/msajc003.mat", "/tmp/RtmpRwjz5Q/add_signalViaMatlab/0fc618dc-8980-414d-8c7a-144a649ce199/0000_ses/msajc010.mat], scalingFactor=[1, 4]); +}\if{html}{\out{
}} + +In both cases, \code{scalingFactor} is a parameter that \code{demoSignalScalerForOneFile} +and \code{demoSignalScalerForManyFiles} happen to accept. These are the demo functions +you can find by running \link{create_emuRdemoData}. + +The input file will typically be the media file of the bundle, but can be one +of the other files stored in the bundle. If you need that, use the \code{inputFileExtension} +parameter. + +The output \code{.mat} files that need to be written by the Matlab function will +be converted – by \code{emuR} – to \code{.Rda} files and saved in each bundle folder with +the file extension \code{outputFileExtension}. + +The working directory of the Matlab function will be the same as that of the +current R session, see \code{\link[base:getwd]{base::getwd()}}. + +You need a working and licensed Matlab instance on your computer. It will be +called via \code{\link[matlabr:run_matlab_code]{matlabr::run_matlab_code()}}. + +Matlab is a trademark of The MathWorks, Inc. +} +\examples{ + +\dontrun{ +########################### +# Setting up some demo data + +library(dplyr) +library(ggplot2) +library(emuR) +base_dir = tempdir() +emuR::create_emuRdemoData(base_dir) +emuDBhandle = emuR::load_emuDB(file.path(base_dir, + "emuR_demoData", + "ae_emuDB")) +segmentList = query(emuDBhandle, "Phonetic == ei") + +######################################################### +# Calling a Matlab function without additional parameters + +add_signalViaMatlab(emuDBhandle = emuDBhandle, + matlabFunctionName = "demoSignalScalerForOneFile", + outputFileExtension = "sound", + trackName = "unchangedSound", + trackColumn = "data[,1]", + paths_to_add = c(file.path(base_dir, + "emuR_demoData", + "add_signal_scripts", + "matlab"))) + +# paths_to_add tells Matlab where to find the demoSignalScalerForOneFile function. +# This will create a new track definition called unchangedSound. The track’s +# file format will be Rda. All files for this track will have the extension +# .sound and will contain the new signal within the variable data[,1]. + +list_ssffTrackDefinitions(emuDBhandle) + +# The "new" signal will just be a copy of the sound signal, because we have not +# included a scalingFactor parameter. Therefore, demoSignalScalerForOneFile will +# read the wav files and output them mostly unchanged (the values may be on a +# different scale). You can check it like this: + +td_media = get_trackdata(emuDBhandle, segmentList, "MEDIAFILE_SAMPLES") +td_new = get_trackdata(emuDBhandle, segmentList, "unchangedSound") + +ggplot(td_media) + + aes(x = times_rel, y = T1) + + facet_grid(vars(paste(session, bundle))) + + geom_line() + + ggtitle("Three sound signals, original") +ggplot(td_new) + + aes(x = times_rel, y = T1) + + facet_grid(vars(paste(session, bundle))) + + geom_line() + + ggtitle("Three sound signals, output by Matlab at new scale") + +# Observe that the two graphs look the same except for the scale. + +########################################### +# Calling a Matlab function with parameters + +bundleList = + emuR::list_bundles(emuDBhandle = emuDBhandle) \%>\% + dplyr::rename(bundle = name) +parameterList = + bundleList \%>\% + mutate(scalingFactor = case_match(bundle, + "msajc022" ~ 4, + "msajc023" ~ 2, + .default = 1)) +add_signalViaMatlab(emuDBhandle = emuDBhandle, + matlabFunctionName = "demoSignalScalerForOneFile", + outputFileExtension = "sound2", + trackName = "scaledSound", + trackColumn = "data[,1]", + matlabFunctionParameters = parameterList, + paths_to_add = c(file.path(base_dir, + "emuR_demoData", + "add_signal_scripts", + "matlab"))) + +# This will create a new track definition called scaledSound: + +list_ssffTrackDefinitions(emuDBhandle) + +# The "new" signal will be a copy of the original sound signals, but two bundles +# will be scaled up (multiplied by a given factor). The scaling factor was determined +# through the parameterList data frame, which contained a column scalingFactor. +# If the Matlab function expected other parameters, the data frame would have to +# contain columns accordingly. You can see that two of the bundles have changed +# their scale, but the shape is still the same: + +td_media = get_trackdata(emuDBhandle, segmentList, "MEDIAFILE_SAMPLES") +td_scaled = get_trackdata(emuDBhandle, segmentList, "scaledSound") + +ggplot(td_media) + + aes(x = times_rel, y = T1) + + facet_grid(vars(paste(session, bundle))) + + geom_line() + + ggtitle("Three sound signals, original") +ggplot(td_scaled) + + aes(x = times_rel, y = T1) + + facet_grid(vars(paste(session, bundle))) + + geom_line() + + ggtitle("Three sound signals, with different scaling factors applied") +} + + +} diff --git a/man/as.matrix.emusegs.Rd b/man/as.matrix.emusegs.Rd index 8af7edf9..8258f353 100644 --- a/man/as.matrix.emusegs.Rd +++ b/man/as.matrix.emusegs.Rd @@ -1,8 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emusegs.R \name{as.matrix.emusegs} \alias{as.matrix.emusegs} -\title{ as.matrix.emusegs } +\title{as.matrix.emusegs} +\usage{ +\method{as.matrix}{emusegs}(x, ...) +} \description{ - see function +see function } -\keyword{ internal } - +\keyword{internal} diff --git a/man/as.spectral.Rd b/man/as.spectral.Rd index 7a13e137..9cb28c69 100644 --- a/man/as.spectral.Rd +++ b/man/as.spectral.Rd @@ -1,43 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/spectralclass.R \name{as.spectral} \alias{as.spectral} - -\title{ Function to convert an object into an object of class 'spectral'.} -\description{ - The function converts a vector, matrix, or EMU-trackdata object -into an object of the same class and of class 'spectral' -} +\title{Function to convert an object into an object of class 'spectral'.} \usage{ as.spectral(trackdata, fs) } - \arguments{ - \item{trackdata}{ A vector, matrix, or EMU-trackdata object. } - \item{fs}{ Either a single element numeric vector, -or a numeric vector of the same length as the length -of trackdata if trackdata is a vector, -or of the same number of rows as trackdata} -} -\details{ - If fs is a single element numeric vector, -then the frequencies of trackdata are defined to -extend to fs/2. If fs is missing, then -the frequencies are 0:(N-1) where N is the length -of trackdata. +\item{trackdata}{A vector, matrix, or EMU-trackdata object.} + +\item{fs}{Either a single element numeric vector, or a numeric vector of +the same length as the length of trackdata if trackdata is a vector, or of +the same number of rows as trackdata} } \value{ - The same object but of class 'spectral'. +The same object but of class 'spectral'. } - -\author{Jonathan Harrington} - - -\seealso{ -\code{\link{is.spectral}} -\code{\link{plot.spectral}} +\description{ +The function converts a vector, matrix, or EMU-trackdata object into an +object of the same class and of class 'spectral' +} +\details{ +If fs is a single element numeric vector, then the frequencies of trackdata +are defined to extend to fs/2. If fs is missing, then the frequencies are +0:(N-1) where N is the length of trackdata. } - - \examples{ + vec = 1:10 as.spectral(vec, 2000) mat = rbind(1:10, 1:10) @@ -57,8 +46,12 @@ class(tr$data) -} +} +\seealso{ +\code{\link{is.spectral}} \code{\link{plot.spectral}} +} +\author{ +Jonathan Harrington +} \keyword{attribute} - - diff --git a/man/as.trackdata.Rd b/man/as.trackdata.Rd index e45c5570..2a30980d 100644 --- a/man/as.trackdata.Rd +++ b/man/as.trackdata.Rd @@ -1,38 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/trackdata.class.R \name{as.trackdata} \alias{as.trackdata} -\title{ Create an Emu trackdata object } -\description{ - Create an Emu trackdata object from a raw data matrix. -} +\title{Create an Emu trackdata object} \usage{ - as.trackdata(data, index, ftime, trackname="") +as.trackdata(data, index, ftime, trackname = "") } \arguments{ - \item{data}{ A two dimensional matrix of numerical data. } - \item{index}{ Segment index, one row per segment, two columns give the - start and end rows in the \code{data} matrix for each segment. } - \item{ftime}{ A two column matrix with one row per segment, gives the - start and end times in milliseconds for each segment. } - \item{trackname}{ The name of the track. } -} -\details{ - Emu trackdata objects contain possibly multi-column numerical data - corresponding to a set of segments from a database. Data for each - segment takes up a number of rows in the main \code{data} matrix, the - start and end rows are stored in the \code{index} component. The - \code{ftime} component contains the start and end times of the - segment data. - - Trackdata objects are returned by the \code{\link{emu.track}} function. +\item{data}{A two dimensional matrix of numerical data.} + +\item{index}{Segment index, one row per segment, two columns give the start +and end rows in the \code{data} matrix for each segment.} + +\item{ftime}{A two column matrix with one row per segment, gives the start +and end times in milliseconds for each segment.} + +\item{trackname}{The name of the track.} } \value{ - The components are bound into a trackdata object. +The components are bound into a trackdata object. } +\description{ +Create an Emu trackdata object from a raw data matrix. +} +\details{ +Emu trackdata objects contain possibly multi-column numerical data +corresponding to a set of segments from a database. Data for each segment +takes up a number of rows in the main \code{data} matrix, the start and end +rows are stored in the \code{index} component. The \code{ftime} component +contains the start and end times of the segment data. -\seealso{ \code{\link{emu.track}} \code{\link{dplot}} } - +Trackdata objects are returned by the \code{\link{get_trackdata}} function. +} \examples{ + # make a trackdata object of two data segments data1 <- matrix( 1:10, ncol=2 ) data2 <- matrix( 11:20, ncol=2 ) @@ -52,7 +54,9 @@ tdata[1] # and the second tdata[2] -} - +} +\seealso{ +\code{\link{get_trackdata}} \code{\link{dplot}} +} \keyword{misc} diff --git a/man/autobuild_linkFromTimes.Rd b/man/autobuild_linkFromTimes.Rd new file mode 100644 index 00000000..a546ae8b --- /dev/null +++ b/man/autobuild_linkFromTimes.Rd @@ -0,0 +1,83 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-autobuild.R +\name{autobuild_linkFromTimes} +\alias{autobuild_linkFromTimes} +\title{Autobuild links between two levels using their time information} +\usage{ +autobuild_linkFromTimes( + emuDBhandle, + superlevelName, + sublevelName, + rewriteAllAnnots = TRUE, + convertSuperlevel = FALSE, + backupLevelAppendStr = "-autobuildBackup", + newLinkDefType = NULL, + verbose = TRUE +) +} +\arguments{ +\item{emuDBhandle}{emuDB handle as returned by \code{\link{load_emuDB}}} + +\item{superlevelName}{name of level to link from (link definition required in emuDB)} + +\item{sublevelName}{name of level to link to (link definition required in emuDB)} + +\item{rewriteAllAnnots}{should changes be written to file system (_annot.json files) after +completing autobuild process (intended for expert use only)} + +\item{convertSuperlevel}{if set to TRUE a backup of the superlevel will be created and the actual +superlevel will be converted to a level of type ITEM} + +\item{backupLevelAppendStr}{string appended to level name for backup level} + +\item{newLinkDefType}{type of new linkDefinition (either \code{"ONE_TO_MANY"}, +\code{"MANY_TO_MANY"} or \code{"ONE_TO_ONE"}) which is passed to +\code{\link{add_linkDefinition}}. If NULL (the default) \code{\link{add_linkDefinition}} +isn't called and a linkDefintion is expected to be present.} + +\item{verbose}{show progress bars and further information} +} +\description{ +Autobuild links between two time levels. This is typically done when converting from +a database / annotation format that allows parallel time tiers / levels but does +not permit annotational units to be linked to each other, except by +matching time information (such as Praat's TextGrid format). The super-level has to be of the +type SEGMENT and the sub-level either of type EVENT or of type SEGMENT. If +this is the case and a according link definition is defined for the emuDB, +this function automatically links the events or segments of the sub-level which occur +within (startSample to (startSample + sampleDur)) the segments of the super-level to those segments. +} +\details{ +The type of link definition (ONE_TO_MANY, MANY_TO_MANY, ONE_TO_ONE) is relevant whether a link +is generated or not (e.g. overlapping segments are linked in a MANY_TO_MANY relationship +but not in a ONE_TO_MANY relationship). For more information on the structural +elements of an emuDB see \code{vignette(emuDB)}. +} +\examples{ +\dontrun{ + +################################## +# prerequisite: loaded myTGcolDB emuDB +# (see ?create_emuRdemoData, ?convert_TextGridCollection, +# and vignette(emuR_intro) for more information) + +# add linkDefinition as one has to be present for +# the autobuild function to work +add_linkDefinition(emuDBhandle = myTGcolDB, + type = "ONE_TO_MANY", + superlevelName = "Syllable", + sublevelName = "Phoneme") + +# envoke autobuild function to build hierarchy for converted TextGridCollection +autobuild_linkFromTimes(emuDBhandle = myTGcolDB, + superlevelName = "Syllable", + sublevelName = "Phoneme", + convertSuperlevel = TRUE) + +} +} +\seealso{ +add_linkDefinition +} +\keyword{autobuild} +\keyword{emuR} diff --git a/man/bark.Rd b/man/bark.Rd index 9b2955c0..559259ab 100644 --- a/man/bark.Rd +++ b/man/bark.Rd @@ -1,48 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/bark.R \name{bark} \alias{bark} \alias{bark.trackdata} \alias{bark.spectral} \alias{bark.default} - -%- Also NEED an `\alias' for EACH other topic documented here. \title{Convert Hertz to Bark and Bark to Hertz} -\description{ - The calculation is done using the formulae Traunmueller (1990) -} \usage{ - \method{bark}{spectral}(f,...) - \method{bark}{default}(f,inv=FALSE,...) - +bark(f, inv = FALSE, ...) } - -%- maybe also `usage' for other objects documented here. - \arguments{ +\item{f}{A vector or matrix of data or a spectral object.} - \item{f}{A vector or matrix of data or a spectral object.} - - \item{inv}{A single element logical vector. If F, data are converted from Hertz to Bark, if T, data are converted from Bark to Hertz. (Does not apply if 'data' is an oject of class 'spectral'.} - \item{\dots}{for generic only} - - } +\item{inv}{A single element logical vector. If FALSE, data are converted from +Hertz to Bark, if TRUE, data are converted from Bark to Hertz. (Does not apply +if 'data' is an oject of class 'spectral'.} +\item{\dots}{for generic only} +} \value{ - - A vector or matrix or spectral object of the same length and dimensions as data. - +A vector or matrix or spectral object of the same length and dimensions as +data. } - - - -\details{If 'data' is a spectral object, then +\description{ +The calculation is done using the formulae Traunmueller (1990) +} +\details{ +If 'data' is a spectral object, then the frequencies are changed so that they are proportional to the Bark scale and such that the Bark intervals -between frequencies are con stant between the lowest +between frequencies are con stant between the lowest -and highest frequencies. More specifically, +and highest frequencies. More specifically, suppose that a spectral object has frequencies @@ -59,33 +51,12 @@ the dB values at those frequencies. Negative frequencies which are obtained for values of about less than 40 Hz are removed in the case of spectral objects. - } - - - -\author{Jonathan Harrington} - - - -\references{ Traunmueller, H. (1990) "Analytical expressions for the tonotopic sensory scale" J. Acoust. Soc. Am. 88: 97-100.} - - - -\seealso{ - - \code{\link{mel}}, - - \code{\link{plot.spectral}} - -} - - - \examples{ + # convert Hertz values to Bark vec <- c(500, 1500, 2500) @@ -106,7 +77,7 @@ are removed in the case of spectral objects. - # convert the \$data values in a trackdata object to Bark + # convert the $data values in a trackdata object to Bark # create a new track data object @@ -132,7 +103,7 @@ are removed in the case of spectral objects. w = bark(e.dft) -par(mfrow=c(1,2)) +oldpar = par(mfrow=c(1,2)) plot(w, type="l") @@ -148,10 +119,20 @@ plot(e.dft, freq=bark(trackfreq(e.dft))) # in a higher frequency range. +par(oldpar) } +\references{ +Traunmueller, H. (1990) "Analytical expressions for the +tonotopic sensory scale" J. Acoust. Soc. Am. 88: 97-100. +} +\seealso{ +\code{\link{mel}}, - - +\code{\link{plot.spectral}} +} +\author{ +Jonathan Harrington +} \keyword{math} diff --git a/man/bayes.dist.Rd b/man/bayes.dist.Rd index 1c39da1a..18b4dd68 100644 --- a/man/bayes.dist.Rd +++ b/man/bayes.dist.Rd @@ -1,8 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/bayesdist.R \name{bayes.dist} \alias{bayes.dist} -\title{ bayes dist } +\title{bayes dist} +\usage{ +bayes.dist(data, train, labels = NULL) +} \description{ see function } -\keyword{ internal } - +\keyword{internal} diff --git a/man/bayes.lab.Rd b/man/bayes.lab.Rd index 2972cf70..c013fd70 100644 --- a/man/bayes.lab.Rd +++ b/man/bayes.lab.Rd @@ -1,9 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/bayesdist.R \name{bayes.lab} \alias{bayes.lab} - -\title{ bayes lab} +\title{bayes lab} +\usage{ +bayes.lab(data, train) +} \description{ see function } -\keyword{ internal } - +\keyword{internal} diff --git a/man/bayesian.metric.Rd b/man/bayesian.metric.Rd index f9766c2e..5eb8d3d7 100644 --- a/man/bayesian.metric.Rd +++ b/man/bayesian.metric.Rd @@ -1,8 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/bayesdist.R \name{bayesian.metric} \alias{bayesian.metric} -\title{bayesian metric } +\title{bayesian metric} +\usage{ +bayesian.metric(data, mean, cov, invcov) +} \description{ see function } -\keyword{ internal } - +\keyword{internal} diff --git a/man/bayesplot.Rd b/man/bayesplot.Rd index 5135d80e..125804f6 100644 --- a/man/bayesplot.Rd +++ b/man/bayesplot.Rd @@ -1,8 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/bayesdist.R \name{bayesplot} \alias{bayesplot} -\title{ bayesplot } +\title{bayesplot} +\usage{ +bayesplot( + data, + train, + N = 10, + ellipse = FALSE, + labs = NULL, + xlab = "", + ylab = "", + colour = TRUE, + ... +) +} \description{ - bayesplot +bayesplot } -\keyword{ internal } - +\keyword{internal} diff --git a/man/bind.Rd b/man/bind.Rd index cc4a4621..b006a868 100644 --- a/man/bind.Rd +++ b/man/bind.Rd @@ -1,8 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/bind.R \name{bind} \alias{bind} -\title{ class method bind data} +\title{class method bind data} +\usage{ +bind(a, ...) +} \description{ binds data } -\keyword{ internal } - +\keyword{internal} diff --git a/man/bind.default.Rd b/man/bind.default.Rd index 84502375..03134eb4 100644 --- a/man/bind.default.Rd +++ b/man/bind.default.Rd @@ -1,9 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/bind.R \name{bind.default} \alias{bind.default} -\title{ data binding } +\title{data binding} +\usage{ +\method{bind}{default}(...) +} \description{ binds data } - -\keyword{ internal } - +\keyword{internal} diff --git a/man/bind.trackdata.Rd b/man/bind.trackdata.Rd index b5a1eafa..a2cc0443 100644 --- a/man/bind.trackdata.Rd +++ b/man/bind.trackdata.Rd @@ -1,14 +1,15 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/bind.R \name{bind.trackdata} \alias{bind.trackdata} -\title{bind trackdata } -\description{ -binds diffrent trackdata objects together -} +\title{bind trackdata} \usage{ \method{bind}{trackdata}(...) } \arguments{ - \item{\dots}{ trackdata objects} +\item{\dots}{trackdata objects} +} +\description{ +binds different trackdata objects together } -\keyword{ methods} - +\keyword{methods} diff --git a/man/bridge.Rd b/man/bridge.Rd index 8588f016..a61dd201 100644 --- a/man/bridge.Rd +++ b/man/bridge.Rd @@ -1,6 +1,13 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-dataDocs.R +\docType{data} \name{bridge} \alias{bridge} \title{Three-columned matrix} -\usage{bridge} -\description{An EMU dataset} +\format{ +Three-columned matrix +} +\description{ +An EMU dataset +} \keyword{datasets} diff --git a/man/buildtrack.Rd b/man/buildtrack.Rd index 06869911..5bdff5c9 100644 --- a/man/buildtrack.Rd +++ b/man/buildtrack.Rd @@ -1,37 +1,33 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/buildtrack.R \name{buildtrack} \alias{buildtrack} -\title{ Build trackdata objects from the output of by() } -\description{ - buildtrack() converts a list that is the output -of by.trackdata() into a trackdata object if -the list components are matrices whose rows are -successive values in time. -} +\title{Build trackdata objects from the output of by()} \usage{ - buildtrack(mylist, ftime = NULL, trackname = "") +buildtrack(mylist, ftime = NULL, trackname = "") } \arguments{ - \item{mylist}{ a list that ist output from by() } - \item{ftime}{ ftime } - \item{trackname}{ name of track data object} -} +\item{mylist}{a list that ist output from by()} + +\item{ftime}{ftime} +\item{trackname}{name of track data object} +} +\description{ +buildtrack() converts a list that is the output of by.trackdata() into a +trackdata object if the list components are matrices whose rows are +successive values in time. +} \details{ - The default of by.trackdata() is to return a list. If -each element of the list consists of a matrix -whose rows are values occurring at the times given -by the row dimension names of the matrix, then buildtrack() -can be used to convert the list into a trackdata object. -If the times are not given in the row dimension names, -then these can be supplied as an additional argument to -buildtrack() +The default of by.trackdata() is to return a list. If each element of the +list consists of a matrix whose rows are values occurring at the times +given by the row dimension names of the matrix, then buildtrack() can be +used to convert the list into a trackdata object. If the times are not +given in the row dimension names, then these can be supplied as an +additional argument to buildtrack() } - -\author{ Jonathan Harrington } - - -\seealso{ \code{\link{by}} } \examples{ + #vowlax.fdat is a track data objects of formant of the vowlax segment list #calculate the difference between adjacent formant values p = by(vowlax.fdat[1,2],INDICES=NULL, diff) @@ -43,6 +39,12 @@ buildtrack() m = buildtrack(p) m -} -\keyword{manip} \ No newline at end of file +} +\seealso{ +\code{\link{by}} +} +\author{ +Jonathan Harrington +} +\keyword{manip} diff --git a/man/by.trackdata.Rd b/man/by.trackdata.Rd index 80ee9509..15294e2e 100644 --- a/man/by.trackdata.Rd +++ b/man/by.trackdata.Rd @@ -1,48 +1,38 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/by.trackdata.R \name{by.trackdata} \alias{by.trackdata} \alias{by} - -\title{A method of the generic function by for objects of class \'trackdata\'} -\description{ - A given function 'FUN' is applied to the data corresponding to each segment of data. -} +\title{A method of the generic function by for objects of class 'trackdata'} \usage{ - \method{by}{trackdata}(data, INDICES=NULL,FUN, \dots, simplify = FALSE -) +\method{by}{trackdata}(data, INDICES = NULL, FUN, ..., simplify = FALSE) } \arguments{ - \item{data}{a track data object } - \item{INDICES}{a list of segment indices, like a label vector} - \item{FUN}{a function that is applied to each segment} - \item{\dots}{arguments of the function fun} - \item{simplify}{simplify = TRUE , output is a matrix; simplify = FALSE a list is returned} -} +\item{data}{a track data object} -\details{ -It is the same as trapply but with the extension -to subsume calculation to groups of segments. -Note, if you do not want to apply the function fun to a -special group of segments, use \link{trapply} instead. -} -\value{ - list or vector -} +\item{INDICES}{a list of segment indices, like a label vector} -\author{Jonathan Harrington} +\item{FUN}{a function that is applied to each segment} +\item{\dots}{arguments of the function fun} -\seealso{ - \code{\link{trapply}}, - \code{\link{by}}, - \code{\link{trackdata}} - \code{\link{dapply}} - \code{\link{smooth}} - \code{\link{apply}} - +\item{simplify}{simplify = TRUE , output is a matrix; simplify = FALSE a +list is returned} +} +\value{ +list or vector +} +\description{ +A given function 'FUN' is applied to the data corresponding to each segment +of data. +} +\details{ +It is the same as trapply but with the extension to subsume calculation to +groups of segments. Note, if you do not want to apply the function fun to a +special group of segments, use \link{trapply} instead. } - - \examples{ + data(demo.vowels) data(demo.vowels.fm) @@ -71,7 +61,13 @@ special group of segments, use \link{trapply} instead. by(demo.vowels.fm[,1], list(lab = lab, llabs = llabs) , sapply, mean , simplify=FALSE) -} +} +\seealso{ +\code{\link{trapply}}, \code{\link{by}}, \code{\link{trackdata}} +\code{\link{dapply}} \code{\link{smooth}} \code{\link{apply}} +} +\author{ +Jonathan Harrington +} \keyword{methods} - diff --git a/man/cbind.trackdata.Rd b/man/cbind.trackdata.Rd index f356907f..0de4da8a 100644 --- a/man/cbind.trackdata.Rd +++ b/man/cbind.trackdata.Rd @@ -1,43 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cbind.trackdata.R \name{cbind.trackdata} \alias{cbind.trackdata} \alias{cbind} - -\title{ A method of the generic function cbind for objects of class \'trackdata\'} -\description{ - Different track data objects from one segment list are bound by combining the \$data - columns of the track data object by columns. -} +\title{A method of the generic function cbind for objects of class 'trackdata'} \usage{ - \method{cbind}{trackdata}(...) +\method{cbind}{trackdata}(...) } - \arguments{ - \item{\dots}{ track data objects } +\item{\dots}{track data objects} } - -\details{ - All track data objects have to be track data of the same segment list. - Thus \$index and \$ftime values have to be identically for all track data objects. - Track data objects are created by emu.track(). - The number of rows of the track data objects must match. -} - \value{ - A track data object with the same \$index and \$ftime values of the source track data objects and - with \$data that includes all columns of \$data of the source track data objects. +A track data object with the same $index and ftime values of the +source track data objects and with $data that includes all columns of +$data of the source track data objects. } - -\author{Jonathan Harrington} - - -\seealso{ - \code{\link{cbind}}, - \code{\link{rbind.trackdata}} - \code{\link{trackdata}} - \code{\link{emu.track}} +\description{ +Different track data objects from one segment list are bound by combining +the $data columns of the track data object by columns. +} +\details{ +All track data objects have to be track data of the same segment list. +Thus $index and $ftime values have to be identically for all track data +objects. Track data objects are created by get_trackdata(). The number of +rows of the track data objects must match. } - \examples{ + data(vowlax) #segment list vowlax - first segment only @@ -57,7 +46,13 @@ #The first column keeps vowlax.fund data, the second keeps vowlax.rms data fund.rms.lax[1,] -} - +} +\seealso{ +\code{\link{cbind}}, \code{\link{rbind.trackdata}} +\code{\link{trackdata}} \code{\link{get_trackdata}} +} +\author{ +Jonathan Harrington +} \keyword{methods} diff --git a/man/cen.sub.Rd b/man/cen.sub.Rd index c1a735c3..68656bef 100644 --- a/man/cen.sub.Rd +++ b/man/cen.sub.Rd @@ -1,8 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/norm.R \name{cen.sub} \alias{cen.sub} -\title{ Subfunction of cen} +\title{Subfunction of cen} +\usage{ +cen.sub(data) +} \description{ see function } -\keyword{ internal } - +\keyword{internal} diff --git a/man/classify.Rd b/man/classify.Rd index f66ab5ff..b96b418f 100644 --- a/man/classify.Rd +++ b/man/classify.Rd @@ -1,29 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/bayesdist.R \name{classify} \alias{classify} -\title{ classify} -\description{ -classifies data -} - +\title{classify} \usage{ classify(data, train, metric = "bayes") } - \arguments{ - \item{data}{ data to classify} - \item{train}{training data} - \item{metric}{bayes or mahal} -} +\item{data}{data to classify} + +\item{train}{training data} +\item{metric}{bayes or mahal} +} \value{ -The calssification matrix. +The classification matrix. +} +\description{ +classifies data } - -\author{ Jonathan Harrington } - - \examples{ + ## The function is currently defined as function (data, train, metric = "bayes") { @@ -41,7 +39,9 @@ function (data, train, metric = "bayes") } result } -} - +} +\author{ +Jonathan Harrington +} \keyword{models} diff --git a/man/classplot.Rd b/man/classplot.Rd index 5a70b48f..5868a70e 100644 --- a/man/classplot.Rd +++ b/man/classplot.Rd @@ -1,38 +1,61 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/classplot.R \name{classplot} - \alias{classplot} - \title{Produce a classification plot from discriminant or SVM modelling} - \description{ - The function classifies all point specified within -the ranges of xlim and ylim based on the training model specified -in model. It then produces a two-dimensional plot colour-coded for classifications. } - \usage{ - classplot(model, xlim, ylim, N = 100, pch = 15, col = NULL, legend = TRUE, position = "topright", bg = "gray90", ...) - } - \arguments{ - \item{model}{ - A two-dimensional training model output from qda(), lda() of MASS package , or svm() of e1071 package - } - \item{xlim}{ - A vector of two numeric elements specifying the range on the x-axis - (parameter 1) over which classifications should be made - } - \item{ylim}{ - A vector of two elements specifying the range on the y-axis (parameter 2) over - which classifications should be made - } - \item{N}{ - A vector of one numeric element which specifies the density of classification - (greater N gives higher density). The default is 100. - } - \item{pch}{A single element numeric vector specifying the plotting symbol to be used in the classification plot. Defaults to 15.} - \item{col}{Either Null in which case the colours for the separate classes are col = c(1, 2, ...n) where n is the number of classes; or else a vector specifying the desired colours that is the same length as there are classes.} - \item{legend}{A single element logical vector specifying whether a legend should be drawn. Defaults to T} - \item{position}{A single element vector specifying the position in the figure where the legend should be drawn. Defaults to "topright"} -\item{bg}{A single element vector specifying the background colour on which the legend should be drawn.} -\item{...}{Further arguments to plot.} +\alias{classplot} +\title{Produce a classification plot from discriminant or SVM modelling} +\usage{ +classplot( + model, + xlim, + ylim, + N = 100, + pch = 15, + col = NULL, + legend = TRUE, + position = "topright", + bg = "gray90", + ... +) } +\arguments{ +\item{model}{A two-dimensional training model output from qda(), lda() of +MASS package , or svm() of e1071 package} + +\item{xlim}{A vector of two numeric elements specifying the range on the +x-axis (parameter 1) over which classifications should be made} + +\item{ylim}{A vector of two elements specifying the range on the y-axis +(parameter 2) over which classifications should be made} + +\item{N}{A vector of one numeric element which specifies the density of +classification (greater N gives higher density). The default is 100.} + +\item{pch}{A single element numeric vector specifying the plotting symbol +to be used in the classification plot. Defaults to 15.} + +\item{col}{Either Null in which case the colours for the separate classes +are col = c(1, 2, ...n) where n is the number of classes; or else a vector +specifying the desired colours that is the same length as there are +classes.} + +\item{legend}{A single element logical vector specifying whether a legend +should be drawn. Defaults to TRUE} + +\item{position}{A single element vector specifying the position in the +figure where the legend should be drawn. Defaults to "topright"} +\item{bg}{A single element vector specifying the background colour on which +the legend should be drawn.} + +\item{...}{Further arguments to plot.} +} +\description{ +The function classifies all point specified within the ranges of xlim and +ylim based on the training model specified in model. It then produces a +two-dimensional plot colour-coded for classifications. +} \examples{ + library(MASS) # Data from female speaker 68 temp = vowlax.spkr=="68" @@ -44,10 +67,12 @@ fm.lda = lda(vowlax.fdat.5[temp,1:2], vowlax.l[temp]) xlim=c(0,1000) ylim=c(0,3000) -par(mfrow=c(1,2)) +oldpar = par(mfrow=c(1,2)) classplot(fm.qda, xlim=xlim, ylim=ylim, main="QDA") classplot(fm.lda, xlim=xlim, ylim=ylim, main="LDA") +par(oldpar) + # install.packages("e1071") # library(e1071) @@ -58,7 +83,11 @@ classplot(fm.lda, xlim=xlim, ylim=ylim, main="LDA") \dontrun{classplot(fm.svm, xlim=xlim, ylim=ylim, xlab="F1", ylab="F2", main="SVM")} } -\author{Jonathan Harrington} \seealso{ -\code{\link[MASS]{qda}}, \code{\link[MASS]{lda}}, svm of e1071 package. There is a function plot.svm which produces a prettier plot for SVMs. +\code{\link[MASS]{qda}}, \code{\link[MASS]{lda}}, svm of e1071 +package. There is a function plot.svm which produces a prettier plot for +SVMs. +} +\author{ +Jonathan Harrington } diff --git a/man/closest.Rd b/man/closest.Rd index ff53543e..c90f655f 100644 --- a/man/closest.Rd +++ b/man/closest.Rd @@ -1,8 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/misc.R \name{closest} \alias{closest} \title{closest} +\usage{ +closest(vec, val) +} \description{ see function } -\keyword{ internal } - +\keyword{internal} diff --git a/man/convert_BPFCollection.Rd b/man/convert_BPFCollection.Rd new file mode 100644 index 00000000..2cef0e73 --- /dev/null +++ b/man/convert_BPFCollection.Rd @@ -0,0 +1,83 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-convert_BPFCollection.R +\name{convert_BPFCollection} +\alias{convert_BPFCollection} +\title{Convert a Bas Partitur File Collection (audio and BAS Partitur files) to an emuDB} +\usage{ +convert_BPFCollection( + sourceDir, + targetDir, + dbName, + bpfExt = "par", + audioExt = "wav", + extractLevels = NULL, + refLevel = NULL, + newLevels = NULL, + newLevelClasses = NULL, + segmentToEventLevels = NULL, + unifyLevels = NULL, + verbose = TRUE +) +} +\arguments{ +\item{sourceDir}{path to the directory containing the Bas Partitur File collection} + +\item{targetDir}{directory where the new emuDB should be saved; if it does not exist, +the function tries to create one} + +\item{dbName}{name given to the new emuDB} + +\item{bpfExt}{extension of BPF files (default = "par")} + +\item{audioExt}{extension of audio files (default = "wav")} + +\item{extractLevels}{optional vector containing the names of levels that should be extracted. +If NULL (the default) all levels found in the BPF collection are extracted.} + +\item{refLevel}{optional name of level used as reference for symbolic links. If NULL (the default), a link-less data base is created.} + +\item{newLevels}{optional vector containing names of levels in the BPF collection that are not part of the standard +BPF levels. See \url{https://www.bas.uni-muenchen.de/forschung/Bas/BasFormatseng.html#Partitur_tiersdef} for details on +standard BPF levels.} + +\item{newLevelClasses}{optional vector containing the classes of levels in the newLevels vector as integers. +Must have the same length and order as newLevels.} + +\item{segmentToEventLevels}{optional vector containing names of segment levels with overlapping segments. +The parser treats segments on these levels as events (SEGMENT xyz becomes EVENT xyz_start and EVENT xyz_end). +If a level contains segmental overlap but is not in this vector, the parser will throw an error. If overlap +resolution leads to event overlap (e.g. if one segment's end coincides with the next segment's start), an error is thrown either way. If in doubt whether a level contains segmental overlap, try running the converter with segmentToEventLevels = NULL and see whether an error occurs.} + +\item{unifyLevels}{optional vector containing names of levels to be unified with the reference level. This means that +they are treated as labels of the reference level rather than independent items. At the moment, only purely symbolic +(class 1) levels can be unified. Links between the reference level and levels in unifyLevels must be one-to-one.} + +\item{verbose}{display infos, warnings and show progress bar} +} +\description{ +Converts a Bas Partitur File Collection to an emuDB. Expects a collection of the following form: +One master directory containing any number of file pairs (= bundles). A file pair +consists of an audio file with the extension and a BPF file with the extension . +Apart from extensions, the names of corresponding audio and BPF files must be identical. Each BPF +file is converted into an emuDB annot file. An emuDB config file matching the data base is created +after parsing. +} +\examples{ +\dontrun{ + +################################## +# prerequisite: a dir with equally named file pairs *.wav and *.par +# (see ?create_emuRdemoData on how to create a demo) + +# convert file pairs *.wav and *.par in /tmp/BPF_collection into emuRDB 'NewEmuR' in +# dir /tmp/DirNewEmuR; the tier 'ORT' acts as the (word) reference tier; the +# tier 'KAN' is one-to-one bound to 'ORT' as a label +convert_BPFCollection("/tmp/BPF_collection","/tmp/DirNewEmuR",'NewEmuR', + bpfExt='par',audioExt='wav',refLevel='ORT',unifyLevels=c('KAN')) + +} + +} +\seealso{ +convert_TextGridCollection, convert_legacyEmuDB +} diff --git a/man/convert_TextGridCollection.Rd b/man/convert_TextGridCollection.Rd new file mode 100644 index 00000000..721bb592 --- /dev/null +++ b/man/convert_TextGridCollection.Rd @@ -0,0 +1,68 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-convert_TextGridCollection.R +\name{convert_TextGridCollection} +\alias{convert_TextGridCollection} +\title{Convert a TextGridCollection (e.g. .wav & .TextGrid files) to emuDB} +\usage{ +convert_TextGridCollection( + dir, + dbName, + targetDir, + tgExt = "TextGrid", + audioExt = "wav", + tierNames = NULL, + verbose = TRUE +) +} +\arguments{ +\item{dir}{path to directory containing the TextGridCollection (nested directory structures are permitted as the +function recursively searches through directories, generating the session names from dir. structure)} + +\item{dbName}{name given to the new emuDB} + +\item{targetDir}{directory where to save the new emuDB} + +\item{tgExt}{extension of TextGrid files (default=TextGrid, meaning file names of the form baseName.TextGrid)} + +\item{audioExt}{extension of audio files (default=wav, meaning file names of the form baseName.wav)} + +\item{tierNames}{character vector containing names of tiers to extract and convert. If NULL (the default) all +tiers are converted.} + +\item{verbose}{display infos & show progress bar} +} +\description{ +Converts a TextGridCollection to an emuDB by searching a given directory for .wav & .TextGrid files (default +extensions) with the same base name. First, the function generates a file pair list +containing paths to files with the same base +name. It then generates an emuDB DBconfig based on the first TextGrid in this list which specifies +the allowed level names and types in the new emuDB. After this it converts all file pairs to the new format, +checking whether they comply to the newly generated database configuration. For +more information on the emuDB format see \code{vignette{emuDB}}. +Note that since Praat TextGrids do not permit explicit hierarchical structures, the created emuDB does not contain +any links or link definitions. You can however use the \code{\link{autobuild_linkFromTimes}} function after the conversion process +to automatically build a hierarchal structure. +} +\examples{ +\dontrun{ + +########################################################## +# prerequisite: directory containing .wav & .TextGrid files +# (see \code{?create_emuRdemoData} how to create demo data) + +# convert TextGridCollection and store +# new emuDB in folder provided by tempdir() +convert_TextGridCollection(dir = "/path/to/directory/", + dbName = "myTGcolDB", + targetDir = tempdir()) + + +# same as above but this time only convert +# the information stored in the "Syllable" and "Phonetic" tiers +convert_TextGridCollection(dir = "/path/to/directory/", + dbName = "myTGcolDB", + targetDir = tempdir(), + tierNames = c("Syllable", "Phonetic")) + +} +} diff --git a/man/convert_legacyEmuDB.Rd b/man/convert_legacyEmuDB.Rd new file mode 100644 index 00000000..0f0d635e --- /dev/null +++ b/man/convert_legacyEmuDB.Rd @@ -0,0 +1,100 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-legacy.R +\name{convert_legacyEmuDB} +\alias{convert_legacyEmuDB} +\title{Convert legacy EMU database to the emuDB format} +\usage{ +convert_legacyEmuDB( + emuTplPath, + targetDir, + dbUUID = uuid::UUIDgenerate(), + verbose = TRUE, + ... +) +} +\arguments{ +\item{emuTplPath}{EMU template file path} + +\item{targetDir}{target directory} + +\item{dbUUID}{optional UUID of emuDB, will be generated by default} + +\item{verbose}{be verbose, default: \code{TRUE}} + +\item{...}{currently available additional options: +\itemize{ +\item{\code{rewriteSSFFTracks}: if \code{TRUE}, rewrite SSFF tracks instead of copying +the file to get rid of big endian encoded SSFF files (SPARC), default: \code{TRUE}} +\item{\code{ignoreMissingSSFFTrackFiles}: if \code{TRUE}, missing SSFF track files +are ignored, if \code{FALSE} an error will be generated, default: \code{TRUE}} +\item{\code{sourceFileTextEncoding}: encoding of legacy database text files (template, +label and hlb files), possible values: NULL, "latin1", "UTF-8" "bytes" or "unknown" +:default \code{NULL} (uses encoding of operating system platform)} +\item{\code{symbolicLinkSignalFiles}: if \code{TRUE}, signal files are symbolic +linked instead of copied. Implies: \code{rewriteSSFFTracks=FALSE}, Default: \code{FALSE}} +}} +} +\description{ +Converts an existing legacy EMU database to emuDB database structure. +Copies or rewrites signal files and converts the database configuration and annotation data. +The legacy database must be addressed by its template file. +} +\details{ +The database will be converted if the legacy database template file \code{emuTplPath} could +be found and successfully loaded and parsed. The legacy template file usually has the extension '.tpl'. +The UUID of the new emuDB will be randomly generated by default. If \code{targetDir} does not exist, +the directory and its parents will be created. A new directory with the name of the database and the +suffix '_emuDB' will be created in the \code{targetDir}. If the new database directory exists +already, the function stops with an error. The template file is converted to a JSON file. + +Some of the flags of the legacy EMU template files are ignored (lines with this syntax: "set [flagName] [flagValue]", +known ignored flag names are: 'LabelTracks', 'SpectrogramWhiteLevel', 'HierarchyViewLevels', 'SignalViewLevels'). +Legacy EMU utterances are reorganized to sessions and bundles. The naming of the sessions depends on the wildcard +path pattern of the primary track: If the path contains no wildcard, only one session with the name '0000' will be created. +If the path contains one wildcard path element, the names of the directories matching the pattern will be used as session names. +If the path contains more than one wildcard path element, the session name is the concatenation of directory names +separated by an underscore character. + +Media files (usually WAV files) are copied, SSFF track files are rewritten using the ASSP library of package +\code{wrassp} by default (see option \code{rewriteSSFFTracks} below, see also \link[wrassp]{read.AsspDataObj} +\link[wrassp]{write.AsspDataObj}). Annotations in EMU hierarchy (.hlb) files and ESPS label files are +converted to one JSON file per bundle (utterance). Only those files get copied, which match the scheme +of the template file. Additional files in the legacy database directories are ignored. The legacy EMU +database will not be modified. For more information on the structural elements of an emuDB see \code{vignette{emuDB}}. + + +\code{options} is a list of key value pairs: +} +\examples{ +\dontrun{ +## Convert legacy EMU database specified by EMU +## template file /mydata/EMU_legacy/ae/ae.tpl to directory /mydata/EMU/ +## and load it afterwards + +convert_legacyEmuDB("/mydata/EMU_legacy/ae/ae.tpl","/mydata/EMU/") +ae=load_emuDB("/mydata/EMU/ae_emuDB") + +## Convert database "ae" and do not rewrite SSFF tracks + +convert_legacyEmuDB("/mydata/EMU_legacy/ae/ae.tpl", +"/mydata/EMU/", +options=list(rewriteSSFFTracks=FALSE)) + +## Convert legacy database "ae" from emuR demo data and load converted emuDB + +create_emuRdemoData() +demoTplPath=file.path(tempdir(),"emuR_demoData/legacy_ae/ae.tpl") +targetDir=file.path(tempdir(),"converted_to_emuR") +convert_legacyEmuDB(demoTplPath,targetDir) +dbHandle=load_emuDB(file.path(targetDir,"ae_emuDB")) + +} + +} +\seealso{ +\code{\link{load_emuDB}} +} +\keyword{Emu} +\keyword{database} +\keyword{emuDB} +\keyword{schema} diff --git a/man/convert_txtCollection.Rd b/man/convert_txtCollection.Rd new file mode 100644 index 00000000..ef246772 --- /dev/null +++ b/man/convert_txtCollection.Rd @@ -0,0 +1,46 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-convert_txtCollection.R +\name{convert_txtCollection} +\alias{convert_txtCollection} +\title{Converts a collection of audio files and plain text transcriptions into an emuDB} +\usage{ +convert_txtCollection( + dbName, + sourceDir, + targetDir, + txtExtension = "txt", + mediaFileExtension = "wav", + attributeDefinitionName = "transcription", + cleanWhitespaces = TRUE, + verbose = TRUE +) +} +\arguments{ +\item{dbName}{name of the new emuDB} + +\item{sourceDir}{directory containing the plain text transcription files and media files} + +\item{targetDir}{directory where the new emuDB will be stored} + +\item{txtExtension}{file extension of transcription files} + +\item{mediaFileExtension}{file extension of media files} + +\item{attributeDefinitionName}{label name of the transcription items} + +\item{cleanWhitespaces}{if true, any sequence of whitespaces in the transcription (including newlines and tabs) +is transformed into a single blank} + +\item{verbose}{display progress bar} +} +\description{ +This function takes as input pairs of media files (i.e. wav files) and plain text +transcriptions files. It creates a new emuDB with one bundle per media file, and +turns the associated transcription into an item in that bundle. For this purpose, +media files and text files belonging to the same bundle must be named identically +(with the exception of their respective file extensions). The newly created +emuDB is stored in the target directory, and its handle is returned. +} +\seealso{ +convert_BPFCollection, convert_TextGridCollection +} diff --git a/man/convert_wideToLong.Rd b/man/convert_wideToLong.Rd new file mode 100644 index 00000000..7ba99b17 --- /dev/null +++ b/man/convert_wideToLong.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-emuRtrackdata.R +\name{convert_wideToLong} +\alias{convert_wideToLong} +\title{convert tracks of a tibble trackdata object to the long form} +\usage{ +convert_wideToLong(td, calcFreqs = FALSE) +} +\arguments{ +\item{td}{wide form trackdata tibble object} + +\item{calcFreqs}{calculate an additional column containing +frequency values from 0-nyquist frequency that match T1-TN (can be quite useful for spectral data)} +} +\value{ +long form trackdata tibble object +} +\description{ +Converts a trackdata tibble object of the form (==wide): +\tabular{lllllll}{ +sl_rowIdx \tab ... \tab T1 \tab T2 \tab T3 \tab ... \tab TN\cr +1 \tab ... \tab T1_value \tab T2_value \tab T3_value \tab ... \tab TN_value +} +to its long form equivalent: +\tabular{llll}{ +sl_rowIdx \tab ... \tab track_name \tab track_value \cr +1 \tab ... \tab T1 \tab T1_value \cr +1 \tab ... \tab T2 \tab T2_value \cr +1 \tab ... \tab T3 \tab T3_value \cr +... \tab ... \tab ... \tab ... \cr +1 \tab ... \tab TN \tab TN_value \cr +} +} diff --git a/man/coutts.Rd b/man/coutts.Rd index a1f0d8e7..eb9fc87d 100644 --- a/man/coutts.Rd +++ b/man/coutts.Rd @@ -1,6 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-dataDocs.R +\docType{data} \name{coutts} \alias{coutts} -\title{Segment list of words, read speech, female speaker of Australian English from database epgcoutts} -\usage{coutts} -\description{An EMU dataset} +\title{Segment list of words, read speech, female speaker of Australian English +from database epgcoutts} +\format{ +segmentlist +} +\description{ +An EMU dataset +} \keyword{datasets} diff --git a/man/coutts.epg.Rd b/man/coutts.epg.Rd index 366348cd..3286438a 100644 --- a/man/coutts.epg.Rd +++ b/man/coutts.epg.Rd @@ -1,6 +1,13 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-dataDocs.R +\docType{data} \name{coutts.epg} \alias{coutts.epg} \title{EPG-compressed trackdata from the segment list coutts} -\usage{coutts.epg} -\description{An EMU dataset} +\format{ +segmentlist +} +\description{ +An EMU dataset +} \keyword{datasets} diff --git a/man/coutts.l.Rd b/man/coutts.l.Rd index c77ff0ed..5d089b6b 100644 --- a/man/coutts.l.Rd +++ b/man/coutts.l.Rd @@ -1,6 +1,13 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-dataDocs.R +\docType{data} \name{coutts.l} \alias{coutts.l} \title{Vector of word label from the segment list coutts} -\usage{coutts.l} -\description{An EMU dataset} +\format{ +segmentlist +} +\description{ +An EMU dataset +} \keyword{datasets} diff --git a/man/coutts.rms.Rd b/man/coutts.rms.Rd index 7490256c..163f8cba 100644 --- a/man/coutts.rms.Rd +++ b/man/coutts.rms.Rd @@ -1,9 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-dataDocs.R +\docType{data} \name{coutts.rms} \alias{coutts.rms} -\docType{data} \title{rms Data to coutts segment list} -\description{An EMU dataset} +\format{ +segmentlist +} +\description{ +An EMU dataset +} \examples{ + data(coutts.rms) + } \keyword{datasets} diff --git a/man/coutts.sam.Rd b/man/coutts.sam.Rd index e093593b..21c716d0 100644 --- a/man/coutts.sam.Rd +++ b/man/coutts.sam.Rd @@ -1,6 +1,13 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-dataDocs.R +\docType{data} \name{coutts.sam} \alias{coutts.sam} \title{Trackdata of acoustic waveforms from the segment list coutts} -\usage{coutts.sam} -\description{An EMU dataset} +\format{ +trackdata object +} +\description{ +An EMU dataset +} \keyword{datasets} diff --git a/man/coutts2.Rd b/man/coutts2.Rd index 835b5810..673bfaae 100644 --- a/man/coutts2.Rd +++ b/man/coutts2.Rd @@ -1,6 +1,13 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-dataDocs.R +\docType{data} \name{coutts2} \alias{coutts2} \title{Segment list, same as coutts but at a slower speech rate} -\usage{coutts2} -\description{An EMU dataset} +\format{ +segmentlist +} +\description{ +An EMU dataset +} \keyword{datasets} diff --git a/man/coutts2.epg.Rd b/man/coutts2.epg.Rd index ba549ab6..ae29b72b 100644 --- a/man/coutts2.epg.Rd +++ b/man/coutts2.epg.Rd @@ -1,6 +1,13 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-dataDocs.R +\docType{data} \name{coutts2.epg} \alias{coutts2.epg} \title{EPG-compressed trackdata from the segment list coutts2} -\usage{coutts2.epg} -\description{An EMU dataset} +\format{ +trackdata object +} +\description{ +An EMU dataset +} \keyword{datasets} diff --git a/man/coutts2.l.Rd b/man/coutts2.l.Rd index 9d501ed1..f43ff4d7 100644 --- a/man/coutts2.l.Rd +++ b/man/coutts2.l.Rd @@ -1,6 +1,13 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-dataDocs.R +\docType{data} \name{coutts2.l} \alias{coutts2.l} \title{Vector of word label from the segment list coutts2} -\usage{coutts2.l} -\description{An EMU dataset} +\format{ +vector of word label +} +\description{ +An EMU dataset +} \keyword{datasets} diff --git a/man/coutts2.sam.Rd b/man/coutts2.sam.Rd index 0d4629f7..a477f536 100644 --- a/man/coutts2.sam.Rd +++ b/man/coutts2.sam.Rd @@ -1,6 +1,13 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-dataDocs.R +\docType{data} \name{coutts2.sam} \alias{coutts2.sam} \title{Trackdata of acoustic waveforms from the segment list coutts2} -\usage{coutts2.sam} -\description{An EMU dataset} +\format{ +trackdata object +} +\description{ +An EMU dataset +} \keyword{datasets} diff --git a/man/cr.Rd b/man/cr.Rd index ee0e92f1..2e9ba93b 100644 --- a/man/cr.Rd +++ b/man/cr.Rd @@ -1,69 +1,91 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cr.R \name{cr} \alias{cr} - -\title{ Plot digital sinuoids. } -\description{ - The function plots and/or sums digital sinusoids for different parameter settings. -} +\title{Plot digital sinuoids.} \usage{ -cr(A = 1, k = 1, p = 0, N = 16, samfreq = NULL, duration = NULL, -const = NULL, expon = NULL, plotf = TRUE, ylim = NULL, -xlim = NULL, values = FALSE, xlab = "Time (number of points)", -ylab = "Amplitude", type = "b", bw = NULL, dopoints = FALSE, ...) +cr( + A = 1, + k = 1, + p = 0, + N = 16, + samfreq = NULL, + duration = NULL, + const = NULL, + expon = NULL, + plotf = TRUE, + ylim = NULL, + xlim = NULL, + values = FALSE, + xlab = "Time (number of points)", + ylab = "Amplitude", + type = "b", + bw = NULL, + dopoints = FALSE, + ... +) } -%- maybe also 'usage' for other objects documented here. \arguments{ - \item{A}{ A vector of amplitude values. Defaults to A = 1} - \item{k}{ A vector of cycles (repetitions). Defauls to k = 1 } - \item{p}{ A vector of phase values between -pi/2 and pi/2. Defaults to 0. } - \item{N}{ The number of points in the signal. Defaults to 16. } - \item{samfreq}{ If NULL, then a sinusoid is plotted -with a frequency of k cycles per N points. -Otherwise, if samfreq is an numeric, -then the argument to k is interpreted -as the frequency in Hz and the sinusoid at -that frequency is plotted for however many -points are specified by N. For example, if samfreq is 40 (Hz), -and if N is 40 and k = 1, then 1 cycle of a 1 Hz sinusoid will -be plotted. - } - \item{duration}{ Specify the duration in ms. If NULL, the default, then -the duration of the sinusoid is in points (N), otherwise if a numeric -value is supplied, then in ms. For example, 1/2 second of a 1 cycle -sinusoid at a sampling frequency of 40 Hz: duration = 500, k = 1, samfreq=40. -A ms value can be supplied only if the sampling frequency is also specified. } - \item{const}{ A single numeric vector for shifting the entire sinusoid -up or down the y-axis. For example, when const is 5, then -5 + s, where s is the sinusoid is plotted. Defaults to 0 (zero). } - \item{expon}{ A numeric vector. If supplied, then what is -plotted is expon[j]\eqn{\mbox{\textasciicircum}}{^}(c(0:(N - 1) * A cos (2 * pi * k/N * (0:(N-1))). -For example, a decaying sinusoid is produced with cr(expon=-0.9). -Defaults to NULL (i.e. to expon = 1).} - \item{plotf}{ A single-valued logical vector. If T (default), -the sinusoid is plotted.} - \item{ylim}{ A two-valued numeric vector for specifying the y-axis range. } - \item{xlim}{A two-valued numeric vector for specifying the y-axis range. } - \item{values}{ If T, then the values of the sinusoid are listed. Defaults to F. } - \item{xlab}{ A character vector for plotting the x-axis title. } - \item{ylab}{ A character vector for plotting the y-axis title.} - \item{type}{ A character vector for specifying the line type (see par) } - \item{bw}{ A numeric vector for specifying the bandwidth, if the -sampling frequency is supplied. The bandwidth is converted to -an exponential (see expon using exp( - rad(bw/2, samfreq = samfreq). } - \item{dopoints}{ this is now redundant. } - \item{\dots}{Option for supplying further graphical parameters - see par. } -} +\item{A}{A vector of amplitude values. Defaults to A = 1} +\item{k}{A vector of cycles (repetitions). Defaults to k = 1} -\author{ Jonathan Harrington } +\item{p}{A vector of phase values between -pi/2 and pi/2. Defaults to 0.} +\item{N}{The number of points in the signal. Defaults to 16.} -\seealso{ -\code{\link{crplot}} +\item{samfreq}{If NULL, then a sinusoid is plotted with a frequency of k +cycles per N points. Otherwise, if samfreq is an numeric, then the argument +to k is interpreted as the frequency in Hz and the sinusoid at that +frequency is plotted for however many points are specified by N. For +example, if samfreq is 40 (Hz), and if N is 40 and k = 1, then 1 cycle of a +1 Hz sinusoid will be plotted.} -} +\item{duration}{Specify the duration in ms. If NULL, the default, then the +duration of the sinusoid is in points (N), otherwise if a numeric value is +supplied, then in ms. For example, 1/2 second of a 1 cycle sinusoid at a +sampling frequency of 40 Hz: duration = 500, k = 1, samfreq=40. A ms value +can be supplied only if the sampling frequency is also specified.} + +\item{const}{A single numeric vector for shifting the entire sinusoid up or +down the y-axis. For example, when const is 5, then 5 + s, where s is the +sinusoid is plotted. Defaults to 0 (zero).} + +\item{expon}{A numeric vector. If supplied, then what is plotted is +expon[j]\eqn{\mbox{\textasciicircum}}{^}(c(0:(N - 1) * A cos (2 * pi * k/N +* (0:(N-1))). For example, a decaying sinusoid is produced with +cr(expon=-0.9). Defaults to NULL (i.e. to expon = 1).} + +\item{plotf}{A single-valued logical vector. If TRUE (default), the sinusoid +is plotted.} + +\item{ylim}{A two-valued numeric vector for specifying the y-axis range.} + +\item{xlim}{A two-valued numeric vector for specifying the y-axis range.} + +\item{values}{If TRUE, then the values of the sinusoid are listed. Defaults to +FALSE.} + +\item{xlab}{A character vector for plotting the x-axis title.} +\item{ylab}{A character vector for plotting the y-axis title.} + +\item{type}{A character vector for specifying the line type (see par)} + +\item{bw}{A numeric vector for specifying the bandwidth, if the sampling +frequency is supplied. The bandwidth is converted to an exponential (see +expon using exp( - rad(bw/2, samfreq = samfreq).} + +\item{dopoints}{this is now redundant.} + +\item{\dots}{Option for supplying further graphical parameters - see par.} +} +\description{ +The function plots and/or sums digital sinusoids for different parameter +settings. +} \examples{ + # cosine wave cr() @@ -94,5 +116,12 @@ cr(const=4, k=50, bw=4, samfreq=2000, N=100) # sinusoid multiplied by a decaying exponential (same effect as bandwidth) cr(expon=-0.95, N=200, type="l") + +} +\seealso{ +\code{\link{crplot}} +} +\author{ +Jonathan Harrington } \keyword{dplot} diff --git a/man/create_emuDB.Rd b/man/create_emuDB.Rd new file mode 100644 index 00000000..f6d4837e --- /dev/null +++ b/man/create_emuDB.Rd @@ -0,0 +1,41 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-database.R +\name{create_emuDB} +\alias{create_emuDB} +\title{Create empty emuDB} +\usage{ +create_emuDB( + name, + targetDir, + mediaFileExtension = "wav", + store = TRUE, + verbose = TRUE +) +} +\arguments{ +\item{name}{of new emuDB} + +\item{targetDir}{target directory to store the emuDB to} + +\item{mediaFileExtension}{defines mediaFileExtention (NOTE: currently only +'wav' (the default) is supported by all components of EMU)} + +\item{store}{store new created emuDB to file system} + +\item{verbose}{display infos & show progress bar} +} +\description{ +Creates an empty emuDB in the target directory specified +} +\details{ +Creates a new directory [name]_emuDB in targetDir. By default +the emuDB is created in the R session, written to the filesystem and +then purged from the R session. +} +\examples{ +\dontrun{ +# create empty emuDB in folder provided by tempdir() +create_emuDB(name = "myNewEmuDB", + targetDir = tempdir()) +} +} diff --git a/man/create_emuRdemoData.Rd b/man/create_emuRdemoData.Rd new file mode 100644 index 00000000..bde5f71c --- /dev/null +++ b/man/create_emuRdemoData.Rd @@ -0,0 +1,38 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-create_emuRdemoData.R +\name{create_emuRdemoData} +\alias{create_emuRdemoData} +\title{Create demo data for the emuR package} +\usage{ +create_emuRdemoData(dir = tempdir(), precache = FALSE) +} +\arguments{ +\item{dir}{directory to create demo data in (default= tempdir())} + +\item{precache}{creates an on-file-system cache for the ae emuDB to allow fast loading +(see \code{load_emuDB} for details about the emuDB file cache)} +} +\description{ +Create a folder within the folder specified +by the dir argument called emuR_demoData. +This folder contains the folders: +\itemize{ +\item{ae_emuDB: Containing an emuDB that adheres to the new format specification +(as expected by the \code{\link{load_emuDB}} function). See \code{vignette(emuDB)} +for more information on this database format.} +\item{BPF_collection: Containing a BAS Partitur Format (BPF) file collection (as +expected by the \code{\link{convert_BPFCollection}} function)} +\item{legacy_ae: Containing a legacyEmuDB (as expected by the +\code{\link{convert_legacyEmuDB}} function)} +\item{TextGrid_collection: Containing a TextGrid file collection +(as expected from the \code{\link{convert_TextGridCollection}} function)} +} +} +\examples{ +\dontrun{ + +# create demo data directory in directory +# provided by the tempdir function +create_emuRdemoData(dir = tempdir()) +} +} diff --git a/man/create_emuRtrackdata.Rd b/man/create_emuRtrackdata.Rd new file mode 100644 index 00000000..18af1a8d --- /dev/null +++ b/man/create_emuRtrackdata.Rd @@ -0,0 +1,46 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-emuRtrackdata.R +\name{create_emuRtrackdata} +\alias{create_emuRtrackdata} +\title{create emuRtrackdata object} +\usage{ +create_emuRtrackdata(sl, td) +} +\arguments{ +\item{sl}{seglist of class \code{\link{emuRsegs}}} + +\item{td}{\code{\link{trackdata}} object generated from sl} +} +\value{ +emuRtrackdata object +} +\description{ +Joins \code{\link{emuRsegs}} and \code{\link{trackdata}} objects +to create an \code{\link{emuRtrackdata}} object that is a sub-class of +a \code{\link{data.frame}} object. This object +can be viewed as a flat version of a \code{\link{trackdata}} object that also +contains all the information of a \code{\link{emuRsegs}} object. It is meant to +ease integration with other packages as it is based on the well known +\code{\link{data.frame}} object. +} +\examples{ +\dontrun{ + +################################## +# prerequisite: loaded ae emuDB +# (see ?load_emuDB for more information) + +# query emuDB (to get object of class emuRsegs) +sl = query(emuDBhandle = ae, + query = "Phonetic == i:") + +# get formats for SEGMENTs in sl (to get object of class trackdata) +td = get_trackdata(emuDBhandle = ae, + seglist = sl, + onTheFlyFunctionName = "forest") + +# create emuRtrackdata object +create_emuRtrackdata(sl = sl, td = td) + +} +} diff --git a/man/create_itemsInLevel.Rd b/man/create_itemsInLevel.Rd new file mode 100644 index 00000000..c9d87937 --- /dev/null +++ b/man/create_itemsInLevel.Rd @@ -0,0 +1,161 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-annotations_crud.R +\name{create_itemsInLevel} +\alias{create_itemsInLevel} +\title{Create new items programmatically} +\usage{ +create_itemsInLevel( + emuDBhandle, + itemsToCreate, + calculateEndTimeForSegments = TRUE, + allowGapsAndOverlaps = FALSE, + rewriteAllAnnots = TRUE, + verbose = TRUE +) +} +\arguments{ +\item{emuDBhandle}{emuDB handle as returned by \link{load_emuDB}} + +\item{itemsToCreate}{A data frame with the columns: +\itemize{ +\item \code{session} (character) +\item \code{bundle} (character) +\item \code{level} (character) +\item \code{attribute} (character) +\item \code{labels} (character) +\item \code{start_item_seq_idx} (numeric; only when \code{level} refers to a ITEM-typed +level) +\item \code{start} (numeric, milliseconds; only when \code{level} refers to an EVENT-typed +or SEGMENT-typed level) +\item \code{end} (numeric, milliseconds; only when \code{level} refers to a SEGMENT-typed +level and \code{calculateEndTimeForSegments} is \code{FALSE}) +}} + +\item{calculateEndTimeForSegments}{\emph{Only applicable if the level type is SEGMENT.} +If set to \code{TRUE}, then each segment’s end time is automatically aligned +with the start time of the following segment. In that case, user-provided +end times are ignored. The last segment’s end time is the end time of the +annotated media file. If set to \code{FALSE}, then the user has to provide +an end time for each segment.} + +\item{allowGapsAndOverlaps}{\emph{Only applicable if the level type is SEGMENT +and \code{calculateEndTimeForSegments} is \code{FALSE}.} +If set to \code{FALSE}, this function fails when \code{itemsToCreate} contains +gaps or overlaps between segments. The offending segments are returned invisibly. +You can inspect them by assigning the return value to a variable. The return +value will include a new column \code{gap_samples} that indicates the size +of the gap (positive values) or overlap (negative values) with the previous +segment, respectively. It is measured in audio samples, not in milliseconds. +Setting this to \code{TRUE} allows the function to complete even with gaps +and/or overlaps, but this is \strong{not recommended as it can cause bugs in +the EMU-webApp}.} + +\item{rewriteAllAnnots}{should changes be written to file system (_annot.json +files) (intended for expert use only)} + +\item{verbose}{if set to \code{TRUE}, more status messages are printed} +} +\description{ +Create annotation items programmatically on a single level. +You have to pass in a data frame, called \code{itemsToCreate}, describing +the new items. The required columns depend on the type of the level (ITEM, +EVENT, or SEGMENT). + +This function belongs to emuR’s CRUD family of functions, which let the user +manipulate items programmatically: +\itemize{ +\item Create items (\link{create_itemsInLevel}) +\item Read items (\link{query}) +\item Update items (\link{update_itemsInLevel}) +\item Delete items (\link{delete_itemsInLevel})) +} +} +\details{ +This function creates new annotation items on an existing level, in existing +bundles. + +Regardless of the type of level you are creating items on, your input data +frame \code{itemsToCreate} must describe your new items by specifying the columns +\code{session}, \code{bundle}, \code{level}, \code{attribute} and \code{labels}. \code{level} must have the +same value for all rows, as we can only create items on one level at a time. + +\code{attribute} must also have the same value for all rows, and it must be an +existing attribute that belongs to the \code{level}. + +A major use case for this function is to obtain a segment list using \link{query}, +modify the segment list and feed it to this function. That is why the column +\code{labels} has a plural name: segment lists also have a column \code{labels} and +not \code{label}. The same is true for the sequence index columns introduced below. + +Creating new items works differently depending on the level type. The three +types are explained in the following sections. +\subsection{Levels of type ITEM}{ + +In addition to the columns that are always required, ITEM-typed levels require +a column with a sequence index to be present in the \code{itemsToCreate} data +frame. Its name must be \code{start_item_seq_idx}. This name was chosen instead +of \code{sequence_index} because it is present as a column name in segment lists +obtained with \link{query}. That makes it easer to use a segment list as input to +\code{\link[=create_itemsInLevel]{create_itemsInLevel()}}. + +Along the time axis, there can be multiple annotation items on every level. +Their order within the level is given by their sequence index. All \emph{existing} +items have a natural-valued sequence index and there are no gaps in the +sequences (i.e. if a level contains N annotation items, they are indexed 1..N). + +Any newly created item must be given a sequence index. The sequence index may +be real-valued (it will automatically be replaced with a natural value). To +prepend the new item to the existing ones, pass a value lower than one. To +append it to the existing items, you can either pass \code{NA} or any value that +you know is greater than N (the number of existing items in that level). It +does not need to be exactly N+1. To place the new item between two existing +ones, use any real value between the sequence indexes of the existing neighbors. + +If you are appending multiple items at the same time, every sequence index +(including \code{NA}) can only be used once per session/bundle/level combination +(because session/bundle/level/sequence index are the unique identifier of an +item). + +After creating the items, all sequence indexes (which may now be real-valued, +natural-valued or NA) are sorted in ascending order and then replaced with +the values 1..N, where N is the number of items on that level. While sorting, +\code{NA} values are placed at the end. +} + +\subsection{Levels of type EVENT}{ + +In addition to the columns that are always required, EVENT-typed levels require +a column with the time of the event to be present in the \code{itemsToCreate} data +frame. Its name must be \code{start}. This name was chosen because it is present +as a column name in segment lists obtained with \link{query}. That makes it easer +to use a segment list as input to \code{\link[=create_itemsInLevel]{create_itemsInLevel()}}. The \code{end} column +in segment lists is 0 for EVENT-typed levels. + +The \code{start} column must be given in milliseconds. + +You cannot create an EVENT item at a point on the time axis where another +item already exists on the same level. If you specify such an event, the +entire function will fail. +} + +\subsection{Levels of type SEGMENT}{ + +You can only create SEGMENT-typed items in bundles where the respective level +is empty. + +In addition to the columns that are always required, SEGMENT-typed levels +require the column \code{start} to be present in the \code{itemsToCreate} data frame, +representing the start time of the segment. It must be given in milliseconds. + +Segments also need to have an end, and there are two strategies to determine +the end. Either, you explicitly provide an \code{end} column in the \code{itemsToCreate} +data frame. It must be given in milliseconds. If you do that, you have to +specify the \code{calculateEndTimeForSegments} parameter as \code{FALSE}. + +Alternatively, you can leave \code{calculateEndTimeForSegments} at \code{TRUE} (which +is the default) and provide your \code{itemsToCreate} data frame without an \code{end} +column. In that case, the end time will be aligned to the next neighbor’s +start time. The end time of the last segment will be aligned with the end of +the annotated media file. +} +} diff --git a/man/create_links.Rd b/man/create_links.Rd new file mode 100644 index 00000000..71d3f13b --- /dev/null +++ b/man/create_links.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-annotations_crud.R +\name{create_links} +\alias{create_links} +\title{create links between items} +\usage{ +create_links(emuDBhandle, links, rewriteAllAnnots = TRUE, verbose = TRUE) +} +\arguments{ +\item{emuDBhandle}{emuDB handle as returned by \code{\link{load_emuDB}}} + +\item{links}{data.frame like object containing linking information. The required columns +are: +\itemize{ +\item \code{session}: +\item \code{bundle} +\item \code{from_id} +\item \code{to_id} +}} + +\item{rewriteAllAnnots}{should changes be written to file system (_annot.json +files) (intended for expert use only)} + +\item{verbose}{if set to \code{TRUE}, more status messages are printed} +} +\description{ +create links between items +} diff --git a/man/create_spectrogram_image_as_raster.Rd b/man/create_spectrogram_image_as_raster.Rd new file mode 100644 index 00000000..d0bba576 --- /dev/null +++ b/man/create_spectrogram_image_as_raster.Rd @@ -0,0 +1,66 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-plotting.R +\name{create_spectrogram_image_as_raster} +\alias{create_spectrogram_image_as_raster} +\title{Create spectrogram image as raster} +\usage{ +create_spectrogram_image_as_raster( + audioFilePath, + begin = 0, + end = 0, + windowSizeInSecs = 0.01, + alpha = 0.16, + lowerFreq = 0, + upperFreq = 5000, + window = "GAUSS", + dynRangeInDB = 70, + audioChannel = 1, + preEmphasisFilterFactor = 0.97, + invert = FALSE +) +} +\arguments{ +\item{audioFilePath}{path to audio file to plot spectrogram of} + +\item{begin}{begin time in seconds (passed into begin parameter of \code{wrassp::read.AsspDataObj})} + +\item{end}{end time in seconds (passed into end parameter of \code{wrassp::read.AsspDataObj})} + +\item{windowSizeInSecs}{window size in seconds} + +\item{alpha}{value of spectrogram} + +\item{lowerFreq}{lower frequency limit of spectrogram} + +\item{upperFreq}{upper frequency limit of spectrogram} + +\item{window}{window type used in spectrogram calculation. Allowed values +are: +\itemize{ + \item "BARTLETT" + \item "BARTLETTHANN" + \item "BLACKMAN" + \item "COSINE" + \item "GAUSS" (the default) + \item "HAMMING" + \item "HANN" + \item "LANCZOS" + \item "RECTANGULAR" + \item "TRIANGULAR" +}} + +\item{dynRangeInDB}{dynamic range in DB of spectrogram} + +\item{audioChannel}{channel of audio file to draw spectrogram of (only +applicable when using multi-channel audio files)} + +\item{preEmphasisFilterFactor}{used in time domain for amplifying high-freqs} + +\item{invert}{invert the colors of the spectrogram} +} +\value{ +a image raster object +} +\description{ +Create spectrogram image as raster +} diff --git a/man/crplot.Rd b/man/crplot.Rd index e7aa9056..45d5218d 100644 --- a/man/crplot.Rd +++ b/man/crplot.Rd @@ -1,69 +1,112 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cr.R \name{crplot} \alias{crplot} -\title{ Function to plot a digital sinusoid and the circle from which it is derived. } -\description{ A digital sinusoid is derived the movement of a point around a circle. -The function shows the relationship between the two for various parameter settings.} +\title{Function to plot a digital sinusoid and the circle from which it is +derived.} \usage{ -crplot(A = 1, k = 1, p = 0, N = 16, const = NULL, figsize = 8, npoints = 500, col = 1, cplot = TRUE, -splot = TRUE, numplot = TRUE, axes = TRUE, incircle = TRUE, arrow = TRUE, linetype = 1, textplot = NULL, -lineplot = NULL, ylab = "Amplitude", super = NULL, xaxlab = NULL, type = "b", -xlab = "Time (number of points)", fconst = 3.5/3.1, pointconst = 1.2) +crplot( + A = 1, + k = 1, + p = 0, + N = 16, + const = NULL, + figsize = 8, + npoints = 500, + col = 1, + cplot = TRUE, + splot = TRUE, + numplot = TRUE, + axes = TRUE, + incircle = TRUE, + arrow = TRUE, + linetype = 1, + textplot = NULL, + lineplot = NULL, + ylab = "Amplitude", + super = NULL, + xaxlab = NULL, + type = "b", + xlab = "Time (number of points)", + fconst = 3.5/3.1, + pointconst = 1.2 +) } - \arguments{ - \item{A}{ Amplitude of the circle/sinusoid. } - \item{k}{ Frequency of the sinusoid } - \item{p}{ Phase of the sinusoid } - \item{N}{ Number of points per cycle or revolution. } - \item{const}{ A constant corresponding to k + A*cos(2*pi*k+p) } - \item{figsize}{ Set the figure size as pin <- c(figsize, figsize/2). -Defaults to figsize = 8. } - \item{npoints}{ The number of points used in plotting the circle. Defaults to 500 } - \item{col}{ An integer for the color in plotting the sinusoid and points around the circle } - \item{cplot}{ Now redundant } - \item{splot}{ Now redundant } - \item{numplot}{ Logical. If T (defaults), the digital points around the circle are numbered } - \item{axes}{ Logical. If T, plot axes. } - \item{incircle}{ Logical. If T, plot an the angle between digital points in the circle. } - \item{arrow}{ Logical. If T, plot an arrow on incircle showing the direction of movement. } - \item{linetype}{ Specify a linetype. Same function as lty in plot } - \item{textplot}{ A list containing \$radius, \$textin, \$pivals -for plotting text at specified angles and radii on -the circle. \$radius: a vector of amplitudes of the radii at -which the text is to be plotted; \$textin: a vector -of chacacter labels to be plotted; \$pivals: the angle, in radians -relative to zero radians (top of the circle) at which -the text is to be plotted. Defaults to NULL } - \item{lineplot}{ Plot lines from the centre of the circle - to the circumference. lineplot is a vector specifying - the angle in radians (zero corresponds to the top of the circle) } - \item{ylab}{ Specify a y-axis label. } - \item{super}{ Superimpose a part solid circle and corresponding - sinusoid. This needs to be a list containing \$first and -\$last, which are values between 0 and 2*pi defining - the beginning and ending of the part circle which is - to be superimposed } - \item{xaxlab}{ Now redundant } - \item{xlab}{ Specify an x-axis label. } - \item{type}{ Specify a type. } - - \item{fconst}{ A single elment numeric vector -for the aspect ratio in a postscript plot. Defaults to 3.5/3.1 -which is appropriate for a postscript setting of setps(h=4, w=4) } - \item{pointconst}{ The radius for plotting the numbers around the circle. -Defaults to 1.2 * A } -} +\item{A}{Amplitude of the circle/sinusoid.} +\item{k}{Frequency of the sinusoid} -\references{ Harrington, J, & Cassidy, S. 1999. Techniques in Speech Acoustics. Kluwer} -\author{ Jonathan Harrington} +\item{p}{Phase of the sinusoid} +\item{N}{Number of points per cycle or revolution.} -\seealso{ -\code{\link{cr}} -} +\item{const}{A constant corresponding to k + A*cos(2*pi*k+p)} + +\item{figsize}{Set the figure size as pin <- c(figsize, figsize/2). +Defaults to figsize = 8.} + +\item{npoints}{The number of points used in plotting the circle. Defaults +to 500} + +\item{col}{An integer for the color in plotting the sinusoid and points +around the circle} + +\item{cplot}{Now redundant} + +\item{splot}{Now redundant} + +\item{numplot}{Logical. If TRUE (defaults), the digital points around the +circle are numbered} + +\item{axes}{Logical. If TRUE, plot axes.} + +\item{incircle}{Logical. If TRUE, plot an the angle between digital points in +the circle.} + +\item{arrow}{Logical. If TRUE, plot an arrow on incircle showing the direction +of movement.} + +\item{linetype}{Specify a linetype. Same function as lty in plot} + +\item{textplot}{A list containing $radius, $textin, $pivals for plotting +text at specified angles and radii on the circle. $radius: a vector of +amplitudes of the radii at which the text is to be plotted; $textin: a +vector of character labels to be plotted; $pivals: the angle, in radians +relative to zero radians (top of the circle) at which the text is to be +plotted. Defaults to NULL} + +\item{lineplot}{Plot lines from the centre of the circle to the +circumference. lineplot is a vector specifying the angle in radians (zero +corresponds to the top of the circle)} +\item{ylab}{Specify a y-axis label.} + +\item{super}{Superimpose a part solid circle and corresponding sinusoid. +This needs to be a list containing $first and $last, which are values +between 0 and 2*pi defining the beginning and ending of the part circle +which is to be superimposed} + +\item{xaxlab}{Now redundant} + +\item{type}{Specify a type.} + +\item{xlab}{Specify an x-axis label.} + +\item{fconst}{A single element numeric vector for the aspect ratio in a +postscript plot. Defaults to 3.5/3.1 which is appropriate for a postscript +setting of setps(h=4, w=4)} + +\item{pointconst}{The radius for plotting the numbers around the circle. +Defaults to 1.2 * A} +} +\description{ +A digital sinusoid is derived the movement of a point around a circle. The +function shows the relationship between the two for various parameter +settings. +} \examples{ + crplot() # sine wave crplot(p=-pi/2) @@ -72,6 +115,16 @@ crplot(k=3) # aliasing crplot(k=15) -} +} +\references{ +Harrington, J, & Cassidy, S. 1999. Techniques in Speech +Acoustics. Kluwer +} +\seealso{ +\code{\link{cr}} +} +\author{ +Jonathan Harrington +} \keyword{dplot} diff --git a/man/dapply.Rd b/man/dapply.Rd index 6d56ad8d..c76dfb0e 100644 --- a/man/dapply.Rd +++ b/man/dapply.Rd @@ -1,19 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dapply.R \name{dapply} \alias{dapply} -\title{ apply a function to each part of a trackdata object } -\description{ -Given an Emu trackdata object, \code{dapply} will apply a given -function to the data corresponding to each segment of data. The -result is a new trackdata object. -} +\title{apply a function to each part of a trackdata object} \usage{ dapply(trackdata, fun, ...) } \arguments{ - \item{trackdata}{ An Emu trackdata object } - \item{fun}{ A function taking a matrix of data and a vector of times - and returning a list with components \code{\$data} and \code{\$ftime}.} - \item{\dots}{ Additional arguments to be passed to \code{fun} } +\item{trackdata}{An Emu trackdata object} + +\item{fun}{A function taking a matrix of data and a vector of times and +returning a list with components \code{$data} and \code{$ftime}.} + +\item{\dots}{Additional arguments to be passed to \code{fun}} +} +\value{ +An Emu trackdata object with components: \item{data}{A matrix of +data with all segments concatenated by row.} \item{index}{A two column +matrix of the start and end rows for each segment} \item{ftime}{A two +column matrix of the start and end times for each segment} +} +\description{ +Given an Emu trackdata object, \code{dapply} will apply a given function to +the data corresponding to each segment of data. The result is a new +trackdata object. } \details{ \code{dapply} can be used to apply an arbitrary function to trackdata @@ -21,33 +31,23 @@ extracted from an Emu database. It can be used for example to smooth the data (see \code{\link{dsmooth}}) or differentiate it (see \code{\link{ddiff}}). -Trackdata is made up of three components: a matrix of data \code{\$data}, -a matrix of indexes (\code{\$index}) and a matrix of segment times -(\code{\$ftime}). The indexes contain the start and end rows for each -segment in the trackdata, the time matrix contains the start and end -times of each data segment. +Trackdata is made up of three components: a matrix of data \code{$data}, a +matrix of indexes (\code{$index}) and a matrix of segment times +(\code{$ftime}). The indexes contain the start and end rows for each +segment in the trackdata, the time matrix contains the start and end times +of each data segment. -The function \code{fun} supplied to \code{dapply} should take one matrix -of data (corresponding to one segment) and a vector of two times being -the start and end of the data. It should return a modified data matrix, -which can have any number of rows or columns, and a new pair of start -and end times. The new start and end times are necessary because the -operation applied might shorten or interpolate the data and hence change -the times corresponding to the first and last rows of data. +The function \code{fun} supplied to \code{dapply} should take one matrix of +data (corresponding to one segment) and a vector of two times being the +start and end of the data. It should return a modified data matrix, which +can have any number of rows or columns, and a new pair of start and end +times. The new start and end times are necessary because the operation +applied might shorten or interpolate the data and hence change the times +corresponding to the first and last rows of data. } -\value{ - An Emu trackdata object with components: - \item{data}{A matrix of data with all segments concatenated by row.} - \item{index}{A two column matrix of the start and end rows for each segment} - \item{ftime}{A two column matrix of the start and end times for each segment} -} -\seealso{ -\code{\link{dsmooth}} -\code{\link{ddiff}} -} - \examples{ + data(dip) ## formant data of the first segment in segment list dip fm <- dip.fdat[1] @@ -66,5 +66,9 @@ testfun <- function(data, ftime, n) { fm.first3 <- dapply( fm, testfun, 3 ) fm.first10 <- dapply( fm, testfun, 10 ) + +} +\seealso{ +\code{\link{dsmooth}} \code{\link{ddiff}} } -\keyword{misc}%-- one or more ... +\keyword{misc} diff --git a/man/dbinfo.Rd b/man/dbinfo.Rd deleted file mode 100644 index 1e2fd728..00000000 --- a/man/dbinfo.Rd +++ /dev/null @@ -1,25 +0,0 @@ -\name{dbinfo} -\alias{dbinfo} -\title{ -Available EMU Speech databases -} -\description{ -dbinfo lists all available databases (templates) in the linked Emu System -} -\usage{ -dbinfo() -} -\value{ -Returns the name of the template files without extension. -} -\author{ -Tina John -} - -\seealso{ -\code{\link{trackinfo}} -} -\examples{ -\dontrun{dbinfo()} -} -\keyword{utilities} diff --git a/man/dbnorm.Rd b/man/dbnorm.Rd index e557f385..2b8b9094 100644 --- a/man/dbnorm.Rd +++ b/man/dbnorm.Rd @@ -1,47 +1,45 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/misc.R \name{dbnorm} \alias{dbnorm} -\title{ Function to dB-normalise spectral objects } -\description{ - The function can be used to rescale a spectrum -to a dB value at a particular frequency - for example, -to rescale the spectrum so that 3000 Hz has 0 dB -and all other values are shifted in relation to this. -} +\title{Function to dB-normalise spectral objects} \usage{ dbnorm(specdata, f = 0, db = 0) } -%- maybe also 'usage' for other objects documented here. \arguments{ - \item{specdata}{ An object of class 'spectral' } - \item{f}{ A single element vector specifying the frequency. -Defaults to 0 } - \item{db}{ A single element vector specifying the dB value to which -the spectrum is to be rescaled. Defaults to zero } -} - -\value{An object of the same class with rescaled dB values. -The default is to rescale the dB-values of the spectrum to 0 dB -at 0 Hz. -} - -\author{ Jonathan Harrington } +\item{specdata}{An object of class 'spectral'} +\item{f}{A single element vector specifying the frequency. Defaults to 0} -\seealso{ -\code{\link{dbtopower}} -\code{\link{plot.spectral}} +\item{db}{A single element vector specifying the dB value to which the +spectrum is to be rescaled. Defaults to zero} +} +\value{ +An object of the same class with rescaled dB values. The default is +to rescale the dB-values of the spectrum to 0 dB at 0 Hz. +} +\description{ +The function can be used to rescale a spectrum to a dB value at a +particular frequency - for example, to rescale the spectrum so that 3000 Hz +has 0 dB and all other values are shifted in relation to this. } - \examples{ + # normalise to - 40 dB at 1500 Hz res = dbnorm(e.dft, 1500, 0) # compare the two ylim = range(c(res, e.dft)) plot(e.dft, ylim=ylim, type="l") -par(new=TRUE) +oldpar = par(new=TRUE) plot(res, ylim=ylim, type="l", col=2) -} - +par(oldpar) +} +\seealso{ +\code{\link{dbtopower}} \code{\link{plot.spectral}} +} +\author{ +Jonathan Harrington +} \keyword{manip} diff --git a/man/dbtopower.Rd b/man/dbtopower.Rd index d13c5842..dec06195 100644 --- a/man/dbtopower.Rd +++ b/man/dbtopower.Rd @@ -1,46 +1,47 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/misc.R \name{dbtopower} \alias{dbtopower} - -\title{ Function for inter-converting between decibels and a linear scale } -\description{ - The function converts from decibels to a linear scale -} +\title{Function for inter-converting between decibels and a linear scale} \usage{ dbtopower(specdata, const = 10, base = 10, inv = FALSE) } - \arguments{ - \item{specdata}{ A numeric object or an object of class trackdata } - \item{const}{ A single element numeric vector. Defaults to 10 } - \item{base}{ A single element numeric vector. Defaults to 10 } - \item{inv}{ Logical. If T, then the conversion is -from a logarithmic to an anti-logarithmic form, otherwise the other way round } -} -\details{ - The function returns base\eqn{\mbox{\textasciicircum}}{^}(specdata/const) if -inv=F, otherwise, const * log(dat, base=base). If the object -to which this function is applied is of class 'trackdata' -then this function is applied to \$data. -} -\value{ - An object of the same class. +\item{specdata}{A numeric object or an object of class trackdata} -} +\item{const}{A single element numeric vector. Defaults to 10} -\author{ Jonathan Harrington } +\item{base}{A single element numeric vector. Defaults to 10} -\seealso{ -\code{\link{dbtopower}} -\code{\link{plot.spectral}} +\item{inv}{Logical. If TRUE, then the conversion is from a logarithmic to an +anti-logarithmic form, otherwise the other way round} +} +\value{ +An object of the same class. +} +\description{ +The function converts from decibels to a linear scale +} +\details{ +The function returns base\eqn{\mbox{\textasciicircum}}{^}(specdata/const) +if inv=FALSE, otherwise, const * log(dat, base=base). If the object to which +this function is applied is of class 'trackdata' then this function is +applied to $data. } - \examples{ + # convert 10 dB to a power ratio vec = dbtopower(10) # convert dB-data to a power ratio and back to decibels res = dbtopower(vowlax.dft.5) res = dbtopower(res, inv=TRUE) -} +} +\seealso{ +\code{\link{dbtopower}} \code{\link{plot.spectral}} +} +\author{ +Jonathan Harrington +} \keyword{math} diff --git a/man/dct.Rd b/man/dct.Rd new file mode 100644 index 00000000..38db3831 --- /dev/null +++ b/man/dct.Rd @@ -0,0 +1,79 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dct.R +\name{dct} +\alias{dct} +\title{Discrete Cosine Transformation} +\usage{ +dct(data, m = NULL, fit = FALSE) +} +\arguments{ +\item{data}{a vector or single column matrix of numeric values to which the +2nd order polynomial is to be fitted.} + +\item{m}{The number of DCT coefficients that are returned or on which the +smoothed trajectory is based. Defaults to NULL which returns coefficients +of frequencies k = 0, 1,2 .. N-1 where N is the length of the input signal, +wav. If fit = TRUE and k = NULL, then the the sum of all the cosine waves +whose amplitudes are the DCT coefficients are returned - which is equal to +the original signal. k must be between 2 and the length of the signal.} + +\item{fit}{if FALSE, return the DCT coefficients; if TRUE, the values of the +smoothed trajectory are returned based on summing the cosine waves of the k +lowest ordered DCT coefficients, where k is the argument given below.} +} +\description{ +Obtain the coefficients of the discrete cosine transformation (DCTRUE). +} +\details{ +The function calculates the DCT coefficients for any vector or +single-columned matrix. The function can also be used to obtain a smoothed +trajectory of the input data by summing the cosine waves derived from the +first few DCT coefficients. + +The algorithm first reflects the input signal about the last data point, N. +Thus if the input signal vec if of length N, the algorithm creates a vector +c(vec, rev(vec[-c(1,N)])). and the R fft function is applied to this +reflected signal. The DCT coefficients are real part of what is returned by +fft i.e. the amplitudes of the cosine waves of frequencies k = 0, 1, 2, +...2 *(N-1) radians per sample. The phase is zero in all cases. The +amplitudes are calculated in such a way such that if these cosine waves are +summed, the original (reflected) signal is reconstructed. What is returned +by dct() are the amplitudes of the cosine waves (DCT coefficients) up to a +frequency of N radians/sample, i.e. a vector of cosine wave amplitudes that +has the same length as the original signal and of frequencies k = 0, 1, 2, +... (N-1). Alternatively, if fit=TRUE, a smoothed signal of the same length +as the original signal is obtained based on a summation of the lowest +ordered DCT coefficients. This dct() algorithm returns very similar values +to DCT() with inv=FALSE written by Catherine Watson and used in Watson & +Harrington (1999). +} +\examples{ + +data(vowlax) +# obtain the first four DCT coefficients +# (frequencies k = 0, 1, 2, 3) for some +# first formant frequency data +vec <- vowlax.fdat[1,1]$data +dct(vec, m=4) + +# obtain the corresponding smoothed +# trajectory +dct(vec, m=4 , fit=TRUE) + +} +\references{ +Watson, C. & Harrington, J. (1999). Acoustic evidence for +dynamic formant trajectories in Australian English vowels. Journal of the +Acoustical Society of America, 106, 458-468. + +Zahorian, S., and Jagharghi, A. (1993). Spectral-shape features versus +formants as acoustic correlates for vowels, Journal of the Acoustical +Society of America, 94, 19661982. +} +\seealso{ +\code{\link{plafit}} \code{\link{by}} +} +\author{ +Jonathan Harrington +} +\keyword{math} diff --git a/man/dct2.Rd b/man/dct2.Rd deleted file mode 100644 index cf12ef24..00000000 --- a/man/dct2.Rd +++ /dev/null @@ -1,95 +0,0 @@ -\name{dct} -\alias{dct} -%- Also NEED an '\alias' for EACH other topic documented here. -\title{ Discrete Cosine Transformation } -\description{ - Obtain the coefficients of the discrete cosine transformation (DCTRUE). -} - - -\usage{ -dct(data, m = NULL, fit = FALSE) -} -%- maybe also 'usage' for other objects documented here. -\arguments{ - \item{data}{ a vector or single column matrix of numeric values to which the 2nd order polynomial is to be fitted. } - \item{fit}{ if F, return the DCT coefficients; -if T, the values of the smoothed trajectory are returned -based on summing the cosine waves of the k lowest ordered -DCT coefficients, where k is the argument given below. } - \item{m}{ The number of DCT coefficients that -are returned or on which the smoothed trajectory is based. -Defaults to NULL which returns coefficients of -frequencies k = 0, 1,2 .. N-1 where N is the -length of the input signal, wav. If -fit = TRUE and k = NULL, then the the sum -of all the cosine waves whose amplitudes -are the DCT coefficients are returned - which -is equal to the original signal. k must be -between 2 and the length of the signal. } -} -\details{ - The function calculates the DCT coefficients for any -vector or single-columned matrix. The function can -also be used to obtain a smoothed trajectory of the -input data by summing the cosine waves derived from -the first few DCT coefficients. - -The algorithm first reflects the input signal -about the last data point, N. Thus if the input -signal vec if of length N, the algorithm -creates a vector c(vec, rev(vec[-c(1,N)])). -and the R fft function is applied to this reflected signal. -The DCT coefficients are real part of what is returned by -fft i.e. the amplitudes of -the cosine waves of frequencies k = 0, 1, 2, ...2 *(N-1) -radians per sample. The phase is zero in all cases. -The amplitudes are calculated in such a way such -that if these cosine waves are summed, the -original (reflected) signal is reconstructed. -What is returned by dct() are the amplitudes -of the cosine waves (DCT coefficients) -up to a frequency of N radians/sample, i.e. -a vector of cosine wave amplitudes -that has the same -length as the original signal and of -frequencies k = 0, 1, 2, ... (N-1). Alternatively, -if fit=T, a smoothed signal of the same -length as the original signal is obtained -based on a summation of the lowest ordered DCT coefficients. -This dct() algorithm returns very similar values -to DCT() with inv=F -written by Catherine Watson and used in Watson & Harrington (1999). -} - -\references{ Watson, C. & Harrington, J. (1999). -Acoustic evidence for dynamic formant trajectories -in Australian English vowels. Journal of -the Acoustical Society of America, 106, 458-468. - -Zahorian, S., and Jagharghi, A. (1993). -Spectral-shape features versus formants -as acoustic correlates for vowels, -Journal of -the Acoustical Society of America, 94, 19661982. } -\author{ Jonathan Harrington } - -\seealso{ -\code{\link{plafit}} -\code{\link{by}} -} -\examples{ -data(vowlax) -# obtain the first four DCT coefficients -# (frequencies k = 0, 1, 2, 3) for some -# first formant frequency data -vec <- vowlax.fdat[1,1]$data -dct(vec, m=4) - -# obtain the corresponding smoothed -# trajectory -dct(vec, m=4 , fit=TRUE) -} - -\keyword{math} - diff --git a/man/dcut.Rd b/man/dcut.Rd index c0846498..c26b6752 100644 --- a/man/dcut.Rd +++ b/man/dcut.Rd @@ -1,66 +1,71 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dtime.R \name{dcut} \alias{dcut} \alias{dcut.sub} -\title{Function to extract a vector or matrix from EMU-Trackdata at a single time point of to create another EMU-trackdata object between two times.} -\description{ A general purpose tool for extracting data from track objects - either at a particular time, or between two times. The times can - be values in milliseconds or proportional times between zero (the - onset) and one (the offset). -} +\title{Function to extract a vector or matrix from EMU-Trackdata at a single time +point of to create another EMU-trackdata object between two times.} \usage{ -dcut(trackdata, left.time, right.time, single = TRUE, average = TRUE, prop = FALSE) +dcut( + trackdata, + left.time, + right.time, + single = TRUE, + average = TRUE, + prop = FALSE +) } \arguments{ - \item{trackdata}{ An Emu trackdata object. } - \item{left.time}{ Either: a numeric vector of the same length -as there are obsverations in trackdata. Or: a single value between -0 and 1. In the first case, the left time boundary of trackdata[n,] is cut at left.time[n], -in the second case, and if prop=T, it is cut at that proportional time. } - \item{right.time}{ Either: a numeric vector of the same length -as there are obsverations in trackdata. Or: a single value between -0 and 1. In the first case, the right time boundary of trackdata[n,] is cut at right.time[n], -in the second case, and if prop=T, it is cut at that proportional time. } - \item{single}{ If TRUE, one value is returned per segment. This applies when - the requested time falls between two track frames. When - single=TRUE, the preceding value is returned, unless - average=TRUE (see below), in which case the average value of - the two frames is returned. when the right.time argument is - omitted } - \item{average}{ A single element logical vector - see single above. Applies - only when the right.times argument is omitted and when single - = TRUE } - \item{prop}{ If TRUE left.time and right.time are - interpreted as proportions, if FALSE, they are - interpreted as millisecond times} -} -\details{ - This function extracts data from each segment of a - trackdata object. +\item{trackdata}{An Emu trackdata object.} + +\item{left.time}{Either: a numeric vector of the same length as there are +observations in trackdata. Or: a single value between 0 and 1. In the first +case, the left time boundary of trackdata[n,] is cut at left.time[n], in +the second case, and if prop=TRUE, it is cut at that proportional time.} - If 'prop=FALSE' the time arguments ('left.time' and 'right.time') - are interpreted as millisecond times and each should be a vector - with the same length as the number of segments in 'trackdata'. If - 'prop=TRUE' the time arguments should be single values between - zero (the onset of the segment) and one (the offset). +\item{right.time}{Either: a numeric vector of the same length as there are +observations in trackdata. Or: a single value between 0 and 1. In the first +case, the right time boundary of trackdata[n,] is cut at right.time[n], in +the second case, and if prop=TRUE, it is cut at that proportional time.} - If 'right.time' is omitted then a single data point correponding - to 'left.time' for each segment is returned. +\item{single}{If TRUE, one value is returned per segment. This applies when +the requested time falls between two track frames. When single=TRUE, the +preceding value is returned, unless average=TRUE (see below), in which case +the average value of the two frames is returned. when the right.time +argument is omitted} + +\item{average}{A single element logical vector - see single above. Applies +only when the right.times argument is omitted and when single = TRUE} + +\item{prop}{If TRUE left.time and right.time are interpreted as +proportions, if FALSE, they are interpreted as millisecond times} } \value{ - A trackdata object if both 'left.time' and 'right.time' -are specified, otherwise a matrix if 'right.time' is unspecified -and the trackdata object has multiple columns of data or -a vector if right.time' is unspecified -and the trackdata object has a single column of data. +A trackdata object if both 'left.time' and 'right.time' are +specified, otherwise a matrix if 'right.time' is unspecified and the +trackdata object has multiple columns of data or a vector if right.time' is +unspecified and the trackdata object has a single column of data. +} +\description{ +A general purpose tool for extracting data from track objects either at a +particular time, or between two times. The times can be values in +milliseconds or proportional times between zero (the onset) and one (the +offset). +} +\details{ +This function extracts data from each segment of a trackdata object. + +If 'prop=FALSE' the time arguments ('left.time' and 'right.time') are +interpreted as millisecond times and each should be a vector with the same +length as the number of segments in 'trackdata'. If 'prop=TRUE' the time +arguments should be single values between zero (the onset of the segment) +and one (the offset). +If 'right.time' is omitted then a single data point corresponding to +'left.time' for each segment is returned. } -\author{ Jonathan Harrington } -\seealso{ - \code{\link{emu.track}}, - \code{\link{dplot}}, - \code{\link{eplot}} - } \examples{ + # the data values of the trackdata object at the temporal midpoint # (midvals is matrix of F1 and F2 data) dip.fdat[1:10] @@ -90,7 +95,12 @@ and the trackdata object has a single column of data. # from the start of the diphthongs up to 30 ms after the diphthongs) int <- dcut(dip.fdat, dip.fdat$ftime[,1], times) int[1] - } - + +} +\seealso{ +\code{\link{get_trackdata}}, \code{\link{dplot}}, \code{\link{eplot}} +} +\author{ +Jonathan Harrington +} \keyword{datagen} - diff --git a/man/ddiff.Rd b/man/ddiff.Rd index 99c2eabb..3a5600ce 100644 --- a/man/ddiff.Rd +++ b/man/ddiff.Rd @@ -1,23 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ddiff.R \name{ddiff} \alias{ddiff} \alias{ddiff.sub} -\title{ Differentiation of tracks} -\description{ - Differentiates a list, as returned by track, to the nth - order, readjusting the index and ftime values each time. - } +\title{Differentiation of tracks} \usage{ - ddiff(dataset, n = 1, smoothing = TRUE) +ddiff(dataset, n = 1, smoothing = TRUE) } \arguments{ - \item{dataset}{ track data object - a list as returned by track } - \item{n}{ the order of differentiation } - \item{smoothing}{ if TRUE track is smoothed } -} - - -\author{ Jonathan Harrington} +\item{dataset}{track data object - a list as returned by track} +\item{n}{the order of differentiation} +\item{smoothing}{if TRUE track is smoothed} +} +\description{ +Differentiates a list, as returned by track, to the nth order, readjusting +the index and ftime values each time. +} +\author{ +Jonathan Harrington +} \keyword{math} - diff --git a/man/delete_itemsInLevel.Rd b/man/delete_itemsInLevel.Rd new file mode 100644 index 00000000..df674602 --- /dev/null +++ b/man/delete_itemsInLevel.Rd @@ -0,0 +1,63 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-annotations_crud.R +\name{delete_itemsInLevel} +\alias{delete_itemsInLevel} +\title{Delete items programmatically} +\usage{ +delete_itemsInLevel( + emuDBhandle, + itemsToDelete, + sayYes = FALSE, + rewriteAllAnnots = TRUE, + verbose = TRUE +) +} +\arguments{ +\item{emuDBhandle}{emuDB handle as returned by \link{load_emuDB}} + +\item{itemsToDelete}{A data frame with the columns: +\itemize{ +\item \code{session} (character) +\item \code{bundle} (character) +\item \code{start_item_id} (numeric) +}} + +\item{sayYes}{When you call this function, it warns you about problems it +may create. You can skip that question if you set the \code{sayYes} parameter to +TRUE. This is useful when you want to use the function non-interactively.} + +\item{rewriteAllAnnots}{should changes be written to file system (_annot.json +files) (intended for expert use only)} + +\item{verbose}{if set to \code{TRUE}, more status messages are printed} +} +\description{ +Delete annotation items programmatically. You have to pass in a +data frame, called \code{itemsToDelete}, describing these items. The required +columns are described below. + +This function belongs to emuR’s CRUD family of functions, which let the user +manipulate items programmatically: +\itemize{ +\item Create items (\link{create_itemsInLevel}) +\item Read items (\link{query}) +\item Update items (\link{update_itemsInLevel}) +\item Delete items (\link{delete_itemsInLevel})) +} +} +\details{ +This function deletes annotation items from existing levels. Your input data +frame \code{itemsToDelete} must describe the items by specifying the columns +\code{session}, \code{bundle}, and \code{start_item_id}. + +Be careful with this function: You can use it to create problematic situations, +for example gaps in the annotation levels, and the function currently has +no checks to prevent this. Instead, you need to explicitly confirm that you +are aware of this, either by setting \code{sayYes} to \code{TRUE} or by interactively +responding yes to the prompt this function presents. + +A major use case for this function is to obtain a segment list using \link{query}, +possibly modify the segment list and feed it to this function. That is why +the column \code{start_item_id} is not called \code{item_id}: segment lists include +the former column name, not the latter. +} diff --git a/man/demo.all.Rd b/man/demo.all.Rd index feac37b7..5d67b5a5 100644 --- a/man/demo.all.Rd +++ b/man/demo.all.Rd @@ -1,28 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-dataDocs.R +\docType{data} \name{demo.all} \alias{demo.all} \title{Emu segment list} -\usage{data(demo.all)} -\description{ - Segment list of the demo database that is part of the Emu system. - It is the result of a database query, that searched all segments at level Phonetic. -} \format{ - First Column labels - Second start time of the segment - Third end time of the segment - Fourth utterance name of the utterance the segment was found - +First Column labels Second start time of the segment Third end time +of the segment Fourth utterance name of the utterance the segment was found +} +\description{ +Segment list of the demo database that is part of the Emu system. It is +the result of a database query, that searched all segments at level +Phonetic. } - -\details{ - A segment list is created via emu.query() or by using the EMU Query Tool. +\details{ +A segment list is created via \code{\link{query}}. } - \seealso{ - \code{\link{demo.vowels}} - \code{\link{segmentlist}} +\code{\link{demo.vowels}} \code{\link{segmentlist}} } - - - \keyword{datasets} diff --git a/man/demo.all.f0.Rd b/man/demo.all.f0.Rd new file mode 100644 index 00000000..2edd9f04 --- /dev/null +++ b/man/demo.all.f0.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-dataDocs.R +\docType{data} +\name{demo.all.f0} +\alias{demo.all.f0} +\title{F0 track data for segment list demo.vowels} +\format{ +An object with $index, $ftime and $data + +index: a two columned matrix with the range of the $data rows that belong +to the segment ftime: a two columned matrix with the times marks of the +segment data: a one columned matrix with the F0 values +} +\description{ +A track list of the demo database that is part of the Emu system. It is +the result of get F0 data for the segment list demo.vowels (see +data(demo.vowels)). +} +\details{ +A track list is created via the \code{\link{get_trackdata}} function. +} +\seealso{ +\code{\link{demo.all.rms}} \code{\link{segmentlist}} +\code{\link{trackdata}} +} +\keyword{datasets} diff --git a/man/demo.all.fm.Rd b/man/demo.all.fm.Rd new file mode 100644 index 00000000..2c33e321 --- /dev/null +++ b/man/demo.all.fm.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-dataDocs.R +\docType{data} +\name{demo.all.fm} +\alias{demo.all.fm} +\title{Formant track data for segment list demo.vowels} +\format{ +index: a two columned matrix with the range of the $data rows that +belong to the segment ftime: a two columned matrix with the times marks of +the segment data: a three columned matrix with the formant values of the +first three formants for each segment +} +\description{ +A track list of the demo database that is part of the Emu system. It is +the result of get fm data for the segment list demo.vowels (see +data(demo.vowels)). +} +\details{ +A track list is created via the \code{\link{get_trackdata}} function. +} +\seealso{ +\code{\link{demo.all.rms}} \code{\link{segmentlist}} +\code{\link{trackdata}} +} +\keyword{datasets} diff --git a/man/demo.all.rms.Rd b/man/demo.all.rms.Rd index be39f6ef..d76fcac5 100644 --- a/man/demo.all.rms.Rd +++ b/man/demo.all.rms.Rd @@ -1,30 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-dataDocs.R +\docType{data} \name{demo.all.rms} \alias{demo.all.rms} \title{Emu track data for a rms track for segment list demo.all} -\usage{data(demo.all.rms)} -\description{ - A track list of the demo database that is part of the Emu system. - It is the result of get rms data for the segment list demo.all (data(demo.all)). -} \format{ - A object with \$index, \$ftime and \$data - -index: a two columned matrix with the range of the \$data rows that belong to the segment -ftime: a two columned matrix with the times marks of the segment -data: a vector with the rms data +A object with $index, $ftime and $data +index: a two columned matrix with the range of the $data rows that belong +to the segment ftime: a two columned matrix with the times marks of the +segment data: a vector with the rms data +} +\description{ +A track list of the demo database that is part of the Emu system. It is +the result of get rms data for the segment list demo.all (data(demo.all)). } - \details{ - A track list is created via emu.track() or via get data within the EMU Query Tool. +A track list is created via the \code{\link{get_trackdata}} function. } - \seealso{ - \code{\link{demo.vowels.fm}} - \code{\link{segmentlist}} - \code{\link{trackdata}} +\code{\link{demo.vowels.fm}} \code{\link{segmentlist}} +\code{\link{trackdata}} } - - - -\keyword{datasets} \ No newline at end of file +\keyword{datasets} diff --git a/man/demo.vowels.Rd b/man/demo.vowels.Rd index 0897b77a..32eccd58 100644 --- a/man/demo.vowels.Rd +++ b/man/demo.vowels.Rd @@ -1,29 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-dataDocs.R +\docType{data} \name{demo.vowels} \alias{demo.vowels} \title{Emu segment List} -\usage{data(demo.vowels)} -\description{ - Segment list of the demo database that is part of the Emu system. - It is the result of a database query, that searched all vowel segments at level Phonetic. -} \format{ - First Column labels - Second start time of the segment - Third end time of the segment - Fourth utterance name of the utterance the segment was found - +First Column labels Second start time of the segment Third end time +of the segment Fourth utterance name of the utterance the segment was found +} +\description{ +Segment list of the demo database that is part of the Emu system. It is +the result of a database query, that searched all vowel segments at level +Phonetic. } - \details{ - A segment list is created via emu.query() or by using the EMU Query Tool. +A segment list is created via \code{\link{query}}. } - \seealso{ - \code{\link{demo.all}} - \code{\link{segmentlist}} +\code{\link{demo.all}} \code{\link{segmentlist}} } - - - - -\keyword{datasets} \ No newline at end of file +\keyword{datasets} diff --git a/man/demo.vowels.f0.Rd b/man/demo.vowels.f0.Rd index 43d51ec1..c3486c11 100644 --- a/man/demo.vowels.f0.Rd +++ b/man/demo.vowels.f0.Rd @@ -1,31 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-dataDocs.R \name{demo.vowels.f0} \alias{demo.vowels.f0} \title{F0 track data for segment list demo.vowels} -\usage{data(demo.vowels.f0)} -\description{ - A track list of the demo database that is part of the Emu system. - It is the result of get F0 data for the segment list demo.vowels (see data(demo.vowels)). -} \format{ - An object with \$index, \$ftime and \$data - -index: a two columned matrix with the range of the \$data rows that belong to the segment -ftime: a two columned matrix with the times marks of the segment -data: a one columned matrix with the F0 values +An object with $index, $ftime and $data +index: a two columned matrix with the range of the $data rows that belong +to the segment ftime: a two columned matrix with the times marks of the +segment data: a one columned matrix with the F0 values +} +\description{ +A track list of the demo database that is part of the Emu system. It is the +result of get F0 data for the segment list demo.vowels (see +data(demo.vowels)). } - \details{ - A track list is created via emu.track() or via get data within the EMU Query Tool. +A track list is created via the \code{\link{get_trackdata}} function. } - \seealso{ - \code{\link{demo.all.rms}} - \code{\link{segmentlist}} - \code{\link{trackdata}} +\code{\link{demo.all.rms}} \code{\link{segmentlist}} +\code{\link{trackdata}} } - - - - -\keyword{datasets} \ No newline at end of file +\keyword{datasets} diff --git a/man/demo.vowels.fm.Rd b/man/demo.vowels.fm.Rd index eea8d22d..5638ffed 100644 --- a/man/demo.vowels.fm.Rd +++ b/man/demo.vowels.fm.Rd @@ -1,29 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-dataDocs.R \name{demo.vowels.fm} \alias{demo.vowels.fm} \title{Formant track data for segment list demo.vowels} -\usage{data(demo.vowels.fm)} -\description{ - A track list of the demo database that is part of the Emu system. - It is the result of get fm data for the segment list demo.vowels (see data(demo.vowels)). -} \format{ -index: a two columned matrix with the range of the \$data rows that belong to the segment -ftime: a two columned matrix with the times marks of the segment -data: a three columned matrix with the formant values of the first three formants for each segment - +index: a two columned matrix with the range of the $data rows that +belong to the segment ftime: a two columned matrix with the times marks of +the segment data: a three columned matrix with the formant values of the +first three formants for each segment +} +\description{ +A track list of the demo database that is part of the Emu system. It is the +result of get fm data for the segment list demo.vowels (see +data(demo.vowels)). } - \details{ - A track list is created via emu.track() or via get data within the EMU Query Tool. +A track list is created via the \code{\link{get_trackdata}} function. } - \seealso{ - \code{\link{demo.all.rms}} - \code{\link{segmentlist}} - \code{\link{trackdata}} +\code{\link{demo.all.rms}} \code{\link{segmentlist}} +\code{\link{trackdata}} } - - - - -\keyword{datasets} \ No newline at end of file +\keyword{datasets} diff --git a/man/dextract.Rd b/man/dextract.Rd index bfc0cff1..de8f87f9 100644 --- a/man/dextract.Rd +++ b/man/dextract.Rd @@ -1,38 +1,33 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dextract.R \name{dextract} \alias{dextract} \alias{dextract.sub} \title{Extract a subset of data from a trackdata object} - -\description{ A function that cuts up trackdata either at a proportional - time or proportionally between two times. It is a subsidiary function - of dplot() } - \usage{ dextract(dataset, start, end) } - \arguments{ - \item{dataset}{A trackdata object} +\item{dataset}{A trackdata object} - \item{start}{ A single valued numeric vector corresponding to a - proportional time between zero (the onset of the trackdata) and one - (the offset of the trackdata).} +\item{start}{A single valued numeric vector corresponding to a proportional +time between zero (the onset of the trackdata) and one (the offset of the +trackdata).} - \item{end}{ As start, but optional} +\item{end}{As start, but optional} } \value{ - If both start and end are specified, a trackdata object -is returned, otherwise a vector if the original trackdata -is one-dimensional and the end argument -is not used, or a matrix if the original trackdata -has more than one dimension and the end argument is not used +If both start and end are specified, a trackdata object is +returned, otherwise a vector if the original trackdata is one-dimensional +and the end argument is not used, or a matrix if the original trackdata has +more than one dimension and the end argument is not used +} +\description{ +A function that cuts up trackdata either at a proportional time or +proportionally between two times. It is a subsidiary function of dplot() } - -\author{ Jonathan Harrington} - -\seealso{\code{dcut}} - \examples{ + data(demo.vowels.f0) data(demo.vowels.fm) @@ -40,7 +35,7 @@ form = demo.vowels.fm # get the formants at the midpoint: f50 is a matrix # same as dcut(form, .5, prop=TRUE) f50 = dextract(form, 0.5) -# get the formants between the 25% and 75% time points +# get the formants between the 25\% and 75\% time points # fcut is a trackdata object # same as dcut(form, .25, .75, prop=TRUE) fcut = dextract(form, 0.25, 0.75) @@ -49,5 +44,12 @@ fcut = dextract(form, 0.25, 0.75) fzero = demo.vowels.f0 fzero50 = dextract(fzero, 0.5) + +} +\seealso{ +\code{dcut} +} +\author{ +Jonathan Harrington } \keyword{datagen} diff --git a/man/dextract.lab.Rd b/man/dextract.lab.Rd index aab2d915..3d26a4c4 100644 --- a/man/dextract.lab.Rd +++ b/man/dextract.lab.Rd @@ -1,44 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dextract.lab.R \name{dextract.lab} \alias{dextract.lab} -\title{ -Extract a subset of data from a trackdata object -} -\description{ -Extract a subset of data from a trackdata object according to the -label in the parallel label vector. -} +\title{Extract a subset of data from a trackdata object} \usage{ -dextract.lab(dataset, labs, labtype=unique(labs)) +dextract.lab(dataset, labs, labtype = unique(labs)) } \arguments{ -\item{dataset}{ -A trackdata object returned from \code{track}. -} -\item{labs}{ -A vector of labels parallel to \code{trackdata$index}, i.e. one for each -segment in the trackdata. +\item{dataset}{A trackdata object returned from \code{track}.} + +\item{labs}{A vector of labels parallel to \code{trackdata$index}, i.e. one +for each segment in the trackdata.} + +\item{labtype}{A vector of labels for which data is to be extracted.} } -\item{labtype}{ -A vector of labels for which data is to be extracted. -}} \value{ -A trackdata object which is a subset of \code{trackdata} containing only the -data for those labels in \code{labtype}. The result has the same components -as the input \code{trackdata}: +A trackdata object which is a subset of \code{trackdata} containing +only the data for those labels in \code{labtype}. The result has the same +components as the input \code{trackdata}: -\item{data}{ -A vector or matrix of numerical data. +\item{data}{ A vector or matrix of numerical data. } \item{index}{ A two +column matrix giving the start and end indices into the data vector for +each segment. } \item{ftime}{ A two column matrix giving the start and end +times for each segment. } } -\item{index}{ -A two column matrix giving the start and end indeces into the data -vector for each segment. +\description{ +Extract a subset of data from a trackdata object according to the label in +the parallel label vector. } -\item{ftime}{ -A two column matrix giving the start and end times for each segment. -}} \seealso{ track, dextract, get.time.element, frames.time } - \keyword{internal} - diff --git a/man/dim.trackdata.Rd b/man/dim.trackdata.Rd index 9fc365f7..d2ca6f98 100644 --- a/man/dim.trackdata.Rd +++ b/man/dim.trackdata.Rd @@ -1,36 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dimtrackdata.R \name{dim.trackdata} \alias{dim.trackdata} \alias{dim} - -\title{ A method of the generic function dim for objects of class \'trackdata\'} -\description{ - The function returns the dimension attributes of a track data object. -} +\title{A method of the generic function dim for objects of class 'trackdata'} \usage{ - \method{dim}{trackdata}(x) +\method{dim}{trackdata}(x) } - \arguments{ - \item{x}{a track data object} +\item{x}{a track data object} +} +\description{ +The function returns the dimension attributes of a track data object. } - \details{ - The function returns the dimension attributes of a track data object as the number of segments x number of tracks. - c(nrow(x$index), ncol(x$data)) +The function returns the dimension attributes of a track data object as the +number of segments x number of tracks. c(nrow(x$index), ncol(x$data)) } -\author{Jonathan Harrington} - \examples{ + #isol.fdat is the formant track of the segment list isol #write out the dimension of the track data object - dim.trackdata(isol.fdat) + dim(isol.fdat) #because there are 13 segments isol.fdat$ftime #and there are 4 rows for each segment (see here for the first segment) isol.fdat$data[1,] -} -\keyword{methods} \ No newline at end of file +} +\author{ +Jonathan Harrington +} +\keyword{methods} diff --git a/man/dimnames.trackdata.Rd b/man/dimnames.trackdata.Rd index 12af71bd..44c58bd4 100644 --- a/man/dimnames.trackdata.Rd +++ b/man/dimnames.trackdata.Rd @@ -1,13 +1,15 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dimtrackdata.R \name{dimnames.trackdata} \alias{dimnames.trackdata} -\title{ Dimnames of trackdata object} -\description{ -returns dimension names of trackdata objects} +\title{Dimnames of trackdata object} \usage{ \method{dimnames}{trackdata}(x) } \arguments{ - \item{x}{trackdata object} +\item{x}{trackdata object} +} +\description{ +returns dimension names of trackdata objects } \keyword{methods} - diff --git a/man/dip.Rd b/man/dip.Rd index de3dfba8..93cca7de 100644 --- a/man/dip.Rd +++ b/man/dip.Rd @@ -1,6 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-dataDocs.R +\docType{data} \name{dip} \alias{dip} -\title{Segment list of dipththongs, two speakers one male, one female , Standard North German, read speech from database kielread} -\usage{dip} -\description{An EMU dataset} +\title{Segment list of diphthongs, two speakers one male, one female , Standard +North German, read speech from database kielread} +\format{ +segmentlist +} +\description{ +An EMU dataset +} \keyword{datasets} diff --git a/man/dip.fdat.Rd b/man/dip.fdat.Rd index cec8c6b6..28396208 100644 --- a/man/dip.fdat.Rd +++ b/man/dip.fdat.Rd @@ -1,6 +1,13 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-dataDocs.R +\docType{data} \name{dip.fdat} \alias{dip.fdat} \title{Trackdata of formants from the segment list dip} -\usage{dip.fdat} -\description{An EMU dataset} +\format{ +trackdata object +} +\description{ +An EMU dataset +} \keyword{datasets} diff --git a/man/dip.l.Rd b/man/dip.l.Rd index 90b7fb24..00217c36 100644 --- a/man/dip.l.Rd +++ b/man/dip.l.Rd @@ -1,6 +1,13 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-dataDocs.R +\docType{data} \name{dip.l} \alias{dip.l} \title{Vector of phoneme labels from the segment list dip} -\usage{dip.l} -\description{An EMU dataset} +\format{ +vector of phoneme lables +} +\description{ +An EMU dataset +} \keyword{datasets} diff --git a/man/dip.spkr.Rd b/man/dip.spkr.Rd index 3a6489ae..a2331ac5 100644 --- a/man/dip.spkr.Rd +++ b/man/dip.spkr.Rd @@ -1,6 +1,13 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-dataDocs.R +\docType{data} \name{dip.spkr} \alias{dip.spkr} \title{Vector of speaker labels from the segment list dip} -\usage{dip.spkr} -\description{An EMU dataset} +\format{ +vector of speaker labels +} +\description{ +An EMU dataset +} \keyword{datasets} diff --git a/man/distance.Rd b/man/distance.Rd index 96da03cc..2a78d433 100644 --- a/man/distance.Rd +++ b/man/distance.Rd @@ -1,8 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/bayesdist.R \name{distance} \alias{distance} -\title{ distance} +\title{distance} +\usage{ +distance(data, train, labels = NULL, metric = "bayes") +} \description{ - see function +see function } -\keyword{ internal } - +\keyword{internal} diff --git a/man/dplot.Rd b/man/dplot.Rd index 7a48196d..ce873012 100644 --- a/man/dplot.Rd +++ b/man/dplot.Rd @@ -1,76 +1,102 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dplot.R \name{dplot} \alias{dplot} \alias{dplot.norm} \alias{dplot.time} - -\title{ A function to plot one or more columns of EMU-trackdata as a function of time} -\description{ A general purpose routine for plotting EMU-trackdata on a single plot. - Tracks can be aligned at an arbitrary position, length normalised - or averaged. The plots can be colour-coded for different category types. -} +\title{A function to plot one or more columns of EMU-trackdata as a function of +time (DEPRECATED see below)} \usage{ -dplot(x, labs = NULL, offset = 0, prop = TRUE, average = FALSE, - xlim = NULL, ylim = NULL, lty = FALSE, normalise = FALSE, - colour = TRUE, lwd = NULL, pch = NULL, legend = "topright", - axes = TRUE, type = "l", n = 20, ...) +dplot( + x, + labs = NULL, + offset = 0, + prop = TRUE, + average = FALSE, + xlim = NULL, + ylim = NULL, + lty = FALSE, + normalise = FALSE, + colour = TRUE, + lwd = NULL, + pch = NULL, + legend = "topright", + axes = TRUE, + type = "l", + n = 20, + ... +) } \arguments{ - \item{x}{An EMU-trackdata object } - \item{labs}{ A label vector with one element for each row in 'dataset' } - \item{offset}{ Either: A single numeric vector between 0 and 1. -0 and 1 denote synchronize the trackdata at their temporal onsets -and offsets respectively; 0.5 denotes synchronization at the -temporal midpoint, etc. Or a numeric vector of the same length as x -specifying the synchronisation point per segment} - \item{prop}{ A single element character vector specifying whether the tracks should be aligned proportionally or relative to millisecond times. Defaults to proportional alignment} - \item{average}{ If TRUE, the data for each unique label in 'labs' is averaged } - \item{xlim}{ A vector of two numeric values specifying the x-axis range} - \item{ylim}{ A vector of two numeric values specifying the y-axis range } - \item{lty}{ A single element logical vector. Defaults to F. -If TRUE, plot each label type in a different linetype} - \item{normalise}{ If TRUE, the data for each segment is linearly time normalised so - that all observations have the same length. The number of points -used in the linear time normalisation is control by the argument n. } - \item{colour}{ A single element logical vector. Defaults to T -to plot each label type in a different colour } - \item{lwd}{ A code passed to the lwd argument -in plotting functions. -'lwd' can be either -a single element numeric vector, or its length must -be equal to the number of unique types in labs. -For example, if lwd=3 and if labs = c("a", "b", "a", "c"), -then the output is c(3, 3, 3, 3). Alternatively, -if lwd = c(2,3,1), then the output is -c(2, 3, 2, 1) for the same example. The default is -NULL in which case all lines are drawn with lwd=1 } -\item{pch}{ A code passed to the pch argument -in plotting functions. Functions in the same way as lwd above} - \item{legend}{ Either a character vector to plot -the legend. Possible values are: "bottomright"', '"bottom"', '"bottomleft"', - '"left"', '"topleft"', '"top"', '"topright"', '"right"' and - '"center"'. This places the legend on the inside of the plot frame - at the given location. Partial argument matching is used. Or -a logical vector: legend = FALSE suppresses legend plotting. legend = TRUE -plots it at the default, legend = "topright"} - \item{axes}{ A single element logical vector. Defaults to T -to plot the axes } - \item{type}{ The default line type. Default to "l" for a line plot } - \item{n}{ A single element numeric vector. Only used if normalise=T. -The number of data points used to linearly time normalise each track } -\item{...}{graphical options \link{par}} -} +\item{x}{An EMU-trackdata object} -\value{ - NULL -} -\author{Jonathan Harrington} +\item{labs}{A label vector with one element for each row in 'dataset'} + +\item{offset}{Either: A single numeric vector between 0 and 1. 0 and 1 +denote synchronize the trackdata at their temporal onsets and offsets +respectively; 0.5 denotes synchronization at the temporal midpoint, etc. Or +a numeric vector of the same length as x specifying the synchronisation +point per segment} + +\item{prop}{A single element character vector specifying whether the tracks +should be aligned proportionally or relative to millisecond times. Defaults +to proportional alignment} + +\item{average}{If TRUE, the data for each unique label in 'labs' is +averaged} + +\item{xlim}{A vector of two numeric values specifying the x-axis range} + +\item{ylim}{A vector of two numeric values specifying the y-axis range} + +\item{lty}{A single element logical vector. Defaults to FALSE. If TRUE, plot +each label type in a different linetype} + +\item{normalise}{If TRUE, the data for each segment is linearly time +normalised so that all observations have the same length. The number of +points used in the linear time normalisation is control by the argument n.} + +\item{colour}{A single element logical vector. Defaults to TRUE to plot each +label type in a different colour} -\seealso{ - \code{\link{dcut}} - \code{\link{emu.track}} +\item{lwd}{A code passed to the lwd argument in plotting functions. 'lwd' +can be either a single element numeric vector, or its length must be equal +to the number of unique types in labs. For example, if lwd=3 and if labs = +c("a", "b", "a", "c"), then the output is c(3, 3, 3, 3). Alternatively, if +lwd = c(2,3,1), then the output is c(2, 3, 2, 1) for the same example. The +default is NULL in which case all lines are drawn with lwd=1} + +\item{pch}{A code passed to the pch argument in plotting functions. +Functions in the same way as lwd above} + +\item{legend}{Either a character vector to plot the legend. Possible values +are: "bottomright"', '"bottom"', '"bottomleft"', '"left"', '"topleft"', +'"top"', '"topright"', '"right"' and '"center"'. This places the legend on +the inside of the plot frame at the given location. Partial argument +matching is used. Or a logical vector: legend = FALSE suppresses legend +plotting. legend = TRUE plots it at the default, legend = "topright"} + +\item{axes}{A single element logical vector. Defaults to TRUE to plot the axes} + +\item{type}{The default line type. Default to "l" for a line plot} + +\item{n}{A single element numeric vector. Only used if normalise=TRUE. The +number of data points used to linearly time normalise each track} + +\item{...}{graphical options \link{par}} +} +\description{ +A general purpose routine for plotting EMU-trackdata on a single plot. +Tracks can be aligned at an arbitrary position, length normalised or +averaged. The plots can be colour-coded for different category types. +DEPRECATED as this function does not play well with with the new +resultType = "tibble" of \code{get_trackdata()}. See +\url{https://ips-lmu.github.io/The-EMU-SDMS-Manual/recipe-plottingSnippets.html} +for an alternative plotting routines using ggplot2. } \examples{ + # Plot of column 1 (which happens to be the 1st formant) of an EMU-trackdata object dplot(dip.fdat[,1]) @@ -119,6 +145,12 @@ The number of data points used to linearly time normalise each track } + +} +\seealso{ +\code{\link{dcut}} \code{\link{get_trackdata}} +} +\author{ +Jonathan Harrington } \keyword{dplot} - diff --git a/man/dsmooth.Rd b/man/dsmooth.Rd index d8bac7bc..3f4c688e 100644 --- a/man/dsmooth.Rd +++ b/man/dsmooth.Rd @@ -1,28 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dsmooth.R \name{dsmooth} \alias{dsmooth} \alias{dsmooth.sub} -\title{ -Smooth the data in a trackdata object. -} -\description{Smooths each dataset in a trackdata object using a running - mean smoother.} +\title{Smooth the data in a trackdata object.} \usage{ dsmooth(dataset) } \arguments{ -\item{dataset}{ -A trackdata object as returned from \code{track}. -}} +\item{dataset}{A trackdata object as returned from \code{track}.} +} \value{ -The result of applying the \code{smooth} function to each column of the data -for each segment in the trackdata object. +The result of applying the \code{smooth} function to each column of +the data for each segment in the trackdata object. +} +\description{ +Smooths each dataset in a trackdata object using a running mean smoother. } \details{ -This function uses the \code{dapply} function to apply \code{smooth} to the data -for each segment. +This function uses the \code{dapply} function to apply \code{smooth} to the +data for each segment. } \seealso{ smooth, dapply } \keyword{misc} -% Converted by Sd2Rd version 0.3-3. diff --git a/man/dtime.Rd b/man/dtime.Rd index e2ca3e6d..2ed8f059 100644 --- a/man/dtime.Rd +++ b/man/dtime.Rd @@ -1,8 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dtime.R \name{dtime} \alias{dtime} \title{time signal times} +\usage{ +dtime(dataset, times, single = TRUE, average = TRUE) +} \description{ see function } -\keyword{ internal } - +\keyword{internal} diff --git a/man/duplicate_level.Rd b/man/duplicate_level.Rd new file mode 100644 index 00000000..c14561e1 --- /dev/null +++ b/man/duplicate_level.Rd @@ -0,0 +1,59 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-autoproc_annots.R +\name{duplicate_level} +\alias{duplicate_level} +\title{Duplicate level} +\usage{ +duplicate_level( + emuDBhandle, + levelName, + duplicateLevelName, + duplicateLinks = TRUE, + linkDuplicates = FALSE, + linkDefType = "ONE_TO_ONE", + verbose = TRUE +) +} +\arguments{ +\item{emuDBhandle}{emuDB handle object (see \link{load_emuDB})} + +\item{levelName}{name of level in emuDB that is to be duplicated} + +\item{duplicateLevelName}{name given to newly duplicated level} + +\item{duplicateLinks}{if set to \code{TRUE} (the default) all the +links to and from the original items are duplicated to point to the +new items of the new duplicate level.} + +\item{linkDuplicates}{link the duplicated ITEMs to the originals. This +can only be set to \code{TRUE} if \code{duplicateLinks} is set to \code{FALSE}.} + +\item{linkDefType}{type given to link definition. Only relevant if \code{linkDuplicates} +is set to \code{TRUE}.} + +\item{verbose}{show progress bars and further information} +} +\description{ +Duplicate level of emuDB including all of its items and its various +attributeDefinitions. If the \code{duplicateLinks} variable is set +to \code{TRUE} all the links to and from the original items are +duplicated. +} +\examples{ +\dontrun{ + +################################## +# prerequisite: loaded ae emuDB +# (see ?load_emuDB for more information) + +# duplicate Phonetic level +duplicate_level(ae, levelName = "Phonetic", + duplicateLevelName = "Phonetic2") + +} + +} +\seealso{ +\code{\link{load_emuDB}} +} +\keyword{emuDB} diff --git a/man/dur.Rd b/man/dur.Rd index 6ab53d91..896ef427 100644 --- a/man/dur.Rd +++ b/man/dur.Rd @@ -1,8 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emusegs.R \name{dur} \alias{dur} -\docType{data} -\title{ duration } +\title{duration} +\usage{ +dur(x) +} +\arguments{ +\item{x}{???} +} \description{ calculates durations } -\keyword{attribute} diff --git a/man/dur.emusegs.Rd b/man/dur.emusegs.Rd index 1ad8990b..7120ed8c 100644 --- a/man/dur.emusegs.Rd +++ b/man/dur.emusegs.Rd @@ -1,18 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emusegs.R \name{dur.emusegs} \alias{dur.emusegs} -\title{ Duration of segments} -\description{ -duration of segments is calculated for each segment in the segment list -} +\title{Duration of segments (NOTE: does not work for new default resultType = "tibble" of \code{query()})} \usage{ \method{dur}{emusegs}(x) } \arguments{ - \item{x}{ a segment list} +\item{x}{a segment list} } \value{ a vector of durations } -\author{Jonathan Harrington} -\keyword{ internal } - +\description{ +duration of segments is calculated for each segment in the segment list +} +\author{ +Jonathan Harrington +} +\keyword{internal} diff --git a/man/dur.trackdata.Rd b/man/dur.trackdata.Rd index 77fcd6c6..86e0182b 100644 --- a/man/dur.trackdata.Rd +++ b/man/dur.trackdata.Rd @@ -1,17 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/trackdata.class.R \name{dur.trackdata} \alias{dur.trackdata} -\title{ Duration of trackdata elements} -\description{ -Duration of segments is calculated for each element in the trackdata object -} +\title{Duration of trackdata elements} \usage{ \method{dur}{trackdata}(x) } \arguments{ - \item{x}{ a trackdata object } +\item{x}{a trackdata object} } \value{ a vector of durations } -\author{Jonathan Harrington} -\keyword{ internal } +\description{ +Duration of segments is calculated for each element in the trackdata object +} +\author{ +Jonathan Harrington +} +\keyword{internal} diff --git a/man/e.dft.Rd b/man/e.dft.Rd index 950a98d8..b3b6d216 100644 --- a/man/e.dft.Rd +++ b/man/e.dft.Rd @@ -1,6 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-dataDocs.R +\docType{data} \name{e.dft} \alias{e.dft} -\title{Spectral vector of a single E vowel produced by a male speaker of Standard North German.} -\usage{e.dft} -\description{An EMU dataset} +\title{Spectral vector of a single E vowel produced by a male speaker of Standard +North German.} +\format{ +spectral vector +} +\description{ +An EMU dataset +} \keyword{datasets} diff --git a/man/ellipse.Rd b/man/ellipse.Rd index 00b2f6dc..fb5c2c0f 100644 --- a/man/ellipse.Rd +++ b/man/ellipse.Rd @@ -1,34 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/eplot.R \name{ellipse} \alias{ellipse} \title{Calculate ellipse coordinates} -\description{Calculates ellipse coordinates for eplot} \usage{ -ellipse(x, y, rx, ry, orient, incr=360/100) +ellipse(x, y, rx, ry, orient, incr = 360/100) } \arguments{ -\item{x}{ -X coordinate of center +\item{x}{X coordinate of center} + +\item{y}{y coordinate of center} + +\item{rx}{Radius in the x direction} + +\item{ry}{Radius in the y direction} + +\item{orient}{Orientation, in radians. The angle of the major axis to the x +axis.} + +\item{incr}{The increment between points, in degrees.} } -\item{y}{ -y coordinate of center -} -\item{rx}{ -Radius in the x direction -} -\item{ry}{ -Radius in the y direction -} -\item{orient}{ -Orientation, in radians. The angle of the major axis to the x axis. -} -\item{incr}{ -The increment between points, in degrees. -}} \value{ -A matrix of x and y coordinates for the ellipse. +A matrix of x and y coordinates for the ellipse. +} +\description{ +Calculates ellipse coordinates for eplot } \seealso{ \code{\link{eplot}} } \keyword{misc} -% Converted by Sd2Rd version 0.3-3. diff --git a/man/emu-package.Rd b/man/emu-package.Rd deleted file mode 100644 index dba67a5a..00000000 --- a/man/emu-package.Rd +++ /dev/null @@ -1,96 +0,0 @@ -\name{emu-package} -\alias{emu} -\alias{emu-package} -\docType{package} -\title{ -EMU/R - R Programming Interface to the EMU Speech Database System -} -\description{ - The EMU Speech Database System for Creation, Analysis and Query of Speech Databases including acoustic and articulaty data as well as the annotations. It provides annotation, signal processing and scripting facilities, an interface to Praat, WaveSurfer as well as to Articulate Instruments. Query of annotations in EQL or SQL via data export provided. Query of signals using R. Developer: Tina John, Lasse Bombien, Steve Cassidy, Jonathan Harrington. - - This package is part of the EMU System and provides an interface to the R Programming Environment for the query and analysis of the speech data stored in the EMU Speech Database System and data exports from this source. -} -\details{ -\tabular{ll}{ -Package: \tab emu\cr -Type: \tab Package\cr -Version: \tab 4.3\cr -Date: \tab 2012-03-10\cr -License: \tab Copyright 2012 IPS LMU Munich. All rights reserved.\cr -} -Workflow: -\tabular{ll}{ - Query database - \code{\link{emu.query}} \cr - Extract labels - \code{\link{label}} \cr - Get signals over segment interval - \code{\link{emu.track}} \cr - Extract point in signal - \code{\link{dcut}} \cr - Plot data - \code{\link{plot}}, \code{\link{dplot}}, \code{\link{eplot}} \cr - Further analysis and statistical processing \cr -} -} -\author{ -Tina John (help text) - -Maintainer: -} -\references{ -Harrington, J. (2010). The Phonetic Analysis of Speech Corpora. Blackwell. -} -\keyword{ package } -\seealso{ -\code{\link{emu.testsuite}} -} -\examples{ - - # emu.query - # Available Databases: - dbinfo() - - if(!any(dbinfo()=="demo")) { - message("No database demo found in the local EMU System - function can not be run") - } else { - # emu.query - # An EMU query - seg = emu.query("demo","*","Phonetic = @: | e: | ei | A | E | @u") - seg - - #Extract Labels from segment list - seg.lab = label(seg) - seg.lab - - # emu.track - # Extraction of the tracks for the segment list - seg.sample = emu.track(seg,"samples") - - # Plot of signals - plot(seg.sample,label=seg.lab, type="l", main="waveforms") - - # dplot, eplot - # Extract track values at point in time - seg.fm = emu.track(seg,"fm") - seg.fm.5 = dcut(seg.fm, .5, prop =TRUE) - - # Plot the data as time signal and formant card - dplot(seg.fm[,1:2], seg.lab, normalise=TRUE, main = "Formants over vowel duration") - eplot(seg.fm.5[,1:2], seg.lab, dopoints=TRUE, doellipse=FALSE, main = "F1/F2 of vowel midpoint", form=TRUE, xlab = "F2 in Hz", ylab = "F1 in Hz") - - # emu.requery - # An EMU query and ... - segH = emu.query("demo","*","Phonetic = H") - segH - - # ... and requery - segHseql1 = emu.requery(segH,"Phonetic","Phonetic",sequence=-1) - segHseql1 - segH.lab = label(segHseql1) - - # plot.spectral - # Plot of spectral data from 50% of aspiration duration - segH.dft = emu.track(segH,"dft") - segH.dft.5 = dcut(segH.dft, .5,prop=TRUE) - plot(segH.dft.5,segH.lab, main = "Spectral data of aspiration") - } - - - \dontrun{emu.testsuite()} -} diff --git a/man/emu.command.name.Rd b/man/emu.command.name.Rd deleted file mode 100644 index b058befc..00000000 --- a/man/emu.command.name.Rd +++ /dev/null @@ -1,8 +0,0 @@ -\name{emu.command.name} -\alias{emu.command.name} -\title{ the emu command send to EMU} -\description{ -this is the platform dependent command sent to EMU -} -\keyword{ internal } - diff --git a/man/emu.defaultpaths.Rd b/man/emu.defaultpaths.Rd deleted file mode 100644 index 42f8090f..00000000 --- a/man/emu.defaultpaths.Rd +++ /dev/null @@ -1,21 +0,0 @@ -\name{emu.defaultpaths} -\Rdversion{1.1} -\alias{emu.defaultpaths} -\title{ -Default paths of the emu and tcl/tk libraries -} -\description{ - Returns the paths that are suggested for the system for the emu and tcl/tk libraries. -} -\usage{ -emu.defaultpaths() -} - -\author{ -Tina John -} - -\examples{ -emu.defaultpaths() -} -\keyword{IO} diff --git a/man/emu.directory.Rd b/man/emu.directory.Rd deleted file mode 100644 index b6632d41..00000000 --- a/man/emu.directory.Rd +++ /dev/null @@ -1,8 +0,0 @@ -\name{emu.directory} -\alias{emu.directory} -\docType{data} -\title{ EMU directory} -\description{ -Gives the EMU installation path -} -\keyword{datasets} diff --git a/man/emu.init.Rd b/man/emu.init.Rd deleted file mode 100644 index 3ba286e9..00000000 --- a/man/emu.init.Rd +++ /dev/null @@ -1,8 +0,0 @@ -\name{emu.init} -\alias{emu.init} -\title{ emu init} -\description{ -initialises emu -} -\keyword{ internal } - diff --git a/man/emu.inquotes.Rd b/man/emu.inquotes.Rd deleted file mode 100644 index 882faf53..00000000 --- a/man/emu.inquotes.Rd +++ /dev/null @@ -1,8 +0,0 @@ -\name{emu.inquotes} -\alias{emu.inquotes} -\title{ emu inquotes } -\description{ -quotes emu -} -\keyword{ internal } - diff --git a/man/emu.options.Rd b/man/emu.options.Rd deleted file mode 100644 index 6465f0b3..00000000 --- a/man/emu.options.Rd +++ /dev/null @@ -1,8 +0,0 @@ -\name{emu.options} -\alias{emu.options} -\title{ emu options} -\description{ -the emu options -} -\keyword{ internal } - diff --git a/man/emu.platform.Rd b/man/emu.platform.Rd deleted file mode 100644 index 67851d93..00000000 --- a/man/emu.platform.Rd +++ /dev/null @@ -1,8 +0,0 @@ -\name{emu.platform} -\alias{emu.platform} -\title{ emu platform} -\description{ -where are we running -} -\keyword{ internal } - diff --git a/man/emu.query.Rd b/man/emu.query.Rd deleted file mode 100644 index 60128dfc..00000000 --- a/man/emu.query.Rd +++ /dev/null @@ -1,57 +0,0 @@ -\name{emu.query} -\alias{emu.query} -\title{Query an Emu Database} -\description{ -Perform a query on an Emu speech database selecting segments for -subsequent analysis. The return value is a segment list containing -the labels, start and end times and utterance name of each token -matching the query. -} -\usage{ -emu.query(template, pattern=NULL, query="") -} -\arguments{ - \item{template}{ The name of the Emu database to query (in quotes) } - \item{pattern}{ A pattern matching utterances to be searched from the - database } - \item{query}{ a valid Emu query } -} -\details{ -The Emu query language is described in the Emu -documentation. \code{template} must refer to a valid database template -on your system, ie. the template file name without the \code{.tpl} -extension. -} -\value{ -An object of type \code{emusegs} with one row per token matched by the -query and columns for the token label, start time, end time and -utterance name. This can be passed to \code{\link{emu.track}} to extract -speech data corresponding to each token. -} -\references{ See the Emu documentation at: - \url{http://www.shlrc.mq.edu.au/emu} } -\author{ Steve Cassidy } -\note{ This function calls an external which are scripts via tcltk part of the Emu - speech database system and so requires this system to be installed on - your computer. See the Emu web site for details. } - -\seealso{ \code{\link{emu.track}}} - -\examples{ - -## assumes a database called demo is available on your system and that -## the Emu system is installed. -data(vowlax) -# find all Phonetic vowels in the database -\dontrun{segs <- emu.query("demo", "*", "Phonetic=vowel")} -\dontshow{segs = vowlax} -# display summary information on the segments found -summary(segs) -# get formant data at the midpoint and plot it -\dontrun{data <- emu.track( segs, "fm", cut=0.5 )} -\dontshow{data = vowlax.fdat.5} -eplot( data[,1:2], label( segs ), dopoints=TRUE ) - -} -\keyword{misc} - diff --git a/man/emu.requery.Rd b/man/emu.requery.Rd deleted file mode 100644 index 635bd9c1..00000000 --- a/man/emu.requery.Rd +++ /dev/null @@ -1,102 +0,0 @@ -\name{emu.requery} -\alias{emu.requery} -\title{ Generate a segment list or label list derived from an existing - Emu segment list } -\description{ - Given an Emu segment list, \code{emu.requery} can find other segments - which are related to the segments in the original list. For example, - the segment that follows or one dominated by the original segment. -} -\usage{ -emu.requery(segs, level, targetlevel=level, justlabels=FALSE, sequence=0, longerok=FALSE) -} -\arguments{ - \item{segs}{ An Emu segment list } - \item{level}{ The level of the segments in \code{segs} (eg. Phonetic) } - \item{targetlevel}{ The level of the segments/events to find } - \item{justlabels}{ If TRUE, a label vector is returned instead of a - segment list. } - \item{sequence}{ An integer value (positive or negative) which denotes - the relative position of the target segments. } - \item{longerok}{ If TRUE, the returned segment list or label list may - be longer than the input in cases where one segment in the input - dominates more than one segment at the target level. If FALSE, the - result will always have the same length as the input and multiple - labels will be concatenated.} -} -\details{ - \code{emu.requery} provides a way of locating segments related to - those in an existing segment list either by sequence (follows or - preceeds) or hierarchically (dominates, is dominated by). - - To find sequentially related segments or events, use the \code{sequence} - argument to define the number of steps to take. For example - \code{sequence=1} will find the immediately following segment, - \code{sequence=-1} will find the preceeding segment and - \code{sequence=3} will find the third following segment. - - To find hierarchically related segments, use the \code{targetlevel} - argument to denote a different level to that of the original segment - list. Segments at the target level related to the original segments - will be retrieved. For example, if the original segment list consists - of Phonetic segments, \code{targetlevel="Word"} would find the Word - level segment dominating each Phonetic segment. - - If both \code{targetlevel} and \code{sequence} are supplied together, - the \code{targetlevel} argument is applied first and then the - \code{sequence} argument is applied to find the following or - preceeding segment at the target level. For example, - \code{targetlevel="Word" sequence=1} would find the Word following the - word that dominates the original segment. - - In some cases, Emu can't locate an appropriate segment (for example, - because there is no following segment). In this case the corresponding - segment label in the result will be \code{"no-segment"}. - - If \code{justlabels} is TRUE, only the vector of labels for the - matching segments is returned, otherwise a complete segment list is - returned. - - If \code{longerok} is TRUE, the resulting segment list or label vector - might be longer than the input in cases where more than one segment - is dominated by (or dominates) the original segment. For example, if - the original level is Word and the target level is Phoneme the segment - \code{cat} might dominate /k/, /a/ and /t/ Phonemes. If - \code{longerok} is TRUE these will be included as seperate entries in - the result, if it is FALSE the labels and times will be concatenated - (eg. k-a-t). - - Note that \code{emu.requery} might fail in some cases since it relies - on finding segments according to their times. In some cases this - might result in not finding a given segment due to rounding errors. - -} -\value{ - If \code{longerok} is FALSE: an Emu segment list, otherwise a vector - of segment labels. -} -\author{ Steve Cassidy } -\seealso{ \code{\link{emu.query}} } - -\examples{ - -data(vowlax) - -# find all Phonetic vowels in the database -\dontrun{segs <- emu.query("demo", "*", "Phonetic=vowel")} -\dontshow{segs = vowlax} - -## find the word level segments -\dontrun{wsegs <- emu.requery(segs, level="Phonetic", targetlevel="Text")} -\dontshow{wsegs = vowlax.word} - -# now find the Phonetic segment that follows the original vowels: -\dontrun{nsegs <- emu.requery(segs, level="Phonetic", sequence=1)} -\dontshow{nsegs <- vowlax.right} -# and the one that preceeds them, but only get the labels this time -\dontrun{prelabs <- emu.requery(segs, level="Phonetic", sequence=-1, justlabels=TRUE)} -\dontshow{prelabs <- vowlax.left} - - -} -\keyword{misc} diff --git a/man/emu.system.Rd b/man/emu.system.Rd deleted file mode 100644 index 7a4726d2..00000000 --- a/man/emu.system.Rd +++ /dev/null @@ -1,8 +0,0 @@ -\name{emu.system} -\alias{emu.system} -\title{ emu system} -\description{ -the emu system -} -\keyword{ internal } - diff --git a/man/emu.tempfile.Rd b/man/emu.tempfile.Rd deleted file mode 100644 index 22f05719..00000000 --- a/man/emu.tempfile.Rd +++ /dev/null @@ -1,8 +0,0 @@ -\name{emu.tempfile} -\alias{emu.tempfile} -\title{ emu tempfile } -\description{ -Tempfile for read.trackdata -} -\keyword{ internal } - diff --git a/man/emu.testsuite.Rd b/man/emu.testsuite.Rd deleted file mode 100644 index 57d8b9ef..00000000 --- a/man/emu.testsuite.Rd +++ /dev/null @@ -1,62 +0,0 @@ -\name{emu.testsuite} -\alias{emu.testsuite} -\alias{emu.testsuite} -%- Also NEED an '\alias' for EACH other topic documented here. -\title{ -Test suite for functions in the \code{\link{emu-package}} -} -\description{ -Checks all functions in the emuR Tcl library in the EMU Speech Database System. -} -\usage{ -emu.testsuite() -} -\author{ -Tina John -} - -\seealso{ -\code{\link{emu-package}} -} -\examples{ -\dontrun{emu.testsuite() - - # emu.query - # Available Databases: - dbinfo() - - # emu.query - seg = emu.query("demo","*","Phonetic = @: | e: | ei | A | E | @u") - - # Extract Labels from segment list - seg.lab = label(seg) - - # emu.track - # Extraction of the tracks for the segment list - seg.sample = emu.track(seg,"samples") - - #Plot of signals - plot(seg.sample,label=seg.lab, type="l", main="waveforms") - - # dplot, eplot - # Extract track values at point in time - seg.fm = emu.track(seg,"fm") - seg.fm.5 = dcut(seg.fm, .5, prop =T) - - #Plot the data as time signal and formant card - dplot(seg.fm[,1:2], seg.lab, normalise=T, main = "Formants over vowel duration") - eplot(seg.fm.5[,1:2], seg.lab, dopoints=T, doellipse=F, main = "F1/F2 of vowel midpoint", form=T, xlab = "F2 in Hz", ylab = "F1 in Hz") - - # emu.requery - segH = emu.query("demo","*","Phonetic = H") - segHseql1 = emu.requery(segH,"Phonetic","Phonetic",sequence=-1) - segH.lab = label(segHseql1) - - # plot.spectral - segH.dft = emu.track(segH,"dft") - segH.dft.5 = dcut(segH.dft, .5,prop=T) - plot(segH.dft.5,segH.lab, main = "Spectral data of aspiration") -} - -} -\keyword{utilities} diff --git a/man/emu.track.Rd b/man/emu.track.Rd deleted file mode 100644 index cb776b24..00000000 --- a/man/emu.track.Rd +++ /dev/null @@ -1,94 +0,0 @@ -\name{emu.track} -\alias{emu.track} -\title{Retrieve Numerical Data} -\description{ - Takes the result of a database query and retrieves corresponding - time-series data from the database. -} -\usage{ -emu.track(seglist, trackname, cut=NULL, npoints=NULL, template=attr(seglist, "database"), rmfile=TRUE) -} - -\arguments{ - \item{seglist}{ An Emu segment list. } - \item{trackname}{ The name of the data track to retrieve, a - string. This must be a track name defined in the database template. } - \item{cut}{ An optional cut time for segment data, ranges between 0 - and 1, a value of 0.5 will extract data only at the segment - midpoint. } - \item{npoints}{An optional number of points to retrieve for each - segment or event. For segments this requires a cut= argument and - data is extracted around the cut time. For events data is extracted - around the event time.} - \item{template}{ The database to retrieve the data from, this should - not normally be set. } - \item{rmfile}{ A trackdata matrix is written to users home directory. This file is removed (TRUE) or kept (FALSE).} -} -\details{ - - \code{emu.track} takes a segment list as input - and retrieves associated numerical data from the corresponding - database. The segment list will usually be the result of a call - to \code{\link{emu.query}} but could be constructed with the - \code{\link{make.seglist}} function. The result is either a - two dimensional array of data or an object of class - \code{trackdata} which may contain multi-column data - from many tokens. - -} -\value{ - If only two arguments are supplied the entire data track is - retrieved for each segment in the segment list. The amount of data - returned will depend on the sample rate and number of columns in - the track requested. The returned data is packaged up as a - trackdata object. - - The optional \code{cut} argument specifies a cut - point as a fraction of the duration of each segment in the segment - list. If this is specified the data at this single cutpoint will - be extracted rather than that for the entire track. The result of - \code{emu.track} with the cut arguement is a two - dimensional array of data with one row per segment in the original - segment list and one column per column in the requested - track. This array can be treated like any other array in Splus. - - If the input segment list is in fact an event list (ie. is - derived from an annotation level defined as events in the database - template) then the result of \code{emu.track} is the - same as if the cut argument was specified. - - If the \code{npoints} argument is specified, it defines the number of - points that will be returned for each segment or event. The - \code{cut} argument is required if the input is a segment list (as - opposed to an event list). -} - -\references{ See the Emu documentation at: - \url{http://www.shlrc.mq.edu.au/emu} } -\author{ Steve Cassidy } -\note{ This function calls external scripts via tcltk which are part of the Emu - speech database system and so requires this system to be installed on - your computer. See the Emu web site for details. } - -\seealso{ \code{\link{dplot}} \code{\link{dapply}} } - -\examples{ - -## assumes a database called demo is available on your system and that -## the Emu system is installed. -data(vowlax) - -# find all Phonetic vowels in the database -\dontrun{segs <- emu.query("demo", "*", "Phonetic=vowel")} -\dontshow{segs = vowlax} -# get formant data at the midpoint, returns an array -\dontrun{data.mid <- emu.track( segs, "fm", cut=0.5 )} -\dontshow{data.mid = vowlax.fdat.5} -# get formant data for entire tracks, returns trackdata -\dontrun{data.all <- emu.track( segs, "fm" )} -\dontshow{data.all = vowlax.fdat} -summary(data.all) - - -} -\keyword{misc} diff --git a/man/emu.version.Rd b/man/emu.version.Rd deleted file mode 100644 index ed455451..00000000 --- a/man/emu.version.Rd +++ /dev/null @@ -1,19 +0,0 @@ -\name{emu.variables} -\alias{emu.version} -\alias{emu.date} -\alias{emu.year} -\docType{data} -\title{ Emu variables} -\description{ - \describe{ - \item{\code{emu.version}}{Current version of the emu R package} - \item{\code{emu.date}}{Date of package creation} - \item{\code{emu.year}}{Year of package creation, copyright.} - } -} -\usage{ -emu.version -emu.date -emu.year -} -\keyword{datasets} diff --git a/man/emuR-package.Rd b/man/emuR-package.Rd new file mode 100644 index 00000000..50e8f1ef --- /dev/null +++ b/man/emuR-package.Rd @@ -0,0 +1,123 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-packageDocs.R +\docType{package} +\name{emuR-package} +\alias{emuR-package} +\alias{emuR} +\title{emuR - Main Package of the EMU Speech Database Management System} +\description{ +The emuR package provides the next iteration of the EMU Speech +Database Management System with database management, data +extraction, data preparation and data visualization facilities. +} +\details{ +This package is part of the next iteration of the EMU Speech Database Management System (EMU-SDMS) +which aims to be as close to an all-in-one solution for generating, manipulating, querying, +analyzing and managing speech databases as possible. +For an overview of the system please visit this URL: \url{http://ips-lmu.github.io/EMU.html}. + +It can be viewed as the main component of the EMU-SDMS as it acts as +the central instance that is able to interact with every component of the system. +It takes care of database managing duties by being able to interact with a speech +database that is stored in the emuDB format. Further, it has easy to understand and +learn yet expressive and powerful querying mechanics, that allow the user to easily query +the annotation structures of the database. Lastly it provides easy data extraction +capabilities that extract data (e.g. formant values) which corresponds to the +result of a query. + +For an introduction to the emuR package please see the \code{emuR_intro} vignette +by calling: \code{vignette('emuR_intro')} + +For information about the \code{emuDB} database format please see the \code{emuDB} +vignette by calling: \code{vignette('emuDB')} + +For information about the query language used by the EMU-SDMS please see the \code{EQL} +vignette by calling: \code{vignette('EQL')} + +Typical work-flow in emuR (emuDB required): + +\enumerate{ +\item Load database into current R session - \code{\link{load_emuDB}} +\item Database annotation / visual inspection - +\code{\link{serve}} and connect the EMU-webApp to the local server +\item Query database - \code{\link{query}} (sometimes +followed by \code{\link{requery_hier}} or \code{\link{requery_seq}}) +\item Get trackdata (e.g. formant values) for the result +of a query - \code{\link{get_trackdata}} +\item Data preparation +\item Visual data inspection +\item Further analysis and statistical processing +} + +TIP: for a browsable overview of all the functions provided by emuR simply +run the command \code{help.start()} -> click on packages -> click on emuR +} +\examples{ +\dontrun{ +# create demo data including an emuDB called "ae" +create_emuRdemoData(dir = tempdir()) + +# construct path to demo emuDB +path2ae = file.path(tempdir(), "emuR_demoData", "ae") + +# load emuDB into current R session +ae = load_emuDB(path2ae) + +# query loaded emuDB +lvowels = query(ae, "Phonetic = i: | u: | o:") + +# extract labels from query result +lvowels.labs = label(lvowels) + +# list all ssffTrackDefinitions of emuDB +list_ssffTrackDefinitions(ae) + +# get formant trackdata defined in ssffTrackDefinitions "fm" for query result +lvowels.fm = get_trackdata(ae, lvowels, "fm") + +# extract track values at temporal midpoint of segments +lvowels.fmCut = dcut(lvowels.fm, .5, prop = TRUE) + +# Plot the data as time signal and formant card +dplot(lvowels.fm[,1:2], lvowels.labs, normalise=TRUE, main = "Formants over vowel duration") +eplot(lvowels.fmCut[,1:2], lvowels.labs, dopoints=TRUE, + doellipse=FALSE, main = "F1/F2 of vowel midpoint", form=TRUE, + xlab = "F2 in Hz", ylab = "F1 in Hz") + + +# Plot of spectral data from 50\% of aspiration duration +hs = query(ae,"Phonetic = H") +hs.labs = label(hs) +hs.dft = get_trackdata(ae, hs, "dft") +hs.dftCut = dcut(hs.dft, .5, prop=TRUE) +plot(hs.dftCut, hs.labs, main = "Spectral data of aspiration") + +} + +} +\references{ +Harrington, J. (2010). The Phonetic Analysis of Speech Corpora. +Blackwell. +} +\seealso{ +Useful links: +\itemize{ + \item \url{https://github.com/IPS-LMU/emuR} + \item \url{https://ips-lmu.github.io/The-EMU-SDMS-Manual/} + \item Report bugs at \url{https://github.com/IPS-LMU/emuR/issues} +} + +} +\author{ +\strong{Maintainer}: Markus Jochim \email{markusjochim@phonetik.uni-muenchen.de} (\href{https://orcid.org/0000-0002-5638-4870}{ORCID}) + +Authors: +\itemize{ + \item Raphael Winkelmann \email{raphael@phonetik.uni-muenchen.de} + \item Klaus Jaensch \email{klausj@phonetik.uni-muenchen.de} [contributor] + \item Steve Cassidy \email{steve.cassidy@mq.edu.au} [contributor] + \item Jonathan Harrington \email{jmh@phonetik.uni-muenchen.de} [contributor] +} + +} +\keyword{package} diff --git a/man/emuRsegs.Rd b/man/emuRsegs.Rd new file mode 100644 index 00000000..b2644aab --- /dev/null +++ b/man/emuRsegs.Rd @@ -0,0 +1,66 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-objDocs.R +\name{emuRsegs} +\alias{emuRsegs} +\alias{segment} +\alias{list} +\title{emuR segment list} +\format{ +Attributed data.frame, one row per segment descriptor. + +Data frame columns are: +\itemize{ + \item labels: sequenced labels of segment concatenated by '->' + \item start: onset time in milliseconds + \item end: offset time in milliseconds + \item session: session name + \item bundle: bundle name + \item level: level name + \item type: type of "segment" row: 'ITEM': symbolic item, 'EVENT': event item, 'SEGMENT': segment + +} +Additional hidden columns: +\itemize{ + \item utts: utterance name (for compatibility to \link{emusegs} class) + \item db_uuid: UUID of emuDB + \item startItemID: item ID of first element of sequence + \item endItemID: item ID of last element of sequence + \item sampleStart: start sample position + \item sampleEnd: end sample position + \item sampleRate: sample rate +} + +Attributes: +\itemize{ + \item database: name of emuDB + \item query: Query string + \item type: type ('segment' or 'event') (for compatibility to \link{emusegs} class) +} +} +\description{ +An emuR segment list is a list of segment descriptors. Each segment +descriptor describes a sequence of annotation elements. The list is +usually a result of an emuDB query using function \code{\link{query}}. +} +\details{ +Each row shows the annotation label sequence, the start and end position +in time, session and bundle names, level name and type. +Additionally the row contains the UUID of the emuDB, the ID's of start +and end elements and the corresponding start and end position as sample +count and the sample rate. These columns are not printed by default. +The print method of emuRsegs hides them. To print all columns of a segment +list object use the print method of \code{\link{data.frame}}. +For example to print all columns of an emuRsegs segmentlist \code{sl} type: +\code{print.data.frame(sl)} +Though the segment descriptors have references to the annotations, the label +and sample/time position information is not updated if any of them change. The +values of the segment list may get invalid if the the database is modified. +A segment may consist only of one single element, in this case start and end ID are equal. +An emuR segment list is the default result of \code{\link{query}} and can +be used to get track data using \code{\link{get_trackdata}}. +The emuRsegs class inherits \link{emusegs} and hence \code{\link{data.frame}} +} +\seealso{ +\code{\link{query}},\code{\link{get_trackdata}},\link{emusegs} +} +\keyword{classes} diff --git a/man/emuRtrackdata.Rd b/man/emuRtrackdata.Rd new file mode 100644 index 00000000..431eff06 --- /dev/null +++ b/man/emuRtrackdata.Rd @@ -0,0 +1,45 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-objDocs.R +\name{emuRtrackdata} +\alias{emuRtrackdata} +\title{emuR track data object} +\format{ +The \code{\link{data.frame}} has the following columns: + +\describe{ + \item{$sl_rowIdx}{column to indicate \code{\link{emuRsegs}} row index that + the value belongs to} + \item{$labels - $sampleRate}{duplicated information of \code{\link{emuRsegs}} row entries} + \item{$times_rel}{relative time stamps of sample values in milliseconds} + \item{$times_orig}{absolute time stamps of sample values in milliseconds} + \item{$T1 - $TN}{actual data values (e.g. formant values / F0 values / DFT values / ...)} +} + +Note that $labels - $sampleRate as well as $T1 - $TN (where the N in TN is to be read as the n-th T value) +refer to multiple columns of the object. +} +\description{ +A emuR track data object is the result of \code{\link{get_trackdata}} if the +\code{resultType} parameter is set to \code{"emuRtrackdata"} or the result of +an explicit call to \code{\link{create_emuRtrackdata}}. Compared to +the \code{\link{trackdata}} object it is a sub-class of a +\code{\link{data.frame}} which is meant to ease integration with other +packages for further processing. It can be viewed as an amalgamation of +a \code{\link{emuRsegs}} and a \code{\link{trackdata}} object as it +contains the information stored in both objects. +} +\section{Methods}{ + The following methods are implemented for emuRtrackdata objects: + +\describe{ + \item{cut}{Function to extract a \code{\link{emuRtrackdata}} object from an + emuRtrackdata at a single time point or between two times} +} +} + +\seealso{ +\code{\link{get_trackdata}}, \code{\link{create_emuRtrackdata}} + +trackdata +} +\keyword{classes} diff --git a/man/emudata.init.Rd b/man/emudata.init.Rd deleted file mode 100644 index e7b97e0a..00000000 --- a/man/emudata.init.Rd +++ /dev/null @@ -1,8 +0,0 @@ -\name{emudata.init} -\alias{emudata.init} -\title{ emudata init} -\description{ -loads package emudata as far as installed or reports message -} -\keyword{ internal } - diff --git a/man/emuinfo.Rd b/man/emuinfo.Rd deleted file mode 100644 index 3c3ae4c0..00000000 --- a/man/emuinfo.Rd +++ /dev/null @@ -1,25 +0,0 @@ -\name{emuinfo} -\alias{emuinfo} -\title{ -Version number of the EMU tcl-library in tcl path -} -\description{ -emuinfo returns the version number of the EMU tcl-library in tcl path -} -\usage{ -emurinfo() -} -\value{ -Returns the version number of the EMU tcl-library in tcl path. -} -\author{ -Tina John -} - -\seealso{ -\code{\link{emurinfo}} -} -\examples{ -\dontrun{emuinfo()} -} -\keyword{utilities} diff --git a/man/emulink.Rd b/man/emulink.Rd deleted file mode 100644 index ab33d665..00000000 --- a/man/emulink.Rd +++ /dev/null @@ -1,51 +0,0 @@ -\name{emulink} -\Rdversion{1.1} -\alias{emulink} -\title{ -Establishs the link to an Emu installation -} -\description{ -Without argument emulink() tries to read a configuration file and tries -to find the necessary package in the stored directory paths. -If this fails it tries to link against default directories. If this fails, the user -is asked to enter the path. -With paths argument the first two steps are ignored. -} -\usage{ -emulink(paths = "") -} -\arguments{ - \item{paths}{The directories seperated by ";" in which - Emu libraries and tcllib >= 1.8 are installed on the system. Without paths argument link is established with default configurations. -} -} -\details{ -The paths are added to the tcl auto_path variable to make the emuR library available that is part of the Emu Speech Database System \url{http://www.emu.sf.net}. -} -\value{ -After success the file path is returned where this configuration was written to. -} -\references{ -\url{http://www.emu.sf.net} -} -\author{ -Tina John -} -\note{ -Use emulink() without paths argument first. -It is not necessary to link an Emu installation to this libary. -You can use all functions of the library but \code{\link{emu.query}}, \code{\link{emu.requery}} and \code{\link{emu.track}}. -The functionality of \code{\link{emu.query}} and \code{\link{emu.track}} function is provided by the Emu Speech Database System \url{http://www.emu.sf.net} also. Thus export the respective files from the software and import it to R using -\code{\link{read.emusegs}} or \code{\link{read.trackdata}} respectively. -} -\seealso{ -\code{\link{emu.query}} -\code{\link{emu.requery}} -\code{\link{emu.track}} -\code{\link{read.emusegs}} -\code{\link{read.trackdata}} -} -\examples{ -\dontrun{emulink(".")} -} -\keyword{IO} \ No newline at end of file diff --git a/man/emurinfo.Rd b/man/emurinfo.Rd deleted file mode 100644 index c18a1810..00000000 --- a/man/emurinfo.Rd +++ /dev/null @@ -1,25 +0,0 @@ -\name{emurinfo} -\alias{emurinfo} -\title{ -Version number of the EMU/R tcl-library in tcl path -} -\description{ -emurinfo returns the version number of the EMU/R tcl-library in tcl path -} -\usage{ -emurinfo() -} -\value{ -Returns the nversion number of the EMU/R tcl-library in tcl path. -} -\author{ -Tina John -} - -\seealso{ -\code{\link{emuinfo}} -} -\examples{ -\dontrun{emurinfo()} -} -\keyword{utilities} diff --git a/man/emusegs.database.Rd b/man/emusegs.database.Rd index 2ead1bf0..3c5ed883 100644 --- a/man/emusegs.database.Rd +++ b/man/emusegs.database.Rd @@ -1,8 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emusegs.R \name{emusegs.database} \alias{emusegs.database} -\title{ emusegs database } +\title{emusegs database} +\usage{ +emusegs.database(sl) +} \description{ Returns the database attribute from a segmentlist } -\keyword{ internal } - +\keyword{internal} diff --git a/man/emusegs.query.Rd b/man/emusegs.query.Rd index fb400fa2..4dd17328 100644 --- a/man/emusegs.query.Rd +++ b/man/emusegs.query.Rd @@ -1,8 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emusegs.R \name{emusegs.query} \alias{emusegs.query} -\title{emusegs query } +\title{emusegs query} +\usage{ +emusegs.query(sl) +} \description{ sends a emu query to EMU } -\keyword{ internal } - +\keyword{internal} diff --git a/man/emusegs.type.Rd b/man/emusegs.type.Rd index 8c84350c..8acbbf95 100644 --- a/man/emusegs.type.Rd +++ b/man/emusegs.type.Rd @@ -1,8 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emusegs.R \name{emusegs.type} \alias{emusegs.type} \title{segment list type} +\usage{ +emusegs.type(sl) +} \description{ -Gives SEGEMENT or EVENT +Gives SEGMENT or EVENT } -\keyword{ internal } - +\keyword{internal} diff --git a/man/engassim.Rd b/man/engassim.Rd index 20da6fce..894e0daf 100644 --- a/man/engassim.Rd +++ b/man/engassim.Rd @@ -1,6 +1,15 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-dataDocs.R +\docType{data} \name{engassim} \alias{engassim} -\title{Segment list of a a sequence of syllable final n or N preceding k or g , isolated words single speaker, Australian English female from database epgassim.} -\usage{engassim} -\description{An EMU dataset} +\title{Segment list of a sequence of syllable final n or N preceding k or g , +isolated words single speaker, Australian English female from database +epgassim.} +\format{ +segmentlist +} +\description{ +An EMU dataset +} \keyword{datasets} diff --git a/man/engassim.epg.Rd b/man/engassim.epg.Rd index 3efaab8e..31019b04 100644 --- a/man/engassim.epg.Rd +++ b/man/engassim.epg.Rd @@ -1,6 +1,13 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-dataDocs.R +\docType{data} \name{engassim.epg} \alias{engassim.epg} \title{EPG-compressed trackdata from the segment list engassim} -\usage{engassim.epg} -\description{An EMU dataset} +\format{ +trackdata object +} +\description{ +An EMU dataset +} \keyword{datasets} diff --git a/man/engassim.l.Rd b/man/engassim.l.Rd index aa159032..acb79b85 100644 --- a/man/engassim.l.Rd +++ b/man/engassim.l.Rd @@ -1,6 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-dataDocs.R +\docType{data} \name{engassim.l} \alias{engassim.l} -\title{Vector of phonetic labels from the segment list engassim: nK = nk,ng , sK = sk,sg} -\usage{engassim.l} -\description{An EMU dataset} +\title{Vector of phonetic labels from the segment list engassim: nK = nk,ng , sK = +sk,sg} +\format{ +vector of phonetic labels +} +\description{ +An EMU dataset +} \keyword{datasets} diff --git a/man/engassim.w.Rd b/man/engassim.w.Rd index 8c83207a..d3bc1b58 100644 --- a/man/engassim.w.Rd +++ b/man/engassim.w.Rd @@ -1,6 +1,13 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-dataDocs.R +\docType{data} \name{engassim.w} \alias{engassim.w} \title{Vector of word labels from the segment list engassim.} -\usage{engassim.w} -\description{An EMU dataset} +\format{ +vector of word labels +} +\description{ +An EMU dataset +} \keyword{datasets} diff --git a/man/epgai.Rd b/man/epgai.Rd new file mode 100644 index 00000000..8536a19a --- /dev/null +++ b/man/epgai.Rd @@ -0,0 +1,62 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/epg.class.R +\name{epgai} +\alias{epgai} +\alias{epgci} +\alias{epgdi} +\title{Electropalatographic contact indices} +\usage{ +epgai(epgdata, weights = c(1, 9, 81, 729, 4921)) +} +\arguments{ +\item{epgdata}{An eight-columned EPG-compressed trackdata object, or an +eight columned matrix of EPG-compressed trackdata, or a 3D palatographic +array that is the output of palate()} + +\item{weights}{A vector of five values that are applied to EPG rows 1-5 +respectively in epgai(). A vector of four values that are applied to +columns 1 and 8, to columns 2 and 7, columns 3 and 6, columns 4 and 5 +respectively. Defaults to the values given in Recasens & Pallares (2001).} +} +\value{ +These functions return a trackdata object if they are applied to an +eight-columned EPG-compressed trackdata object, otherwise a one-columned +matrix. +} +\description{ +epgai(), epgci(), epgdi() return the anteriority index, the centrality +index, the dorsopalatal index respectively as a trackdata object or a +vector +} +\details{ +These are exact implementations of the formulae for calculating the EPG +anteriority, EPG centrality, and EPG dorsopalatal indices as described in +Recasens & Pallares (2001). +} +\examples{ + +# Anteriority index: trackdata +ai <- epgai(coutts.epg) +# Dorsopalatal index, one-columned matrix +di <- epgdi(dcut(coutts.epg, 0.5, prop=TRUE)) +# Next to examples: Centrality index, one-columed matrix +ci <- epgci(palate(coutts.epg)) +ci <- epgci(palate(dcut(coutts.epg, 0.5, prop=TRUE))) + + +} +\references{ +GIBBON, F. AND NICOLAIDIS, K. (1999). Palatography. In W.J. +Hardcastle & N. Hewlett (eds). Coarticulation. (pp. 229-245). Cambridge +University Press: Cambridge. + +RECASENS, D. & PALLARES, M. (2001) Coarticulation, assimilation and +blending in Catalan consonant clusters. Journal of Phonetics, 29, 273-301. +} +\seealso{ +\code{\link{epgcog}} \code{\link{epggs}} \code{\link{palate}} +} +\author{ +Jonathan Harrington +} +\keyword{math} diff --git a/man/epgcog.Rd b/man/epgcog.Rd index 90d4dd27..8c5bdbb4 100644 --- a/man/epgcog.Rd +++ b/man/epgcog.Rd @@ -1,62 +1,53 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/epg.class.R \name{epgcog} \alias{epgcog} -\title{ Electropalatographic centre of gravity } -\description{ - Calculate the centre of gravity in palatographic data. +\title{Electropalatographic centre of gravity} +\usage{ +epgcog( + epgdata, + weights = seq(7.5, 0.5, by = -1), + rows = 1:8, + columns = 1:8, + row1 = NULL +) } -\usage{epgcog (epgdata, weights = seq(7.5, 0.5, by = -1), rows = 1:8, - columns = 1:8, row1 = NULL) } - \arguments{ - \item{epgdata}{ An eight-columned EPG-compressed trackdata object, -or an eight columned matrix of EPG-compressed trackdata, or -a 3D palatographic array that is the output of palate() } - \item{weights}{ A vector of 8 values that are applied to -EPG rows 1-8 respectively. Defaults to 7.5, 7.0, 6.5...0.5. } - \item{rows}{ Calculate EPG-COG over selected row number(s). -rows = 5:8, columns = 3:6 is an implementation -of posterior centre of gravity, as defined by Gibbon & Nicolaidis (1999,p. 239). See examples below. } - \item{columns}{ Calculate EPG-COG over selected column number(s). } - \item{row1}{ an optional single valued -numeric vector to allow a separate weighting of -the electrodes in row1. For example, if row1=4/3, -then all the electrodes in row1 are multiplied by -that value, before EPG-COG is calculated. -Defaults to NULL (no weighting). }} - -\details{ - The centre of gravity is a key function in palatographic -research and gives an value per palate that is -indicative of the overall location of contacts -along the anterior-posterior dimension. The formula -is an implementation of the ones discussed -in Hardcastle et al. (1991), Gibbon et al (1993), -and Gibbon & Nicolaidis (1999). -} +\item{epgdata}{An eight-columned EPG-compressed trackdata object, or an +eight columned matrix of EPG-compressed trackdata, or a 3D palatographic +array that is the output of palate()} -\value{ -These functions return a trackdata object if -they are applied to an eight-columned EPG-compressed trackdata object, -otherwise a one-columned matrix.} +\item{weights}{A vector of 8 values that are applied to EPG rows 1-8 +respectively. Defaults to 7.5, 7.0, 6.5...0.5.} -\references{ - GIBBON, F., HARDCASTLE, W. and NICOLAIDIS, K. (1993) -Temporal and spatial aspects of lingual -coarticuation in /kl/ sequences: a cross-linguistic -investigation. -Language & Speech, 36, 26t1-277. +\item{rows}{Calculate EPG-COG over selected row number(s). rows = 5:8, +columns = 3:6 is an implementation of posterior centre of gravity, as +defined by Gibbon & Nicolaidis (1999,p. 239). See examples below.} -GIBBON, F. AND NICOLAIDIS, K. (1999). Palatography. -In W.J. Hardcastle & N. Hewlett (eds). Coarticulation. -(pp. 229-245). Cambridge University Press: Cambridge. +\item{columns}{Calculate EPG-COG over selected column number(s).} -HARDCASTLE, W, GIBBON, F. and NICOLAIDIS, K. (1991) -EPG data reduction methods and thier -implications for studies of lingual coarticulation. -Journal of Phonetics, 19, 251-266. +\item{row1}{an optional single valued numeric vector to allow a separate +weighting of the electrodes in row1. For example, if row1=4/3, then all the +electrodes in row1 are multiplied by that value, before EPG-COG is +calculated. Defaults to NULL (no weighting).} +} +\value{ +These functions return a trackdata object if they are applied to an +eight-columned EPG-compressed trackdata object, otherwise a one-columned +matrix. +} +\description{ +Calculate the centre of gravity in palatographic data. +} +\details{ +The centre of gravity is a key function in palatographic research and gives +an value per palate that is indicative of the overall location of contacts +along the anterior-posterior dimension. The formula is an implementation of +the ones discussed in Hardcastle et al. (1991), Gibbon et al (1993), and +Gibbon & Nicolaidis (1999). } - \examples{ + # COG: trackdata cog <- epgcog(coutts.epg) # cog, one-columned matrix @@ -72,16 +63,25 @@ r[6, c(1, 2, 3, 7, 8), 2] <- 1 r[7:8, , 2] = 1 class(r) <- "EPG" epgcog(r, rows=5:8, columns=3:6) -} -\author{ Jonathan Harrington } +} +\references{ +GIBBON, F., HARDCASTLE, W. and NICOLAIDIS, K. (1993) Temporal +and spatial aspects of lingual coarticulation in /kl/ sequences: a +cross-linguistic investigation. Language & Speech, 36, 26t1-277. +GIBBON, F. AND NICOLAIDIS, K. (1999). Palatography. In W.J. Hardcastle & +N. Hewlett (eds). Coarticulation. (pp. 229-245). Cambridge University +Press: Cambridge. -\seealso{ -\code{\link{epgai}} -\code{\link{epgsum}} -\code{\link{palate}} +HARDCASTLE, W, GIBBON, F. and NICOLAIDIS, K. (1991) EPG data reduction +methods and their implications for studies of lingual coarticulation. +Journal of Phonetics, 19, 251-266. } - -%\keyword{emu} -\keyword{math} \ No newline at end of file +\seealso{ +\code{\link{epgai}} \code{\link{epgsum}} \code{\link{palate}} +} +\author{ +Jonathan Harrington +} +\keyword{math} diff --git a/man/epgconindices.Rd b/man/epgconindices.Rd deleted file mode 100644 index 7a58dc86..00000000 --- a/man/epgconindices.Rd +++ /dev/null @@ -1,65 +0,0 @@ -\name{EPG contact indices} -\alias{epgai} -\alias{epgci} -\alias{epgdi} -\title{ Electropalatographic contact indices } -\description{ - epgai(), epgci(), epgdi() return the anteriority index, -the centrality index, the dorsopalatal index respectively -as a trackdata object or a vector -} -\usage{epgai (epgdata, weights = c(1, 9, 81, 729, 4921)) - epgci (epgdata, weights = c(1, 17, 289, 4913)) - epgdi (epgdata) } - -\arguments{ - \item{epgdata}{ An eight-columned EPG-compressed trackdata object, -or an eight columned matrix of EPG-compressed trackdata, or -a 3D palatographic array that is the output of palate() } - \item{weights}{A vector of five values that are applied to -EPG rows 1-5 respectively in epgai(). A vector of -four values that are applied to columns 1 and 8, -to columns 2 and 7, columns 3 and 6, columns 4 and 5 -respectively. Defaults to the values given in Recasens & Pallares (2001). } -} - -\details{ - These are exact implementations of the formulae -for calculating the EPG anteriority, EPG centrality, -and EPG dorsopalatal indices as described in Recasens & Pallares (2001). -} - -\value{ -These functions return a trackdata object if -they are applied to an eight-columned EPG-compressed trackdata object, -otherwise a one-columned matrix.} - -\references{ GIBBON, F. AND NICOLAIDIS, K. (1999). Palatography. -In W.J. Hardcastle & N. Hewlett (eds). Coarticulation. -(pp. 229-245). Cambridge University Press: Cambridge. - -RECASENS, D. & PALLARES, M. (2001) Coarticulation, assimilation and -blending in Catalan consonant clusters. Journal of Phonetics, 29, 273-301. } - -\examples{ -# Anteriority index: trackdata -ai <- epgai(coutts.epg) -# Dorsopalatal index, one-columned matrix -di <- epgdi(dcut(coutts.epg, 0.5, prop=TRUE)) -# Next to examples: Centrality index, one-columed matrix -ci <- epgci(palate(coutts.epg)) -ci <- epgci(palate(dcut(coutts.epg, 0.5, prop=TRUE))) - -} - -\author{ Jonathan Harrington } - - -\seealso{ -\code{\link{epgcog}} -\code{\link{epggs}} -\code{\link{palate}} -} - -%\keyword{emu} -\keyword{math} \ No newline at end of file diff --git a/man/epggs.Rd b/man/epggs.Rd index a128b075..d03e277a 100644 --- a/man/epggs.Rd +++ b/man/epggs.Rd @@ -1,43 +1,60 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/epg.class.R \name{epggs} \alias{epggs} -\title{ Plot a grey-scale image of palatographic data. } -\description{ - The function plots a grey-scale image of palatographic data -such that the greyness in cell r, c is in proportion to the -frequency of contacts in cells of row r and columns c -of all palatograms in the object passed to this function. +\title{Plot a grey-scale image of palatographic data.} +\usage{ +epggs( + epgdata, + gscale = 100, + gridlines = TRUE, + gridcol = "gray", + gridlty = 1, + axes = TRUE, + xlab = "", + ylab = "", + ... +) } -\usage{epggs (epgdata, gscale = 100, gridlines = TRUE, gridcol = "gray", - gridlty = 1, axes = TRUE, xlab = "", ylab = "", ...) } - \arguments{ - \item{epgdata}{ An eight-columned EPG-compressed trackdata object, -or an eight columned matrix of EPG-compressed trackdata, or -a 3D palatographic array that is the output of palate() } - \item{gscale}{a single valued numeric vector that defines the granularity -of the greyscale. Defaults to 100. } - \item{gridlines}{ if T (default) grid lines over the -palatographic image are drawn are drawn. } - \item{gridlty}{ A single-valued numeric vector that defines the linetype for plotting the grid. } - \item{gridcol}{ color of grid } -\item{axes}{ T for show axes, F for no axes} -\item{xlab}{ A character vector for the x-axis label. } -\item{ylab}{ A character vector for the y-axis label. } - \item{...}{ graphical parameters can be given as arguments to 'epggs'. }} +\item{epgdata}{An eight-columned EPG-compressed trackdata object, or an +eight columned matrix of EPG-compressed trackdata, or a 3D palatographic +array that is the output of palate()} -\details{ - The function plots a grey-scale image of up to -62 values arranged over an 8 x 8 -grid with columns 1 and 8 unfilled for row 1. -If cell row r column c is contacted for all -palatograms in the object that is passed to this function, -the corresponding cell is black; if none of -of the cells in row r column c are contacted, -then the cell is white (unfilled). -} +\item{gscale}{a single valued numeric vector that defines the granularity +of the greyscale. Defaults to 100.} + +\item{gridlines}{if TRUE (default) grid lines over the palatographic image are +drawn are drawn.} + +\item{gridcol}{color of grid} +\item{gridlty}{A single-valued numeric vector that defines the linetype for +plotting the grid.} +\item{axes}{TRUE for show axes, FALSE for no axes} + +\item{xlab}{A character vector for the x-axis label.} + +\item{ylab}{A character vector for the y-axis label.} + +\item{...}{graphical parameters can be given as arguments to 'epggs'.} +} +\description{ +The function plots a grey-scale image of palatographic data such that the +greyness in cell r, c is in proportion to the frequency of contacts in +cells of row r and columns c of all palatograms in the object passed to +this function. +} +\details{ +The function plots a grey-scale image of up to 62 values arranged over an 8 +x 8 grid with columns 1 and 8 unfilled for row 1. If cell row r column c +is contacted for all palatograms in the object that is passed to this +function, the corresponding cell is black; if none of of the cells in row r +column c are contacted, then the cell is white (unfilled). +} \examples{ + # greyscale image across the first two segments 'just relax' # with title epggs(coutts.epg[1:2,], main="just relax") @@ -59,17 +76,13 @@ epggs(polhom.epg[temp,]) # the same but derived from palates p <- palate(polhom.epg[temp,]) epggs(p) -} -\author{ Jonathan Harrington } - - -\seealso{ -\code{\link{epgai}} -\code{\link{epgcog}} -\code{\link{epgplot}} -\code{\link{palate}} } - +\seealso{ +\code{\link{epgai}} \code{\link{epgcog}} \code{\link{epgplot}} +\code{\link{palate}} +} +\author{ +Jonathan Harrington +} \keyword{dplot} -%\keyword{emu} \ No newline at end of file diff --git a/man/epgplot.Rd b/man/epgplot.Rd index ad8a9d57..d4ce1665 100644 --- a/man/epgplot.Rd +++ b/man/epgplot.Rd @@ -1,66 +1,71 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/epg.class.R \name{epgplot} \alias{epgplot} -\title{ Plot palatographic data } -\description{ - Function to plot palatograms from EPG compressed -objects or from a 3D-palatographic array -that is output from palate(). +\title{Plot palatographic data} +\usage{ +epgplot( + epgdata, + select = NULL, + numbering = "times", + gridlines = TRUE, + mfrow = NULL, + col = 1, + mar = c(0.8, 0.1, 0.8, 0.1), + xlim = NULL +) } -\usage{epgplot(epgdata, select = NULL, numbering = "times", gridlines = TRUE, - mfrow = NULL, col = 1, mar = c(0.8, 0.1, 0.8, 0.1), xlim = NULL) } - \arguments{ - \item{epgdata}{ An eight-columned EPG-compressed trackdata object, -or an eight columned matrix of EPG-compressed trackdata, or -a 3D palatographic array that is the output of palate() } - \item{select}{A vector of times. Palatograms are -plotted at these times only. Note: this argument -should only be used if epgdata is temporally contiguous, -i.e. the entire trackdata object contains palatograms at -successive multiple times of the EPG sampling frequency. -(as in coutts.epg\$ftime). Defaults to NULL, in which -case palatograms are plotted for all -times available in epgdata. } - \item{numbering}{ Either "times" (default), or logical T, or a character -vector of the same length as the number of segments in -epgdata. In the default case, the times at which the -palatograms occur are printed above the palatograms. -If logical T, then the palatograms are numbered 1, 2, ... -number of segments and this value is printed above the palatograms. -If a character vector, then this must be the -same length as the number of segments in epgdata. } - \item{gridlines}{ if T (default) grid lines over the -palatogram are drawn. } - \item{mfrow}{ By default, the function tries to work out a sensible -number of rows and columns for plotting the palatograms. -Otherwise, this can be user-specified, in which -case mfrow is a vector of two integer numeric values. } - \item{xlim}{ A numeric vector of two time -values over which the epgdata should be -plotted. Note: this argument -should only be used if epgdata is temporally contiguous, -i.e. the entire trackdata object contains palatograms at -successive multiple times of the EPG sampling frequency. -(as in coutts.epg\$ftime). Defaults to NULL (plot all -time values). } - \item{col}{ specify a colour for plotting the filled EPG cells. } - \item{mar}{ A numerical vector of the form 'c(bottom, left, top, right)' - which gives the number of lines of margin to be specified on - the four sides of the plot. The default in -this function is c(0.8, 0.1, 0.8, 0.1). (The default -in the R plot() function is c(5, 4, 4, 2) + 0.1. } +\item{epgdata}{An eight-columned EPG-compressed trackdata object, or an +eight columned matrix of EPG-compressed trackdata, or a 3D palatographic +array that is the output of palate()} + +\item{select}{A vector of times. Palatograms are plotted at these times +only. Note: this argument should only be used if epgdata is temporally +contiguous, i.e. the entire trackdata object contains palatograms at +successive multiple times of the EPG sampling frequency. (as in +coutts.epg$ftime). Defaults to NULL, in which case palatograms are plotted +for all times available in epgdata.} + +\item{numbering}{Either "times" (default), or logical TRUE, or a character +vector of the same length as the number of segments in epgdata. In the +default case, the times at which the palatograms occur are printed above +the palatograms. If logical TRUE, then the palatograms are numbered 1, 2, ... +number of segments and this value is printed above the palatograms. If a +character vector, then this must be the same length as the number of +segments in epgdata.} + +\item{gridlines}{if TRUE (default) grid lines over the palatogram are drawn.} + +\item{mfrow}{By default, the function tries to work out a sensible number +of rows and columns for plotting the palatograms. Otherwise, this can be +user-specified, in which case mfrow is a vector of two integer numeric +values.} + +\item{col}{specify a colour for plotting the filled EPG cells.} + +\item{mar}{A numerical vector of the form 'c(bottom, left, top, right)' +which gives the number of lines of margin to be specified on the four sides +of the plot. The default in this function is c(0.8, 0.1, 0.8, 0.1). (The +default in the R plot() function is c(5, 4, 4, 2) + 0.1.} + +\item{xlim}{A numeric vector of two time values over which the epgdata +should be plotted. Note: this argument should only be used if epgdata is +temporally contiguous, i.e. the entire trackdata object contains +palatograms at successive multiple times of the EPG sampling frequency. (as +in coutts.epg$ftime). Defaults to NULL (plot all time values).} +} +\description{ +Function to plot palatograms from EPG compressed objects or from a +3D-palatographic array that is output from palate(). } - \details{ - The function plots 62 values arranged over an 8 x 8 -grid with columns 1 and 8 unfilled for row 1. -When there is a contact (1), the corresponding rectangle -of the grid is filled otherwise the -rectangle is empty. +The function plots 62 values arranged over an 8 x 8 grid with columns 1 and +8 unfilled for row 1. When there is a contact (1), the corresponding +rectangle of the grid is filled otherwise the rectangle is empty. } - - \examples{ + epgplot(polhom.epg[10,]) # as above but between times 1295 ms and 1330 ms @@ -87,17 +92,13 @@ epgplot(p[,,c(2, 8)], gridlines=FALSE, col="pink", numbering=c("s1", "s2")) # to 16377 ms and 16633 ms epgplot(coutts.epg[2,], c(16377, 16633)) -} - -\author{ Jonathan Harrington } - -\seealso{ -\code{\link{epgai}} -\code{\link{epgcog}} -\code{\link{epggs}} -\code{\link{palate}} } - -%\keyword{emu} +\seealso{ +\code{\link{epgai}} \code{\link{epgcog}} \code{\link{epggs}} +\code{\link{palate}} +} +\author{ +Jonathan Harrington +} \keyword{dplot} diff --git a/man/epgsum.Rd b/man/epgsum.Rd index 3eba6121..1eae0d78 100644 --- a/man/epgsum.Rd +++ b/man/epgsum.Rd @@ -1,54 +1,55 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/epg.class.R \name{epgsum} \alias{epgsum} -\title{ Sum contacts in palatograms. } -\description{ - The function calculates EPG contact -profiles, i.e. sums active -or inactive electrodes optionally by -row and/or column in palatographic data. +\title{Sum contacts in palatograms.} +\usage{ +epgsum( + epgdata, + profile = c(1, 3), + inactive = FALSE, + rows = 1:8, + columns = 1:8, + trackname = "EPG-sum" +) } -\usage{epgsum (epgdata, profile = c(1, 3), inactive = FALSE, rows = 1:8, - columns = 1:8, trackname = "EPG-sum") } - \arguments{ - \item{epgdata}{ An eight-columned EPG-compressed trackdata object, -or an eight columned matrix of EPG-compressed trackdata, or -a 3D palatographic array that is the output of palate() } - \item{profile}{A numeric vector of one or two values. The -options are as follows. c(1,3) and c(1) sum the contacts -by row, but the latter outputs the summation -in the rows. c(2,3) and c(2) -sum the contacts -by column, but the latter outputs the summation in the columns. -(see also rows and columns arguments and the examples below -for further details). } - \item{inactive}{ a single element logical vector. If F (the default), -then the active electrodes (i.e, 1s) are summed, otherwise -the inactive electrodes (i.e., 0s) are summed.} - \item{rows}{vector of rows to sum} - \item{columns}{vector of columns to sum} - \item{trackname}{ single element character vector of the -name of the track (defaults to "EPG-sum") } -} +\item{epgdata}{An eight-columned EPG-compressed trackdata object, or an +eight columned matrix of EPG-compressed trackdata, or a 3D palatographic +array that is the output of palate()} -\details{ - Contact profiles are standard tools in electropalatographic -analysis. See e.g., Byrd (1996) for details. -} +\item{profile}{A numeric vector of one or two values. The options are as +follows. c(1,3) and c(1) sum the contacts by row, but the latter outputs +the summation in the rows. c(2,3) and c(2) sum the contacts by column, but +the latter outputs the summation in the columns. (see also rows and columns +arguments and the examples below for further details).} -\value{ -These functions return a trackdata object if -they are applied to an eight-columned EPG-compressed trackdata object, -otherwise a one-columned matrix.} +\item{inactive}{a single element logical vector. If FALSE (the default), then +the active electrodes (i.e, 1s) are summed, otherwise the inactive +electrodes (i.e., 0s) are summed.} -\references{ BYRD, D. (1996). Influences on articulatory timing -in consonant sequences. Journal of Phonetics, 24, 209-244. +\item{rows}{vector of rows to sum} -GIBBON, F. AND NICOLAIDIS, K. (1999). Palatography. -In W.J. Hardcastle & N. Hewlett (eds). Coarticulation. -(pp. 229-245). Cambridge University Press: Cambridge. } +\item{columns}{vector of columns to sum} +\item{trackname}{single element character vector of the name of the track +(defaults to "EPG-sum")} +} +\value{ +These functions return a trackdata object if they are applied to an +eight-columned EPG-compressed trackdata object, otherwise a one-columned +matrix. +} +\description{ +The function calculates EPG contact profiles, i.e. sums active or inactive +electrodes optionally by row and/or column in palatographic data. +} +\details{ +Contact profiles are standard tools in electropalatographic analysis. See +e.g., Byrd (1996) for details. +} \examples{ + # Trackdata object of the sum of contacts in the 1st segment of polhom.epg epgsum(polhom.epg[1,]) # as above, but the summation is in rows 1-3 only. @@ -77,17 +78,21 @@ epgsum(p, 1, columns=3:6) # sum of the contacts in columns 3-6 showing the separate column summations. epgsum(p, 2, columns=3:6) -} - -\author{ Jonathan Harrington } - -\seealso{ -\code{\link{epgai}} -\code{\link{epgcog}} -\code{\link{epggs}} -\code{\link{palate}} } +\references{ +BYRD, D. (1996). Influences on articulatory timing in consonant +sequences. Journal of Phonetics, 24, 209-244. -%\keyword{emu} +GIBBON, F. AND NICOLAIDIS, K. (1999). Palatography. In W.J. Hardcastle & +N. Hewlett (eds). Coarticulation. (pp. 229-245). Cambridge University +Press: Cambridge. +} +\seealso{ +\code{\link{epgai}} \code{\link{epgcog}} \code{\link{epggs}} +\code{\link{palate}} +} +\author{ +Jonathan Harrington +} \keyword{math} diff --git a/man/eplot.Rd b/man/eplot.Rd index 2fc3b2b2..a92187a5 100644 --- a/man/eplot.Rd +++ b/man/eplot.Rd @@ -1,66 +1,92 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/eplot.R \name{eplot} \alias{eplot} -\title{ Plot ellipses for two-dimensional data} -\description{ - The function plots ellipses for different categories from two-dimensional data. -} +\title{Plot ellipses for two-dimensional data (DEPRECATED see below)} \usage{ -eplot (x, labs, chars, formant = FALSE, scaling = "linear", - prob = 0.95, nsdev = NULL, dopoints = FALSE, doellipse = TRUE, - centroid = FALSE, axes = TRUE, xlim, ylim, col = TRUE, lty = FALSE, - lwd = NULL, ...) +eplot( + x, + labs, + chars, + formant = FALSE, + scaling = "linear", + prob = 0.95, + nsdev = NULL, + dopoints = FALSE, + doellipse = TRUE, + centroid = FALSE, + axes = TRUE, + xlim, + ylim, + col = TRUE, + lty = FALSE, + lwd = NULL, + ... +) } -%- maybe also 'usage' for other objects documented here. \arguments{ - \item{x}{ A two-columned matrix of data} - \item{labs}{ An optional vector of labels, parallel to 'data' } - \item{chars}{ An optional vector of labels, parallel to 'data'. If this argument is - specified these labels will be plotted rather than the labels - in 'labs'. } - \item{formant}{ If TRUE) then the data is negated and the axes are switched so that, for formant data, - the plot is made with decreasing F2 on the x-axis and decreasing F1 on the y-axis. } - \item{scaling}{ Either "mel" or "bark" for mel or bark scaling of the data} - \item{prob}{ A single numeric vector greater than zero and less than 1 representing the confidence interval -of the ellipse contours. Defaults to 0.95} - \item{nsdev}{ Defines the length of the major and minor axes of the - ellipses in terms of the standard deviation of the data and overrides the prob argument.} - \item{dopoints}{If TRUE) character labels (from 'labs' or 'chars') are - plotted for each data point} - \item{doellipse}{ If TRUE, ellipses are drawn on the plot. If FALSE, no ellipses are - drawn and, if 'dopoints' is also FALSE, 'centroids' is set to T} - \item{centroid}{One label for each ellipse is drawn} - \item{axes}{ If TRUE axes are drawn on the plot. } - \item{xlim}{ A vector of two numeric values giving the range of the x-axis. } - \item{ylim}{ A vector of two numeric values giving the range of the y-axis. } - \item{col}{ If colour is TRUE) the ellipses and labels will be plotted in - different colours } - \item{lty}{If linetype is TRUE) the ellipses will be plotted with - different linetypes. This is useful for plots that will be - printed.} -\item{lwd}{ A code passed to the lwd argument -in plotting functions. -'lwd' can be either -a single element numeric vector, or its length must -be equal to the number of unique types in labs. -For example, if lwd=3 and if labs = c("a", "b", "a", "c"), -then the output is c(3, 3, 3, 3). Alternatively, -if lwd = c(2,3,1), then the output is -c(2, 3, 2, 1) for the same example. The default is -NULL in which case all lines are drawn with lwd=1 } -\item{...}{graphical options \link{par}} -} -\value{ - NULL -} +\item{x}{A two-columned matrix of data} + +\item{labs}{An optional vector of labels, parallel to 'data'} + +\item{chars}{An optional vector of labels, parallel to 'data'. If this +argument is specified these labels will be plotted rather than the labels +in 'labs'.} + +\item{formant}{If TRUE) then the data is negated and the axes are switched +so that, for formant data, the plot is made with decreasing F2 on the +x-axis and decreasing F1 on the y-axis.} + +\item{scaling}{Either "mel" or "bark" for mel or bark scaling of the data} + +\item{prob}{A single numeric vector greater than zero and less than 1 +representing the confidence interval of the ellipse contours. Defaults to +0.95} + +\item{nsdev}{Defines the length of the major and minor axes of the ellipses +in terms of the standard deviation of the data and overrides the prob +argument.} + +\item{dopoints}{If TRUE) character labels (from 'labs' or 'chars') are +plotted for each data point} + +\item{doellipse}{If TRUE, ellipses are drawn on the plot. If FALSE, no +ellipses are drawn and, if 'dopoints' is also FALSE, 'centroids' is set to +TRUE} + +\item{centroid}{One label for each ellipse is drawn} -\author{ Jonathan Harrington jmh@ipds.uni-kiel.de, Steve Cassidy, Gordon Watson } +\item{axes}{If TRUE axes are drawn on the plot.} -\seealso{ - \code{\link{dcut}} +\item{xlim}{A vector of two numeric values giving the range of the x-axis.} + +\item{ylim}{A vector of two numeric values giving the range of the y-axis.} + +\item{col}{If colour is TRUE) the ellipses and labels will be plotted in +different colours} + +\item{lty}{If linetype is TRUE) the ellipses will be plotted with different +linetypes. This is useful for plots that will be printed.} + +\item{lwd}{A code passed to the lwd argument in plotting functions. 'lwd' +can be either a single element numeric vector, or its length must be equal +to the number of unique types in labs. For example, if lwd=3 and if labs = +c("a", "b", "a", "c"), then the output is c(3, 3, 3, 3). Alternatively, if +lwd = c(2,3,1), then the output is c(2, 3, 2, 1) for the same example. The +default is NULL in which case all lines are drawn with lwd=1} + +\item{...}{graphical options \link{par}} +} +\description{ +The function plots ellipses for different categories from two-dimensional +data. DEPRECATED as this function does not play well with with the new +resultType = "tibble" of \code{get_trackdata()}. See \url{https://ips-lmu.github.io/The-EMU-SDMS-Manual/recipe-plottingSnippets.html} +for an alternative plotting routines using ggplot2. } \examples{ - data(vowlax) + + data(vowlax) data <- cbind(vowlax.df$F1,vowlax.df$F2) phonetic = vowlax.l word = vowlax.word @@ -70,11 +96,16 @@ NULL in which case all lines are drawn with lwd=1 } eplot(data, phonetic, form=TRUE, main="F1 x F2 plane", centroid=TRUE) eplot(data, phonetic, form=TRUE, main="F1 x F2 plane", dopoints=TRUE) - eplot(data, phonetic, form=TRUE, main="F1 x F2 plane in Bark", dopoints=TRUE, scaling="bark") - eplot(data, phonetic, form=TRUE, main="F1 x F2 plane in Bark b/w with linetype", col=FALSE, lty=TRUE, dopoints=TRUE, scaling="bark") - eplot(data, phonetic, form=TRUE, main="F1 x F2 plane", doellipse=FALSE, dopoints=TRUE) - eplot(data, phonetic, form=TRUE, dopoints=TRUE, prob=0.5, main="F1 x F2 plane, 50\% confidence intervals") - eplot(data, phonetic, form=TRUE, dopoints=TRUE, nsdev=2, main="F1 x F2 plane, 2 standard deviations") + eplot(data, phonetic, form=TRUE, main="F1 x F2 plane in Bark", + dopoints=TRUE, scaling="bark") + eplot(data, phonetic, form=TRUE, main="F1 x F2 plane in Bark b/w with linetype", + col=FALSE, lty=TRUE, dopoints=TRUE, scaling="bark") + eplot(data, phonetic, form=TRUE, main="F1 x F2 plane", + doellipse=FALSE, dopoints=TRUE) + eplot(data, phonetic, form=TRUE, dopoints=TRUE, + prob=0.5, main="F1 x F2 plane, 50\% confidence intervals") + eplot(data, phonetic, form=TRUE, dopoints=TRUE, + nsdev=2, main="F1 x F2 plane, 2 standard deviations") temp <- phonetic \%in\% c("a", "O") @@ -82,12 +113,18 @@ NULL in which case all lines are drawn with lwd=1 } temp <- phonetic=="O" - eplot(data[temp,], phonetic[temp], word[temp], form=TRUE, dopoints=TRUE, main="[O] only showing word labels") + eplot(data[temp,], phonetic[temp], word[temp], form=TRUE, + dopoints=TRUE, main="[O] only showing word labels") -} -%\keyword{emu} -\keyword{dplot} \ No newline at end of file +} +\seealso{ +\code{\link{dcut}} +} +\author{ +Jonathan Harrington, Steve Cassidy +} +\keyword{dplot} diff --git a/man/euclidean.Rd b/man/euclidean.Rd index 1588df3c..0ff0057e 100644 --- a/man/euclidean.Rd +++ b/man/euclidean.Rd @@ -1,36 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/euclidean.R \name{euclidean} \alias{euclidean} \alias{euclidean.metric} - -\title{ -Find the inter-euclidean distance for a data matrix -} +\title{Find the inter-euclidean distance for a data matrix} \usage{ -euclidean(data, m=1, n=ncol(data)) -} -\description{ -Finds the inter-euclidean distance for a data matrix +euclidean(data, m = 1, n = ncol(data)) } \arguments{ - \item{data}{ - A vector or matrix of numerical data. - } - \item{m}{ - The first column of data to be used in the distance calculation. - } - \item{n}{ - The last column of data to be used in the distance calculation. - } +\item{data}{A vector or matrix of numerical data.} + +\item{m}{The first column of data to be used in the distance calculation.} + +\item{n}{The last column of data to be used in the distance calculation.} } \value{ -Calculates the euclidean distance between successive rows of the matrix -based on columns m:n. +Calculates the euclidean distance between successive rows of the +matrix based on columns m:n. } -\seealso{ -steady +\description{ +Finds the inter-euclidean distance for a data matrix } \examples{ + euclidean(cbind(c(1,2,3,4), c(2,3,2,2))) } +\seealso{ +steady +} \keyword{misc} -% Converted by Sd2Rd version 0.3-3. diff --git a/man/expand_labels.Rd b/man/expand_labels.Rd index 5accefc0..8936de47 100644 --- a/man/expand_labels.Rd +++ b/man/expand_labels.Rd @@ -1,27 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/norm.R \name{expand_labels} \alias{expand_labels} -\title{ -Label each data sample -} -\description{ -Labels each data sample -} +\title{Label each data sample} \usage{ - expand_labels(indvals, labs) +expand_labels(indvals, labs) } \arguments{ -\item{indvals}{ -Index component of a trackdata object as returned by \code{frames}, or \code{track}. +\item{indvals}{Index component of a trackdata object as returned by +\code{frames}, or \code{track}.} + +\item{labs}{A label vector parallel to \code{indvals}.} } -\item{labs}{ -A label vector parallel to \code{indvals}. -}} \value{ -Returns a vector of labels, one for each row in the data matrix that -corresponds to \code{indvals}. +Returns a vector of labels, one for each row in the data matrix +that corresponds to \code{indvals}. +} +\description{ +Labels each data sample } \seealso{ frames, track } \keyword{misc} -% Converted by Sd2Rd version 0.3-3. diff --git a/man/export_BPFCollection.Rd b/man/export_BPFCollection.Rd new file mode 100644 index 00000000..ab140ef1 --- /dev/null +++ b/man/export_BPFCollection.Rd @@ -0,0 +1,47 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-export_BPFCollection.R +\name{export_BPFCollection} +\alias{export_BPFCollection} +\title{Exports an emuDB into a BAS Partitur File (BPF) Collection} +\usage{ +export_BPFCollection( + handle, + targetDir, + extractLevels, + refLevel = NULL, + verbose = TRUE, + newLevels = NULL, + newLevelClasses = NULL, + copyAudio = FALSE +) +} +\arguments{ +\item{handle}{handle to the emuDB} + +\item{targetDir}{directory where the BPF collection should be saved} + +\item{extractLevels}{list containing the names of labels (not levels!) that should be extracted, and their +matching BPF keys, e.g. extractLevels = list(SampleRate="SAM", Text="ORT", Phonemes="SAP")} + +\item{refLevel}{optional name of level (not label!) used as reference for symbolic links. If NULL (the default), a link-less BPF collection is created.} + +\item{verbose}{display infos, warnings and show progress bar} + +\item{newLevels}{optional vector containing names of levels in the BPF collection that are not part of the standard +BPF levels. See \url{https://www.bas.uni-muenchen.de/forschung/Bas/BasFormatseng.html#Partitur_tiersdef} for details on +standard BPF levels.} + +\item{newLevelClasses}{optional vector containing the classes of levels in the newLevels vector as integers. +Must have the same length and order as newLevels.} + +\item{copyAudio}{if true, audio files are copied to the new BPF collection} +} +\description{ +This function exports an emuDB into the BAS Partitur File format, with one BPF file per bundle. +The user must pass a list of matching label names and BPF keys. +\strong{Important:} The BPF format does not support explicit hierarchies with more than three +levels. Hence, you will probably lose information when exporting complex hierarchies. +} +\seealso{ +export_TextGridCollection +} diff --git a/man/export_TextGridCollection.Rd b/man/export_TextGridCollection.Rd new file mode 100644 index 00000000..a3a737d4 --- /dev/null +++ b/man/export_TextGridCollection.Rd @@ -0,0 +1,76 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-export_TextGridCollection.R +\name{export_TextGridCollection} +\alias{export_TextGridCollection} +\title{Export annotations of emuDB to TextGrid collection} +\usage{ +export_TextGridCollection( + emuDBhandle, + targetDir, + sessionPattern = ".*", + bundlePattern = ".*", + attributeDefinitionNames = NULL, + timeRefSegmentLevel = NULL, + verbose = TRUE +) +} +\arguments{ +\item{emuDBhandle}{emuDB handle object (see \link{load_emuDB})} + +\item{targetDir}{directory where the TextGrid collection should be saved} + +\item{sessionPattern}{A regular expression pattern matching session names to be exported from the database} + +\item{bundlePattern}{A regular expression pattern matching bundle names to be exported from the database} + +\item{attributeDefinitionNames}{list of names of attributeDefinitions that are to be +exported as tiers. If set to NULL (the default) all attribute definitions will be exported as separate tiers.} + +\item{timeRefSegmentLevel}{parameter passed into \link{query} function. (set time segment level from which to derive time +information. It is only necessary to set this parameter if more than one child +level contains time information and the queried parent level is of type ITEM.)} + +\item{verbose}{Show progress bars and further information} +} +\description{ +Exports the annotations of an emuDB to a TextGrid collection (.TextGrid and .wav file pairs). +To avoid naming conflicts and not to loose the session information, the session structure of +the database is kept in place (i.e. the TextGrid collection will have sub-folders that are named +as the sessions were). Due to the more complex annotation structure modeling capabilities of +the EMU-SDMS system, this export routine has to make several compromises on export which +can lead to information loss. So use with caution and at own risk as reimporting the exported +data will mean that not all information can be recreated! +The main compromises are: +\itemize{ + \item If a MANY_TO_MANY relationship between two levels is present and + two items from the parent level are linked to a single item on the child level, the + concatenated using the '->' symbol. An example would be: the annotation items containing the labels 'd' and 'b' of the + parent items are merged into a single annotation item and their labels are + Phoneme level are linked to 'db' on the Phonetic level. The generated Phoneme tier then has a segment with the + start and end times of the 'db' item and contains the labels 'db' (see for example the + bundle 0000_ses/msajc010_bndl of the ae_emuDB). + \item As annotations can contain gaps (e.g. incomplete hierarchies or orphaned items) and do not have to start at + time 0 and be the length of the audio file this export routine pads these gaps with empty segments. +} +} +\examples{ +\dontrun{ + +################################## +# prerequisite: loaded ae emuDB +# (see ?load_emuDB for more information) + +## Export all levels +export_TextGridCollection(ae, "/path/2/targetDir") + +} + +} +\seealso{ +\code{\link{load_emuDB}} +} +\keyword{EQL} +\keyword{Emu} +\keyword{database} +\keyword{emuDB} +\keyword{query} diff --git a/man/export_seglistToTxtCollection.Rd b/man/export_seglistToTxtCollection.Rd new file mode 100644 index 00000000..59f949fa --- /dev/null +++ b/man/export_seglistToTxtCollection.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-emuRsegs.R +\name{export_seglistToTxtCollection} +\alias{export_seglistToTxtCollection} +\title{Exports a segment list to txt collection} +\usage{ +export_seglistToTxtCollection(emuDBhandle, seglist, targetDir) +} +\arguments{ +\item{emuDBhandle}{emuDB handle as returned by \code{\link{load_emuDB}}} + +\item{seglist}{\code{tibble}, \code{\link{emuRsegs}} or +\code{\link{emusegs}} object obtained by \code{\link{query}}ing a loaded emuDB} + +\item{targetDir}{target directory to store} +} +\description{ +Extract the media file (usually .wav file) snippets that correspond to +the segments of a segment list (see result of a \code{\link{query}}) and +save them to separate files and write the corresponding labels into a .txt file. Further, +the segmentlist is also stored to the target directory (as a .csv file). +} diff --git a/man/fapply.Rd b/man/fapply.Rd index 6fb86135..bfc70e2d 100644 --- a/man/fapply.Rd +++ b/man/fapply.Rd @@ -1,54 +1,46 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/apply.R \name{fapply} \alias{fapply} - -\title{ Function that applies a function to an EMU spectral object} -\description{ - Applies a function to an EMU spectral object. -} +\title{Function that applies a function to an EMU spectral object} \usage{ fapply(specdata, fun, ..., power = FALSE, powcoeffs = c(10, 10)) } - \arguments{ - \item{specdata}{ A matrix or trackdata object of class spectral } - \item{fun}{ A function to be applied. } - \item{\dots}{ Optional arguments to fun } - \item{power}{ -A single element logical vector. If T, convert specdata to -power values i.e. apply the function to a * specdata \eqn{\mbox{\textasciicircum}}{^}b -or a * specdata\$data \eqn{\mbox{\textasciicircum}}{^}b -where a and b powcoeffs defined below. +\item{specdata}{A matrix or trackdata object of class spectral} + +\item{fun}{A function to be applied.} + +\item{\dots}{Optional arguments to fun} + +\item{power}{A single element logical vector. If TRUE, convert specdata to +power values i.e. apply the function to a * specdata +\eqn{\mbox{\textasciicircum}}{^}b or a * specdata$data +\eqn{\mbox{\textasciicircum}}{^}b where a and b powcoeffs defined below.} + +\item{powcoeffs}{A 2 element numeric vector for converting dB values to +power values. Defaults to a = 10 and b = 10. See \code{power}.} } -\item{powcoeffs}{ -A 2 element numeric vector for converting dB values to power values. -Defaults to a = 10 and b = 10. See \code{power}. +\value{ +If the output has the same dimensions has the input, then an object +of the same dimensionality and class is returned. Otherwise it may be a +vector or matrix depending on the function that is applied. ... } - +\description{ +Applies a function to an EMU spectral object. } - \details{ - fapply performs a similar operation to apply except that it -is specifically designed for handling EMU spectral objects. +fapply performs a similar operation to apply except that it is specifically +designed for handling EMU spectral objects. } -\value{ - If the output has the same dimensions has the -input, then an object of the same dimensionality and -class is returned. Otherwise it may be a vector or matrix -depending on the function that is applied. - ... +\section{Warning }{ + The function can be very slow if applied to a large +trackdata object. In this case, it may be faster to use a for-loop with the +desired function around $data } -\author{ Jonathan Harrington } - -\section{Warning }{The function can be very slow -if applied to a large trackdata object. In this case, -it may be faster to use a for-loop with the desired -function around \$data} -\seealso{ -\code{\link{apply}} -\code{\link{by.trackdata}} -} \examples{ + # mean value per spectrum, input is a spectral matrix m <- fapply(vowlax.dft.5, sapply, FUN=mean) # as above but after converting dB to powers before @@ -67,6 +59,12 @@ d2 <- fapply(vowlax.dft.5, dct, 10, TRUE) # dct-smooth a trackdata object with 10 coefficients d3 <- fapply(fric.dft[1:4,], dct, 10, TRUE) -} +} +\seealso{ +\code{\link{apply}} \code{\link{by.trackdata}} +} +\author{ +Jonathan Harrington +} \keyword{utilities} diff --git a/man/frames.Rd b/man/frames.Rd index c250522d..14b66cf8 100644 --- a/man/frames.Rd +++ b/man/frames.Rd @@ -1,20 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/trackdata.class.R \name{frames} \alias{frames} \title{frames} -\description{ - Get frames from trackdata objects -} \usage{ frames(trackdata) } \arguments{ - \item{trackdata}{an object of class trackdata} +\item{trackdata}{an object of class trackdata} } \value{ - Data frames from the input object. +Data frames from the input object. +} +\description{ +Get frames from trackdata objects } \seealso{ - \code{\link{trackdata}} +\code{\link{trackdata}} +} +\author{ +Jonathan Harrington } -\author{Jonathan Harrington} \keyword{utilities} diff --git a/man/frames.time.Rd b/man/frames.time.Rd index 294429dc..5b25139a 100644 --- a/man/frames.time.Rd +++ b/man/frames.time.Rd @@ -1,35 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dfuns.R \name{frames.time} \alias{frames.time} -\title{ -Find the time and position of a data element. -} +\title{Find the time and position of a data element.} \usage{ frames.time(dataset, datanum) -} -\description{ -Finds the time and position of a data element. - } \arguments{ -\item{dataset}{ -A dataset returned by \code{track} or \code{frames}. -} -\item{datanum}{ -An integer, an index into the \code{data} component of \code{dataset}. -} +\item{dataset}{A dataset returned by \code{track} or \code{frames}.} + +\item{datanum}{An integer, an index into the \code{data} component of +\code{dataset}.} } \value{ -The segment number which contains the element \code{datanum} of \code{dataset$data}. +The segment number which contains the element \code{datanum} of +\code{dataset$data}. +} +\description{ +Finds the time and position of a data element. } \details{ -The dataset returned from \code{track} or \code{frames} consists of a matrix of -data (the \code{data} component) and two index components (\code{index} and -\code{ftime}). The data for all segments is concatenated together in -\code{$data}. This function can be used to find out which segment a -particular row of \code{$data} corresponds to. +The dataset returned from \code{track} or \code{frames} consists of a +matrix of data (the \code{data} component) and two index components +(\code{index} and \code{ftime}). The data for all segments is concatenated +together in \code{$data}. This function can be used to find out which +segment a particular row of \code{$data} corresponds to. } \seealso{ track, frames } \keyword{misc} -% Converted by Sd2Rd version 0.3-3. diff --git a/man/freqtoint.Rd b/man/freqtoint.Rd index 80943657..bb21b7f1 100644 --- a/man/freqtoint.Rd +++ b/man/freqtoint.Rd @@ -1,35 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/misc.R \name{freqtoint} \alias{freqtoint} -\title{ Function to find the column number corresponding to frequencies of a spectral object } -\description{ - Find the column number corresponding to frequencies of -a spectral object. -} +\title{Function to find the column number corresponding to frequencies of a +spectral object} \usage{ freqtoint(trackdata, j) } \arguments{ - \item{trackdata}{ A spectral object} - \item{j}{ A vector of frequencies } +\item{trackdata}{A spectral object} + +\item{j}{A vector of frequencies} +} +\description{ +Find the column number corresponding to frequencies of a spectral object. } \details{ - This function is used in conjunction with object oriented -programming of EMU spectral objects. It should not in general -be called from inside a function. Its principal use -is to determine the column number(s) corresponding to -frequencies for spectral trackdata objects or spectral matrices -or the element number for spectral vectors. +This function is used in conjunction with object oriented programming of +EMU spectral objects. It should not in general be called from inside a +function. Its principal use is to determine the column number(s) +corresponding to frequencies for spectral trackdata objects or spectral +matrices or the element number for spectral vectors. } - - -\author{Jonathan Harrington} - - -%\seealso{ -%\code{\link{[.spectral}} -%} - \examples{ + freqtoint(fric.dft,1000:2000) # all frequencies except 1000-2000 freqtoint(vowlax.dft.5, -(1000:2000)) @@ -42,8 +36,9 @@ freqtoint(vowlax.dft.5, -1) -} +} +\author{ +Jonathan Harrington +} \keyword{math} - - diff --git a/man/fric.Rd b/man/fric.Rd index ffe530f4..eda651c1 100644 --- a/man/fric.Rd +++ b/man/fric.Rd @@ -1,6 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-dataDocs.R +\docType{data} \name{fric} \alias{fric} -\title{Segment list of word-medial s or z one male speaker of Standard North German, read speech from database kielread.} -\usage{fric} -\description{An EMU dataset} +\title{Segment list of word-medial s or z one male speaker of Standard North +German, read speech from database kielread.} +\format{ +segmentlist +} +\description{ +An EMU dataset +} \keyword{datasets} diff --git a/man/fric.dft.Rd b/man/fric.dft.Rd index ee2642d5..5c72e6f9 100644 --- a/man/fric.dft.Rd +++ b/man/fric.dft.Rd @@ -1,6 +1,13 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-dataDocs.R +\docType{data} \name{fric.dft} \alias{fric.dft} \title{Spectral trackdata object from the segment list fric.} -\usage{fric.dft} -\description{An EMU dataset} +\format{ +trackdata object +} +\description{ +An EMU dataset +} \keyword{datasets} diff --git a/man/fric.l.Rd b/man/fric.l.Rd index 0b3f8170..9552ec25 100644 --- a/man/fric.l.Rd +++ b/man/fric.l.Rd @@ -1,6 +1,13 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-dataDocs.R +\docType{data} \name{fric.l} \alias{fric.l} \title{Vector of labels from the segment list fric} -\usage{fric.l} -\description{An EMU dataset} +\format{ +vector of labels +} +\description{ +An EMU dataset +} \keyword{datasets} diff --git a/man/fric.w.Rd b/man/fric.w.Rd index e85bdeb8..122b0b19 100644 --- a/man/fric.w.Rd +++ b/man/fric.w.Rd @@ -1,6 +1,13 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-dataDocs.R +\docType{data} \name{fric.w} \alias{fric.w} \title{Vector of word labels from the segment list fric.} -\usage{fric.w} -\description{An EMU dataset} +\format{ +vector of word labels +} +\description{ +An EMU dataset +} \keyword{datasets} diff --git a/man/gerst.sub.Rd b/man/gerst.sub.Rd index 0990a4e5..e2448629 100644 --- a/man/gerst.sub.Rd +++ b/man/gerst.sub.Rd @@ -1,8 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/norm.R \name{gerst.sub} \alias{gerst.sub} -\title{ gerst sub } +\title{gerst sub} +\usage{ +gerst.sub(data) +} \description{ see function } -\keyword{ internal } - +\keyword{internal} diff --git a/man/get.originalFreq.Rd b/man/get.originalFreq.Rd deleted file mode 100644 index f101b1d8..00000000 --- a/man/get.originalFreq.Rd +++ /dev/null @@ -1,9 +0,0 @@ -\name{get.originalFreq} -\alias{get.originalFreq} - -\title{ get originalFreq } -\description{ -see function -} -\keyword{ internal } - diff --git a/man/get.time.element.Rd b/man/get.time.element.Rd index 901e4ca4..e0d3924e 100644 --- a/man/get.time.element.Rd +++ b/man/get.time.element.Rd @@ -1,28 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dfuns.R \name{get.time.element} \alias{get.time.element} -\title{ -Get data for a given time -} +\title{Get data for a given time} \usage{ get.time.element(timeval, dataset) -} -\description{ -Gets data for a given time - } \arguments{ -\item{timeval}{ -A time in milliseconds +\item{timeval}{A time in milliseconds} + +\item{dataset}{A trackdata object as returned by \code{track}.} } -\item{dataset}{ -A trackdata object as returned by \code{track}. -}} \value{ -The element number of \code{trackdata$data} corresponding to \code{time} +The element number of \code{trackdata$data} corresponding to +\code{time} +} +\description{ +Gets data for a given time } \seealso{ track, frames } - \keyword{misc} -% Converted by Sd2Rd version 0.3-3. diff --git a/man/get.trackkeywrd.Rd b/man/get.trackkeywrd.Rd index 44caef8f..8cd49e53 100644 --- a/man/get.trackkeywrd.Rd +++ b/man/get.trackkeywrd.Rd @@ -1,9 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/trackdata.class.R \name{get.trackkeywrd} \alias{get.trackkeywrd} - -\title{ get trackkeywrd} +\title{get trackkeywrd} +\usage{ +get.trackkeywrd(fname) +} \description{ see function } -\keyword{ internal } - +\keyword{internal} diff --git a/man/get_trackdata.Rd b/man/get_trackdata.Rd new file mode 100644 index 00000000..88785db8 --- /dev/null +++ b/man/get_trackdata.Rd @@ -0,0 +1,139 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-get_trackdata.R +\name{get_trackdata} +\alias{get_trackdata} +\alias{emu.track} +\title{Get trackdata from loaded emuDB} +\usage{ +get_trackdata( + emuDBhandle, + seglist = NULL, + ssffTrackName = NULL, + cut = NULL, + npoints = NULL, + onTheFlyFunctionName = NULL, + onTheFlyParams = NULL, + onTheFlyOptLogFilePath = NULL, + onTheFlyFunction = NULL, + resultType = "tibble", + consistentOutputType = TRUE, + verbose = TRUE +) +} +\arguments{ +\item{emuDBhandle}{emuDB handle as returned by \code{\link{load_emuDB}}} + +\item{seglist}{\code{tibble}, \code{\link{emuRsegs}} or \code{\link{emusegs}} +object obtained by \code{\link{query}}ing a loaded emuDB} + +\item{ssffTrackName}{The name of track that one wishes to extract (see +\code{\link{list_ssffTrackDefinitions}} for the defined ssffTracks of the +emuDB). If the parameter \code{onTheFlyFunctionName} is set, then +this corresponds to the column name af the AsspDataObj (see +\code{wrassp::wrasspOutputInfos[[onTheFlyFunctionName]]$tracks} and +\code{\link[wrassp]{wrasspOutputInfos}} - NOTE: \code{library(wrassp)} might be +necessary to access the \code{wrasspOutputInfos} object without the \code{wrassp::} prefix). +If the parameter \code{onTheFlyFunctionName} is set and this one isn't, then per default +the first track listed in the \code{wrassp::wrasspOutputInfos} is chosen +(\code{wrassp::wrasspOutputInfos[[onTheFlyFunctionName]]$tracks[1]}). + +\code{get_trackdata} has so called constant track names that are always available +for every emuDB. The constant track names are: + +\itemize{ +\item{"MEDIAFILE_SAMPLES": refers to the audio sample values specified +by the "mediafileExtension" entry of the DBconfig.json} +}} + +\item{cut}{An optional cut time for segment data, ranges between +0 and 1, a value of 0.5 will extract data only at the segment midpoint.} + +\item{npoints}{An optional number of points to retrieve for each segment or event. +For segments this requires the \code{cut} parameter to be set; if this is the +case, then data is extracted around the resulting cut time. +For events data is extracted around the event time. If npoints is an odd number, the +samples are centered around the cut-time-sample; if not, they are skewed to the +right by one sample.} + +\item{onTheFlyFunctionName}{Name of wrassp function that will perform the on-the-fly +calculation (see \code{?wrassp} for a list of all the signal processing functions wrassp provides)} + +\item{onTheFlyParams}{A \code{pairlist} of parameters that will be given to the function +passed in by the \code{onTheFlyFunctionName} parameter. This list can easily be +generated by applying the \code{formals} function to the on-the-fly function name and then setting the according +parameter one wishes to change.} + +\item{onTheFlyOptLogFilePath}{Path to optional log file for on-the-fly function} + +\item{onTheFlyFunction}{pass in a function pointer. This function will be called with the path to the +current media file. It is required that the function returns a tibble/data.frame like object that contains +a column called \code{frame_time} that specifies the time point of each row. \code{get_trackdata} will then +extract the rows belonging to the current segment. This allows users to code their own function to be used with +\code{get_trackdata} and allows for most data formats to be used within an emuDB.} + +\item{resultType}{Specify class of returned object. Either \code{"emuRtrackdata"}, +\code{"trackdata"} or \code{"tibble"} == the default (see \code{\link{trackdata}}, \code{\link{emuRtrackdata}} +and \code{\link[tibble]{tibble}} for details about these objects).} + +\item{consistentOutputType}{Prevent converting the output object to a \code{data.frame} +depending on the \code{npoint} and \code{cut} arguments (only applies to output +type "trackdata"). Set to \code{FALSE} if the following legacy \code{emu.track} output +conversion behaviour is desired: If the \code{cut} parameter is not set (the default) an +object of type \code{\link{trackdata}} is returned. If \code{cut} is set and \code{npoints} +is not, or the seglist is of type event and npoints is not set, a \code{\link{data.frame}} is +returned (see the \code{consistentOutputType} to change this behaviour).} + +\item{verbose}{Show progress bars and further information} +} +\value{ +object of type specified with \code{resultType} +} +\description{ +Extract trackdata information from a loaded emuDB that +corresponds to the entries in a segment list. +} +\details{ +This function utilizes the wrassp package for signal processing and +SSFF/audio file handling. It reads time relevant data from a given +segment list (\code{\link{emuRsegs}} or \code{\link{emusegs}}), extracts the +specified trackdata and places it into a +trackdata object (analogous to the deprecated \code{emu.track}). This function +replaces the deprecated \code{emu.track} function. Note that an warning is issued +if the bundles in the \code{\link{emuRsegs}} or \code{\link{emusegs}} object +have in-homogeneous sampling rates as this could lead to inconsistent/erroneous +\code{\link{trackdata}}, \code{\link{emuRtrackdata}} or \code{\link[tibble]{tibble}} result objects. For +more information on the structural elements of an emuDB +see the signal data extraction chapter of the EMU-SDMS manual +(\url{https://ips-lmu.github.io/The-EMU-SDMS-Manual/chap-sigDataExtr.html}). +} +\examples{ +\dontrun{ + +################################## +# prerequisite: loaded ae emuDB +# (see ?load_emuDB for more information) + +# query loaded "ae" emuDB for all "i:" segments of the "Phonetic" level +sl = query(emuDBhandle = ae, + query = "Phonetic == i:") + +# get the corresponding formant trackdata +td = get_trackdata(emuDBhandle = ae, + seglist = sl, + ssffTrackName = "fm") + +# get the corresponding F0 trackdata +# as there is no F0 ssffTrack defined in the "ae" emuDB we will +# calculate the necessary values on-the-fly +td = get_trackdata(emuDBhandle = ae, + seglist = sl, + onTheFlyFunctionName = "ksvF0") + +} + +} +\seealso{ +\code{\link{formals}}, \code{\link[wrassp]{wrasspOutputInfos}}, +\code{\link{trackdata}}, \code{\link{emuRtrackdata}} +} +\keyword{misc} diff --git a/man/import_mediaFiles.Rd b/man/import_mediaFiles.Rd new file mode 100644 index 00000000..19904007 --- /dev/null +++ b/man/import_mediaFiles.Rd @@ -0,0 +1,38 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-database.files.R +\name{import_mediaFiles} +\alias{import_mediaFiles} +\title{Import media files to emuDB} +\usage{ +import_mediaFiles(emuDBhandle, dir, targetSessionName = "0000", verbose = TRUE) +} +\arguments{ +\item{emuDBhandle}{emuDB handle as returned by \code{\link{load_emuDB}}} + +\item{dir}{directory containing mediafiles or session directories} + +\item{targetSessionName}{name of session in which to create the new bundles} + +\item{verbose}{display infos & show progress bar} +} +\description{ +Import new recordings (media files) to emuDB and create bundles. +Looks for files with the defined mediafile extension of the emuDB +(see \code{mediaFileExtension} in vignette \code{emuDB}) in \code{dir} +or in sub-directories thereof (interpreted as sessions), for each mediafile +create a bundle directory +named as the basename of the mediafile in the specified session, and copies +the mediafile into the bundle. If not already present, adds 'OSCI' and +'SPEC' perspectives to the emuDB config file. +} +\examples{ +\dontrun{ +## Add mediafiles from directory + + import_mediaFiles(myEmuDB,dir="/data/mymedia/") + +} +} +\keyword{Emu} +\keyword{database} +\keyword{emuDB} diff --git a/man/is.seglist.Rd b/man/is.seglist.Rd index bcd46421..f486cd1b 100644 --- a/man/is.seglist.Rd +++ b/man/is.seglist.Rd @@ -1,9 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emusegs.R \name{is.seglist} \alias{is.seglist} - \title{is seglist} +\usage{ +is.seglist(object) +} \description{ see function } -\keyword{ internal } - +\keyword{internal} diff --git a/man/is.spectral.Rd b/man/is.spectral.Rd index 41b806a9..132189c0 100644 --- a/man/is.spectral.Rd +++ b/man/is.spectral.Rd @@ -1,30 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/spectralclass.R \name{is.spectral} \alias{is.spectral} - -\title{ Function to test whether the object is of class "spectral"} -\description{ - Returns T or F depending on whether the object is of class "spectral" -} +\title{Function to test whether the object is of class "spectral"} \usage{ is.spectral(dat) } - \arguments{ - \item{dat}{An R object } +\item{dat}{An R object} } - \value{ - A single element logical vector: T or F +A single element logical vector: TRUE or FALSE } - -\author{ Jonathan Harrington} - -\seealso{ -\code{\link{as.spectral}} +\description{ +Returns TRUE or FALSE depending on whether the object is of class "spectral" } - \examples{ + is.spectral(vowlax.dft.5) is.spectral(fric.dft) is.spectral(fric.dft$data) @@ -32,7 +25,12 @@ is.spectral(vowlax.dft.5[1,]) is.spectral(fric.dft[1,1]) -} - +} +\seealso{ +\code{\link{as.spectral}} +} +\author{ +Jonathan Harrington +} \keyword{attribute} diff --git a/man/is.trackdata.Rd b/man/is.trackdata.Rd index 9b64ae43..ba8a3c2b 100644 --- a/man/is.trackdata.Rd +++ b/man/is.trackdata.Rd @@ -1,20 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/trackdata.class.R \name{is.trackdata} \alias{is.trackdata} -\title{ Test whether an object is an Emu trackdata object } -\description{ - Test whether an object is an Emu trackdata object -} +\title{Test whether an object is an Emu trackdata object} \usage{ - is.trackdata(object) +is.trackdata(object) } \arguments{ - \item{object}{ A data object to be tested } +\item{object}{A data object to be tested} } \value{ - Returns TRUE if the argument is a trackdata object. +Returns TRUE if the argument is a trackdata object. +} +\description{ +Test whether an object is an Emu trackdata object +} +\seealso{ +\code{\link{get_trackdata}} } - -\seealso{ \code{\link{emu.track}} } - - \keyword{misc} diff --git a/man/isol.Rd b/man/isol.Rd index b4aae74d..380a2459 100644 --- a/man/isol.Rd +++ b/man/isol.Rd @@ -1,6 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-dataDocs.R +\docType{data} \name{isol} \alias{isol} -\title{Segment list of vowels in a d d context isolated word speech, one male speaker of Australian English from database isolated.} -\usage{isol} -\description{An EMU dataset} +\title{Segment list of vowels in a d d context isolated word speech, one male +speaker of Australian English from database isolated.} +\format{ +segmentlist +} +\description{ +An EMU dataset +} \keyword{datasets} diff --git a/man/isol.fdat.Rd b/man/isol.fdat.Rd index c01796ca..12f7e898 100644 --- a/man/isol.fdat.Rd +++ b/man/isol.fdat.Rd @@ -1,6 +1,13 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-dataDocs.R +\docType{data} \name{isol.fdat} \alias{isol.fdat} \title{Trackdata of formants from the segment list isol} -\usage{isol.fdat} -\description{An EMU dataset} +\format{ +trackdata object +} +\description{ +An EMU dataset +} \keyword{datasets} diff --git a/man/isol.l.Rd b/man/isol.l.Rd index 25ebcfe4..aea0c51b 100644 --- a/man/isol.l.Rd +++ b/man/isol.l.Rd @@ -1,6 +1,13 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-dataDocs.R +\docType{data} \name{isol.l} \alias{isol.l} \title{Vector of vowel phoneme labels from the segment list isol} -\usage{isol.l} -\description{An EMU dataset} +\format{ +vector of vowel phoneme labels +} +\description{ +An EMU dataset +} \keyword{datasets} diff --git a/man/label.Rd b/man/label.Rd index 2c9d3153..a8bdfc67 100644 --- a/man/label.Rd +++ b/man/label.Rd @@ -1,38 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emusegs.R \name{label} -\alias{label.emusegs} \alias{label} +\alias{label.emusegs} \alias{utt.emusegs} \alias{utt} - - \title{Get labels / utterances from segment list} -\description{ - label: extracts the labels from the segment list. - utt: extracts the utterances from the segment list. - -} \usage{ - \method{label}{emusegs}(segs) - \method{utt}{emusegs}(x) +label(segs) } \arguments{ - \item{segs}{ segment list} - \item{x}{ segment list} - +\item{segs}{segment list} } - \value{ - label / utterance vector - +label / utterance vector } - -\author{Jonathan Harrington} - -\seealso{ - \code{\link{segmentlist} \link{start} \link{end}} +\description{ +label: extracts the labels from the segment list. utt: extracts the +utterances from the segment list. } - \examples{ + data(dip) #dip is a segment list - first ten segments only dip[1:10,] @@ -42,6 +30,12 @@ dips.labs = label(dip) dips.labs -} +} +\seealso{ +\code{\link{segmentlist} \link{start} \link{end}} +} +\author{ +Jonathan Harrington +} \keyword{methods} diff --git a/man/label_convert.Rd b/man/label_convert.Rd index 6b585e78..20558950 100644 --- a/man/label_convert.Rd +++ b/man/label_convert.Rd @@ -1,9 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/misc.R \name{label_convert} \alias{label_convert} - \title{convert label} +\usage{ +label_convert(segs.or.labels, match, replace) +} \description{ see function } -\keyword{ internal } - +\keyword{internal} diff --git a/man/label_num.Rd b/man/label_num.Rd index f13b796a..8b4478ed 100644 --- a/man/label_num.Rd +++ b/man/label_num.Rd @@ -1,9 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/misc.R \name{label_num} \alias{label_num} - -\title{ num label } +\title{num label} +\usage{ +label_num(labs) +} \description{ see function } -\keyword{ internal } - +\details{ +depricated function of the legacy EMU system +still available for backward compatibility +} +\keyword{internal} diff --git a/man/linear.Rd b/man/linear.Rd index 249cd6e8..6eae3d07 100644 --- a/man/linear.Rd +++ b/man/linear.Rd @@ -1,30 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/linear.R \name{linear} \alias{linear} -\title{ -Perform linear time normalisation on trackdata. -} +\title{Perform linear time normalisation on trackdata.} \usage{ -linear(dataset, n=20) -} -\description{ -Performs linear time normalisation on trackdata. +linear(dataset, n = 20) } \arguments{ -\item{dataset}{ -A trackdata object as returned by \code{track}. +\item{dataset}{A trackdata object as returned by \code{track}.} + +\item{n}{The number of points (samples) required for each segment.} } -\item{n}{ -The number of points (samples) required for each segment. -}} \value{ A new trackdata object where the data for each segment has the same -number (\code{n}) of samples. +number (\code{n}) of samples. +} +\description{ +Performs linear time normalisation on trackdata. } \details{ -The data for each segment is normaised using the \code{approx} function. +The data for each segment is normalised using the \code{approx} function. } \seealso{ approx } \keyword{misc} -% Converted by Sd2Rd version 0.3-3. diff --git a/man/linear.av.Rd b/man/linear.av.Rd index 9b6b8aeb..9292a9a4 100644 --- a/man/linear.av.Rd +++ b/man/linear.av.Rd @@ -1,9 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/linear.R \name{linear.av} \alias{linear.av} - -\title{ linear av} +\title{linear av} +\usage{ +linear.av(dataset, labs) +} \description{ see function } -\keyword{ internal } - +\keyword{internal} diff --git a/man/list_bundles.Rd b/man/list_bundles.Rd new file mode 100644 index 00000000..8e018d6a --- /dev/null +++ b/man/list_bundles.Rd @@ -0,0 +1,44 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-database.R +\name{list_bundles} +\alias{list_bundles} +\title{List bundles of emuDB} +\usage{ +list_bundles( + emuDBhandle, + session = NULL, + sessionPattern = ".*", + bundlePattern = ".*" +) +} +\arguments{ +\item{emuDBhandle}{emuDB handle as returned by \code{\link{load_emuDB}}} + +\item{session}{optional session (depricated!)} + +\item{sessionPattern}{A regular expression pattern matching session names to +be searched for in the database. Note: "_ses$" is appended to this RegEx automatically} + +\item{bundlePattern}{A regular expression pattern matching bundle names to +be searched for in the database. Note: "_bndl$" is appended to this RegEx automatically} +} +\value{ +data.frame object with columns session and name of bundles +} +\description{ +List all bundles of emuDB or of particular session. +} +\examples{ +\dontrun{ + +################################## +# prerequisite: loaded ae emuDB +# (see ?load_emuDB for more information) + +# list bundles of session "0000" of ae emuDB +list_bundles(emuDBhandle = ae, + session = "0000") + +} + +} diff --git a/man/list_files.Rd b/man/list_files.Rd new file mode 100644 index 00000000..a3231b5f --- /dev/null +++ b/man/list_files.Rd @@ -0,0 +1,50 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-database.files.R +\name{list_files} +\alias{list_files} +\title{List files of emuDB} +\usage{ +list_files( + emuDBhandle, + fileExtension = ".*", + sessionPattern = ".*", + bundlePattern = ".*" +) +} +\arguments{ +\item{emuDBhandle}{emuDB handle as returned by \code{\link{load_emuDB}}} + +\item{fileExtension}{file extension of files} + +\item{sessionPattern}{A (RegEx) pattern matching sessions to be searched from the database} + +\item{bundlePattern}{A (RegEx) pattern matching bundles to be searched from the database} +} +\value{ +file paths as character vector +} +\description{ +List files belonging to emuDB. For +more information on the structural elements of an emuDB +see \code{vignette{emuDB}}. +} +\examples{ +\dontrun{ + +################################## +# prerequisite: loaded ae emuDB +# (see ?load_emuDB for more information) + +# list all files of ae emuDB +list_files(emuDBhandle = ae) + +# list all files of ae emuDB in bundles ending with '3' +list_files(emuDBhandle = ae, bundlePattern=".*3$") + +} + +} +\keyword{Emu} +\keyword{database} +\keyword{emuDB} +\keyword{schema} diff --git a/man/list_sampleRates.Rd b/man/list_sampleRates.Rd new file mode 100644 index 00000000..25df90e4 --- /dev/null +++ b/man/list_sampleRates.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-autoproc_annots.R +\name{list_sampleRates} +\alias{list_sampleRates} +\title{List sample rates of media and annotation (_annot.json) files} +\usage{ +list_sampleRates(emuDBhandle, sessionPattern = ".*", bundlePattern = ".*") +} +\arguments{ +\item{emuDBhandle}{emuDB handle object (see \link{load_emuDB})} + +\item{sessionPattern}{A regular expression pattern matching session names to be searched from the database} + +\item{bundlePattern}{A regular expression pattern matching bundle names to be searched from the database} +} +\value{ +tibble with the columns +\itemize{ +\item session +\item bundle +\item sample_rate_annot_json +\item sample_rate_media_file +} +\code{session}, \code{b} +} +\description{ +List sample rates of media and annotation (_annot.json) files +} diff --git a/man/list_sessions.Rd b/man/list_sessions.Rd new file mode 100644 index 00000000..e20ad678 --- /dev/null +++ b/man/list_sessions.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-database.R +\name{list_sessions} +\alias{list_sessions} +\title{List sessions of emuDB} +\usage{ +list_sessions(emuDBhandle, sessionPattern = ".*") +} +\arguments{ +\item{emuDBhandle}{emuDB handle as returned by \code{\link{load_emuDB}}} + +\item{sessionPattern}{A regular expression pattern matching session names to +be searched for in the database. Note: "_ses$" is appended to this RegEx automatically} +} +\value{ +data.frame object with session names +} +\description{ +List session names of emuDB +} +\examples{ +\dontrun{ + +################################## +# prerequisite: loaded ae emuDB +# (see ?load_emuDB for more information) + +# list all sessions of ae emuDB +list_sessions(emuDBhandle = ae) + +} + +} diff --git a/man/load_emuDB.Rd b/man/load_emuDB.Rd new file mode 100644 index 00000000..f714d23a --- /dev/null +++ b/man/load_emuDB.Rd @@ -0,0 +1,68 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-database.R +\name{load_emuDB} +\alias{load_emuDB} +\title{Load emuDB} +\usage{ +load_emuDB( + databaseDir, + inMemoryCache = FALSE, + connection = NULL, + verbose = TRUE, + ... +) +} +\arguments{ +\item{databaseDir}{directory of the emuDB} + +\item{inMemoryCache}{cache the loaded DB in memory} + +\item{connection}{pass in DBI connection to SQL database if you want to override the default which is to +use an SQLite database either in memory (\code{inMemoryCache = TRUE}) or in the emuDB folder. This is intended +for expert use only!} + +\item{verbose}{be verbose} + +\item{...}{additional parameters} +} +\value{ +emuDB handle object +} +\description{ +Function loads emuDB into its cached representation and makes it accessible from within the +current R session by returning a emuDBhandle object +} +\details{ +In order to access an emuDB from R it is necessary to load the annotation and configuration +files to an emuR internal database format. The function expects a emuDB file structure in directory +\code{databaseDir}. The emuDB configuration file is loaded first. On success the function iterates +through session and bundle directories and loads found annotation files. The parameter \code{inMemoryCache} +determines where the internal database is stored: If \code{FALSE} a database cache file in \code{databaseDir} +is used. When the database is loaded for the first time the function will create a new cache file and store +the data to it. On subsequent loading of the same database the cache is only updated if files have changed, +therefore the loading is then much faster. For this to work the user needs write permissions to +\code{databaseDir} and the cache file. The database is loaded into a volatile in-memory database if +\code{inMemoryCache} is set to \code{TRUE}. +} +\examples{ +\dontrun{ +## Load database ae in directory /homes/mylogin/EMUnew/ae +## assuming an existing emuDB structure in this directory + +ae = load_emuDB("/homes/mylogin/EMU/ae") + +## Load database ae from demo data + +# create demo data in temporary directory +create_emuRdemoData(dir = tempdir()) +# build base path to demo emuDB +demoDatabaseDir = file.path(tempdir(), "emuR_demoData", "ae_emuDB") + +# load demo emuDB +ae = load_emuDB(demoDatabaseDir) + +} +} +\keyword{DBconfig} +\keyword{database} +\keyword{emuDB} diff --git a/man/lob.sub.Rd b/man/lob.sub.Rd index b0746ff1..8ccb925b 100644 --- a/man/lob.sub.Rd +++ b/man/lob.sub.Rd @@ -1,9 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/norm.R \name{lob.sub} \alias{lob.sub} - -\title{ lob sub } +\title{lob sub} +\usage{ +lob.sub(data) +} \description{ see function } -\keyword{ internal } - +\keyword{internal} diff --git a/man/locus.Rd b/man/locus.Rd index da9b9f50..53e56297 100644 --- a/man/locus.Rd +++ b/man/locus.Rd @@ -1,49 +1,59 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/locus.R \name{locus} \alias{locus} -\title{ Calculate locus equations for two-dimensional data } -\description{ - The function plots a locus equation and returns associated -statistical information. +\title{Calculate locus equations for two-dimensional data} +\usage{ +locus( + target, + onset, + labels.vow = NULL, + yxline = TRUE, + plotgraph = TRUE, + axes = TRUE, + ... +) } -\usage{locus (target, onset, labels.vow = NULL, yxline = TRUE, plotgraph = TRUE, axes = TRUE, ...) } - \arguments{ - \item{target}{a numerical vector typically of F2 values at the vowel target} - \item{onset}{a numerical vector typically of the same -length as target of F2 values at the vowel onset} - \item{labels.vow}{an optionally character vector for plotting labels -at the points (target, onset) of the same length as target} +\item{target}{a numerical vector typically of F2 values at the vowel target} + +\item{onset}{a numerical vector typically of the same length as target of +F2 values at the vowel onset} + +\item{labels.vow}{an optionally character vector for plotting labels at the +points (target, onset) of the same length as target} + \item{yxline}{optionally plot the line target = onset. Defaults to True.} - \item{plotgraph}{a logical vector for specifying whether the data -should be plotted. Defaults to True.} + +\item{plotgraph}{a logical vector for specifying whether the data should be +plotted. Defaults to True.} + \item{axes}{A logical vector indicating whether the axes should be plotted} -\item{...}{graphical options \link{par}} -} +\item{...}{graphical options \link{par}} +} \value{ -A list containing regression diagnostics of -the function lm() that can be accessed with -summary() and the estimated locus frequency in \$locus. -A plot of values in the onset x target plane -with superimposed locus equation and line onset=target. +A list containing regression diagnostics of the function lm() that +can be accessed with summary() and the estimated locus frequency in +$locus. A plot of values in the onset x target plane with superimposed +locus equation and line onset=target. +} +\description{ +The function plots a locus equation and returns associated statistical +information. } - \details{ - A locus equation is a straight line regression -fitted with -lm() in which the F2- values typically at the vowel onset -are regressed on those of the target. The slope -can be used to give an indication of target-on-onset -coarticulatory influences. +A locus equation is a straight line regression fitted with lm() in which +the F2- values typically at the vowel onset are regressed on those of the +target. The slope can be used to give an indication of target-on-onset +coarticulatory influences. -The best estimate -of the locus frequency is where the locus equation -bisects the line target = onset. +The best estimate of the locus frequency is where the locus equation +bisects the line target = onset. } - -\author{Jonathan Harrington} \examples{ + # calculate an F2-locus equation for initial [d] # preceding lax vowels produced by female speaker "68". # the onset is taken at the vowel onset; the @@ -66,6 +76,9 @@ result$coeff # point of bisection of on = TRUEarg with the regression line result$locus -} -\keyword{math} \ No newline at end of file +} +\author{ +Jonathan Harrington +} +\keyword{math} diff --git a/man/mahal.Rd b/man/mahal.Rd index 12800b08..2d1863d9 100644 --- a/man/mahal.Rd +++ b/man/mahal.Rd @@ -1,47 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/bayesdist.R \name{mahal} \alias{mahal} -\title{ -Classify using Mahalanobis distance -} +\title{Classify using Mahalanobis distance} \usage{ mahal(data, train) } -\description{ -Classifies using Mahalanobis distance -} \arguments{ -\item{data}{ -A vector or matrix of data +\item{data}{A vector or matrix of data} + +\item{train}{A Gaussian model generated by \code{train}.} } -\item{train}{ -A Gaussian model generated by \code{train}. -}} \value{ A label vector with one element per row of \code{data} } +\description{ +Classifies using Mahalanobis distance +} \details{ The \code{model} argument contains the mean and inverse covariance matrix -(or standard deviation if the data is one-dimensional) for each class -in the training set as well as the class labels. This function -calculates the Mahalanobis distance of each row of \code{data} from each -class mean and assigns the label of the closest mean to that row. The -result is a vector of labels corresponding to the rows of \code{data}. - +(or standard deviation if the data is one-dimensional) for each class in +the training set as well as the class labels. This function calculates the +Mahalanobis distance of each row of \code{data} from each class mean and +assigns the label of the closest mean to that row. The result is a vector +of labels corresponding to the rows of \code{data}. -The Mahalanobis distance between a data point and a class is the -Euclidean distance between the point and the class mean devided by the -covariance matrix for the class. This means that classes with large -covariances will -attract -data points from a larger area than those with small covariances. +The Mahalanobis distance between a data point and a class is the Euclidean +distance between the point and the class mean divided by the covariance +matrix for the class. This means that classes with large covariances will +attract data points from a larger area than those with small covariances. } \references{ -O'Shaughnessy, D. -Speech Communication -(Addison-Wesley: Reading, MA. 1987) +O'Shaughnessy, D. Speech Communication (Addison-Wesley: +Reading, MA. 1987) } \seealso{ train } \keyword{misc} -% Converted by Sd2Rd version 0.3-3. diff --git a/man/mahal.dist.Rd b/man/mahal.dist.Rd index cae8d615..ac92be1f 100644 --- a/man/mahal.dist.Rd +++ b/man/mahal.dist.Rd @@ -1,41 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/bayesdist.R \name{mahal.dist} \alias{mahal.dist} -\title{ -Calculate mahalanobis distances -} +\title{Calculate mahalanobis distances} \usage{ mahal.dist(data, train, labels = NULL) -} -\description{ -Calculates mahalanobis distances - } \arguments{ -\item{data}{ -A matrix of numerical data points. -} -\item{labels}{ -A vector of labels.. -} +\item{data}{A matrix of numerical data points.} + +\item{train}{A gaussian model as returned by the \code{train} function.} -\item{train}{ -A gaussian model as returned by the \code{train} function. -}} +\item{labels}{A vector of labels..} +} \value{ -A matrix of distances with one column for every class (label) in the -gaussian model. +A matrix of distances with one column for every class (label) in +the gaussian model. +} +\description{ +Calculates mahalanobis distances } \details{ The \code{train} function finds the centroids and covariance matrices for a -set of data and corresponding labels: one per unique label. This -function can be used to find the mahalanobis distance of every data -point in a dataset to each of the class centroids. The columns of the -resulting matrix are marked with the label of the centroid to which -they refer. The function \code{mahal} should be used if you want to find -the closest centroid to each data point. +set of data and corresponding labels: one per unique label. This function +can be used to find the mahalanobis distance of every data point in a +dataset to each of the class centroids. The columns of the resulting +matrix are marked with the label of the centroid to which they refer. The +function \code{mahal} should be used if you want to find the closest +centroid to each data point. } \seealso{ train, mahal, bayes.lab, bayes.dist } \keyword{misc} -% Converted by Sd2Rd version 0.3-3. diff --git a/man/mahalanobis.metric.Rd b/man/mahalanobis.metric.Rd index 093c5bc9..c9e85000 100644 --- a/man/mahalanobis.metric.Rd +++ b/man/mahalanobis.metric.Rd @@ -1,9 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/bayesdist.R \name{mahalanobis.metric} \alias{mahalanobis.metric} - \title{mahalanobis metric} +\usage{ +mahalanobis.metric(data, mean, invcov) +} \description{ see function } -\keyword{ internal } - +\keyword{internal} diff --git a/man/make.emuRsegs.Rd b/man/make.emuRsegs.Rd new file mode 100644 index 00000000..1585316e --- /dev/null +++ b/man/make.emuRsegs.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-emuRsegs.R +\name{make.emuRsegs} +\alias{make.emuRsegs} +\title{Make emuDB segment list} +\usage{ +make.emuRsegs(dbName, seglist, query, type) +} +\arguments{ +\item{dbName}{name of emuDB} + +\item{seglist}{segment list data.frame} + +\item{query}{query string} + +\item{type}{type of list elements} +} +\description{ +Make emuDB segment list +} diff --git a/man/make.seglist.Rd b/man/make.seglist.Rd index 92b8c9b0..0d80d7df 100644 --- a/man/make.seglist.Rd +++ b/man/make.seglist.Rd @@ -1,46 +1,53 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emusegs.R \name{make.seglist} \alias{make.seglist} -\title{ Make an Emu segment list from the various components } -\description{ - This is the appropriate way to make an Emu segment list and ensure - that it has all of the required components. -} +\title{Make an Emu segment list from the various components} \usage{ make.seglist(labels, start, end, utts, query, type, database) } \arguments{ - \item{labels}{ A character vector of labels for each segment } - \item{start}{ A vector of start times } - \item{end}{ A vector of end times } - \item{utts}{ A character vector of utterance names } - \item{query}{ A query string } - \item{type}{ \code{segment} or \code{event} } - \item{database}{ The database name associated with the segment list } -} -\details{ - An Emu segment list is the result of a query to a speech database (see - \code{\link{emu.query}}) and has one row per matching segment or event - from the query. Each row lists the label, start and end times (in - milliseconds) and utterance name for the segment. This information - is used by \code{\link{emu.track}} and other functions to extract - data corresponding to these segments. +\item{labels}{A character vector of labels for each segment} + +\item{start}{A vector of start times} + +\item{end}{A vector of end times} + +\item{utts}{A character vector of utterance names} + +\item{query}{A query string} + +\item{type}{\code{segment} or \code{event}} - In order to ensure the proper format for segment lists and to ensure - against future changes to the format, \code{make.seglist} should be - used whenever you wish to create a segment list. Another function, - \code{\link{modify.seglist}} can be used to change some part of an - existing segment list. The functions \code{\link{label.emusegs}}, - \code{\link{start.emusegs}}, \code{\link{end.emusegs}} and \code{\link{utt.emusegs}} can - be used to access the different columns of the segment list. +\item{database}{The database name associated with the segment list} } \value{ - An Emu segment list. +An Emu segment list. } -\author{ Steve Cassidy } -\seealso{ \code{\link{modify.seglist}}, \code{\link{label.emusegs}}} +\description{ +This is the appropriate way to make an Emu segment list and ensure that it +has all of the required components. +} +\details{ +An Emu segment list is the result of a query to a speech database (see +\code{\link{query}}) and has one row per matching segment or event from +the query. Each row lists the label, start and end times (in milliseconds) +and utterance name for the segment. This information is used by +\code{\link{get_trackdata}} and other functions to extract data corresponding +to these segments. +In order to ensure the proper format for segment lists and to ensure +against future changes to the format, \code{make.seglist} should be used +whenever you wish to create a segment list. Another function, +\code{\link{modify.seglist}} can be used to change some part of an existing +segment list. The functions \code{\link{label.emusegs}}, +\code{\link{start.emusegs}}, \code{\link{end.emusegs}} and +\code{\link{utt.emusegs}} can be used to access the different columns of +the segment list. +} \examples{ + l <- c("A", "B", "C") s <- 1:3 e <- 2:4 @@ -63,6 +70,12 @@ make.seglist(labels, start, end, utts, query, type, database) # segment durations should all be 1 dur(segs) == c(1,1,1) + +} +\seealso{ +\code{\link{modify.seglist}}, \code{\link{label.emusegs}} +} +\author{ +Steve Cassidy } \keyword{misc} - diff --git a/man/makelab.Rd b/man/makelab.Rd index 69a37624..6eaab79f 100644 --- a/man/makelab.Rd +++ b/man/makelab.Rd @@ -1,31 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/makelab.R \name{makelab} \alias{makelab} - -\title{ Write out ESPS-style label files } -\description{ -Writes out separate ESPS-label files for each utterance to a specified directory. +\title{Write out ESPS-style label files} +\usage{ +makelab(vectimes, uttname, dir, extn = "xlab", labels = NULL) } -\usage{makelab (vectimes, uttname, dir, extn = "xlab", labels = NULL) -} - \arguments{ - \item{vectimes}{a vector of times} - \item{uttname}{a character vector of the same length as vectimes giving the utterance name associated with each element of vectimes} - \item{dir}{a character specifying the directory} - \item{extn}{a character specifying the extension of the resulting files. Defaults to xlab} - \item{labels}{either a single character vector or a character vector the same length as vectimes. -Defaults to "T"} -} +\item{vectimes}{a vector of times} +\item{uttname}{a character vector of the same length as vectimes giving the +utterance name associated with each element of vectimes} -\value{ - ESPS-style label files are written out to the directory of the user's choice. -One ESPS-label file is created for each utterance containing -all time values for that utterance.} +\item{dir}{a character specifying the directory} -\author{Jonathan Harrington} +\item{extn}{a character specifying the extension of the resulting files. +Defaults to xlab} +\item{labels}{either a single character vector or a character vector the +same length as vectimes. Defaults to "T"} +} +\value{ +ESPS-style label files are written out to the directory of the +user's choice. One ESPS-label file is created for each utterance containing +all time values for that utterance. +} +\description{ +Writes out separate ESPS-label files for each utterance to a specified +directory. +} \examples{ + #first two segments (for the whole example) of segmentlist vowlax vowlax[1:2,] @@ -51,6 +56,9 @@ all time values for that utterance.} #the first two segments are from the same utterance, #thus one label file was created in the R_HOME directory -} -\keyword{IO} \ No newline at end of file +} +\author{ +Jonathan Harrington +} +\keyword{IO} diff --git a/man/matscan.Rd b/man/matscan.Rd index 4fac875e..86e0d3b9 100644 --- a/man/matscan.Rd +++ b/man/matscan.Rd @@ -1,40 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/misc.R \name{matscan} \alias{matscan} -\title{ -Read matrix data from a file -} +\title{Read matrix data from a file} \usage{ -matscan(file, num.cols=count.fields(file)[1], what=0, sk=0) -} -\description{ -Reads matrix data from a file +matscan(file, num.cols = utils::count.fields(file)[1], what = 0, sk = 0) } \arguments{ -\item{file}{ -A filename. -} -\item{num.cols}{ -The number of columns of data in the file. +\item{file}{A filename.} + +\item{num.cols}{The number of columns of data in the file.} + +\item{what}{A template for the data elements in the file, it should be a +number for numeric data (the default) or a string for string data. Note +that an Splus matrix can only hold one type of data (string or numeric), +for mixed types use data tables and the \code{read.table} function.} + +\item{sk}{The number of leading lines of the file to skip.} } -\item{what}{ -A template for the data elements in the file, it should be a number for -numeric data (the default) or a string for string data. Note that an -Splus matrix can only hold one type of data (string or numeric), for -mixed types use data tables and the \code{read.table} function. -} -\item{sk}{ -The number of leading lines of the file to skip. -}} \value{ -A matrix corresponding to the data in \code{file}. +A matrix corresponding to the data in \code{file}. +} +\description{ +Reads matrix data from a file } \details{ -This function has been partially superceeded by the introduction of -data frames and the read.table function. It is still useful however -for reading data into Splus matrix objects. +This function has been partially superseded by the introduction of data +frames and the read.table function. It is still useful however for reading +data into Splus matrix objects. } \seealso{ read.table } \keyword{misc} -% Converted by Sd2Rd version 0.3-3. diff --git a/man/mel.Rd b/man/mel.Rd index b00a3f27..a00e3314 100644 --- a/man/mel.Rd +++ b/man/mel.Rd @@ -1,56 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/mel.R \name{mel} \alias{mel} \alias{mel.trackdata} \alias{mel.spectral} - -\title{ -Convert Hz to the mel scale -} - -\description{ - The calculation is done using the formulae -mel = 1/log(2) * (log(1 + (Hz/1000))) * 1000 -where Hz is the frequency in Hz. - -} - - +\title{Convert Hz to the mel scale} \usage{ mel(a) } \arguments{ \item{a}{A vector or matrix of data or a spectral object.} - } \value{ -A vector or matrix or spectral object of the same length and dimensions as data. +A vector or matrix or spectral object of the same length and +dimensions as data. } -\details{ -If 'data' is a spectral object, then -the frequencies are changed so that they are proportional -to the mel scale and such that the mel intervals -between frequencies are constant between the lowest -and highest frequencies. More specifically, -suppose that a spectral object has frequencies -at 0, 1000, 2000, 3000, 4000 Hz. Then the corresponding -frequencies extend in mel between 0 and 2321.928 mel (=4000 Hz -in mels) -in four equal intervals, and linear interpolation -is used with the 'approx' function to obtain -the dB values at those frequencies. +\description{ +The calculation is done using the formulae mel = 1/log(2) * (log(1 + +(Hz/1000))) * 1000 where Hz is the frequency in Hz. } - -\author{Jonathan Harrington} - -\references{ Traunmueller, H. (1990) \"Analytical expressions for the tonotopic sensory scale\" J. Acoust. Soc. Am. 88: 97-100.} - -\seealso{ - \code{\link{bark}}, - \code{\link{plot.spectral}} +\details{ +If 'data' is a spectral object, then the frequencies are changed so that +they are proportional to the mel scale and such that the mel intervals +between frequencies are constant between the lowest and highest +frequencies. More specifically, suppose that a spectral object has +frequencies at 0, 1000, 2000, 3000, 4000 Hz. Then the corresponding +frequencies extend in mel between 0 and 2321.928 mel (=4000 Hz in mels) in +four equal intervals, and linear interpolation is used with the 'approx' +function to obtain the dB values at those frequencies. } - \examples{ + #convert Hertz values to mel vec <- c(500, 1500, 2500) @@ -63,7 +44,7 @@ the dB values at those frequencies. mel(vec) - # convert the \$data values in a trackdata object to mel + # convert the $data values in a trackdata object to mel # create a new track data object t1 <- dip.fdat @@ -78,7 +59,7 @@ the dB values at those frequencies. # that it is proportional to the mel scale. w = mel(e.dft) -par(mfrow=c(1,2)) +oldpar = par(mfrow=c(1,2)) plot(w, type="l") @@ -89,8 +70,18 @@ plot(e.dft, freq=mel(trackfreq(e.dft))) # the latter has a greater concentration of values # in a higher frequency range. -} +par(oldpar) -% Converted by Sd2Rd version 0.3-3. -\keyword{math} \ No newline at end of file +} +\references{ +Traunmueller, H. (1990) "Analytical expressions for the +tonotopic sensory scale" J. Acoust. Soc. Am. 88: 97-100. +} +\seealso{ +\code{\link{bark}}, \code{\link{plot.spectral}} +} +\author{ +Jonathan Harrington +} +\keyword{math} diff --git a/man/mel.default.Rd b/man/mel.default.Rd index 5483062c..3f0d2d7f 100644 --- a/man/mel.default.Rd +++ b/man/mel.default.Rd @@ -1,9 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/mel.R \name{mel.default} \alias{mel.default} - -\title{ mel default } +\title{mel default} +\usage{ +\method{mel}{default}(a) +} \description{ see function } -\keyword{ internal } - +\keyword{internal} diff --git a/man/modify.seglist.Rd b/man/modify.seglist.Rd index 6569f8b2..07b9598d 100644 --- a/man/modify.seglist.Rd +++ b/man/modify.seglist.Rd @@ -1,44 +1,60 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emusegs.R \name{modify.seglist} \alias{modify.seglist} -\title{ Modify one of the components of an Emu segment list } -\description{ - This function can be used to modify one of the parts of an - Emu segment list while leaving the other parts unchanged. -} +\title{Modify one of the components of an Emu segment list} \usage{ -modify.seglist(segs, labels, start, end, utts, query, type, database) +modify.seglist( + segs, + labels = label.emusegs(segs), + start = start.emusegs(segs), + end = end.emusegs(segs), + utts = utt.emusegs(segs), + query = emusegs.query(segs), + type = emusegs.type(segs), + database = emusegs.database(segs) +) } \arguments{ - \item{segs}{ A segment list to modify, a modified copy is returned } - \item{labels}{ A new label vector } - \item{start}{ A new start time vector } - \item{end}{ A new end time vector } - \item{utts}{ A new vector of utterance labels } - \item{query}{ A new query string to associate with the segment list } - \item{type}{ A new type string } - \item{database}{ A new database name } -} -\details{ - An Emu segment list has a number of components and is stored as an R - object of class \code{emusegs}. This function can be used to modify a - segment list while retaining all of the proper structures. +\item{segs}{A segment list to modify, a modified copy is returned} + +\item{labels}{A new label vector} - Any new vectors passed to the function must have the same length as - the segment list itself for this call to succeed. +\item{start}{A new start time vector} - All arguments are optional and default to not modifying the segment - list if not supplied. +\item{end}{A new end time vector} - The original segment list is not modified, instead, a modified copy is - returned. +\item{utts}{A new vector of utterance labels} + +\item{query}{A new query string to associate with the segment list} + +\item{type}{A new type string} + +\item{database}{A new database name} } \value{ - An Emu segment list. +An Emu segment list. +} +\description{ +This function can be used to modify one of the parts of an Emu segment list +while leaving the other parts unchanged. } -\author{ Steve Cassidy } -\seealso{ \code{\link{emu.query}} } +\details{ +An Emu segment list has a number of components and is stored as an R object +of class \code{emusegs}. This function can be used to modify a segment +list while retaining all of the proper structures. + +Any new vectors passed to the function must have the same length as the +segment list itself for this call to succeed. + +All arguments are optional and default to not modifying the segment list if +not supplied. +The original segment list is not modified, instead, a modified copy is +returned. +} \examples{ + data(vowlax) segs = vowlax # extend the start times by 10ms @@ -48,6 +64,12 @@ newsegs <- modify.seglist( segs, start=start(segs)+10 ) # this will affect where emu.track looks to find data newsegs <- modify.seglist( segs, database="notdemo" ) -} +} +\seealso{ +\code{\link{query}} +} +\author{ +Steve Cassidy +} \keyword{misc} diff --git a/man/moments.Rd b/man/moments.Rd index 3438cbd5..5362b0ca 100644 --- a/man/moments.Rd +++ b/man/moments.Rd @@ -1,33 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/moments.R \name{moments} \alias{moments} -\title{ Function to calculate statistical moments } -\description{ - The function calculates the first 4 moments, i.e. the mean, variance, -skew, kurtosis. -} +\title{Function to calculate statistical moments} \usage{ moments(count, x, minval = FALSE) } \arguments{ - \item{count}{ A vector of the observed instances per class } - \item{x}{ A vector of the same length as count defining the class. -If missing, and if count is of class spectral, then x is equal to trackfreq(count). If x is missing and is not of class spectral, then x default to 0:(length(count)-1) } - \item{minval}{ If T, subtract min(count) from count so that -the minimum value of count is zero. This is principally -used in calculating spectral moments where count is in decibels, -and more generally if count contains negative values.} +\item{count}{A vector of the observed instances per class} + +\item{x}{A vector of the same length as count defining the class. If +missing, and if count is of class spectral, then x is equal to +trackfreq(count). If x is missing and is not of class spectral, then x +default to 0:(length(count)-1)} + +\item{minval}{If TRUE, subtract min(count) from count so that the minimum +value of count is zero. This is principally used in calculating spectral +moments where count is in decibels, and more generally if count contains +negative values.} +} +\description{ +The function calculates the first 4 moments, i.e. the mean, variance, skew, +kurtosis. } \details{ - The units of the first moment are the same as x, -the units of the second moment are x\eqn{\mbox{\textasciicircum}}{^}2, and the -third and fourth moments are dimensionless. +The units of the first moment are the same as x, the units of the second +moment are x\eqn{\mbox{\textasciicircum}}{^}2, and the third and fourth +moments are dimensionless. } - -\references{ Snedecor, G & Cochran, W. 'Statistical Methods' Iowa State Press. -Wuensch,K., 2005 } -\author{ Jonathan Harrington } - \examples{ + # first four moments of a vector mom <- moments(bridge[,2]) # the above is the same as moments(bridge[,2], 0:12) @@ -38,6 +40,13 @@ mom <- moments(e.dft[-1], minval=TRUE) # the temporal skew of F1 for the 10th segment. Use m <- moments(vowlax.fdat[10,1]$data)[3] -} +} +\references{ +Snedecor, G & Cochran, W. 'Statistical Methods' Iowa State +Press. Wuensch,K., 2005 +} +\author{ +Jonathan Harrington +} \keyword{math} diff --git a/man/mu.colour.Rd b/man/mu.colour.Rd index 7d9598c2..2f65e785 100644 --- a/man/mu.colour.Rd +++ b/man/mu.colour.Rd @@ -1,110 +1,77 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/mu.colour.R \name{mu.colour} \alias{mu.colour} - -\title{ Function for specifying color, linetype, and line-widths in - EMU plotting functions. } -\description{ - The function specifies color, linetype and linewidths - in EMU plotting functions as is used mostly in calls - from within plot.trackdata, plot.spectral, eplot, and dplot -} +\title{Function for specifying color, linetype, and line-widths in EMU plotting +functions.} \usage{ mu.colour(labs, col = TRUE, linetype = FALSE, lwd = NULL, pch = NULL) } - \arguments{ - \item{labs}{ A vector of character labels } - \item{col}{ A code passed to the 'col' - argument in plotting functions. There are four possibilities. Either - logical, a character vector, or a numeric vector. - In the first case, if TRUE, then a different numeric - code is given for each unique label type. For example, - if labs is c("a", "b", "a", "c"), then - the output is c(1, 2, 1, 3). If F, then for this - example, the output is c(1, 1, 1, 1). In - the second case, the character vector can be - either a single element specifying a character, - or there can be as many elements as there are - unique colors. Thus if col = "red", then - for the example c("a", "b", "a", "c"), the output - is c("red", "red", "red", "red"). Alternatively, - since there are three unique labels for this example, - then the user could specify col = c("green", "red", "blue") - and the output is c("green", "red", "green", "blue") - if labs is c("a", "b", "a", "c"). - In the third case, 'col'. can be either - a single element numeric vector, or its length must - be equal to the number of unique types in labs. - For example, if col=3 and if labs = c("a", "b", "a", "c"), - then the output is c(3, 3, 3, 3). Alternatively, - if col = c(2,3,1), then the output is - c(2, 3, 2, 1) for the same example. Finally, - col can be specified as a character or numeric - vector that is the same length as labs, allowing the - user to choose the color in which each line should be drawn. - The default is col = TRUE.} - \item{linetype}{A code specifying - linetypes, i.e. the values passed to lty - in plotting functions.There are 2 possibilities. - Either - logical, a character vector, or a numeric vector. - In the first case, if TRUE, then a different numeric - code is given for each unique label type. For example, - if labs is c("a", "b", "a", "c"), then - the output is c(1, 2, 1, 3). If F, then for this - example, the output is c(1, 1, 1, 1). - In the second case, 'linetype' can be either - a single element numeric vector, or its length must - be equal to the number of unique types in labs. - For example, if linetype=3 and if labs = c("a", "b", "a", "c"), - then the output is c(3, 3, 3, 3). Alternatively, - if linetype = c(2,3,1), then the output is - c(2, 3, 2, 1) for the same example. Finally, - linetype can be specified as a numeric - vector that is the same length as labs, allowing the - user to choose the linetype in which each line should be drawn. - The default is linetype=F - } - \item{lwd}{A code passed to the lwd argument - in plotting functions. - 'lwd' can be either - a single element numeric vector, or its length must - be equal to the number of unique types in labs. - For example, if lwd=3 and if labs = c("a", "b", "a", "c"), - then the output is c(3, 3, 3, 3). Alternatively, - if lwd = c(2,3,1), then the output is - c(2, 3, 2, 1) for the same example. The default is - NULL in which case all lines are drawn with lwd=1 } - \item{pch}{ A code passed to the pch argument - in plotting functions. Functions in the same way as lwd above} -} -\details{ - Parameters are also supplied for use with the function 'legend' -} -\value{ - If it is a LISTRUE, use - \item{colour}{A code for the color'} - \item{linetype}{A code for the linetype} - \item{lwd}{A code for the line width} - \item{legend}{A list consisting of \$legend\$lab, - \$legend\$lty and \$legend\$lwd that specify - the parameters for the 'legend' function. +\item{labs}{A vector of character labels} - ... - } -} +\item{col}{A code passed to the 'col' argument in plotting functions. There +are four possibilities. Either logical, a character vector, or a numeric +vector. In the first case, if TRUE, then a different numeric code is given +for each unique label type. For example, if labs is c("a", "b", "a", "c"), +then the output is c(1, 2, 1, 3). If FALSE, then for this example, the output +is c(1, 1, 1, 1). In the second case, the character vector can be either a +single element specifying a character, or there can be as many elements as +there are unique colors. Thus if col = "red", then for the example c("a", +"b", "a", "c"), the output is c("red", "red", "red", "red"). Alternatively, +since there are three unique labels for this example, then the user could +specify col = c("green", "red", "blue") and the output is c("green", "red", +"green", "blue") if labs is c("a", "b", "a", "c"). In the third case, +'col'. can be either a single element numeric vector, or its length must be +equal to the number of unique types in labs. For example, if col=3 and if +labs = c("a", "b", "a", "c"), then the output is c(3, 3, 3, 3). +Alternatively, if col = c(2,3,1), then the output is c(2, 3, 2, 1) for the +same example. Finally, col can be specified as a character or numeric +vector that is the same length as labs, allowing the user to choose the +color in which each line should be drawn. The default is col = TRUE.} -\author{ Steve Cassidy, modified by Jonathan Harrington } +\item{linetype}{A code specifying linetypes, i.e. the values passed to lty +in plotting functions.There are 2 possibilities. Either logical, a +character vector, or a numeric vector. In the first case, if TRUE, then a +different numeric code is given for each unique label type. For example, if +labs is c("a", "b", "a", "c"), then the output is c(1, 2, 1, 3). If FALSE, then +for this example, the output is c(1, 1, 1, 1). In the second case, +'linetype' can be either a single element numeric vector, or its length +must be equal to the number of unique types in labs. For example, if +linetype=3 and if labs = c("a", "b", "a", "c"), then the output is c(3, 3, +3, 3). Alternatively, if linetype = c(2,3,1), then the output is c(2, 3, 2, +1) for the same example. Finally, linetype can be specified as a numeric +vector that is the same length as labs, allowing the user to choose the +linetype in which each line should be drawn. The default is linetype=FALSE} -\seealso{ - \code{\link{plot.trackdata}} - \code{\link{dplot}} - \code{\link{eplot}} - \code{\link{plot.spectral}} -} +\item{lwd}{A code passed to the lwd argument in plotting functions. 'lwd' +can be either a single element numeric vector, or its length must be equal +to the number of unique types in labs. For example, if lwd=3 and if labs = +c("a", "b", "a", "c"), then the output is c(3, 3, 3, 3). Alternatively, if +lwd = c(2,3,1), then the output is c(2, 3, 2, 1) for the same example. The +default is NULL in which case all lines are drawn with lwd=1} +\item{pch}{A code passed to the pch argument in plotting functions. +Functions in the same way as lwd above} +} +\value{ +If it is a LISTRUE, use \item{colour}{A code for the color'} +\item{linetype}{A code for the linetype} \item{lwd}{A code for the line +width} \item{legend}{A list consisting of $legend$lab, $legend$lty and +$legend$lwd that specify the parameters for the 'legend' function. +... } +} +\description{ +The function specifies color, linetype and linewidths in EMU plotting +functions as is used mostly in calls from within plot.trackdata, +plot.spectral, eplot, and dplot +} +\details{ +Parameters are also supplied for use with the function 'legend' +} \examples{ + # examples will be given using the above functions # b/w but with different linetypes eplot(vowlax.fdat.5[,1:2], vowlax.l, col=FALSE, lty=TRUE) @@ -129,8 +96,13 @@ dplot(vowlax.fdat[,2], vowlax.l, average=TRUE, lty=2, pch=4, type="b", xlim=c(40 # the default except plot everything with a dotted line and # with double line thickness eplot(vowlax.fdat.5[,1:2], vowlax.l, lty=2, lwd=2) -} - - -\keyword{utilities} \ No newline at end of file +} +\seealso{ +\code{\link{plot.trackdata}} \code{\link{dplot}} +\code{\link{eplot}} \code{\link{plot.spectral}} +} +\author{ +Steve Cassidy, modified by Jonathan Harrington +} +\keyword{utilities} diff --git a/man/mu.colour.get.Rd b/man/mu.colour.get.Rd index f2871ec2..39fb16f5 100644 --- a/man/mu.colour.get.Rd +++ b/man/mu.colour.get.Rd @@ -1,9 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/mu.colour.R \name{mu.colour.get} \alias{mu.colour.get} - -\title{ get a EMU color} +\title{get a EMU color} +\usage{ +mu.colour.get(col.lty, label) +} \description{ see function } -\keyword{ internal } - +\keyword{internal} diff --git a/man/mu.legend.Rd b/man/mu.legend.Rd index 4076678a..fc704f3e 100644 --- a/man/mu.legend.Rd +++ b/man/mu.legend.Rd @@ -1,8 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/mu.legend.R \name{mu.legend} \alias{mu.legend} -\title{ make a EMU legend} +\title{make a EMU legend} +\usage{ +mu.legend(legn, xlim, ylim) +} \description{ see function } -\keyword{ internal } - +\keyword{internal} diff --git a/man/mu.linetype.get.Rd b/man/mu.linetype.get.Rd index 0fb9fe43..e6085026 100644 --- a/man/mu.linetype.get.Rd +++ b/man/mu.linetype.get.Rd @@ -1,9 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/mu.colour.R \name{mu.linetype.get} \alias{mu.linetype.get} - -\title{ mu linetype get} +\title{mu linetype get} +\usage{ +mu.linetype.get(col.lty, label) +} \description{ see function } -\keyword{ internal } - +\keyword{internal} diff --git a/man/muclass.Rd b/man/muclass.Rd index fb86301a..c68264f3 100644 --- a/man/muclass.Rd +++ b/man/muclass.Rd @@ -1,31 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/misc.R \name{muclass} \alias{muclass} -\title{ -Find common elements in vectors -} +\title{Find common elements in vectors} \usage{ muclass(labels, class) } -\description{ -Finds common elements in vectors -} \arguments{ -\item{labels}{ -A vector of labels. +\item{labels}{A vector of labels.} + +\item{class}{A label or vector of labels.} } -\item{class}{ -A label or vector of labels. -}} \value{ -A logical vector which is T for each element in \code{labels} which matches -\code{class} or an element of \code{class}. +A logical vector which is TRUE for each element in \code{labels} which +matches \code{class} or an element of \code{class}. } -\seealso{ -match +\description{ +Finds common elements in vectors } \examples{ + muclass(c("a", "b", "c"), c("a", "c")) -#[1] T F T +#[1] TRUE FALSE TRUE + +} +\seealso{ +match } \keyword{misc} -% Converted by Sd2Rd version 0.3-3. diff --git a/man/nearey.sub.Rd b/man/nearey.sub.Rd index bec2db79..9390ca49 100644 --- a/man/nearey.sub.Rd +++ b/man/nearey.sub.Rd @@ -1,9 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/norm.R \name{nearey.sub} \alias{nearey.sub} - -\title{ nearey sub } +\title{nearey sub} +\usage{ +nearey.sub(data) +} \description{ see function } -\keyword{ internal } - +\keyword{internal} diff --git a/man/norm.Rd b/man/norm.Rd index 0b56b09d..5ae3f5b2 100644 --- a/man/norm.Rd +++ b/man/norm.Rd @@ -1,50 +1,45 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/norm.R \name{norm} \alias{norm} -\title{ -Normalise speech data -} +\title{Normalise speech data} \usage{ -norm(data, speakerlabs, type="gerst", rescale=FALSE) -} -\description{ -Normalises speech data +norm(data, speakerlabs, type = "gerst", rescale = FALSE) } \arguments{ -\item{data}{ -A matrix of data. Can be either an n-columned matrix or a trackdata -object as returned by \code{track}. -} -\item{speakerlabs}{ -A parallel vector of speaker labels. -} -\item{type}{ -The type of extrinsic normalisation to be performed on data. type can -be \code{"nearey"}, \code{"cen"}, \code{"lob"}, \code{"gerst"} (default), for normalisation -according to Nearey, centroid method, Lobanov, or Gerstman. +\item{data}{A matrix of data. Can be either an n-columned matrix or a +trackdata object as returned by \code{track}.} + +\item{speakerlabs}{A parallel vector of speaker labels.} + +\item{type}{The type of extrinsic normalisation to be performed on data. +type can be \code{"nearey"}, \code{"cen"}, \code{"lob"}, \code{"gerst"} +(default), for normalisation according to Nearey, centroid method, Lobanov, +or Gerstman.} + +\item{rescale}{Currently only works for Lobanov normalisation. The +normalised values are multiplied by the standard deviation and then the +mean is added, where the standard deviation and mean are across all +original speakers' unnormalised data.} } -\item{rescale}{ -Currently only works for Lobanov normalisation. The normalised values are -multiplied by the standard deviation and then the mean is added, where the -standard deviation and mean are across all original speakers' unnormalised -data. -}} \value{ -Normalised values of data are retuned, having the same structure as data. +Normalised values of data are returned, having the same structure as +data. +} +\description{ +Normalises speech data } \details{ -Types of normalisation: -\code{"nearey"}, Nearey : Find the log of each data element and subtract -from each the mean of the logarithmic data. -\code{"cen"}, centroid: Find the mean of the data column and subtract it from each -data element in that column. -\code{"lob"}, Lobanov: Find the mean and standard deviation of the data. Subtract -the mean from each data element and devide each result by the standard -deviation. -"gerst", Gerstman: Subtract from the data the minimun formant value then devide -by the formant range. +Types of normalisation: \code{"nearey"}, Nearey : Find the log of each data +element and subtract from each the mean of the logarithmic data. +\code{"cen"}, centroid: Find the mean of the data column and subtract it +from each data element in that column. \code{"lob"}, Lobanov: Find the +mean and standard deviation of the data. Subtract the mean from each data +element and divide each result by the standard deviation. "gerst", +Gerstman: Subtract from the data the minimum formant value then divide by +the formant range. } \seealso{ track } \keyword{misc} -% Converted by Sd2Rd version 0.3-3. diff --git a/man/normalize_length.Rd b/man/normalize_length.Rd new file mode 100644 index 00000000..36cd992b --- /dev/null +++ b/man/normalize_length.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-emuRtrackdata.R +\name{normalize_length} +\alias{normalize_length} +\title{Normalize length of segments contained in a \code{data.frame} like object returned by \code{\link{get_trackdata}}} +\usage{ +normalize_length(x, colNames = NULL, N = 21) +} +\arguments{ +\item{x}{data.frame like object that was generated by \code{\link{get_trackdata}} with +the resultType set to either \code{emuRtrackdata} or \code{tibble}} + +\item{colNames}{character vector containing names of columns to normalize. If not set all +data columns are normalized (T1-TN as well as other numeric columns).} + +\item{N}{specify length of normalized segments (each segment in resulting +object will consist of \code{N} rows).} +} +\value{ +data.frame like object containing the length normalized segments +} +\description{ +Normalize length of segments contained in a \code{data.frame} like object returned by \code{\link{get_trackdata}} +} +\seealso{ +\code{\link{emuRtrackdata} \link{emuRsegs}} +} diff --git a/man/outliers.Rd b/man/outliers.Rd index 08b0f04a..48d16cca 100644 --- a/man/outliers.Rd +++ b/man/outliers.Rd @@ -1,9 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/outliers.R \name{outliers} \alias{outliers} - -\title{ outliers} +\title{outliers} +\usage{ +outliers(data, labels, threshold) +} \description{ see function } -\keyword{ internal } - +\keyword{internal} diff --git a/man/palate.Rd b/man/palate.Rd index d2c88122..5d2ad20e 100644 --- a/man/palate.Rd +++ b/man/palate.Rd @@ -1,57 +1,45 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/palate.R \name{palate} \alias{palate} - -\title{ Obtain a three-dimensional palatographic array } -\description{ -Function to calculate a three-dimensional -palatographic array from. +\title{Obtain a three-dimensional palatographic array} +\usage{ +palate(epgdata) } -\usage{ palate(epgdata) } - \arguments{ - \item{epgdata}{ An eight-columned EPG-compressed trackdata object -or an eight columned matrix of EPG-compressed trackdata.} -% \item{weights}{A vector of five values that are applied to -%EPG rows 1-5 respectively in epgai(). A vector of -%four values that are applied to columns 1 and 8, -%to columns 2 and 7, columns 3 and 6, columns 4 and 5 -%respectively. Defaults to the values given in Recasens & Pallares (2001). } -} - -\details{ - An EPG compressed trackdata object that is output -from the Reading system contains eight columns of data -and each row value when converted to binary -numbers (after adding 1) gives the corresponding -EPG contact patterns. This function does -the conversion to binary values. +\item{epgdata}{An eight-columned EPG-compressed trackdata object or an +eight columned matrix of EPG-compressed trackdata.} } - \value{ An array of three dimensions of 8 rows x 8 columns x n segments -where n is the number of segments in the trackdata object or -matrix. The rows and columns are given dimension names, -the dimension names of the third dimension contains -the times at which the palatograms occur. +where n is the number of segments in the trackdata object or matrix. The +rows and columns are given dimension names, the dimension names of the +third dimension contains the times at which the palatograms occur. +} +\description{ +Function to calculate a three-dimensional palatographic array from. +} +\details{ +An EPG compressed trackdata object that is output from the Reading system +contains eight columns of data and each row value when converted to binary +numbers (after adding 1) gives the corresponding EPG contact patterns. This +function does the conversion to binary values. } - \examples{ + # convert an EPG-compressed trackdata object to palatograms p <- palate(coutts.epg) # convert an EPG-compressed matrix to palatograms p <- palate(dcut(coutts.epg, 0, prop=TRUE)) -} - -\author{ Jonathan Harrington } - -\seealso{ -\code{\link{epgcog}} -\code{\link{epggs}} -\code{\link{epgai}} +} +\seealso{ +\code{\link{epgcog}} \code{\link{epggs}} \code{\link{epgai}} \code{\link{epgplot}} } - -\keyword{datagen} \ No newline at end of file +\author{ +Jonathan Harrington +} +\keyword{datagen} diff --git a/man/perform.Rd b/man/perform.Rd index d5e271fe..16167f56 100644 --- a/man/perform.Rd +++ b/man/perform.Rd @@ -1,25 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/perform.R \name{perform} \alias{perform} -\title{ -Performance (hit rate) of a confusion matrix -} +\title{Performance (hit rate) of a confusion matrix} \usage{ perform(data) -} -\description{ -Performs (hit rate) of a confusion matrix - } \arguments{ -\item{data}{ -A confusion matrix. -}} +\item{data}{A confusion matrix.} +} \value{ -Caluculates the accuracy (total score) of the confusion matrix, returning -percentage of correct, and incorrect matches. +Calculates the accuracy (total score) of the confusion matrix, +returning percentage of correct, and incorrect matches. +} +\description{ +Performs (hit rate) of a confusion matrix } \seealso{ confusion } \keyword{misc} -% Converted by Sd2Rd version 0.3-3. diff --git a/man/plafit.Rd b/man/plafit.Rd index a488be8f..09d2bc6c 100644 --- a/man/plafit.Rd +++ b/man/plafit.Rd @@ -1,56 +1,53 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plafit.R \name{plafit} \alias{plafit} - -\title{ Calculate the coefficients of a parabola} -\description{ - Fit a second ordered polynomial to a vector of values +\title{Calculate the coefficients of a parabola} +\usage{ +plafit(wav, fit = FALSE, n = 101) } -\usage{plafit(wav, fit = FALSE, n = 101)} - \arguments{ - \item{wav}{ a vector or single column matrix of numeric values to which the 2nd order polynomial is to be fitted. } - \item{fit}{ if F, return the coefficients of -the polynomial; if T, the values of the polynomial -are returned to the same length as the vector wav. } - \item{n}{ in fitting the polynomial, linear time normalisation -is first applied to the input vector wav to 101 points. -The polynomial is fitted under the assumption that -these points extend linearly in time between t = -1 and t = 1 -with t = 0 occurring at the temporal midpoint. } -} +\item{wav}{a vector or single column matrix of numeric values to which the +2nd order polynomial is to be fitted.} -\details{ -The function fits a parabola (2nd order polynomial) following the method of van Bergem, Speech Communication, 14, 1994, 143-162. -The algorithm fixes the parabola at the onset, midpoint, and offset -of the vector i.e. such htat the fitted parabola and original vector have the same values at these points. -} +\item{fit}{if FALSE, return the coefficients of the polynomial; if TRUE, the +values of the polynomial are returned to the same length as the vector wav.} +\item{n}{in fitting the polynomial, linear time normalisation is first +applied to the input vector wav to 101 points. The polynomial is fitted +under the assumption that these points extend linearly in time between t = +-1 and t = 1 with t = 0 occurring at the temporal midpoint.} +} \value{ -The function returns the coefficients of -c0, c1, c2 in the parabola y = c0 + c1t + c2t\eqn{\mbox{\textasciicircum}}{^}2 -where t extends between -1 and 1. The function -can also be used to derive the values -of the parabola as a function of time from the coefficients. +The function returns the coefficients of c0, c1, c2 in the parabola +y = c0 + c1t + c2t\eqn{\mbox{\textasciicircum}}{^}2 where t extends between +-1 and 1. The function can also be used to derive the values of the +parabola as a function of time from the coefficients. +} +\description{ +Fit a second ordered polynomial to a vector of values +} +\details{ +The function fits a parabola (2nd order polynomial) following the method of +van Bergem, Speech Communication, 14, 1994, 143-162. The algorithm fixes +the parabola at the onset, midpoint, and offset of the vector i.e. such +htat the fitted parabola and original vector have the same values at these +points. } - -\author{Jonathan Harrington} - \examples{ + # fit a polynomial to a segment of fundamental frequency data plafit(vowlax.fund[1,]$data) # return the fitted values of the polynomial plafit(vowlax.fund[1,]$data, fit=TRUE) -} - - - -\seealso{ - \code{\link{dct}} } - - +\seealso{ +\code{\link{dct}} +} +\author{ +Jonathan Harrington +} \keyword{math} - diff --git a/man/plot.spectral.Rd b/man/plot.spectral.Rd index 9b01523b..930ebfdd 100644 --- a/man/plot.spectral.Rd +++ b/man/plot.spectral.Rd @@ -1,64 +1,90 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/spectralclass.R \name{plot.spectral} \alias{plot.spectral} - -\title{ Plot spectra from EMU spectral objects } -\description{ - The function plots spectrum of any EMU spectral object. -} +\title{Plot spectra from EMU spectral objects} \usage{ -\method{plot}{spectral}(x, labs, ylim, xlim, col, lty, lwd, fun, freq, type = "l", - power = FALSE, powcoeffs = c(10, 10), dbnorm = FALSE, dbcoeffs = c(0,0), legend = TRUE, axes = TRUE, ...) +\method{plot}{spectral}( + x, + labs, + ylim, + xlim, + col, + lty, + lwd, + fun, + freq, + type = "l", + power = FALSE, + powcoeffs = c(10, 10), + dbnorm = FALSE, + dbcoeffs = c(0, 0), + legend = TRUE, + axes = TRUE, + ... +) } - \arguments{ - \item{x}{ An EMU object of class 'spectral' } - \item{labs}{ An optional vector character labels. Must be the same length as specdata } - \item{ylim}{ A two-element numeric vector for the y-axis range (see 'par') } - \item{xlim}{ A two-element numeric vector for the x-axis range (see 'par') } - \item{col}{ Specify a color - see 'mu.colour') } - \item{lty}{ Specify a linetype - see 'mu.colour' } - \item{lwd}{ Specify line thickness - see 'mu.colour' } - \item{fun}{ An R function name e.g., mean, var, sum, etc. -The function is applied separately to each category type specified in labs } - \item{freq}{ A numeric vector the same length as the number of columns -in specdata specifying the frequencies at which the -spectral data is to be plotted. If not supplied, defaults to trackfreq(specdata) } - \item{type}{ A single element character vector for the linetype } - \item{power}{ Logical. If T, then specdata (or specdata\$data -if specdata is a trackdata object, is converted to -a * specdata\eqn{\mbox{\textasciicircum}}{^}b, where a and b have the values given in powcoeffs. -This operation is applied before b } - \item{powcoeffs}{ A two-element numeric vector. Defaults to c(10, 10) } - \item{dbnorm}{ Logical. If T, apply dB-level normalization -per spectrum as defined by dbcoeffs below. Defaults to F. } - \item{dbcoeffs}{ A two element numeric vector (x, y). -The spectra are normalised in such a way that the values -of each spectrum at a frequency of y are set to a dB level of x. -For example, to normalise the spectrum to 10 dB at 2000 Hz, -set dbnorm to T and dbcoeffs to c(2000, 10) } - \item{legend}{ Parameters for defining the legend. See 'mu.legend' for -further details } - \item{axes}{ A logical vector indicating whether the axes should be plotted } - \item{\dots}{ Further graphical parameters may be supplied. } +\item{x}{An EMU object of class 'spectral'} + +\item{labs}{An optional vector character labels. Must be the same length as +specdata} + +\item{ylim}{A two-element numeric vector for the y-axis range (see 'par')} + +\item{xlim}{A two-element numeric vector for the x-axis range (see 'par')} + +\item{col}{Specify a color - see 'mu.colour')} + +\item{lty}{Specify a linetype - see 'mu.colour'} + +\item{lwd}{Specify line thickness - see 'mu.colour'} + +\item{fun}{An R function name e.g., mean, var, sum, etc. The function is +applied separately to each category type specified in labs} + +\item{freq}{A numeric vector the same length as the number of columns in +specdata specifying the frequencies at which the spectral data is to be +plotted. If not supplied, defaults to trackfreq(specdata)} + +\item{type}{A single element character vector for the linetype} + +\item{power}{Logical. If TRUE, then specdata (or specdata$data if specdata is +a trackdata object, is converted to a * +specdata\eqn{\mbox{\textasciicircum}}{^}b, where a and b have the values +given in powcoeffs. This operation is applied before b} + +\item{powcoeffs}{A two-element numeric vector. Defaults to c(10, 10)} + +\item{dbnorm}{Logical. If TRUE, apply dB-level normalization per spectrum as +defined by dbcoeffs below. Defaults to FALSE.} + +\item{dbcoeffs}{A two element numeric vector (x, y). The spectra are +normalised in such a way that the values of each spectrum at a frequency of +y are set to a dB level of x. For example, to normalise the spectrum to 10 +dB at 2000 Hz, set dbnorm to TRUE and dbcoeffs to c(2000, 10)} + +\item{legend}{Parameters for defining the legend. See 'mu.legend' for +further details} + +\item{axes}{A logical vector indicating whether the axes should be plotted} + +\item{\dots}{Further graphical parameters may be supplied.} } -\details{ - This function is implemented when a spectral trackdata object -is called with the 'plot' function. +\description{ +The function plots spectrum of any EMU spectral object. } - -\author{ Jonathan Harrington } -\note{ To plot spectral data from a spectral trackdata object, -then call the function explicitly with 'plot/spectral' rather than -with just 'plot' +\details{ +This function is implemented when a spectral trackdata object is called +with the 'plot' function. } - -\seealso{ -\code{\link{plot}} -\code{\link{plot.trackdata}} -\code{\link{as.spectral}} +\note{ +To plot spectral data from a spectral trackdata object, then call the +function explicitly with 'plot/spectral' rather than with just 'plot' } - \examples{ +\dontrun{ + plot(vowlax.dft.5[1,]) # with label types @@ -68,12 +94,16 @@ plot(vowlax.dft.5[1:20,], vowlax.l[1:20]) plot(vowlax.dft.5[1:20,], vowlax.l[1:20], fun=mean, power=TRUE) # All the spectra of one segment in a trackdata object -plot.spectral(fric.dft[1,]) - - +plot(fric.dft[1,]) } +} +\seealso{ +\code{\link{plot}} \code{\link{plot.trackdata}} +\code{\link{as.spectral}} +} +\author{ +Jonathan Harrington +} \keyword{dplot} - - diff --git a/man/plot.trackdata.Rd b/man/plot.trackdata.Rd index 797f7d14..9e3e0241 100644 --- a/man/plot.trackdata.Rd +++ b/man/plot.trackdata.Rd @@ -1,77 +1,84 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/trackdata.class.R \name{plot.trackdata} \alias{plot.trackdata} - -\title{ Produces time-series plots from trackdata } - -\description{ -The function produces a plot as a function of -time for a single segment or multiple plots -as a function of time for several segments. -} - +\title{Produces time-series plots from trackdata} \usage{ -\method{plot}{trackdata}(x, timestart = NULL, xlim = NULL, ylim = NULL, labels = NULL, - col = TRUE, lty = FALSE, type = "p", pch = NULL, contig = TRUE, ...) } - - - +\method{plot}{trackdata}( + x, + timestart = NULL, + xlim = NULL, + ylim = NULL, + labels = NULL, + col = TRUE, + lty = FALSE, + type = "p", + pch = NULL, + contig = TRUE, + ... +) +} \arguments{ - \item{x}{A trackdata object.} - \item{timestart}{ A single valued numeric vector for setting -the time at which the trackdata should start. Defaults -to NULL which means that the start time is taken from start(trackdata), i.e. the time at which the trackdata object starts.} - \item{xlim}{A numeric vector of two values for -specifying the time interval over which the trackdata is to be plotted. Defaults to NULL which means that the trackdata object is -plotted between between the start time of the first segment and the end time of the last segment.} - \item{ylim}{Specify a yaxis range.} - \item{labels}{ A character vector the same length as -the number of segments in the trackdata object. Each label -is plotted at side = 3 on the plotted at the temporal -midpoint of each segment in the trackdata object. -Defaults to NULL (plot no labels). Labels will only -be plotted if xlim=NULL.} - \item{col}{A single element logical vector. Defaults to T -to plot each label type in a different colour} - \item{lty}{A single element logical vector. Defaults to F. -If TRUE, plot each label type in a different linetype } -\item{type}{ Specify the type of plot. See \link{plot} for the various possibilities } -\item{pch}{ The symbol types to be used for plotting. Should be specified as a -numeric vector of the same length as there are unique label classes } - \item{contig}{ A single valued logical vector T or F. If T, -then all the segments of the trackdata object are -assumed to be temporally contiguous, i.e. the -boundaries of the segments are abutting in time -and the start time of segment[j,] is -the end time of segment[j-1,]. In this case, -all the segments of the trackdata object are -plotted on the same plot as a function of time. -An example of a contiguous trackdata object is coutts.sam. -contig = FALSE is when a trackdata object is non-contiguous -e.g. all "i:" vowels in a database. An example -of a non-contiguous trackdata object is vowlax.fdat. -If contig=F then each segment of the trackdata object -is plotted separately.} - \item{...}{ the same graphical parameters -can be supplied to this function as for plot e.g type="l", lty=2 etc.} +\item{x}{A trackdata object.} + +\item{timestart}{A single valued numeric vector for setting the time at +which the trackdata should start. Defaults to NULL which means that the +start time is taken from start(trackdata), i.e. the time at which the +trackdata object starts.} + +\item{xlim}{A numeric vector of two values for specifying the time interval +over which the trackdata is to be plotted. Defaults to NULL which means +that the trackdata object is plotted between between the start time of the +first segment and the end time of the last segment.} + +\item{ylim}{Specify a yaxis range.} + +\item{labels}{A character vector the same length as the number of segments +in the trackdata object. Each label is plotted at side = 3 on the plotted +at the temporal midpoint of each segment in the trackdata object. Defaults +to NULL (plot no labels). Labels will only be plotted if xlim=NULL.} + +\item{col}{A single element logical vector. Defaults to TRUE to plot each +label type in a different colour} + +\item{lty}{A single element logical vector. Defaults to FALSE. If TRUE, plot +each label type in a different linetype} + +\item{type}{Specify the type of plot. See \link{plot} for the various +possibilities} + +\item{pch}{The symbol types to be used for plotting. Should be specified as +a numeric vector of the same length as there are unique label classes} + +\item{contig}{A single valued logical vector TRUE or FALSE. If TRUE, then all the +segments of the trackdata object are assumed to be temporally contiguous, +i.e. the boundaries of the segments are abutting in time and the start time +of segment[j,] is the end time of segment[j-1,]. In this case, all the +segments of the trackdata object are plotted on the same plot as a function +of time. An example of a contiguous trackdata object is coutts.sam. contig += FALSE is when a trackdata object is non-contiguous e.g. all "i:" vowels +in a database. An example of a non-contiguous trackdata object is +vowlax.fdat. If contig=FALSE then each segment of the trackdata object is +plotted separately.} + +\item{...}{the same graphical parameters can be supplied to this function +as for plot e.g type="l", lty=2 etc.} +} +\description{ +The function produces a plot as a function of time for a single segment or +multiple plots as a function of time for several segments. } - \details{ -The function plots a single segment of trackdata as -a function of time. If the segment contains -multiple tracks, then these will be overlaid. If -there are several temporally non-contiguous -segments in the trackdata object, -each segment is plotted in a different panel -by specifying contig=F. This function -is not suitable for overlaying -trackdata from more than one segments on the same plot -as a function of time: for this use dplot().} - -\author{ Jonathan Harrington} - -\seealso{ \code{\link{plot}}, \code{\link{dplot}}} +The function plots a single segment of trackdata as a function of time. If +the segment contains multiple tracks, then these will be overlaid. If there +are several temporally non-contiguous segments in the trackdata object, +each segment is plotted in a different panel by specifying contig=FALSE. This +function is not suitable for overlaying trackdata from more than one +segments on the same plot as a function of time: for this use dplot(). +} \examples{ + # a single segment of trackdata (F1) plotted as a function of time. plot(vowlax.fdat[1,1]) @@ -88,7 +95,7 @@ plot(vowlax.fdat[1,], col="blue", pch=20, xlim=c(900, 920), type="b", lty=TRUE, # F1 and F2 of six vowels with labels, separate windows -par(mfrow=c(2,3)) +oldpar = par(mfrow=c(2,3)) plot(vowlax.fdat[1:6,1:2], contig=FALSE, labels=vowlax.l[1:6], ylab="F1 and F2", xlab="Time (ms)", type="b", ylim=c(300, 2400)) @@ -112,13 +119,20 @@ par(mfrow=c(1,1)) plot(coutts.rms, labels=labels, type="l", bty="n") # as above, double line-thickness, green, line type 3, no box, # time start 0 ms with x and y axis labels - plot(coutts.rms, labels=labels, type="l", lwd=2, col="green", lty=3, bty="n", timestart=0, xlab="Time (ms)", ylab="Amplitude") + plot(coutts.rms, labels=labels, type="l", lwd=2, + col="green", lty=3, bty="n", timestart=0, xlab="Time (ms)", ylab="Amplitude") # as above with a different plotting symbol for the points par(mfrow=c(2,3)) - plot(coutts.rms, labels=labels, type="b", lwd=2, col="green", timestart=0, bty="n", contig=FALSE, pch=20) + plot(coutts.rms, labels=labels, type="b", lwd=2, col="green", + timestart=0, bty="n", contig=FALSE, pch=20) +par(oldpar) } - -%\keyword{emu} +\seealso{ +\code{\link{plot}}, \code{\link{dplot}} +} +\author{ +Jonathan Harrington +} \keyword{dplot} diff --git a/man/polhom.Rd b/man/polhom.Rd index b0fff016..7a65d907 100644 --- a/man/polhom.Rd +++ b/man/polhom.Rd @@ -1,6 +1,13 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-dataDocs.R +\docType{data} \name{polhom} \alias{polhom} \title{Segment list of four Polish homorganic fricatives from database epgpolish.} -\usage{polhom} -\description{An EMU dataset} +\format{ +segmentlist +} +\description{ +An EMU dataset +} \keyword{datasets} diff --git a/man/polhom.epg.Rd b/man/polhom.epg.Rd index 83a418d9..c0886dd1 100644 --- a/man/polhom.epg.Rd +++ b/man/polhom.epg.Rd @@ -1,6 +1,13 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-dataDocs.R +\docType{data} \name{polhom.epg} \alias{polhom.epg} \title{EPG-compressed trackdata from the segment list polhom} -\usage{polhom.epg} -\description{An EMU dataset} +\format{ +trackdata object +} +\description{ +An EMU dataset +} \keyword{datasets} diff --git a/man/polhom.l.Rd b/man/polhom.l.Rd index f8fe19ae..c57f7180 100644 --- a/man/polhom.l.Rd +++ b/man/polhom.l.Rd @@ -1,6 +1,13 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-dataDocs.R +\docType{data} \name{polhom.l} \alias{polhom.l} \title{Vector of phonetic labels from the segment list polhom} -\usage{polhom.l} -\description{An EMU dataset} +\format{ +vector of phonetic labels +} +\description{ +An EMU dataset +} \keyword{datasets} diff --git a/man/polygonplot.Rd b/man/polygonplot.Rd index a73a31fe..193df7a8 100644 --- a/man/polygonplot.Rd +++ b/man/polygonplot.Rd @@ -1,24 +1,44 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/eplot.R \name{polygonplot} \alias{polygonplot} - \title{polygonplot} -\description{ -plots a polygon -} \usage{ -polygonplot(data, labels, order, formant = TRUE, axes = TRUE, xlab = "", ylab = "", main = "", xlim, ylim) +polygonplot( + data, + labels, + order, + formant = TRUE, + axes = TRUE, + xlab = "", + ylab = "", + main = "", + xlim, + ylim +) } \arguments{ - \item{data}{ data matrix} - \item{labels}{labels} - \item{order}{ order } - \item{formant}{ formant TRUE or FALSE transposes the axes} - \item{axes}{axes} - \item{xlab}{xlab} - \item{ylab}{ylab} - \item{main}{main} - \item{xlim}{xlim} - \item{ylim}{ylim} -} -\keyword{ internal } +\item{data}{data matrix} + +\item{labels}{labels} + +\item{order}{order} + +\item{formant}{formant TRUE or FALSE transposes the axes} + +\item{axes}{axes} +\item{xlab}{xlab} + +\item{ylab}{ylab} + +\item{main}{main} + +\item{xlim}{xlim} + +\item{ylim}{ylim} +} +\description{ +plots a polygon +} +\keyword{internal} diff --git a/man/print.emuRsegs.Rd b/man/print.emuRsegs.Rd new file mode 100644 index 00000000..32c932e1 --- /dev/null +++ b/man/print.emuRsegs.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-emuRsegs.R +\name{print.emuRsegs} +\alias{print.emuRsegs} +\title{Print emuRsegs segment list} +\usage{ +\method{print}{emuRsegs}(x, ...) +} +\arguments{ +\item{x}{object to print} + +\item{...}{additional params} +} +\description{ +Print emuRsegs segment list +} diff --git a/man/print.emuRtrackdata.Rd b/man/print.emuRtrackdata.Rd new file mode 100644 index 00000000..98276826 --- /dev/null +++ b/man/print.emuRtrackdata.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-emuRtrackdata.R +\name{print.emuRtrackdata} +\alias{print.emuRtrackdata} +\title{Print emuRtrackdata object} +\usage{ +\method{print}{emuRtrackdata}(x, ...) +} +\arguments{ +\item{x}{object to print} + +\item{...}{additional params} +} +\description{ +Print emuRtrackdata object +} diff --git a/man/print.emusegs.Rd b/man/print.emusegs.Rd index 2d878f50..e91605e3 100644 --- a/man/print.emusegs.Rd +++ b/man/print.emusegs.Rd @@ -1,9 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emusegs.R \name{print.emusegs} \alias{print.emusegs} - -\title{print emusegs } +\title{print emusegs} +\usage{ +\method{print}{emusegs}(x, ...) +} \description{ see function } -\keyword{ internal } - +\keyword{internal} diff --git a/man/print.trackdata.Rd b/man/print.trackdata.Rd index ef7b5b27..d482320b 100644 --- a/man/print.trackdata.Rd +++ b/man/print.trackdata.Rd @@ -1,9 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/trackdata.class.R \name{print.trackdata} \alias{print.trackdata} - -\title{ print trackdata} +\title{print trackdata} +\usage{ +\method{print}{trackdata}(x, ...) +} \description{ see function } -\keyword{ internal } - +\keyword{internal} diff --git a/man/query.Rd b/man/query.Rd new file mode 100644 index 00000000..79920c45 --- /dev/null +++ b/man/query.Rd @@ -0,0 +1,115 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-query.database.R +\name{query} +\alias{query} +\title{Query emuDB} +\usage{ +query( + emuDBhandle, + query, + sessionPattern = ".*", + bundlePattern = ".*", + queryLang = "EQL2", + timeRefSegmentLevel = NULL, + resultType = "tibble", + calcTimes = TRUE, + verbose = FALSE +) +} +\arguments{ +\item{emuDBhandle}{emuDB handle object (see \link{load_emuDB})} + +\item{query}{string (see vignette \url{https://ips-lmu.github.io/The-EMU-SDMS-Manual/chap-querysys.html})} + +\item{sessionPattern}{A regular expression pattern matching session names to +be searched from the database} + +\item{bundlePattern}{A regular expression pattern matching bundle names to be +searched from the database} + +\item{queryLang}{query language used for evaluating the query string} + +\item{timeRefSegmentLevel}{set time segment level from which to derive time +information. It is only necessary to set this parameter if more than one child +level contains time information and the queried parent level is of type ITEM.} + +\item{resultType}{type (class name) of result (either 'tibble', 'emuRsegs' or +'emusegs' (use 'emusegs' for legacy compatablility only))} + +\item{calcTimes}{calculate times for resulting segments (results in +\code{NA} values for start and end times in emuseg/emuRsegs). As it can be +very computationally expensive to +calculate the times for large nested hierarchies, it can be turned off via this +parameter.} + +\item{verbose}{be verbose. Set this to \code{TRUE} if you wish to choose which +path to traverse on intersecting hierarchies. If set to \code{FALSE} (the default) +all paths will be traversed (= legacy EMU behavior).} +} +\value{ +result set object of class resultType (default: \link[tibble]{tibble}, +compatible to legacy types \link{emuRsegs} and \link{emusegs}) +} +\description{ +Function to query annotation items/structures in an emuDB +} +\details{ +Evaluates a query string of query language queryLang on an +emuDB referenced by \code{emuDBhandle} and returns a segment list of the desired type resultType. +For details of the query language please refer to the EMU-SDMS manual's query +system chapter (\url{https://ips-lmu.github.io/The-EMU-SDMS-Manual/chap-querysys.html}). +This function extracts a list of segments which meet the conditions given by the query string. +A segment can consist of one (e.g. 's') or more (e.g. 's->t') items from +the specified emuDB level. Segment objects (type 'SEGMENT') contain the label +string and the start and end time information of the segment (in ms). +The \code{tibble} return type (now the defaults) objects additionally contain +sample position of start and end item. +Time information of symbolic elements (type 'ITEM') are derived from linked SEGMENT +levels if available. If multiple linked SEGMENT levels exist, you can specify the +level with the \code{timeRefSegmentLevel} argument. If time and sample values cannot be +derived they will be set to \code{\link{NA}}. \link[tibble]{tibble}s will +be ordered by the columns UUID, session, bundle and sequence index (seq_idx). +Legacy \link{emusegs} lists are ordered by the columns utts and start. +The query may be limited to session and/or bundle names specified by regular +expression pattern strings (see \link{regex}) in parameters \code{sessionPattern} +respectively \code{bundlePattern}. +} +\examples{ +\dontrun{ + +################################## +# prerequisite: loaded ae emuDB +# (see ?load_emuDB for more information) + +## Query database ae with EQL query "[Phonetic=t -> Phonetic=s]": +## 'Find all sequences /ts/ on the level named Phonetics'. +## and store result seglist in variable segListTs + +seglistTs = query(ae, "[Phonetic == t -> Phonetic == s]") + +## Query database ae with EQL query "[Syllable == S ^ Phoneme == t]": +## 'Find all items 't' on the level named Phoneme that are dominated by +## items 'S' in level Syllable.' +## Return legacy Emu result type 'emusegs' + +query(ae, "[Syllable == S ^ Phoneme == t]", resultType = "emusegs") + +## Query 'p' items on the level named Phoneme from bundles whose +## bundle names start with 'msajc07' +## and whose session names start with '00' +## (Note that here the query uses the operator '=' (meaning '==') +## which is kept for backwards compatibilty to EQL1.) + +query(ae, "Phoneme = p", bundlePattern = "msajc05.*", sessionPattern = "00.*") + +} + +} +\seealso{ +\code{\link{load_emuDB}} +} +\keyword{EQL} +\keyword{Emu} +\keyword{database} +\keyword{emuDB} +\keyword{query} diff --git a/man/rad.Rd b/man/rad.Rd index 594f815b..ea89dc00 100644 --- a/man/rad.Rd +++ b/man/rad.Rd @@ -1,35 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/misc.R \name{rad} \alias{rad} - -\title{ Function to convert between Hertz and Radians } -\description{ - convert between Hertz and Radians -} +\title{Function to convert between Hertz and Radians} \usage{ rad(vec, samfreq = 20000, hz = TRUE) } - \arguments{ - \item{vec}{A numerical vector of frequencies in Hz or radians } - \item{samfreq}{A single element numerical vector of the sampling frequency. Defaults to 20000 Hz } - \item{hz}{ Logical. If T, convert from Hz to radians otherwise from radians to hz } -} +\item{vec}{A numerical vector of frequencies in Hz or radians} +\item{samfreq}{A single element numerical vector of the sampling frequency. +Defaults to 20000 Hz} - -\author{ Jonahtan Harrington} - -\seealso{ -\code{\link{help}} +\item{hz}{Logical. If TRUE, convert from Hz to radians otherwise from radians +to hz} +} +\description{ +convert between Hertz and Radians } - \examples{ + # 4000 Hz in radians at a sampling frequency of 8000 Hz rad(4000, 8000) # pi/2 and pi/4 radians in Hz at a sampling frequency of 10000 Hz rad(c(pi/2, pi/4), 10000, FALSE) -} +} +\seealso{ +\code{\link{help}} +} +\author{ +Jonathan Harrington +} \keyword{math} - diff --git a/man/radians.Rd b/man/radians.Rd index 0b9e253f..e715460b 100644 --- a/man/radians.Rd +++ b/man/radians.Rd @@ -1,23 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/misc.R \name{radians} \alias{radians} -\title{ -Converts degrees to radians -} +\title{Converts degrees to radians} \usage{ radians(degrees) } -\description{ -Converts degrees to radians -} \arguments{ -\item{degrees}{ -Angular measurement for conversion. -}} +\item{degrees}{Angular measurement for conversion.} +} \value{ Angular measurement in radians. } +\description{ +Converts degrees to radians +} \details{ There are 360 degrees or 2 * PI radians in one full rotation. } \keyword{misc} -% Converted by Sd2Rd version 0.3-3. diff --git a/man/randomise.segs.Rd b/man/randomise.segs.Rd index a2554b49..f5b3d768 100644 --- a/man/randomise.segs.Rd +++ b/man/randomise.segs.Rd @@ -1,34 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/misc.R \name{randomise.segs} \alias{randomise.segs} -\title{ - Randomise or Reverse items in a segment list -} +\title{Randomise or Reverse items in a segment list} \usage{ -randomise.segs(segs, rand=TRUE, bwd=FALSE) -} -\description{ - Randomises or Reverses items in a segment list - +randomise.segs(segs, rand = TRUE, bwd = FALSE) } \arguments{ -\item{segs}{ -An Emu segment list. -} -\item{bwd}{ - If T, reverse the order of the segment list. +\item{segs}{An Emu segment list.} + +\item{rand}{If TRUE, randomise the order of the segment lists (default).} + +\item{bwd}{If TRUE, reverse the order of the segment list.} } -\item{rand}{ - If T, randomise the order of the segment lists (default). -}} \value{ - A segment list containing the original elements in random or reversed - order. This is useful if the segment list is to be used as the source - for a set of stimuli in a perception experiment. +A segment list containing the original elements in random or +reversed order. This is useful if the segment list is to be used as the +source for a set of stimuli in a perception experiment. } -\seealso{ - \code{\link{emu.query}} +\description{ +Randomises or Reverses items in a segment list } \examples{ + data(vowlax) ## assumes a database called demo is available on your system and that ## the Emu system is installed. @@ -39,5 +33,9 @@ segs <- vowlax # randomise the segment list rsegs <- randomise.segs( segs ) + +} +\seealso{ +\code{\link{query}} } \keyword{misc} diff --git a/man/rbind.trackdata.Rd b/man/rbind.trackdata.Rd index 4f48cca2..a3089d6d 100644 --- a/man/rbind.trackdata.Rd +++ b/man/rbind.trackdata.Rd @@ -1,45 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/rbind.trackdata.R \name{rbind.trackdata} \alias{rbind.trackdata} \alias{rbind} - -\title{ A method of the generic function rbind for objects of class trackdata} - -\description{ - Different track data objects from one segment list are bound by combining the \$data - columns of the track data object by rows. - Track data objects are created by emu.track(). -} +\title{A method of the generic function rbind for objects of class trackdata} \usage{ - \method{rbind}{trackdata}(\dots) +\method{rbind}{trackdata}(...) } - \arguments{ - \item{\dots}{ track data objects } +\item{\dots}{track data objects} } - -\details{ - All track data objects have to be track data of the same segment list. - Thus \$index and \$ftime values have to be identically for all track data objects. - The number of columns of the track data objects must match. Thus a track data object of - more than one formant and single columned F0 track data object can not be rbind()ed. -} - \value{ - A track data object with the same \$index and \$ftime values of the source track data objects and - with \$data that includes all columns of \$data of the source track data objects. +A track data object with the same $index and $ftime values of the +source track data objects and with $data that includes all columns of +$data of the source track data objects. } - -\author{Jonathan Harrington} - - -\seealso{ - \code{\link{rbind}} - \code{\link{cbind.trackdata}} - \code{\link{trackdata}} - \code{\link{emu.track}} +\description{ +Different track data objects from one segment list are bound by combining +the $data columns of the track data object by rows. Track data objects +are created by \code{\link{get_trackdata}}. +} +\details{ +All track data objects have to be track data of the same segment list. +Thus $index and $ftime values have to be identically for all track data +objects. The number of columns of the track data objects must match. Thus +a track data object of more than one formant and single columned F0 track +data object can not be rbind()ed. } - \examples{ + data(vowlax) #segment list vowlax - first segment only @@ -55,10 +44,17 @@ fund.rms.lax = rbind(vowlax.fund[1:10,], vowlax.rms[1:10,]) #the combined track data object - #The first ten rows in \$data keep vowlax.fund data, the 11th to last row keeps vowlax.rms data + #The first ten rows in $data keep vowlax.fund data, the 11th to last row keeps vowlax.rms data fund.rms.lax -} -\keyword{methods} \ No newline at end of file +} +\seealso{ +\code{\link{rbind}} \code{\link{cbind.trackdata}} +\code{\link{trackdata}} \code{\link{get_trackdata}} +} +\author{ +Jonathan Harrington +} +\keyword{methods} diff --git a/man/read.emusegs.Rd b/man/read.emusegs.Rd index e53fc7eb..c18c25a4 100644 --- a/man/read.emusegs.Rd +++ b/man/read.emusegs.Rd @@ -1,37 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emusegs.R \name{read.emusegs} \alias{read.emusegs} -\title{ Create an Emu segment list from a file } -\description{ - Create an Emu segment list from a file saved by the Emu query tools. -} +\title{Create an Emu segment list from a file} \usage{ - read.emusegs(file) +read.emusegs(file) } \arguments{ - \item{file}{ The name of the file to read } -} -\details{ - Reads segment lists created by programs external to R/Splus and stored - in text files on disk. +\item{file}{The name of the file to read} } \value{ - An Emu segment list. +An Emu segment list. +} +\description{ +Create an Emu segment list from a file saved by the Emu query tools. +} +\details{ +Reads segment lists created by programs external to R/Splus and stored in +text files on disk. } -\author{ Steve Cassidy } -\seealso{ \code{\link{emu.query}} } - \examples{ + ## create a segment list file and write it out -seglist.txt <- "database:demo\ -query:Phonetic=vowel\ -type:segment\ +# seglist.txt <- "database:demo"\ +# query:Phonetic=vowel\ +# type:segment\ #\ -@: 3059.65 3343.65 msdjc001\ -e: 5958.55 6244.55 msdjc002\ -@u 8984.75 9288.75 msdjc003\ -A 11880.8 12184.8 msdjc004\ -E 17188.3 17366.4 msdjc005\ -ei 20315.2 20655.2 msdjc006" +# @: 3059.65 3343.65 msdjc001\ +# e: 5958.55 6244.55 msdjc002\ +# @u 8984.75 9288.75 msdjc003\ +# A 11880.8 12184.8 msdjc004\ +# E 17188.3 17366.4 msdjc005\ +# ei 20315.2 20655.2 msdjc006" \dontrun{cat(seglist.txt, file="seglist.txt")} @@ -41,7 +41,12 @@ ei 20315.2 20655.2 msdjc006" ## and clean up \dontrun{unlink("seglist.txt")} -} +} +\seealso{ +\code{\link{query}} +} +\author{ +Steve Cassidy +} \keyword{IO} - diff --git a/man/read.trackdata.Rd b/man/read.trackdata.Rd deleted file mode 100644 index f458b025..00000000 --- a/man/read.trackdata.Rd +++ /dev/null @@ -1,46 +0,0 @@ -\name{read.trackdata} -\alias{read.trackdata} -\title{Load track data from file} -\description{ - read data from a file into a trackdata object, the files - contain the data and time components of the object, they're - produced by gettrack (within the EMU System) and \code{\link{write.trackdata}}. -} -\usage{ - read.trackdata(filename, trackname = "data") -} - -\arguments{ - \item{filename}{ file name} - \item{trackname}{track name of the a track that is written in FILE. - without trackname, track is defined as data in the returned track data object} -} - -\value{ - track data object -} - -\author{Jonathan Harrington} - -\seealso{\code{\link{write.trackdata}}} - -\examples{ - data(dip) - #Formant track data of the segment list dip (see data(dip)) - first segment only - dip.fdat[1] - \dontrun{write.trackdata(dip.fdat, "emu.write.track.example.txt")} - - #There is a file emu_write.track.example.txt written to R_HOME/ - #that includes the track data - - #Now load the track data into R - \dontrun{dip.fdat.rl = read.trackdata("emu.write.track.example.txt", "fm")} - \dontrun{dip.fdat.rl[1]} - - #Now load the track data into R without argument trackname - \dontrun{dip.fdat.rl = read.trackdata("emu.write.track.example.txt")} - \dontrun{dip.fdat.rl[1]} - \dontrun{unlink("emu.write.track.example.txt")} -} -%\keyword{emu} -\keyword{IO} diff --git a/man/read.trackdata.twofile.Rd b/man/read.trackdata.twofile.Rd deleted file mode 100644 index b608dcdc..00000000 --- a/man/read.trackdata.twofile.Rd +++ /dev/null @@ -1,9 +0,0 @@ -\name{read.trackdata.twofile} -\alias{read.trackdata.twofile} - -\title{read trackdata twofile} -\description{ -see function -} -\keyword{ internal } - diff --git a/man/read_bundleList.Rd b/man/read_bundleList.Rd new file mode 100644 index 00000000..ff9aa7a1 --- /dev/null +++ b/man/read_bundleList.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-bundleList.R +\name{read_bundleList} +\alias{read_bundleList} +\title{read bundleList} +\usage{ +read_bundleList(emuDBhandle, name) +} +\arguments{ +\item{emuDBhandle}{emuDB handle object (see \link{load_emuDB})} + +\item{name}{name of bundleList (excluding the _bundleList.json suffix)} +} +\value{ +tibble with the columns \code{session}, \code{name}, +\code{comment}, \code{finishedEditing} +} +\description{ +read bundleList JSON file in emuDB +} +\details{ +Read bundleList JSON file in emuDB that is stored in +the databases root dir sub-dir \code{bundleLists/} +} diff --git a/man/rename_bundles.Rd b/man/rename_bundles.Rd new file mode 100644 index 00000000..bcd40e9b --- /dev/null +++ b/man/rename_bundles.Rd @@ -0,0 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-database.R +\name{rename_bundles} +\alias{rename_bundles} +\title{Rename bundles in emuDB} +\usage{ +rename_bundles(emuDBhandle, bundles) +} +\arguments{ +\item{emuDBhandle}{emuDB handle as returned by \code{\link{load_emuDB}}} + +\item{bundles}{data.frame like object with the columns +\itemize{ +\item \code{session}: name of sessions containing bundle +\item \code{name}: name of bundle +\item \code{name_new}: new name given to bundle +} +It is worth noting that \code{session} and \code{name} are the columns returned by +\code{\link{list_bundles}}.} +} +\description{ +Rename bundles of emuDB. +} +\examples{ +\dontrun{ + +################################## +# prerequisite: loaded ae emuDB +# (see ?load_emuDB for more information) + +# list bundles of session "0000" of ae emuDB +bundles = list_bundles(emuDBhandle = ae, + session = "0000") + +# append "XXX" to bundle names and rename +bundles$name_new = paste0(bundles$name, "XXX") +rename_bundles(emuDBhandle, bundles) +} + +} diff --git a/man/rename_emuDB.Rd b/man/rename_emuDB.Rd new file mode 100644 index 00000000..c01afc57 --- /dev/null +++ b/man/rename_emuDB.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-database.R +\name{rename_emuDB} +\alias{rename_emuDB} +\title{Rename emuDB} +\usage{ +rename_emuDB(databaseDir, newName) +} +\arguments{ +\item{databaseDir}{directory of the emuDB} + +\item{newName}{new name of emuDB} +} +\description{ +Rename a emuDB. This effectively renames the folder of a +emuDB the _DBconfig.json file as well as the "name" entry in the _DBconfig.json +file and the _emuDBcache.sqlite file if available. +} +\examples{ +\dontrun{ + +################################## +# prerequisite: loaded ae emuDB +# (see ?load_emuDB for more information) + +# rename ae emuDB to "aeNew" +rename_emuDB(databaseDir = "/path/2/ae_emuDB", newName = "aeNew") + +} + +} diff --git a/man/replace_itemLabels.Rd b/man/replace_itemLabels.Rd new file mode 100644 index 00000000..e2d991d1 --- /dev/null +++ b/man/replace_itemLabels.Rd @@ -0,0 +1,55 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-autoproc_annots.R +\name{replace_itemLabels} +\alias{replace_itemLabels} +\title{Replace item labels} +\usage{ +replace_itemLabels( + emuDBhandle, + attributeDefinitionName, + origLabels, + newLabels, + verbose = TRUE +) +} +\arguments{ +\item{emuDBhandle}{emuDB handle object (see \link{load_emuDB})} + +\item{attributeDefinitionName}{name of a attributeDefinition of a emuDB +where the labels are to be replaced} + +\item{origLabels}{character vector containing labels that are to be replaced} + +\item{newLabels}{character vector containing labels that are to replaced +the labels of \code{origLabels}. This vector has to be of equal length +to the \code{origLabels} vector.} + +\item{verbose}{Show progress bars and further information} +} +\description{ +Replace the labels of all annotation items, or more specifically +of attribute definitions belonging to annotation items, in an emuDB that +match the provided \code{origLabels} character vector which the +corresponding labels provided by the \code{newLabels} character vector. +The indices of the label vectors provided are used to match the labels +(i.e. \code{origLabels[i]} will be replaced by \code{newLabels[i]}). +} +\examples{ +\dontrun{ + +################################## +# prerequisite: loaded ae emuDB +# (see ?load_emuDB for more information) + +# replace all "I" and "p" labels with "I_replaced" and "p_replaced" +replace_itemLabels(ae, attributeDefinitionName = "Phonetic", + origLabels = c("I", "p"), + newLabels = c("I_replaced", "p_replaced")) + +} + +} +\seealso{ +\code{\link{load_emuDB}} +} +\keyword{emuDB} diff --git a/man/requery_hier.Rd b/man/requery_hier.Rd new file mode 100644 index 00000000..4ef17613 --- /dev/null +++ b/man/requery_hier.Rd @@ -0,0 +1,100 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-requery.database.R +\name{requery_hier} +\alias{requery_hier} +\title{Requery hierarchical context of a segment list in an emuDB} +\usage{ +requery_hier( + emuDBhandle, + seglist, + level, + collapse = TRUE, + resultType = "tibble", + calcTimes = TRUE, + timeRefSegmentLevel = NULL, + verbose = FALSE +) +} +\arguments{ +\item{emuDBhandle}{emuDB handle as returned by \code{\link{load_emuDB}}} + +\item{seglist}{segment list to requery on (type: \link{emuRsegs})} + +\item{level}{character string: name of target level} + +\item{collapse}{collapse the found items in the requested level to a sequence (concatenated with ->). +If set to \code{FALSE} separate items as new entries in the emuRsegs object are returned.} + +\item{resultType}{type of result (either 'tibble' == default or 'emuRsegs')} + +\item{calcTimes}{calculate times for resulting segments (results in \code{NA} values for start and end +times in emuseg/emuRsegs). As it can be very computationally expensive to +calculate the times for large nested hierarchies, it can be turned off via this boolean parameter.} + +\item{timeRefSegmentLevel}{set time segment level from which to derive time information. It is only +necessary to set this parameter if more than one child level contains time information and the queried +parent level is of type ITEM.} + +\item{verbose}{be verbose. Set this to \code{TRUE} if you wish to choose which path to traverse on intersecting +hierarchies. If set to \code{FALSE} (the default) all paths will be traversed (= legacy EMU behaviour).} +} +\value{ +result set object of class \link{emuRsegs} or \link[tibble]{tibble} +} +\description{ +Function to requery the hierarchical context of a segment list queried from an emuDB +} +\details{ +A segment is defined as a single item or a chain of items from the respective level, e.g. +if a level in a bundle instance has labels 'a', 'b' and 'c' in that order, 'a' or 'a->b' or 'a->b->c' +are all valid segments, 'a->c' is not. For each segment of the input segment list \code{seglist} +the function checks the start and end item for hierarchically linked items in the given target +level, and based on them constructs segments in the target level. As the start item in the resulting +segment the item with the lowest sequence index is chosen; for the end item that with the highest +sequence index. If the parameter \code{collapse} is set to \code{TRUE} (the default), it is guaranteed +that result and input segment list have the same length (for each input +segment one or multiple segments on the target level was found). If multiple linked segments where found +they are collapsed into a sequence of segments ('a->b->c') and if no linked items where found an NA row +is inserted. +} +\examples{ +\dontrun{ + +################################## +# prerequisite: loaded ae emuDB +# (see ?load_emuDB for more information) + +## Downward requery: find 'Phoneme' sequences of all words 'beautiful' (of level 'Text') +## Note that the resulting segments consists of phoneme sequences and have therefore +## the same length as the word segments. + +sl1 = query(ae, "Text == beautiful") +requery_hier(ae, sl1, level = "Phoneme") + +## Upward requery: find all word segments that dominate a 'p' on level 'Phoneme' +## Note that the resulting segments are larger than the input segments, +## because they contain the complete words. + +sl1 = query(ae, "Phonetic == p") +requery_hier(ae, sl1, level = 'Text') + +## Why is there a 'p' the word 'emphazised'? Requery the whole words back down to 'Phoneme' level: + +requery_hier(ae, sl1, level = 'Phoneme') + +## ... because of 'stop epenthesis' a 'p' is inserted between 'm' and 'f' + +## Combined requery: last phonemes of all words beginning with 'an'. +## Note that we use a regular expression 'an.*' (EQL operator '=~') in the query. + +sl1=query(ae, "Text =~ an.*") +requery_seq(ae, requery_hier(ae, sl1, level = 'Phoneme'), offsetRef = 'END') + +} +} +\seealso{ +\code{\link{query}} \code{\link{requery_seq}} \code{\link{emuRsegs}} +} +\keyword{database} +\keyword{emuDB} +\keyword{requery} diff --git a/man/requery_seq.Rd b/man/requery_seq.Rd new file mode 100644 index 00000000..13507e2a --- /dev/null +++ b/man/requery_seq.Rd @@ -0,0 +1,129 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-requery.database.R +\name{requery_seq} +\alias{requery_seq} +\title{Requery sequential context of segment list in an emuDB} +\usage{ +requery_seq( + emuDBhandle, + seglist, + offset = 0, + offsetRef = "START", + length = 1, + ignoreOutOfBounds = FALSE, + resultType = "tibble", + calcTimes = TRUE, + timeRefSegmentLevel = NULL, + verbose = FALSE +) +} +\arguments{ +\item{emuDBhandle}{emuDB handle as returned by \code{\link{load_emuDB}}} + +\item{seglist}{segment list to requery on (type: 'tibble' or 'emuRsegs')} + +\item{offset}{start item offset in sequence (default is 0, meaning the start +or end item of the input segment)} + +\item{offsetRef}{reference item for offset: 'START' for first and 'END' +for last item of segment} + +\item{length}{item length of segments in the returned segment list} + +\item{ignoreOutOfBounds}{ignore result segments that are out of bundle bounds} + +\item{resultType}{type of result (either 'tibble' == default, 'emuRsegs')} + +\item{calcTimes}{calculate times for resulting segments (results in \code{NA} +values for start and end times in emuseg/emuRsegs). As it can be very +computationally expensive to calculate the times for large nested hierarchies, +it can be turned off via this boolean parameter.} + +\item{timeRefSegmentLevel}{set time segment level from which to derive time +information. It is only necessary to set this parameter if more than one +child level contains time information and the queried parent level is of type ITEM.} + +\item{verbose}{be verbose. Set this to \code{TRUE} if you wish to choose which +path to traverse on intersecting hierarchies. If set to \code{FALSE} (the +default) all paths will be traversed (= legacy EMU behaviour).} +} +\value{ +result set object of class \link{emuRsegs} or \link[tibble]{tibble} +} +\description{ +Function to requery sequential context of a segment list queried +from an emuDB +} +\details{ +Builds a new segment list on the same hierarchical level +and the same length as the segment list given in \code{seglist}. The +resulting segments usually have different start position and length (in +terms of items of the respective level) controlled by the \code{offset}, +\code{offsetRef} and \code{length} parameters. +A segment here is defined as a single item or a chain of items from the +respective level, e.g. if a level in a bundle instance has labels 'a', 'b' +and 'c' in that order, 'a' or 'a->b' oder 'a->b->c' are all valid segments, +but not 'a->c'. +\code{offsetRef} determines if the position offset is referenced to the +start or the end item of the segments in the input list \code{seglist}; +parameter \code{offset} determines the offset of the resulting item start +position to this reference item; parameter \code{length} sets the item +length of the result segments. If the requested segments are out of bundle +item boundaries and parameter \code{ignoreOutOfBounds} is \code{FALSE} +(the default), an error is generated. To get residual resulting segments +that lie within the bounds the \code{ignoreOutOfBounds} parameter can be +set to \code{TRUE}. The returned segment list is usually of the same +length and order as the input \code{seglist}; if \code{ignoreOutOfBounds=FALSE}, +the resulting segment list may be out of sync. +} +\examples{ +\dontrun{ + +################################## +# prerequisite: loaded ae emuDB +# (see ?load_emuDB for more information) + +## Requery previous item of 'p' on level 'Phonetic' +sl1 = query(ae, "Phonetic == p") + +requery_seq(ae, sl1, offset = -1) + +## Requery context (adding previuos and following elements) +## of 'p' on phonetic level + +requery_seq(ae, sl1, offset = -1, length = 3) + +## Requery previous item of n->t sequence +sl2 = query(ae, "[Phoneme == n -> Phoneme == t]") + +requery_seq(ae, sl2, offset = -1) + +## Requery last item within n->t sequence + +requery_seq(ae, sl2, offsetRef = 'END') + +## Requery following item after n->t sequence + +requery_seq(ae, sl2, offset = 1, offsetRef = 'END') + +## Requery context (previous and following items) of n->t sequence + +requery_seq(ae, sl2, offset = -1, length = 4) + +## Requery next word contexts (sequence includes target word) + +sl3 = query(ae, "Text == to") +requery_seq(ae, sl3, length = 2) + +## Requery following two word contexts, ignoring segment +## sequences that are out of bundle end bounds +requery_seq(ae, sl3, length = 3, ignoreOutOfBounds = TRUE) + +} +} +\seealso{ +\code{\link{query}} \code{\link{requery_hier}} \code{\link{emuRsegs}} +} +\keyword{database} +\keyword{emuDB} +\keyword{requery} diff --git a/man/resample_annots.Rd b/man/resample_annots.Rd new file mode 100644 index 00000000..9ae65485 --- /dev/null +++ b/man/resample_annots.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-autoproc_annots.R +\name{resample_annots} +\alias{resample_annots} +\title{Resample annotations (\code{_annot.json}) files of emuDB} +\usage{ +resample_annots(emuDBhandle, newSampleRate, verbose = TRUE) +} +\arguments{ +\item{emuDBhandle}{emuDB handle object (see \link{load_emuDB})} + +\item{newSampleRate}{target sample rate} + +\item{verbose}{show progress bars and further information} +} +\description{ +Resample all annotations (\code{_annot.json}) files of emuDB to a specified +sample rate. It is up to the user to ensure that the samplerates of +the annot.json files match those of the \code{.wav} files. +} +\examples{ +\dontrun{ + +################################## +# prerequisite: loaded ae emuDB +# (see ?load_emuDB for more information) + +# resample +resample_annots(ae, newSampleRate = 16000) + +} +} diff --git a/man/rescale.gerst.Rd b/man/rescale.gerst.Rd index 75164122..4359f1dc 100644 --- a/man/rescale.gerst.Rd +++ b/man/rescale.gerst.Rd @@ -1,9 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/norm.R \name{rescale.gerst} \alias{rescale.gerst} - -\title{ rescale gerst } +\title{rescale gerst} +\usage{ +rescale.gerst(data, mind, ranged) +} \description{ see function } -\keyword{ internal } - +\keyword{internal} diff --git a/man/rescale.lob.Rd b/man/rescale.lob.Rd index 4b0221d7..531bfcee 100644 --- a/man/rescale.lob.Rd +++ b/man/rescale.lob.Rd @@ -1,9 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/norm.R \name{rescale.lob} \alias{rescale.lob} - -\title{ rescale lob } +\title{rescale lob} +\usage{ +rescale.lob(data, mvals, sdvals) +} \description{ see function } -\keyword{ internal } - +\keyword{internal} diff --git a/man/rescale.nearey.Rd b/man/rescale.nearey.Rd index 1fd9e8c9..a88b13b8 100644 --- a/man/rescale.nearey.Rd +++ b/man/rescale.nearey.Rd @@ -1,9 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/norm.R \name{rescale.nearey} \alias{rescale.nearey} - -\title{ rescale nearey } +\title{rescale nearey} +\usage{ +rescale.nearey(data, neardata) +} \description{ see function } -\keyword{ internal } - +\keyword{internal} diff --git a/man/runBASwebservice_all.Rd b/man/runBASwebservice_all.Rd new file mode 100644 index 00000000..d0c5df6f --- /dev/null +++ b/man/runBASwebservice_all.Rd @@ -0,0 +1,94 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-bas_webservices.R +\name{runBASwebservice_all} +\alias{runBASwebservice_all} +\title{Runs several BAS webservices, starting from an orthographic transcription} +\usage{ +runBASwebservice_all( + handle, + transcriptionAttributeDefinitionName, + language, + orthoAttributeDefinitionName = "ORT", + canoAttributeDefinitionName = "KAN", + mausAttributeDefinitionName = "MAU", + minniAttributeDefinitionName = "MINNI", + sylAttributeDefinitionName = "MAS", + canoSylAttributeDefinitionName = "KAS", + chunkAttributeDefinitionName = "TRN", + runMINNI = TRUE, + patience = 0, + resume = FALSE, + verbose = TRUE +) +} +\arguments{ +\item{handle}{emuDB handle} + +\item{transcriptionAttributeDefinitionName}{name of the attribute (not level!) containing an orthographic transcription.} + +\item{language}{language(s) to be used. If you pass a single string (e.g. "deu-DE"), this language will be used for all bundles. +Alternatively, you can select the language for every bundle individually. To do so, you must pass a data frame with the columns +session, bundle, language. This data frame must contain one row for every bundle in your emuDB. +Up-to-date lists of the languages accepted by all webservices can be found here: +\url{https://clarin.phonetik.uni-muenchen.de/BASWebServices/services/help}} + +\item{orthoAttributeDefinitionName}{attribute name for orthographic words} + +\item{canoAttributeDefinitionName}{attribute name for canonical pronunciations of words} + +\item{mausAttributeDefinitionName}{attribute name for the MAUS segmentation} + +\item{minniAttributeDefinitionName}{attribute name for the MINNI segmentation} + +\item{sylAttributeDefinitionName}{attribute name for syllable segmentation} + +\item{canoSylAttributeDefinitionName}{attribute name for syllabified canonical pronunciations of words} + +\item{chunkAttributeDefinitionName}{attribute name for the chunk segmentation. +Please note that the chunk segmentation will only be generated if your emuDB contains +audio files beyond the one minute mark.} + +\item{runMINNI}{if set to \code{TRUE} (the default) the MINNI service is also run. As the MINNI service contains +less languages than the others it can be useful to turn this off.} + +\item{patience}{If a web service call fails, it is repeated a further n times, with n being the value of patience. +Must be set to a value between 0 and 3.} + +\item{resume}{If a previous call to this function has failed (and you think you have fixed the issue +that caused the error), you can set resume=TRUE to recover any progress made up to that point. This +will only work if your R temporary directory has not been deleted or emptied in the meantime.} + +\item{verbose}{Display progress bars and other information} +} +\description{ +This function calls the BAS webservices G2P, MAUS, Pho2Syl, MINNI and (if necessary) Chunker. +Starting from an orthographic transcription, it derives a tokenized orthographical word tier +using the G2P tool. It also derives canonical pronunciations (in SAMPA) for the words. +If at least one audio file is longer than 60 seconds, the function then calls the Chunker webservice +to presegment the recordings. Subsequently, the webservice MAUS is called to derive a phonetic +segmentation. A second, rough segmentation is created by running the phoneme decoder MINNI. +Finally, syllabification is performed by calling Pho2Syl. \strong{This function requires an internet connection.} +} +\details{ +All necessary level, attribute and link definitions are created in the process. +Note that this function will run all BAS webservices with default parameters, with four exceptions: +\itemize{ +\item{Chunker: force=rescue} +\item{G2P: embed=maus} +\item{Pho2Syl: wsync=yes} +\item{MAUS: USETRN=[true if Chunker was called or transcription is a segment tier, false otherwise]} +} +If you wish to change parameters, you must use the individual runBASwebservices functions. This will also allow +you to carry out manual corrections in between the steps, or to use different languages for different webservices. +} +\seealso{ +Other BAS webservice functions: +\code{\link{runBASwebservice_chunker}()}, +\code{\link{runBASwebservice_g2pForPronunciation}()}, +\code{\link{runBASwebservice_g2pForTokenization}()}, +\code{\link{runBASwebservice_maus}()}, +\code{\link{runBASwebservice_minni}()}, +\code{\link{runBASwebservice_pho2sylCanonical}()}, +\code{\link{runBASwebservice_pho2sylSegmental}()} +} +\concept{BAS webservice functions} diff --git a/man/runBASwebservice_chunker.Rd b/man/runBASwebservice_chunker.Rd new file mode 100644 index 00000000..4f543aa8 --- /dev/null +++ b/man/runBASwebservice_chunker.Rd @@ -0,0 +1,84 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-bas_webservices.R +\name{runBASwebservice_chunker} +\alias{runBASwebservice_chunker} +\title{Creates a chunk segmentation using the webservice Chunker.} +\usage{ +runBASwebservice_chunker( + handle, + canoAttributeDefinitionName, + language, + chunkAttributeDefinitionName = "TRN", + rootLevel = NULL, + orthoAttributeDefinitionName = NULL, + params = list(force = "rescue"), + perspective = "default", + patience = 0, + resume = FALSE, + verbose = TRUE +) +} +\arguments{ +\item{handle}{emuDB handle} + +\item{canoAttributeDefinitionName}{name of the attribute (not level!) containing a canonical pronunciation of the words.} + +\item{language}{language(s) to be used. If you pass a single string (e.g. "deu-DE"), this language will be used for all bundles. +Alternatively, you can select the language for every bundle individually. To do so, you must pass a data frame with the columns +session, bundle, language. This data frame must contain one row for every bundle in your emuDB. +Up-to-date lists of the languages accepted by all webservices can be found here: +\url{https://clarin.phonetik.uni-muenchen.de/BASWebServices/services/help}} + +\item{chunkAttributeDefinitionName}{attribute name for the chunk segmentation} + +\item{rootLevel}{if provided, the new level will be linked to the root level} + +\item{orthoAttributeDefinitionName}{if provided, chunk attributes will contain orthographic instead of SAMPA strings. +Must be paired with the canonical pronunciation attributes in canoAttributeDefinitionName.} + +\item{params}{named list of parameters to be passed on to the webservice. It is your own responsibility to +ensure that these parameters are compatible with the webservice API +(see \url{https://clarin.phonetik.uni-muenchen.de/BASWebServices/services/help}). +Some options accepted by the API (e.g. output format) cannot be set when calling a webservice from within emuR, +and will be overridden. If file parameters are used please wrap the file path in \code{httr::upload_file("/path/2/file/rules.nrul")}.} + +\item{perspective}{the webApp perspective that the new level will be added to. +If NULL, the new level is not added to any perspectives.} + +\item{patience}{If a web service call fails, it is repeated a further n times, with n being the value of patience. +Must be set to a value between 0 and 3.} + +\item{resume}{If a previous call to this function has failed (and you think you have fixed the issue +that caused the error), you can set resume=TRUE to recover any progress made up to that point. This +will only work if your R temporary directory has not been deleted or emptied in the meantime.} + +\item{verbose}{Display progress bars and other information} +} +\description{ +When audio input files are longer than approximately 10 minutes, alignment-based segmentation +tools such as MAUS will take a long time to run. In these cases, the Chunker pre-segments +the input into more digestible "chunks". As input, it requires a word tier with canonical +pronunciation attributes (which can be derived by \link{runBASwebservice_g2pForPronunciation}). +The resulting chunk level can be passed as input to \link{runBASwebservice_maus}. +\strong{This function requires an internet connection.} +} +\details{ +Please note that the chunker output is \strong{not} a semantically meaningful sentence +or turn segmentation, meaning that it cannot be used for analyses of sentence durations and the like. +By default, the chunker is called in force rescue mode. This means that the chunker is first run +in its normal mode, and switches to forced chunking mode only when it fails to find chunks that +are short enough for processing by MAUS. To disable the force mode completely, call this function with +params=list(force="false"). To skip the normal chunking mode and go directly into forced chunking +mode, use params=list(force="true"). +} +\seealso{ +Other BAS webservice functions: +\code{\link{runBASwebservice_all}()}, +\code{\link{runBASwebservice_g2pForPronunciation}()}, +\code{\link{runBASwebservice_g2pForTokenization}()}, +\code{\link{runBASwebservice_maus}()}, +\code{\link{runBASwebservice_minni}()}, +\code{\link{runBASwebservice_pho2sylCanonical}()}, +\code{\link{runBASwebservice_pho2sylSegmental}()} +} +\concept{BAS webservice functions} diff --git a/man/runBASwebservice_g2pForPronunciation.Rd b/man/runBASwebservice_g2pForPronunciation.Rd new file mode 100644 index 00000000..ce0566f3 --- /dev/null +++ b/man/runBASwebservice_g2pForPronunciation.Rd @@ -0,0 +1,68 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-bas_webservices.R +\name{runBASwebservice_g2pForPronunciation} +\alias{runBASwebservice_g2pForPronunciation} +\title{Creates canonical pronunciation attributes for a tier of tokenized orthographical words.} +\usage{ +runBASwebservice_g2pForPronunciation( + handle, + orthoAttributeDefinitionName, + language, + canoAttributeDefinitionName = "KAN", + params = list(embed = "maus"), + patience = 0, + resume = FALSE, + verbose = TRUE +) +} +\arguments{ +\item{handle}{emuDB handle} + +\item{orthoAttributeDefinitionName}{name of a attribute (not level!) containing orthographic words.} + +\item{language}{language(s) to be used. If you pass a single string (e.g. "deu-DE"), this language will be used for all bundles. +Alternatively, you can select the language for every bundle individually. To do so, you must pass a data frame with the columns +session, bundle, language. This data frame must contain one row for every bundle in your emuDB. +Up-to-date lists of the languages accepted by all webservices can be found here: +\url{https://clarin.phonetik.uni-muenchen.de/BASWebServices/services/help}} + +\item{canoAttributeDefinitionName}{attribute name for canonical pronunciations of words} + +\item{params}{named list of parameters to be passed on to the webservice. It is your own responsibility to +ensure that these parameters are compatible with the webservice API +(see \url{https://clarin.phonetik.uni-muenchen.de/BASWebServices/services/help}). +Some options accepted by the API (e.g. output format) cannot be set when calling a webservice from within emuR, +and will be overridden. If file parameters are used please wrap the file path in \code{httr::upload_file("/path/2/file/rules.nrul")}.} + +\item{patience}{If a web service call fails, it is repeated a further n times, with n being the value of patience. +Must be set to a value between 0 and 3.} + +\item{resume}{If a previous call to this function has failed (and you think you have fixed the issue +that caused the error), you can set resume=TRUE to recover any progress made up to that point. This +will only work if your R temporary directory has not been deleted or emptied in the meantime.} + +\item{verbose}{Display progress bars and other information} +} +\description{ +This function calls the G2P webservice to add canonical pronunciation attributes in SAMPA (default) +or IPA to a tier of tokenized orthographical words. It is usually called after tokenization +with \link{runBASwebservice_g2pForTokenization}. Its output can be used as input to +\link{runBASwebservice_maus} or \link{runBASwebservice_chunker}. +\strong{This function requires an internet connection.} +} +\details{ +By default, G2P is called in MAUS embed mode. This is important if you intend to use MAUS +afterwards. To disable MAUS embed mode, call this function with params=list(embed="no"). +To derive IPA symbols, add outsym="ipa" to the parameter list. +} +\seealso{ +Other BAS webservice functions: +\code{\link{runBASwebservice_all}()}, +\code{\link{runBASwebservice_chunker}()}, +\code{\link{runBASwebservice_g2pForTokenization}()}, +\code{\link{runBASwebservice_maus}()}, +\code{\link{runBASwebservice_minni}()}, +\code{\link{runBASwebservice_pho2sylCanonical}()}, +\code{\link{runBASwebservice_pho2sylSegmental}()} +} +\concept{BAS webservice functions} diff --git a/man/runBASwebservice_g2pForTokenization.Rd b/man/runBASwebservice_g2pForTokenization.Rd new file mode 100644 index 00000000..30edf074 --- /dev/null +++ b/man/runBASwebservice_g2pForTokenization.Rd @@ -0,0 +1,65 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-bas_webservices.R +\name{runBASwebservice_g2pForTokenization} +\alias{runBASwebservice_g2pForTokenization} +\title{Tokenizes an orthographic transcription.} +\usage{ +runBASwebservice_g2pForTokenization( + handle, + transcriptionAttributeDefinitionName, + language, + orthoAttributeDefinitionName = "ORT", + params = list(), + patience = 0, + resume = FALSE, + verbose = TRUE +) +} +\arguments{ +\item{handle}{emuDB handle} + +\item{transcriptionAttributeDefinitionName}{name of the attribute (not level!) containing an orthographic transcription.} + +\item{language}{language(s) to be used. If you pass a single string (e.g. "deu-DE"), this language will be used for all bundles. +Alternatively, you can select the language for every bundle individually. To do so, you must pass a data frame with the columns +session, bundle, language. This data frame must contain one row for every bundle in your emuDB. +Up-to-date lists of the languages accepted by all webservices can be found here: +\url{https://clarin.phonetik.uni-muenchen.de/BASWebServices/services/help}} + +\item{orthoAttributeDefinitionName}{attribute name for orthographic words} + +\item{params}{named list of parameters to be passed on to the webservice. It is your own responsibility to +ensure that these parameters are compatible with the webservice API +(see \url{https://clarin.phonetik.uni-muenchen.de/BASWebServices/services/help}). +Some options accepted by the API (e.g. output format) cannot be set when calling a webservice from within emuR, +and will be overridden. If file parameters are used please wrap the file path in \code{httr::upload_file("/path/2/file/rules.nrul")}.} + +\item{patience}{If a web service call fails, it is repeated a further n times, with n being the value of patience. +Must be set to a value between 0 and 3.} + +\item{resume}{If a previous call to this function has failed (and you think you have fixed the issue +that caused the error), you can set resume=TRUE to recover any progress made up to that point. This +will only work if your R temporary directory has not been deleted or emptied in the meantime.} + +\item{verbose}{Display progress bars and other information} +} +\description{ +This function calls the webservice G2P to break up a transcription into tokens, or words. +In addition to tokenization, G2P performs normalization of numbers and other special words. +A call to this function is usually followed by a call to \link{runBASwebservice_g2pForPronunciation}. +\strong{This function requires an internet connection.} +} +\details{ +All necessary level, link and attribute definitions are created in the process. +} +\seealso{ +Other BAS webservice functions: +\code{\link{runBASwebservice_all}()}, +\code{\link{runBASwebservice_chunker}()}, +\code{\link{runBASwebservice_g2pForPronunciation}()}, +\code{\link{runBASwebservice_maus}()}, +\code{\link{runBASwebservice_minni}()}, +\code{\link{runBASwebservice_pho2sylCanonical}()}, +\code{\link{runBASwebservice_pho2sylSegmental}()} +} +\concept{BAS webservice functions} diff --git a/man/runBASwebservice_maus.Rd b/man/runBASwebservice_maus.Rd new file mode 100644 index 00000000..03e2e390 --- /dev/null +++ b/man/runBASwebservice_maus.Rd @@ -0,0 +1,78 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-bas_webservices.R +\name{runBASwebservice_maus} +\alias{runBASwebservice_maus} +\title{Runs MAUS webservice to create a phonetic segmentation} +\usage{ +runBASwebservice_maus( + handle, + canoAttributeDefinitionName, + language, + mausAttributeDefinitionName = "MAU", + chunkLevel = NULL, + turnChunkLevelIntoItemLevel = TRUE, + params = NULL, + perspective = "default", + patience = 0, + resume = FALSE, + verbose = TRUE +) +} +\arguments{ +\item{handle}{emuDB handle} + +\item{canoAttributeDefinitionName}{name of the attribute (not level!) containing the SAMPA word pronunciations. +If this attribute resides on a segment level, the segment time information is used as a presegmentation. +If it is an item level, no assumption is made about the temporal position of segments.} + +\item{language}{language(s) to be used. If you pass a single string (e.g. "deu-DE"), this language will be used for all bundles. +Alternatively, you can select the language for every bundle individually. To do so, you must pass a data frame with the columns +session, bundle, language. This data frame must contain one row for every bundle in your emuDB. +Up-to-date lists of the languages accepted by all webservices can be found here: +\url{https://clarin.phonetik.uni-muenchen.de/BASWebServices/services/help}} + +\item{mausAttributeDefinitionName}{attribute name for the MAUS segmentation} + +\item{chunkLevel}{if you have a chunk segmentation level, you can provide it to improve the speed and accuracy +of MAUS. The chunk segmentation level must be a segment level, and it must link to the level of canoAttributeDefinitionName.} + +\item{turnChunkLevelIntoItemLevel}{if TRUE, and if a chunk level is provided, the chunk level is converted into an ITEM level after segmentation} + +\item{params}{named list of parameters to be passed on to the webservice. It is your own responsibility to +ensure that these parameters are compatible with the webservice API +(see \url{https://clarin.phonetik.uni-muenchen.de/BASWebServices/services/help}). +Some options accepted by the API (e.g. output format) cannot be set when calling a webservice from within emuR, +and will be overridden. If file parameters are used please wrap the file path in \code{httr::upload_file("/path/2/file/rules.nrul")}.} + +\item{perspective}{the webApp perspective that the new level will be added to. +If NULL, the new level is not added to any perspectives.} + +\item{patience}{If a web service call fails, it is repeated a further n times, with n being the value of patience. +Must be set to a value between 0 and 3.} + +\item{resume}{If a previous call to this function has failed (and you think you have fixed the issue +that caused the error), you can set resume=TRUE to recover any progress made up to that point. This +will only work if your R temporary directory has not been deleted or emptied in the meantime.} + +\item{verbose}{Display progress bars and other information} +} +\description{ +This function calls the BAS webservice MAUS to generate a phonemic segmentation. +It requires a word-tokenized tier with a SAMPA pronunciation, which can be generated +by the function \link{runBASwebservice_g2pForPronunciation}. +\strong{This function requires an internet connection.} +} +\details{ +All necessary level, link and attribute definitions are created in the process. +} +\seealso{ +Other BAS webservice functions: +\code{\link{runBASwebservice_all}()}, +\code{\link{runBASwebservice_chunker}()}, +\code{\link{runBASwebservice_g2pForPronunciation}()}, +\code{\link{runBASwebservice_g2pForTokenization}()}, +\code{\link{runBASwebservice_minni}()}, +\code{\link{runBASwebservice_pho2sylCanonical}()}, +\code{\link{runBASwebservice_pho2sylSegmental}()} +} +\concept{BAS webservice functions} diff --git a/man/runBASwebservice_minni.Rd b/man/runBASwebservice_minni.Rd new file mode 100644 index 00000000..3d6b5762 --- /dev/null +++ b/man/runBASwebservice_minni.Rd @@ -0,0 +1,69 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-bas_webservices.R +\name{runBASwebservice_minni} +\alias{runBASwebservice_minni} +\title{Creates a rough phonetic segmentation by running the phoneme decoder webservice MINNI.} +\usage{ +runBASwebservice_minni( + handle, + language, + minniAttributeDefinitionName = "MINNI", + rootLevel = NULL, + params = list(), + perspective = "default", + patience = 0, + resume = FALSE, + verbose = TRUE +) +} +\arguments{ +\item{handle}{emuDB handle} + +\item{language}{language(s) to be used. If you pass a single string (e.g. "deu-DE"), this language will be used for all bundles. +Alternatively, you can select the language for every bundle individually. To do so, you must pass a data frame with the columns +session, bundle, language. This data frame must contain one row for every bundle in your emuDB. +Up-to-date lists of the languages accepted by all webservices can be found here: +\url{https://clarin.phonetik.uni-muenchen.de/BASWebServices/services/help}} + +\item{minniAttributeDefinitionName}{attribute name for the MINNI segmentation} + +\item{rootLevel}{if provided, the new level will be linked to the root level} + +\item{params}{named list of parameters to be passed on to the webservice. It is your own responsibility to +ensure that these parameters are compatible with the webservice API +(see \url{https://clarin.phonetik.uni-muenchen.de/BASWebServices/services/help}). +Some options accepted by the API (e.g. output format) cannot be set when calling a webservice from within emuR, +and will be overridden. If file parameters are used please wrap the file path in \code{httr::upload_file("/path/2/file/rules.nrul")}.} + +\item{perspective}{the webApp perspective that the new level will be added to. +If NULL, the new level is not added to any perspectives.} + +\item{patience}{If a web service call fails, it is repeated a further n times, with n being the value of patience. +Must be set to a value between 0 and 3.} + +\item{resume}{If a previous call to this function has failed (and you think you have fixed the issue +that caused the error), you can set resume=TRUE to recover any progress made up to that point. This +will only work if your R temporary directory has not been deleted or emptied in the meantime.} + +\item{verbose}{Display progress bars and other information} +} +\description{ +The MINNI phoneme decoder performs phoneme-based decoding on the signal without input from +the transcription. Therefore, labelling quality is usually worse than that obtained from +MAUS (\link{runBASwebservice_maus}). Contrary to MAUS however, there is no need for a +pre-existing transcription. +} +\details{ +All necessary level, link and attribute definitions are created in the process. +} +\seealso{ +Other BAS webservice functions: +\code{\link{runBASwebservice_all}()}, +\code{\link{runBASwebservice_chunker}()}, +\code{\link{runBASwebservice_g2pForPronunciation}()}, +\code{\link{runBASwebservice_g2pForTokenization}()}, +\code{\link{runBASwebservice_maus}()}, +\code{\link{runBASwebservice_pho2sylCanonical}()}, +\code{\link{runBASwebservice_pho2sylSegmental}()} +} +\concept{BAS webservice functions} diff --git a/man/runBASwebservice_pho2sylCanonical.Rd b/man/runBASwebservice_pho2sylCanonical.Rd new file mode 100644 index 00000000..85ae3030 --- /dev/null +++ b/man/runBASwebservice_pho2sylCanonical.Rd @@ -0,0 +1,62 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-bas_webservices.R +\name{runBASwebservice_pho2sylCanonical} +\alias{runBASwebservice_pho2sylCanonical} +\title{Adds syllabified word labels to a word level that already contains canonical pronunciations.} +\usage{ +runBASwebservice_pho2sylCanonical( + handle, + canoAttributeDefinitionName, + language, + canoSylAttributeDefinitionName = "KAS", + params = list(), + patience = 0, + resume = FALSE, + verbose = TRUE +) +} +\arguments{ +\item{handle}{emuDB handle} + +\item{canoAttributeDefinitionName}{name of the attribute (not level!) containing a canonical pronunciation of the words.} + +\item{language}{language(s) to be used. If you pass a single string (e.g. "deu-DE"), this language will be used for all bundles. +Alternatively, you can select the language for every bundle individually. To do so, you must pass a data frame with the columns +session, bundle, language. This data frame must contain one row for every bundle in your emuDB. +Up-to-date lists of the languages accepted by all webservices can be found here: +\url{https://clarin.phonetik.uni-muenchen.de/BASWebServices/services/help}} + +\item{canoSylAttributeDefinitionName}{attribute name for syllabified canonical pronunciations of words} + +\item{params}{named list of parameters to be passed on to the webservice. It is your own responsibility to +ensure that these parameters are compatible with the webservice API +(see \url{https://clarin.phonetik.uni-muenchen.de/BASWebServices/services/help}). +Some options accepted by the API (e.g. output format) cannot be set when calling a webservice from within emuR, +and will be overridden. If file parameters are used please wrap the file path in \code{httr::upload_file("/path/2/file/rules.nrul")}.} + +\item{patience}{If a web service call fails, it is repeated a further n times, with n being the value of patience. +Must be set to a value between 0 and 3.} + +\item{resume}{If a previous call to this function has failed (and you think you have fixed the issue +that caused the error), you can set resume=TRUE to recover any progress made up to that point. This +will only work if your R temporary directory has not been deleted or emptied in the meantime.} + +\item{verbose}{Display progress bars and other information} +} +\description{ +This function calls the webservice Pho2Syl to add syllabified canonical pronunciation labels +to a word level that already contains unsyllabified canonical pronunciation labels (as can be +derived using \link{runBASwebservice_g2pForPronunciation}). \strong{This function requires an internet +connection.} +} +\seealso{ +Other BAS webservice functions: +\code{\link{runBASwebservice_all}()}, +\code{\link{runBASwebservice_chunker}()}, +\code{\link{runBASwebservice_g2pForPronunciation}()}, +\code{\link{runBASwebservice_g2pForTokenization}()}, +\code{\link{runBASwebservice_maus}()}, +\code{\link{runBASwebservice_minni}()}, +\code{\link{runBASwebservice_pho2sylSegmental}()} +} +\concept{BAS webservice functions} diff --git a/man/runBASwebservice_pho2sylSegmental.Rd b/man/runBASwebservice_pho2sylSegmental.Rd new file mode 100644 index 00000000..43c9528b --- /dev/null +++ b/man/runBASwebservice_pho2sylSegmental.Rd @@ -0,0 +1,77 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-bas_webservices.R +\name{runBASwebservice_pho2sylSegmental} +\alias{runBASwebservice_pho2sylSegmental} +\title{Creates a syllable segmentation on the basis of a phonetic segmentation.} +\usage{ +runBASwebservice_pho2sylSegmental( + handle, + segmentAttributeDefinitionName, + language, + superLevel = NULL, + sylAttributeDefinitionName = "MAS", + params = list(wsync = "yes"), + perspective = "default", + patience = 0, + resume = FALSE, + verbose = TRUE +) +} +\arguments{ +\item{handle}{emuDB handle} + +\item{segmentAttributeDefinitionName}{name of the attribute (not level!) containing a phonetic segmentation.} + +\item{language}{language(s) to be used. If you pass a single string (e.g. "deu-DE"), this language will be used for all bundles. +Alternatively, you can select the language for every bundle individually. To do so, you must pass a data frame with the columns +session, bundle, language. This data frame must contain one row for every bundle in your emuDB. +Up-to-date lists of the languages accepted by all webservices can be found here: +\url{https://clarin.phonetik.uni-muenchen.de/BASWebServices/services/help}} + +\item{superLevel}{name of the segments' parent level (typically the word level). +If set to NULL, the syllable level cannot be linked up.} + +\item{sylAttributeDefinitionName}{attribute name for syllable segmentation} + +\item{params}{named list of parameters to be passed on to the webservice. It is your own responsibility to +ensure that these parameters are compatible with the webservice API +(see \url{https://clarin.phonetik.uni-muenchen.de/BASWebServices/services/help}). +Some options accepted by the API (e.g. output format) cannot be set when calling a webservice from within emuR, +and will be overridden. If file parameters are used please wrap the file path in \code{httr::upload_file("/path/2/file/rules.nrul")}.} + +\item{perspective}{the webApp perspective that the new level will be added to. +If NULL, the new level is not added to any perspectives.} + +\item{patience}{If a web service call fails, it is repeated a further n times, with n being the value of patience. +Must be set to a value between 0 and 3.} + +\item{resume}{If a previous call to this function has failed (and you think you have fixed the issue +that caused the error), you can set resume=TRUE to recover any progress made up to that point. This +will only work if your R temporary directory has not been deleted or emptied in the meantime.} + +\item{verbose}{Display progress bars and other information} +} +\description{ +This function calls the BAS webservice Pho2Syl to create a syllable segmentation on the basis +of a phonetic segmentation (created by, for example, \link{runBASwebservice_maus}). +You can provide the level of your word segmentation, or of any other hierarchically +dominant segmentation, via the superLevel parameter. This way, the new syllable +items can be linked up into the pre-existing hierarchy. If you do not provide +this input, the syllables will only be linked down to the segments. +} +\details{ +All necessary level, link and parameter definitions are created in the process. +By default, Pho2Syl is run in word synchronized mode. To override this, call this function +with the parameter params=list(wsync="no"). +} +\seealso{ +Other BAS webservice functions: +\code{\link{runBASwebservice_all}()}, +\code{\link{runBASwebservice_chunker}()}, +\code{\link{runBASwebservice_g2pForPronunciation}()}, +\code{\link{runBASwebservice_g2pForTokenization}()}, +\code{\link{runBASwebservice_maus}()}, +\code{\link{runBASwebservice_minni}()}, +\code{\link{runBASwebservice_pho2sylCanonical}()} +} +\concept{BAS webservice functions} diff --git a/man/segmentlist.Rd b/man/segmentlist.Rd index ada6b6b8..ef0ccf17 100644 --- a/man/segmentlist.Rd +++ b/man/segmentlist.Rd @@ -1,34 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-objDocs.R \name{segmentlist} \alias{segmentlist} \alias{emusegs} - \title{Segment list} -\description{ - A segment list is the result of emu.query() or read.emusegs(). -} - \format{ - multi-columned matrix - one row per segment - \itemize{ - \item{first column}{label} - \item{second column}{segment onset time} - \item{third column}{segment offset time} - \item{fourth column}{utterance name} - } +multi-columned matrix one row per segment +\itemize{ + \item columnlabel + \item columnsegment onset time + \item columnsegment offset time + \item columnutterance name } - - -\seealso{ - \code{\link{emu.query}}, - \code{\link{demo.vowels}} } - +\description{ +A segment list is the result type of legacy Emu query. +} \examples{ + data(demo.vowels) #demo.vowels is a segment list demo.vowels -} +} +\seealso{ +\code{\link{query}}, \code{\link{demo.vowels}} +} \keyword{classes} diff --git a/man/serve.Rd b/man/serve.Rd new file mode 100644 index 00000000..4c8289ff --- /dev/null +++ b/man/serve.Rd @@ -0,0 +1,120 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-server.R +\name{serve} +\alias{serve} +\title{Serve EMU database to EMU-webApp} +\usage{ +serve( + emuDBhandle, + sessionPattern = ".*", + bundlePattern = ".*", + seglist = NULL, + bundleListName = NULL, + host = "127.0.0.1", + port = 17890, + autoOpenURL = "https://ips-lmu.github.io/EMU-webApp/?autoConnect=true", + browser = getOption("browser"), + useViewer = TRUE, + debug = FALSE, + debugLevel = 0 +) +} +\arguments{ +\item{emuDBhandle}{emuDB handle as returned by \code{\link{load_emuDB}}} + +\item{sessionPattern}{A regular expression pattern matching session names to be served} + +\item{bundlePattern}{A regular expression pattern matching bundle names to be served} + +\item{seglist}{segment list to use for times anchors and session + bundle restriction (type: \link{emuRsegs})} + +\item{bundleListName}{name of bundleList stored in emuDB/bundleLists subdir to send to EMU-webApp} + +\item{host}{host IP to listen to (default: 127.0.0.1 (localhost))} + +\item{port}{the port number to listen on (default: 17890)} + +\item{autoOpenURL}{URL passed to \code{\link{browseURL}} function. If NULL or an empty string are passed in +\code{\link{browseURL}} will not be invoked.} + +\item{browser}{argument passed on to \code{browser} argument of \code{\link{browseURL}} (see +it's documentation for details )} + +\item{useViewer}{Use the viewer provided by \code{getOption("viewer")} (the viewer pane when using RStudio) +and host a local version of the EMU-webApp in it. This will clone the current +EMU-webApp build (\url{https://github.com/IPS-LMU/EMU-webApp/tree/gh-pages/}) into the directory provided by +\code{\link{tempdir}} and serve this local version. A clone will +only be performed if no \code{file.path(tempdir(), "EMU-webApp")} directory is present. An alternative +directory can be also set: \code{options(emuR.emuWebApp.dir="path/to/EMU-webApp")} (use if offline functionality is required).} + +\item{debug}{TRUE to enable debugging (default: no debugging messages)} + +\item{debugLevel}{integer higher values generate more detailed debug output} +} +\value{ +TRUE (invisible) if the server was started +} +\description{ +Serves emuDB media files, SSFF tracks and annotations for +EMU-webApp browser GUI \url{http://ips-lmu.github.io/EMU-webApp/} + +Instructions: + +Start and connect (this should happen automatically): + +\itemize{ +\item Call this function to start the server. +\item Start a suitable HTML5 capable Web-Browser (Google Chrome, Firefox,...). +\item Navigate to the EMU-Webapp URL: \url{http://ips-lmu.github.io/EMU-webApp/}. +\item Press the 'Connect' button in the EMU-webApp and connect with default URL. +\item EMU-webApp loads the bundle list and the first +bundles media file, SSFF tracks and annotations. +} + +Disconnect and stop: +\itemize{ +\item Disconnect and stop the server with the 'Clear' button of +the webapp or the reload button of your browser. +\item The server can also be stopped by +calling \code{\link[httpuv]{stopAllServers}} of the \link[httpuv]{httpuv} package +} + +Hints: +\itemize{ +\item To serve only a subset of sessions or bundles use +the parameters \code{sessionPattern} and/or \code{bundlePattern}. +\item Use the \code{seglist} parameter to pass in a segment list +which was generated using the \code{query} function. This will +allow quick navigation to those segments. +} +} +\details{ +Function opens a HTTP/websocket and waits in a loop for browser requests. +Parameter host determines the IP address(es) of hosts allowed to connect to the +server. By default the server only listens to localhost. If you want to allow +connection from any host set the host parameter to \code{0.0.0.0}. Please note +that this might be an safety issue! The \code{port} parameter determines the port +the server listens on. The \code{host} and \code{port} parameters are intended +only for expert users. When started the R console will be blocked. On successful +connection the server sends the session and bundle list of the database referenced +by name by parameter \code{dbName} or by UUID parameter \code{dbUUID}. +The Web application requests bundle data for viewing or editing. If a bundle +is modified with the EMU-webApp and the save button is pressed the server modifies +the internal database and saves the changes to disk. +Communication between server and EMU webApp is defined by EMU-webApp-websocket-protocol +version 0.0.2 (\url{https://ips-lmu.github.io/The-EMU-SDMS-Manual/app-chap-wsProtocol.html}). +} +\examples{ +\dontrun{ +## Load EMU database 'myDb' and serve it to the EMU-webApp (opens default HTTP/websocket port 17890) + +myDb = load_emuDB("/path/to/myDb") +serve(myDb) +} + +} +\keyword{EMU-webApp} +\keyword{Emu} +\keyword{database} +\keyword{emuDB} +\keyword{websocket} diff --git a/man/shift.Rd b/man/shift.Rd index bb385931..f9aa30c6 100644 --- a/man/shift.Rd +++ b/man/shift.Rd @@ -1,40 +1,46 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/misc.R \name{shift} \alias{shift} -\title{ Function to shift the elements of a vector. } -\description{ - The function makes use of the function 'fitler' -to delay or advance a signal by k points. +\title{Function to shift the elements of a vector.} +\usage{ +shift(x, delta = 1, circular = TRUE) } -\usage{shift(x, delta = 1, circular = TRUE)} \arguments{ - \item{x}{ A numeric vector } - \item{delta}{ A single element numeric vector. Defines -the number of points by which the signal should be shifted. } - \item{circular}{ Logical. If T, the signal is wrapped -around itself so that if delta = 1, x[n] becomes x[1]. Otherwise, -if delta is positive, -the same number of zeros are prepended to the signal } -} -\details{ - The function makes use of the function 'filter' for -linear filtering to carry out the shifting. +\item{x}{A numeric vector} + +\item{delta}{A single element numeric vector. Defines the number of points +by which the signal should be shifted.} +\item{circular}{Logical. If TRUE, the signal is wrapped around itself so that +if delta = 1, x[n] becomes x[1]. Otherwise, if delta is positive, the same +number of zeros are prepended to the signal} } -\value{The signal shifted by a certain number of points. - ... +\value{ +The signal shifted by a certain number of points. ... +} +\description{ +The function makes use of the function 'filter' to delay or advance a +signal by k points. +} +\details{ +The function makes use of the function 'filter' for linear filtering to +carry out the shifting. } - -\author{ Jonathan Harrington } - -\seealso{ filter } \examples{ + vec = 1:10 shift(vec, 2) shift(vec, -2) shift(vec, 2, circular=FALSE) -} +} +\seealso{ +filter +} +\author{ +Jonathan Harrington +} \keyword{manip} - diff --git a/man/sort.emuRsegs.Rd b/man/sort.emuRsegs.Rd new file mode 100644 index 00000000..56a1d127 --- /dev/null +++ b/man/sort.emuRsegs.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-emuRsegs.R +\name{sort.emuRsegs} +\alias{sort.emuRsegs} +\title{Sort emuRsegs segment list by session, bundle and sample_start} +\usage{ +\method{sort}{emuRsegs}(x, decreasing, ...) +} +\arguments{ +\item{x}{object to sort} + +\item{decreasing}{NOT IMPLEMENTED!} + +\item{...}{additional params} +} +\description{ +Sort emuRsegs segment list by session, bundle and sample_start +} diff --git a/man/sortmatrix.Rd b/man/sortmatrix.Rd index 483144b6..d4911111 100644 --- a/man/sortmatrix.Rd +++ b/man/sortmatrix.Rd @@ -1,26 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/misc.R \name{sortmatrix} \alias{sortmatrix} -\title{ -Sort matrix by label -} +\title{Sort matrix by label} \usage{ -sortmatrix(mat, labs=dimnames(mat)[[2]]) -} -\description{ -Sorts matrix by label +sortmatrix(mat, labs = dimnames(mat)[[2]]) } \arguments{ -\item{mat}{ -A mu+ sement matrix. +\item{mat}{A mu+ segment matrix.} + +\item{labs}{A label vector which has the same number of columns as +\code{mat}.} } -\item{labs}{ -A label vector which has the same number of columns as \code{mat}. -}} \value{ Returns a sorted matrix by label, created from \code{mat}. } +\description{ +Sorts matrix by label +} \seealso{ label, phon } \keyword{misc} -% Converted by Sd2Rd version 0.3-3. diff --git a/man/splitstring.Rd b/man/splitstring.Rd index 31f17616..5197c498 100644 --- a/man/splitstring.Rd +++ b/man/splitstring.Rd @@ -1,28 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/misc.R \name{splitstring} \alias{splitstring} -\title{ -Split a string into words. -} +\title{Split a string into words.} \usage{ -splitstring(str,char) -} -\description{ -Splits a string into words. +splitstring(str, char) } \arguments{ -\item{str}{ -A string. +\item{str}{A string.} + +\item{char}{A character to split on} } -\item{char}{ -A character to split on -}} \value{ -A vector of strings. The original \code{str} is split at ever occurrence of -\code{char} to generate a vector of strings. +A vector of strings. The original \code{str} is split at ever +occurrence of \code{char} to generate a vector of strings. +} +\description{ +Splits a string into words. } \examples{ + splitstring("/home/recog/steve/foo", "/") #[1] "home" "recog" "steve" "foo" + } \keyword{misc} -% Converted by Sd2Rd version 0.3-3. diff --git a/man/start.emusegs.Rd b/man/start.emusegs.Rd new file mode 100644 index 00000000..4fd5a456 --- /dev/null +++ b/man/start.emusegs.Rd @@ -0,0 +1,45 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-objDocs.R +\name{start.emusegs} +\alias{start.emusegs} +\alias{end.emusegs} +\alias{start.trackdata} +\alias{end.trackdata} +\title{Start and end times for EMU segment lists and trackdata objects} +\arguments{ +\item{x}{a segment list or a trackdata object} + +\item{...}{due to the generic only} +} +\value{ +A vector of times. +} +\description{ +Obtain start and end times for EMU segment lists and trackdata objects +} +\details{ +The function returns the start and/or end times of either a segment list or +a trackdata object. The former refers to the boundary times of segments, +the latter the start and end times at which the tracks from segments occur. +start.emusegs and end.emusegs give exactly the same output as start and end +respectively. +} +\examples{ + +# start time of a segment list +start(polhom) +# duration of a segment list +end(polhom) - start(polhom) +# duration from start time of segment list +# and start time of parallel EPG trackdata +start(polhom) - start(polhom.epg) + + +} +\seealso{ +\code{\link{tracktimes}} +} +\author{ +Jonathan Harrington +} +\keyword{utilities} diff --git a/man/sub-.EPG.Rd b/man/sub-.EPG.Rd new file mode 100644 index 00000000..f28ef3b1 --- /dev/null +++ b/man/sub-.EPG.Rd @@ -0,0 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/MethodEPG.R +\name{[.EPG} +\alias{[.EPG} +\title{expand EPG} +\usage{ +\method{[}{EPG}(palates, i, j, k) +} +\description{ +see function +} +\keyword{internal} diff --git a/man/sub-.spectral.Rd b/man/sub-.spectral.Rd new file mode 100644 index 00000000..db12075a --- /dev/null +++ b/man/sub-.spectral.Rd @@ -0,0 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Methodspectral.R +\name{[.spectral} +\alias{[.spectral} +\title{Expand spectral} +\usage{ +\method{[}{spectral}(dat, i, j, drop) +} +\description{ +see function +} +\keyword{internal} diff --git a/man/sub-.trackdata.Rd b/man/sub-.trackdata.Rd new file mode 100644 index 00000000..382c7bd9 --- /dev/null +++ b/man/sub-.trackdata.Rd @@ -0,0 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/trackdata.class.R +\name{[.trackdata} +\alias{[.trackdata} +\title{Expand trackdata} +\usage{ +\method{[}{trackdata}(dataset, i, j, ...) +} +\description{ +see function +} +\keyword{internal} diff --git a/man/summary.emuDBhandle.Rd b/man/summary.emuDBhandle.Rd new file mode 100644 index 00000000..1dd8661d --- /dev/null +++ b/man/summary.emuDBhandle.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-emuDBhandle.R +\name{summary.emuDBhandle} +\alias{summary.emuDBhandle} +\title{Print summary of loaded EMU database (emuDB).} +\usage{ +\method{summary}{emuDBhandle}(object, ...) +} +\arguments{ +\item{object}{emuDBhandle as returned by \code{\link{load_emuDB}}} + +\item{...}{additional arguments affecting the summary produced.} +} +\description{ +Gives an overview of an EMU database. +Prints database name, UUID, base directory path, session and bundle +count and informations about signal track, annotation level, attribute and link definitions. +} diff --git a/man/summary.emusegs.Rd b/man/summary.emusegs.Rd index d1977a54..1de37c8e 100644 --- a/man/summary.emusegs.Rd +++ b/man/summary.emusegs.Rd @@ -1,16 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emusegs.R \name{summary.emusegs} \alias{summary.emusegs} - -\title{ summary emusegs } -\description{ -summarizes data in emu segment lists -} +\title{summary emusegs} \usage{ - \method{summary}{emusegs}(object, ...) +\method{summary}{emusegs}(object, ...) } \arguments{ - \item{object}{ the segmentlist} - \item{\dots}{ nothing special} -} -\keyword{ internal } +\item{object}{the segmentlist} +\item{\dots}{nothing special} +} +\description{ +summarizes data in emu segment lists +} +\keyword{internal} diff --git a/man/summary.trackdata.Rd b/man/summary.trackdata.Rd index 461cc37e..575d9478 100644 --- a/man/summary.trackdata.Rd +++ b/man/summary.trackdata.Rd @@ -1,16 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/trackdata.class.R \name{summary.trackdata} \alias{summary.trackdata} - -\title{ summary trackdata } -\description{ -summarizes trackdata objects -} +\title{summary trackdata} \usage{ - \method{summary}{trackdata}(object, ...) +\method{summary}{trackdata}(object, ...) } \arguments{ - \item{object}{ track data object} - \item{\dots}{see summary} -} -\keyword{ internal } +\item{object}{track data object} +\item{\dots}{see summary} +} +\description{ +summarizes trackdata objects +} +\keyword{internal} diff --git a/man/times.Rd b/man/times.Rd deleted file mode 100644 index cb0f4f03..00000000 --- a/man/times.Rd +++ /dev/null @@ -1,59 +0,0 @@ -\name{Start and end times for segment lists and trackdata ojects} -\alias{start.emusegs} -\alias{end.emusegs} -\alias{start.trackdata} -\alias{end.trackdata} - -\title{ Start and end times for EMU segment lists and trackdata objects } - -\description{ -Obtain start and end times for EMU segment lists and -trackdata objects -} - -\usage{ -\method{start}{emusegs}(x, ...) -\method{end}{emusegs}(x, ...) -\method{start}{trackdata}(x, ...) -\method{end}{trackdata}(x, ...) -} - -\arguments{ - \item{x}{ a segment list or a trackdata object } - \item{...}{ due to the generic only} -} - -\details{ -The function returns the start and/or end -times of either a segment list or a trackdata object. -The former refers to the boundary times of -segments, the latter the start and end times -at which the tracks from segments occur. -start.emusegs and end.emusegs give -exactly the same output as start and end -respectively. -} - -\value{ -A vector of times.} - -\examples{ -# start time of a segment list -start(polhom) -# duration of a segment list -end(polhom) - start(polhom) -# duration from start time of segment list -# and start time of parallel EPG trackdata -start(polhom) - start(polhom.epg) - -} - -\author{ Jonathan Harrington } - - -\seealso{ -\code{\link{tracktimes}} -} - -\keyword{utilities} - diff --git a/man/track.gradinfo.Rd b/man/track.gradinfo.Rd index bd26e149..a4a787e0 100644 --- a/man/track.gradinfo.Rd +++ b/man/track.gradinfo.Rd @@ -1,45 +1,41 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/track.gradinfo.R \name{track.gradinfo} \alias{track.gradinfo} -\title{ Calculate gradient summary information for trackdata } -\description{ - Calculates a number of summary measures for a trackdata object: - duration, start and end data points, delta values and slope. -} +\title{Calculate gradient summary information for trackdata} \usage{ - track.gradinfo(trackdata) +track.gradinfo(trackdata) } \arguments{ - \item{trackdata}{ An Emu trackdata object as returned by - \code{\link{emu.track}} } -} -\details{ - \code{track.gradinfo} calculates a number of summary measure for the - segments within a trackdata object. These are useful for data such as - kinematic measures where segments might correspond to articulatory - movements etc. - - Measures returned are: duration, start and end data values (ie. the - first and last rows of data for each segment), delta (the difference - between the first and last rows of data) and slope (delta divided by - the duration). - +\item{trackdata}{An Emu trackdata object as returned by +\code{\link{get_trackdata}}} } \value{ - A data frame with one row per segment and columns: - \item{duration}{Segment} - \item{startN }{The starting value for each segment (start1 is the - starting value for the first column) } - \item{endN }{The ending value for each segment } - \item{deltaN }{The delta value for each segment} - \item{slopeN }{The slope value for each segment} +A data frame with one row per segment and columns: +\item{duration}{Segment} \item{startN }{The starting value for each segment +(start1 is the starting value for the first column) } \item{endN }{The +ending value for each segment } \item{deltaN }{The delta value for each +segment} \item{slopeN }{The slope value for each segment} - Since the result is a data frame, the columns can be referred to by - name (\code{result$duration}) or as matrix columns (\code{result[,1]}). +Since the result is a data frame, the columns can be referred to by name +(\code{result$duration}) or as matrix columns (\code{result[,1]}). +} +\description{ +Calculates a number of summary measures for a trackdata object: duration, +start and end data points, delta values and slope. } -\author{ Steve Cassidy } -\seealso{ \code{\link{emu.track}}, \code{\link{dapply}} } +\details{ +\code{track.gradinfo} calculates a number of summary measure for the +segments within a trackdata object. These are useful for data such as +kinematic measures where segments might correspond to articulatory +movements etc. +Measures returned are: duration, start and end data values (ie. the first +and last rows of data for each segment), delta (the difference between the +first and last rows of data) and slope (delta divided by the duration). +} \examples{ + data(vowlax) segs = vowlax ## fm has 4 columns @@ -63,7 +59,12 @@ text(info.fm$duration, info.fm$delta1, labels=label(segs)) ## extract just the delta values from the formant info ## You need to eyeball the data to work out which columns to select delta.fm <- info.fm[,10:13] + +} +\seealso{ +\code{\link{get_trackdata}}, \code{\link{dapply}} +} +\author{ +Steve Cassidy } \keyword{misc} - - diff --git a/man/track.gradinfo.sub.Rd b/man/track.gradinfo.sub.Rd index c1c87188..35765baa 100644 --- a/man/track.gradinfo.sub.Rd +++ b/man/track.gradinfo.sub.Rd @@ -1,9 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/track.gradinfo.R \name{track.gradinfo.sub} \alias{track.gradinfo.sub} - \title{track gradinfo sub} +\usage{ +track.gradinfo.sub(data, ftime) +} \description{ see function } -\keyword{ internal } - +\keyword{internal} diff --git a/man/trackdata.Rd b/man/trackdata.Rd index ab459548..8fbe86ae 100644 --- a/man/trackdata.Rd +++ b/man/trackdata.Rd @@ -1,69 +1,59 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-objDocs.R \name{trackdata} \alias{trackdata} \alias{Math.trackdata} \alias{Math2.trackdata} \alias{Ops.trackdata} \alias{Summary.trackdata} - \title{Track data object} -\description{ - A track data object is the result of emu.track(). - -} - \format{ - \describe{ - \item{\$index}{a two columned matrix, each row keeps the first and last index of the \$data rows that belong to one segment} - \item{\$ftime}{a two columned matrix, each row keeps the times marks of one segment} - \item{\$data}{a multi-columned matrix with the real track values for each segment} - } +\describe{ \item{$index}{a two columned matrix, each row keeps the +first and last index of the $data rows that belong to one segment} +\item{$ftime}{a two columned matrix, each row keeps the times marks of one +segment} \item{$data}{a multi-columned matrix with the real track values +for each segment} } } - -\section{Methods}{ - The following generic methods are implemented for trackdata obects. - \describe{ - \item{\code{Arith}}{\code{"+"}, \code{"-"}, \code{"*"}, \code{"^"}, - \code{"\%\%"}, \code{"\%/\%"}, \code{"/"}} - \item{\code{Compare}}{\code{"=="}, \code{">"}, \code{"<"}, - \code{"!="}, \code{"<="}, \code{">="}} - \item{\code{Logic}}{\code{"&"}, \code{"|"}. - } - \item{\code{Ops}}{\code{"Arith"}, \code{"Compare"}, \code{"Logic"}} - \item{\code{Math}}{\code{"abs"}, \code{"sign"}, \code{"sqrt"}, - \code{"ceiling"}, \code{"floor"}, \code{"trunc"}, -X \code{"cummax"}, \code{"cummin"}, \code{"cumprod"}, \code{"cumsum"}, - \code{"log"}, \code{"log10"}, \code{"log2"}, \code{"log1p"}, - \code{"acos"}, \code{"acosh"}, - \code{"asin"}, \code{"asinh"}, \code{"atan"}, \code{"atanh"}, - \code{"exp"}, \code{"expm1"}, \code{"cos"}, \code{"cosh"}, - \code{"sin"}, \code{"sinh"}, \code{"tan"}, \code{"tanh"}, - \code{"gamma"}, \code{"lgamma"}, \code{"digamma"}, - \code{"trigamma"} - } - \item{\code{Math2}}{\code{"round"}, \code{"signif"}} - \item{\code{Summary}}{\code{"max"}, \code{"min"}, \code{"range"}, - \code{"prod"}, \code{"sum"}, \code{"any"}, \code{"all"}} - } +\description{ +A track data object is the result of get_trackdata(). } - \note{ - The entire data track is retrieved for each segment in the segment list. The amount of data - returned will depend on the sample rate and number of columns in the track requested. +The entire data track is retrieved for each segment in the segment +list. The amount of data returned will depend on the sample rate and number +of columns in the track requested. } - -\seealso{ - \code{\link{emu.track}}, - \code{\link{demo.vowels.fm}} - \code{\link{demo.all.rms}} +\section{Methods}{ + The following generic methods are implemented for +trackdata objects. \describe{ \item{list("Arith")}{\code{"+"}, \code{"-"}, +\code{"*"}, \code{"^"}, \code{"\%\%"}, \code{"\%/\%"}, \code{"/"}} +\item{list("Compare")}{\code{"=="}, \code{">"}, \code{"<"}, \code{"!="}, +\code{"<="}, \code{">="}} \item{list("Logic")}{\code{"&"}, \code{"|"}. } +\item{list("Ops")}{\code{"Arith"}, \code{"Compare"}, \code{"Logic"}} +\item{list("Math")}{\code{"abs"}, \code{"sign"}, \code{"sqrt"}, +\code{"ceiling"}, \code{"floor"}, \code{"trunc"}, X \code{"cummax"}, +\code{"cummin"}, \code{"cumprod"}, \code{"cumsum"}, \code{"log"}, +\code{"log10"}, \code{"log2"}, \code{"log1p"}, \code{"acos"}, +\code{"acosh"}, \code{"asin"}, \code{"asinh"}, \code{"atan"}, +\code{"atanh"}, \code{"exp"}, \code{"expm1"}, \code{"cos"}, \code{"cosh"}, +\code{"sin"}, \code{"sinh"}, \code{"tan"}, \code{"tanh"}, \code{"gamma"}, +\code{"lgamma"}, \code{"digamma"}, \code{"trigamma"} } +\item{list("Math2")}{\code{"round"}, \code{"signif"}} +\item{list("Summary")}{\code{"max"}, \code{"min"}, \code{"range"}, +\code{"prod"}, \code{"sum"}, \code{"any"}, \code{"all"}} } } \examples{ + data(demo.vowels.fm) data(demo.vowels) #Formant track data for the first segment of the segment list demo.vowels demo.vowels.fm[1] -} +} +\seealso{ +\code{\link{get_trackdata}}, \code{\link{demo.vowels.fm}} +\code{\link{demo.all.rms}} +} \keyword{classes} diff --git a/man/trackfreq.Rd b/man/trackfreq.Rd index 1004410f..25b03eb1 100644 --- a/man/trackfreq.Rd +++ b/man/trackfreq.Rd @@ -1,27 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/trackdata.class.R \name{trackfreq} \alias{trackfreq} - -\title{ function to find the frequencies of a spectral object } -\description{ - Find the frequencies of a spectral object. -} +\title{function to find the frequencies of a spectral object} \usage{ trackfreq(specdata) } - \arguments{ - \item{specdata}{ A spectral object } +\item{specdata}{A spectral object} } - \value{ - A vector of the frequencies at which the columns of -a spectral matrix occur. - +A vector of the frequencies at which the columns of a spectral +matrix occur. +} +\description{ +Find the frequencies of a spectral object. } - -\author{ Jonathan Harrington } - \examples{ + trackfreq(vowlax.dft.5) # Frequency components between 1000 and 2000 Hz trackfreq(vowlax.dft.5[,1000:2000]) @@ -35,7 +31,9 @@ trackfreq(fric.dft[,-c(1, 5000:20000)]) trackfreq(e.dft[1000:3000]) -} +} +\author{ +Jonathan Harrington +} \keyword{attribute} - diff --git a/man/trackinfo.Rd b/man/trackinfo.Rd deleted file mode 100644 index b250cd37..00000000 --- a/man/trackinfo.Rd +++ /dev/null @@ -1,29 +0,0 @@ -\name{trackinfo} -\alias{trackinfo} -\title{Tracks names and track file extensions} -\description{ - Gets tracks names and track file extensions that are defined for an EMU data base. -} -\usage{ - trackinfo(template) -} -\arguments{ - \item{template}{The name of an EMU data base (name of the template file).} -} -\details{ - Data base must exist for the EMU system. -} -\value{ - 2-columned matrix - \item{first column}{name of track} - \item{second column}{file extension of the track files} -} - -\author{ Jonathan Harrington} - -\examples{ - #tracks that are defined for data base kielread06 - \dontrun{trackinfo("kielread06")} -} -%\keyword{emu} -\keyword{utilities} diff --git a/man/tracktimes.Rd b/man/tracktimes.Rd index 3f317b7d..26baf6c8 100644 --- a/man/tracktimes.Rd +++ b/man/tracktimes.Rd @@ -1,42 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tracktimes.R \name{tracktimes} \alias{tracktimes} -\title{ Get the track times from EMU trackdata objects } -\description{ - The function obtains the times at which track values occur. +\title{Get the track times from EMU trackdata objects} +\usage{ +tracktimes(trackdata) } - -\usage{tracktimes(trackdata) } - \arguments{ - \item{trackdata}{ An EMU trackdata object, or a matrix -of track values obtained at a single time point using dcut() } +\item{trackdata}{An EMU trackdata object, or a matrix of track values +obtained at a single time point using dcut()} +} +\description{ +The function obtains the times at which track values occur. } - \details{ - Every \$data value in a trackdata object is -associated with a time at which it occurs -in the utterance. This function returns those -times. +Every $data value in a trackdata object is associated with a time at which +it occurs in the utterance. This function returns those times. } - \examples{ + # track time values for a trackdata object times <- tracktimes(vowlax.fdat) # track time values for a matrix of trackdata values # at the temporal midpoint tracktimes(dcut(vowlax.fdat[1:3,], 0.5, prop=TRUE)) -} - - - -\author{ Jonathan Harrington } - -\seealso{ - \code{\link{start.trackdata}} - \code{\link{end.trackdata}} - \code{\link{start.emusegs}} - \code{\link{end.emusegs}} +} +\seealso{ +\code{\link{start.trackdata}} \code{\link{end.trackdata}} +\code{\link{start.emusegs}} \code{\link{end.emusegs}} +} +\author{ +Jonathan Harrington } \keyword{datagen} - diff --git a/man/train.Rd b/man/train.Rd index f94c5628..2679dfca 100644 --- a/man/train.Rd +++ b/man/train.Rd @@ -1,52 +1,41 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/bayesdist.R \name{train} \alias{train} -\title{ -Train a Gaussian Model -} +\title{Train a Gaussian Model} \usage{ -train(x, lab) -} -\description{ -Trains a Gaussian Model +train(x, lab = rep("x", nrow(x))) } \arguments{ -\item{x}{ -A data vector or matrix. +\item{x}{A data vector or matrix.} + +\item{lab}{A vector of labels parallel to \code{x}. If missing, all data is +assumed to be from the same class.} } -\item{lab}{ -A vector of labels parallel to \code{x}. If missing, all data is -assumed to be from the same class. -}} \value{ -A structure with the following components: +A structure with the following components: -\item{label}{ -The unique labels in \code{lab}. -} -\item{means}{ -The means for each dimension per unique label. +\item{label}{ The unique labels in \code{lab}. } \item{means}{ The means +for each dimension per unique label. } \item{cov}{ The combined covariance +matrixes for each unique label. The matrixes are joined with \code{rbind}. +If the input data is one-dimensional, this is just the standard deviation +of the data. } \item{invcov}{ The combined inverse covariance matrixes for +each unique label. The matrixes are joined with \code{rbind}. If the input +data is one-dimensional, this is just the reciprocal of the standard +deviation of the data. } } -\item{cov}{ -The combined covariance matrixes for each unique label. The -matrixes are joined with \code{rbind}. If the input data is -one-dimensional, this is just the standard deviation of the data. +\description{ +Trains a Gaussian Model } -\item{invcov}{ -The combined inverse covariance matrixes for each unique label. The -matrixes are joined with \code{rbind}. If the input data is -one-dimensional, this is just the reciprocal of the standard deviation -of the data. -}} \details{ -This function is used to train a gaussian model on a data set. The -result can be passed to either the \code{mahal} or \code{bayes.lab} functions to +This function is used to train a gaussian model on a data set. The result +can be passed to either the \code{mahal} or \code{bayes.lab} functions to classify either the training set (\code{x}) or a test set with the same -number of dimensions. Train simply finds the mean and inverse -covariance matrix/standard deviation for the data corresponding to each -unique label in labs. +number of dimensions. Train simply finds the mean and inverse covariance +matrix/standard deviation for the data corresponding to each unique label +in labs. } \seealso{ mahal, bayes.lab, mahalplot, bayes.plot } \keyword{misc} -% Converted by Sd2Rd version 0.3-3. diff --git a/man/trapply.Rd b/man/trapply.Rd index 8beca861..2485b174 100644 --- a/man/trapply.Rd +++ b/man/trapply.Rd @@ -1,64 +1,53 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/trapply.R \name{trapply} \alias{trapply} - -\title{A method of the generic function by for objects of class \'trackdata\'} -\description{ - A given function 'FUN' is applied to the data corresponding to each segment of data. -} +\title{A method of the generic function by for objects of class 'trackdata'} \usage{ - trapply (trackdata, fun, \dots, simplify = FALSE, returntrack = FALSE) +trapply(trackdata, fun, ..., simplify = FALSE, returntrack = FALSE) } \arguments{ - \item{trackdata}{a track data object } - \item{fun}{a function that is applied to each segment} - \item{\dots}{ arguments of the function fun} - \item{simplify}{simplify = TRUE , output is a matrix; simplify = FALSE a list is returned} - \item{returntrack}{returntrack = FALSE , return a trackdata object} -} +\item{trackdata}{a track data object} -\details{ - trapply() applies a function iteratively -to each segment of a trackdata object without the need -for using a for-loop. It can be used to calculate, for example, -the mean value of the data values of each segment separately. -Any function that can be applied sensibly to -trackdata[j]\$data where j is a segment number can be -used as the fun argument to trapply(). It is also possible -to write your own function and use trapply() to apply -it separately to each segment. -Care needs to be taken in using trapply() in the -following two ways. Firstly, the argument simplify=T should -only be set if it can be guaranteed that -a vector of the same length or matrix of the same -number of rows as the number of segments in the trackdata -object is returned. For example, simplify=T can -be used in calculating the mean per segment of -a trackdata object, because there will only be one -value (the mean) per segment. However, simplify should -be set to F in calculating the range because here -two values are returned per segment. Similarly use simplify=F -n smoothing the data in which the number of values returned per segment -is different. -Secondly, trapply() only applies a function -to a single parameter; the function can be used to -apply to a function to multi-parameter trackdata such -as F1-F4, but then the function needs to be put inside -apply() - see examples below. -} -\value{ - list or vector or matrix -} +\item{fun}{a function that is applied to each segment} -\author{Jonathan Harrington} +\item{\dots}{arguments of the function fun} +\item{simplify}{simplify = TRUE , output is a matrix; simplify = FALSE a +list is returned} -\seealso{ - \code{\link{apply}} - +\item{returntrack}{returntrack = FALSE , return a trackdata object} +} +\value{ +list or vector or matrix +} +\description{ +A given function 'FUN' is applied to the data corresponding to each segment +of data. +} +\details{ +trapply() applies a function iteratively to each segment of a trackdata +object without the need for using a for-loop. It can be used to calculate, +for example, the mean value of the data values of each segment separately. +Any function that can be applied sensibly to trackdata[j]$data where j is +a segment number can be used as the fun argument to trapply(). It is also +possible to write your own function and use trapply() to apply it +separately to each segment. Care needs to be taken in using trapply() in +the following two ways. Firstly, the argument simplify=TRUE should only be set +if it can be guaranteed that a vector of the same length or matrix of the +same number of rows as the number of segments in the trackdata object is +returned. For example, simplify=TRUE can be used in calculating the mean per +segment of a trackdata object, because there will only be one value (the +mean) per segment. However, simplify should be set to FALSE in calculating the +range because here two values are returned per segment. Similarly use +simplify=FALSE n smoothing the data in which the number of values returned per +segment is different. Secondly, trapply() only applies a function to a +single parameter; the function can be used to apply to a function to +multi-parameter trackdata such as F1-F4, but then the function needs to be +put inside apply() - see examples below. } - - \examples{ + # mean f0 one value per segment m = trapply(vowlax.fund, mean, simplify=TRUE) # mean F1 - F4 @@ -81,7 +70,12 @@ f2sm = trapply(vowlax.fdat[,2], dct, 4, TRUE, returntrack=TRUE) # F2 divided by its F2 range pfun <- function(x) x/(diff(abs(range(x)))) newf2 = trapply(vowlax.fdat[,2], pfun, returntrack=TRUE) -} +} +\seealso{ +\code{\link{apply}} +} +\author{ +Jonathan Harrington +} \keyword{methods} - diff --git a/man/update_itemsInLevel.Rd b/man/update_itemsInLevel.Rd new file mode 100644 index 00000000..ac567ab1 --- /dev/null +++ b/man/update_itemsInLevel.Rd @@ -0,0 +1,45 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-annotations_crud.R +\name{update_itemsInLevel} +\alias{update_itemsInLevel} +\title{Update items programmatically} +\usage{ +update_itemsInLevel( + emuDBhandle, + itemsToUpdate, + rewriteAllAnnots = TRUE, + verbose = TRUE +) +} +\arguments{ +\item{emuDBhandle}{emuDB handle as returned by \link{load_emuDB}} + +\item{itemsToUpdate}{A data frame with the columns: +\itemize{ +\item \code{session} (character) +\item \code{bundle} (character) +\item \code{level} (character) +\item \code{start_item_seq_idx} (character) +\item \code{attribute} (character) +\item \code{labels} (character) +}} + +\item{rewriteAllAnnots}{should changes be written to file system (_annot.json +files) (intended for expert use only)} + +\item{verbose}{if set to \code{TRUE}, more status messages are printed} +} +\description{ +Update annotation items programmatically. You have to pass in a +data frame, called \code{itemsToUpdate}, describing the new state of the items. +The required columns are described below. + +This function belongs to emuR’s CRUD family of functions, which let the user +manipulate items programmatically: +\itemize{ +\item Create items (\link{create_itemsInLevel}) +\item Read items (\link{query}) +\item Update items (\link{update_itemsInLevel}) +\item Delete items (\link{delete_itemsInLevel})) +} +} diff --git a/man/vowlax.Rd b/man/vowlax.Rd index fc753876..f7b97de6 100644 --- a/man/vowlax.Rd +++ b/man/vowlax.Rd @@ -1,6 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-dataDocs.R +\docType{data} \name{vowlax} \alias{vowlax} -\title{Segment list of four lax vowels, read speech, one male and one female speaker of Standard North German from database kielread.} -\usage{vowlax} -\description{An EMU dataset} +\title{Segment list of four lax vowels, read speech, one male and one female +speaker of Standard North German from database kielread.} +\format{ +segmentlist +} +\description{ +An EMU dataset +} \keyword{datasets} diff --git a/man/vowlax.df.Rd b/man/vowlax.df.Rd index 041f207f..9d9f95c5 100644 --- a/man/vowlax.df.Rd +++ b/man/vowlax.df.Rd @@ -1,6 +1,13 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-dataDocs.R +\docType{data} \name{vowlax.df} \alias{vowlax.df} \title{Data frame of various parameters and labels from the segment list vowlax} -\usage{vowlax.df} -\description{An EMU dataset} +\format{ +dataframe +} +\description{ +An EMU dataset +} \keyword{datasets} diff --git a/man/vowlax.dft.5.Rd b/man/vowlax.dft.5.Rd index 1a53899b..b6047064 100644 --- a/man/vowlax.dft.5.Rd +++ b/man/vowlax.dft.5.Rd @@ -1,6 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-dataDocs.R +\docType{data} \name{vowlax.dft.5} \alias{vowlax.dft.5} -\title{Spectral matrix centred at the temporal midpoint of the vowels from the segment list vowlax.} -\usage{vowlax.dft.5} -\description{An EMU dataset} +\title{Spectral matrix centred at the temporal midpoint of the vowels from the +segment list vowlax.} +\format{ +spectral matrix +} +\description{ +An EMU dataset +} \keyword{datasets} diff --git a/man/vowlax.fdat.5.Rd b/man/vowlax.fdat.5.Rd index 1035a5ed..8ba3c04a 100644 --- a/man/vowlax.fdat.5.Rd +++ b/man/vowlax.fdat.5.Rd @@ -1,6 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-dataDocs.R +\docType{data} \name{vowlax.fdat.5} \alias{vowlax.fdat.5} -\title{Matrix of formant data extracted at the temporal midpoint from the segment list vowlax.} -\usage{vowlax.fdat.5} -\description{An EMU dataset} +\title{Matrix of formant data extracted at the temporal midpoint from the segment +list vowlax.} +\format{ +matrix of formant data +} +\description{ +An EMU dataset +} \keyword{datasets} diff --git a/man/vowlax.fdat.Rd b/man/vowlax.fdat.Rd index 3363cb83..71e56421 100644 --- a/man/vowlax.fdat.Rd +++ b/man/vowlax.fdat.Rd @@ -1,6 +1,13 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-dataDocs.R +\docType{data} \name{vowlax.fdat} \alias{vowlax.fdat} \title{Trackdata of formants from the segment list vowlax} -\usage{vowlax.fdat} -\description{An EMU dataset} +\format{ +trackdata object +} +\description{ +An EMU dataset +} \keyword{datasets} diff --git a/man/vowlax.fund.5.Rd b/man/vowlax.fund.5.Rd index ef79164a..91f0dbb1 100644 --- a/man/vowlax.fund.5.Rd +++ b/man/vowlax.fund.5.Rd @@ -1,6 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-dataDocs.R +\docType{data} \name{vowlax.fund.5} \alias{vowlax.fund.5} -\title{Vector of fundamental frequency extracted at the temporal midpoint from the segment list vowlax.} -\usage{vowlax.fund.5} -\description{An EMU dataset} +\title{Vector of fundamental frequency extracted at the temporal midpoint from the +segment list vowlax.} +\format{ +vector of fundamental frequency +} +\description{ +An EMU dataset +} \keyword{datasets} diff --git a/man/vowlax.fund.Rd b/man/vowlax.fund.Rd index d7e494cc..d8fc3c8e 100644 --- a/man/vowlax.fund.Rd +++ b/man/vowlax.fund.Rd @@ -1,6 +1,13 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-dataDocs.R +\docType{data} \name{vowlax.fund} \alias{vowlax.fund} \title{Trackdata of fundamental frequency from the segment list vowlax} -\usage{vowlax.fund} -\description{An EMU dataset} +\format{ +trackdata object +} +\description{ +An EMU dataset +} \keyword{datasets} diff --git a/man/vowlax.l.Rd b/man/vowlax.l.Rd index a989298e..e4766c10 100644 --- a/man/vowlax.l.Rd +++ b/man/vowlax.l.Rd @@ -1,6 +1,13 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-dataDocs.R +\docType{data} \name{vowlax.l} \alias{vowlax.l} \title{Vector of phoneme labels from the segment list vowlax} -\usage{vowlax.l} -\description{An EMU dataset} +\format{ +vector of phoneme labels +} +\description{ +An EMU dataset +} \keyword{datasets} diff --git a/man/vowlax.left.Rd b/man/vowlax.left.Rd index af8c686c..43dbe1ec 100644 --- a/man/vowlax.left.Rd +++ b/man/vowlax.left.Rd @@ -1,6 +1,13 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-dataDocs.R +\docType{data} \name{vowlax.left} \alias{vowlax.left} \title{Vector of labels preceding the vowels from the segment list vowlax} -\usage{vowlax.left} -\description{An EMU dataset} +\format{ +vector of phoneme labels +} +\description{ +An EMU dataset +} \keyword{datasets} diff --git a/man/vowlax.right.Rd b/man/vowlax.right.Rd index e969635f..290a81bb 100644 --- a/man/vowlax.right.Rd +++ b/man/vowlax.right.Rd @@ -1,6 +1,13 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-dataDocs.R +\docType{data} \name{vowlax.right} \alias{vowlax.right} \title{Vector of labels following the vowels from the segment list vowlax} -\usage{vowlax.right} -\description{An EMU dataset} +\format{ +vector of phoneme labels +} +\description{ +An EMU dataset +} \keyword{datasets} diff --git a/man/vowlax.rms.5.Rd b/man/vowlax.rms.5.Rd index 4ae1770b..52baefda 100644 --- a/man/vowlax.rms.5.Rd +++ b/man/vowlax.rms.5.Rd @@ -1,6 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-dataDocs.R +\docType{data} \name{vowlax.rms.5} \alias{vowlax.rms.5} -\title{Vector of RMS energy values at the temporal midpoint extracted at the temporal midpoint from the segment list vowlax} -\usage{vowlax.rms.5} -\description{An EMU dataset} +\title{Vector of RMS energy values at the temporal midpoint extracted at the +temporal midpoint from the segment list vowlax} +\format{ +vector of RMS energy values +} +\description{ +An EMU dataset +} \keyword{datasets} diff --git a/man/vowlax.rms.Rd b/man/vowlax.rms.Rd index b8cfa6af..455641f3 100644 --- a/man/vowlax.rms.Rd +++ b/man/vowlax.rms.Rd @@ -1,6 +1,13 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-dataDocs.R +\docType{data} \name{vowlax.rms} \alias{vowlax.rms} \title{Trackdata of RMS energy from the segment list vowlax} -\usage{vowlax.rms} -\description{An EMU dataset} +\format{ +trackdata object +} +\description{ +An EMU dataset +} \keyword{datasets} diff --git a/man/vowlax.spkr.Rd b/man/vowlax.spkr.Rd index c7968b0b..c8aa45dc 100644 --- a/man/vowlax.spkr.Rd +++ b/man/vowlax.spkr.Rd @@ -1,6 +1,13 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-dataDocs.R +\docType{data} \name{vowlax.spkr} \alias{vowlax.spkr} \title{Vector of speaker labels from the segment list vowlax.} -\usage{vowlax.spkr} -\description{An EMU dataset} +\format{ +vector of speaker labels +} +\description{ +An EMU dataset +} \keyword{datasets} diff --git a/man/vowlax.word.Rd b/man/vowlax.word.Rd index 504572d8..dadd29af 100644 --- a/man/vowlax.word.Rd +++ b/man/vowlax.word.Rd @@ -1,6 +1,13 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-dataDocs.R +\docType{data} \name{vowlax.word} \alias{vowlax.word} \title{Vector of word labels from the segment list vowlax.} -\usage{vowlax.word} -\description{An EMU dataset} +\format{ +vector of word labels +} +\description{ +An EMU dataset +} \keyword{datasets} diff --git a/man/wordlax.l.Rd b/man/wordlax.l.Rd index 43184ce3..4e066e82 100644 --- a/man/wordlax.l.Rd +++ b/man/wordlax.l.Rd @@ -1,8 +1,13 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-dataDocs.R +\docType{data} \name{wordlax.l} \alias{wordlax.l} -\title{Vector of word labels from segment list wordlax } -\usage{data(vowlax)} +\title{Vector of word labels from segment list wordlax} +\format{ +vector of word labels +} \description{ - For wordlax (see data(vowlax)) +For wordlax (see data(vowlax)) } \keyword{datasets} diff --git a/man/write.emusegs.Rd b/man/write.emusegs.Rd index 204940f6..7c93927f 100644 --- a/man/write.emusegs.Rd +++ b/man/write.emusegs.Rd @@ -1,30 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emusegs.R \name{write.emusegs} \alias{write.emusegs} -\title{ -Write an Emu segment list to a file -} +\title{Write an Emu segment list to a file} \usage{ - write.emusegs(seglist, file) -} -\description{ -Writes an Emu segment list to a file +write.emusegs(seglist, file) } \arguments{ - \item{seglist}{An Emu segment list} - \item{file}{The name of a file to write the segment list into.} +\item{seglist}{An Emu segment list} + +\item{file}{The name of a file to write the segment list into.} } \value{ - None. +None. } -\section{Side Effects}{ - The segment list is written to a file in the standard format, suitable - for input to \code{gettrack} or other Emu utility programs. +\description{ +Writes an Emu segment list to a file } -\seealso{ - \code{\link{emu.query}} +\section{Side Effects}{ + The segment list is written to a file in the +standard format, suitable for input to \code{gettrack} or other Emu utility +programs. } \examples{ + data(dip) #dip a segment list - first 10 segments only dip[1:10,] @@ -32,6 +32,9 @@ Writes an Emu segment list to a file #The file write.emusegs.example.txt would have been written to R_HOME \dontrun{unlink("write.emusegs.example.txt")} + +} +\seealso{ +\code{\link{query}} } \keyword{misc} - diff --git a/man/write.trackdata.Rd b/man/write.trackdata.Rd deleted file mode 100644 index 174631e8..00000000 --- a/man/write.trackdata.Rd +++ /dev/null @@ -1,41 +0,0 @@ -\name{write.trackdata} -\alias{write.trackdata} - -\title{ Write track data objects to file } -\description{ - The track data object can be saved to a text file in a format - suitable for loading into other applications. Single point data is saved in a simple table. - Multiple point per segment data is stored in columns with more than one entry per segment. - Use \code{\link{read.trackdata}} to load the file into R. -} -\usage{ - write.trackdata(trackdata, file) -} - -\arguments{ - \item{trackdata}{ track data object or track data object as character} - \item{file}{ file name } -} - -\value{ - a file with the track data is written to the given path -} - -\author{ Jonathan Harrington} - -\seealso{\code{\link{read.trackdata}} } -\examples{ - data(dip) - #Formant track data of the segment list dip (see data(dip)) - first segment only - dip.fdat[1] - \dontrun{write.trackdata(dip.fdat, "emu.write.track.example.txt")} - - #There is a file emu.write.track.example.txt would have been written to R_HOME/ - #that includes the track data - - \dontrun{unlink("emu.write.track.example.txt")} - -} - -\keyword{IO} - diff --git a/man/write.trackdata.get.Rd b/man/write.trackdata.get.Rd deleted file mode 100644 index aef26390..00000000 --- a/man/write.trackdata.get.Rd +++ /dev/null @@ -1,30 +0,0 @@ -\name{write.trackdata.get} -\Rdversion{1.1} -\alias{write.trackdata.get} -\title{This is the write.trackdata version for the a trackdata argument of type character} -\description{ -It does the same like \code{write.trackdata} but with the additional commands that are necessary to handle the string to get the data. -} -\usage{ -write.trackdata.get(trackdata, file) -} -\arguments{ - \item{trackdata}{ - as character value of the track data object variable -} - \item{file}{ - file name -} -} -\author{ -Tina John -} -\note{ -For larger objects this function is faster than \code{write.trackdata} -} - -\seealso{ -\code{\link{write.trackdata}} -} - -\keyword{IO} diff --git a/man/write_bundleList.Rd b/man/write_bundleList.Rd new file mode 100644 index 00000000..b2f50262 --- /dev/null +++ b/man/write_bundleList.Rd @@ -0,0 +1,42 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/emuR-bundleList.R +\name{write_bundleList} +\alias{write_bundleList} +\title{write bundleList} +\usage{ +write_bundleList( + emuDBhandle, + name, + bundleList, + seglist, + updateDBconfig = TRUE, + verbose = TRUE +) +} +\arguments{ +\item{emuDBhandle}{emuDB handle object (see \link{load_emuDB})} + +\item{name}{name of bundleList (excluding the _bundleList.json suffix)} + +\item{bundleList}{tibble/data.frame with the columns \code{session}, \code{name}, +\code{comment} (optional), \code{finishedEditing} (optional). Use \link{list_bundles}} + +\item{seglist}{segment list returned by \link{query} function. If set the +\code{bundleList} parameter will be ignored and a bundleList will be created by +collapsing the segments as timeAnchors into the \code{_bundleList.json}} + +\item{updateDBconfig}{if set to TRUE (the default) DBconfig will be updated +with the fields} + +\item{verbose}{be verbose +\itemize{ + \item \code{"bundleComments": true} + \item \code{"bundleFinishedEditing": true} +}} +} +\description{ +write bundleList JSON file to emuDB +} +\details{ +Write bundleList JSON file to emuDB sub-dir \code{bundleLists/} +} diff --git a/style.css b/style.css deleted file mode 100644 index fabfe3e8..00000000 --- a/style.css +++ /dev/null @@ -1,25 +0,0 @@ -BODY{ background: white; - color: black; - margin-left: 30pt; - margin-right: 30pt; -} - -A:link{ color: blue } -A:visited{ color: rgb(50%, 0%, 50%) } - -H1{ background: #fec802; - font-family: monospace; - font-size: x-large; - text-align: center } - -H2{ background: #fec802; - font-family: monospace; - font-size: large; - text-align: center } - -H3{ color: rgb(40%, 40%, 40%); - font-family: monospace; - font-size: large } - -IMG.toplogo{ vertical-align: center } - diff --git a/tests/testthat.R b/tests/testthat.R new file mode 100644 index 00000000..7825cb59 --- /dev/null +++ b/tests/testthat.R @@ -0,0 +1,2 @@ +library(testthat) +test_check("emuR") diff --git a/tests/testthat/test_aaa_initData.R b/tests/testthat/test_aaa_initData.R new file mode 100644 index 00000000..490eca3f --- /dev/null +++ b/tests/testthat/test_aaa_initData.R @@ -0,0 +1,12 @@ +context("init data for testing") +# AAA in the name so it is run as first test +path2demoData = file.path(tempdir(),"emuR_demoData") +path2testhatFolder = file.path(tempdir(),"emuR_testthat") + +unlink(path2demoData, recursive = TRUE) +unlink(path2testhatFolder, recursive = TRUE) + +create_emuRdemoData(precache = TRUE) +create_BPFcollectionManipulated(path2demoData) + +dir.create(path2testhatFolder) diff --git a/tests/testthat/test_emuR-TextGridToBundleAnnotDFs.R b/tests/testthat/test_emuR-TextGridToBundleAnnotDFs.R new file mode 100644 index 00000000..769859f4 --- /dev/null +++ b/tests/testthat/test_emuR-TextGridToBundleAnnotDFs.R @@ -0,0 +1,128 @@ +##' testthat tests for TextGridToBundleAnnotDFs +##' +context("testing TextGridToBundleAnnotDFs function") + +path2demoData = file.path(tempdir(),"emuR_demoData") + +path2tg = file.path(path2demoData, "TextGrid_collection", "msajc003.TextGrid") + +tgBundleAnnotDFs = TextGridToBundleAnnotDFs(path2tg, sampleRate = 20000, annotates = "msajc003.wav", name = "msajc003") + +############################## +test_that("correct SEGMENT values are parsed and calculated in data.frame tables", { + + # get Phonetic table + phoneticTbl = dplyr::filter(tgBundleAnnotDFs$items, level == "Phonetic") + + expect_that(phoneticTbl[1,]$type, equals('SEGMENT')) + + # first segment of Phonetic + # item[0] = {id: XYZ, labels: [{name: ‘lab', value: ‘V'}], sampleStart: 3749, sampleDur: 1389} + expect_that(phoneticTbl[1,]$sample_start, equals(0)) + + # second segment + expect_that(phoneticTbl[2,]$sample_start, equals(3749)) + expect_that(phoneticTbl[2,]$sample_dur, equals(1389)) + tmpItemID = phoneticTbl[2,]$item_id + labelsRow = dplyr::filter(tgBundleAnnotDFs$labels, item_id == tmpItemID) + + expect_that(labelsRow$label, equals('V')) + + # 18th segment + # item[16] = {id: XYZ, labels: [{name: ‘lab', value: ‘@'}], sampleStart: 30124, sampleDur: 844} + expect_that(phoneticTbl[18,]$sample_start, equals(30124)) + expect_that(phoneticTbl[18,]$sample_dur, equals(844)) + tmpItemID = phoneticTbl[18,]$item_id + labelsRow = dplyr::filter(tgBundleAnnotDFs$labels, item_id == tmpItemID) + + expect_that(labelsRow$label, equals('@')) + + # 35th segment + # item[33] = {id: XYZ, labels: [{name: ‘lab', value: ‘l'}], sampleStart: 50126, sampleDur: 1962} + expect_that(phoneticTbl[35,]$sample_start, equals(50126)) + expect_that(phoneticTbl[35,]$sample_dur, equals(1962)) + tmpItemID = phoneticTbl[35,]$item_id + labelsRow = dplyr::filter(tgBundleAnnotDFs$labels, item_id == tmpItemID) + + expect_that(labelsRow$label, equals('l')) + +}) + +############################## +test_that("correct EVENT values are parsed and calculated in SQLite items table", { + + # get Tone table + toneTbl = dplyr::filter(tgBundleAnnotDFs$items, level == "Tone") + + # first event + # item[0] = {id: XYZ, labels: [{name: ’tone', value: ‘H*'}], samplePoint: 8381} + expect_that(toneTbl[1,]$sample_point, equals(8381)) + tmpItemID = toneTbl[1,]$item_id + labelsRow = dplyr::filter(tgBundleAnnotDFs$labels, item_id == tmpItemID) + + expect_that(labelsRow$label, equals('H*')) + + # 4th event + # item[3] = {id: XYZ, labels: [{name: ’tone', value: ‘H*'}], samplePoint: 38255} + expect_that(toneTbl[4,]$sample_point, equals(38255)) + tmpItemID = toneTbl[4,]$item_id + labelsRow = dplyr::filter(tgBundleAnnotDFs$labels, item_id == tmpItemID) + + expect_that(labelsRow$label, equals('H*')) + + # 7th event + # item[6] = {id: XYZ, labels: [{name: ’tone', value: ‘L%'}], samplePoint: 51552} + expect_that(toneTbl[7,]$sample_point, equals(51552)) + tmpItemID = toneTbl[7,]$item_id + labelsRow = dplyr::filter(tgBundleAnnotDFs$labels, item_id == tmpItemID) + + expect_that(labelsRow$label, equals('L%')) + +}) + +############################## +test_that("SEGMENTs & EVENTs have correct itemIDs in data.frame tables", { + + # get Tone table + toneTbl = dplyr::filter(tgBundleAnnotDFs$items, level == "Tone") + + # get Phonetic table + phoneticTbl = dplyr::filter(tgBundleAnnotDFs$items, level == "Phonetic") + + # increment IDs for EVENTs + expect_equal(toneTbl[2,]$item_id, toneTbl[1,]$item_id + 1) + expect_equal(toneTbl[3,]$item_id, toneTbl[2,]$item_id + 1) + + # increment ids for SEGMENTs + expect_equal(phoneticTbl[2,]$item_id, phoneticTbl[1,]$item_id + 1) + expect_equal(phoneticTbl[3,]$item_id, phoneticTbl[2,]$item_id + 1) + +}) + + +############################## +test_that("data.frame labels table has correct values", { + # get Tone table + toneTbl = dplyr::filter(tgBundleAnnotDFs$items, level == "Tone") + + # get Phonetic table + phoneticTbl = dplyr::filter(tgBundleAnnotDFs$items, level == "Phonetic") + + # check phoneticsTable are ok + expect_equal(phoneticTbl[1,]$item_id, 86) + expect_equal(sum(phoneticTbl[1,]$labelIdx), 0) + expect_equal(phoneticTbl[1,]$level, 'Phonetic') + + # check toneTbl are ok + expect_equal(toneTbl[1,]$item_id, 122) + expect_equal(sum(toneTbl[1,]$labelIdx), 0) + expect_equal(toneTbl[1,]$level, 'Tone') + + # check labelTbl + labelsTbl = dplyr::filter(tgBundleAnnotDFs$labels, name == "Phonetic") + expect_equal(paste0(labelsTbl$label, collapse = ''), 'VmVNstH@:frEnzSi:w@zkH@nsId@dbju:dH@f@l') + labelsTbl = dplyr::filter(tgBundleAnnotDFs$labels, name == "Tone") + expect_equal(paste0(labelsTbl$label, collapse = ''), 'H*H*L-H*H*L-L%') + +}) + diff --git a/tests/testthat/test_emuR-annotations_crud.R b/tests/testthat/test_emuR-annotations_crud.R new file mode 100644 index 00000000..4513fa81 --- /dev/null +++ b/tests/testthat/test_emuR-annotations_crud.R @@ -0,0 +1,254 @@ +##' testthat tests for CRUD annotation operations +##' +context("testing CRUD annotation operations functions") + +dbName = "ae" + +path2orig = file.path(tempdir(), + "emuR_demoData", + paste0(dbName, emuDB.suffix)) +path2testData = file.path(tempdir(), "emuR_testthat") +path2db = file.path(path2testData, + paste0(dbName, emuDB.suffix)) + +# extract internalVars from environment .emuR_pkgEnv +internalVars = get("internalVars", + envir = .emuR_pkgEnv) + +test_that("errors are thrown on bad inputs", { + + # delete, copy and load + unlink(path2db, recursive = TRUE) + file.copy(path2orig, path2testData, recursive = TRUE) + + ae = load_emuDB(path2db, + inMemoryCache = internalVars$testingVars$inMemoryCache, + verbose = FALSE) + + # missing cols + expect_error(create_itemsInLevel(ae, + itemsToCreate = data.frame(session = "", stringsAsFactors = FALSE))) + # bad sequenceIndex type + expect_error(create_itemsInLevel(ae, itemsToCreate = data.frame(session = "", + bundle = "", + level = "", + sequenceIndex = "", + attribute = "", + labels = "", + stringsAsFactors = FALSE))) + + # bad session / bundle + expect_error(create_itemsInLevel(ae, + itemsToCreate = data.frame(session = "0000", + bundle = "badBndlName", + level = "", + sequenceIndex = 1.5, + attribute = "", + labels = "", + stringsAsFactors = FALSE))) + + # existing sequence index + expect_error(create_itemsInLevel(ae, + itemsToCreate = data.frame(session = "0000", + bundle = "msajc003", + level = "Utterance", + sequenceIndex = 1, + attribute = "Utterance", + labels = "newLabel", + stringsAsFactors = FALSE), + verbose = FALSE)) + + # clean up + DBI::dbDisconnect(ae$connection) + ae = NULL + +}) + + +test_that("create_itemsInLevel in ITEM levels works as expected", { + + # delete, copy and load + unlink(path2db, recursive = TRUE) + file.copy(path2orig, + path2testData, + recursive = TRUE) + + ae = load_emuDB(path2db, + inMemoryCache = internalVars$testingVars$inMemoryCache, + verbose = FALSE) + + # insert new root node after existing item + create_itemsInLevel(ae, + itemsToCreate = data.frame(session = "0000", + bundle = "msajc003", + level = "Utterance", + start_item_seq_idx = 1.5, + attribute = "Utterance", + labels = "newLabel_post", + stringsAsFactors = FALSE), + verbose = FALSE) + + + sl = query(ae, + "Utterance == newLabel_post", + calcTimes = FALSE) + + expect_equal(nrow(sl), 1) + expect_equal(sl$start_item_seq_idx, 2) + + # insert new root node be4 existing item + create_itemsInLevel(ae, + itemsToCreate = data.frame(session = "0000", + bundle = "msajc003", + level = "Utterance", + start_item_seq_idx = 0.5, + attribute = "Utterance", + labels = "newLabel_pre", + stringsAsFactors = FALSE), + verbose = FALSE) + + + sl = query(ae, + "Utterance == newLabel_pre", + calcTimes = FALSE) + + expect_equal(nrow(sl), 1) + expect_equal(sl$start_item_seq_idx, 1) + + # clean up + DBI::dbDisconnect(ae$connection) + ae = NULL + +}) + +test_that("create_itemsInLevel in EVENT levels works as expected", { + + # delete, copy and load + unlink(path2db, recursive = TRUE) + file.copy(path2orig, + path2testData, + recursive = TRUE) + ae = load_emuDB(path2db, + inMemoryCache = internalVars$testingVars$inMemoryCache, + verbose = FALSE) + + sl = query(ae, + "Tone =~ .*", + resultType = "tibble") + + sl$labels = "new_labels" + + # insert new root node after existing item (should cause error as the times exist) + expect_error(create_itemsInLevel(ae, + itemsToCreate = sl, + verbose = FALSE)) + + sl$start = sl$start + 10 + + create_itemsInLevel(ae, + itemsToCreate = sl, + verbose = FALSE) + + + sl_new = query(ae, + "Tone =~ .*", + resultType = "tibble") + + # twice as many + expect_equal(2*nrow(sl), nrow(sl_new)) + + # clean up + DBI::dbDisconnect(ae$connection) + ae = NULL + +}) + +test_that("create_itemsInLevel in SEGMENT levels works as expected", { + + # delete, copy and load + unlink(path2db, recursive = TRUE) + file.copy(path2orig, + path2testData, + recursive = TRUE) + ae = load_emuDB(path2db, + inMemoryCache = internalVars$testingVars$inMemoryCache, + verbose = FALSE) + + sl = query(ae, + "Phonetic =~ .*", + resultType = "tibble") + + sl$labels = "new_labels" + # sl$sample_end = -1 + + expect_error(create_itemsInLevel(ae, + itemsToCreate = sl, + verbose = FALSE)) + + # add new level + add_levelDefinition(ae, + name = "new_level", + type = "SEGMENT", + verbose = FALSE) + + sl$level = "new_level" + sl$attribute = "new_level" + + create_itemsInLevel(ae, + itemsToCreate = sl, + verbose = FALSE, + calculateEndTimeForSegments = FALSE) + + + sl_new = query(ae, + "new_level =~ .*", + resultType = "tibble") + + # same nr of segs + expect_equal(nrow(sl), nrow(sl_new)) + # same length of segs (not all coz last segment as long as ) + expect_equal(sl[1:6,]$sample_start, sl_new[1:6,]$sample_start) + expect_equal(sl[1:6,]$sample_end, sl_new[1:6,]$sample_end) + + # set_levelCanvasesOrder(ae, "default", c("Phonetic", "Tone", "new_level")) + # serve(ae) + + # clean up + DBI::dbDisconnect(ae$connection) + ae = NULL + +}) + +test_that("update_itemsInLevel updates labels correctly", { + + # delete, copy and load + unlink(path2db, recursive = TRUE) + file.copy(path2orig, + path2testData, + recursive = TRUE) + + ae = load_emuDB(path2db, + inMemoryCache = internalVars$testingVars$inMemoryCache, + verbose = FALSE) + + sl = query(ae, + "[Phonetic == I ^ Syllable == S]", + resultType = "tibble") + + sl$labels = paste0(sl$labels, "_in_strong_syl") + + update_itemsInLevel(ae, + itemsToUpdate = sl, + verbose = FALSE) + + sl_new = query(ae, + "Phonetic == I_in_strong_syl", + resultType = "tibble") + + expect_equal(nrow(sl), nrow(sl_new)) + + # clean up + DBI::dbDisconnect(ae$connection) + ae = NULL + +}) diff --git a/tests/testthat/test_emuR-autobuild.R b/tests/testthat/test_emuR-autobuild.R new file mode 100644 index 00000000..a64ab6bd --- /dev/null +++ b/tests/testthat/test_emuR-autobuild.R @@ -0,0 +1,884 @@ +##' testthat tests for autobuild +##' +context("testing autobuild functions") + +dbName = "ae" + +path2orig = file.path(tempdir(), + "emuR_demoData", + paste0(dbName, emuDB.suffix)) +path2testData = file.path(tempdir(), + "emuR_testthat") +path2db = file.path(path2testData, + paste0(dbName, emuDB.suffix)) + +# extract internalVars from environment .emuR_pkgEnv +internalVars = get("internalVars", envir = .emuR_pkgEnv) + +test_that("autobuild_linkFromTimes works correctly", { + skip_on_cran() + ########################### + test_that("bad calls to autobuild_linkFromTimes", { + + # delete, copy and load + unlink(path2db, recursive = TRUE) + file.copy(path2orig, + path2testData, + recursive = TRUE) + ae = load_emuDB(path2db, + inMemoryCache = internalVars$testingVars$inMemoryCache, + verbose = FALSE) + + + expect_error(autobuild_linkFromTimes(ae, 'Phoneti', 'Tone')) + expect_error(autobuild_linkFromTimes(ae, 'Phonetic', 'Ton')) + expect_error(autobuild_linkFromTimes(ae, 'Phonetic', 'Tone')) + expect_error(autobuild_linkFromTimes(ae, 'Phoneme', 'Phonetic'), + regexp = "The super level type and.*") # super = ITEM + expect_error(autobuild_linkFromTimes(ae, 'Syllable', 'Phoneme'), + regexp = "The super level type and.*") # super = ITEM + + # clean up + DBI::dbDisconnect(ae$connection) + ae = NULL + }) + + + ############################## + test_that("correct links are present after autobuild_linkFromTimes with EVENTS", { + + # delete, copy and load + unlink(path2db, recursive = TRUE) + file.copy(path2orig, + path2testData, + recursive = TRUE) + ae = load_emuDB(path2db, + inMemoryCache = internalVars$testingVars$inMemoryCache, + verbose = FALSE) + + # add linkDef. + add_linkDefinition(ae, + "ONE_TO_MANY", + superlevelName = "Phonetic", + sublevelName = "Tone") + + autobuild_linkFromTimes(ae, + 'Phonetic', + 'Tone', + FALSE, + verbose = FALSE) + + qr = DBI::dbGetQuery(ae$connection, paste0("SELECT * ", + "FROM links ", + "WHERE db_uuid = '", ae$UUID,"' ", + " AND from_id = 149 ", + " AND to_id = 181")) + # + expect_equal(dim(qr)[1], 1) + expect_equal(qr$session, '0000') + expect_equal(qr$bundle, 'msajc003') + expect_equal(qr$from_id, 149) + expect_equal(qr$to_id, 181) + + qr = DBI::dbGetQuery(ae$connection, paste0("SELECT * ", + "FROM links ", + "WHERE db_uuid = '", ae$UUID,"' ", + " AND from_id = 156 ", + " AND to_id = 182")) + + expect_equal(qr$session, '0000') + expect_equal(qr$bundle, 'msajc003') + expect_equal(qr$from_id, 156) # redundant + expect_equal(qr$to_id, 182) # redundant + + # clean up + DBI::dbDisconnect(ae$connection) + ae = NULL + + }) + + ############################# + test_that("no duplicates are present after autobuild_linkFromTimes with EVENTs", { + + # delete, copy and load + unlink(path2db, recursive = TRUE) + file.copy(path2orig, + path2testData, + recursive = TRUE) + ae = load_emuDB(path2db, + inMemoryCache = internalVars$testingVars$inMemoryCache, + verbose = FALSE) + + # add linkDef. + add_linkDefinition(ae, + "ONE_TO_MANY", + superlevelName = "Phonetic", + sublevelName = "Tone") + + # addlink that will also be automatically linked + DBI::dbExecute(ae$connection, paste0("INSERT INTO links ", + "VALUES ('", ae$UUID, "', '0000', 'msajc003', 140, 181, NULL)")) + + autobuild_linkFromTimes(ae, 'Phonetic', 'Tone', FALSE, verbose = FALSE) + + # extract only one link to be present + qr = DBI::dbGetQuery(ae$connection, paste0("SELECT * ", + "FROM links ", + "WHERE db_uuid = '", ae$UUID,"' ", + " AND from_id = 149 ", + " AND to_id = 181")) + + # extract only one link to be present + expect_equal(dim(qr)[1], 1) + + # if re-run nothing should change (duplicate links) + autobuild_linkFromTimes(ae, + 'Phonetic', + 'Tone', + FALSE, + verbose = FALSE) + qr = DBI::dbGetQuery(ae$connection, paste0("SELECT * ", + "FROM links ", + "WHERE db_uuid = '", ae$UUID,"' ")) + + expect_equal(dim(qr)[1], 840) + + # clean up + DBI::dbDisconnect(ae$connection) + ae = NULL + + }) + + + ############################## + test_that("correct links are present after autobuild_linkFromTimes with SEGMENTS linkDef type ONE_TO_MANY", { + + # delete, copy and load + unlink(path2db, recursive = TRUE) + file.copy(path2orig, + path2testData, + recursive = TRUE) + ae = load_emuDB(path2db, + inMemoryCache = internalVars$testingVars$inMemoryCache, + verbose = FALSE) + + # add levelDef. + add_levelDefinition(ae, + "Phonetic2", + "SEGMENT", + verbose = FALSE) + # add linkDef. + add_linkDefinition(ae, + "ONE_TO_MANY", + superlevelName = "Phonetic", + sublevelName = "Phonetic2") + + + # add item to Phonetic2 = left edge + DBI::dbExecute(ae$connection, paste0("INSERT INTO items ", + "VALUES (", + " '", ae$UUID, "', ", + " '0000', ", + " 'msajc003', ", + " 980, ", + " 'Phonetic2', ", + " 'SEGMENT', ", + " 1, ", + " 20000, ", + " NULL, ", + " 3749, ", + " 10)")) + autobuild_linkFromTimes(ae, + 'Phonetic', + 'Phonetic2', + FALSE, + verbose = FALSE) + qr = DBI::dbGetQuery(ae$connection, paste0("SELECT * ", + "FROM links ", + "WHERE db_uuid = '", ae$UUID,"' ", + " AND to_id = 980")) + expect_equal(dim(qr)[1], 1) + expect_equal(qr$from_id, 147) + expect_equal(qr$to_id, 980) + + # add item to Phonetic2 = exact match + DBI::dbExecute(ae$connection, paste0("INSERT INTO items ", + "VALUES (", + " '", ae$UUID, "', ", + " '0000', ", + " 'msajc003', ", + " 981, ", + " 'Phonetic2', ", + " 'SEGMENT', ", + " 1, ", + " 20000, ", + " NULL, ", + " 3749, ", + " 1389)")) + autobuild_linkFromTimes(ae, + 'Phonetic', + 'Phonetic2', + FALSE, + verbose = FALSE) + qr = DBI::dbGetQuery(ae$connection, paste0("SELECT * ", + "FROM links ", + "WHERE db_uuid = '", ae$UUID,"' ", + " AND to_id = 981")) + expect_equal(dim(qr)[1], 1) + expect_equal(qr$from_id, 147) + expect_equal(qr$to_id, 981) + + # add item to Phonetic2 = completely within + DBI::dbExecute(ae$connection, paste0("INSERT INTO items ", + "VALUES (", + " '", ae$UUID, "', ", + " '0000', ", + " 'msajc003', ", + " 982, ", + " 'Phonetic2', ", + " 'SEGMENT', ", + " 1, ", + " 20000, ", + " NULL, ", + " 3800, ", + " 200)")) + autobuild_linkFromTimes(ae, + 'Phonetic', + 'Phonetic2', + FALSE, + verbose = FALSE) + qr = DBI::dbGetQuery(ae$connection, paste0("SELECT * ", + "FROM links ", + "WHERE db_uuid = '", ae$UUID,"' ", + " AND to_id = 982")) + expect_equal(dim(qr)[1], 1) + expect_equal(qr$from_id, 147) + expect_equal(qr$to_id, 982) + + + # add item to Phonetic2 = left overlap + DBI::dbExecute(ae$connection, paste0("INSERT INTO items ", + "VALUES (", + " '", ae$UUID, "', ", + " '0000', ", + " 'msajc003', ", + " 983, ", + " 'Phonetic2', ", + " 'SEGMENT', ", + " 1, ", + " 20000, ", + " NULL, ", + " 3500, ", + " 1000)")) + autobuild_linkFromTimes(ae, + 'Phonetic', + 'Phonetic2', + FALSE, + verbose = FALSE) + qr = DBI::dbGetQuery(ae$connection, paste0("SELECT * ", + "FROM links ", + "WHERE db_uuid = '", ae$UUID,"'", + " AND to_id = 983")) + expect_equal(dim(qr)[1], 0) + + + # add item to Phonetic2 = right overlap + DBI::dbExecute(ae$connection, paste0("INSERT INTO items ", + "VALUES (", + " '", ae$UUID, "', ", + " '0000', ", + " 'msajc003', ", + " 984, ", + " 'Phonetic2', ", + " 'SEGMENT', ", + " 1, ", + " 20000, ", + " NULL, ", + " 3800, ", + " 2000)")) + autobuild_linkFromTimes(ae, 'Phonetic', 'Phonetic2', FALSE, verbose = FALSE) + qr = DBI::dbGetQuery(ae$connection, paste0("SELECT * ", + "FROM links ", + "WHERE db_uuid = '", ae$UUID,"' ", + " AND to_id = 984")) + expect_equal(dim(qr)[1], 0) + + # clean up + DBI::dbDisconnect(ae$connection) + ae = NULL + + }) + + ############################## + test_that("correct links are present after autobuild_linkFromTimes with SEGMENTS linkDef type MANY_TO_MANY", { + + #delete, copy and load + unlink(path2db, recursive = TRUE) + file.copy(path2orig, + path2testData, + recursive = TRUE) + ae = load_emuDB(path2db, + inMemoryCache = internalVars$testingVars$inMemoryCache, + verbose = FALSE) + + # add levelDef. + add_levelDefinition(ae, + "Phonetic2", + "SEGMENT", + verbose = FALSE) + # add linkDef. + add_linkDefinition(ae, + "MANY_TO_MANY", + superlevelName = "Phonetic", + sublevelName = "Phonetic2") + + # add item to Phonetic2 = completely within + # ae$items[737, ] = c('ae_0000_msajc003_999', '0000', 'msajc003', 'Phonetic2', 999, 'SEGMENT', 1, 20000, NA, 3800, 200) + DBI::dbExecute(ae$connection, paste0("INSERT INTO items ", + "VALUES (", + " '", ae$UUID, "', ", + " '0000', ", + " 'msajc003', ", + " 980, ", + " 'Phonetic2', ", + " 'SEGMENT', ", + " 1, ", + " 20000, ", + " NULL, ", + " 3800, ", + " 200)")) + autobuild_linkFromTimes(ae, + 'Phonetic', + 'Phonetic2', + FALSE, + verbose = FALSE) + qr = DBI::dbGetQuery(ae$connection, paste0("SELECT * ", + "FROM links ", + "WHERE db_uuid = '", ae$UUID,"'", + " AND to_id = 980")) + expect_equal(dim(qr)[1], 1) + expect_equal(qr$from_id, 147) + expect_equal(qr$to_id, 980) + + # add item to Phonetic2 = left overlap + # ae$items[737, ] = c('ae_0000_msajc003_999', '0000', 'msajc003', 'Phonetic2', 999, 'SEGMENT', 1, 20000, NA, 3500, 1000) + DBI::dbExecute(ae$connection, paste0("INSERT INTO items ", + "VALUES (", + " '", ae$UUID, "', ", + " '0000', ", + " 'msajc003', ", + " 981, ", + " 'Phonetic2', ", + " 'SEGMENT', ", + " 1, ", + " 20000, ", + "NULL, ", + " 3500, ", + " 1000)")) + autobuild_linkFromTimes(ae, + 'Phonetic', + 'Phonetic2', + FALSE, + verbose = FALSE) + qr = DBI::dbGetQuery(ae$connection, paste0("SELECT * ", + "FROM links ", + "WHERE db_uuid = '", ae$UUID,"'", + " AND to_id = 981")) + expect_equal(dim(qr)[1], 1) + expect_equal(qr$from_id, 147) + expect_equal(qr$to_id, 981) + + # add item to Phonetic2 = right overlap + # ae$items[737, ] = c('ae_0000_msajc003_999', '0000', 'msajc003', 'Phonetic2', 999, 'SEGMENT', 1, 20000, NA, 3800, 2000) + DBI::dbExecute(ae$connection, paste0("INSERT INTO items VALUES (", + " '", ae$UUID, "', ", + " '0000', ", + " 'msajc003', ", + " 982, ", + " 'Phonetic2', ", + " 'SEGMENT', ", + " 1, ", + " 20000, ", + " NULL, ", + " 3800, ", + " 2000)")) + autobuild_linkFromTimes(ae, + 'Phonetic', + 'Phonetic2', + FALSE, + verbose = FALSE) + qr = DBI::dbGetQuery(ae$connection, paste0("SELECT * ", + "FROM links ", + "WHERE db_uuid = '", ae$UUID,"' ", + " AND to_id = 982")) + expect_equal(dim(qr)[1], 2) + expect_equal(qr$from_id, c(147, 148)) + expect_equal(qr$to_id, c(982, 982)) + + + # add item to Phonetic2 = left and right overlap + # ae$items[737, ] = c('ae_0000_msajc003_999', '0000', 'msajc003', 'Phonetic2', 999, 'SEGMENT', 1, 20000, NA, 3500, 2000) + DBI::dbExecute(ae$connection, paste0("INSERT INTO items ", + "VALUES (", + " '", ae$UUID, "', ", + " '0000', ", + " 'msajc003', ", + " 983, ", + " 'Phonetic2', ", + " 'SEGMENT', ", + " 1, ", + " 20000, ", + " NULL, ", + " 3500, ", + " 2000)")) + autobuild_linkFromTimes(ae, + 'Phonetic', + 'Phonetic2', + FALSE, + verbose = FALSE) + qr = DBI::dbGetQuery(ae$connection, paste0("SELECT * ", + "FROM links ", + "WHERE db_uuid = '", ae$UUID,"' ", + " AND to_id = 983")) + expect_equal(dim(qr)[1], 2) + expect_equal(qr$from_id, c(147, 148)) + expect_equal(qr$to_id, c(983, 983)) + + + # add item to Phonetic2 = not within + # ae$items[737, ] = c('ae_0000_msajc003_999', '0000', 'msajc003', 'Phonetic2', 999, 'SEGMENT', 1, 20000, NA, 200, 200) + DBI::dbExecute(ae$connection, paste0("INSERT INTO items ", + "VALUES (", + " '", ae$UUID, "', ", + " '0000', ", + "'msajc003', ", + " 984, ", + " 'Phonetic2', ", + " 'SEGMENT', ", + " 1, ", + " 20000, ", + " NULL, ", + " 200, ", + " 200)")) + autobuild_linkFromTimes(ae, + 'Phonetic', + 'Phonetic2', + FALSE, + verbose = FALSE) + qr = DBI::dbGetQuery(ae$connection, paste0("SELECT * ", + "FROM links ", + "WHERE db_uuid = '", ae$UUID,"' ", + " AND to_id = 984")) + expect_equal(dim(qr)[1], 0) + + # clean up + DBI::dbDisconnect(ae$connection) + ae = NULL + + }) + + ############################## + test_that("correct links are present after autobuild_linkFromTimes with SEGMENTS linkDef type ONE_TO_ONE", { + + # delete, copy and load + unlink(path2db, recursive = TRUE) + file.copy(path2orig, + path2testData, + recursive = TRUE) + ae = load_emuDB(path2db, + inMemoryCache = internalVars$testingVars$inMemoryCache, + verbose = FALSE) + + # add levelDef. + add_levelDefinition(ae, + "Phonetic2", + "SEGMENT", + verbose = FALSE) + # add linkDef. + add_linkDefinition(ae, + "ONE_TO_ONE", + superlevelName = "Phonetic", + sublevelName = "Phonetic2") + + + # add item to Phonetic2 = exact match + # ae$items[737, ] = c('ae_0000_msajc003_999', '0000', 'msajc003', 'Phonetic2', 999, 'SEGMENT', 1, 20000, NA, 3749, 1389) + DBI::dbExecute(ae$connection, paste0("INSERT INTO items VALUES (", + " '", ae$UUID, "', ", + " '0000', ", + " 'msajc003', ", + " 980, ", + " 'Phonetic2', ", + " 'SEGMENT', ", + " 1, ", + " 20000, ", + " NULL, ", + " 3749, ", + " 1389)")) + autobuild_linkFromTimes(ae, + 'Phonetic', + 'Phonetic2', + FALSE, + verbose = FALSE) + qr = DBI::dbGetQuery(ae$connection, paste0("SELECT * ", + "FROM links ", + "WHERE db_uuid = '", ae$UUID,"'", + " AND to_id = 980")) + expect_equal(dim(qr)[1], 1) + expect_equal(qr$from_id, 147) + expect_equal(qr$to_id, 980) + + # add item to Phonetic2 = left overlap + # ae$items[737, ] = c('ae_0000_msajc003_999', '0000', 'msajc003', 'Phonetic2', 999, 'SEGMENT', 1, 20000, NA, 3748, 1389) + DBI::dbExecute(ae$connection, paste0("INSERT INTO items VALUES (", + " '", ae$UUID, "', ", + " '0000', ", + " 'msajc003', ", + " 981, ", + " 'Phonetic2', ", + " 'SEGMENT', ", + " 1, ", + " 20000, ", + " NULL, ", + " 3748, ", + " 1389)")) + autobuild_linkFromTimes(ae, + 'Phonetic', + 'Phonetic2', + FALSE, + verbose = FALSE) + qr = DBI::dbGetQuery(ae$connection, paste0("SELECT * ", + "FROM links ", + "WHERE db_uuid = '", ae$UUID,"'", + " AND to_id = 981")) + expect_equal(dim(qr)[1], 0) + + # add item to Phonetic2 = right overlap + # ae$items[737, ] = c('ae_0000_msajc003_999', '0000', 'msajc003', 'Phonetic2', 999, 'SEGMENT', 1, 20000, NA, 3749, 1390) + DBI::dbExecute(ae$connection, paste0("INSERT INTO items VALUES (", + " '", ae$UUID, "', ", + " '0000', ", + " 'msajc003', ", + " 982, ", + " 'Phonetic2', ", + " 'SEGMENT', ", + " 1, ", + " 20000, ", + " NULL, ", + " 3749, ", + " 1390)")) + autobuild_linkFromTimes(ae, + 'Phonetic', + 'Phonetic2', + FALSE, + verbose = FALSE) + qr = DBI::dbGetQuery(ae$connection, paste0("SELECT * ", + "FROM links ", + "WHERE db_uuid = '", ae$UUID,"'", + " AND to_id = 982")) + expect_equal(dim(qr)[1], 0) + + + + # add item to Phonetic2 = within + # ae$items[737, ] = c('ae_0000_msajc003_999', '0000', 'msajc003', 'Phonetic2', 999, 'SEGMENT', 1, 20000, NA, 3750, 200) + DBI::dbExecute(ae$connection, paste0("INSERT INTO items VALUES (", + " '", ae$UUID, "', ", + " '0000', ", + " 'msajc003', ", + " 983, ", + " 'Phonetic2', ", + " 'SEGMENT', ", + " 1, ", + " 20000, ", + " NULL, ", + " 3750, ", + " 200)")) + autobuild_linkFromTimes(ae, + 'Phonetic', + 'Phonetic2', + FALSE, + verbose = FALSE) + qr = DBI::dbGetQuery(ae$connection, paste0("SELECT * ", + "FROM links ", + "WHERE db_uuid = '", ae$UUID,"' ", + " AND to_id = 983")) + expect_equal(dim(qr)[1], 0) + + + + # add item to Phonetic2 = not within + # ae$items[737, ] = c('ae_0000_msajc003_999', '0000', 'msajc003', 'Phonetic2', 999, 'SEGMENT', 1, 20000, NA, 200, 200) + DBI::dbExecute(ae$connection, paste0("INSERT INTO items VALUES (", + " '", ae$UUID, "', ", + " '0000', ", + " 'msajc003', ", + " 984, ", + " 'Phonetic2', ", + " 'SEGMENT', ", + " 1, ", + " 20000, ", + " NULL, ", + " 200, ", + " 200)")) + autobuild_linkFromTimes(ae, + 'Phonetic', + 'Phonetic2', + FALSE, + verbose = FALSE) + qr = DBI::dbGetQuery(ae$connection, paste0("SELECT * ", + "FROM links ", + "WHERE db_uuid = '", ae$UUID,"' ", + " AND to_id = 984")) + expect_equal(dim(qr)[1], 0) + + # clean up + DBI::dbDisconnect(ae$connection) + ae = NULL + + }) + + ############################## + test_that("backup works correctly", { + + # delete, copy and load + unlink(path2db, recursive = TRUE) + file.copy(path2orig, + path2testData, + recursive = TRUE) + ae = load_emuDB(path2db, + inMemoryCache = internalVars$testingVars$inMemoryCache, + verbose = FALSE) + + # add levelDef. + add_levelDefinition(ae, + "Phonetic2", + "SEGMENT", + verbose = FALSE) + # add linkDef. + add_linkDefinition(ae, + "ONE_TO_ONE", + superlevelName = "Phonetic", + sublevelName = "Phonetic2") + + + # add item to Phonetic2 = exact match + DBI::dbExecute(ae$connection, paste0("INSERT INTO items ", + "VALUES (", + " '", ae$UUID, "', ", + " '0000', ", + " 'msajc003', ", + " 980, ", + " 'Phonetic2', ", + " 'SEGMENT', ", + " 1, ", + " 20000, ", + " NULL, ", + " 3749, ", + " 1389)")) + autobuild_linkFromTimes(ae, + 'Phonetic', + 'Phonetic2', + TRUE, + TRUE, + verbose = FALSE) + + + + qr1 = DBI::dbGetQuery(ae$connection, paste0("SELECT * ", + "FROM items ", + "WHERE db_uuid = '", ae$UUID,"' ", + " AND level = 'Phonetic'")) + qr2 = DBI::dbGetQuery(ae$connection, paste0("SELECT * ", + "FROM items ", + "WHERE db_uuid = '", ae$UUID,"' ", + " AND level = 'Phonetic-autobuildBackup'")) + # same amount of of items + expect_equal(dim(qr1), dim(qr2)) + # cols that should be the same are + expect_equal(qr1$session, qr2$session) + expect_equal(qr1$bundle, qr2$bundle) + expect_equal(qr1$seqIdx, qr2$seqIdx) + expect_equal(qr1$sampleRate, qr2$sampleRate) + + + qr1 = DBI::dbGetQuery(ae$connection, paste0("SELECT * ", + "FROM labels ", + "WHERE db_uuid = '", ae$UUID,"' ", + " AND name='Phonetic'")) + qr2 = DBI::dbGetQuery(ae$connection, paste0("SELECT * ", + "FROM labels ", + "WHERE db_uuid = '", ae$UUID,"' ", + " AND name = 'Phonetic-autobuildBackup'")) + # same labels + expect_equal(dim(qr1), dim(qr2)) + expect_equal(dim(qr1$label), dim(qr2$label)) + + + # itemIDs are the same in items and labels table + qr1 = DBI::dbGetQuery(ae$connection, paste0("SELECT * ", + "FROM items ", + "WHERE db_uuid = '", ae$UUID,"' ", + " AND level = 'Phonetic-autobuildBackup'")) + qr2 = DBI::dbGetQuery(ae$connection, paste0("SELECT * ", + "FROM labels ", + "WHERE db_uuid = '", ae$UUID,"' ", + "AND name = 'Phonetic-autobuildBackup'")) + expect_equal(qr1$itemID, qr2$itemID) + + + # new levelDefinition is present + dbConfig = load_DBconfig(ae) + expect_equal(dbConfig$levelDefinitions[[length(dbConfig$levelDefinitions)]]$name, 'Phonetic-autobuildBackup') + expect_equal(dbConfig$levelDefinitions[[length(dbConfig$levelDefinitions)]]$type, 'SEGMENT') + + # clean up + DBI::dbDisconnect(ae$connection) + ae = NULL + + }) + + ############################## + test_that("rewrite works correctly", { + + # delete, copy and load + unlink(path2db, recursive = TRUE) + file.copy(path2orig, + path2testData, + recursive = TRUE) + ae = load_emuDB(path2db, + inMemoryCache = internalVars$testingVars$inMemoryCache, + verbose = FALSE) + + # add levelDef. + add_levelDefinition(ae, + "Phonetic2", + "SEGMENT", + verbose = FALSE) + # add linkDef. + add_linkDefinition(ae, + "ONE_TO_ONE", + superlevelName = "Phonetic", + sublevelName = "Phonetic2") + + + # add item to Phonetic2 + DBI::dbExecute(ae$connection, paste0("INSERT INTO items ", + "VALUES (", + " '", ae$UUID, "', ", + "'0000', ", + " 'msajc003', ", + " 980, ", + " 'Phonetic2', ", + " 'SEGMENT', ", + " 1, ", + " 20000, ", + " NULL, ", + " 3750, ", + " 200)")) + + # add label to Phonetic2 + DBI::dbExecute(ae$connection, paste0("INSERT INTO labels ", + "VALUES (", + " '", ae$UUID, "', ", + "'0000', ", + "'msajc003', ", + " 980, ", + " 0, ", + " 'Phonetic2', ", + " 'testLabel12')")) + + set_levelCanvasesOrder(ae, + "default", + c("Phonetic", "Phonetic2", "Tone")) + autobuild_linkFromTimes(ae, + 'Phonetic', + 'Phonetic2', + TRUE, + TRUE, + verbose = FALSE) + + # check if levelCanvasOrder entry was removed + expect_false("Phonetic" %in% get_levelCanvasesOrder(ae, "default")) + + + # _DBconfig.json has new definitions + dbConfig = load_DBconfig(ae) + expect_equal(dbConfig$levelDefinitions[[11]]$name, + "Phonetic-autobuildBackup") + expect_equal(dbConfig$linkDefinitions[[10]]$type, + "ONE_TO_ONE") + expect_equal(dbConfig$linkDefinitions[[10]]$superlevelName, + "Phonetic") + expect_equal(dbConfig$linkDefinitions[[10]]$sublevelName, + "Phonetic2") + + # annot.jsons has new fields + testAnnoFilePath = file.path(path2db, + "0000_ses", + "msajc003_bndl", + "msajc003_annot.json") + annotJson = fromJSON(testAnnoFilePath, + simplifyVector = FALSE) + lastLvlName=annotJson$levels[[11]]$name + expect_equal(lastLvlName, "Phonetic-autobuildBackup") + + # clean up + DBI::dbDisconnect(ae$connection) + ae = NULL + + }) + + ############################## + test_that("autobuild of converted TGcol works", { + + path2tgCol = file.path(tempdir(), + "emuR_demoData", + "TextGrid_collection") + + # convert TextGridCollection to the emuDB format + convert_TextGridCollection(path2tgCol, + dbName = "tgCol", + targetDir = path2testData, + verbose = FALSE) + + tgCol = load_emuDB(file.path(path2testData, + paste0("tgCol", emuDB.suffix)), + verbose = FALSE) + + add_linkDefinition(tgCol, + "ONE_TO_MANY", + superlevelName = "Utterance", + sublevelName = "Intonational") + # this autobuild causes the warning + autobuild_linkFromTimes(tgCol, + "Utterance", + "Intonational", + verbose = FALSE) + + expect_true(TRUE) # to avoid skip message + + test_that("MD5 sums are updated",{ + annotJSONpath = file.path(path2testData, + paste0("tgCol", emuDB.suffix), + paste0("0000", session.suffix), + paste0("msajc003", bundle.dir.suffix), + paste0("msajc003", bundle.annotation.suffix, ".json")) + + curMd5sum = tools::md5sum(annotJSONpath) + names(curMd5sum) = NULL + bundleDF = DBI::dbGetQuery(tgCol$connection, paste0("SELECT * ", + "FROM bundle ", + "WHERE name = 'msajc003'")) + + expect_equal(curMd5sum, bundleDF$md5_annot_json) + + }) + + # clean up + DBI::dbDisconnect(tgCol$connection) + tgCol = NULL + + }) + +}) diff --git a/tests/testthat/test_emuR-autoproc_annots.R b/tests/testthat/test_emuR-autoproc_annots.R new file mode 100644 index 00000000..f20db781 --- /dev/null +++ b/tests/testthat/test_emuR-autoproc_annots.R @@ -0,0 +1,198 @@ +##' testthat tests for autoproc_annots +##' +context("testing autoproc_annots") + +dbName = "ae" +path2orig = file.path(tempdir(), "emuR_demoData", paste0(dbName, emuDB.suffix)) +path2testData = file.path(tempdir(), "emuR_testthat") +path2db = file.path(path2testData, paste0(dbName, emuDB.suffix)) + +# extract internalVars from environment .emuR_pkgEnv +internalVars = get("internalVars", envir = .emuR_pkgEnv) + +test_that("replace_itemLabels works correctly", { + + # delete, copy and load + unlink(path2db, recursive = TRUE) + file.copy(path2orig, path2testData, recursive = TRUE) + ae = load_emuDB(path2db, inMemoryCache = internalVars$testingVars$inMemoryCache, verbose = FALSE) + + test_that("replace_itemLabels throws correct errors", { + + expect_error(replace_itemLabels(ae, attributeDefinitionName = "badName", origLabels = "a", newLabels = "a"), regexp = "No attributeDefinitionName: badName", ignore.case = TRUE) + expect_error(replace_itemLabels(ae, attributeDefinitionName = "Phonetic", origLabels = "a", newLabels = c("a","b")), regexp = "origLabels and newLabels have to be", ignore.case = TRUE) + expect_error(replace_itemLabels(ae, attributeDefinitionName = "Phonetic", origLabels = 1, newLabels = "a"), regexp = "origLabels and newLabels have to be", ignore.case = TRUE) + expect_error(replace_itemLabels(ae, attributeDefinitionName = "Phonetic", origLabels = "a", newLabels = 1), regexp = "origLabels and newLabels have to be", ignore.case = TRUE) + }) + + + test_that("replace_itemLabels replaces correct labels", { + + replace_itemLabels(ae, attributeDefinitionName = "Phonetic", origLabels = "n", newLabels = "n_rep", verbose = FALSE) + + sl = query(ae, "Phonetic == n") + expect_equal(nrow(sl), 0) + sl = query(ae, "Phonetic == n_rep") + expect_equal(nrow(sl), 12) + + replace_itemLabels(ae, attributeDefinitionName = "Phonetic", origLabels = c("I", "p"), newLabels = c("I_rep", "p_rep"), verbose = FALSE) + + sl = query(ae, "Phonetic == I_rep") + expect_equal(nrow(sl), 14) + sl = query(ae, "Phonetic == p_rep") + expect_equal(nrow(sl), 2) + }) + + # clean up + DBI::dbDisconnect(ae$connection) + ae = NULL + +}) + + +test_that("duplicate_level works correctly", { + + # delete, copy and load + unlink(path2db, recursive = TRUE) + file.copy(path2orig, path2testData, recursive = TRUE) + ae = load_emuDB(path2db, inMemoryCache = internalVars$testingVars$inMemoryCache, verbose = FALSE) + + test_that("duplicate_level throws correct errors", { + + expect_error(duplicate_level(ae, + levelName = "badName", + duplicateLevelName = "bla"), + regexp = "not a valid level name", + ignore.case = TRUE) + expect_error(duplicate_level(ae, + levelName = "Phonetic", + duplicateLevelName = "bla", + duplicateLinks = TRUE, + linkDuplicates = TRUE), + regexp = "duplicateLinks & linkDuplicates", + ignore.case = TRUE) + + # unique multiple attribute definitions + # expect_error(duplicate_level(ae, + # levelName = "Word", + # duplicateLevelName = "Word2", + # verbose = FALSE), + # regexp = "attributeDefinition with name") + + }) + + test_that("duplicate_level works correctly", { + duplicate_level(ae, + levelName = "Phonetic", + duplicateLevelName = "Phonetic2", + verbose = FALSE) + dbConfig = load_DBconfig(ae) + expect_true(length(dbConfig$levelDefinitions) == 10) + expect_equal(dbConfig$levelDefinitions[[10]]$name, "Phonetic2") + # check items + oldIts = DBI::dbGetQuery(ae$connection, "SELECT * FROM items WHERE level = 'Phonetic'") + newIts = DBI::dbGetQuery(ae$connection, "SELECT * FROM items WHERE level = 'Phonetic2'") + expect_equal(nrow(oldIts), nrow(newIts)) + # check labels + oldLabs = DBI::dbGetQuery(ae$connection, "SELECT * FROM labels WHERE name = 'Phonetic'") + newLabs = DBI::dbGetQuery(ae$connection, "SELECT * FROM labels WHERE name = 'Phonetic2'") + expect_equal(nrow(oldLabs), nrow(newLabs)) + # check labelGroups + oldLG = list_attrDefLabelGroups(ae, "Phonetic", "Phonetic") + newLG = list_attrDefLabelGroups(ae, "Phonetic2", "Phonetic2") + expect_equal(nrow(oldLG), nrow(newLG)) + + }) + + test_that("duplicateLinks = FALSE works correctly", { + duplicate_level(ae, levelName = "Phonetic", duplicateLevelName = "Phonetic3", duplicateLinks = FALSE, verbose = FALSE) + linkDefs = list_linkDefinitions(ae) + # no linkdefs are added + expect_false("Phonetic3" %in% linkDefs$superlevelName) + expect_false("Phonetic3" %in% linkDefs$sublevelName) + }) + + + test_that("linkDuplicates works correctly", { + duplicate_level(ae, levelName = "Phonetic", duplicateLevelName = "Phonetic4", duplicateLinks = FALSE, linkDuplicates = TRUE, verbose = FALSE) + linkDefs = list_linkDefinitions(ae) + # linkdefs are added + expect_true("Phonetic4" %in% linkDefs$sublevelName) + + sl1 = query(ae, "[Phonetic == n ^ #Word =~.*]", timeRefSegmentLevel = "Phonetic") + sl2 = query(ae, "[Phonetic4 == n ^ #Word =~.*]", timeRefSegmentLevel = "Phonetic4") + + expect_true(all(sl1 == sl2)) + }) + + # clean up + DBI::dbDisconnect(ae$connection) + ae = NULL + unlink(path2db, recursive = TRUE) +}) + +# test_that("resample annots works correctly", { +# +# # delete, copy and load +# unlink(path2db, recursive = TRUE) +# file.copy(path2orig, path2testData, recursive = TRUE) +# ae = load_emuDB(path2db, inMemoryCache = internalVars$testingVars$inMemoryCache, verbose = FALSE) +# +# # test_that("duplicate_level throws correct errors", { +# # +# # expect_error(duplicate_level(ae, levelName = "badName", duplicateLevelName = "bla"), regexp = "not a valid level name", ignore.case = TRUE) +# # expect_error(duplicate_level(ae, levelName = "Phonetic", duplicateLevelName = "bla", duplicateLinks = TRUE, linkDuplicates = TRUE), regexp = "duplicateLinks & linkDuplicates", ignore.case = TRUE) +# # +# # }) +# +# test_that("correct updates are made to cache", { +# +# # resample_annots(ae, oldSampleRate = 20000, newSampleRate = 44100, verbose = TRUE) +# +# }) +# +# test_that("correct updates are made to annotation files", { +# +# }) +# +# +# # clean up +# DBI::dbDisconnect(ae$connection) +# ae = NULL +# unlink(path2db, recursive = TRUE) +# }) + +# test_that("add_itemsToEmptyLevel works correctly", { +# +# # delete, copy and load +# unlink(path2db, recursive = TRUE) +# file.copy(path2orig, path2testData, recursive = TRUE) +# ae = load_emuDB(path2db, inMemoryCache = internalVars$testingVars$inMemoryCache, verbose = FALSE) +# +# test_that("add_itemsToEmptyLevel throws correct errors", { +# sl = query(ae, "Phonetic == n") +# #expect_error(add_itemsToEmptyLevel(ae, levelName = "badName", sl), regexp = "Specified level does not exist", ignore.case = TRUE) +# #expect_error(add_itemsToEmptyLevel(ae, levelName = "Tone", sl), regexp = "Specified level is of type EVENT", ignore.case = TRUE) +# #expect_error(add_itemsToEmptyLevel(ae, levelName = "Phonetic", labels = sl$labels), regexp = "Specified level is of type SEGMENT", ignore.case = TRUE) +# #expect_error(add_itemsToEmptyLevel(ae, levelName = "Phonetic", labels = sl$labels, sampleStart = sl$sample_start, sampleEnd = sl$sample_end), regexp = "Specified level is not empty", ignore.case = TRUE) +# +# }) +# +# test_that("correct segment level is created", { +# sl = query(ae, "Phonetic == n") +# # add_levelDefinition(ae, name = "Phonetic_n", type = "SEGMENT", verbose = FALSE) +# # add_itemsToEmptyLevel(ae, levelName = "Phonetic_n", sl) +# # resample_annots(ae, oldSampleRate = 20000, newSampleRate = 44100, verbose = TRUE) +# +# }) +# +# test_that("correct updates are made to annotation files", { +# +# }) +# +# +# # clean up +# DBI::dbDisconnect(ae$connection) +# ae = NULL +# unlink(path2db, recursive = TRUE) +# }) diff --git a/tests/testthat/test_emuR-bas_webservices.R b/tests/testthat/test_emuR-bas_webservices.R new file mode 100644 index 00000000..811a21b2 --- /dev/null +++ b/tests/testthat/test_emuR-bas_webservices.R @@ -0,0 +1,62 @@ +context("stub tests for BAS webservices (to be extended)") + +sourceDirMain = file.path(tempdir(), "emuR_demoData") +sourceDir = file.path(sourceDirMain, "txt_collection") +testDir = file.path(tempdir(), "emuR_testthat") +dbName = "bas_test" + +unlink(file.path(testDir, paste0(dbName, emuDB.suffix)), recursive = TRUE) + +test_that("testing whether runBASwebservice_all runs without error", + { + skip("skipping as it takes 4 ever to test -> make sure to rerun on deploy") + skip_on_cran() + convert_txtCollection( + sourceDir = sourceDir, + targetDir = testDir, + dbName = dbName, + verbose = FALSE + ) + handle = load_emuDB(file.path(testDir, paste0(dbName, emuDB.suffix)), verbose = FALSE) + + runBASwebservice_all(handle, "transcription", "eng-AU", verbose = FALSE) + }) + +unlink(file.path(testDir, paste0(dbName, emuDB.suffix)), recursive = TRUE) + +test_that( + "testing whether all the other runBASwebservice_* functions run without error when chained together", + { + skip("skipping as it takes 4 ever to test -> make sure to rerun on deploy") + skip_on_cran() + convert_txtCollection( + sourceDir = sourceDir, + targetDir = testDir, + dbName = dbName, + verbose = FALSE + ) + handle = load_emuDB(file.path(testDir, paste0(dbName, emuDB.suffix)), verbose = FALSE) + + runBASwebservice_g2pForTokenization(handle, "transcription", "eng-GB", verbose = FALSE) + runBASwebservice_g2pForPronunciation(handle, "ORT", "eng-GB", verbose = FALSE) + runBASwebservice_chunker(handle, + "KAN", + "eng-GB", + rootLevel = "bundle", + verbose = FALSE) + runBASwebservice_maus(handle, + "KAN", + "eng-GB", + chunkLevel = "TRN", + verbose = FALSE) + runBASwebservice_minni(handle, "eng-GB", rootLevel = "bundle", verbose = FALSE) + runBASwebservice_pho2sylCanonical(handle, "KAN", "eng-GB", verbose = FALSE) + runBASwebservice_pho2sylSegmental(handle, + "MAU", + "eng-GB", + superLevel = "ORT", + verbose = FALSE) + } +) + +unlink(file.path(testDir, paste0(dbName, emuDB.suffix)), recursive = TRUE) diff --git a/tests/testthat/test_emuR-bundleList.R b/tests/testthat/test_emuR-bundleList.R new file mode 100644 index 00000000..0cea9bcb --- /dev/null +++ b/tests/testthat/test_emuR-bundleList.R @@ -0,0 +1,45 @@ +##' testthat tests for get_trackdata +##' +context("testing bundleList functions") + +dbName = "ae" + +path2orig = file.path(tempdir(), + "emuR_demoData", + paste0(dbName, emuDB.suffix)) +path2testData = file.path(tempdir(), + "emuR_testthat") +path2db = file.path(path2testData, + paste0(dbName, emuDB.suffix)) + +# extract internalVars from environment .emuR_pkgEnv +internalVars = get("internalVars", envir = .emuR_pkgEnv) + +test_that("write_bundleList with list of bundles works", { + # delete, copy and load + unlink(path2db, recursive = TRUE) + file.copy(path2orig, + path2testData, + recursive = TRUE) + ae = load_emuDB(path2db, + inMemoryCache = internalVars$testingVars$inMemoryCache, + verbose = FALSE) + + bund <- list_bundles(ae) + write_bundleList(ae, + "RW", + bundleList = bund, + updateDBconfig = TRUE) + expect_true(dir.exists(file.path(path2db, "bundleLists"))) + expect_true(file.exists(file.path(path2db, + "bundleLists", + "RW_bundleList.json"))) + + bl_tmp = jsonlite::read_json(file.path(path2db, + "bundleLists", + "RW_bundleList.json"), + simplifyVector = TRUE) + + expect_true(all(dim(bl_tmp) == c(7,4))) + +}) diff --git a/tests/testthat/test_emuR-convert_BPFCollection.R b/tests/testthat/test_emuR-convert_BPFCollection.R new file mode 100644 index 00000000..119e7a09 --- /dev/null +++ b/tests/testthat/test_emuR-convert_BPFCollection.R @@ -0,0 +1,736 @@ + +# --------------------------------------------------------------------------- +context("testing convert_BPFCollection") +# --------------------------------------------------------------------------- + +sourceDirMain = file.path(tempdir(), "emuR_demoData") +testDir = file.path(tempdir(), "emuR_testthat") +dbName = "bpf_converter_test" + +# Cleaning up (just in case) +unlink(file.path(testDir, dbName), recursive = TRUE) + +# --------------------------------------------------------------------------- +# Testing with original BPFs +# --------------------------------------------------------------------------- + +sourceDir = file.path(sourceDirMain, "BPF_collection") +newDbFolderName = paste0(dbName, emuDB.suffix) +newDbPath = file.path(testDir, newDbFolderName) +configPath = file.path(newDbPath, paste0(dbName, '_DBconfig.json')) + +# --------------------------------------------------------------------------- +# --------------------------------------------------------------------------- +test_that("Code throws error when new levels are declared incorrectly", + { + # length(newLevels) != length(newLevelClasses) + expect_error(convert_BPFCollection(sourceDir = sourceDir, + targetDir = testDir, + dbName = dbName, + verbose = FALSE, + newLevels = c("ABC"), + newLevelClasses = c(1,2)), + regexp = "newLevels", + ignore.case = TRUE) + + # new level classes outside of range 1-5 + expect_error(convert_BPFCollection(sourceDir = sourceDir, + targetDir = testDir, + dbName = dbName, + verbose = FALSE, + newLevels = c("ABC"), + newLevelClasses = c(6)), + regexp = "1.*5", + ignore.case = TRUE) + + # trying to change the class of an already existing BPF standard level + expect_error(convert_BPFCollection(sourceDir = sourceDir, + targetDir = testDir, + dbName = dbName, + verbose = FALSE, + newLevels = c("ORT"), + newLevelClasses = c(2)), + regexp = "standard", + ignore.case = TRUE) + } +) +# --------------------------------------------------------------------------- +# --------------------------------------------------------------------------- +test_that("Code throws error for failed directory checks", + { + # there is already a database of with the same name in the target dir + dir.create(file.path(testDir, "something_silly_emuDB")) + expect_error(convert_BPFCollection(sourceDir = sourceDir, + targetDir = testDir, + dbName = "something_silly", + verbose = FALSE), + regexp = "directory.*already exists", + ignore.case = TRUE) + unlink(file.path(testDir, "something_silly_emuDB"), + recursive = TRUE) + } +) +# --------------------------------------------------------------------------- +# --------------------------------------------------------------------------- +test_that("Error when using unifyLevels incorrectly.", + { + # unifyLevels without refLevel + expect_error(convert_BPFCollection(sourceDir = sourceDir, + targetDir = testDir, + dbName = dbName, + verbose = FALSE, + unifyLevels = c("KAN")), + regexp = "unify.*reference", + ignore.case = TRUE) + + # refLevel in unifyLevels + expect_error(convert_BPFCollection(sourceDir = sourceDir, + targetDir = testDir, + dbName = dbName, + verbose = FALSE, + refLevel = "ORT", + unifyLevels = c("ORT", "KAN")), + regexp = "reference level", + ignore.case = TRUE) + + # class 2-5 level in unifyLevels + expect_error(convert_BPFCollection(sourceDir = sourceDir, + targetDir = testDir, + dbName = dbName, + verbose = FALSE, + refLevel = "ORT", + unifyLevels = c("GES")), + regexp = "unif", + ignore.case = TRUE) + } +) +# --------------------------------------------------------------------------- +# --------------------------------------------------------------------------- +test_that("Error when using refLevel incorrectly.", + { + # link-less refLevel + expect_error(convert_BPFCollection(sourceDir = sourceDir, + targetDir = testDir, + dbName = dbName, + verbose = FALSE, + refLevel = "GES"), + regexp = "link-less.*reference level", + ignore.case = TRUE) + + # extractLevels on, but refLevel not in extractLevels + expect_error(convert_BPFCollection(sourceDir = sourceDir, + targetDir = testDir, + dbName = dbName, + verbose = FALSE, + extractLevels = c("MAU", "TRN"), + refLevel = "ORT"), + regexp = "reference level", + ignore.case = TRUE) + } +) +# --------------------------------------------------------------------------- +# --------------------------------------------------------------------------- +test_that("Error when trying declare an unknown level in refLevel, extractLevels or unifyLevels.", + { + expect_error(convert_BPFCollection(sourceDir = sourceDir, + targetDir = testDir, + dbName = dbName, + verbose = FALSE, + extractLevels = c("ABC")), + regexp = "unknown level.*ABC", + ignore.case = TRUE) + + expect_error(convert_BPFCollection(sourceDir = sourceDir, + targetDir = testDir, + dbName = dbName, + verbose = FALSE, + refLevel = "ABC"), + regexp = "unknown level.*ABC", + ignore.case = TRUE) + + expect_error(convert_BPFCollection(sourceDir = sourceDir, + targetDir = testDir, + dbName = dbName, + verbose = FALSE, + refLevel = "ORT", + unifyLevels = c("ABC")), + regexp = "unknown level.*ABC", + ignore.case = TRUE) + } +) +# --------------------------------------------------------------------------- +# --------------------------------------------------------------------------- +test_that("Error when segmentToEventLevels is used with a non-segment level", + { + expect_error(convert_BPFCollection(sourceDir = sourceDir, + targetDir = testDir, + dbName = dbName, + verbose = FALSE, + segmentToEventLevels = c("PRB")), + regexp = "segment", + ignore.case = TRUE) + } +) + +# Cleaning up (just in case) +unlink(newDbPath, recursive = TRUE) + +# --------------------------------------------------------------------------- +# --------------------------------------------------------------------------- +test_that("Conversion without reference level.", + { + convert_BPFCollection(sourceDir = sourceDir, + targetDir = testDir, + dbName = dbName, + verbose = FALSE) + + # Format of data base. + expect_true(newDbFolderName %in% list.dirs(testDir, + full.names = FALSE, + recursive = FALSE)) + expect_equal(length(list.files(newDbPath, recursive = FALSE)), 2) + expect_equal(length(list.files(file.path(newDbPath, "0000_ses"), recursive = FALSE)), 7) + expect_equal(length(list.files(file.path(newDbPath, "0000_ses", "msajc003_bndl"), recursive = FALSE)), 2) + + # Correctness of config file. + dbConfigLines = readLines(configPath, warn=FALSE) + dbConfig = jsonlite::fromJSON(paste(dbConfigLines, collapse=''), + simplifyVector=FALSE) + + # General & webAppConfig + expect_equal(dbConfig$name, dbName) + expect_equal(length(dbConfig$ssffTrackDefinitions), 0) + expect_true(dbConfig$EMUwebAppConfig$activeButtons$saveBundle) + expect_true(dbConfig$EMUwebAppConfig$activeButtons$showHierarchy) + + # Check that level canvas order is by order of appearance in BPF + expect_equal(dbConfig$EMUwebAppConfig$perspectives[[1]]$levelCanvases$order, + list("TRN", "MAU")) + + # Check that there are five level definitions (bundle, KAN, ORT, TRN, MAU) + expect_equal(length(dbConfig$levelDefinitions), 5) + + # Check that level names and types are correct + expect_equal(sapply(dbConfig$levelDefinitions, + function(x) x$name), c("bundle", + "KAN", + "ORT", + "TRN", + "MAU")) + expect_equal(sapply(dbConfig$levelDefinitions, + function(x) x$type), c("ITEM", + "ITEM", + "ITEM", + "SEGMENT", + "SEGMENT")) + + # Check that each level has the appropriate amount of attribute definitions + expect_equal(sapply(dbConfig$levelDefinitions, + function(x) length(x$attributeDefinitions)), c(9, 1, 1, 1, 1)) + expect_equal(sapply(dbConfig$levelDefinitions, + function(x) x$attributeDefinitions[[1]]$name), + c("bundle", + "KAN", + "ORT", + "TRN", + "MAU")) + + # Check that all header entries have become attributes of the bundle level + expect_equal(sapply(dbConfig$levelDefinitions[[1]]$attributeDefinitions, + function(x) x$name), + c("bundle", + "LHD", + "REP", + "SNB", + "SAM", + "SBF", + "SSB", + "NCH", + "SPN")) + + # No link definitions + expect_equal(length(dbConfig$linkDefinitions), 0) + + # Correctness of one annot file (msajc003_annot) + annotPath = file.path(newDbPath, + "0000_ses", + "msajc003_bndl", + "msajc003_annot.json") + dbAnnotLines = readLines(annotPath, warn=FALSE) + dbAnnot = jsonlite::fromJSON(paste(dbAnnotLines, collapse = ''), + simplifyVector = FALSE) + + # Check that all levels have the appropriate number of items + expect_equal(length(dbAnnot$levels[[1]]$items), 1) + expect_equal(length(dbAnnot$levels[[2]]$items), 7) + expect_equal(length(dbAnnot$levels[[3]]$items), 7) + expect_equal(length(dbAnnot$levels[[4]]$items), 1) + expect_equal(length(dbAnnot$levels[[5]]$items), 35) + + # Check individual items + expect_equal(dbAnnot$levels[[1]]$items[[1]]$id, 1) + expect_equal(dbAnnot$levels[[4]]$items[[1]]$sampleStart, 3800) + expect_equal(dbAnnot$levels[[4]]$items[[1]]$sampleDur, 48199) + + # Check that all header entries have become labels of the bundle item + expect_equal(sapply(dbAnnot$levels[[1]]$items[[1]]$labels, + function(x) x$name), + c("bundle", + "LHD", + "REP", + "SNB", + "SAM", + "SBF", + "SSB", + "NCH", + "SPN")) + expect_equal(sapply(dbAnnot$levels[[1]]$items[[1]]$labels, + function(x) x$value), + c("", + "Partitur 1.2.16", + "unknown", + "2", + "20000", + "01", + "16", + "1", + "unknown")) + + # Check individual label + expect_equal(dbAnnot$levels[[2]]$items[[3]]$labels[[1]]$value, "frendz") + + # Check that there are no links + expect_equal(length(dbAnnot$links), 0) + } +) + +# Cleaning up. +unlink(newDbPath, recursive = TRUE) + + +# --------------------------------------------------------------------------- +# --------------------------------------------------------------------------- +test_that("Conversion with reference level.", + { + convert_BPFCollection(sourceDir = sourceDir, + targetDir = testDir, + dbName = dbName, + verbose = FALSE, + refLevel = "ORT") + + # Correctness of config file + dbConfigLines = readLines(configPath, warn = FALSE) + dbConfig = jsonlite::fromJSON(paste(dbConfigLines, collapse=''), + simplifyVector = FALSE) + + # Check that all link definitions are correct + expect_equal(length(dbConfig$linkDefinitions), 5) + expect_equal(sapply(dbConfig$linkDefinitions, + function(x) x$superlevelName), + c("ORT", + "TRN", + "ORT", + "bundle", + "bundle")) + expect_equal(sapply(dbConfig$linkDefinitions, + function(x) x$sublevelName), + c("KAN", + "ORT", + "MAU", + "ORT", + "TRN")) + expect_equal(sapply(dbConfig$linkDefinitions, + function(x) x$type), + c("ONE_TO_ONE", + "ONE_TO_MANY", + "ONE_TO_MANY", + "ONE_TO_MANY", + "ONE_TO_ONE")) + + # Correctness of one annot file (msajc003_bndl) + annotPath = file.path(newDbPath, + "0000_ses", + "msajc003_bndl", + "msajc003_annot.json") + dbAnnotLines = readLines(annotPath, warn = FALSE) + dbAnnot = jsonlite::fromJSON(paste(dbAnnotLines, collapse=''), + simplifyVector = FALSE) + + # Check that bundle item (ID 1) links to TRN item (ID 16) and ORT items (ID 9-15) + expect_equal(unlist(sapply(dbAnnot$links, + function(x) if(x$fromID == 1) x$toID)), + c(9:16)) + + # Check that TRN item (ID 16) links to ORT items (ID 9-15) + expect_equal(unlist(sapply(dbAnnot$links, + function(x) if(x$fromID == 16) x$toID)), + c(9:15)) + + # Check that ORT items (ID 9-15) link to KAN items (ID 2-8) + expect_equal(unlist(sapply(dbAnnot$links, + function(x) if(x$toID %in% c(2:8)) x$fromID)), + c(9:15)) + + # Check that ORT items (ID 9-15) link to MAU items (ID 17 and upwards) + expect_equal(unique(unlist(sapply(dbAnnot$links, + function(x) if(x$toID > 16) x$fromID))), + c(9:15)) + + # Check some individual links + expect_equal(unlist(sapply(dbAnnot$links, + function(x) if(x$fromID == 10) x$toID)), + c(3,25)) + expect_equal(unlist(sapply(dbAnnot$links, + function(x) if(x$fromID == 15) x$toID)), + c(8, c(43:50))) + } +) + +# Cleaning up +unlink(newDbPath, recursive = TRUE) + +# --------------------------------------------------------------------------- +# --------------------------------------------------------------------------- +test_that("Conversion with unifyLevels", + { + convert_BPFCollection(sourceDir = sourceDir, + targetDir = testDir, + dbName = dbName, + verbose = FALSE, + refLevel = "ORT", + unifyLevels = c("KAN")) + + # Correctness of config file + dbConfigLines = readLines(configPath, warn = FALSE) + dbConfig = jsonlite::fromJSON(paste(dbConfigLines, collapse=''), + simplifyVector=FALSE) + + # Check that there are only four levels (since KAN has become a label on ORT level) + expect_equal(length(dbConfig$levelDefinitions), 4) + + # Check that ORT level has two labels ORT and KAN + expect_equal(sapply(dbConfig$levelDefinitions[[2]]$attributeDefinitions, + function(x) x$name), + c("ORT", "KAN")) + + # Check that there is no link between ORT and KAN in link definitions + expect_equal(sapply(dbConfig$linkDefinitions, + function(x) x$superlevelName), + c("TRN", + "ORT", + "bundle", + "bundle")) + expect_equal(sapply(dbConfig$linkDefinitions, + function(x) x$sublevelName), + c("ORT", + "MAU", + "ORT", + "TRN")) + expect_equal(sapply(dbConfig$linkDefinitions, + function(x) x$type), + c("ONE_TO_MANY", + "ONE_TO_MANY", + "ONE_TO_MANY", + "ONE_TO_ONE")) + + # Correctness of one annot file (msajc003_bndl) + annotPath = file.path(newDbPath, "0000_ses", "msajc003_bndl", "msajc003_annot.json") + dbAnnotLines = readLines(annotPath, warn = FALSE) + dbAnnot = jsonlite::fromJSON(paste(dbAnnotLines, collapse=''), + simplifyVector = FALSE) + + # Check levels + expect_equal(sapply(dbAnnot$levels, + function(x) x$name), + c("bundle", + "ORT", + "TRN", + "MAU")) + expect_equal(dbAnnot$levels[[2]]$name, "ORT") + expect_equal(dbAnnot$levels[[2]]$type, "ITEM") + + # Check that all items on level ORT have two labels, and that their names are ORT and KAN + expect_equal(unique(sapply(dbAnnot$levels[[2]]$items, + function(x) length(x$labels))), + 2) + expect_equal(unique(sapply(dbAnnot$levels[[2]]$items, + function(x) x$labels[[1]]$name)), + "ORT") + expect_equal(unique(sapply(dbAnnot$levels[[2]]$items, + function(x) x$labels[[2]]$name)), + "KAN") + + # Check some individual labels + expect_equal(dbAnnot$levels[[2]]$items[[3]]$labels[[1]]$value, "friends") + expect_equal(dbAnnot$levels[[2]]$items[[3]]$labels[[2]]$value, "frendz") + expect_equal(dbAnnot$levels[[2]]$items[[5]]$labels[[1]]$value, "was") + expect_equal(dbAnnot$levels[[2]]$items[[5]]$labels[[2]]$value, "wQz") + } +) +# Cleaning up +unlink(newDbPath, recursive = TRUE) + +# --------------------------------------------------------------------------- +# --------------------------------------------------------------------------- +test_that("Conversion with extractLevels.", + { + convert_BPFCollection(sourceDir = sourceDir, + targetDir = testDir, + dbName = dbName, + verbose = FALSE, + extractLevels = c("MAU")) + + # Correctness of config file + dbConfigLines = readLines(configPath, warn=FALSE) + dbConfig = jsonlite::fromJSON(paste(dbConfigLines, collapse=''), simplifyVector=FALSE) + + # Check that level definitions include only extractedLevels and bundle + expect_equal(length(dbConfig$levelDefinitions), 2) + expect_equal(sapply(dbConfig$levelDefinitions, + function(x) x$name), + c("bundle", + "MAU")) + + # Check that there are no links defined (refLevel = NULL) + expect_equal(length(dbConfig$linkDefinitions), 0) + + # Correctness of one annot file (msajc003_bndl) + annotPath = file.path(newDbPath, "0000_ses", "msajc003_bndl", "msajc003_annot.json") + dbAnnotLines = readLines(annotPath, warn=FALSE) + dbAnnot = jsonlite::fromJSON(paste(dbAnnotLines, collapse=''), + simplifyVector = FALSE) + + # Check that there are only two levels. + expect_equal(sapply(dbAnnot$levels, + function(x) x$name), + c("bundle", + "MAU")) + + # Check that there are no links + expect_equal(length(dbAnnot$links), 0) + } +) +# Cleaning up +unlink(newDbPath, recursive = TRUE) + +test_that("Loading emuDB", + { + convert_BPFCollection(sourceDir = sourceDir, + targetDir = testDir, + dbName = dbName, + verbose = FALSE, + refLevel = "ORT", + unifyLevels = c("KAN")) + handle = load_emuDB(file.path(testDir, paste0(dbName, emuDB.suffix)), + verbose = FALSE) + DBI::dbDisconnect(handle$connection) + handle = NULL + } +) + +# Cleaning up +unlink(newDbPath, recursive = TRUE) + + +# --------------------------------------------------------------------------- +# Testing with manipulated BPFs +# --------------------------------------------------------------------------- + +sourceDir = file.path(sourceDirMain, "BPF_collection_manipulated") + +# Manipulated BPFs contain: +# msajc003.parmanipulated: +# - multi-label label string on ORT level +# - semicolon in link on KAN tier +# - unknown level "XYZ" (with class 1 syntax) +# - blank linkes +# - missing SAM header tag +# - segmental overlap on "MAU" tier +# msajc010.parmanipulated: +# - empty BPF (no header or body) +# msajc012.parmanipulated: +# - MAU -> ORT (ONE_TO_MANY) + +# --------------------------------------------------------------------------- +# --------------------------------------------------------------------------- +test_that("Correct call with necessary arguments", + { + convert_BPFCollection(sourceDir = sourceDir, + targetDir = testDir, + dbName = dbName, + verbose = FALSE, + newLevels = c("XYZ"), + newLevelClasses = c(1), + refLevel = "ORT", + segmentToEventLevels = c("MAU"), + bpfExt = "parmanipulated") + + # Correctness of config file + dbConfigLines = readLines(configPath, warn = FALSE) + dbConfig = jsonlite::fromJSON(paste(dbConfigLines, collapse=''), + simplifyVector = FALSE) + + # Check that there are 6 levels defined (bundle, KAN, ORT, TRN, MAU, XYZ) + expect_equal(length(dbConfig$levelDefinitions), 6) + + # Check that MAU has been turned into an event level + expect_equal(dbConfig$levelDefinitions[[5]]$type, "EVENT") + expect_equal(dbConfig$levelDefinitions[[5]]$name, "MAU") + + # Check that new level XYZ has been added + expect_equal(dbConfig$levelDefinitions[[6]]$name, "XYZ") + expect_equal(dbConfig$levelDefinitions[[6]]$type, "ITEM") + + # Check that ORT has three label names defined (ORT, ABC, XYZ) + expect_equal(sapply(dbConfig$levelDefinitions[[3]]$attributeDefinitions, + function(x) x$name), + c("ORT", + "ABC", + "XYZ")) + + # Check that link from ORT to MAU is MANY_TO_MANY + expect_equal(dbConfig$linkDefinitions[[3]]$superlevelName, "ORT") + expect_equal(dbConfig$linkDefinitions[[3]]$sublevelName, "MAU") + expect_equal(dbConfig$linkDefinitions[[3]]$type, "MANY_TO_MANY") + + # Correctness of annot file msajc003_bndl + annotPath = file.path(newDbPath, "0000_ses", "msajc003_bndl", "msajc003_annot.json") + dbAnnotLines = readLines(annotPath, warn=FALSE) + dbAnnot = jsonlite::fromJSON(paste(dbAnnotLines, collapse=''), + simplifyVector = FALSE) + + # Check that all labels on level 'MAU' have _start/_end suffix + expect_true(all(unlist(sapply(dbAnnot$levels[[5]]$items, + function(x) if(stringr::str_detect(x$labels[[1]]$value, "_start") || stringr::str_detect(x$labels[[1]]$value, "_end")) TRUE)))) + expect_equal(dbAnnot$levels[[5]]$items[[3]]$labels[[1]]$value, "@_start") + expect_equal(dbAnnot$levels[[5]]$items[[4]]$labels[[1]]$value, "@_end") + + # Check that labels on level 'ORT' are correct + expect_equal(sapply(dbAnnot$levels[[3]]$items[[2]]$labels, + function(x) x$name), + c("ORT", + "ABC", + "XYZ")) + expect_equal(sapply(dbAnnot$levels[[3]]$items[[2]]$labels, + function(x) x$value), + c("", + "ABC_label", + "XYZ_label")) + expect_equal(sapply(dbAnnot$levels[[3]]$items[[3]]$labels, + function(x) x$name), + c("ORT", + "ABC", + "XYZ")) + expect_equal(sapply(dbAnnot$levels[[3]]$items[[3]]$labels, + function(x) x$value), + c("friends", + "", + "")) + + # Check that the item on 'KAN' with the semicolon does not have an incoming link + expect_true(all(unlist(sapply(dbAnnot$links, function(x) if(x$toID == 5) FALSE)))) + + # Correctness of annot file msajc010_bndl + annotPath = file.path(newDbPath, "0000_ses", "msajc010_bndl", "msajc010_annot.json") + dbAnnotLines = readLines(annotPath, warn = FALSE) + dbAnnot = jsonlite::fromJSON(paste(dbAnnotLines, collapse=''), + simplifyVector = FALSE) + + # Check that there is only one item (the bundle). + expect_equal(sapply(dbAnnot$levels, + function(x) length(x$items) > 0), + c(TRUE, FALSE, FALSE, FALSE, FALSE, FALSE)) + + # Check that the bundle item has only nine labels (the bundle name + all the empty labels for the others). + expect_equal(length(dbAnnot$levels[[1]]$items[[1]]$labels), 9) + expect_equal(dbAnnot$levels[[1]]$items[[1]]$labels[[1]]$name, "bundle") + expect_equal(dbAnnot$levels[[1]]$items[[1]]$labels[[1]]$value, "") + + # Check that there are no links. + expect_equal(length(dbAnnot$links), 0) + + # Correctness of annot file msajc012_bndl + annotPath = file.path(newDbPath, "0000_ses", "msajc012_bndl", "msajc012_annot.json") + dbAnnotLines = readLines(annotPath, warn = FALSE) + dbAnnot = jsonlite::fromJSON(paste(dbAnnotLines, collapse = ''), + simplifyVector = FALSE) + + # Check that links between ORT and MAU have been turned around (should have been MAU->ORT after parsing but ORT->MAU after link disambiguation) + expect_equal(unlist(sapply(dbAnnot$links, + function(x) if(x$toID == 21) x$fromID)), + c(10, 11, 12)) + + # Check that this annot file does not contain any items on the XYZ level + expect_equal(sapply(dbAnnot$levels, + function(x) length(x$items) > 0), + c(TRUE, TRUE, TRUE, TRUE, TRUE, FALSE)) + } +) +# Cleaning up +unlink(newDbPath, recursive = TRUE) + +# # --------------------------------------------------------------------------- +# # --------------------------------------------------------------------------- +# test_that("Warnings (semicolon) are displayed if verbose.", +# { +# expect_warning(convert_BPFCollection(sourceDir = sourceDir, targetDir = testDir, dbName = dbName, verbose = TRUE, refLevel = "ORT", +# newLevels = c("XYZ"), newLevelClasses = c(1), segmentToEventLevels = c("MAU"), bpfExt = "parmanipulated"), +# regexp = "between.*';'", ignore.case = TRUE) +# } +# ) +# # Cleaning up +# unlink(newDbPath, recursive = TRUE) + +# --------------------------------------------------------------------------- +# --------------------------------------------------------------------------- +test_that("Conversion without overlap resolution on BPF with overlap causes error.", + { + expect_error(convert_BPFCollection(sourceDir = sourceDir, + targetDir = testDir, + dbName = dbName, + verbose = FALSE, + newLevels = c("XYZ"), + newLevelClasses = c(1), + bpfExt = "parmanipulated"), + regexp = "overlap", + ignore.case = TRUE) + } +) +# --------------------------------------------------------------------------- +# --------------------------------------------------------------------------- +test_that("Conversion with unknown level name in a BPF causes error.", + { + expect_error(convert_BPFCollection(sourceDir = sourceDir, + targetDir = testDir, + dbName = dbName, + verbose = FALSE, + bpfExt = "parmanipulated"), + regexp = "unknown level", + ignore.case = TRUE) + } +) +# --------------------------------------------------------------------------- +# --------------------------------------------------------------------------- +test_that("Conversion with a mismatch between level class and BPF line causes error.", + { + expect_error(convert_BPFCollection(sourceDir = sourceDir, + targetDir = testDir, + dbName = dbName, + verbose = FALSE, + refLevel = "ORT", + newLevels = c("XYZ"), + newLevelClasses = c(5), + segmentToEventLevels = c("MAU"), + bpfExt = "parmanipulated"), + regexp = "class", + ignore.case = TRUE) + } +) +# --------------------------------------------------------------------------- +# --------------------------------------------------------------------------- +# Final clean-up (just in case) +unlink(newDbPath, recursive = TRUE) + +# --------------------------------------------------------------------------- +# --------------------------------------------------------------------------- + diff --git a/tests/testthat/test_emuR-convert_TextGridCollection.R b/tests/testthat/test_emuR-convert_TextGridCollection.R new file mode 100644 index 00000000..55ab4e99 --- /dev/null +++ b/tests/testthat/test_emuR-convert_TextGridCollection.R @@ -0,0 +1,213 @@ +##' testthat tests for convert_TextGridCollection +##' + +context("testing convert_TextGridCollection function") + +path2demoData = file.path(tempdir(), "emuR_demoData") +path2testData = file.path(tempdir(), "emuR_testthat") +path2tgCol = file.path(path2demoData, "TextGrid_collection") + +emuDBname = 'convert-TextGridCollection-testDB' + +path2newDb = file.path(path2testData, + paste0(emuDBname, emuDB.suffix)) + + +# clean up +unlink(path2newDb, recursive = TRUE) + +############################## +test_that("bad calls cause errors", { + + # create dir + dir.create(path2newDb) + + # existing targetDir causes errors + expect_error(convert_TextGridCollection(dir = path2tgCol, + dbName = emuDBname, + targetDir = path2testData, + verbose=FALSE), + regexp = "already exists!", + ignore.case = TRUE) + # clean up + unlink(path2newDb, recursive = TRUE) + +}) + +############################## +test_that("correct emuDB is created", { + + convert_TextGridCollection(dir = path2tgCol, + dbName = emuDBname, + path2testData, + verbose = FALSE) + + test_that("emuDB has correct file format on disc", { + # 2 files in top level + tmp = list.files(path2newDb) + expect_equal(length(tmp), 2) + + # 14 files in 0000_ses + tmp = list.files(file.path(path2newDb,'0000_ses'), + recursive = TRUE) + expect_equal(length(tmp), 14) + }) + + test_that("emuDB _DBconfig.json is correct", { + # read config + dbCfgJSONLns = readLines(file.path(path2newDb, + paste0(emuDBname, + '_DBconfig.json')), + warn = FALSE) + dbCfgJSON = paste(dbCfgJSONLns, + collapse = '') + dbCfgPersisted = jsonlite::fromJSON(dbCfgJSON, + simplifyVector = FALSE) + + # correct name + expect_equal(dbCfgPersisted$name, emuDBname) + # no ssffTrackDefs + expect_equal(length(dbCfgPersisted$ssffTrackDefinitions), 0) + # no linkDefs + expect_equal(length(dbCfgPersisted$linkDefinitions), 0) + # levelDef stuff + expect_equal(length(dbCfgPersisted$levelDefinitions), 11) + expect_equal(dbCfgPersisted$levelDefinitions[[9]]$name, 'Phonetic') + + # EMUwebAppConfig stuff + expect_equal(length(dbCfgPersisted$EMUwebAppConfig$perspectives), 1) + expect_equal(dbCfgPersisted$EMUwebAppConfig$perspectives[[1]]$signalCanvases$order[[1]], 'OSCI') + expect_equal(length(dbCfgPersisted$EMUwebAppConfig$perspectives[[1]]$levelCanvases$order), 11) + + }) + + test_that("emuDB _annot.json is correct", { + # read annot + annotJSONLns = readLines(file.path(path2newDb, + '0000_ses/msajc003_bndl/msajc003_annot.json'), + warn = FALSE) + annotJSON = paste(annotJSONLns,collapse='') + annotPersisted = jsonlite::fromJSON(annotJSON,simplifyVector=FALSE) + # general stuff + expect_equal(annotPersisted$name, 'msajc003') + expect_equal(annotPersisted$annotates, 'msajc003.wav') + expect_equal(length(annotPersisted$links), 0) + expect_equal(length(annotPersisted$levels), 11) + expect_equal(annotPersisted$levels[[9]]$name, 'Phonetic') + + # test a couple of items + + # second segment + expect_that(annotPersisted$levels[[9]]$items[[2]]$sampleStart, equals(3749)) + expect_that(annotPersisted$levels[[9]]$items[[2]]$sampleDur, equals(1389)) + expect_that(annotPersisted$levels[[9]]$items[[2]]$labels[[1]]$value, equals('V')) + + # 18th segment + expect_that(annotPersisted$levels[[9]]$items[[18]]$sampleStart, equals(30124)) + expect_that(annotPersisted$levels[[9]]$items[[18]]$sampleDur, equals(844)) + expect_that(annotPersisted$levels[[9]]$items[[18]]$labels[[1]]$value, equals('@')) + + # 35th segment + # item[33] = {id: XYZ, labels: [{name: ‘lab', value: ‘l'}], sampleStart: 50126, sampleDur: 1962} + expect_that(annotPersisted$levels[[9]]$items[[35]]$sampleStart, equals(50126)) + expect_that(annotPersisted$levels[[9]]$items[[35]]$sampleDur, equals(1962)) + expect_that(annotPersisted$levels[[9]]$items[[35]]$labels[[1]]$value, equals('l')) + + }) + + # clean up + unlink(path2newDb, recursive = TRUE) + +}) + +############################## +test_that("only specified tiers are converted when tierNames is set", { + + convert_TextGridCollection(dir = path2tgCol, + dbName = emuDBname, + path2testData, + tierNames = c("Phonetic", "Tone"), + verbose = FALSE) + + test_that("emuDB has correct file format on disc", { + # 2 files in top level + tmp = list.files(path2newDb) + expect_equal(length(tmp), 2) + + # 14 files in 0000_ses + tmp = list.files(file.path(path2newDb,'0000_ses'), + recursive = TRUE) + expect_equal(length(tmp), 14) + }) + + test_that("emuDB _DBconfig.json is correct", { + # read config + dbCfgJSONLns=readLines(file.path(path2newDb, + paste0(emuDBname, + '_DBconfig.json')), + warn = FALSE) + dbCfgJSON = paste(dbCfgJSONLns, collapse = '') + dbCfgPersisted = jsonlite::fromJSON(dbCfgJSON, + simplifyVector = FALSE) + + # correct name + expect_equal(dbCfgPersisted$name, emuDBname) + # no ssffTrackDefs + expect_equal(length(dbCfgPersisted$ssffTrackDefinitions), 0) + # no linkDefs + expect_equal(length(dbCfgPersisted$linkDefinitions), 0) + # levelDef stuff + expect_equal(length(dbCfgPersisted$levelDefinitions), 2) + expect_equal(dbCfgPersisted$levelDefinitions[[1]]$name, 'Phonetic') + expect_equal(dbCfgPersisted$levelDefinitions[[1]]$type, 'SEGMENT') + expect_equal(dbCfgPersisted$levelDefinitions[[2]]$name, 'Tone') + expect_equal(dbCfgPersisted$levelDefinitions[[2]]$type, 'EVENT') + + # EMUwebAppConfig stuff + expect_equal(length(dbCfgPersisted$EMUwebAppConfig$perspectives), 1) + expect_equal(dbCfgPersisted$EMUwebAppConfig$perspectives[[1]]$signalCanvases$order[[1]], 'OSCI') + expect_equal(length(dbCfgPersisted$EMUwebAppConfig$perspectives[[1]]$levelCanvases$order), 2) + + }) + + test_that("emuDB _annot.json is correct", { + # read annot + annotJSONLns = readLines(file.path(path2newDb, + '0000_ses/msajc003_bndl/msajc003_annot.json'), + warn = FALSE) + annotJSON = paste(annotJSONLns, collapse = '') + annotPersisted = jsonlite::fromJSON(annotJSON, simplifyVector = FALSE) + # general stuff + expect_equal(annotPersisted$name, 'msajc003') + expect_equal(annotPersisted$annotates, 'msajc003.wav') + expect_equal(length(annotPersisted$links), 0) + expect_equal(length(annotPersisted$levels), 2) + expect_equal(annotPersisted$levels[[1]]$name, 'Phonetic') + expect_equal(annotPersisted$levels[[1]]$type, 'SEGMENT') + expect_equal(annotPersisted$levels[[2]]$name, 'Tone') + expect_equal(annotPersisted$levels[[2]]$type, 'EVENT') + # test a couple of items + + # second segment + expect_that(annotPersisted$levels[[1]]$items[[2]]$sampleStart, equals(3749)) + expect_that(annotPersisted$levels[[1]]$items[[2]]$sampleDur, equals(1389)) + expect_that(annotPersisted$levels[[1]]$items[[2]]$labels[[1]]$value, equals('V')) + + # 18th segment + expect_that(annotPersisted$levels[[1]]$items[[18]]$sampleStart, equals(30124)) + expect_that(annotPersisted$levels[[1]]$items[[18]]$sampleDur, equals(844)) + expect_that(annotPersisted$levels[[1]]$items[[18]]$labels[[1]]$value, equals('@')) + + # 35th segment + # item[33] = {id: XYZ, labels: [{name: ‘lab', value: ‘l'}], sampleStart: 50126, sampleDur: 1962} + expect_that(annotPersisted$levels[[1]]$items[[35]]$sampleStart, equals(50126)) + expect_that(annotPersisted$levels[[1]]$items[[35]]$sampleDur, equals(1962)) + expect_that(annotPersisted$levels[[1]]$items[[35]]$labels[[1]]$value, equals('l')) + + }) + + + # clean up + unlink(path2newDb, recursive = TRUE) + +}) diff --git a/tests/testthat/test_emuR-convert_txtCollection.R b/tests/testthat/test_emuR-convert_txtCollection.R new file mode 100644 index 00000000..050c1a02 --- /dev/null +++ b/tests/testthat/test_emuR-convert_txtCollection.R @@ -0,0 +1,101 @@ + +# --------------------------------------------------------------------------- +context("testing convert_txtCollection") +# --------------------------------------------------------------------------- + +sourceDirMain = file.path(tempdir(), "emuR_demoData") +testDir = file.path(tempdir(), "emuR_testthat") +dbName = "txt_converter_test" + +# Cleaning up (just in case) +unlink(file.path(testDir, dbName), recursive = TRUE) + +# --------------------------------------------------------------------------- +# Testing with original BPFs +# --------------------------------------------------------------------------- + +sourceDir = file.path(sourceDirMain, "txt_collection") +newDbFolderName = paste0(dbName, emuDB.suffix) +newDbPath = file.path(testDir, newDbFolderName) +configPath = file.path(newDbPath, paste0(dbName, '_DBconfig.json')) + +# --------------------------------------------------------------------------- +# --------------------------------------------------------------------------- +test_that("testing txt collection conversion", + { + convert_txtCollection(sourceDir = sourceDir, + targetDir = testDir, + dbName = dbName, + verbose = FALSE) + + # Format of data base. + expect_true(newDbFolderName %in% list.dirs(testDir, + full.names = FALSE, + recursive = FALSE)) + expect_equal(length(list.files(newDbPath, + recursive = FALSE)), 2) + expect_equal(length(list.files(file.path(newDbPath, + "0000_ses"), + recursive = FALSE)), 7) + expect_equal(length(list.files(file.path(newDbPath, + "0000_ses", + "msajc003_bndl"), + recursive = FALSE)), 2) + + # Correctness of config file. + dbConfigLines = readLines(configPath, warn = FALSE) + dbConfig = jsonlite::fromJSON(paste(dbConfigLines, + collapse = ''), simplifyVector = FALSE) + + # General & webAppConfig + expect_equal(dbConfig$name, dbName) + expect_equal(length(dbConfig$ssffTrackDefinitions), 0) + expect_true(dbConfig$EMUwebAppConfig$activeButtons$saveBundle) + expect_true(dbConfig$EMUwebAppConfig$activeButtons$showHierarchy) + + # Check that there are no level canvas orders (bundle is item!) + expect_equal(length(dbConfig$EMUwebAppConfig$perspectives[[1]]$levelCanvases$order), 0) + + # Check that there is one level definition (bundle) + expect_equal(length(dbConfig$levelDefinitions), 1) + + # Check that level names and types are correct + expect_equal(sapply(dbConfig$levelDefinitions, function(x) x$name), "bundle") + expect_equal(sapply(dbConfig$levelDefinitions, function(x) x$type), "ITEM") + + # Check that each level has the appropriate amount of attribute definitions + expect_equal(sapply(dbConfig$levelDefinitions, function(x) length(x$attributeDefinitions)), 2) + expect_equal(sapply(dbConfig$levelDefinitions, function(x) x$attributeDefinitions[[1]]$name), "bundle") + expect_equal(sapply(dbConfig$levelDefinitions, function(x) x$attributeDefinitions[[2]]$name), "transcription") + + # No link definitions + expect_equal(length(dbConfig$linkDefinitions), 0) + + # Correctness of one annot file (msajc003_annot) + annotPath = file.path(newDbPath, + "0000_ses", + "msajc003_bndl", + "msajc003_annot.json") + dbAnnotLines = readLines(annotPath, warn = FALSE) + dbAnnot = jsonlite::fromJSON(paste(dbAnnotLines, + collapse = ''), + simplifyVector = FALSE) + + # Check that all levels have the appropriate number of items + expect_equal(length(dbAnnot$levels[[1]]$items), 1) + + # Check individual items + expect_equal(dbAnnot$levels[[1]]$items[[1]]$id, 1) + expect_equal(dbAnnot$levels[[1]]$items[[1]]$labels[[2]]$value, "amongst her friends she was considered beautiful") + + # Check that there are no links + expect_equal(length(dbAnnot$links), 0) + } + ) + +# Cleaning up. +unlink(newDbPath, recursive = TRUE) + +# --------------------------------------------------------------------------- +# --------------------------------------------------------------------------- + diff --git a/tests/testthat/test_emuR-create_DBconfigFromTextGrid.R b/tests/testthat/test_emuR-create_DBconfigFromTextGrid.R new file mode 100644 index 00000000..4f2ab5d6 --- /dev/null +++ b/tests/testthat/test_emuR-create_DBconfigFromTextGrid.R @@ -0,0 +1,47 @@ +##' testthat tests for convert_TextGridCollection_to_emuDB +##' + +context("testing create_DBconfigFromTextGrid function") + +path2demoData = file.path(tempdir(), "emuR_demoData") +path2testData = file.path(tempdir(), "emuR_testthat") +path2tg = file.path(path2demoData, "TextGrid_collection/msajc003.TextGrid") + +dbName = 'test12' + +# tmp project base path +basePath=file.path(tempdir(), dbName) + +############################## +test_that("test that correct values are set for msajc003", { + conf = create_DBconfigFromTextGrid(path2tg, dbName,basePath) + expect_equal(length(conf$linkDefinitions), 0) + expect_equal(length(conf$ssffTrackDefinitions), 0) + expect_equal(length(conf$levelDefinitions), 11) + expect_equal(conf$mediafileExtension, 'wav') + + expect_equal(conf$levelDefinitions[[1]]$name, 'Utterance') + expect_equal(conf$levelDefinitions[[1]]$type, 'SEGMENT') + expect_equal(conf$levelDefinitions[[1]]$attributeDefinitions[[1]]$name, 'Utterance') + expect_equal(conf$levelDefinitions[[1]]$attributeDefinitions[[1]]$type, 'STRING') + +}) + +############################## +test_that("test only correct tiers are extracted if tierNames is set", { + conf = create_DBconfigFromTextGrid(path2tg, + dbName, + basePath, + c("Phonetic", "Tone")) + + expect_equal(conf$levelDefinitions[[1]]$name, 'Phonetic') + expect_equal(conf$levelDefinitions[[1]]$type, 'SEGMENT') + expect_equal(conf$levelDefinitions[[1]]$attributeDefinitions[[1]]$name, 'Phonetic') + expect_equal(conf$levelDefinitions[[1]]$attributeDefinitions[[1]]$type, 'STRING') + + expect_equal(conf$levelDefinitions[[2]]$name, 'Tone') + expect_equal(conf$levelDefinitions[[2]]$type, 'EVENT') + expect_equal(conf$levelDefinitions[[2]]$attributeDefinitions[[1]]$name, 'Tone') + expect_equal(conf$levelDefinitions[[2]]$attributeDefinitions[[1]]$type, 'STRING') + +}) diff --git a/tests/testthat/test_emuR-create_filePairList.R b/tests/testthat/test_emuR-create_filePairList.R new file mode 100644 index 00000000..0bcc0aa5 --- /dev/null +++ b/tests/testthat/test_emuR-create_filePairList.R @@ -0,0 +1,118 @@ +##' testthat tests for create_filePairList +##' +context("testing create_filePairList function") + + +path2demoData = file.path(tempdir(), "emuR_demoData") +path2testData = file.path(tempdir(), "emuR_testthat") +path2tgCol = file.path(path2demoData, "TextGrid_collection") + + +ext1 = 'wav' +ext2 = 'TextGrid' + +wavPaths = list.files(path2tgCol, + pattern = paste(ext1, "$", sep = ""), + recursive = TRUE, full.names = TRUE) +tgPaths = list.files(path2tgCol, + pattern = paste(ext2, "$", sep = ""), + recursive = TRUE, full.names = TRUE) + +testDirName = 'test_createFilePairList' + +path2testDir = file.path(path2testData, + testDirName) + +############################## +test_that("bad calls cause errors", { + + expect_error(create_filePairList('asdf', '', '', ''), + 'ext1Path2rootDir does not exist: .*asdf') + expect_error(create_filePairList(path2tgCol, 'asdf', '', ''), + 'ext2Path2rootDir does not exist: .*asdf') + +}) + +############################## +test_that("error is generated when nr of ext1 files > ext2 files", { + # create testdir + dir.create(path2testDir) + + # copy files + file.copy(wavPaths, path2testDir) + file.copy(tgPaths[-length(tgPaths)], path2testDir) + + expect_error(create_filePairList(path2testDir, + path2testDir, + 'wav', + 'TextGrid')) + + # clean up + unlink(path2testDir, recursive = TRUE) + +}) + + +############################## +test_that("correct filePairList is generated when nr of ext1 files < ext2 files", { + # create testdir + dir.create(path2testDir) + + # copy files + file.copy(wavPaths[-length(wavPaths)], path2testDir) + file.copy(tgPaths, path2testDir) + + fpl = create_filePairList(path2testDir, + path2testDir, + 'wav', + 'TextGrid') + + expect_equal(dim(fpl)[1], 6) + expect_equal(dim(fpl)[2], 2) + + # clean up + unlink(path2testDir, recursive = TRUE) + +}) + +############################## +test_that("error is thrown if dirs are empty", { + + # create testdir + dir.create(path2testDir) + + expect_error(create_filePairList(path2testDir, + path2testDir, + 'wav', + 'TextGrid')) + + # clean up + unlink(path2testDir, recursive = TRUE) + +}) + +############################## +test_that("error is thrown if one ext2 does not have same base name", { + + # create testdir + dir.create(path2testDir) + + # copy files + file.copy(wavPaths, path2testDir) + file.copy(tgPaths, path2testDir) + + #rename file + file.rename(file.path(path2testDir, + basename(tgPaths[3])), + file.path(path2testDir, 'asdf.TextGrid')) + + expect_error(create_filePairList(path2testDir, + path2testDir, + 'wav', + 'TextGrid')) + + # clean up + unlink(path2testDir, recursive = TRUE) + +}) + diff --git a/tests/testthat/test_emuR-database.DBconfig.EMUwebAppConfig.R b/tests/testthat/test_emuR-database.DBconfig.EMUwebAppConfig.R new file mode 100644 index 00000000..3bad3439 --- /dev/null +++ b/tests/testthat/test_emuR-database.DBconfig.EMUwebAppConfig.R @@ -0,0 +1,162 @@ +##' testthat tests for database.DBconfig.EMUwebAppConfig +##' +context("testing database.DBconfig.EMUwebAppConfig functions") + +dbName = 'ae' + +path2orig = file.path(tempdir(), + "emuR_demoData", + paste0(dbName, emuDB.suffix)) +path2testData = file.path(tempdir(), "emuR_testthat") +path2db = file.path(path2testData, + paste0(dbName, emuDB.suffix)) + +# extract internalVars from environment .emuR_pkgEnv +internalVars = get("internalVars", envir = .emuR_pkgEnv) + + +############################## +test_that("CRUD operations work for perspectives", { + + # delete, copy and load + unlink(path2db, recursive = TRUE) + file.copy(path2orig, path2testData, recursive = TRUE) + ae = load_emuDB(path2db, + inMemoryCache = internalVars$testingVars$inMemoryCache, + verbose = FALSE) + + test_that("add = (C)RUD", { + # bad call persp. already exists + expect_error(add_perspective(ae, + name = 'default')) + add_perspective(ae, + name = 'newPersp') + + }) + + test_that("list = C(R)UD", { + df = list_perspectives(ae) + + expect_true(df$name[1] == "default") + expect_true(df$signalCanvasesOrder[1] == "OSCI; SPEC") + expect_true(df$levelCanvasesOrder[1] == "Phonetic; Tone") + + expect_true(df$name[4] == "newPersp") + expect_true(df$signalCanvasesOrder[4] == "OSCI; SPEC") + expect_true(df$levelCanvasesOrder[4] == "") + }) + + test_that("modify = CR(U)D", { + # currently not implemented + }) + + test_that("remove = CRU(D)", { + + remove_perspective(ae, + name = 'newPersp') + + df = list_perspectives(ae) + expect_equal(nrow(df), 3) + }) + + # clean up + DBI::dbDisconnect(ae$connection) + ae = NULL + +}) + +############################## +test_that("CRUD operations work for signalCanvasesOrder", { + + # delete, copy and load + unlink(path2db, recursive = TRUE) + file.copy(path2orig, path2testData, recursive = TRUE) + ae = load_emuDB(path2db, + inMemoryCache = internalVars$testingVars$inMemoryCache, + verbose = FALSE) + + + test_that("set = (C)RUD", { + expect_error(set_signalCanvasesOrder(ae, + perspectiveName = "default", + order = c("OSCI", "badTrackName"))) + + set_signalCanvasesOrder(ae, + perspectiveName = "default", + order = c("OSCI", "SPEC", "fm")) + + }) + + test_that("get = C(R)UD", { + order = get_signalCanvasesOrder(ae, perspectiveName = "default") + + expect_equal(order[1], "OSCI") + expect_equal(order[2], "SPEC") + expect_equal(order[3], "fm") + }) + + test_that("modify = CR(U)D", { + # currently not implemented + }) + + test_that("remove = CRU(D)", { + # currently not implemented + }) + + # clean up + DBI::dbDisconnect(ae$connection) + ae = NULL + +}) + +############################## +test_that("CRUD operations work for levelCanvasesOrder", { + # delete, copy and load + unlink(path2db, recursive = TRUE) + file.copy(path2orig, path2testData, recursive = TRUE) + ae = load_emuDB(path2db, + inMemoryCache = internalVars$testingVars$inMemoryCache, + verbose = FALSE) + + + test_that("set = (C)RUD", { + # bad level name + expect_error(set_levelCanvasesOrder(ae, + perspectiveName = "default", + order = c("Phonetic", "badLevelName"))) + + # bad level type + expect_error(set_levelCanvasesOrder(ae, + perspectiveName = "default", + order = c("Phonetic", "Tone", "Word"))) + + set_levelCanvasesOrder(ae, + perspectiveName = "default", + order = c("Tone", "Phonetic")) + + }) + + test_that("get = C(R)UD", { + order = get_levelCanvasesOrder(ae, + perspectiveName = "default") + + expect_equal(order[1], "Tone") + expect_equal(order[2], "Phonetic") + }) + + test_that("modify = CR(U)D", { + # currently not implemented + }) + + test_that("remove = CRU(D)", { + # currently not implemented + }) + + # clean up + DBI::dbDisconnect(ae$connection) + ae = NULL + +}) + + + diff --git a/tests/testthat/test_emuR-database.DBconfig.R b/tests/testthat/test_emuR-database.DBconfig.R new file mode 100644 index 00000000..37f9976e --- /dev/null +++ b/tests/testthat/test_emuR-database.DBconfig.R @@ -0,0 +1,551 @@ +##' testthat tests for database.DBconfig +##' +context("testing database.DBconfig functions") + +dbName = 'ae' +useInMemoryCache = FALSE + +path2orig = file.path(tempdir(), + "emuR_demoData", + paste0(dbName, emuDB.suffix)) +path2testData = file.path(tempdir(), "emuR_testthat") +path2db = file.path(path2testData, + paste0(dbName, emuDB.suffix)) + + +############################## +test_that("get_levelDefinition returns correct levelDef", { + + # delete, copy and load + unlink(path2db, recursive = TRUE) + file.copy(path2orig, + path2testData, + recursive = TRUE) + ae = load_emuDB(path2db, + inMemoryCache = useInMemoryCache, + verbose = FALSE) + + ######################### + # get dbObj + dbConfig = load_DBconfig(ae) + + ld = get_levelDefinition(ae, 'Phonetic') + expect_equal(ld$name, 'Phonetic') + expect_equal(ld$type, 'SEGMENT') + expect_equal(ld$attributeDefinitions[[1]]$name, 'Phonetic') + expect_equal(ld$attributeDefinitions[[1]]$type, 'STRING') + + # cleanup + DBI::dbDisconnect(ae$connection) + ae = NULL + unlink(path2db, recursive = TRUE) +}) + +############################## +test_that("CRUD operations work for ssffTrackDefinitions", { + + # delete, copy and load + unlink(path2db, recursive = TRUE) + file.copy(path2orig, + path2testData, + recursive = TRUE) + ae = load_emuDB(path2db, + inMemoryCache = useInMemoryCache, + verbose = FALSE) + + test_that("add = (C)RUD", { + expect_error(add_ssffTrackDefinition(ae, 'fm')) + expect_error(add_ssffTrackDefinition(ae, 'fm', 'bla')) + expect_error(add_ssffTrackDefinition(ae, + 'newTrackName', + 'badColName', + 'pit', + onTheFlyFunctionName = 'mhsF0', + interactive = TRUE, + verbose = FALSE)) + + add_ssffTrackDefinition(ae, + 'newTrackName', + 'pitch', + 'pit', + onTheFlyFunctionName = 'mhsF0', + interactive = FALSE, + verbose = FALSE) + + pitFilePaths = list.files(path2db, + pattern = 'pit$', + recursive = TRUE) + expect_equal(length(pitFilePaths), 7) + + }) + + test_that("list = C(R)UD", { + df = list_ssffTrackDefinitions(ae) + expect_equal(df$name, c('dft','fm', 'newTrackName')) + expect_equal(df$columnName, c('dft','fm', 'pitch')) + expect_equal(df$fileExtension, c('dft','fms', 'pit')) + }) + + + test_that("remove = CRU(D)", { + # bad name + expect_error(remove_ssffTrackDefinition(ae, name="asdf")) + remove_ssffTrackDefinition(ae, + name = "newTrackName", + deleteFiles = TRUE) + # check that _DBconfig entry is deleted + dbConfig = load_DBconfig(ae) + expect_equal(dbConfig$ssffTrackDefinitions[[1]]$name, "dft") + expect_equal(dbConfig$ssffTrackDefinitions[[2]]$name, "fm") + + # check that files have been deleted + filePaths = list_files(ae, "pit") + expect_equal(nrow(filePaths), 0) + + }) + + test_that("remove = CRU(D) with force", { + remove_linkDefinition(ae, + "Intermediate", + "Word", + force = TRUE, + verbose = FALSE) + remove_linkDefinition(ae, + "Word", + "Syllable", + force = TRUE, + verbose = FALSE) + remove_levelDefinition(ae, + "Word", + force = TRUE, + verbose = FALSE) + + # check items table + df = DBI::dbGetQuery(ae$connection, paste0("SELECT * ", + "FROM items ", + "WHERE level = 'Word'")) + expect_equal(nrow(df), 0) + + # TODO: probably should also check labels table + + # check annot json + ajson = jsonlite::fromJSON(file.path(path2db, + "0000_ses", + "msajc003_bndl", + "msajc003_annot.json"), + simplifyVector = FALSE) + expect_false(ajson$levels[[4]]$name == "Word") + }) + + # cleanup + DBI::dbDisconnect(ae$connection) + ae = NULL + unlink(path2db, recursive = TRUE) + +}) + +############################## +test_that("CRUD operations work for levelDefinitions", { + + # delete, copy and load + unlink(path2db, recursive = TRUE) + file.copy(path2orig, + path2testData, + recursive = TRUE) + ae = load_emuDB(path2db, + inMemoryCache = useInMemoryCache, + verbose = FALSE) + + + test_that("add = (C)RUD", { + expect_error(add_levelDefinition(ae, 'Phonetic', 'SEGM')) # bad type + expect_error(add_levelDefinition(ae, 'Phonetic', 'SEGMENT')) # already exists + + add_levelDefinition(ae, 'Phonetic2', 'SEGMENT', verbose = FALSE) + + dbConfig = load_DBconfig(ae) + expect_equal(length(dbConfig$levelDefinitions), 10) + + }) + + test_that("list = C(R)UD", { + df = list_levelDefinitions(ae) + expect_equal(as.vector(df$name[8:10]), + c('Tone', + 'Foot', + 'Phonetic2')) + expect_equal(as.vector(df$type[8:10]), + c('EVENT', + 'ITEM', + 'SEGMENT')) + expect_equal(as.vector(df$nrOfAttrDefs[1:4]), + c(1, 1, 1, 3)) + }) + + test_that("remove = CRU(D)", { + + expect_error(remove_levelDefinition(ae, name="asdf")) # bad name + expect_error(remove_levelDefinition(ae, name="Phonetic")) # linkDef present + + DBI::dbExecute(ae$connection, paste0("INSERT INTO session ", + "VALUES (", + " '", ae$UUID, "', ", + "'0001')")) # add item + + DBI::dbExecute(ae$connection, paste0("INSERT INTO bundle ", + "VALUES (", + " '", ae$UUID, "', ", + " '0001', ", + " 'fakeBundle', ", + " 'fakeBundle.wav', ", + " 20000, ", + " '785c7cdb6d4bd5e8b5cd7c56a5946ddf' ", + ")")) # add item + + DBI::dbExecute(ae$connection, paste0("INSERT INTO items ", + "VALUES (", + " '", ae$UUID, "', ", + " '0001', ", + " 'fakeBundle', ", + " 1, ", + " 'Phonetic2', ", + " 'ITEM', ", + " 20000, ", + " 1, ", + " NULL, ", + " NULL, ", + " NULL", + ")")) # add item + + expect_error(remove_levelDefinition(ae, name = "Phonetic2")) # item present + + DBI::dbExecute(ae$connection, paste0("DELETE FROM items ", + "WHERE db_uuid = '", ae$UUID, "'")) # items present + + remove_levelDefinition(ae, name = "Phonetic2", verbose = FALSE) + dbConfig = load_DBconfig(ae) + expect_equal(length(dbConfig$levelDefinition), 9) + + }) + + # cleanup + DBI::dbDisconnect(ae$connection) + ae = NULL + unlink(path2db, recursive = TRUE) + +}) + +############################## +test_that("CRUD operations work for attributeDefinitions", { + + # delete, copy and load + unlink(path2db, recursive = TRUE) + file.copy(path2orig, + path2testData, + recursive = TRUE) + ae = load_emuDB(path2db, + inMemoryCache = useInMemoryCache, + verbose = FALSE) + + + test_that("add = (C)RUD", { + expect_error(add_attributeDefinition(ae, + 'Word', + 'Word', + verbose = FALSE)) # present attrDef + + add_attributeDefinition(ae, + 'Word', + 'testAttrDef', + verbose = FALSE) + df = list_attributeDefinitions(ae, 'Word') + expect_true('testAttrDef' %in% df$name) + }) + + test_that("list = C(R)UD", { + df = list_attributeDefinitions(ae, 'Word') + expect_equal(df$name, c('Word', 'Accent', 'Text', 'testAttrDef')) + expect_equal(df$type, c('STRING', 'STRING', 'STRING', 'STRING')) + expect_equal(df$hasLabelGroups, c(FALSE, FALSE, FALSE, FALSE)) + expect_equal(df$hasLegalLabels, c(FALSE, FALSE, FALSE, FALSE)) + }) + + + test_that("remove = CRU(D)", { + expect_error(remove_attributeDefinition(ae, + 'Word', + 'Word', + verbose = FALSE)) + expect_error(remove_attributeDefinition(ae, + 'Word', + 'Accent', + verbose = FALSE)) + remove_attributeDefinition(ae, 'Word', + 'testAttrDef', + force = TRUE, + verbose = FALSE) + df = list_attributeDefinitions(ae, 'Word') + expect_equal(nrow(df), 3) + }) + + # cleanup + DBI::dbDisconnect(ae$connection) + ae = NULL + unlink(path2db, recursive = TRUE) + +}) + +############################## +test_that("CRUD operations work for legalLabels", { + + # delete, copy and load + unlink(path2db, recursive = TRUE) + file.copy(path2orig, + path2testData, + recursive = TRUE) + ae = load_emuDB(path2db, + inMemoryCache = useInMemoryCache, + verbose = FALSE) + + test_that("set = (C)RUD", { + # non character vector causes error: + expect_error(set_legalLabels(ae, + levelName = 'Word', + attributeDefinitionName = 'Word', + legalLabels=c(1:3))) + + + set_legalLabels(ae, + levelName = 'Word', + attributeDefinitionName = 'Word', + legalLabels=c('A', 'B', 'C')) + }) + + test_that("get = C(R)UD", { + ll = get_legalLabels(ae, + levelName = 'Word', + attributeDefinitionName = 'Word') + + expect_equal(ll, c('A', 'B', 'C')) + }) + + + test_that("remove = CRU(D)", { + remove_legalLabels(ae, + levelName = 'Word', + attributeDefinitionName = 'Word') + + ll = get_legalLabels(ae, + levelName = 'Word', + attributeDefinitionName = 'Word') + + expect_true(is.na(ll)) + }) + + # cleanup + DBI::dbDisconnect(ae$connection) + ae = NULL + unlink(path2db, recursive = TRUE) + +}) + +############################## +test_that("CRUD operations work for labelGroups", { + + # delete, copy and load + unlink(path2db, recursive = TRUE) + file.copy(path2orig, + path2testData, + recursive = TRUE) + ae = load_emuDB(path2db, + inMemoryCache = useInMemoryCache, + verbose = FALSE) + + test_that("add = (C)RUD", { + # bad call already def. labelGroup + expect_error(add_attrDefLabelGroup(ae, + levelName = 'Phoneme', + attributeDefinitionName = 'Phoneme', + labelGroupName = 'vowel', + labelGroupValues = c('sdf', 'f'))) + + add_attrDefLabelGroup(ae, + levelName = 'Word', + attributeDefinitionName = 'Word', + labelGroupName = 'newGroup', + labelGroupValues = c('sdf', 'f')) + + }) + + test_that("list = C(R)UD", { + df = list_attrDefLabelGroups(ae, + levelName = 'Utterance', + attributeDefinitionName = 'Utterance') + expect_equal(nrow(df), 0) + + df = list_attrDefLabelGroups(ae, + levelName = 'Phoneme', + attributeDefinitionName = 'Phoneme') + expect_equal(nrow(df), 6) + expect_true(df[6,]$values == "H") + + df = list_attrDefLabelGroups(ae, + levelName = 'Word', + attributeDefinitionName = 'Word') + expect_true(df[1,]$name == "newGroup") + expect_true(df[1,]$values == "sdf; f") + }) + + test_that("remove = CRU(D)", { + expect_error(remove_attrDefLabelGroup(ae, + levelName = 'Word', + attributeDefinitionName = 'Word', + labelGroupName = 'notThere')) + + remove_attrDefLabelGroup(ae, + levelName = 'Word', + attributeDefinitionName = 'Word', + labelGroupName = 'newGroup') + + df = list_attrDefLabelGroups(ae, + levelName = 'Word', + attributeDefinitionName = 'Word') + expect_equal(nrow(df), 0) + }) + + # cleanup + DBI::dbDisconnect(ae$connection) + ae = NULL + unlink(path2db, recursive = TRUE) + +}) + +############################## +test_that("CRUD operations work for linkDefinitions", { + + # delete, copy and load + unlink(path2db, recursive = TRUE) + file.copy(path2orig, + path2testData, + recursive = TRUE) + ae = load_emuDB(path2db, + inMemoryCache = useInMemoryCache, + verbose = FALSE) + + test_that("add = (C)RUD", { + # bad call (bad type) + expect_error(add_linkDefinition(ae, "ONE_TO_TWO")) + # bad call (link exists) + expect_error(add_linkDefinition(ae, "ONE_TO_ONE", + superlevelName ="Syllable", + sublevelName = "Tone")) + # bad call undefined superlevelName + expect_error(add_linkDefinition(ae, "ONE_TO_MANY", + superlevelName ="undefinedLevel", + sublevelName = "Tone")) + + + add_linkDefinition(ae, "ONE_TO_MANY", + superlevelName ="Phoneme", + sublevelName = "Tone") + + }) + + test_that("list = C(R)UD", { + df = list_linkDefinitions(ae) + expect_equal(ncol(df), 3) + expect_equal(nrow(df), 10) + expect_true(df$type[10] == "ONE_TO_MANY") + expect_true(df$superlevelName[10] == "Phoneme") + expect_true(df$sublevelName[10] == "Tone") + }) + + test_that("remove = CRU(D)", { + # bad call -> bad superlevelName + expect_error(remove_linkDefinition(ae, + superlevelName ="badName", + sublevelName = "Tone")) + # bad call -> bad sublevelName + expect_error(remove_linkDefinition(ae, + superlevelName ="Word", + sublevelName = "badName")) + # bad call -> links present + expect_error(remove_linkDefinition(ae, + superlevelName ="Syllable", + sublevelName = "Tone")) + + remove_linkDefinition(ae, + superlevelName ="Phoneme", + sublevelName = "Tone") + + df = list_linkDefinitions(ae) + expect_equal(ncol(df), 3) + expect_equal(nrow(df), 9) + + }) + + test_that("remove = CRU(D) with force", { + remove_linkDefinition(ae, + "Word", + "Syllable", + force = TRUE, + verbose = FALSE) + remove_linkDefinition(ae, + "Syllable", + "Phoneme", + force = TRUE, + verbose = FALSE) + ldefs = list_linkDefinitions(ae) + expect_false("Word" %in% ldefs$superlevelName) + expect_false("Phoneme" %in% ldefs$sublevelName) + # TODO: should probably check annot json files & cache as well.. + }) + + # cleanup + DBI::dbDisconnect(ae$connection) + ae = NULL + unlink(path2db, recursive = TRUE) +}) + + +############################## +test_that("CRUD operations work for labelGroups", { + + # delete, copy and load + unlink(path2db, recursive = TRUE) + file.copy(path2orig, + path2testData, + recursive = TRUE) + ae = load_emuDB(path2db, + inMemoryCache = useInMemoryCache, + verbose = FALSE) + + + test_that("add = (C)RUD", { + add_labelGroup(ae, + name = 'testLG', + values = c('a', 'b', 'c')) + }) + + test_that("list = C(R)UD", { + df = list_labelGroups(ae) + expect_true(df$name == 'testLG') + expect_true(df$values =='a; b; c') + }) + + test_that("remove = CRU(D)", { + # bad call -> bad name + expect_error(remove_labelGroup(ae, + name = 'badName')) + + remove_labelGroup(ae, + name = 'testLG') + df = list_labelGroups(ae) + expect_equal(nrow(df), 0) + }) + + # cleanup + DBI::dbDisconnect(ae$connection) + ae = NULL + unlink(path2db, recursive = TRUE) +}) diff --git a/tests/testthat/test_emuR-database.R b/tests/testthat/test_emuR-database.R new file mode 100644 index 00000000..993dfced --- /dev/null +++ b/tests/testthat/test_emuR-database.R @@ -0,0 +1,751 @@ +context("testing database functions") + +aeSampleRate = 20000 + +dbName = "ae" +path2demoData = file.path(tempdir(), + "emuR_demoData") +path2orig = file.path(tempdir(), + "emuR_demoData", + paste0(dbName, emuDB.suffix)) +path2testData = file.path(tempdir(), + "emuR_testthat") +path2db = file.path(path2testData, + paste0(dbName, emuDB.suffix)) + +# extract internalVars from environment .emuR_pkgEnv +internalVars = get("internalVars", + envir = .emuR_pkgEnv) + +test_that("database functions work", { + + # delete, copy and load + unlink(path2db, recursive = TRUE) + unlink(file.path(path2testData, "fromLegacy"), + recursive = TRUE) + file.copy(path2orig, + path2testData, + recursive = TRUE) + ae = load_emuDB(path2db, + inMemoryCache = internalVars$testingVars$inMemoryCache, + verbose = FALSE) + + # convert and load legacy database + convert_legacyEmuDB(emuTplPath = file.path(path2demoData, "legacy_ae", "ae.tpl"), + targetDir = file.path(path2testData, "fromLegacy"), + dbUUID = ae$UUID, + verbose = FALSE) + aeFromLegacy = load_emuDB(file.path(path2testData, "fromLegacy", + paste0(dbName, emuDB.suffix)), + verbose = FALSE) + + test_that("function get_legacyFilePath()",{ + primaryTrackFilePath = get_legacyFilePath("/path/to/db", + 'BLOCK*/SES*', + c('BLOCK30', 'SES3042', '0001abc'), + 'wav') + expect_equal(primaryTrackFilePath, + "/path/to/db/BLOCK30/SES3042/0001abc.wav") + + signalTrackFilePath = get_legacyFilePath("/path/to/db", + 'F0', + c('BLOCK30', 'SES3042', '0001abc'), + 'f0') + expect_equal(signalTrackFilePath, + "/path/to/db/F0/0001abc.f0") + }) + + + test_that("Converted emuDB is equal to original",{ + expect_equal(ae$dbName, aeFromLegacy$dbName) + expect_equal(ae$UUI, aeFromLegacy$UUID) + + origItems = DBI::dbReadTable(ae$connection, "items") + convItems = DBI::dbReadTable(aeFromLegacy$connection, "items") + expect_equal(origItems, convItems) + + origLabels = DBI::dbReadTable(ae$connection, "labels") + convLabels = DBI::dbReadTable(aeFromLegacy$connection, "labels") + expect_equal(origLabels, convLabels) + + }) + + test_that("properties of ae are correct",{ + bp = file.path(path2testData, 'ae_emuDB') + nbp = normalizePath(path2db) + + expect_that(ae$dbName, is_equivalent_to('ae')) + expect_that(ae$basePath, is_equivalent_to(nbp)) + sesss = list_sessionsDBI(ae) + expect_that(nrow(sesss), is_equivalent_to(1)) + bndls = list_bundlesDBI(ae) + expect_that(nrow(bndls), is_equivalent_to(7)) + itCntQ = paste0("SELECT count(*) ", + "FROM items ", + "WHERE db_uuid = '",ae$UUID,"'") + + itCntDf = DBI::dbGetQuery(ae$connection,itCntQ) + itemCnt = itCntDf[[1]] + liCntQ = paste0("SELECT count(*) ", + "FROM links ", + "WHERE db_uuid = '",ae$UUID,"'") + liCntDf = DBI::dbGetQuery(ae$connection, liCntQ) + linkCnt = liCntDf[[1]] + expect_that(itemCnt, is_equivalent_to(736)) + expect_that(linkCnt, is_equivalent_to(785)) + }) + + test_that("Create emuDB from scratch works",{ + create_emuDB('create_emuDB_test1', path2testData) + t1BasePath = file.path(path2testData, 'create_emuDB_test1_emuDB') + t1 = load_emuDB(t1BasePath) + expect_that(t1$dbName, is_equivalent_to('create_emuDB_test1')) + t1 = NULL + unlink(t1BasePath, recursive = TRUE) + }) + + test_that("Data types are correct",{ + items = DBI::dbReadTable(ae$connection, 'items') + + expect_that(class(items[['seq_idx']]), is_equivalent_to('integer')) + expect_that(class(items[['item_id']]), is_equivalent_to('integer')) + expect_that(class(items[['sample_rate']]), is_equivalent_to('numeric')) + expect_that(class(items[['sample_point']]), is_equivalent_to('integer')) + expect_that(class(items[['sample_start']]), is_equivalent_to('integer')) + expect_that(class(items[['sample_dur']]), is_equivalent_to('integer')) + + labels = DBI::dbReadTable(ae$connection, 'labels') + expect_that(class(labels[['label_idx']]), is_equivalent_to('integer')) + + links = DBI::dbReadTable(ae$connection,'links') + expect_that(class(links[['from_id']]), is_equivalent_to('integer')) + expect_that(class(links[['to_id']]), is_equivalent_to('integer')) + }) + + test_that("Test ae samples",{ + + # aeB1=get.bundle(sessionName='0000',bundleName='msajc003',dbUUID=.test_emu_ae_db_uuid) + + bundleAnnotDFs = load_bundleAnnotDFsDBI(ae, "0000", "msajc003") + aeB1char = bundleAnnotDFsToAnnotJSONchar(ae, bundleAnnotDFs) + aeB1 = jsonlite::fromJSON(aeB1char, simplifyVector = FALSE) + + expect_equivalent(aeB1[['sampleRate']], aeSampleRate) + + halfSample = 0.5 / aeSampleRate + msajc015_lab_values = c(0.300000, 0.350276, 0.425417, 0.496601, 0.558601, 0.639601, + 0.663601, 0.706601, 0.806601, 1.006101, 1.085101, 1.097601, + 1.129101, 1.160101, 1.213101, 1.368101, 1.413095, 1.449550, + 1.464601, 1.500731, 1.578583, 1.623228, 1.653718, 1.717601, + 1.797463, 1.828601, 1.903635, 2.070101, 2.104101, 2.154601, + 2.200911, 2.226601, 2.271132, 2.408601, 2.502214, 2.576618, + 2.606558, 2.693704, 2.749004, 2.780766, 2.798504, 2.876593, + 2.958101, 3.026668, 3.046168, 3.067703, 3.123168, 3.238668, + 3.297668,3.456899) + msajc015_tone_events = c(0.531305, 1.486760, 1.609948, 2.445220, 2.910929, 3.110782, + 3.262078) + lvCnt = length(msajc015_lab_values) + teCnt = length(msajc015_tone_events) + #msajc015_phonetic=ae[['items']][ae[['items']][['bundle']]=="msajc015" & ae[['items']][['level']]=='Phonetic',] + msajc015_phonetic = DBI::dbGetQuery(ae$connection, + paste0("SELECT * ", + "FROM items ", + "WHERE db_uuid = '", ae$UUID, "' ", + " AND session='0000' ", + " AND bundle='msajc015' ", + " AND level='Phonetic'")) + rc = nrow(msajc015_phonetic) + expect_equivalent(rc + 1, lvCnt) + # order by sequence index + msajc015_phonetic_ordered = msajc015_phonetic[order(msajc015_phonetic[['seq_idx']]),] + rc = nrow(msajc015_phonetic_ordered) + expect_equivalent(rc + 1, lvCnt) + + #msajc015_tone=ae[['items']][ae[['items']][['bundle']]=="msajc015" & ae[['items']][['level']]=='Tone',] + msajc015_tone = DBI::dbGetQuery(ae$connection, + paste0("SELECT * ", + "FROM items ", + "WHERE db_uuid = '", ae$UUID, "' ", + " AND session = '0000' ", + " AND bundle = 'msajc015' ", + " AND level = 'Tone'")) + msajc015_tone_ordered = msajc015_tone[order(msajc015_tone[['seq_idx']]),] + lvSq = 1:rc + + # check sequence + for(i in lvSq){ + + poSampleStart = msajc015_phonetic_ordered[i, 'sample_start'] + poSampleDur=msajc015_phonetic_ordered[i,'sample_dur'] + if(i < rc){ + poNextSampleStart = msajc015_phonetic_ordered[i + 1, 'sample_start'] + # TODO + expect_equivalent(poNextSampleStart, poSampleStart + poSampleDur + 1) + #expect_equivalent(poNextSampleStart,poSampleStart+poSampleDur+1) + } + } + # check segment boundaries + for(i in lvSq){ + lv = msajc015_lab_values[i] + poSampleStart = msajc015_phonetic_ordered[i, 'sample_start'] + poSampleDur = msajc015_phonetic_ordered[i, 'sample_dur'] + poStart = (poSampleStart + 0.5) / aeSampleRate + absFail = abs(poStart - lv) + # accept deviation of at least half a sample + expect_lt(absFail, halfSample) + } + # and the last value + lv = msajc015_lab_values[lvCnt] + poSampleEnd = msajc015_phonetic_ordered[rc, 'sample_start'] + msajc015_phonetic_ordered[rc, 'sample_dur'] + 1 + poEnd = (poSampleEnd + 0.5) / aeSampleRate + absFail = abs(poEnd - lv) + # accept deviation of at least half a sample + expect_lt(absFail, halfSample) + + # check tone events + teS = 1:teCnt + for(i in teS){ + teTime = msajc015_tone_events[i] + teLSample = msajc015_tone_ordered[i, 'sample_point'] + teLTime = teLSample / aeSampleRate + absFail = abs(teLTime - teTime) + expect_lt(absFail, halfSample) + } + + }) + + test_that("Test ae modify",{ + orgItems = DBI::dbGetQuery(ae$connection, + paste0("SELECT * ", + "FROM items ", + "WHERE db_uuid = '",ae$UUID,"'")) + orgLabels = DBI::dbGetQuery(ae$connection, + paste0("SELECT * ", + "FROM labels ", + "WHERE db_uuid = '",ae$UUID,"'")) + orgLinks = DBI::dbGetQuery(ae$connection, + paste0("SELECT * ", + "FROM links ", + "WHERE db_uuid = '",ae$UUID,"'")) + + expect_equivalent(nrow(orgItems),736) + expect_equivalent(nrow(orgLinks),785) + # b015=get.bundle(sessionName='0000',bundleName = 'msajc015',dbUUID = .test_emu_ae_db_uuid) + bundleAnnotDFs = load_bundleAnnotDFsDBI(ae, + "0000", + "msajc015") + b015char = bundleAnnotDFsToAnnotJSONchar(ae, bundleAnnotDFs) + b015 = jsonlite::fromJSON(b015char, + simplifyVector = FALSE, + na = 'null') + + # select arbitrary item + b015m = b015 + phoneticLvlIt10 = b015m[['levels']][[7]][['items']][[10]] + lblOrg = phoneticLvlIt10[['labels']][[1]][['value']] + b015m[['levels']][[7]][['items']][[10]][['labels']][[1]][['value']] = 'test!!' + + # convert to bundleAnnotDFs + b015mChar = jsonlite::toJSON(b015m, + auto_unbox = TRUE, + pretty = TRUE) + bundleAnnotDFs = annotJSONcharToBundleAnnotDFs(b015mChar[1]) + remove_bundleAnnotDBI(ae, + "0000", + bundleName = "msajc015") + store_bundleAnnotDFsDBI(ae, + bundleAnnotDFs, + "0000", + "msajc015") + # store.bundle.annotation(dbUUID=.test_emu_ae_db_uuid,bundle=b015m) + + modItems = DBI::dbGetQuery(ae$connection, + paste0("SELECT * ", + "FROM items ", + "WHERE db_uuid = '",ae$UUID,"'")) + modLabels = DBI::dbGetQuery(ae$connection, + paste0("SELECT * ", + "FROM labels ", + "WHERE db_uuid = '",ae$UUID,"'")) + modLinks = DBI::dbGetQuery(ae$connection, + paste0("SELECT * ", + "FROM links ", + "WHERE db_uuid = '",ae$UUID,"'")) + + expect_equivalent(nrow(modItems), 736) + expect_equivalent(nrow(modLinks), 785) + + # change only affects labels + # items should be equal + expect_equal(orgItems, modItems) + # labels not + cmLbls1 = compare::compare(orgLabels, + modLabels, + allowAll = TRUE) + expect_false(cmLbls1$result) + # links are not changed, should be equal to original + expect_equal(orgLinks, modLinks) + + b015m[['levels']][[7]][['items']][[10]][['sampleDur']] = 99 + # store.bundle.annotation(dbUUID=.test_emu_ae_db_uuid,bundle=b015m) + # convert to bundleAnnotDFs + b015mChar = jsonlite::toJSON(b015m, + auto_unbox = TRUE, + pretty = TRUE) + bundleAnnotDFs = annotJSONcharToBundleAnnotDFs(b015mChar[1]) + remove_bundleAnnotDBI(ae, + "0000", + bundleName = "msajc015") + store_bundleAnnotDFsDBI(ae, + bundleAnnotDFs, + "0000", + "msajc015") + + mod2Items = DBI::dbGetQuery(ae$connection, + paste0("SELECT * ", + "FROM items ", + "WHERE db_uuid = '",ae$UUID,"'")) + mod2Labels = DBI::dbGetQuery(ae$connection, + paste0("SELECT * ", + "FROM labels ", + "WHERE db_uuid = '",ae$UUID,"'")) + mod2Links = DBI::dbGetQuery(ae$connection, + paste0("SELECT * ", + "FROM links ", + "WHERE db_uuid = '",ae$UUID,"'")) + + expect_equivalent(nrow(mod2Items), 736) + expect_equivalent(nrow(mod2Links), 785) + # + # # should all be equal to original + cm2 = compare::compare(orgItems, + mod2Items, + allowAll = TRUE) + expect_false(cm2$result) + cmLbls2 = compare::compare(orgLabels, + mod2Labels, + allowAll = TRUE) + expect_false(cmLbls2$result) + cml2 = compare::compare(orgLinks, + mod2Links, + allowAll = TRUE) + expect_true(cml2$result) + + # remove link + b015Lks = b015m[['links']] + b015LksM = list() + for(b015Lk in b015Lks){ + if(!(b015Lk[['fromID']] == 177 & b015Lk[['toID']] == 224)){ + b015LksM[[length(b015LksM) + 1]] = b015Lk + } + } + b015m2 = b015m + b015m2[['links']] = b015LksM + # store.bundle.annotation(dbUUID=.test_emu_ae_db_uuid,bundle=b015m2) + # convert to bundleAnnotDFs and store + b015m2Char = jsonlite::toJSON(b015m2, + auto_unbox = TRUE, + pretty = TRUE) + bundleAnnotDFs = annotJSONcharToBundleAnnotDFs(b015m2Char[1]) + remove_bundleAnnotDBI(ae, + "0000", + bundleName = "msajc015") + store_bundleAnnotDFsDBI(ae, + bundleAnnotDFs, + "0000", + "msajc015") + + mod3Items = DBI::dbGetQuery(ae$connection, + paste0("SELECT * ", + "FROM items ", + "WHERE db_uuid = '",ae$UUID,"'")) + mod3Labels = DBI::dbGetQuery(ae$connection, + paste0("SELECT * ", + "FROM labels ", + "WHERE db_uuid = '",ae$UUID,"'")) + mod3Links = DBI::dbGetQuery(ae$connection, + paste0("SELECT * ", + "FROM links ", + "WHERE db_uuid = '",ae$UUID,"'")) + + expect_equivalent(nrow(mod3Items), 736) + expect_equivalent(nrow(mod3Links), 784) + + cm3 = compare::compare(mod3Items, + mod2Items, + allowAll = TRUE) + expect_true(cm3$result) + cmLbls3 = compare::compare(mod2Labels, + mod3Labels, + allowAll = TRUE) + expect_true(cmLbls3$result) + cml3 = compare::compare(mod3Links, + mod2Links, + allowAll = TRUE) + expect_false(cml3$result) + + # insert the link again + # b015m3=get.bundle(dbUUID = .test_emu_ae_db_uuid,sessionName = '0000',bundleName = 'msajc015') + bundleAnnotDFs = load_bundleAnnotDFsDBI(ae, + "0000", + "msajc015") + b015m3char = bundleAnnotDFsToAnnotJSONchar(ae, bundleAnnotDFs) + b015m3 = jsonlite::fromJSON(b015m3char, + simplifyVector = FALSE, + na = 'null') + + b015m3Lks = b015m3[['links']] + b015m3Lks[[length(b015m3Lks) + 1]] = list(fromID = 177, + toID = 224) + b015m3[['links']] = b015m3Lks + + # store.bundle.annotation(dbUUID=.test_emu_ae_db_uuid,bundle=b015m3) + # convert to bundleAnnotDFs and store + b015m3Char = jsonlite::toJSON(b015m3, + auto_unbox = TRUE, + pretty = TRUE) + bundleAnnotDFs = annotJSONcharToBundleAnnotDFs(b015m3Char[1]) + remove_bundleAnnotDBI(ae, + "0000", + bundleName = "msajc015") + store_bundleAnnotDFsDBI(ae, + bundleAnnotDFs, + "0000", + "msajc015") + + mod4Links=DBI::dbGetQuery(ae$connection, + paste0("SELECT * ", + "FROM links ", + "WHERE db_uuid = '",ae$UUID,"'")) + cml3 = compare::compare(orgLinks, + mod4Links, + allowAll = TRUE) + expect_true(cml3$result) + + # + # # TODO move segment boundaries, change links,etc... + # + # + + # # store original bundle + + # store.bundle.annotation(dbUUID=.test_emu_ae_db_uuid,bundle=b015) + # convert to bundleAnnotDFs and store + b015Char = jsonlite::toJSON(b015, + auto_unbox = TRUE, + pretty = TRUE) + bundleAnnotDFs = annotJSONcharToBundleAnnotDFs(b015Char[1]) + remove_bundleAnnotDBI(ae, + "0000", + bundleName = "msajc015") + store_bundleAnnotDFsDBI(ae, + bundleAnnotDFs, + "0000", + "msajc015") + + # + modOrgItems = DBI::dbGetQuery(ae$connection, + paste0("SELECT * ", + "FROM items ", + "WHERE db_uuid = '",ae$UUID,"'")) + modOrgLabels = DBI::dbGetQuery(ae$connection, + paste0("SELECT * ", + "FROM labels ", + "WHERE db_uuid = '",ae$UUID,"'")) + modOrgLinks = DBI::dbGetQuery(ae$connection, + paste0("SELECT * ", + "FROM links ", + "WHERE db_uuid = '",ae$UUID,"'")) + + expect_equivalent(nrow(modOrgItems), 736) + expect_equivalent(nrow(modOrgLinks), 785) + + # + # # should all be equal to original + cm2 = compare::compare(orgItems, + modOrgItems, + allowAll = TRUE) + expect_true(cm2$result) + cmLbls2 = compare::compare(dplyr::arrange(orgLabels, + bundle, + item_id), + dplyr::arrange(modOrgLabels, + bundle, + item_id)) + compare::compare(orgLabels, + modOrgLabels, + allowAll = TRUE) + expect_true(cmLbls2$result) + cml2 = compare::compare(orgLinks, + modOrgLinks, + allowAll = TRUE) + expect_true(cml2$result) + + b015ModInsrt = b015 + # insert segment + its = b015ModInsrt[['levels']][[7]][['items']] + + #b$levels[['Phonetic']][['items']][[9]] + #$id + #[1] 193 + # + #$sampleStart + #[1] 16132 + # + #$sampleDur + #[1] 3989 + # + #$labels + #$labels[[1]] + #$labels[[1]]$name + #[1] "Phonetic" + # + #$labels[[1]]$value + #[1] "ai" + + # split this item: + # shrink item to length 3500 + b015ModInsrt[['levels']][[7]][['items']][[9]]$sampleDur = 3500 + b015ModInsrt[['levels']][[7]][['items']][[9]]$labels[[1]]$value = 'a' + + # shift items to the right to free index 10 + itCnt = length(b015ModInsrt[['levels']][[7]][['items']]) + shiftSeq = itCnt:10 + + for(itIdx in shiftSeq){ + b015ModInsrt[['levels']][[7]][['items']][[itIdx+1]] = b015ModInsrt[['levels']][[7]][['items']][[itIdx]] + } + + # insert item at index 10 + itLbl = list(name = 'Phonetic', + value = 'i') + itLbls = list(itLbl) + insertIt = list(id = 999, + sampleStart = 19633, + sampleDur = 488, + labels = itLbls) + b015ModInsrt[['levels']][[7]][['items']][[10]] = insertIt + + # store.bundle.annotation(dbUUID=.test_emu_ae_db_uuid,bundle=b015ModInsrt) + # convert to bundleAnnotDFs and store + b015ModInsrtChar = jsonlite::toJSON(b015ModInsrt, + auto_unbox = TRUE, + pretty = TRUE) + bundleAnnotDFs = annotJSONcharToBundleAnnotDFs(b015ModInsrtChar[1]) + remove_bundleAnnotDBI(ae, + "0000", + bundleName = "msajc015") + store_bundleAnnotDFsDBI(ae, + bundleAnnotDFs, + "0000", + "msajc015") + + # read again + # b015Read=get.bundle(sessionName='0000',bundleName = 'msajc015',dbUUID = .test_emu_ae_db_uuid) + bundleAnnotDFs = load_bundleAnnotDFsDBI(ae, + "0000", + "msajc015") + b015ReadChar = bundleAnnotDFsToAnnotJSONchar(ae, bundleAnnotDFs) + b015Read = jsonlite::fromJSON(b015ReadChar, + simplifyVector = FALSE, + na = 'null') + rItCnt = length(b015Read[['levels']][[7]][['items']]) + # check insert sequence + for(itIdx in 1:9){ + expect_equal(b015[['levels']][[7]][['items']][[itIdx]][['id']], + b015Read[['levels']][[7]][['items']][[itIdx]][['id']]) + } + expect_equivalent(b015Read[['levels']][[7]][['items']][[10]]$id, 999) + + for(itIdx in 11:rItCnt){ + expect_equal(b015[['levels']][[7]][['items']][[itIdx-1]][['id']], + b015Read[['levels']][[7]][['items']][[itIdx]][['id']]) + } + + + }) + ###################### + # cleanup + + # delete vars to be safe (& to diconnect) + DBI::dbDisconnect(ae$connection) + DBI::dbDisconnect(aeFromLegacy$connection) + ae = NULL + aeFromLegacy = NULL + + unlink(file.path(path2testData, "fromLegacy"), recursive = TRUE) + unlink(unlink(path2db, recursive = TRUE), recursive = TRUE) +}) + +test_that("store works correctly",{ + + # delete, copy and load + unlink(path2db, recursive = TRUE) + unlink(file.path(path2testData, "fromStore"), + recursive = TRUE) + file.copy(path2orig, + path2testData, + recursive = TRUE) + ae = load_emuDB(path2db, + inMemoryCache = internalVars$testingVars$inMemoryCache, + verbose = FALSE) + + newFolderPath = file.path(path2testData, "fromStore") + unlink(newFolderPath, recursive = TRUE) + store(ae, + targetDir = newFolderPath, + verbose = FALSE) + aeStored = load_emuDB(file.path(newFolderPath, "ae_emuDB"), + verbose = FALSE) + + aeItems=DBI::dbGetQuery(ae$connection,paste0("SELECT * ", + "FROM items ", + "WHERE db_uuid = '",ae$UUID,"'")) + aeLabels=DBI::dbGetQuery(ae$connection,paste0("SELECT * ", + "FROM labels ", + "WHERE db_uuid = '",ae$UUID,"'")) + aeLinks=DBI::dbGetQuery(ae$connection,paste0("SELECT * ", + "FROM links ", + "WHERE db_uuid = '",ae$UUID,"'")) + + aeStoredItems=DBI::dbGetQuery(aeStored$connection,paste0("SELECT * ", + "FROM items ", + "WHERE db_uuid = '",aeStored$UUID,"'")) + aeStoredLabels=DBI::dbGetQuery(aeStored$connection,paste0("SELECT * ", + "FROM labels ", + "WHERE db_uuid = '",aeStored$UUID,"'")) + aeStoredLinks=DBI::dbGetQuery(aeStored$connection,paste0("SELECT * ", + "FROM links ", + "WHERE db_uuid = '",aeStored$UUID,"'")) + + # check that all tabels are the same + # expect_equal(aeItems, aeStoredItems) # new index on items table results in different seq. of items hence: + cres = compare::compare(aeItems, + aeStoredItems, + allowAll = TRUE) + expect_true(cres$result) + expect_equal(aeLabels, aeStoredLabels) + expect_equal(aeLinks, aeStoredLinks) + + #################### + # cleanup + + # delete vars to be safe (& to diconnect ae = NULL + DBI::dbDisconnect(aeStored$connection) + DBI::dbDisconnect(ae$connection) + aeStored = NULL + ae = NULL + unlink(file.path(path2testData, "fromStore"), recursive = TRUE) + unlink(path2db, recursive = TRUE) +}) + +test_that("rename emuDB works correctly",{ + skip_on_cran() # for some reason fails on cran’s windows machines, but I confirmed that it works on windows. So skip. + skip_on_os("windows") + + # delete, copy and load + unlink(path2db, recursive = TRUE) + unlink(file.path(path2testData, "fromStore"), + recursive = TRUE) + file.copy(path2orig, + path2testData, + recursive = TRUE) + + rename_emuDB(path2db, "aeRename") + newPath = file.path(path2testData, + paste0("aeRename", "_emuDB")) + + DBconfig = jsonlite::fromJSON(file.path(newPath, "aeRename_DBconfig.json"), + simplifyVector = FALSE) + expect_equal(DBconfig$name, "aeRename") + + expect_true("aeRename_emuDB" %in% list.files(path2testData)) + expect_true("aeRename_emuDBcache.sqlite" %in% list.files(newPath)) + + # cleanup + unlink(newPath, recursive = TRUE) +}) + +test_that("rename bundles works correctly",{ + + # delete, copy and load + unlink(path2db, recursive = TRUE) + unlink(file.path(path2testData, "fromStore"), + recursive = TRUE) + file.copy(path2orig, + path2testData, + recursive = TRUE) + + db = load_emuDB(path2db, verbose = FALSE) + bundles = list_bundles(db) + # missing col + expect_error(rename_bundles(db, bundles)) + bundles$name_new = paste0(bundles$name, "XXX") + # bad bundle/session names + bad_bundles = bundles + bad_bundles[1,]$name = "bad_bundle_name" + expect_error(rename_bundles(db, bad_bundles)) + bad_bundles = bundles + bad_bundles[1,]$session = "bad_session_name" + expect_error(rename_bundles(db, bad_bundles)) + + + rename_bundles(db, bundles) + + new_bundles = list_bundles(db) + + expect_true(all(stringr::str_detect(new_bundles$name, "XXX"))) + + new_bundles = list_bundlesDBI(db) + + expect_true(all(stringr::str_detect(new_bundles$name, "XXX"))) + + files = list_files(db) + + expect_true(all(stringr::str_detect(files$bundle, "XXX"))) + expect_true(all(stringr::str_detect(files$file, "XXX"))) + expect_true(all(stringr::str_detect(files$absolute_file_path, "XXX"))) + + # cleanup + DBI::dbDisconnect(db$connection) + db = NULL + unlink(path2db, recursive = TRUE) +}) + + +test_that("load of read only emuDB works",{ + skip_on_cran() # probably won't work on windows (because of mode) so skip on cran + skip_on_os("windows") + + # delete, copy and load + unlink(path2db, recursive = TRUE) + unlink(file.path(path2testData, "fromStore"), + recursive = TRUE) + file.copy(path2orig, + path2testData, + recursive = TRUE) + + # change emuDB folder to r-x only for everyone + Sys.chmod(path2db, mode = "555") + ae = load_emuDB(path2db, + inMemoryCache = internalVars$testingVars$inMemoryCache, + verbose = FALSE) + sl = query(ae, "Phonetic == n") + expect_true("tbl_df" %in% class(sl)) + Sys.chmod(path2db, mode = "755") # change back + + # change emuDBcache.sqlite to + Sys.chmod(file.path(path2db, "ae_emuDBcache.sqlite"), mode = "555") + ae = load_emuDB(path2db, + inMemoryCache = internalVars$testingVars$inMemoryCache, + verbose = FALSE) + sl = query(ae, "Phonetic == n") + expect_true("tbl_df" %in% class(sl)) + Sys.chmod(path2db, mode = "755") # change back + + # cleanup + DBI::dbDisconnect(ae$connection) + ae = NULL + unlink(path2db, recursive = TRUE) +}) + diff --git a/tests/testthat/test_emuR-database.caching.R b/tests/testthat/test_emuR-database.caching.R new file mode 100644 index 00000000..8e87deea --- /dev/null +++ b/tests/testthat/test_emuR-database.caching.R @@ -0,0 +1,133 @@ +##' testthat tests for autobuild +##' +context("testing caching functions") + +suppressMessages(require('jsonlite')) + +dbName = "ae" + +path2orig = file.path(tempdir(), + "emuR_demoData", + paste0(dbName, emuDB.suffix)) +path2testData = file.path(tempdir(), + "emuR_testthat") +path2db = file.path(path2testData, + paste0(dbName, emuDB.suffix)) + +# extract internalVars from environment .emuR_pkgEnv +internalVars = get("internalVars", + envir = .emuR_pkgEnv) + +########################### +test_that("update_cache works", { + + # delete, copy and load + unlink(path2db, + recursive = TRUE) + file.copy(path2orig, + path2testData, + recursive = TRUE) + ae = load_emuDB(path2db, + inMemoryCache = internalVars$testingVars$inMemoryCache, + verbose = FALSE) + + ################################ + # + test_that("new bundle in new session is re-cached", { + dir.create(file.path(path2db, 'new_ses')) + file.copy(from = file.path(path2db, '0000_ses', 'msajc010_bndl'), + to = file.path(path2db, 'new_ses'), + recursive = TRUE) + + update_cache(ae, verbose=FALSE) + + l = list_sessionsDBI(ae) + expect_true("new" %in% l$name) + b = list_bundlesDBI(ae) + expect_true(any(b$session == "new" & b$name == 'msajc010')) + + sl = query(ae, "Phonetic == n") + expect_true(any(sl$session == "new")) + }) + + ################################ + # + test_that("change in _annot.json is re-cached", { + # change entry + annotJson = jsonlite::fromJSON(readLines(file.path(path2db, + "new_ses", + "msajc010_bndl", + "msajc010_annot.json")), + simplifyVector = TRUE) + + annotJson$levels$items[[1]]$id = 666666 + + pbpJSON = jsonlite::toJSON(annotJson, + auto_unbox = TRUE, + force = TRUE, + pretty = TRUE) + writeLines(pbpJSON,file.path(path2db, + "new_ses", + "msajc010_bndl", + "msajc010_annot.json"), + useBytes = TRUE) + + update_cache(ae, verbose = FALSE) + + res = DBI::dbGetQuery(ae$connection, paste0("SELECT * ", + "FROM items ", + "WHERE db_uuid = '", ae$UUID, "' ", + " AND session = 'new' ", + " AND bundle = 'msajc010' ", + " AND level = 'Utterance'"))$item_id + + expect_true(res == 666666) + + }) + + + ################################ + # + test_that("deleted bundle is re-cached", { + unlink(file.path(path2db, + 'new_ses', + 'msajc010_bndl'), + recursive = TRUE) + + update_cache(ae, verbose = FALSE) + + res = DBI::dbGetQuery(ae$connection, paste0("SELECT * ", + "FROM items ", + "WHERE db_uuid = '", ae$UUID, "' ", + " AND session = 'new' ", + " AND bundle = 'msajc010'")) + + expect_true(nrow(res) == 0) + + bndls = list_bundles(ae) + expect_false(any(bndls$session == "new")) + + + }) + + ################################ + # + test_that("deleted session is re-cached", { + unlink(file.path(path2db, 'new_ses'), recursive = TRUE) + ses = list_sessionsDBI(ae) + expect_true(any(ses$name == "new")) + + update_cache(ae, verbose = FALSE) + ses = list_sessionsDBI(ae) + + expect_false(any(ses$name == "new")) + + }) + + # clean up + DBI::dbDisconnect(ae$connection) + ae = NULL + unlink(path2db, recursive = TRUE) + +}) + diff --git a/tests/testthat/test_emuR-database.files.R b/tests/testthat/test_emuR-database.files.R new file mode 100644 index 00000000..5b5315da --- /dev/null +++ b/tests/testthat/test_emuR-database.files.R @@ -0,0 +1,98 @@ +##' testthat tests for database.files +##' +context("testing database.files functions") + +# extract internalVars from environment .emuR_pkgEnv +internalVars = get("internalVars", envir = .emuR_pkgEnv) + +####################################### +test_that("file operations work", { + + dbName = 'ae' + + path2orig = file.path(tempdir(), + "emuR_demoData", + paste0(dbName, emuDB.suffix)) + path2testData = file.path(tempdir(), + "emuR_testthat") + path2db = file.path(path2testData, + paste0(dbName, emuDB.suffix)) + + # delete, copy and load + unlink(path2db, recursive = TRUE) + file.copy(path2orig, + path2testData, + recursive = TRUE) + + ae = load_emuDB(path2db, + inMemoryCache = internalVars$testingVars$inMemoryCache, + verbose = FALSE) + + # extract internalVars from environment .emuR_pkgEnv + internalVars = get("internalVars", + envir = .emuR_pkgEnv) + + test_that("import_mediaFiles works", { + wavPath = system.file('extdata', + package = 'wrassp') + import_mediaFiles(ae, + dir = wavPath, + targetSessionName = 'newSes', + verbose = FALSE) + expect_true(file.exists(file.path(path2db, 'newSes_ses'))) + paths = list.files(file.path(path2db, 'newSes_ses'), + recursive = TRUE, + full.names = TRUE, + pattern = 'wav$') + expect_equal(length(paths), 9) + paths = list.files(file.path(path2db, 'newSes_ses'), + recursive = TRUE, + full.names = TRUE, + pattern = '_annot.json$') + expect_equal(length(paths), 9) + }) + + test_that("CRUD operations for files work", { + + test_that("add = (C)RUD", { + wrasspExtdataPath = system.file('extdata', + package = 'wrassp') + wavFilePaths = list.files(wrasspExtdataPath, + pattern = "wav$", + full.names = TRUE, + recursive = TRUE) + + outDirPath = file.path(path2testData, 'zcranaVals') + dir.create(outDirPath) + wrassp::zcrana(wavFilePaths, + outputDirectory = outDirPath, + verbose = FALSE) + + add_files(ae, + dir = outDirPath, + fileExtension = 'zcr', + targetSessionName = 'newSes') + zcrPaths = list.files(path2db, + pattern = 'zcr$', + recursive = TRUE) + expect_equal(length(zcrPaths), 9) + + # cleanup + unlink(outDirPath, recursive = TRUE) + + + }) + + test_that("list = C(R)UD", { + df = list_files(ae) + expect_equal(dim(df),c(55, 4)) + }) + + + }) + + # clean up + DBI::dbDisconnect(ae$connection) + ae = NULL + +}) diff --git a/tests/testthat/test_emuR-database.flatfiledata.R b/tests/testthat/test_emuR-database.flatfiledata.R new file mode 100644 index 00000000..2e892c03 --- /dev/null +++ b/tests/testthat/test_emuR-database.flatfiledata.R @@ -0,0 +1,169 @@ +##' testthat tests for flatfiledata +##' +context("testing database flatfiledata functions") + +dbName = "ae" + +path2orig = file.path(tempdir(), + "emuR_demoData", + paste0(dbName, emuDB.suffix)) +path2testData = file.path(tempdir(), "emuR_testthat") +path2db = file.path(path2testData, + paste0(dbName, emuDB.suffix)) + +# delete, copy and load +unlink(path2db, recursive = TRUE) +file.copy(path2orig, + path2testData, + recursive = TRUE) + +# create a second session +dir.create(file.path(path2db, "0001_ses")) +file.copy(from = list.files(file.path(path2db, "0000_ses"), full.names = TRUE), + to = file.path(path2db, "0001_ses"), + recursive = TRUE) + +# extract internalVars from environment .emuR_pkgEnv +internalVars = get("internalVars", + envir = .emuR_pkgEnv) + +db = load_emuDB(path2db, + inMemoryCache = internalVars$testingVars$inMemoryCache, + verbose = FALSE) + +test_that("join_flatFileData works on emuDB level", { + + sl = query(db, "Phonetic == S") + + # check error is thrown for bad columns in long data + long_data = tibble::tibble(bad_col_name = c("location", "institution"), + value = c("Muenchen", "IPS")) + + file_path = file.path(db$basePath, paste0(db$dbName, "_long.", "csv")) + + readr::write_excel_csv2(x = long_data, file = file_path) + expect_error(join_flatFileData(db, sl), + ".*doesn't only contain the columns.*") + + # long emuDB data + long_data = tibble::tibble(key = c("location", "institution"), + value = c("Muenchen", "IPS")) + + file_path = file.path(db$basePath, paste0(db$dbName, "_long.", "csv")) + + readr::write_excel_csv2(x = long_data, file = file_path) + + sl_joined = join_flatFileData(db, sl) + + expect_true(all(c("location", "institution") %in% names(sl_joined))) + expect_equal(sl_joined$location, rep("Muenchen", 10)) + expect_equal(sl_joined$institution, rep("IPS", 10)) + + unlink(file_path) + + # wide emuDB data + # specify session and bundle + wide_data = tibble::tibble(session = c("0000", "0000", "0001"), + bundle = c("msajc003", "msajc012", "msajc022"), + eyecolor = c("blue", "brown", "green"), + height = c("1.80", "1.60", "2.00")) + + file_path = file.path(db$basePath, paste0(db$dbName, "_wide.", "csv")) + + readr::write_excel_csv2(x = wide_data, file = file_path) + + sl_joined = join_flatFileData(db, sl) + + # plus new wide data is added and correct + expect_equal(sl_joined$eyecolor[1:3], c("blue", "brown", "brown")) + expect_equal(sl_joined$eyecolor[9:10], c("green", "green")) + expect_equal(length(which(is.na(sl_joined$eyecolor))), 5) + + unlink(file_path) + +}) + +test_that("join_flatFileData works on single session level", { + + sl = query(db, "Phonetic == S") + + file_path = file.path(db$basePath, "0000_ses", paste0("0000", "_long.", "csv")) + + # long session data + long_data = tibble::tibble(key = c("location", "fudge", "speed"), + value = c("Muenchen", "yummy", "fast")) + + readr::write_excel_csv2(x = long_data, + file = file_path) + + + sl_joined = join_flatFileData(db, sl) + + expect_true(all(c("location", "fudge", "speed") %in% names(sl_joined))) + + expect_equal(length(which(is.na(sl_joined$location))), 5) + expect_equal(length(which(is.na(sl_joined$fudge))), 5) + expect_equal(length(which(is.na(sl_joined$speed))), 5) + + + expect_equal(sl_joined$location[1:5], c("Muenchen", + "Muenchen", + "Muenchen", + "Muenchen", + "Muenchen")) + + expect_equal(sl_joined$fudge[1:5], c("yummy", + "yummy", + "yummy", + "yummy", + "yummy")) + + unlink(file_path) + + # wide session data + # specify bundle only + + file_path = file.path(db$basePath, "0000_ses", paste0("0000", "_wide.", "csv")) + + wide_data = tibble::tibble(bundle = c("msajc003", "msajc012"), + eyecolor = c("blue", "brown")) + + readr::write_excel_csv2(x = wide_data, + file = file_path) + + sl_joined = join_flatFileData(db, sl) + + expect_equal(length(which(is.na(sl_joined$eyecolor))), 7) + + expect_equal(sl_joined$eyecolor[1:3], c("blue", "brown", "brown")) + + unlink(file_path) +}) + + +test_that("join_flatFileData works on single bundle level", { + + sl = query(db, "Phonetic == S") + + file_path = file.path(db$basePath, + "0000_ses", + "msajc003_bndl", + paste0("msajc003", "_long.", "csv")) + + # long bundle data + long_data = tibble::tibble(key = c("location", "fudge", "speed"), + value = c("Muenchen", "yummy", "fast")) + + readr::write_excel_csv2(x = long_data, + file = file_path) + + sl_joined = join_flatFileData(db, sl) + + expect_equal(length(which(is.na(sl_joined$location))), 9) + + expect_equal(sl_joined$location[1], "Muenchen") + expect_equal(sl_joined$fudge[1], "yummy") + expect_equal(sl_joined$speed[1], "fast") + + unlink(file_path) +}) diff --git a/tests/testthat/test_emuR-emuRsegs.R b/tests/testthat/test_emuR-emuRsegs.R new file mode 100644 index 00000000..354680bd --- /dev/null +++ b/tests/testthat/test_emuR-emuRsegs.R @@ -0,0 +1,61 @@ +##' testthat tests for emuRsegs/tibble +##' +context("testing emuRsegs functions") + +dbName = "ae" + +path2orig = file.path(tempdir(), + "emuR_demoData", + paste0(dbName, emuDB.suffix)) +path2testData = file.path(tempdir(), + "emuR_testthat") +path2db = file.path(path2testData, + paste0(dbName, emuDB.suffix)) + +# extract internalVars from environment .emuR_pkgEnv +internalVars = get("internalVars", envir = .emuR_pkgEnv) + +test_that("emuRtrackdata functions work", { + # delete, copy and load + unlink(path2db, recursive = TRUE) + file.copy(path2orig, + path2testData, + recursive = TRUE) + ae = load_emuDB(path2db, + inMemoryCache = internalVars$testingVars$inMemoryCache, + verbose = FALSE) + + ################ + test_that("export_seglistToTxtCollection works", { + sl_his = query(ae, "Text == his") + export_seglistToTxtCollection(ae, + seglist = sl_his, + targetDir = tempdir()) + + fps_wav = list.files(file.path(tempdir(), + "ae_txt_col_from_seglist"), + pattern = "wav$", + full.names = TRUE) + fps_txt = list.files(file.path(tempdir(), + "ae_txt_col_from_seglist"), + pattern = "txt$", + full.names = TRUE) + file_content = readr::read_file(fps_txt[1]) + + expect_equal(length(fps_wav), 2) + expect_equal(length(fps_txt), 2) + expect_equal(file_content, "his") + + unlink(file.path(tempdir(), + "ae_txt_col_from_seglist"), + recursive = TRUE) + + }) + + + # clean up + DBI::dbDisconnect(ae$connection) + ae = NULL + unlink(path2db, recursive = TRUE) + +}) diff --git a/tests/testthat/test_emuR-emuRtrackdata.R b/tests/testthat/test_emuR-emuRtrackdata.R new file mode 100644 index 00000000..130503a6 --- /dev/null +++ b/tests/testthat/test_emuR-emuRtrackdata.R @@ -0,0 +1,97 @@ +##' testthat tests for emuRtrackdata +##' +context("testing emuRtrackdata functions") + +dbName = "ae" + +path2orig = file.path(tempdir(), + "emuR_demoData", + paste0(dbName, emuDB.suffix)) +path2testData = file.path(tempdir(), "emuR_testthat") +path2db = file.path(path2testData, + paste0(dbName, emuDB.suffix)) + +# extract internalVars from environment .emuR_pkgEnv +internalVars = get("internalVars", envir = .emuR_pkgEnv) + +test_that("emuRtrackdata functions work", { + # delete, copy and load + unlink(path2db, recursive = TRUE) + file.copy(path2orig, + path2testData, + recursive = TRUE) + ae = load_emuDB(path2db, + inMemoryCache = internalVars$testingVars$inMemoryCache, + verbose = FALSE) + + + ############################## + test_that("correct classes are returned", { + + sl = query(ae, "Phonetic == @ | i:", + resultType = "emuRsegs") + td = get_trackdata(ae, + seglist = sl, + ssffTrackName = 'fm', + resultType = "trackdata", + verbose = FALSE) + + newTd = create_emuRtrackdata(sl, td) + + expect_true(inherits(newTd, "emuRtrackdata")) + + }) + + ############################## + # test_that("cut works correctly", { + # + # sl = query(dbName, "Phonetic=@|i:") + # td = get_trackdata(dbName, + # seglist = sl, + # ssffTrackName = 'fm') + # + # newTd = create_emuRtrackdata(sl, td) + # + # propRes = cut_td(newTd, 0.5, prop=TRUE) + # print(propRes) + # }) + + + test_that("normalize_length returns normalized segments on tibble", { + + sl = query(ae, "Phonetic = I", + resultType = "tibble") + td = get_trackdata(ae, + seglist = sl, + ssffTrackName = 'fm', + resultType = "tibble") + + N = 21 + td_norm = normalize_length(td, N = N) + + expect_true(nrow(td_norm[td_norm$sl_rowIdx == 1,]) == N) + }) + + test_that("normalize_length returns normalized segments on emuRtrackdata", { + + sl = query(ae, "Phonetic = I") + td = get_trackdata(ae, + seglist = sl, + ssffTrackName = 'fm', + resultType = "emuRtrackdata") + + N = 21 + td_norm = normalize_length(td, N = N) + + expect_true(nrow(td_norm[td_norm$sl_rowIdx == 1,]) == N) + }) + + + # clean up + DBI::dbDisconnect(ae$connection) + ae = NULL + unlink(path2db, recursive = TRUE) + +}) + + diff --git a/tests/testthat/test_emuR-export_TextGridCollection.R b/tests/testthat/test_emuR-export_TextGridCollection.R new file mode 100644 index 00000000..eb846803 --- /dev/null +++ b/tests/testthat/test_emuR-export_TextGridCollection.R @@ -0,0 +1,164 @@ +##' testthat tests for export_TextGridCollection +##' +context("testing export_TextGridCollection") + +dbName = "ae" +path2orig = file.path(tempdir(), + "emuR_demoData", + paste0(dbName, emuDB.suffix)) +path2testData = file.path(tempdir(), "emuR_testthat") +path2db = file.path(path2testData, + paste0(dbName, emuDB.suffix)) + +# extract internalVars from environment .emuR_pkgEnv +internalVars = get("internalVars", + envir = .emuR_pkgEnv) + +test_that("export_TextGridCollection works correctly", { + + # delete, copy and load + unlink(path2db, recursive = TRUE) + file.copy(path2orig, + path2testData, + recursive = TRUE) + ae = load_emuDB(path2db, + inMemoryCache = internalVars$testingVars$inMemoryCache, + verbose = FALSE) + + test_that("errors are thrown correctly", { + # preclean just in case + unlink(file.path(path2testData, "tgCol"), + recursive = TRUE) + # bad attr. name + expect_error(export_TextGridCollection(ae, + targetDir = file.path(path2testData, "tgCol"), + attributeDefinitionNames = "badName"), + verbose = FALSE) + }) + + test_that("exporting every level works", { + # preclean just in case + unlink(file.path(path2testData, "tgCol"), + recursive = TRUE) + + export_TextGridCollection(ae, + targetDir = file.path(path2testData, "tgCol"), + verbose = FALSE) + + tgLines = readLines(file.path(path2testData, + "tgCol", + "0000", + "msajc003.TextGrid")) + # check header + expect_match(tgLines[1], ".*ooTextFile.*") + expect_match(tgLines[2], ".*TextGrid.*") + expect_match(tgLines[4], ".*xmin = 0.*") + + # check number of levels by name + expect_equal(length(grep("name", tgLines)), 11) + + #check same number of items are present as in items X label + qr = DBI::dbGetQuery(ae$connection, + paste0("SELECT * ", + "FROM items AS it, ", + " labels AS l ", + "WHERE it.db_uuid = l.db_uuid ", + " AND it.session = l.session ", + " AND it.bundle = 'msajc003' ", + " AND l.bundle = 'msajc003' ", + " AND it.item_id = l.item_id")) + + # 10*2 = left right padding for 10 tiers (from 10 attr. defs.) + # that are not of type EVENT mergers (containing "->") are deducted + expect_equal(length(grep("text|mark",tgLines)), + nrow(qr) + 10 * 2 - length(grep("->", tgLines))) + + + # clean up + unlink(file.path(path2testData, "tgCol"), + recursive = TRUE) + }) + + test_that("exporting only msajc003 works", { + # preclean just in case + unlink(file.path(path2testData, "tgCol"), + recursive = TRUE) + + export_TextGridCollection(ae, + targetDir = file.path(path2testData, "tgCol"), + bundlePattern = "msajc003", + verbose = FALSE) + list.files(file.path(path2testData, "tgCol", "0000")) + + expect_equal(length(list.files(file.path(path2testData, + "tgCol", + "0000"))), + 2) + + + # clean up + unlink(file.path(path2testData, "tgCol"), + recursive = TRUE) + }) + + test_that("empty time segments are treated correctly", { + # preclean just in case + unlink(file.path(path2testData, "tgCol"), + recursive = TRUE) + + export_TextGridCollection(ae, + targetDir = file.path(path2testData, "tgCol"), + bundlePattern = "msajc010", + verbose = FALSE) + + tgLines = readLines(file.path(path2testData, + "tgCol", + "0000", + "msajc010.TextGrid")) + + qr = DBI::dbGetQuery(ae$connection, + paste0("SELECT * ", + "FROM items AS it, ", + " labels AS l ", + "WHERE it.db_uuid = l.db_uuid ", + " AND it.session = l.session ", + " AND it.bundle = 'msajc010' ", + " AND l.bundle = 'msajc010' ", + " AND it.item_id = l.item_id")) + + # 10*2 = left right padding for 10 tiers + # (from 10 attr. defs.) that are not of type EVENT + expect_equal(length(grep("text|mark",tgLines)), + nrow(qr) + 10 * 2 + 3 - length(grep("->",tgLines))) + + # clean up + unlink(file.path(path2testData, "tgCol"), + recursive = TRUE) + }) + + test_that("only exporting Phonetic works", { + # preclean just in case + unlink(file.path(path2testData, "tgCol"), + recursive = TRUE) + + export_TextGridCollection(ae, + targetDir = file.path(path2testData, "tgCol"), + bundlePattern = "msajc010", + attributeDefinitionNames = "Phonetic", + verbose = FALSE) + + tgLines = readLines(file.path(path2testData, + "tgCol", + "0000", + "msajc010.TextGrid")) + + expect_equal(length(grep("Phonetic", tgLines)), 1) + }) + + # clean up + DBI::dbDisconnect(ae$connection) + ae = NULL + unlink(path2db, recursive = TRUE) + + +}) diff --git a/tests/testthat/test_emuR-get_trackdata.R b/tests/testthat/test_emuR-get_trackdata.R new file mode 100644 index 00000000..919743cb --- /dev/null +++ b/tests/testthat/test_emuR-get_trackdata.R @@ -0,0 +1,291 @@ +##' testthat tests for get_trackdata +##' +context("testing get_trackdata function") + +dbName = "ae" + +path2orig = file.path(tempdir(), + "emuR_demoData", + paste0(dbName, emuDB.suffix)) + +# extract internalVars from environment .emuR_pkgEnv +internalVars = get("internalVars", + envir = .emuR_pkgEnv) + +ae = load_emuDB(path2orig, + inMemoryCache = internalVars$testingVars$inMemoryCache, + verbose = FALSE) + +# get segmentlist of type segment +path2segl <- list.files(system.file("extdata", package = "emuR"), + pattern = glob2rx("ae-n.seg"), + full.names = TRUE) + +n = read.emusegs(path2segl) + +# get segmentlist of type event +path2segl <- list.files(system.file("extdata", + package = "emuR"), + pattern = glob2rx("ae-hStar.seg"), + full.names = TRUE) +hStar = read.emusegs(path2segl) + +############################## +test_that("correct classes are returned", { + + td = get_trackdata(ae, + n, + 'fm', + consistentOutputType = FALSE, + resultType = "trackdata", + verbose = FALSE) + expect_that(class(td), equals('trackdata')) + + td = get_trackdata(ae, + n, + 'fm', + cut = .5, + resultType = "trackdata", + consistentOutputType = FALSE, + verbose = FALSE) + expect_that(class(td), equals('data.frame')) + + td = get_trackdata(ae, + n, + 'fm', + cut = .5, + npoints = 3, + resultType = "trackdata", + consistentOutputType = FALSE, + verbose = FALSE) + expect_that(class(td), + equals('trackdata')) + + td = get_trackdata(ae, + n, + 'fm', + cut = .5, + npoints = 1, + resultType = "trackdata", + consistentOutputType = FALSE, + verbose = FALSE) + expect_that(class(td), equals('data.frame')) + + td = get_trackdata(ae, + hStar, + 'fm', + resultType = "trackdata", + consistentOutputType = FALSE, + verbose = FALSE) + expect_that(class(td), equals('data.frame')) + + td = get_trackdata(ae, + hStar, + 'fm', + npoints = 3, + resultType = "trackdata", + consistentOutputType = FALSE, + verbose = FALSE) + expect_that(class(td), equals('trackdata')) + + sl = query(ae, "Phonetic == @ | i:") + td = get_trackdata(ae, + sl, + "fm", + resultType = "emuRtrackdata", + consistentOutputType = FALSE, + verbose = FALSE) + expect_true(inherits(td, 'emuRtrackdata')) +}) + +############################# +test_that("bad calls", { + expect_error(get_trackdata(ae, + verbose = FALSE)) # seglist missing error + expect_error(get_trackdata(seglist = n, + verbose = FALSE)) # S3 error + expect_error(get_trackdata(ae, + n, + verbose = FALSE)) # ssffTrackName missing error + expect_error(get_trackdata(ae, + n, + cut = 2, + verbose = FALSE)) # cut > 1 error + expect_error(get_trackdata(ae, + n, + cut = -1, + verbose = FALSE)) # cut < 0 error + expect_error(get_trackdata(ae, + n, + 'fm', + npoints = 3, + verbose = FALSE)) # npoint with no cut argument error + expect_error(get_trackdata(ae, + n, + 'fm', + onTheFlyParams = formals(open), + verbose = FALSE)) # no onTheFlyFunctionName error + expect_error(get_trackdata(aeDB, + n, + 'fm', + onTheFlyOptLogFilePath = '/path/to/bla/', + verbose = FALSE)) # onTheFlyOptLogFilePath error + expect_error(get_trackdata(ae, + n, + resultType = "emuRtrackdata", + verbose = FALSE)) # bad resultType for seglist of type 'emusegs' +}) + +############################## +test_that("returned trackdata$data field has correct length", { + td = get_trackdata(ae, + n, + 'fm', + cut = .5, + npoints = 3, + resultType = "trackdata", + verbose = FALSE) + expect_that(dim(td$data)[1], equals(length(n$utts)*3)) + + td = get_trackdata(ae, + n, + 'fm', + cut = .5, + npoints = 5, + resultType = "trackdata", + verbose = FALSE) + expect_that(dim(td$data)[1], equals(length(n$utts)*5)) + +}) + +############################## +test_that("all sorts of cut values work", { + cutVals = seq(0, 1, 0.04) + for(cutV in cutVals){ + td = get_trackdata(ae, + n, + 'fm', + cut = cutV, + consistentOutputType = FALSE, + resultType = "trackdata", + verbose = FALSE) + expect_that(class(td), equals('data.frame')) + } +}) + +############################### +test_that("n points greater than boundaries word", { + + td = get_trackdata(ae, + n, + 'fm', + cut = 0.5, + npoints = 20, + resultType = "trackdata", + verbose = FALSE) + expect_that(class(td), equals('trackdata')) +}) + + +############################# +test_that("on-the-fly calculations work", { + for(wrasspFun in names(wrasspOutputInfos)){ + if(length(wrasspOutputInfos[[wrasspFun]]$tracks) > 0 + && wrasspOutputInfos[[wrasspFun]]$outputType == "SSFF"){ + td = get_trackdata(ae, + n, + wrasspOutputInfos[[wrasspFun]]$tracks[1], + onTheFlyFunctionName = wrasspFun, + resultType = "trackdata", + verbose = FALSE) + } + expect_that(class(td), equals('trackdata')) + } +}) + +############################## +test_that("on-the-fly calculations work if ssffTrackName is not set", { + td = get_trackdata(ae, + n, + onTheFlyFunctionName = "ksvF0", + resultType = "trackdata", + verbose = FALSE) + expect_equal(dim(td$index)[1], 12) +}) + +############################## +test_that("data fields are the same as hardcoded values (taken from original emu.track(n, 'fm') command)", { + # note that values have slightly changed due to the recalulation with wrassp + td = get_trackdata(ae, + n, + 'fm', + resultType = "trackdata", + verbose = FALSE) + expect_that(td$data[10,1], equals(256)) + expect_that(td$data[10,2], equals(1521)) + expect_that(td$data[10,3], equals(2382)) + expect_that(td$data[10,4], equals(3573)) + # on-the-fly values should be the same + td = get_trackdata(ae, + n, + onTheFlyFunctionName = "forest", + resultType = "trackdata", + verbose = FALSE) + expect_that(td$data[10,1], equals(256)) + expect_that(td$data[10,2], equals(1521)) + expect_that(td$data[10,3], equals(2382)) + expect_that(td$data[10,4], equals(3573)) + + +}) + +############################## +test_that("resultType tibble is accepted", { + sl = query(ae, + "Phonetic == n", + resultType = "tibble") + td = get_trackdata(ae, + sl, + onTheFlyFunctionName = "ksvF0", + resultType = "trackdata", + verbose = FALSE) + expect_equal(dim(td$index)[1], 12) +}) + +############################## +test_that("non wrassp functions work", { + fps = list_files(ae, fileExtension = "fms")$absolute_file_path + # write TSV files containing fm & bw + for(fp in fps){ + ado = wrassp::read.AsspDataObj(fp) + track_vals = cbind(ado$fm, ado$bw) + colnames(track_vals) = c("F1", "F2", "F3", "F4", "bw1", "bw2", "bw3", "bw4") + tbl = tibble::as_tibble(track_vals) + tbl$frame_time = seq(from = attributes(ado)$startTime, + by = 1/wrassp::rate.AsspDataObj(ado), + length.out = nrow(tbl)) * 1000 + readr::write_tsv(x = tbl, + file = paste0(tools::file_path_sans_ext(fp), ".tsv")) + } + + myFun <- function(mediaFilePath){ + res = readr::read_tsv(file = paste0(tools::file_path_sans_ext(mediaFilePath), ".tsv"), + col_types = readr::cols() # suppress message + ) + return(res) + } + + sl = query(ae, "Phonetic == n") + + td = get_trackdata(ae, + sl, + onTheFlyFunction = myFun, + verbose = FALSE) + expect_equal(nrow(td), 163) + expect_equal(ncol(td), 28) +}) + + +# clean up +DBI::dbDisconnect(ae$connection) +ae = NULL diff --git a/tests/testthat/test_emuR-hooks.R b/tests/testthat/test_emuR-hooks.R new file mode 100644 index 00000000..277d96ce --- /dev/null +++ b/tests/testthat/test_emuR-hooks.R @@ -0,0 +1,7 @@ +context("testing that hooks like .onLoad are doing the right stuff") + +test_that(".onLoad options are set correctly", { + expect_equal(getOption("emuR.emuWebApp.dir"), + file.path(tempdir(), "EMU-webApp")) + +}) \ No newline at end of file diff --git a/tests/testthat/test_emuR-query.database.R b/tests/testthat/test_emuR-query.database.R new file mode 100644 index 00000000..5e7cf2de --- /dev/null +++ b/tests/testthat/test_emuR-query.database.R @@ -0,0 +1,677 @@ +context("testing queries") + +.aeSampleRate = 20000 + +.test_emu_ae_db = NULL +.test_emu_ae_db_uuid = '3f627b7b-4fb5-4b4a-8c79-b5f49df4df25' +.test_emu_ae_db_dir = NULL + +path2demoData = file.path(tempdir(), + "emuR_demoData") +path2testhatFolder = file.path(tempdir(), + "emuR_testthat") + +# extract internalVars from environment .emuR_pkgEnv +internalVars = get("internalVars", envir = .emuR_pkgEnv) + +test_that("Convert example database ae", { + legacyDbEmuAeTpl <- file.path(path2demoData, "legacy_ae", "ae.tpl") + .test_emu_ae_db_dir <<- file.path(path2testhatFolder, 'test_emu_ae') + unlink(.test_emu_ae_db_dir, recursive = TRUE) + convert_legacyEmuDB(emuTplPath = legacyDbEmuAeTpl, + targetDir = .test_emu_ae_db_dir, + dbUUID = .test_emu_ae_db_uuid, + verbose = FALSE) +}) + +test_that("Load example database ae", { + ae = load_emuDB(file.path(.test_emu_ae_db_dir,'ae_emuDB'), + inMemoryCache = internalVars$testingVars$inMemoryCache, + verbose = FALSE) + + dbConfig = load_DBconfig(ae) + expect_that(dbConfig[['name']], + is_equivalent_to('ae')) + + test_that("sessionPattern and bundlePattern work ",{ + # sequence as seglist + sl1 = query(ae, "Phonetic == n", bundlePattern = "msajc003") + + expect_equal(unique(sl1$bundle), "msajc003") + + sl2 = query(ae, "Phonetic == n") + sl3 = query(ae, "Phonetic == n", sessionPattern = "0000") + + expect_true(all(sl2$bundle == sl3$bundle)) + + }) + + test_that("Query labels",{ + # sequence as seglist + sl1 = query(ae,"[Text == more -> Text == customers]", + resultType = 'emusegs') + expect_that(class(sl1), + is_identical_to(c('emusegs', 'data.frame'))) + expect_that(nrow(sl1), + equals(1)) + expect_that('[.data.frame'(sl1, 1, 'labels'), + is_identical_to(I('more->customers'))) + expect_that('[.data.frame'(sl1, 1, 'utts'), + is_identical_to(I('0000:msajc057'))) + }) + + + test_that("Query level label groups",{ + + sl1 = query(ae, + "Phoneme == nasal", + resultType = 'emusegs') + # TODO check some items + expect_that(nrow(sl1), equals(23)) + sl2 = query(ae, + "Phonetic == nasal", + resultType = 'emusegs') + # TODO check some items + expect_that(nrow(sl2), equals(19)) + }) + + test_that("Query database label groups",{ + + add_labelGroup(ae, + 'testGroup1', + c('p', 'r')) + sl1 = query(ae, + "Phoneme == testGroup1") + expect_that(nrow(sl1), + equals(11)) + + }) + + # + test_that("Query sequence",{ + + r1 = query(ae, + "[[[Phoneme == 'tS' ^ Phonetic == 't'] -> Phoneme == I] -> Phoneme == l]", + resultType = NULL) + expect_that(nrow(r1), + equals(1)) + expect_that(r1[1, 'db_uuid'], is_identical_to(.test_emu_ae_db_uuid)) + expect_that(r1[1,'session'], is_identical_to('0000')) + expect_that(r1[1,'bundle'], is_identical_to('msajc012')) + expect_that(r1[1,'start_item_id'], equals(121)) + expect_that(r1[1,'end_item_id'], equals(123)) + + sl1 = query(ae, + "[[[Phoneme == 'tS' ^ Phonetic == 't'] -> Phoneme == I] -> Phoneme == l]", + resultType = 'emusegs') + expect_that(nrow(sl1), equals(1)) + expect_that('[.data.frame'(sl1, 1, 'labels'), + is_identical_to(I('tS->I->l'))) + expect_that('[.data.frame'(sl1, 1, 'utts'), + is_identical_to(I('0000:msajc012'))) + + sl2 = query(ae, + "[Phoneme == n -> Phoneme == t]") + expect_that(sl2$end_item_seq_idx[1], + equals(sl2$start_item_seq_idx[1] + 1)) + + }) + + test_that("Query combined sequence dominance",{ + + r1 = query(ae, + "[[Syllable == W -> Syllable == W] ^ [Phoneme == @ -> Phoneme == s]]", + resultType = NULL) + expect_that(nrow(r1), + equals(2)) + expect_that(r1[1, 'db_uuid'], + is_identical_to(.test_emu_ae_db_uuid)) + expect_that(r1[1, 'session'], is_identical_to('0000')) + expect_that(r1[1, 'bundle'], is_identical_to('msajc015')) + expect_that(r1[1, 'start_item_id'], equals(131)) + expect_that(r1[1, 'end_item_id'], equals(132)) + + expect_that(r1[2, 'db_uuid'], + is_identical_to(.test_emu_ae_db_uuid)) + expect_that(r1[2, 'session'], + is_identical_to('0000')) + expect_that(r1[2, 'bundle'], + is_identical_to('msajc015')) + expect_that(r1[2, 'start_item_id'], + equals(141)) + expect_that(r1[2, 'end_item_id'], + equals(142)) + # + }) + # + + test_that("Query dominance over more than one level",{ + + r1 = query(ae,"[ Syllable == S ^ Phonetic == p ]", + resultType = NULL) + expect_that(nrow(r1), + equals(2)) + + }) + + test_that("Distinct result set for dominance query",{ + + r1 = query(ae,"[ Syllable == S ^ Phonetic == s]") + expect_that(nrow(r1),equals(9)) + + }) + + test_that("Query using Start function",{ + r1 = query(ae, + "Phoneme = w & Start(Word, Phoneme) = 1", + resultType = NULL) + + expect_that(nrow(r1), + equals(4)) + expect_that(r1[1, 'db_uuid'], + is_identical_to(.test_emu_ae_db_uuid)) + expect_that(r1[1, 'session'], + is_identical_to('0000')) + expect_that(r1[1, 'bundle'], + is_identical_to('msajc003')) + expect_that(r1[1, 'start_item_id'], + equals(128)) + expect_that(r1[2, 'db_uuid'], + is_identical_to(.test_emu_ae_db_uuid)) + expect_that(r1[2, 'session'], + is_identical_to('0000')) + expect_that(r1[2, 'bundle'], + is_identical_to('msajc012')) + expect_that(r1[2, 'start_item_id'], + equals(124)) + expect_that(r1[3, 'db_uuid'], + is_identical_to(.test_emu_ae_db_uuid)) + expect_that(r1[3, 'session'], + is_identical_to('0000')) + expect_that(r1[3, 'bundle'], + is_identical_to('msajc015')) + expect_that(r1[3, 'start_item_id'], + equals(164)) + expect_that(r1[4, 'db_uuid'], + is_identical_to(.test_emu_ae_db_uuid)) + expect_that(r1[4, 'session'], + is_identical_to('0000')) + expect_that(r1[4, 'bundle'], + is_identical_to('msajc015')) + expect_that(r1[4, 'start_item_id'], + equals(177)) + + r2 = query(ae, "Phoneme == p & Start(Word, Phoneme) == 0", + resultType = NULL) + + expect_that(nrow(r2), + equals(3)) + expect_that(r2[1, 'db_uuid'], + is_identical_to(.test_emu_ae_db_uuid)) + expect_that(r2[1, 'session'], + is_identical_to('0000')) + expect_that(r2[1, 'bundle'], + is_identical_to('msajc015')) + expect_that(r2[1, 'start_item_id'], + equals(147)) + expect_that(r2[2, 'db_uuid'], + is_identical_to(.test_emu_ae_db_uuid)) + expect_that(r2[2, 'session'], + is_identical_to('0000')) + expect_that(r2[2, 'bundle'], + is_identical_to('msajc022')) + expect_that(r2[2, 'start_item_id'], + equals(122)) + expect_that(r2[3, 'db_uuid'], + is_identical_to(.test_emu_ae_db_uuid)) + expect_that(r2[3, 'session'], + is_identical_to('0000')) + expect_that(r2[3, 'bundle'], + is_identical_to('msajc057')) + expect_that(r2[3, 'start_item_id'], + equals(136)) + + # and some bundle pattern tests + r3 = query(ae, + "Phoneme == p & Start(Word, Phoneme) == 0", + bundlePattern = 'msajc0..', + resultType = NULL) + + expect_that(nrow(r3), + equals(3)) + expect_that(r3[1, 'db_uuid'], + is_identical_to(.test_emu_ae_db_uuid)) + expect_that(r3[1, 'session'], + is_identical_to('0000')) + expect_that(r3[1, 'bundle'], + is_identical_to('msajc015')) + expect_that(r3[1, 'start_item_id'], + equals(147)) + expect_that(r3[2, 'db_uuid'], + is_identical_to(.test_emu_ae_db_uuid)) + expect_that(r3[2, 'session'], + is_identical_to('0000')) + expect_that(r3[2, 'bundle'], + is_identical_to('msajc022')) + expect_that(r3[2, 'start_item_id'], + equals(122)) + expect_that(r3[3, 'db_uuid'], + is_identical_to(.test_emu_ae_db_uuid)) + expect_that(r3[3, 'session'], + is_identical_to('0000')) + expect_that(r3[3, 'bundle'], + is_identical_to('msajc057')) + expect_that(r3[3, 'start_item_id'], + equals(136)) + + r4 = query(ae, + "Phoneme == p & Start(Word, Phoneme) = 0", + bundlePattern = 'msajc02.', + resultType = NULL) + + expect_that(nrow(r4), equals(1)) + expect_that(r4[1, 'start_item_id'], equals(122)) + + r5 = query(ae, + "Phoneme == p & Start(Word, Phoneme) == 0", + bundlePattern = '.*7', + resultType = NULL) + + expect_that(nrow(r5), equals(1)) + expect_that(r5[1, 'start_item_id'], equals(136)) + + + }) + + test_that("Query using End function",{ + r1 = query(ae, + "Phoneme == n & End(Word, Phoneme) == 1") + + expect_that(nrow(r1), equals(2)) + expect_that(r1[1,]$start_item_id, equals(103)) + expect_that(r1[2,]$start_item_id, equals(158)) + + }) + + test_that("Query using Num function",{ + + # query words with exactly four phonemes + r = query(ae,"Num(Word, Phoneme) = 4") + expect_that(nrow(r), equals(6)) + + # Test for GitHub Issue #41 + # Num() function returns no values if level of first parameter is sublevel of second parameter. + # not this now produces an error as the legacy behaviour + # mentioned in #41 is not consistent with anything + # and makes no sense! + # Hence, decided to brake with backward compat. here... + expect_error(query(ae, + "Num(Phonetic, Phoneme) == 1"), regexp = "Second level/attribute name") + + }) + + test_that("Query using and operator",{ + sl1 = query(ae, + 'Text = them & Accent = W', + resultType = 'emusegs') + + expect_that(nrow(sl1), equals(1)) + expect_that('[.data.frame'(sl1, 1, 'labels'), + is_identical_to(I('them'))) + expect_that('[.data.frame'(sl1, 1, 'utts'), + is_identical_to(I('0000:msajc012'))) + + }) + + test_that("Projection operator #",{ + + r1 = query(ae, "[Syllable == S ^ #Phonetic == s]") + expect_that(nrow(r1), equals(10)) + + }) + + test_that("Projection operator # for emusegs result type ",{ + + r1 = query(ae, + "[Syllable == S ^ #Phonetic == s]", + resultType = 'emusegs') + + expect_that(nrow(r1), equals(10)) + + }) + + + test_that("Check Phonetic tier seglist",{ + # load legacy emu seglist + legacyEmuAePhoneticSeglist <- system.file("extdata", + "legacy_emu_ae_phonetic_seglist.RData", + package = "emuR") + load(file = legacyEmuAePhoneticSeglist) + tsl = .legacy_emu_ae_phonetic_seglist + # get original query string + tslQuery = attr(tsl, 'query') + # reprduce the original query + sl = query(ae, tslQuery, resultType = 'emusegs') + sr = .aeSampleRate + doubleSampleTime = 2 / sr + # we have to accept numeric deviations caused by double precision calculations + # therefore add the machine epsilon to the tolerance of two sample + tolSec = doubleSampleTime + .Machine[['double.eps']] + tolMs = tolSec * 1000 + # compare legacy emu generated and new seglist + eq = equal.emusegs(sl, + tsl, + tolerance = tolMs, + uttsPrefix2 = '0000:') + expect_true(eq) + + }) + + + # + test_that("bad calls cause errors",{ + expect_error(query(ae, "[#Text == more -> #Text == customers]"), + regexp = "Multiple hash tags") + }) + + test_that("additional queries (simple and complex) work for more thorough query testing",{ + skip_on_cran() + # SQ + qs = "Phonetic == m" + sl = query(ae, qs, resultType = "emuRsegs") + expect_equal(nrow(sl), 7) + expect_equal(attributes(sl)$query, qs) + sl = query(ae, "[Phonetic == m]") + expect_equal(nrow(sl), 7) + expect_equal(attr(sl, "query"), NULL) + sl = query(ae, "[Phonetic == m | n]") + expect_equal(nrow(sl), 19) + sl = query(ae, "[Phonetic != m | n]") + expect_equal(nrow(sl), 234) + sl = query(ae, "[Syllable =~ .*]") + expect_equal(nrow(sl), 83) + sl = suppressWarnings(query(ae, "[Text =~ am.*]")) + expect_equal(nrow(sl), 1) + sl = suppressWarnings(query(ae, "[Text !~ am.*]")) + expect_equal(nrow(sl), 53) + + # SEQQ + sl = query(ae, "[#Text == to -> Text =~ .*]") + expect_equal(nrow(sl), 3) + sl = query(ae, "[Text = to -> #Text =~ .*]") + expect_equal(nrow(sl), 3) + qs = "[Phonetic == m -> Phonetic == I]" + sl = query(ae, qs, resultType = "emuRsegs") + expect_equal(nrow(sl), 0) + expect_equal(attributes(sl)$query, qs) + sl = query(ae, "[#Phonetic == m -> Phonetic == I]") + expect_equal(nrow(sl), 0) + sl = query(ae, "[Phonetic == m -> #Phonetic == I]") + expect_equal(nrow(sl), 0) + sl = query(ae, "[Phonetic == m -> #Phonetic == o]") + expect_equal(nrow(sl), 0) + sl = query(ae, "[[Phonetic == m -> Phonetic == I ] -> Phonetic == n]") + expect_equal(nrow(sl), 0) + sl = query(ae, "[Text == more -> [Text == customers -> Text == than]]") + expect_equal(sl$labels, "more->customers->than") + sl = query(ae, "[Text =~ .* -> [Text == customers -> Text =~ .*]]") + expect_equal(sl$labels, "more->customers->than") + sl = query(ae, "[#Text == more -> [Text == customers -> Text == than]]") + expect_equal(sl$labels, "more") + sl = query(ae, "[Text == more -> [#Text == customers -> Text == than]]") + expect_equal(sl$labels, "customers") + sl = query(ae, "[Text == more -> [Text == customers -> #Text == than]]") + expect_equal(sl$labels, "than") + expect_error(query(ae, "[Syllable == S & Pitch_Accent == L+H*]"), + regexp = "Unknown level attribute name") + + #CONJQ + sl = query(ae, "[Text =~ .* & Word == F]") + expect_equal(nrow(sl), 20) + sl = query(ae, "[Text =~ .* & #Word == F]") + expect_equal(nrow(sl), 20) + sl = query(ae, "[Text =~ .* & Word == C & Accent == S]") + expect_equal(nrow(sl), 25) + + # DOMQ + sl = query(ae, "[Phoneme == p ^ Syllable == S]") + expect_equal(nrow(sl), 3) + sl = query(ae, "[Syllable =~ .* ^ Phoneme == p]") + expect_equal(nrow(sl), 3) + sl = query(ae, "[Phoneme == p ^ #Syllable =~ .*]") + expect_equal(nrow(sl), 3) + sl = query(ae, "[#Phoneme == p ^ Syllable =~ .*]") + expect_equal(nrow(sl), 3) + sl = query(ae, "[Syllable =~ .* ^ Phoneme != p | t | k]") + expect_equal(nrow(sl), 83) + sl = query(ae, "[#Syllable =~ .* ^ Phoneme != p | t | k]") + expect_equal(nrow(sl), 83) + sl = query(ae, "[Syllable =~ .* ^ #Phoneme != p | t | k]") + expect_equal(nrow(sl), 195) + + # multiple DOMQs + sl = query(ae, "[[Phoneme == p ^ Syllable =~ .*] ^ Word =~.*]") + expect_equal(sl$labels, c("p", "p", "p")) + sl = query(ae, "[[#Phoneme == p ^ Syllable =~ .*] ^ Word =~.*]") + expect_equal(sl$labels, c("p", "p", "p")) + sl = query(ae, "[[Phoneme == p ^ #Syllable =~ .*] ^ Word =~.*]") + expect_equal(sl$labels, c("S", "S", "S")) + sl = query(ae, "[[Phoneme == p ^ Syllable =~ .*] ^ #Word =~.*]") + expect_equal(sl$labels, c("C", "C", "C")) + sl = suppressWarnings(query(ae, "[[Phoneme == p ^ Syllable =~.*] ^ Text =~ emphasized | tempting]")) + expect_equal(sl$labels, c("p", "p")) + sl = suppressWarnings(query(ae, "[[Phoneme == p ^ Syllable =~.*] ^ #Text =~ emphasized | tempting]")) + expect_equal(sl$labels, c("emphasized", "tempting")) + + # Position + # Simple usage of Start(), End() and Medial() + nWord = nrow(query(ae, "Word =~.*")) + sl = query(ae, "[Start(Word, Syllable) == 1]") + expect_equal(nrow(sl), nWord) + sl = query(ae, "[Start(Word, Phoneme) == 1]") + expect_equal(nrow(sl), nWord) + sl = query(ae, "[Start(Word, Syllable) == 0]") + expect_equal(nrow(sl), 28) + sl = query(ae, "[End(Word, Syllable) == 1]") + expect_equal(nrow(sl), nWord) + sl = query(ae, "[Medial(Word, Syllable) == 1]") + expect_equal(nrow(sl), 9) + sl = query(ae, "[Medial(Word, Syllable) == 0]") + expect_equal(nrow(sl), 73) + # Position and Boolean & + sl = query(ae, "[Phoneme == m & Start(Word, Phoneme) == 1]") # word initial m's + expect_equal(nrow(sl), 2) + sl = query(ae, "[Phoneme == m & End(Word, Phoneme) == 1]") # word final m's + expect_equal(nrow(sl), 1) + sl = query(ae, "[Syllable == S & End(Word, Syllable) == 0]") # non word final strong syllables + expect_equal(nrow(sl), 16) + # Position and Boolean ^ + sl = query(ae, "[Phoneme =~ .* ^ End(Word, Syllable) == 0]") # non word final Phonemes + expect_equal(nrow(sl), 61) + + # Count + sl = query(ae, "[Num(Word, Syllable) == 4]") + expect_equal(nrow(sl), 1) + sl = query(ae, "[Num(Syllable, Phoneme) > 6]") + expect_equal(nrow(sl), 1) + + # Count and Boolean & + sl = query(ae, "[Text =~ .* & Num(Word, Phoneme) > 4 ]") + expect_equal(nrow(sl), 18) + sl = query(ae, "[Syllable == S & Num(Syllable, Phoneme) == 5]") + expect_equal(nrow(sl), 4) + + # Count and ^ + sl = query(ae, "[Phoneme == m ^ Num(Word, Syllable) == 3]") + expect_equal(nrow(sl), 2) + sl = query(ae, "[Syllable = W ^ Num(Word, Syllable) < 3]") + expect_equal(nrow(sl), 28) + sl = query(ae, "[Text =~ .* ^ Num(Syllable, Phoneme) == 4]") + expect_equal(nrow(sl), 7) + + # Combinations + # ^ and -> (Domination and Sequence) + sl = query(ae, "[[Phoneme == m -> Phoneme =~ .*] ^ Syllable == S]") + expect_equal(nrow(sl), 4) + sl = query(ae, "[Phoneme == s -> [Phoneme =~ .* ^ Syllable == W]]") + expect_equal(nrow(sl), 5) + sl = query(ae, "[[Syllable == S ^ Phoneme == p] -> Syllable == W]") + expect_equal(nrow(sl), 3) + expect_true(all(grepl("->", sl$labels))) + sl = query(ae, "[[Syllable == S ^ #Phoneme == p] -> Syllable == W]") # SIC is this supposed to work? + expect_equal(nrow(sl), 3) + + # ^ and -> and & (Domination and Sequence and Boolean &) + sl = query(ae, "[Text =~ .* ^ Phoneme == @ & Start(Text, Phoneme) == 1]") + expect_equal(nrow(sl), 3) + sl = query(ae, "[[Phoneme == m & Start(Word, Phoneme) == 1 -> Phoneme == o:] ^ Syllable == S]") + expect_equal(nrow(sl), 1) + sl = query(ae, "[[Phoneme == m & Start(Word, Phoneme) == 1 -> Phoneme == o:] ^ Syllable == S]") + expect_equal(nrow(sl), 1) + sl = query(ae, "[[[Phoneme == m & Start(Word, Phoneme) == 1 -> Phoneme == o:] ^ Syllable == S] ^ #Text != x]") + expect_equal(sl$labels, "more") + sl = query(ae, "[[Text =~ .* & Num(Text, Syllable) == 3 ^ [Phoneme == @ ^ Start(Word, Syllable) == 1]] -> Text == his]") + + # A few more Q & A’s (because practice makes perfect) + sl = query(ae, "[Phoneme == m | n & Medial(Word, Phoneme) == 1]") + expect_equal(nrow(sl), 12) + sl = query(ae, "[[Phonetic == H -> Phonetic =~ .*] -> Phonetic == I | U ]") + expect_equal(sl$labels, "H->h->I") + sl = query(ae, "[Syllable =~ .* & Medial(Word, Syllable) == 0]") + expect_equal(nrow(sl), 73) + sl = query(ae, "[Text =~ .* & Num(Text, Syllable) == 2]") + expect_equal(nrow(sl), 11) + sl = query(ae, "[Text == the -> #Text =~ .* & Accent == S]") + expect_equal(sl$labels, "chill") + sl = query(ae, "[Syllable = S ^ Num(Word, Phoneme) == 5]") + expect_equal(nrow(sl), 4) + sl = query(ae, "[Syllable == W ^ Phoneme == @]") + expect_equal(nrow(sl), 29) + qs = "[Text =~ .* ^ #Tone == L* | L+H*]" + sl = query(ae, qs) + expect_equal(nrow(sl), 2) + expect_equal(attr(sl, "query"), NULL) # used to be qs but not with tibble + sl = query(ae, + "[Tone =~.* ^ [End(Word, Syllable) == 1 ^ Num(Word, Syllable) == 2]]") + expect_equal(nrow(sl), 1) + sl = query(ae, + "[[[Phoneme =~ .* ^ Phonetic == H] ^ Start(Word, Syllable) == 1] ^ Accent == S]") + expect_equal(nrow(sl), 10) + sl = query (ae , + "[[[Phonetic = n -> Phonetic = z] -> Phonetic = S ] ^ [Text = friends -> Text = she]]") + expect_equal(sl$labels, "n->z->S") + sl = query(ae, "[Utterance =~ .* ^ Phonetic == @]", + verbose = FALSE) + expect_equal(nrow(sl), 7) + expect_equal(sl$labels[1], "") + + sl = query(ae, "[Text == she ^ [Phonetic == S -> Phonetic == i:]]") + expect_equal(sl$labels, "she") + + sl = query(ae, "[[Phonetic == S -> Phonetic == i:] ^ Text == she]") + expect_equal(sl$labels, "S->i:") + + # a few more for -> + ^ queries + # overlap start -> empty as not dominated + sl = query(ae, "[[Phonetic == z -> Phonetic == S] ^ Text == she]") + expect_equal(nrow(sl), 0) + + sl = query(ae, "[Text == she ^ [Phonetic == z -> Phonetic == S]]") + expect_equal(nrow(sl), 0) + + # overlap end -> empty as not dominated + sl = query(ae, "[[Phonetic == i: -> Phonetic == w] ^ Text == she]") + expect_equal(nrow(sl), 0) + + sl = query(ae, "[Text == she ^ [Phonetic == i: -> Phonetic == w]]") + expect_equal(nrow(sl), 0) + + sl = query(ae, "[Tone =~ .* ^ [[Text == amongst -> Text == her] -> Text == friends]]") + expect_equal(nrow(sl), 2) + }) + + # + test_that("timeRefSegmentLevel works correctly",{ + # skip_on_cran() + sl = query(ae, + "[Syllable == W]") + sl = query(ae, + "[Syllable == W]", + timeRefSegmentLevel = "Phonetic") + duplicate_level(ae, + "Phonetic", + "Phonetic2", + verbose = FALSE) + expect_error(query(ae, "[Syllable == W]"), + regexp = "The level is linked") + expect_error(query(ae, + "[Syllable == W]", + timeRefSegmentLevel = "badLevelName"), + regexp = "Cannot resolve time information for result level") + query(ae, + "[Syllable == W]", + timeRefSegmentLevel = "Phonetic2") + + # clean up + remove_linkDefinition(ae, + superlevelName = "Phoneme", + sublevelName = "Phonetic2", + force = TRUE, + verbose = FALSE) + + + remove_levelDefinition(ae, + name = "Phonetic2", + force = TRUE, + verbose = FALSE) + }) + + # + test_that("calcTimes works correctly",{ + skip_on_cran() + sl = query(ae, + "[Syllable == W]", + calcTimes = FALSE) + + expect_true(all(is.na(sl$start))) + expect_true(all(is.na(sl$end))) + expect_true(all(is.na(sl$sample_start))) + expect_true(all(is.na(sl$sample_end))) + }) + + test_that("correct times are calculated for Intonational",{ + skip_on_cran() + sl = query(ae, "Intonational == L%", timeRefSegmentLevel = "Phonetic", verbose = FALSE) + all(round(sl$start, 3) == round(c(256.925, 571.925, 379.525, 425.375, 299.975, 513.925, 475.775), 3)) + all(round(sl$end, 3) == round(c(2604.425, 2753.975, 2692.325, 3456.825, 2469.525, 2554.175, 2794.925), 3)) + }) + + test_that("correct times are calculated for sequence dom. queries",{ + skip_on_cran() + sl = query(ae, + "[[Phonetic == N -> Phonetic == s] -> Phonetic == t]", + verbose = FALSE) + expect_equal(sl$sample_start, 8534) + expect_equal(sl$sample_end, 11933) + # move up one level + sl = query(ae, + "[[Phoneme == N -> Phoneme == s] -> Phoneme == t]", + verbose = FALSE) + expect_equal(sl$sample_start, 8534) + expect_equal(sl$sample_end, 13483) + # even further up the hierarchy + sl = query(ae, + "[Text == more -> Text == customers]", + verbose = FALSE) + expect_equal(sl$sample_start, 31574) + expect_equal(sl$sample_end, 47355) + + }) + + # clean up (also disconnect) + DBI::dbDisconnect(ae$connection) + ae = NULL + unlink(.test_emu_ae_db_dir, recursive = TRUE) +}) + + diff --git a/tests/testthat/test_emuR-requery.database.R b/tests/testthat/test_emuR-requery.database.R new file mode 100644 index 00000000..ed4da313 --- /dev/null +++ b/tests/testthat/test_emuR-requery.database.R @@ -0,0 +1,350 @@ +context("testing requeries") + +aeSampleRate = 20000 + +test_emu_ae_db = NULL + +test_emu_ae_db_uuid = "0fc618dc-8980-414d-8c7a-144a649ce199" +test_emu_ae_db_dir = NULL + +path2demoData = file.path(tempdir(),"emuR_demoData") +path2testhatFolder = file.path(tempdir(),"emuR_testthat") + +# extract internalVars from environment .emuR_pkgEnv +internalVars = get("internalVars", envir = .emuR_pkgEnv) + +legacyDbEmuAeTpl <- file.path(path2demoData, "legacy_ae", "ae.tpl") +test_emu_ae_db_dir <- file.path(path2testhatFolder, 'test_emu_ae') +unlink(test_emu_ae_db_dir, recursive = TRUE) + +# copy 4 faster tests +dir.create(test_emu_ae_db_dir) +file.copy(file.path(path2demoData, paste0('ae', emuDB.suffix)), test_emu_ae_db_dir, recursive = TRUE) + +ae = load_emuDB(file.path(test_emu_ae_db_dir, + paste0('ae', emuDB.suffix)), + inMemoryCache = internalVars$testingVars$inMemoryCache, + verbose=FALSE) + +test_that("Requery sequential",{ + + # Phoneme sequences n->t + sl1 = query(ae, "[Phoneme == n -> Phoneme == t]") + # requery two elemnts before and one after sequence + rsl1 = requery_seq(ae, sl1, offset = -2, length = 5) + rsl2 = requery_seq(ae, sl1, offset = -3, length = 5, offsetRef = 'END') + + # equivalent requery results should be equal + expect_equal(rsl1, rsl2) + + expect_that(class(rsl1), is_identical_to(c('tbl_df', 'tbl', 'data.frame'))) + expect_that(nrow(sl1), equals(2)) + expect_that(nrow(rsl1), equals(2)) + expect_that('[.data.frame'(rsl1, 1, 'labels'), is_equivalent_to('l->@->n->t->l')) + expect_that('[.data.frame'(rsl1, 1, 'start_item_id'), equals(144)) + expect_that('[.data.frame'(rsl1, 1, 'end_item_id'), equals(148)) + + expect_that('[.data.frame'(rsl1, 2, 'labels'), is_equivalent_to('s->@->n->t->ei')) + expect_that('[.data.frame'(rsl1,2,'start_item_id'), equals(101)) + expect_that('[.data.frame'(rsl1,2,'end_item_id'), equals(105)) + + # Bug ID 42 + sl1 = query(ae, "[[Phonetic == k -> Phonetic =~ .*] -> Phonetic =~ .*]") + sl1w = suppressWarnings(requery_hier(ae, sl1, level = 'Word', verbose = FALSE)) # this will insert an NA row because sl1 has 8 rows and sl1w has 7 msajc023 k->H->s not dominated by single C + # sl1w has sequence length 1 + sl1w2 = requery_seq(ae, sl1w[1,]) + # Bug startItemID != endItemID, and label is not a sequence !! + expect_that('[.data.frame'(sl1w2, 1, 'start_item_id'), equals(61)) + expect_that('[.data.frame'(sl1w2, 1, 'end_item_id'), equals(61)) + + sl1 = query(ae, "Text == her") + rsl1 = requery_seq(ae, sl1, offset = 1) + expect_equal(rsl1$labels, "friends") + expect_equal(rsl1$attribute, "Text") + + sl1 = query(ae, "Text == her") + rsl1 = requery_seq(ae, sl1, offset = 1, offsetRef = "END") + expect_equal(rsl1$labels, "friends") + expect_equal(rsl1$attribute, "Text") + + +}) + +test_that("Requery sequential produces correct NA rows",{ + + # first -> move one left + sl = query(ae, "Phonetic == V") + expect_warning(requery_seq(ae, sl, offset = -1, ignoreOutOfBounds = TRUE)) + sl_rq = suppressWarnings(requery_seq(ae, sl, offset = -1, ignoreOutOfBounds = TRUE)) + expect_true(is.na(sl_rq[1,1])) + + # last -> move one right + sl = query(ae, "Phonetic == l", resultType = "tibble") + expect_warning(requery_seq(ae, sl, offset = 1, ignoreOutOfBounds = TRUE)) + sl_rq = suppressWarnings(requery_seq(ae, sl, offset = 1, ignoreOutOfBounds = TRUE)) + expect_true(is.na(sl_rq[1,1])) + + # last -> move one right + end as ref + expect_warning(requery_seq(ae, sl, offset = 1, ignoreOutOfBounds = TRUE, offsetRef = "END")) + sl_rq = suppressWarnings(requery_seq(ae, sl, offset = 1, ignoreOutOfBounds = TRUE, offsetRef = "END")) + expect_true(is.na(sl_rq[1,1])) + + # last -> move one left + length way too long + sl_rq = suppressWarnings(requery_seq(ae, sl, offset = -1, length = 15, ignoreOutOfBounds = TRUE)) + expect_true(is.na(sl_rq[1,1])) +}) + +test_that("Requery hierarchical",{ + + # Text beginning with 'a' + sl1 = suppressWarnings(query(ae, "Text =~ '^a[mn].*'")) + # requery to level Phoneme + rsl1 = suppressWarnings(requery_hier(ae, sl1, level = 'Phoneme')) + expect_that(class(rsl1), is_identical_to(c('tbl_df', 'tbl', 'data.frame'))) + expect_that(nrow(sl1),equals(3)) + expect_that(nrow(rsl1),equals(3)) + expect_that('[.data.frame'(rsl1, 1, 'labels'), is_equivalent_to('V->m->V->N->s->t')) + expect_that('[.data.frame'(rsl1, 1, 'start_item_id'), equals(114)) + expect_that('[.data.frame'(rsl1, 1, 'end_item_id'), equals(119)) + + expect_that('[.data.frame'(rsl1, 2, 'labels'), is_equivalent_to('E->n->i:')) + expect_that('[.data.frame'(rsl1, 2, 'start_item_id'), equals(135)) + expect_that('[.data.frame'(rsl1, 2, 'end_item_id'), equals(137)) + + expect_that('[.data.frame'(rsl1, 3, 'labels'), is_equivalent_to('@->n')) + expect_that('[.data.frame'(rsl1, 3, 'start_item_id'), equals(102)) + expect_that('[.data.frame'(rsl1, 3, 'end_item_id'), equals(103)) + +}) + +test_that("Requery hierarchical preserves lengths when walking up",{ + sl = query(ae, "[Phonetic== V]")[1:2,] + rsl = requery_hier(ae, sl, level = "Text") + expect_equal(nrow(sl), nrow(rsl)) +}) + +test_that("Requery hierarchical with collapse works",{ + + # Text beginning with 'a' + sl1 = suppressWarnings(query(ae, "Text =~ '^a[mn].*'")) + # requery to level Phoneme + rsl1 = suppressWarnings(requery_hier(ae, sl1, level = 'Phonetic', collapse = FALSE, verbose = FALSE)) + expect_equal(nrow(rsl1), 12) # should have 12 elements + allLabels = paste0(rsl1$labels, collapse = "->") + expect_equal(allLabels, "V->m->V->N->s->t->H->E->n->i:->@->n") +}) + +test_that("hierarchical requery on same attrDef without times calculates missing times",{ + + slTimes = query(ae, "Word=~.*", calcTimes = TRUE) + slNoTime = query(ae, "Word=~.*", calcTimes = FALSE) + + # requery to same attrDef + slRq = requery_hier(ae, slNoTime, level='Word') + + # overwrite attr + attr(slTimes, "query") = "" + attr(slRq, "query") = "" + + cres = compare::compare(slTimes, slRq, allowAll = TRUE) + expect_true(cres$result) +}) + +test_that("hierarchical requery on parallel attrDef works",{ + + # Text beginning with 'a' + sl1 = suppressWarnings(query(ae, "Text =~ '^a[mn].*'")) + + # requery to same attrDef + slRq = requery_hier(ae, sl1, level = 'Word') + + expect_equal(paste0(sl1$labels, collapse = "; "), "amongst; any; and") + + expect_equal(sl1$start, slRq$start) + expect_equal(sl1$end, slRq$end) + expect_equal(sl1$sample_start, slRq$sample_start) + expect_equal(sl1$sample_end, slRq$sample_end) + expect_equal(sl1$start_item_id, slRq$start_item_id) + expect_equal(sl1$end_item_id, slRq$end_item_id) + expect_equal(sl1$start_item_seq_idx, slRq$end_item_seq_idx) + expect_equal(sl1$end_item_seq_idx, slRq$end_item_seq_idx) + +}) + +test_that("hierarchical requery on non main attributes work",{ + + # Text beginning with 'a' + sl1 = query(ae, "Phonetic == n") + + # requery to Word:Text + slRq = requery_hier(ae, sl1, level = 'Text') + + expect_equal(paste0(slRq$labels, collapse = "; "), + "friends; considered; any; resistance; wind; violently; concealing; weaknesses; and; no; new; than") + +}) + + +test_that("hierarchical throws warning if badly ordered/multiple levels",{ + # warning from various levels + sl1 = query(ae, "Phonetic == n") + sl2 = query(ae, "Syllable == S") + + sl = rbind(sl1, sl2) + + expect_warning(check_emuRsegsForRequery(sl)) + + + sl1 = query(ae, "Phonetic == n", resultType = "emuRsegs") + sl2 = query(ae, "Phonetic == @", resultType = "emuRsegs") + + sl = rbind(sl1, sl2) + + expect_warning(check_emuRsegsForRequery(sl)) + + sl = sort(sl) + check_emuRsegsForRequery(sl) + + # check with new default tibble result type as well + sl1 = query(ae, "Phonetic == n") + sl2 = query(ae, "Phonetic == @") + + sl = rbind(sl1, sl2) + + expect_warning(check_emuRsegsForRequery(sl)) + + sl = dplyr::arrange(sl, db_uuid, session, bundle, start_item_seq_idx) + check_emuRsegsForRequery(sl) + + +}) + +test_that("requery_hier inserts NAs",{ + + # delete link to check if NA is inserted + DBI::dbExecute(ae$connection, "DELETE FROM links WHERE bundle = 'msajc003' AND from_id = 115 AND to_id = 148") + DBI::dbExecute(ae$connection, "DELETE FROM links WHERE bundle = 'msajc012' AND from_id = 134 AND to_id = 169") + DBI::dbExecute(ae$connection, "DELETE FROM links WHERE bundle = 'msajc023' AND from_id = 96 AND to_id = 120") + rewrite_annots(ae, verbose = FALSE) + + ######################## + # parent requery + sl = query(ae, + "Phonetic == m", + resultType = "tibble") + + sl_req = suppressWarnings(requery_hier(ae, + sl, + level = "Phoneme", + resultType = "tibble")) + + expect_equal(nrow(sl), nrow(sl_req)) + expect_true(all(is.na(sl_req[1,]))) + expect_true(all(is.na(sl_req[2,]))) + expect_true(all(is.na(sl_req[5,]))) + # calcTimes = FALSE + sl_req = suppressWarnings(requery_hier(ae, + sl, + level = "Phoneme", + calcTimes = FALSE, + resultType = "tibble")) + + expect_equal(nrow(sl), nrow(sl_req)) + expect_true(all(is.na(sl_req[1,]))) + expect_true(all(is.na(sl_req[2,]))) + expect_true(all(is.na(sl_req[5,]))) + + + sl = query(ae, "Phonetic == db", resultType = "tibble") + sl_req = requery_hier(ae, + sl, + level = "Phoneme", + resultType = "tibble") + + expect_equal(sl_req$labels[1], "d->b") # check that collapsing of multiple parents works + + sl_req = suppressWarnings(requery_hier(ae, + sl, + level = "Phoneme", + collapse = FALSE, + resultType = "tibble")) + + expect_equal(sl_req$labels[1], "d") + expect_equal(sl_req$labels[2], "b") + + + ######################## + # child requery + sl = query(ae, + "Phoneme == m", + resultType = "tibble", + calcTimes = FALSE) + + sl_req = suppressWarnings(requery_hier(ae, + sl, + level = "Phonetic", + resultType = "tibble")) + + expect_equal(nrow(sl), nrow(sl_req)) + expect_true(all(is.na(sl_req[1,]))) + expect_true(all(is.na(sl_req[2,]))) + + # calcTimes = FALSE + sl_req = suppressWarnings(requery_hier(ae, + sl, + level = "Phonetic", + calcTimes = FALSE, + resultType = "tibble")) + + expect_equal(sl_req$labels[6], 'Om->m') # callapsing works + + + # over multiple levels (parent requery) + sl = query(ae, + "Phonetic == m", + resultType = "tibble") + + sl_req = suppressWarnings(requery_hier(ae, + sl, + level = "Text", + resultType = "tibble")) + + expect_equal(nrow(sl), nrow(sl_req)) + expect_true(all(is.na(sl_req[1,]))) + expect_true(all(is.na(sl_req[2,]))) + expect_true(all(is.na(sl_req[5,]))) + + + sl = query(ae, + "[[Phonetic == D -> Phonetic == @] -> Phonetic == m]", + resultType = "tibble", + calcTimes = FALSE) + + # if only NAs in resulting seglist an empty object is returned + sl_req = suppressWarnings(requery_hier(ae, + sl, + level = "Word", + resultType = "tibble")) + + expect_equal(nrow(sl_req), 0) + + # over multiple levels (child requery) + sl = query(ae, + "Text == them", + resultType = "tibble", + calcTimes = FALSE) + + sl_req = requery_hier(ae, + sl, + level = "Phonetic", + resultType = "tibble") + + # only dominates D->@ not D->@->m as link to m is missing + expect_equal(sl_req$labels[1], 'D->@') + +}) + +# clean up (also disconnects) +DBI::dbDisconnect(ae$connection) +ae = NULL +unlink(test_emu_ae_db_dir, recursive = TRUE) diff --git a/tests/testthat/test_emuR-validate.R b/tests/testthat/test_emuR-validate.R new file mode 100644 index 00000000..a0f9680b --- /dev/null +++ b/tests/testthat/test_emuR-validate.R @@ -0,0 +1,31 @@ +##' testthat tests for validation of bundles +##' +context("testing validate.XXX.bundle functions") + +dbName = "ae" + +path2orig = file.path(tempdir(), "emuR_demoData", paste0(dbName, emuDB.suffix)) +path2testData = file.path(tempdir(), "emuR_testthat") +path2db = file.path(path2testData, paste0(dbName, emuDB.suffix)) + +# extract internalVars from environment .emuR_pkgEnv +internalVars = get("internalVars", envir = .emuR_pkgEnv) + +################################# +test_that("unaltered bundle (sqlTableRep) validates successfully", { + # delete, copy and load + unlink(path2db, recursive = TRUE) + file.copy(path2orig, path2testData, recursive = TRUE) + ae = load_emuDB(path2db, inMemoryCache = internalVars$testingVars$inMemoryCache, verbose = FALSE) + + res = validate_bundleDBI(ae, session = "0000", bundle = "msajc003") + expect_equal(res$type, 'SUCCESS') + expect_equal(res$message, '') + + # clean up + DBI::dbDisconnect(ae$connection) + ae = NULL + +}) + + diff --git a/tests/testthat/test_zzz_cleanUp.R b/tests/testthat/test_zzz_cleanUp.R new file mode 100644 index 00000000..dedb5341 --- /dev/null +++ b/tests/testthat/test_zzz_cleanUp.R @@ -0,0 +1,4 @@ +context("clean up test data") + +unlink(file.path(tempdir(),"emuR_demoData"), recursive = TRUE) +unlink(file.path(tempdir(),"emuR_testthat"), recursive = TRUE)