Skip to content

Commit d6610a9

Browse files
authored
Merge pull request #499 from idaholab/fix/timeseries-performance
Fix/timeseries performance
2 parents 36b0fe5 + 0935e52 commit d6610a9

File tree

7 files changed

+179
-67
lines changed

7 files changed

+179
-67
lines changed

server/src/data_access_layer/mappers/data_warehouse/data/file_mapper.ts

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -355,10 +355,11 @@ export default class FileMapper extends Mapper {
355355
};
356356
}
357357

358-
// fetch the fully qualified file path complete with file name and short uuid
358+
// fetch the fully qualified file path complete with file name and short uuid (if present)
359359
private filePathMetadataStatement(fileIDs: string[]): QueryConfig {
360360
const text = `SELECT id,
361-
short_uuid || file_name AS file_name,
361+
CASE WHEN short_uuid IS NULL THEN file_name
362+
ELSE short_uuid || file_name END AS file_name,
362363
TRIM('/\\' FROM adapter_file_path) AS access_path
363364
FROM files
364365
WHERE id IN (%L)`;

server/src/data_access_layer/mappers/data_warehouse/data/report_query_mapper.ts

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import Result from '../../../../common_classes/result';
22
import Mapper from '../../mapper';
33
import { PoolClient, QueryConfig } from 'pg';
4-
import ReportQuery from '../../../../domain_objects/data_warehouse/data/report_query';
4+
import ReportQuery, { CompletedQueryMatch } from '../../../../domain_objects/data_warehouse/data/report_query';
55

66
const format = require('pg-format');
77

@@ -88,6 +88,12 @@ export default class ReportQueryMapper extends Mapper {
8888
return super.runStatement(this.deleteStatement(id), { transaction });
8989
}
9090

91+
CheckQueryExists(query: string): Promise<Result<CompletedQueryMatch>> {
92+
return super.retrieve<CompletedQueryMatch>(this.checkQueryExistsStatement(query), {
93+
resultClass: CompletedQueryMatch
94+
});
95+
}
96+
9197
// Below are a set of query building functions. So far they're very simple
9298
// and the return value is something that the postgres driver can understand.
9399
// The hope is that this method will allow us to be more flexible and create
@@ -187,4 +193,18 @@ export default class ReportQueryMapper extends Mapper {
187193
values: [queryID, fileID],
188194
};
189195
}
196+
197+
private checkQueryExistsStatement(query: string): QueryConfig {
198+
// compare a query with those in the database, sanitizing queries
199+
// of extraneous whitespace and casing before comparison
200+
// return {
201+
const text = `SELECT rq.id, r.status_message
202+
FROM report_queries rq JOIN reports r ON rq.report_id = r.id
203+
WHERE r.status = 'completed'
204+
AND LOWER(REGEXP_REPLACE(TRIM(rq.query), '\\s+', ' ', 'g')) = LOWER(REGEXP_REPLACE(TRIM($1), '\\s+', ' ', 'g'))
205+
ORDER BY rq.created_at DESC LIMIT 1`;
206+
const values = [query];
207+
// }
208+
return {text, values}
209+
}
190210
}

server/src/data_access_layer/repositories/data_warehouse/data/report_query_repository.ts

Lines changed: 92 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import RepositoryInterface, {QueryOptions, Repository} from '../../repository';
2-
import ReportQuery, { TimeseriesInitialRequest } from '../../../../domain_objects/data_warehouse/data/report_query';
2+
import ReportQuery, { ReportQueryMetadata, TimeseriesInitialRequest } from '../../../../domain_objects/data_warehouse/data/report_query';
33
import Result from '../../../../common_classes/result';
44
import ReportQueryMapper from '../../../mappers/data_warehouse/data/report_query_mapper';
55
import {PoolClient} from 'pg';
@@ -72,17 +72,13 @@ export default class ReportQueryRepository extends Repository implements Reposit
7272
return Promise.resolve(Result.Success(true));
7373
}
7474

75+
// perform all necessary checks before kicking off the query including verifying
76+
// all files are timeseries and checking for previously executed/select * queries
7577
async initiateQuery(containerID: string, dataSourceID: string, request: TimeseriesInitialRequest, user: User, describe: boolean): Promise<Result<string>> {
7678
// check that all files exist and are timeseries, return an error if not
7779
const isTimeseries = await this.#fileRepo.checkTimeseries(request.file_ids!);
7880
if (isTimeseries.isError) {return Promise.resolve(Result.Pass(isTimeseries))}
7981

80-
// create a new report object
81-
const report = new Report({container_id: containerID});
82-
const reportSaved = await this.#reportMapper.Create(user.id!, report);
83-
if (reportSaved.isError) {return Promise.resolve(Result.Pass(reportSaved))}
84-
const reportID = reportSaved.value.id!
85-
8682
// formulate query if describe, check for presence of table name if regular query
8783
if (describe) {
8884
const describeQueries: string[] = [];
@@ -98,18 +94,70 @@ export default class ReportQueryRepository extends Repository implements Reposit
9894
}
9995
}
10096

101-
// create a report query based on the timeseries rust module query request
97+
// create a new report object to return the ID if a SELECT * or repeated query is found
98+
const reportSaved = await this.#reportMapper.Create(user.id!, new Report({container_id: containerID}));
99+
if (reportSaved.isError) {return Promise.resolve(Result.Pass(reportSaved))}
100+
const reportID = reportSaved.value.id!
101+
102+
// check if the query text was already successfully used in a previous query
103+
// if so return the result file from that original query
104+
const previousQueryResults = await this.#mapper.CheckQueryExists(request.query!);
105+
// if an error is found, simply log and move on
106+
if (previousQueryResults.isError) {
107+
Logger.error(previousQueryResults.error.error);
108+
}
109+
110+
if (previousQueryResults.value) {
111+
// grab and use the previous status message for this report
112+
void this.#reportRepo.setStatus(reportID, 'completed', previousQueryResults.value.status_message);
113+
114+
return Promise.resolve(Result.Success(reportID));
115+
}
116+
117+
// create a query object if a previous query was not found
102118
const reportQuery = new ReportQuery({query: request.query!, report_id: reportID});
103119
const querySaved = await this.#mapper.Create(user.id!, reportQuery);
104120
if (querySaved.isError) { return Promise.resolve(Result.Pass(querySaved))}
105121
const queryID = querySaved.value.id!
106122

107-
// fetch file metadata
108-
const fileInfo = await this.#fileRepo.listPathMetadata(...request.file_ids!);
109-
if (fileInfo.isError) {return Promise.resolve(Result.Failure('unable to find file information'))}
110-
const files = fileInfo.value;
123+
// check if the query is a SELECT * query; if so return original file instead of querying
124+
// verify there's only one file being queried
125+
if (request.file_ids!.length === 1) {
126+
const fileID = request.file_ids![0];
127+
// trim and case densensitize query to eliminate any syntax variance
128+
const normalizedQuery = request.query?.trim().replace(/\s+/g, ' ').replace(';', '').toLowerCase();
129+
if (normalizedQuery === `select * from table_${fileID}`) {
130+
// set the original file as the report file and return report ID
131+
const resultSet = await this.setResultFile(reportID, queryID, fileID);
132+
if (resultSet.isError) {
133+
const errorMessage = `error attaching record to report ${reportID}: ${resultSet.error.error}`;
134+
void this.#reportRepo.setStatus(reportID, 'error', errorMessage);
135+
Logger.error(errorMessage);
136+
}
137+
138+
// if everything was successful, set the report status to completed
139+
const successMessage = `results now available. Download them at "/containers/${containerID}/files/${fileID}/download"`;
140+
void this.#reportRepo.setStatus(reportID, 'completed', successMessage);
141+
return Promise.resolve(Result.Success(reportID));
142+
}
143+
}
144+
145+
const queryMetadata: ReportQueryMetadata = {
146+
container_id: containerID,
147+
data_source_id: dataSourceID,
148+
request: request,
149+
user: user,
150+
report_id: reportID,
151+
query: reportQuery,
152+
query_id: queryID
153+
}
111154

112-
// create a connection string based on the type of storage being used
155+
// kickoff the query itself if there are no early return scenarios
156+
return this.kickoffQuery(queryMetadata, describe);
157+
}
158+
159+
// create a connection string based on the type of storage being used
160+
async createConnectionString(containerID: string, dataSourceID: string): Promise<Result<string>> {
113161
const uploadPath = `containers/${containerID}/datasources/${dataSourceID}`;
114162
let storageConnection: string;
115163
if (Config.file_storage_method === 'filesystem') {
@@ -141,28 +189,46 @@ export default class ReportQueryRepository extends Repository implements Reposit
141189
return Promise.resolve(Result.Failure(`error: unsupported or unimplemented file storage method being used`));
142190
}
143191

192+
return Promise.resolve(Result.Success(storageConnection));
193+
}
194+
195+
async kickoffQuery(queryMetadata: ReportQueryMetadata, describe: boolean): Promise<Result<string>> {
196+
// fetch file metadata
197+
const fileInfo = await this.#fileRepo.listPathMetadata(...queryMetadata.request.file_ids!);
198+
if (fileInfo.isError) {return Promise.resolve(Result.Pass(fileInfo))}
199+
const files = fileInfo.value;
200+
201+
const getConnString = await this.createConnectionString(queryMetadata.container_id, queryMetadata.data_source_id);
202+
if (getConnString.isError) {return Promise.resolve(Result.Pass(getConnString))}
203+
const storageConnection = getConnString.value;
204+
205+
// set report status to "processing"
206+
let statusSet = await this.#reportRepo.setStatus(
207+
queryMetadata.report_id, 'processing',
208+
`executing query ${queryMetadata.query_id}: "${queryMetadata.query.query}" as part of report ${queryMetadata.report_id}`
209+
);
210+
if (statusSet.isError) {return Promise.resolve(Result.Failure(`unable to set report status`))}
211+
212+
// kick off the describe or query process
144213
if (describe) {
145-
this.processDescribe(reportID, request.query!, storageConnection, files as FileMetadata[]);
214+
this.processDescribe(
215+
queryMetadata.report_id,
216+
queryMetadata.request.query!,
217+
storageConnection,
218+
files as FileMetadata[]);
146219
} else {
147220
this.processQuery(
148-
reportID,
149-
request.query!,
221+
queryMetadata.report_id,
222+
queryMetadata.request.query!,
150223
storageConnection,
151224
files as FileMetadata[],
152-
queryID,
153-
user
225+
queryMetadata.query_id,
226+
queryMetadata.user
154227
);
155228
}
156229

157-
// set report status to "processing"
158-
let statusSet = await this.#reportRepo.setStatus(
159-
reportID, 'processing',
160-
`executing query ${queryID}: "${reportQuery.query}" as part of report ${reportID}`
161-
);
162-
if (statusSet.isError) {return Promise.resolve(Result.Failure(`unable to set report status`))}
163-
164230
// return report ID to the user so they can poll for results
165-
return Promise.resolve(Result.Success(reportID));
231+
return Promise.resolve(Result.Success(queryMetadata.report_id));
166232
}
167233

168234
async processQuery(

server/src/data_access_layer/repositories/data_warehouse/etl/type_transformation_repository.ts

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -392,18 +392,12 @@ export default class TypeTransformationRepository extends Repository implements
392392
// if the data_source value for any given param is an old ID for an existing data source, replace it with the actual data source ID
393393
const backfillDataSources = (params: EdgeConnectionParameter[]) => {
394394
params.forEach(param => {
395-
console.log('old param val', param.value);
396395
if (param.value) {
397396
// backfill old ID with new ID if present
398397
const matchedSource = dataSources.find(src => src?.DataSourceRecord?.old_id === param.value);
399-
console.log('old id', matchedSource?.DataSourceRecord?.old_id);
400-
console.log('new id', matchedSource?.DataSourceRecord?.id);
401398
if (matchedSource) param.value = matchedSource!.DataSourceRecord!.id!;
402-
console.log('new param val', param.value);
403399
} else {
404-
console.log('dsID', dataSourceID);
405400
param.value = dataSourceID;
406-
console.log('new param val', param.value);
407401
}
408402
})
409403
}

server/src/domain_objects/data_warehouse/data/report_query.ts

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import { BaseDomainClass } from "../../../common_classes/base_domain_class";
22
import {IsArray, IsOptional, IsString} from 'class-validator';
33
import Report from './report';
4+
import { User } from "../../access_management/user";
45

56
/*
67
ReportQuery represents a query and its execution status.
@@ -39,6 +40,26 @@ export default class ReportQuery extends BaseDomainClass{
3940
}
4041
}
4142

43+
// input type for passing info between query methods
44+
export type ReportQueryMetadata = {
45+
container_id: string;
46+
data_source_id: string;
47+
request: TimeseriesInitialRequest;
48+
user: User;
49+
report_id: string;
50+
query: ReportQuery;
51+
query_id: string;
52+
}
53+
54+
// domain object for the results of checkQueryExists query
55+
export class CompletedQueryMatch {
56+
@IsString()
57+
query_id?: string;
58+
59+
@IsString()
60+
status_message?: string;
61+
}
62+
4263
// initial object used to create request for the timeseries rust module
4364
export class TimeseriesInitialRequest {
4465
@IsOptional()

server/src/services/blob_storage/azure_blob_impl.ts

Lines changed: 18 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -214,25 +214,30 @@ export default class AzureBlobImpl implements BlobStorage {
214214
}
215215

216216
async renameFile(f: File): Promise<Result<boolean>> {
217-
const newFileClient = this._ContainerClient.getBlockBlobClient(`${f.adapter_file_path}${f.short_uuid}${f.file_name}`);
218-
const oldFileClient = this._ContainerClient.getBlockBlobClient(`${f.adapter_file_path}${f.file_name}${f.short_uuid}`);
217+
// only perform the rename if uuid is present; otherwise, file should already be accessible
218+
if (!f.short_uuid) {
219+
return Promise.resolve(Result.Success(true));
220+
} else {
221+
const newFileClient = this._ContainerClient.getBlockBlobClient(`${f.adapter_file_path}${f.short_uuid}${f.file_name}`);
222+
const oldFileClient = this._ContainerClient.getBlockBlobClient(`${f.adapter_file_path}${f.file_name}${f.short_uuid}`);
219223

220-
try {
221-
const copyPoller = await newFileClient.beginCopyFromURL(oldFileClient.url);
222-
const copy_res = await copyPoller.pollUntilDone();
224+
try {
225+
const copyPoller = await newFileClient.beginCopyFromURL(oldFileClient.url);
226+
const copy_res = await copyPoller.pollUntilDone();
223227

224-
if (copy_res._response.status === 201 || copy_res._response.status === 202) {
225-
const delete_res = await oldFileClient.delete();
228+
if (copy_res._response.status === 201 || copy_res._response.status === 202) {
229+
const delete_res = await oldFileClient.delete();
226230

227-
if (delete_res._response.status === 201 || delete_res._response.status === 202) {
228-
return Promise.resolve(Result.Success(true));
231+
if (delete_res._response.status === 201 || delete_res._response.status === 202) {
232+
return Promise.resolve(Result.Success(true));
233+
}
229234
}
235+
} catch (e) {
236+
Logger.error(`azure rename blob error: ${e}`);
237+
return Promise.resolve(Result.Success(false));
230238
}
231-
} catch (e) {
232-
Logger.error(`azure rename blob error: ${e}`);
239+
233240
return Promise.resolve(Result.Success(false));
234241
}
235-
236-
return Promise.resolve(Result.Success(false));
237242
}
238243
}

server/src/services/blob_storage/filesystem_impl.ts

Lines changed: 24 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -172,27 +172,32 @@ export default class Filesystem implements BlobStorage {
172172
}
173173

174174
renameFile(f: File): Promise<Result<boolean>> {
175-
try {
176-
/* eslint-disable-next-line security/detect-non-literal-fs-filename --
177-
* TypeScript wants to guard against malicious file renaming,
178-
* but since the rename is generated server-side and not by the end user,
179-
* there is no security risk
180-
**/
181-
const rename_res = fs.rename(
182-
`${f.adapter_file_path}${f.file_name}${f.short_uuid}`,
183-
`${f.adapter_file_path}${f.short_uuid}${f.file_name}`,
184-
(err) => {
185-
if (err) throw err;
186-
});
187-
188-
if (rename_res === null || rename_res === undefined) {
189-
return Promise.resolve(Result.Success(true));
175+
// only rename file if short uuid is present; otherwise, file should already be accessible
176+
if (!f.short_uuid) {
177+
return Promise.resolve(Result.Success(true));
178+
} else {
179+
try {
180+
/* eslint-disable-next-line security/detect-non-literal-fs-filename --
181+
* TypeScript wants to guard against malicious file renaming,
182+
* but since the rename is generated server-side and not by the end user,
183+
* there is no security risk
184+
**/
185+
const rename_res = fs.rename(
186+
`${f.adapter_file_path}${f.file_name}${f.short_uuid}`,
187+
`${f.adapter_file_path}${f.short_uuid}${f.file_name}`,
188+
(err) => {
189+
if (err) throw err;
190+
});
191+
192+
if (rename_res === null || rename_res === undefined) {
193+
return Promise.resolve(Result.Success(true));
194+
}
195+
} catch (e) {
196+
Logger.error(`filesystem rename error: ${e}`);
197+
return Promise.resolve(Result.Success(false));
190198
}
191-
} catch (e) {
192-
Logger.error(`filesystem rename error: ${e}`);
199+
193200
return Promise.resolve(Result.Success(false));
194201
}
195-
196-
return Promise.resolve(Result.Success(false));
197202
}
198203
}

0 commit comments

Comments
 (0)