10
10
* We use proper atomic operations when needed.
11
11
*/
12
12
const async = require ( 'async' ) ;
13
+ const crypto = require ( 'crypto' ) ;
13
14
14
15
const constants = require ( '../../../constants' ) ;
15
16
@@ -39,6 +40,10 @@ const CONNECT_TIMEOUT_MS = 5000;
39
40
// MongoDB default
40
41
const SOCKET_TIMEOUT_MS = 360000 ;
41
42
const CONCURRENT_CURSORS = 10 ;
43
+ // Search
44
+ const MAX_TIME_MS = 300000 ;
45
+ const TEMP_SEARCH_PREFIX = '__temp_search' ;
46
+ const SEARCH_PREFIX = '__search' ;
42
47
43
48
const initialInstanceID = process . env . INITIAL_INSTANCE_ID ;
44
49
@@ -109,6 +114,11 @@ class MongoClientInterface {
109
114
! Number . isNaN ( process . env . CONCURRENT_CURSORS ) )
110
115
? Number . parseInt ( process . env . CONCURRENT_CURSORS , 10 )
111
116
: CONCURRENT_CURSORS ;
117
+
118
+ this . maxTimeMs = ( process . env . MAX_TIME_MS &&
119
+ ! Number . isNaN ( process . env . MAX_TIME_MS ) )
120
+ ? Number . parseInt ( process . env . MAX_TIME_MS , 10 )
121
+ : MAX_TIME_MS ;
112
122
}
113
123
114
124
setup ( cb ) {
@@ -939,9 +949,10 @@ class MongoClientInterface {
939
949
params , log , cb ) ;
940
950
}
941
951
942
- internalListObject ( bucketName , params , extension , log , cb ) {
943
- const c = this . getCollection ( bucketName ) ;
944
- const stream = new MongoReadStream ( c , params , params . mongifiedSearch ) ;
952
+ internalListObject ( c , params , extension , log , cb ) {
953
+ // eslint-disable-next-line
954
+ params . maxTimeMs = this . maxTimeMs ;
955
+ const stream = new MongoReadStream ( c , params , params . searchOptions ) ;
945
956
const skip = new Skip ( {
946
957
extension,
947
958
gte : params . gte ,
@@ -963,7 +974,7 @@ class MongoClientInterface {
963
974
newParams . gte = range ;
964
975
965
976
// then continue listing the next key range
966
- this . internalListObject ( bucketName , newParams , extension , log , cb ) ;
977
+ this . internalListObject ( c , newParams , extension , log , cb ) ;
967
978
} ) ;
968
979
969
980
stream
@@ -993,22 +1004,125 @@ class MongoClientInterface {
993
1004
return undefined ;
994
1005
}
995
1006
1007
+ /*
1008
+ * Execute the user-defined query in a stage then sort it for
1009
+ * stateless paging. The output is stored in a temporary
1010
+ * collection in a special namespace that will be periodically
1011
+ * erased (e.g. once a day).
1012
+ *
1013
+ * All search queries are bounded by MAX_TIME_MS env (default is
1014
+ * 5mn).
1015
+ */
1016
+ doSearch ( c , searchCollection , params , extension , searchOptions , log , cb ) {
1017
+ // use temp name to avoid races
1018
+ const tempCollection =
1019
+ TEMP_SEARCH_PREFIX +
1020
+ crypto . randomBytes ( 16 ) . toString ( 'hex' ) ;
1021
+ const _cursor = c . aggregate ( [
1022
+ { $match : searchOptions } , // user query
1023
+ { $out : tempCollection } , // a job will clean it up
1024
+ ] ,
1025
+ {
1026
+ maxTimeMs : this . maxTimeMs , // always limit
1027
+ allowDiskUse : true , // stage large queries on disk
1028
+ } ,
1029
+ null ) ;
1030
+ _cursor . toArray ( err => {
1031
+ if ( err ) {
1032
+ log . error ( 'doSearch: error' , {
1033
+ error : err . message ,
1034
+ } ) ;
1035
+ return cb ( err ) ;
1036
+ }
1037
+ // final rename
1038
+ this . db . renameCollection (
1039
+ tempCollection ,
1040
+ searchCollection ,
1041
+ {
1042
+ dropTarget : true ,
1043
+ } ,
1044
+ err => {
1045
+ if ( err ) {
1046
+ log . error ( 'doSearch: renaming' , {
1047
+ error : err . message ,
1048
+ tempCollection,
1049
+ searchCollection,
1050
+ } ) ;
1051
+ return cb ( err ) ;
1052
+ }
1053
+ log . info ( 'doSearch: done' , {
1054
+ searchCollection,
1055
+ } ) ;
1056
+ return undefined ;
1057
+ } ) ;
1058
+ // fallthrough
1059
+ // eslint-disable-next-line
1060
+ params . searchOptions = searchOptions ;
1061
+ return this . internalListObject (
1062
+ c ,
1063
+ params , extension ,
1064
+ log , cb ) ;
1065
+ } ) ;
1066
+ }
1067
+
1068
+ /*
1069
+ * Check if the used defined query has been cached otherwise
1070
+ * launch the search
1071
+ */
1072
+ prepareSearch ( bucketName , params , extension , searchOptions , log , cb ) {
1073
+ const c = this . getCollection ( bucketName ) ;
1074
+ // generate the search collection name
1075
+ const searchCollection =
1076
+ SEARCH_PREFIX +
1077
+ crypto . createHash ( 'md5' ) .
1078
+ update ( JSON . stringify ( searchOptions ) ) .
1079
+ digest ( 'hex' ) ;
1080
+ this . db . listCollections ( {
1081
+ name : searchCollection ,
1082
+ } ) . toArray ( ( err , items ) => {
1083
+ if ( err ) {
1084
+ log . error ( 'prepareSearch: listing collection' , {
1085
+ error : err . message ,
1086
+ } ) ;
1087
+ return cb ( err ) ;
1088
+ }
1089
+ if ( items . length > 0 ) {
1090
+ // result is cached
1091
+ return this . internalListObject (
1092
+ this . db . collection ( searchCollection ) ,
1093
+ params , extension ,
1094
+ log , cb ) ;
1095
+ }
1096
+ return this . doSearch (
1097
+ c , searchCollection ,
1098
+ params , extension , searchOptions ,
1099
+ log , cb ) ;
1100
+ } ) ;
1101
+ }
1102
+
996
1103
listObject ( bucketName , params , log , cb ) {
997
1104
const extName = params . listingType ;
998
1105
const extension = new listAlgos [ extName ] ( params , log ) ;
999
1106
const internalParams = extension . genMDParams ( ) ;
1000
- internalParams . mongifiedSearch = params . mongifiedSearch ;
1001
- return this . internalListObject ( bucketName , internalParams , extension ,
1002
- log , cb ) ;
1107
+ if ( params . mongifiedSearch ) {
1108
+ return this . prepareSearch (
1109
+ bucketName , internalParams , extension ,
1110
+ params . mongifiedSearch , log , cb ) ;
1111
+ }
1112
+ return this . internalListObject (
1113
+ this . getCollection ( bucketName ) ,
1114
+ internalParams , extension ,
1115
+ log , cb ) ;
1003
1116
}
1004
1117
1005
1118
listMultipartUploads ( bucketName , params , log , cb ) {
1006
1119
const extName = params . listingType ;
1007
1120
const extension = new listAlgos [ extName ] ( params , log ) ;
1008
1121
const internalParams = extension . genMDParams ( ) ;
1009
- internalParams . mongifiedSearch = params . mongifiedSearch ;
1010
- return this . internalListObject ( bucketName , internalParams , extension ,
1011
- log , cb ) ;
1122
+ return this . internalListObject (
1123
+ this . getCollection ( bucketName ) ,
1124
+ internalParams , extension ,
1125
+ log , cb ) ;
1012
1126
}
1013
1127
1014
1128
checkHealth ( implName , log , cb ) {
0 commit comments