10
10
* We use proper atomic operations when needed.
11
11
*/
12
12
const async = require ( 'async' ) ;
13
+ const crypto = require ( 'crypto' ) ;
13
14
14
15
const constants = require ( '../../../constants' ) ;
15
16
@@ -39,6 +40,9 @@ const CONNECT_TIMEOUT_MS = 5000;
39
40
// MongoDB default
40
41
const SOCKET_TIMEOUT_MS = 360000 ;
41
42
const CONCURRENT_CURSORS = 10 ;
43
+ // Search
44
+ const MAX_TIME_MS = 300000 ;
45
+ const SEARCH_PREFIX = '__search' ;
42
46
43
47
const initialInstanceID = process . env . INITIAL_INSTANCE_ID ;
44
48
@@ -109,6 +113,11 @@ class MongoClientInterface {
109
113
! Number . isNaN ( process . env . CONCURRENT_CURSORS ) )
110
114
? Number . parseInt ( process . env . CONCURRENT_CURSORS , 10 )
111
115
: CONCURRENT_CURSORS ;
116
+
117
+ this . maxTimeMs = ( process . env . MAX_TIME_MS &&
118
+ ! Number . isNaN ( process . env . MAX_TIME_MS ) )
119
+ ? Number . parseInt ( process . env . MAX_TIME_MS , 10 )
120
+ : MAX_TIME_MS ;
112
121
}
113
122
114
123
setup ( cb ) {
@@ -939,9 +948,9 @@ class MongoClientInterface {
939
948
params , log , cb ) ;
940
949
}
941
950
942
- internalListObject ( bucketName , params , extension , log , cb ) {
943
- const c = this . getCollection ( bucketName ) ;
944
- const stream = new MongoReadStream ( c , params , params . mongifiedSearch ) ;
951
+ internalListObject ( c , params , extension , log , cb ) {
952
+ const stream =
953
+ new MongoReadStream ( c , params ) ;
945
954
const skip = new Skip ( {
946
955
extension,
947
956
gte : params . gte ,
@@ -963,7 +972,7 @@ class MongoClientInterface {
963
972
newParams . gte = range ;
964
973
965
974
// then continue listing the next key range
966
- this . internalListObject ( bucketName , newParams , extension , log , cb ) ;
975
+ this . internalListObject ( c , newParams , extension , log , cb ) ;
967
976
} ) ;
968
977
969
978
stream
@@ -993,22 +1002,98 @@ class MongoClientInterface {
993
1002
return undefined ;
994
1003
}
995
1004
1005
+ /*
1006
+ * Execute the user-defined query in a stage then sort it for
1007
+ * stateless paging. The output is stored in a temporary
1008
+ * collection in a special namespace that will be periodically
1009
+ * erased (e.g. once a day).
1010
+ *
1011
+ * All search queries are bounded by MAX_TIME_MS env (default is
1012
+ * 5mn).
1013
+ */
1014
+ doSearch ( c , tempCollection , params , extension , searchOptions , log , cb ) {
1015
+ const _cursor = c . aggregate ( [
1016
+ { $match : searchOptions } , // user query
1017
+ { $sort : { _id : 1 } } , // needed for paging
1018
+ { $out : tempCollection } , // a job will clean it up
1019
+ ] ,
1020
+ {
1021
+ maxTimeMs : this . maxTimeMs , // always limit
1022
+ allowDiskUse : true , // stage large queries on disk
1023
+ } ,
1024
+ null ) ;
1025
+ _cursor . toArray ( err => {
1026
+ if ( err ) {
1027
+ log . error ( 'doSearch: error' , {
1028
+ error : err . message ,
1029
+ } ) ;
1030
+ return cb ( err ) ;
1031
+ }
1032
+ return this . internalListObject (
1033
+ this . db . collection ( tempCollection ) ,
1034
+ params , extension ,
1035
+ log , cb ) ;
1036
+ } ) ;
1037
+ }
1038
+
1039
+ /*
1040
+ * Check if the used defined query has been cached otherwise
1041
+ * launch the search
1042
+ */
1043
+ prepareSearch ( bucketName , params , extension , searchOptions , log , cb ) {
1044
+ const c = this . getCollection ( bucketName ) ;
1045
+ // generate the temp collection name
1046
+ const tempCollection =
1047
+ SEARCH_PREFIX +
1048
+ crypto . createHash ( 'md5' ) .
1049
+ update ( JSON . stringify ( searchOptions ) ) .
1050
+ digest ( 'hex' ) ;
1051
+ this . db . listCollections ( {
1052
+ name : tempCollection ,
1053
+ } ) . toArray ( ( err , items ) => {
1054
+ if ( err ) {
1055
+ log . error ( 'prepareSearch: listing collection' , {
1056
+ error : err . message ,
1057
+ } ) ;
1058
+ return cb ( err ) ;
1059
+ }
1060
+ if ( items . length > 0 ) {
1061
+ // result is cached
1062
+ return this . internalListObject (
1063
+ this . db . collection ( tempCollection ) ,
1064
+ params , extension ,
1065
+ log , cb ) ;
1066
+ }
1067
+ return this . doSearch (
1068
+ c , tempCollection ,
1069
+ params , extension , searchOptions ,
1070
+ log , cb ) ;
1071
+ } ) ;
1072
+ }
1073
+
996
1074
listObject ( bucketName , params , log , cb ) {
997
1075
const extName = params . listingType ;
998
1076
const extension = new listAlgos [ extName ] ( params , log ) ;
999
1077
const internalParams = extension . genMDParams ( ) ;
1000
- internalParams . mongifiedSearch = params . mongifiedSearch ;
1001
- return this . internalListObject ( bucketName , internalParams , extension ,
1002
- log , cb ) ;
1078
+ if ( params . mongifiedSearch ) {
1079
+ return this . prepareSearch (
1080
+ bucketName , internalParams , extension ,
1081
+ params . mongifiedSearch , log , cb ) ;
1082
+ }
1083
+ return this . internalListObject (
1084
+ this . getCollection ( bucketName ) ,
1085
+ internalParams , extension ,
1086
+ log , cb ) ;
1003
1087
}
1004
1088
1005
1089
listMultipartUploads ( bucketName , params , log , cb ) {
1006
1090
const extName = params . listingType ;
1007
1091
const extension = new listAlgos [ extName ] ( params , log ) ;
1008
1092
const internalParams = extension . genMDParams ( ) ;
1009
- internalParams . mongifiedSearch = params . mongifiedSearch ;
1010
- return this . internalListObject ( bucketName , internalParams , extension ,
1011
- log , cb ) ;
1093
+ return this . internalListObject (
1094
+ this . getCollection ( bucketName ) ,
1095
+ internalParams , extension ,
1096
+ log , cb ) ;
1012
1097
}
1013
1098
1014
1099
checkHealth ( implName , log , cb ) {
0 commit comments