@@ -105,7 +105,7 @@ impl SearchState {
105105 #[ tracing:: instrument( skip( self , store) ) ]
106106 pub fn add_resource ( & self , resource : & Resource , store : & Db ) -> AtomicServerResult < ( ) > {
107107 let fields = self . get_schema_fields ( ) ?;
108- let subject = resource. get_subject ( ) ;
108+ let subject = resource. get_subject ( ) . to_string ( ) ;
109109 let writer = self . writer . read ( ) ?;
110110
111111 let mut doc = tantivy:: TantivyDocument :: default ( ) ;
@@ -153,7 +153,19 @@ impl SearchState {
153153pub fn build_schema ( ) -> AtomicServerResult < tantivy:: schema:: Schema > {
154154 let mut schema_builder = tantivy:: schema:: Schema :: builder ( ) ;
155155 // The STORED flag makes the index store the full values. Can be useful.
156- schema_builder. add_text_field ( "subject" , TEXT | STORED ) ;
156+
157+ // The raw tokenizer is used to index the subject field as is, without any tokenization.
158+ // If we don't do this the subject will be split into multiple tokens which breaks the search.
159+ schema_builder. add_text_field (
160+ "subject" ,
161+ tantivy:: schema:: TextOptions :: default ( )
162+ . set_stored ( )
163+ . set_indexing_options (
164+ tantivy:: schema:: TextFieldIndexing :: default ( )
165+ . set_tokenizer ( "raw" )
166+ . set_index_option ( tantivy:: schema:: IndexRecordOption :: Basic ) ,
167+ ) ,
168+ ) ;
157169 schema_builder. add_text_field ( "title" , TEXT | STORED ) ;
158170 schema_builder. add_text_field ( "description" , TEXT | STORED ) ;
159171 schema_builder. add_json_field ( "propvals" , STORED | TEXT ) ;
@@ -177,6 +189,12 @@ pub fn get_index(config: &Config) -> AtomicServerResult<(IndexWriter, tantivy::I
177189 e
178190 )
179191 } ) ?;
192+
193+ // Register the raw tokenizer
194+ index
195+ . tokenizers ( )
196+ . register ( "raw" , tantivy:: tokenizer:: RawTokenizer :: default ( ) ) ;
197+
180198 let heap_size_bytes = 50_000_000 ;
181199 let index_writer = index. writer ( heap_size_bytes) ?;
182200 Ok ( ( index_writer, index) )
@@ -245,9 +263,9 @@ fn get_resource_title(resource: &Resource) -> String {
245263
246264#[ cfg( test) ]
247265mod tests {
266+ use super :: * ;
248267 use atomic_lib:: { urls, Resource , Storelike } ;
249268
250- use super :: resource_to_facet;
251269 #[ test]
252270 fn facet_contains_subfacet ( ) {
253271 let store = atomic_lib:: Db :: init_temp ( "facet_contains" ) . unwrap ( ) ;
@@ -278,11 +296,66 @@ mod tests {
278296 let query_facet_direct_parent = resource_to_facet ( & resources[ 1 ] , & store) . unwrap ( ) ;
279297 let query_facet_root = resource_to_facet ( & resources[ 0 ] , & store) . unwrap ( ) ;
280298
281- // println!("Index: {:?}", index_facet);
282- // println!("query direct: {:?}", query_facet_direct_parent);
283- // println!("query root: {:?}", query_facet_root);
284-
285299 assert ! ( query_facet_direct_parent. is_prefix_of( & index_facet) ) ;
286300 assert ! ( query_facet_root. is_prefix_of( & index_facet) ) ;
287301 }
302+
303+ #[ test]
304+ fn test_update_resource ( ) {
// Checks that re-indexing a changed resource replaces its old entry: after the
// name is changed and the resource is removed from and re-added to the search
// index, a query on the title field for the old value finds nothing and a
// query for the new value finds exactly one document.
305+ let unique_string = atomic_lib:: utils:: random_string ( 10 ) ; // random name, used for both the temp config and the temp store below
306+
307+ let config = crate :: config:: build_temp_config ( & unique_string)
308+ . map_err ( |e| format ! ( "Initialization failed: {}" , e) )
309+ . expect ( "failed init config" ) ;
310+
311+ let store = atomic_lib:: Db :: init_temp ( & unique_string) . unwrap ( ) ;
312+
313+ let search_state = SearchState :: new ( & config) . unwrap ( ) ;
314+ let fields = search_state. get_schema_fields ( ) . unwrap ( ) ;
315+
316+ // Create the initial resource, named "Initial Title", and add it to the store
317+ let mut resource = Resource :: new_generate_subject ( & store) ;
318+ resource
319+ . set_string ( urls:: NAME . into ( ) , "Initial Title" , & store)
320+ . unwrap ( ) ;
321+ store. add_resource ( & resource) . unwrap ( ) ;
322+
323+ // Add the resource to the search index and commit the writer
324+ search_state. add_resource ( & resource, & store) . unwrap ( ) ;
325+ search_state. writer . write ( ) . unwrap ( ) . commit ( ) . unwrap ( ) ;
326+
327+ // Update the resource: change its name to "Updated Title" and save it to the store
328+ resource
329+ . set_string ( urls:: NAME . into ( ) , "Updated Title" , & store)
330+ . unwrap ( ) ;
331+ resource. save ( & store) . unwrap ( ) ;
332+
333+ // Update in the search index: remove the entry for this subject, re-add the
// resource, then commit the writer again
334+ search_state
335+ . remove_resource ( resource. get_subject ( ) )
336+ . unwrap ( ) ;
337+ search_state. add_resource ( & resource, & store) . unwrap ( ) ;
338+ search_state. writer . write ( ) . unwrap ( ) . commit ( ) . unwrap ( ) ;
339+
340+ // Reload the reader to make sure the committed changes are visible to the searcher
341+ search_state. reader . reload ( ) . unwrap ( ) ;
342+
343+ let searcher = search_state. reader . searcher ( ) ;
344+
345+ // Search the title field for a word from the old title - should return no results
346+ let query_parser =
347+ tantivy:: query:: QueryParser :: for_index ( & search_state. index , vec ! [ fields. title] ) ;
348+ let query = query_parser. parse_query ( "Initial" ) . unwrap ( ) ;
349+ let top_docs = searcher
350+ . search ( & query, & tantivy:: collector:: TopDocs :: with_limit ( 1 ) )
351+ . unwrap ( ) ;
352+ assert_eq ! ( top_docs. len( ) , 0 , "Old title should not be found in index" ) ;
353+
354+ // Search the title field for a word from the new title - should return one result
355+ let query = query_parser. parse_query ( "Updated" ) . unwrap ( ) ;
356+ let top_docs = searcher
357+ . search ( & query, & tantivy:: collector:: TopDocs :: with_limit ( 1 ) )
358+ . unwrap ( ) ;
359+ assert_eq ! ( top_docs. len( ) , 1 , "New title should be found in index" ) ;
360+ }
288361}
0 commit comments