Skip to content

Commit

Permalink
Allow setting stopper_strategy
Browse files Browse the repository at this point in the history
The stopper is only used to stop stemmed words by default. It can stop all words
or be disabled.
  • Loading branch information
johnl committed Mar 22, 2020
1 parent b5536d3 commit 3688c2c
Show file tree
Hide file tree
Showing 5 changed files with 66 additions and 1 deletion.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
html/
doc/
*.gem
Gemfile.lock
Gemfile.lock
vendor/
7 changes: 7 additions & 0 deletions lib/xapian_fu/xapian_db.rb
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,10 @@ class DocNotFound < XapianFuError ; end
#
# db = XapianDb.new(:language => :italian, :stopper => false)
#
# The <tt>:stopper_strategy</tt> option sets the default stop strategy used
# when indexing. It can be <tt>:none</tt> (stopper disabled), <tt>:all</tt>
# (stop all words) or <tt>:stemmed</tt> (only stop stemmed words). Defaults
# to <tt>:stemmed</tt>.
#
# == Spelling suggestions
#
# The <tt>:spelling</tt> option controls generation of a spelling
Expand Down Expand Up @@ -172,6 +176,8 @@ class XapianDb # :nonew:
attr_reader :field_options
attr_accessor :weights_function
attr :field_weights
# The default stopper strategy
attr_accessor :stopper_strategy

def initialize( options = { } )
@options = { :index_positions => true, :spelling => true }.merge(options)
Expand All @@ -196,6 +202,7 @@ def initialize( options = { } )
@language = @options.fetch(:language, :english)
@stemmer = @options.fetch(:stemmer, @language)
@stopper = @options.fetch(:stopper, @language)
@stopper_strategy = @options.fetch(:stopper_strategy, :stemmed)
@field_options = {}
setup_fields(@options[:fields])
@store_values << @options[:store]
Expand Down
22 changes: 22 additions & 0 deletions lib/xapian_fu/xapian_doc.rb
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,27 @@ def stopper
end
end

# Maps the symbolic stopper strategy names (<tt>:none</tt>, <tt>:all</tt>,
# <tt>:stemmed</tt>) to the integer codes passed to the term generator's
# set_stopper_strategy when a document's terms are generated.
#
# NOTE(review): the integers presumably mirror Xapian's stop_strategy enum
# (STOP_NONE, STOP_ALL, STOP_STEMMED) - confirm against the bindings in use.
#
# Frozen so this shared lookup table cannot be modified at runtime.
STOPPER_STRATEGIES = {
  :none => 0,
  :all => 1,
  :stemmed => 2
}.freeze

# Return the stopper strategy for this document. The value is resolved once
# and then cached: an explicit <tt>:stopper_strategy</tt> option takes
# precedence, then the database's default, and finally <tt>:stemmed</tt>.
def stopper_strategy
  return @stopper_strategy if @stopper_strategy

  configured = @options[:stopper_strategy]
  return @stopper_strategy = configured unless configured.nil?

  # No per-document setting: inherit from the database when there is one.
  @stopper_strategy = db ? db.stopper_strategy : :stemmed
end

# Return this document's language which is set on initialize, inherited
# from the database or defaults to :english
def language
Expand Down Expand Up @@ -276,6 +297,7 @@ def generate_terms
tg.document = xapian_document
tg.stopper = stopper if stopper
tg.stemmer = stemmer
tg.set_stopper_strategy(XapianDoc::STOPPER_STRATEGIES.fetch(stopper_strategy, 2))
tg.set_flags Xapian::TermGenerator::FLAG_SPELLING if db.spelling
index_method = db.index_positions ? :index_text : :index_text_without_positions
fields.each do |k,o|
Expand Down
34 changes: 34 additions & 0 deletions spec/xapian_doc_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -261,4 +261,38 @@
end
end

# Checks how the :stopper_strategy database option affects which terms are
# present on a generated Xapian document. Each example indexes the same
# sentence and inspects the resulting term list.
describe "stopper_strategy" do
  it "should stop all stop words when stopper_strategy is set to :all " do
    database = XapianDb.new(:stopper_strategy => :all)
    xapian_document = database.documents.new("She fished for fish").to_xapian_document
    indexed = xapian_document.terms.map { |entry| entry.term }
    indexed.should_not include("for")
    indexed.should include("fish")
  end

  it "should stop stemmed words by when stopper_strategy is set to :stemmed " do
    database = XapianDb.new(:stopper_strategy => :stemmed)
    xapian_document = database.documents.new("She fished for fish").to_xapian_document
    indexed = xapian_document.terms.map { |entry| entry.term }
    indexed.should_not include("Zfor")
    indexed.should include("fish")
  end

  it "should stop no words by when stopper_strategy is set to :none " do
    database = XapianDb.new(:stopper_strategy => :none)
    xapian_document = database.documents.new("She fished for fish").to_xapian_document
    indexed = xapian_document.terms.map { |entry| entry.term }
    indexed.should include("Zfor")
    indexed.should include("for")
    indexed.should include("fish")
  end

  # No option given: the expectations match the :stemmed example.
  it "should stop stemmed words by default " do
    database = XapianDb.new
    xapian_document = database.documents.new("She fished for fish").to_xapian_document
    indexed = xapian_document.terms.map { |entry| entry.term }
    indexed.should_not include("Zfor")
    indexed.should include("fish")
  end
end
end
1 change: 1 addition & 0 deletions xapian-fu.gemspec
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ Gem::Specification.new do |s|

s.add_development_dependency("rspec", "~> 2.7")
s.add_development_dependency("rake", "~> 0")
s.add_development_dependency("irb", "~> 0")
s.add_development_dependency("rdoc", "~> 4")

s.requirements << "libxapian-dev, or the xapian-ruby gem"
Expand Down

0 comments on commit 3688c2c

Please sign in to comment.