require 'active_record'
require 'set'
-# 0.10 problems
-# Ferret::Search::Similarity, Ferret::Search::Similarity.default missing
-# IndexReader#latest? segfaults when used on multiple indexes
-# :offset and :limit get ignored by search_each
-# query_parser ignores or_default
# Yet another Ferret Mixin.
#
@@ -98,42 +93,35 @@ module FerretMixin
# helper that defines a method that adds the given field to a lucene
# document instance
def define_to_field_method(field, options = {})
- options = {
- :store => :no,
- :index => :yes,
- :term_vector => :with_positions_offsets,
- :boost => 1.0 }.update(options)
- fields_for_ferret[field] = options
- define_method("#{field}_to_ferret".to_sym) do
+ default_opts = { :store => Ferret::Document::Field::Store::NO,
+ :index => Ferret::Document::Field::Index::TOKENIZED,
+ :term_vector => Ferret::Document::Field::TermVector::NO,
+ :binary => false,
+ :boost => 1.0
+ }
+ default_opts.update(options) if options.is_a?(Hash) # caller-supplied options override the defaults; non-Hash values are ignored
+ fields_for_ferret << field # record the field name so to_doc can iterate over it
+ define_method("#{field}_to_ferret".to_sym) do # the generated method returns a Ferret::Document::Field built from default_opts
begin
+ #val = self[field] || self.instance_variable_get("@#{field.to_s}".to_sym) || self.method(field).call
val = content_for_field_name(field)
rescue
- logger.warn("Error retrieving value for field #{field}: #{$!}")
+ logger.debug("Error retrieving value for field #{field}: #{$!}") # retrieval errors are only logged; the value falls back to '' below
val = ''
end
logger.debug("Adding field #{field} with value '#{val}' to index")
- val
- end
- end
-
- def add_fields(field_config)
- if field_config.respond_to?(:each_pair)
- field_config.each_pair do |key,val|
- define_to_field_method(key,val)
- end
- elsif field_config.respond_to?(:each)
- field_config.each do |field|
- define_to_field_method(field)
- end
+ Ferret::Document::Field.new(field.to_s, val,
+ default_opts[:store],
+ default_opts[:index],
+ default_opts[:term_vector],
+ default_opts[:binary],
+ default_opts[:boost])
end
end
# TODO: do we need to define this at this level ? Maybe it's
# sufficient to do this only in classes calling acts_as_ferret ?
- #
- # moved below inside class_eval in #acts_as_ferret, let's see
- # what happens ;-)
- #def reloadable?; false end
+ def reloadable?; false end
@@ferret_indexes = Hash.new
def ferret_indexes; @@ferret_indexes end
@@ -148,13 +136,7 @@ module FerretMixin
# fields:: names all fields to include in the index. If not given,
# all attributes of the class will be indexed. You may also give
# symbols pointing to instance methods of your model here, i.e.
- # to retrieve and index data from a related model.
- #
- # additional_fields:: names fields to include in the index, in addition
- # to those derived from the db scheme. use if you want to add
- # custom fields derived from methods to the db fields (which will be picked
- # by aaf). This option will be ignored when the fields option is given, in
- # that case additional fields get specified there.
+ # to retrieve and index data from a related model.
#
# index_dir:: declares the directory where to put the index for this class.
# The default is RAILS_ROOT/index/RAILS_ENV/CLASSNAME.
@@ -174,30 +156,30 @@ module FerretMixin
# default value is 1000
#
# ferret_options may be:
- # or_default:: - whether query terms are required by
- # default (the default, false), or not (true)
+ # occur_default:: - whether query terms are required by
+ # default (the default), or not. Specify one of
+ # Ferret::Search::BooleanClause::Occur::MUST or
+ # Ferret::Search::BooleanClause::Occur::SHOULD
#
# analyzer:: the analyzer to use for query parsing (default: nil,
- # wihch means the ferret StandardAnalyzer gets used)
+ # which means the ferret default Analyzer gets used)
#
def acts_as_ferret(options={}, ferret_options={})
configuration = {
+ :fields => nil,
:index_dir => "#{FerretMixin::Acts::ARFerret::index_dir}/#{self.name.underscore}",
:store_class_name => false,
:single_index => false,
:max_results => 1000
}
ferret_configuration = {
- #:or_default => false, # lead to 'cannot convert false to Integer'
- #in some cases
- :handle_parser_errors => true
- #:max_clauses => 512,
- #:default_field => '*',
- #:analyzer => Ferret::Analysis::StandardAnalyzer.new,
- # :wild_card_downcase => true
+ :occur_default => Ferret::Search::BooleanClause::Occur::MUST,
+ :handle_parse_errors => true,
+ :default_search_field => '*',
+ :analyzer => Ferret::Analysis::StandardAnalyzer.new,
+ # :wild_lower => true
}
configuration.update(options) if options.is_a?(Hash)
-
# apply appropriate settings for shared index
if configuration[:single_index]
configuration[:index_dir] = "#{FerretMixin::Acts::ARFerret::index_dir}/shared"
@@ -207,8 +189,7 @@ module FerretMixin
# these properties are somewhat vital to the plugin and shouldn't
# be overwritten by the user:
ferret_configuration.update(
-
- :key => (configuration[:single_index] ? [:id, :class_name] : :id),
+ :key => (configuration[:single_index] ? ['id', 'class_name'] : 'id'),
:path => configuration[:index_dir],
:auto_flush => true,
:create_if_missing => true
@@ -227,18 +208,21 @@ module FerretMixin
cattr_accessor :configuration
cattr_accessor :ferret_configuration
- @@fields_for_ferret = Hash.new
+ @@fields_for_ferret = Array.new
@@configuration = configuration
@@ferret_configuration = ferret_configuration
-
- if configuration[:fields]
- add_fields(configuration[:fields])
+
+ if configuration[:fields].respond_to?(:each_pair)
+ configuration[:fields].each_pair do |key,val|
+ define_to_field_method(key,val)
+ end
+ elsif configuration[:fields].respond_to?(:each)
+ configuration[:fields].each do |field|
+ define_to_field_method(field)
+ end
else
- add_fields(self.new.attributes.keys.map { |k| k.to_sym })
- add_fields(configuration[:additional_fields])
+ @@fields_for_ferret = nil
end
-
- def self.reloadable?; false end
EOV
FerretMixin::Acts::ARFerret::ensure_directory configuration[:index_dir]
end
@@ -258,36 +242,11 @@ module FerretMixin
# When calling this method manually, you can give any additional
# model classes that should also go into this index as parameters.
# Useful when using the :single_index option.
- # Note that attributes named the same in different models will share
- # the same field options in the shared index.
- def rebuild_index(*models)
- models << self
- # default attributes for fields
- fi = Ferret::Index::FieldInfos.new(:store => :no,
- :index => :yes,
- :term_vector => :no,
- :boost => 1.0)
- # primary key
- fi.add_field(:id, :store => :yes, :index => :untokenized)
- # class_name
- if configuration[:store_class_name]
- fi.add_field(:class_name, :store => :yes, :index => :untokenized)
- end
- # collect field options from all models
- fields = {}
- models.each do |model|
- fields.update(model.fields_for_ferret)
- end
- logger.debug("class #{self.name}: fields for index: #{fields.keys.join(',')}")
- fields.each_pair do |field, options|
- fi.add_field(field, { :store => :no,
- :index => :yes }.update(options))
- end
- fi.create_index(ferret_configuration[:path])
-
- index = Ferret::Index::Index.new(ferret_configuration.dup.update(:auto_flush => false))
+ def rebuild_index(*additional_models)
+ index = Ferret::Index::Index.new(ferret_configuration.merge(:create => true))
+ additional_models << self
batch_size = 1000
- models.each do |model|
+ additional_models.each do |model|
# index in batches of 1000 to limit memory consumption (fixes #24)
model.transaction do
0.step(model.count, batch_size) do |i|
@@ -381,7 +340,7 @@ module FerretMixin
# determine all field names in the shared index
def single_index_field_names(models)
@single_index_field_names ||= (
- searcher = Ferret::Search::Searcher.new(class_index_dir)
+ searcher = Ferret::Search::IndexSearcher.new(class_index_dir)
if searcher.reader.respond_to?(:get_field_names)
(searcher.reader.send(:get_field_names) - ['id', 'class_name']).to_a
else
@@ -414,19 +373,17 @@ module FerretMixin
# class_clauses << "class_name:#{model}"
#end
#q << " AND (#{class_clauses.join(' OR ')})"
-
- qp = Ferret::QueryParser.new (ferret_configuration)
- qp.fields = ferret_index.send(:reader).field_names
+ qp = Ferret::QueryParser.new(ferret_configuration[:default_search_field], ferret_configuration.update(:fields => single_index_field_names(options[:models])))
original_query = qp.parse(q)
end
#else
q = Ferret::Search::BooleanQuery.new
- q.add_query(original_query, :must)
+ q.add_query(original_query, Ferret::Search::BooleanClause::Occur::MUST)
model_query = Ferret::Search::BooleanQuery.new
options[:models].each do |model|
- model_query.add_query(Ferret::Search::TermQuery.new(:class_name, model.name), :should)
+ model_query.add_query(Ferret::Search::TermQuery.new(Ferret::Index::Term.new('class_name', model.name)), Ferret::Search::BooleanClause::Occur::SHOULD)
end
- q.add_query(model_query, :must)
+ q.add_query(model_query, Ferret::Search::BooleanClause::Occur::MUST)
#end
end
#puts q.to_s
@@ -474,14 +431,11 @@ module FerretMixin
# instead of the result list!
#
def find_id_by_contents(q, options = {})
- deprecated_options_support(options)
- options[:limit] = configuration[:max_results] if options[:limit] == :all
-
+ options[:num_docs] = configuration[:max_results] if options[:num_docs] == :all
result = []
index = self.ferret_index
- #hits = index.search(q, options)
- #hits.each do |hit, score|
- total_hits = index.search_each(q, options) do |hit, score|
+ hits = index.search(q, options)
+ hits.each do |hit, score|
# only collect result data if we intend to return it
doc = index[hit]
model = configuration[:store_class_name] ? doc[:class_name] : self.name
@@ -492,7 +446,7 @@ module FerretMixin
end
end
logger.debug "id_score_model array: #{result.inspect}"
- return block_given? ? total_hits : result
+ return block_given? ? hits.total_hits : result
end
# requires the store_class_name option of acts_as_ferret to be true
@@ -515,20 +469,20 @@ module FerretMixin
# be yielded, and the total number of hits is returned.
#
def id_multi_search(query, additional_models = [], options = {})
- deprecated_options_support(options)
- options[:limit] = configuration[:max_results] if options[:limit] == :all
+ options[:num_docs] = configuration[:max_results] if options[:num_docs] == :all # :all is replaced by the configured :max_results value
additional_models << self
searcher = multi_index(additional_models)
result = []
- total_hits = searcher.search_each (query, options) do |hit, score|
- doc = searcher[hit]
+ hits = searcher.search(query, options)
+ hits.each { |hit, score|
+ doc = searcher.doc(hit)
if block_given?
yield doc[:class_name], doc[:id].to_i, score
else
result << { :model => doc[:class_name], :id => doc[:id], :score => score }
end
- end
- return block_given? ? total_hits : result
+ }
+ return block_given? ? hits.total_hits : result # with a block: total hit count; without: array of :model/:id/:score hashes
end
# returns a MultiIndex instance operating on a MultiReader
@@ -537,17 +491,7 @@ module FerretMixin
key = model_classes.inject("") { |s, clazz| s << clazz.name }
@@multi_indexes[key] ||= MultiIndex.new(model_classes, ferret_configuration)
end
-
- def deprecated_options_support(options)
- if options[:num_docs]
- logger.warn ":num_docs is deprecated, use :limit instead!"
- options[:limit] ||= options[:num_docs]
- end
- if options[:first_doc]
- logger.warn ":first_doc is deprecated, use :offset instead!"
- options[:offset] ||= options[:first_doc]
- end
- end
+
end
@@ -563,9 +507,7 @@ module FerretMixin
# add to index
def ferret_create
logger.debug "ferret_create/update: #{self.class.name} : #{self.id}"
- if @ferret_reindex
- self.class.ferret_index << self.to_doc
- end
+ self.class.ferret_index << self.to_doc if @ferret_reindex # indexing is skipped when @ferret_reindex is falsy; the flag is set back to true right after
@ferret_reindex = true
true
end
@@ -575,11 +517,12 @@ module FerretMixin
def ferret_destroy
logger.debug "ferret_destroy: #{self.class.name} : #{self.id}"
begin
- query = Ferret::Search::TermQuery.new(:id, self.id.to_s)
+ query = Ferret::Search::TermQuery.new(Ferret::Index::Term.new('id',self.id.to_s))
if self.class.configuration[:single_index]
bq = Ferret::Search::BooleanQuery.new
- bq.add_query(query, :must)
- bq.add_query(Ferret::Search::TermQuery.new(:class_name, self.class.name), :must)
+ bq.add_query(query, Ferret::Search::BooleanClause::Occur::MUST)
+ bq.add_query(Ferret::Search::TermQuery.new(Ferret::Index::Term.new('class_name', self.class.name)),
+ Ferret::Search::BooleanClause::Occur::MUST)
query = bq
end
self.class.ferret_index.query_delete(query)
@@ -593,31 +536,36 @@ module FerretMixin
def to_doc
logger.debug "creating doc for class: #{self.class.name}, id: #{self.id}"
# Churn through the complete Active Record and add it to the Ferret document
- doc = Ferret::Document.new
+ doc = Ferret::Document::Document.new
# store the id of each item
- doc[:id] = self.id
-
+ doc << Ferret::Document::Field.new( "id", self.id,
+ Ferret::Document::Field::Store::YES,
+ Ferret::Document::Field::Index::UNTOKENIZED )
# store the class name if configured to do so
if configuration[:store_class_name]
- doc[:class_name] = self.class.name
+ doc << Ferret::Document::Field.new( "class_name", self.class.name,
+ Ferret::Document::Field::Store::YES,
+ Ferret::Document::Field::Index::UNTOKENIZED ) # have to tokenize to be able to use class_name field in queries ?!
end
# iterate through the fields and add them to the document
- #if fields_for_ferret
+ if fields_for_ferret # nil when acts_as_ferret was called without a :fields option
# have user defined fields
- fields_for_ferret.each_pair do |field, config|
- doc[field] = self.send("#{field}_to_ferret") unless config[:ignore]
- end
- #else
+ fields_for_ferret.each do |field|
+ doc << self.send("#{field}_to_ferret") # the generated <field>_to_ferret method returns a ready-made Field
+ end
+ else
# take all fields
- # TODO shouldn't be needed any more
- # puts "remove me!"
- # self.attributes.each_pair do |key,val|
- # unless key == :id
- # logger.debug "add field #{key} with value #{val}"
- # doc[key] = val.to_s
- # end
- # end
- #end
+ self.attributes.each_pair do |key,val|
+ unless key.to_s == 'id' # attribute names are Strings, so comparing with :id never matched; the id field is already added above
+ logger.debug "add field #{key} with value #{val}"
+ doc << Ferret::Document::Field.new(
+ key,
+ val.to_s,
+ Ferret::Document::Field::Store::NO,
+ Ferret::Document::Field::Index::TOKENIZED)
+ end
+ end
+ end
return doc
end
@@ -699,7 +647,7 @@ module FerretMixin
term_query.boost = cur.score / best_score
end
begin
- query.add_query(term_query, :should)
+ query.add_query(term_query, Ferret::Search::BooleanClause::Occur::SHOULD)
rescue Ferret::Search::BooleanQuery::TooManyClauses
break
end
@@ -708,7 +656,8 @@ module FerretMixin
end
# exclude ourselves
t = Ferret::Index::Term.new('id', self.id.to_s)
- query.add_query(Ferret::Search::TermQuery.new(t), :must_not)
+ query.add_query(Ferret::Search::TermQuery.new(t),
+ Ferret::Search::BooleanClause::Occur::MUST_NOT)
return query
end