Commit 2e82af5d by Truong Ba Dieu

Connect to solr

parent a1896d33
# Load DSL and Setup Up Stages
require 'capistrano/setup'
# Includes default deployment tasks
require 'capistrano/deploy'
# Includes tasks from other gems included in your Gemfile
#
# For documentation on these, see for example:
#
# https://github.com/capistrano/rvm
# https://github.com/capistrano/rbenv
# https://github.com/capistrano/chruby
# https://github.com/capistrano/bundler
# https://github.com/capistrano/rails
#
require 'capistrano/rails'
require 'capistrano/rvm'
# require 'capistrano/rbenv'
# require 'capistrano/chruby'
require 'capistrano/bundler'
# require 'capistrano/rails/assets'
# require 'capistrano/rails/migrations'
# Loads custom tasks from `lib/capistrano/tasks' if you have any defined.
Dir.glob('lib/capistrano/tasks/*.cap').each { |r| import r }
...@@ -42,7 +42,8 @@ group :development, :test do ...@@ -42,7 +42,8 @@ group :development, :test do
gem 'spring' gem 'spring'
end end
gem 'kaminari', '0.16.3' # gem 'kaminari', '0.16.3'
gem 'will_paginate', '~> 3.0.6'
gem 'font-awesome-sass', '4.3.2.1' gem 'font-awesome-sass', '4.3.2.1'
# Bootstrap 3: https://github.com/twbs/bootstrap-sass # Bootstrap 3: https://github.com/twbs/bootstrap-sass
gem 'bootstrap-sass', '3.3.4.1' gem 'bootstrap-sass', '3.3.4.1'
...@@ -57,8 +58,13 @@ gem "breadcrumbs_on_rails" ...@@ -57,8 +58,13 @@ gem "breadcrumbs_on_rails"
gem 'momentjs-rails', '>= 2.8.1' gem 'momentjs-rails', '>= 2.8.1'
gem 'bootstrap3-datetimepicker-rails', '~> 4.7.14' gem 'bootstrap3-datetimepicker-rails', '~> 4.7.14'
gem 'draper', '~> 1.3' gem 'draper', '~> 1.3'
gem 'sunspot_rails' gem 'rsolr'
gem 'sunspot_solr'
gem 'capistrano', '~> 3.1.0'
gem 'capistrano-bundler', '~> 1.1.2'
gem 'capistrano-rails', '~> 1.1.1'
gem 'capistrano-rvm', github: "capistrano/rvm"
group :development, :test do group :development, :test do
# gem 'capybara' # Integration test tool to simulate a user on a website. # gem 'capybara' # Integration test tool to simulate a user on a website.
...@@ -77,4 +83,5 @@ group :development, :test do ...@@ -77,4 +83,5 @@ group :development, :test do
gem 'rspec-rails', '~> 3.0.1' gem 'rspec-rails', '~> 3.0.1'
gem 'rspec-support' gem 'rspec-support'
gem "pry" gem "pry"
gem "quiet_assets"
end end
\ No newline at end of file
GIT
remote: git://github.com/capistrano/rvm.git
revision: 9cfef39cf0022839dca6b5b330dfefeb5fc363e7
specs:
capistrano-rvm (0.1.2)
capistrano (~> 3.0)
sshkit (~> 1.2)
GEM GEM
remote: https://rubygems.org/ remote: https://rubygems.org/
specs: specs:
...@@ -53,6 +61,16 @@ GEM ...@@ -53,6 +61,16 @@ GEM
builder (3.2.2) builder (3.2.2)
byebug (5.0.0) byebug (5.0.0)
columnize (= 0.9.0) columnize (= 0.9.0)
capistrano (3.1.0)
i18n
rake (>= 10.0.0)
sshkit (~> 1.3)
capistrano-bundler (1.1.4)
capistrano (~> 3.1)
sshkit (~> 1.2)
capistrano-rails (1.1.3)
capistrano (~> 3.1)
capistrano-bundler (~> 1.1)
capybara (2.4.4) capybara (2.4.4)
mime-types (>= 1.16) mime-types (>= 1.16)
nokogiri (>= 1.3.3) nokogiri (>= 1.3.3)
...@@ -67,6 +85,7 @@ GEM ...@@ -67,6 +85,7 @@ GEM
coffee-script-source coffee-script-source
execjs execjs
coffee-script-source (1.9.1.1) coffee-script-source (1.9.1.1)
colorize (0.7.7)
columnize (0.9.0) columnize (0.9.0)
daemons (1.2.3) daemons (1.2.3)
database_cleaner (1.4.1) database_cleaner (1.4.1)
...@@ -121,9 +140,6 @@ GEM ...@@ -121,9 +140,6 @@ GEM
railties (>= 4.2.0) railties (>= 4.2.0)
thor (>= 0.14, < 2.0) thor (>= 0.14, < 2.0)
json (1.8.3) json (1.8.3)
kaminari (0.16.3)
actionpack (>= 3.0.0)
activesupport (>= 3.0.0)
launchy (2.4.3) launchy (2.4.3)
addressable (~> 2.3) addressable (~> 2.3)
loofah (2.0.2) loofah (2.0.2)
...@@ -139,14 +155,18 @@ GEM ...@@ -139,14 +155,18 @@ GEM
multi_json (1.11.2) multi_json (1.11.2)
multi_xml (0.5.5) multi_xml (0.5.5)
mysql2 (0.3.18) mysql2 (0.3.18)
net-scp (1.2.1)
net-ssh (>= 2.6.5)
net-ssh (2.9.2)
nokogiri (1.6.6.2) nokogiri (1.6.6.2)
mini_portile (~> 0.6.0) mini_portile (~> 0.6.0)
orm_adapter (0.5.0) orm_adapter (0.5.0)
pr_geohash (1.0.0)
pry (0.10.1) pry (0.10.1)
coderay (~> 1.1.0) coderay (~> 1.1.0)
method_source (~> 0.8.1) method_source (~> 0.8.1)
slop (~> 3.4) slop (~> 3.4)
quiet_assets (1.1.0)
railties (>= 3.1, < 5.0)
rack (1.6.4) rack (1.6.4)
rack-test (0.6.3) rack-test (0.6.3)
rack (>= 1.0) rack (>= 1.0)
...@@ -224,14 +244,10 @@ GEM ...@@ -224,14 +244,10 @@ GEM
actionpack (>= 3.0) actionpack (>= 3.0)
activesupport (>= 3.0) activesupport (>= 3.0)
sprockets (>= 2.8, < 4.0) sprockets (>= 2.8, < 4.0)
sunspot (2.2.0) sshkit (1.7.1)
pr_geohash (~> 1.0) colorize (>= 0.7.0)
rsolr (~> 1.0.7) net-scp (>= 1.1.2)
sunspot_rails (2.2.0) net-ssh (>= 2.8.0)
nokogiri
rails (>= 3)
sunspot (= 2.2.0)
sunspot_solr (2.2.0)
temple (0.7.6) temple (0.7.6)
thin (1.6.3) thin (1.6.3)
daemons (~> 1.0, >= 1.0.9) daemons (~> 1.0, >= 1.0.9)
...@@ -260,6 +276,7 @@ GEM ...@@ -260,6 +276,7 @@ GEM
binding_of_caller (>= 0.7.2) binding_of_caller (>= 0.7.2)
railties (>= 4.0) railties (>= 4.0)
sprockets-rails (>= 2.0, < 4.0) sprockets-rails (>= 2.0, < 4.0)
will_paginate (3.0.7)
xpath (2.0.0) xpath (2.0.0)
nokogiri (~> 1.3) nokogiri (~> 1.3)
...@@ -271,6 +288,10 @@ DEPENDENCIES ...@@ -271,6 +288,10 @@ DEPENDENCIES
bootstrap3-datetimepicker-rails (~> 4.7.14) bootstrap3-datetimepicker-rails (~> 4.7.14)
breadcrumbs_on_rails breadcrumbs_on_rails
byebug byebug
capistrano (~> 3.1.0)
capistrano-bundler (~> 1.1.2)
capistrano-rails (~> 1.1.1)
capistrano-rvm!
capybara capybara
coffee-rails (~> 4.1.0) coffee-rails (~> 4.1.0)
database_cleaner database_cleaner
...@@ -284,11 +305,12 @@ DEPENDENCIES ...@@ -284,11 +305,12 @@ DEPENDENCIES
font-awesome-sass (= 4.3.2.1) font-awesome-sass (= 4.3.2.1)
jbuilder (~> 2.0) jbuilder (~> 2.0)
jquery-rails jquery-rails
kaminari (= 0.16.3)
momentjs-rails (>= 2.8.1) momentjs-rails (>= 2.8.1)
mysql2 mysql2
pry pry
quiet_assets
rails (= 4.2.1) rails (= 4.2.1)
rsolr
rspec rspec
rspec-core rspec-core
rspec-expectations rspec-expectations
...@@ -300,11 +322,10 @@ DEPENDENCIES ...@@ -300,11 +322,10 @@ DEPENDENCIES
shoulda-matchers shoulda-matchers
slim slim
spring spring
sunspot_rails
sunspot_solr
thin thin
turbolinks turbolinks
turnip turnip
uglifier (>= 1.3.0) uglifier (>= 1.3.0)
vacuum vacuum
web-console (~> 2.0) web-console (~> 2.0)
will_paginate (~> 3.0.6)
...@@ -5,7 +5,7 @@ class CategoriesController < ApplicationController ...@@ -5,7 +5,7 @@ class CategoriesController < ApplicationController
def show def show
@category = Category.find(params[:id]) @category = Category.find(params[:id])
@products = @category.products.page(params[:page]) @products = @category.products.paginate(:page => params[:page])
add_breadcrumb @category.name add_breadcrumb @category.name
end end
......
...@@ -6,12 +6,13 @@ class HomeController < ApplicationController ...@@ -6,12 +6,13 @@ class HomeController < ApplicationController
before_action :get_recommend before_action :get_recommend
def index def index
@products = Product.order(release_date: :desc).page(params[:page]) @products = Product.order(release_date: :desc).paginate(:page => params[:page])
end end
def search def search
add_breadcrumb "Search" add_breadcrumb "Search"
@products = Product.search_keyword(params) @solr_products = Product.search_keyword(params)
@products = Product.where("id IN (?)", @solr_products.map{|h| h["id"]})
end end
def cart def cart
......
...@@ -6,30 +6,33 @@ class Product < ActiveRecord::Base ...@@ -6,30 +6,33 @@ class Product < ActiveRecord::Base
# default 'public/images/product_default.jpg' # default 'public/images/product_default.jpg'
end end
paginates_per 5 self.per_page = ENV["default_perpage"]
validates :pid, :title, :price, :category_id, :image, :stock, :release_date, :public_date, presence: true validates :pid, :title, :price, :category_id, :stock, :release_date, :public_date, presence: true
validates :pid, uniqueness: true validates :pid, uniqueness: true
scope :recommend, -> { where(recommend: true) } scope :recommend, -> { where(recommend: true) }
searchable do after_save :update_solr
text :pid, :title, :author, :publisher, :studio, :currency, :image_uid before_destroy :remove_solr
boolean :recommend
double :price # searchable do
integer :category_id, :user_id, :stock # text :pid, :title, :author, :publisher, :studio
date :release_date # time :release_date
end # end
def self.search_keyword(params) def self.search_keyword(params)
where("title LIKE ?", "%#{params[:keyword]}%").order(release_date: :desc).page(params[:page]) # where("title LIKE ?", "%#{params[:keyword]}%").order(release_date: :desc).paginate(:page => params[:page])
# params[:page] ||= 1 # params[:page] ||= 1
# Product.search do # p = Product.search do
# fulltext params[:keyword] # fulltext params[:keyword]
# paginate :page => params[:page], :per_page => 5 # paginate :page => params[:page], :per_page => ENV["default_perpage"]
# order_by(:release_date, :desc) # order_by(:release_date, :desc)
# end # end
# p.results
SolrService.search(params)
end end
def can_buy?(quantity) def can_buy?(quantity)
...@@ -39,4 +42,13 @@ class Product < ActiveRecord::Base ...@@ -39,4 +42,13 @@ class Product < ActiveRecord::Base
def substract_stock(change) def substract_stock(change)
self.update_columns(stock: stock - change) self.update_columns(stock: stock - change)
end end
private
def update_solr
SolrService.add({id: id, title: title})
end
def remove_solr
SolrService.remove(id: id)
end
end end
require 'rubygems'
require 'rsolr'
class SolrService
def self.search(params={})
params[:page] ||= 1
params[:keyword] ||= ""
solr = RSolr.connect :url => ENV["solr_url"]
res = solr.paginate params[:page], ENV["default_perpage"], 'select', :params => {:q => "*#{params[:keyword]}*"}
res["response"]["docs"]
end
# add or update index to solr
# documents is {}, or [{},{}]
def self.add(documents)
solr = RSolr.connect :url => ENV["solr_url"]
solr.add documents
solr.commit
end
# delete index solr
# id is num or arr num
def self.remove(ids)
if ids.present?
solr = RSolr.connect :url => ENV["solr_url"]
solr.delete_by_id ids
solr.commit
end
end
end
...@@ -26,7 +26,10 @@ class VacuumAwsService ...@@ -26,7 +26,10 @@ class VacuumAwsService
def self.parse_item(item) def self.parse_item(item)
# only get product have pid, title, price # only get product have pid, title, price
if item.present? && item["ASIN"].present? && item["ItemAttributes"]["Title"].present? && item["ItemAttributes"]["ListPrice"].present? if item.present? && item["ASIN"].present? &&
item["ItemAttributes"]["Title"].present? &&
item["ItemAttributes"]["ListPrice"].present? &&
item["LargeImage"].present?
{ {
pid: item["ASIN"], pid: item["ASIN"],
title: item["ItemAttributes"]["Title"], title: item["ItemAttributes"]["Title"],
......
...@@ -10,4 +10,4 @@ ...@@ -10,4 +10,4 @@
= render partial: "products/item", collection: @products, as: :product = render partial: "products/item", collection: @products, as: :product
.text-center .text-center
= paginate @products = will_paginate @products
\ No newline at end of file \ No newline at end of file
...@@ -4,4 +4,4 @@ h4= "Search with keyword '#{params[:keyword]}'" ...@@ -4,4 +4,4 @@ h4= "Search with keyword '#{params[:keyword]}'"
= render partial: "products/item", collection: @products, as: :product = render partial: "products/item", collection: @products, as: :product
.text-center .text-center
= paginate @products = will_paginate @solr_products
\ No newline at end of file \ No newline at end of file
...@@ -2,4 +2,8 @@ aws_access_key_id: "AKIAJ77C4CTZOP7TUVWQ" ...@@ -2,4 +2,8 @@ aws_access_key_id: "AKIAJ77C4CTZOP7TUVWQ"
aws_secret_access_key: "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" aws_secret_access_key: "cYJYb/MLGV0M6oi1+DjlliL1cfxmh78tKXnT6ZmX"
associate_tag: "zigexn6400-22" associate_tag: "zigexn6400-22"
solr_url: "http://tomcat:tomcat@localhost:8080/solr/venshop"
default_perpage: 5
default_send_mail: "no-reply@venshop.com" default_send_mail: "no-reply@venshop.com"
\ No newline at end of file
# config valid only for Capistrano 3.1
lock '3.1.0'
set :application, 'dieutb_venshop'
set :repo_url, 'git@gitlab.zigexn.vn:dieutb/VenShop.git'
# Default branch is :master
# ask :branch, proc { `git rev-parse --abbrev-ref HEAD`.chomp }
# Default deploy_to directory is /var/www/my_app
set :deploy_to, '/web/training/dieutb/'
# Default value for :scm is :git
# set :scm, :git
# Default value for :format is :pretty
# set :format, :pretty
# Default value for :log_level is :debug
# set :log_level, :debug
# Default value for :pty is false
# set :pty, true
# Default value for :linked_files is []
set :linked_files, %w{config/database.yml}
# Default value for linked_dirs is []
set :linked_dirs, %w{bin log tmp/pids tmp/cache tmp/sockets vendor/bundle public/system}
# Default value for default_env is {}
# set :default_env, { path: "/opt/ruby/bin:$PATH" }
# Default value for keep_releases is 5
# set :keep_releases, 5
namespace :deploy do
desc 'Restart application'
task :restart do
on roles(:app), in: :sequence, wait: 5 do
# Your restart mechanism here, for example:
execute :touch, release_path.join('tmp/restart.txt')
end
end
after :publishing, 'deploy:restart'
after :finishing, 'deploy:cleanup'
after :restart, :clear_cache do
on roles(:web), in: :groups, limit: 3, wait: 10 do
# Here we can do anything such as:
# within release_path do
# execute :rake, 'cache:clear'
# end
end
end
end
# Simple Role Syntax
# ==================
# Supports bulk-adding hosts to roles, the primary
# server in each group is considered to be the first
# unless any hosts have the primary property set.
# Don't declare `role :all`, it's a meta role
set :stage, :production
role :app, %w{deploy@45.55.247.4}
role :web, %w{deploy@45.55.247.4}
role :db, %w{deploy@45.55.247.4}
# Extended Server Syntax
# ======================
# This can be used to drop a more detailed server
# definition into the server list. The second argument
# something that quacks like a hash can be used to set
# extended properties on the server.
server '45.55.247.4', user: 'root', roles: %w{web app}
# you can set custom ssh options
# it's possible to pass any option but you need to keep in mind that net/ssh understand limited list of options
# you can see them in [net/ssh documentation](http://net-ssh.github.io/net-ssh/classes/Net/SSH.html#method-c-start)
# set it globally
# set :ssh_options, {
# keys: %w(/home/rlisowski/.ssh/id_rsa),
# forward_agent: false,
# auth_methods: %w(password)
# }
# and/or per server
# server 'example.com',
# user: 'user_name',
# roles: %w{web app},
# ssh_options: {
# user: 'user_name', # overrides user setting above
# keys: %w(/home/user_name/.ssh/id_rsa),
# forward_agent: false,
# auth_methods: %w(publickey password)
# # password: 'please use keys'
# }
# setting per server overrides global ssh_options
...@@ -19,4 +19,4 @@ test: ...@@ -19,4 +19,4 @@ test:
# Do not keep production secrets in the repository, # Do not keep production secrets in the repository,
# instead read values from the environment. # instead read values from the environment.
production: production:
secret_key_base: <%= ENV["SECRET_KEY_BASE"] %> secret_key_base: a5cb22cc7065463cf7cb5b8e0deea024062b5349d723de32915b97f99dacbff92fd87a57787f897bfb86372ee0a5987a37454184d1dd1d854848d296e47f2381
production:
solr:
hostname: localhost
port: 8983
log_level: WARNING
path: /solr/production
# read_timeout: 2
# open_timeout: 0.5
development:
solr:
hostname: localhost
port: 8982
log_level: INFO
path: /solr/development
test:
solr:
hostname: localhost
port: 8981
log_level: WARNING
path: /solr/test
\ No newline at end of file
...@@ -11,7 +11,7 @@ ...@@ -11,7 +11,7 @@
# #
# It's strongly recommended that you check this file into your version control system. # It's strongly recommended that you check this file into your version control system.
ActiveRecord::Schema.define(version: 20150716093626) do ActiveRecord::Schema.define(version: 20150717065015) do
create_table "categories", force: :cascade do |t| create_table "categories", force: :cascade do |t|
t.string "name", limit: 255 t.string "name", limit: 255
...@@ -28,7 +28,7 @@ ActiveRecord::Schema.define(version: 20150716093626) do ...@@ -28,7 +28,7 @@ ActiveRecord::Schema.define(version: 20150716093626) do
t.string "pid", limit: 255 t.string "pid", limit: 255
t.integer "item_count", limit: 4, default: 0 t.integer "item_count", limit: 4, default: 0
t.decimal "item_total", precision: 12, scale: 2, default: 0.0 t.decimal "item_total", precision: 12, scale: 2, default: 0.0
t.integer "state", limit: 4, default: 1 t.integer "state", limit: 4, default: 0
t.datetime "completed_at" t.datetime "completed_at"
t.integer "user_id", limit: 4 t.integer "user_id", limit: 4
t.datetime "created_at", null: false t.datetime "created_at", null: false
......
...@@ -4,8 +4,10 @@ namespace :crawler do ...@@ -4,8 +4,10 @@ namespace :crawler do
categories = %w(Books Music Electronics Software Kitchen) categories = %w(Books Music Electronics Software Kitchen)
categories.each do |search_cate| categories.each do |search_cate|
puts '111111111111111111111111111'
category = Category.find_or_create_by(name: search_cate) category = Category.find_or_create_by(name: search_cate)
(1..5).each do |page| (1..5).each do |page|
puts '2222222222222222222222222222'
next if category.products.count >= 20 # limit 20 products each category next if category.products.count >= 20 # limit 20 products each category
response = VacuumAwsService.item_search({ response = VacuumAwsService.item_search({
query: { query: {
...@@ -15,6 +17,7 @@ namespace :crawler do ...@@ -15,6 +17,7 @@ namespace :crawler do
'ResponseGroup' => 'Medium' 'ResponseGroup' => 'Medium'
} }
}) })
puts '3333333333333333333333333333333'
items = VacuumAwsService.parse_items(response) items = VacuumAwsService.parse_items(response)
items.each do |item| items.each do |item|
......
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!-- The content of this page will be statically included into the top
of the admin page. Uncomment this as an example to see there the content
will show up.
<hr>
<i>This line will appear before the first table</i>
<tr>
<td colspan="2">
This row will be appended to the end of the first table
</td>
</tr>
<hr>
-->
<?xml version="1.0" encoding="UTF-8" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!-- If this file is found in the config directory, it will only be
loaded once at startup. If it is found in Solr's data
directory, it will be re-loaded every commit.
-->
<elevate>
<query text="foo bar">
<doc id="1" />
<doc id="2" />
<doc id="3" />
</query>
<query text="ipod">
<doc id="MA147LL/A" /> <!-- put the actual ipod at the top -->
<doc id="IW-02" exclude="true" /> <!-- exclude this cable -->
</query>
</elevate>
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Syntax:
# "source" => "target"
# "source".length() > 0 (source cannot be empty.)
# "target".length() >= 0 (target can be empty.)
# example:
# "À" => "A"
# "\u00C0" => "A"
# "\u00C0" => "\u0041"
# "ß" => "ss"
# "\t" => " "
# "\n" => ""
# À => A
"\u00C0" => "A"
# Á => A
"\u00C1" => "A"
# Â => A
"\u00C2" => "A"
# Ã => A
"\u00C3" => "A"
# Ä => A
"\u00C4" => "A"
# Å => A
"\u00C5" => "A"
# Æ => AE
"\u00C6" => "AE"
# Ç => C
"\u00C7" => "C"
# È => E
"\u00C8" => "E"
# É => E
"\u00C9" => "E"
# Ê => E
"\u00CA" => "E"
# Ë => E
"\u00CB" => "E"
# Ì => I
"\u00CC" => "I"
# Í => I
"\u00CD" => "I"
# Î => I
"\u00CE" => "I"
# Ï => I
"\u00CF" => "I"
# IJ => IJ
"\u0132" => "IJ"
# Ð => D
"\u00D0" => "D"
# Ñ => N
"\u00D1" => "N"
# Ò => O
"\u00D2" => "O"
# Ó => O
"\u00D3" => "O"
# Ô => O
"\u00D4" => "O"
# Õ => O
"\u00D5" => "O"
# Ö => O
"\u00D6" => "O"
# Ø => O
"\u00D8" => "O"
# Œ => OE
"\u0152" => "OE"
# Þ
"\u00DE" => "TH"
# Ù => U
"\u00D9" => "U"
# Ú => U
"\u00DA" => "U"
# Û => U
"\u00DB" => "U"
# Ü => U
"\u00DC" => "U"
# Ý => Y
"\u00DD" => "Y"
# Ÿ => Y
"\u0178" => "Y"
# à => a
"\u00E0" => "a"
# á => a
"\u00E1" => "a"
# â => a
"\u00E2" => "a"
# ã => a
"\u00E3" => "a"
# ä => a
"\u00E4" => "a"
# å => a
"\u00E5" => "a"
# æ => ae
"\u00E6" => "ae"
# ç => c
"\u00E7" => "c"
# è => e
"\u00E8" => "e"
# é => e
"\u00E9" => "e"
# ê => e
"\u00EA" => "e"
# ë => e
"\u00EB" => "e"
# ì => i
"\u00EC" => "i"
# í => i
"\u00ED" => "i"
# î => i
"\u00EE" => "i"
# ï => i
"\u00EF" => "i"
# ij => ij
"\u0133" => "ij"
# ð => d
"\u00F0" => "d"
# ñ => n
"\u00F1" => "n"
# ò => o
"\u00F2" => "o"
# ó => o
"\u00F3" => "o"
# ô => o
"\u00F4" => "o"
# õ => o
"\u00F5" => "o"
# ö => o
"\u00F6" => "o"
# ø => o
"\u00F8" => "o"
# œ => oe
"\u0153" => "oe"
# ß => ss
"\u00DF" => "ss"
# þ => th
"\u00FE" => "th"
# ù => u
"\u00F9" => "u"
# ú => u
"\u00FA" => "u"
# û => u
"\u00FB" => "u"
# ü => u
"\u00FC" => "u"
# ý => y
"\u00FD" => "y"
# ÿ => y
"\u00FF" => "y"
# ff => ff
"\uFB00" => "ff"
# fi => fi
"\uFB01" => "fi"
# fl => fl
"\uFB02" => "fl"
# ffi => ffi
"\uFB03" => "ffi"
# ffl => ffl
"\uFB04" => "ffl"
# ſt => ft
"\uFB05" => "ft"
# st => st
"\uFB06" => "st"
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#-----------------------------------------------------------------------
# Use a protected word file to protect against the stemmer reducing two
# unrelated words to the same base word.
# Some non-words that normally won't be encountered,
# just to test that they won't be stemmed.
dontstems
zwhacky
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!--
This is the Solr schema file. This file should be named "schema.xml" and
should be in the conf directory under the solr home
(i.e. ./solr/conf/schema.xml by default)
or located where the classloader for the Solr webapp can find it.
This example schema is the recommended starting point for users.
It should be kept correct and concise, usable out-of-the-box.
For more information, on how to customize this file, please see
http://wiki.apache.org/solr/SchemaXml
PERFORMANCE NOTE: this schema includes many optional features and should not
be used for benchmarking. To improve performance one could
- set stored="false" for all fields possible (esp large fields) when you
only need to search on the field but don't need to return the original
value.
- set indexed="false" if you don't need to search on the field, but only
return the field as a result of searching on other indexed fields.
- remove all unneeded copyField statements
- for best index size and searching performance, set "index" to false
for all general text fields, use copyField to copy them to the
catchall "text" field, and use that for searching.
- For maximum indexing performance, use the StreamingUpdateSolrServer
java client.
- Remember to run the JVM in server mode, and use a higher logging level
that avoids logging every request
-->
<schema name="sunspot" version="1.0">
<types>
<!-- field type definitions. The "name" attribute is
just a label to be used by field definitions. The "class"
attribute and any other attributes determine the real
behavior of the fieldType.
Class names starting with "solr" refer to java classes in the
org.apache.solr.analysis package.
-->
<!-- *** This fieldType is used by Sunspot! *** -->
<fieldType name="string" class="solr.StrField" omitNorms="true"/>
<!-- *** This fieldType is used by Sunspot! *** -->
<fieldType name="tdouble" class="solr.TrieDoubleField" omitNorms="true"/>
<!-- *** This fieldType is used by Sunspot! *** -->
<fieldType name="rand" class="solr.RandomSortField" omitNorms="true"/>
<!-- *** This fieldType is used by Sunspot! *** -->
<fieldType name="text" class="solr.TextField" omitNorms="false">
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.PorterStemFilterFactory"/>
</analyzer>
</fieldType>
<!-- *** This fieldType is used by Sunspot! *** -->
<fieldType name="boolean" class="solr.BoolField" omitNorms="true"/>
<!-- *** This fieldType is used by Sunspot! *** -->
<fieldType name="date" class="solr.DateField" omitNorms="true"/>
<!-- *** This fieldType is used by Sunspot! *** -->
<fieldType name="sdouble" class="solr.SortableDoubleField" omitNorms="true"/>
<!-- *** This fieldType is used by Sunspot! *** -->
<fieldType name="sfloat" class="solr.SortableFloatField" omitNorms="true"/>
<!-- *** This fieldType is used by Sunspot! *** -->
<fieldType name="sint" class="solr.SortableIntField" omitNorms="true"/>
<!-- *** This fieldType is used by Sunspot! *** -->
<fieldType name="slong" class="solr.SortableLongField" omitNorms="true"/>
<!-- *** This fieldType is used by Sunspot! *** -->
<fieldType name="tint" class="solr.TrieIntField" omitNorms="true"/>
<!-- *** This fieldType is used by Sunspot! *** -->
<fieldType name="tfloat" class="solr.TrieFloatField" omitNorms="true"/>
<!-- *** This fieldType is used by Sunspot! *** -->
<fieldType name="tdate" class="solr.TrieDateField"
omitNorms="true"/>
<!-- Special field type for spell correction. Be careful about
adding filters here, as they apply *before* your values go in
the spellcheck. For example, the lowercase filter here means
all spelling suggestions will be lower case (without it,
though, you'd have duplicate suggestions for lower and proper
cased words). -->
<fieldType name="textSpell" class="solr.TextField" positionIncrementGap="100" omitNorms="true">
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldType>
<!-- A specialized field for geospatial search. If indexed, this fieldType must not be multivalued. -->
<fieldType name="location" class="solr.LatLonType" subFieldSuffix="_coordinate"/>
</types>
<fields>
<!-- Valid attributes for fields:
name: mandatory - the name for the field
type: mandatory - the name of a previously defined type from the
<types> section
indexed: true if this field should be indexed (searchable or sortable)
stored: true if this field should be retrievable
compressed: [false] if this field should be stored using gzip compression
(this will only apply if the field type is compressable; among
the standard field types, only TextField and StrField are)
multiValued: true if this field may contain multiple values per document
omitNorms: (expert) set to true to omit the norms associated with
this field (this disables length normalization and index-time
boosting for the field, and saves some memory). Only full-text
fields or fields that need an index-time boost need norms.
termVectors: [false] set to true to store the term vector for a
given field.
When using MoreLikeThis, fields used for similarity should be
stored for best performance.
termPositions: Store position information with the term vector.
This will increase storage costs.
termOffsets: Store offset information with the term vector. This
will increase storage costs.
default: a value that should be used if no value is specified
when adding a document.
-->
<!-- *** This field is used by Sunspot! *** -->
<field name="id" stored="true" type="string" multiValued="false" indexed="true"/>
<!-- *** This field is used by Sunspot! *** -->
<field name="type" stored="false" type="string" multiValued="true" indexed="true"/>
<!-- *** This field is used by Sunspot! *** -->
<field name="class_name" stored="false" type="string" multiValued="false" indexed="true"/>
<!-- *** This field is used by Sunspot! *** -->
<field name="text" stored="false" type="string" multiValued="true" indexed="true"/>
<!-- *** This field is used by Sunspot! *** -->
<field name="lat" stored="true" type="tdouble" multiValued="false" indexed="true"/>
<!-- *** This field is used by Sunspot! *** -->
<field name="lng" stored="true" type="tdouble" multiValued="false" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="random_*" stored="false" type="rand" multiValued="false" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="_local*" stored="false" type="tdouble" multiValued="false" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_text" stored="false" type="text" multiValued="true" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_texts" stored="true" type="text" multiValued="true" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_b" stored="false" type="boolean" multiValued="false" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_bm" stored="false" type="boolean" multiValued="true" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_bs" stored="true" type="boolean" multiValued="false" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_bms" stored="true" type="boolean" multiValued="true" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_d" stored="false" type="date" multiValued="false" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_dm" stored="false" type="date" multiValued="true" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_ds" stored="true" type="date" multiValued="false" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_dms" stored="true" type="date" multiValued="true" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_e" stored="false" type="sdouble" multiValued="false" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_em" stored="false" type="sdouble" multiValued="true" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_es" stored="true" type="sdouble" multiValued="false" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_ems" stored="true" type="sdouble" multiValued="true" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_f" stored="false" type="sfloat" multiValued="false" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_fm" stored="false" type="sfloat" multiValued="true" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_fs" stored="true" type="sfloat" multiValued="false" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_fms" stored="true" type="sfloat" multiValued="true" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_i" stored="false" type="sint" multiValued="false" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_im" stored="false" type="sint" multiValued="true" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_is" stored="true" type="sint" multiValued="false" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_ims" stored="true" type="sint" multiValued="true" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_l" stored="false" type="slong" multiValued="false" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_lm" stored="false" type="slong" multiValued="true" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_ls" stored="true" type="slong" multiValued="false" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_lms" stored="true" type="slong" multiValued="true" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_s" stored="false" type="string" multiValued="false" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_sm" stored="false" type="string" multiValued="true" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_ss" stored="true" type="string" multiValued="false" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_sms" stored="true" type="string" multiValued="true" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_it" stored="false" type="tint" multiValued="false" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_itm" stored="false" type="tint" multiValued="true" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_its" stored="true" type="tint" multiValued="false" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_itms" stored="true" type="tint" multiValued="true" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_ft" stored="false" type="tfloat" multiValued="false" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_ftm" stored="false" type="tfloat" multiValued="true" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_fts" stored="true" type="tfloat" multiValued="false" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_ftms" stored="true" type="tfloat" multiValued="true" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_dt" stored="false" type="tdate" multiValued="false" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_dtm" stored="false" type="tdate" multiValued="true" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_dts" stored="true" type="tdate" multiValued="false" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_dtms" stored="true" type="tdate" multiValued="true" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_textv" stored="false" termVectors="true" type="text" multiValued="true" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_textsv" stored="true" termVectors="true" type="text" multiValued="true" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_et" stored="false" termVectors="true" type="tdouble" multiValued="false" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_etm" stored="false" termVectors="true" type="tdouble" multiValued="true" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_ets" stored="true" termVectors="true" type="tdouble" multiValued="false" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_etms" stored="true" termVectors="true" type="tdouble" multiValued="true" indexed="true"/>
<!-- Type used to index the lat and lon components for the "location" FieldType -->
<dynamicField name="*_coordinate" type="tdouble" indexed="true" stored="false" multiValued="false"/>
<dynamicField name="*_p" type="location" indexed="true" stored="true" multiValued="false"/>
<dynamicField name="*_ll" stored="false" type="location" multiValued="false" indexed="true"/>
<dynamicField name="*_llm" stored="false" type="location" multiValued="true" indexed="true"/>
<dynamicField name="*_lls" stored="true" type="location" multiValued="false" indexed="true"/>
<dynamicField name="*_llms" stored="true" type="location" multiValued="true" indexed="true"/>
<field name="textSpell" stored="false" type="textSpell" multiValued="true" indexed="true"/>
<!-- required by Solr 4 -->
<field name="_version_" type="string" indexed="true" stored="true" multiValued="false" />
</fields>
<!-- Field to use to determine and enforce document uniqueness.
Unless this field is marked with required="false", it will be a required field
-->
<uniqueKey>id</uniqueKey>
<!-- field for the QueryParser to use when an explicit fieldname is absent -->
<defaultSearchField>text</defaultSearchField>
<!-- SolrQueryParser configuration: defaultOperator="AND|OR" -->
<solrQueryParser defaultOperator="AND"/>
<!-- copyField commands copy one field to another at the time a document
is added to the index. It's used either to index the same field differently,
or to add multiple fields to the same field for easier/faster
searching. -->
<!-- Use copyField to copy the fields you want to run spell checking
on into one field. For example: -->
<copyField source="*_text" dest="textSpell" />
<copyField source="*_s" dest="textSpell" />
</schema>
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
user=
solr_hostname=localhost
solr_port=8983
rsyncd_port=18983
data_dir=
webapp_name=solr
master_host=
master_data_dir=
master_status_dir=
<?xml version="1.0" encoding="UTF-8" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!--
For more details about configurations options that may appear in
this file, see http://wiki.apache.org/solr/SolrConfigXml.
-->
<config>
<!-- In all configuration below, a prefix of "solr." for class names
is an alias that causes solr to search appropriate packages,
including org.apache.solr.(search|update|request|core|analysis)
You may also specify a fully qualified Java classname if you
have your own custom plugins.
-->
<!-- Controls what version of Lucene various components of Solr
adhere to. Generally, you want to use the latest version to
get all bug fixes and improvements. It is highly recommended
that you fully re-index after changing this setting as it can
affect both how text is indexed and queried.
-->
<luceneMatchVersion>LUCENE_41</luceneMatchVersion>
<!-- <lib/> directives can be used to instruct Solr to load an Jars
identified and use them to resolve any "plugins" specified in
your solrconfig.xml or schema.xml (ie: Analyzers, Request
Handlers, etc...).
All directories and paths are resolved relative to the
instanceDir.
Please note that <lib/> directives are processed in the order
that they appear in your solrconfig.xml file, and are "stacked"
on top of each other when building a ClassLoader - so if you have
plugin jars with dependencies on other jars, the "lower level"
dependency jars should be loaded first.
If a "./lib" directory exists in your instanceDir, all files
found in it are included as if you had used the following
syntax...
<lib dir="./lib" />
-->
<dataDir>${solr.data.dir:}</dataDir>
<!-- The DirectoryFactory to use for indexes.
solr.StandardDirectoryFactory is filesystem
based and tries to pick the best implementation for the current
JVM and platform. solr.NRTCachingDirectoryFactory, the default,
wraps solr.StandardDirectoryFactory and caches small files in memory
for better NRT performance.
One can force a particular implementation via solr.MMapDirectoryFactory,
solr.NIOFSDirectoryFactory, or solr.SimpleFSDirectoryFactory.
solr.RAMDirectoryFactory is memory based, not
persistent, and doesn't work with replication.
-->
<directoryFactory name="DirectoryFactory"
class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}"/>
<!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Index Config - These settings control low-level behavior of indexing
Most example settings here show the default value, but are commented
out, to more easily see where customizations have been made.
Note: This replaces <indexDefaults> and <mainIndex> from older versions
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
<indexConfig>
<!-- maxFieldLength was removed in 4.0. To get similar behavior, include a
LimitTokenCountFilterFactory in your fieldType definition. E.g.
<filter class="solr.LimitTokenCountFilterFactory" maxTokenCount="10000"/>
-->
<!-- Maximum time to wait for a write lock (ms) for an IndexWriter. Default: 1000 -->
<!-- <writeLockTimeout>1000</writeLockTimeout> -->
<!-- The maximum number of simultaneous threads that may be
indexing documents at once in IndexWriter; if more than this
many threads arrive they will wait for others to finish.
Default in Solr/Lucene is 8. -->
<maxIndexingThreads>2</maxIndexingThreads>
<useCompoundFile>true</useCompoundFile>
<!-- ramBufferSizeMB sets the amount of RAM that may be used by Lucene
indexing for buffering added documents and deletions before they are
flushed to the Directory.
maxBufferedDocs sets a limit on the number of documents buffered
before flushing.
If both ramBufferSizeMB and maxBufferedDocs is set, then
Lucene will flush based on whichever limit is hit first. -->
<ramBufferSizeMB>20</ramBufferSizeMB> -->
<maxBufferedDocs>10000</maxBufferedDocs>
<!-- Expert: Merge Policy
The Merge Policy in Lucene controls how merging of segments is done.
The default since Solr/Lucene 3.3 is TieredMergePolicy.
The default since Lucene 2.3 was the LogByteSizeMergePolicy,
Even older versions of Lucene used LogDocMergePolicy.
-->
<mergePolicy class="org.apache.lucene.index.TieredMergePolicy">
<int name="maxMergeAtOnce">4</int>
<int name="segmentsPerTier">4</int>
</mergePolicy>
<unlockOnStartup>true</unlockOnStartup>
</indexConfig>
<updateHandler class="solr.DirectUpdateHandler2">
<!-- Enables a transaction log, used for real-time get, durability, and
and solr cloud replica recovery. The log can grow as big as
uncommitted changes to the index, so use of a hard autoCommit
is recommended (see below).
"dir" - the target directory for transaction logs, defaults to the
solr data directory. -->
<updateLog>
<str name="dir">${solr.ulog.dir:}</str>
</updateLog>
<autoCommit>
<maxTime>15000</maxTime>
<openSearcher>false</openSearcher>
</autoCommit>
<autoSoftCommit>
<maxTime>5000</maxTime>
</autoSoftCommit>
</updateHandler>
<query>
<maxBooleanClauses>1024</maxBooleanClauses>
<!-- Solr Internal Query Caches
There are two implementations of cache available for Solr,
LRUCache, based on a synchronized LinkedHashMap, and
FastLRUCache, based on a ConcurrentHashMap.
FastLRUCache has faster gets and slower puts in single
threaded operation and thus is generally faster than LRUCache
when the hit ratio of the cache is high (> 75%), and may be
faster under other scenarios on multi-cpu systems.
-->
<!-- Filter Cache
Cache used by SolrIndexSearcher for filters (DocSets),
unordered sets of *all* documents that match a query. When a
new searcher is opened, its caches may be prepopulated or
"autowarmed" using data from caches in the old searcher.
autowarmCount is the number of items to prepopulate. For
LRUCache, the autowarmed items will be the most recently
accessed items.
Parameters:
class - the SolrCache implementation LRUCache or
(LRUCache or FastLRUCache)
size - the maximum number of entries in the cache
initialSize - the initial capacity (number of entries) of
the cache. (see java.util.HashMap)
autowarmCount - the number of entries to prepopulate from
and old cache.
-->
<filterCache class="solr.FastLRUCache"
size="512"
initialSize="512"
autowarmCount="0"/>
<!-- Query Result Cache
Caches results of searches - ordered lists of document ids
(DocList) based on a query, a sort, and the range of documents requested.
-->
<queryResultCache class="solr.LRUCache"
size="512"
initialSize="512"
autowarmCount="0"/>
<!-- Document Cache
Caches Lucene Document objects (the stored fields for each
document). Since Lucene internal document ids are transient,
this cache will not be autowarmed.
-->
<documentCache class="solr.LRUCache"
size="512"
initialSize="512"
autowarmCount="0"/>
<!-- Lazy Field Loading
If true, stored fields that are not requested will be loaded
lazily. This can result in a significant speed improvement
if the usual case is to not load all stored fields,
especially if the skipped fields are large compressed text
fields.
-->
<enableLazyFieldLoading>true</enableLazyFieldLoading>
<!-- Use Filter For Sorted Query
A possible optimization that attempts to use a filter to
satisfy a search. If the requested sort does not include
score, then the filterCache will be checked for a filter
matching the query. If found, the filter will be used as the
source of document ids, and then the sort will be applied to
that.
For most situations, this will not be useful unless you
frequently get the same search repeatedly with different sort
options, and none of them ever use "score"
-->
<!--
<useFilterForSortedQuery>true</useFilterForSortedQuery>
-->
<!-- Result Window Size
An optimization for use with the queryResultCache. When a search
is requested, a superset of the requested number of document ids
are collected. For example, if a search for a particular query
requests matching documents 10 through 19, and queryWindowSize is 50,
then documents 0 through 49 will be collected and cached. Any further
requests in that range can be satisfied via the cache.
-->
<queryResultWindowSize>20</queryResultWindowSize>
<!-- Maximum number of documents to cache for any entry in the
queryResultCache.
-->
<queryResultMaxDocsCached>200</queryResultMaxDocsCached>
<!-- Query Related Event Listeners
Various IndexSearcher related events can trigger Listeners to
take actions.
newSearcher - fired whenever a new searcher is being prepared
and there is a current searcher handling requests (aka
registered). It can be used to prime certain caches to
prevent long request times for certain requests.
firstSearcher - fired whenever a new searcher is being
prepared but there is no current registered searcher to handle
requests or to gain autowarming data from.
-->
<!-- Use Cold Searcher
If a search request comes in and there is no current
registered searcher, then immediately register the still
warming searcher and use it. If "false" then all requests
will block until the first searcher is done warming.
-->
<useColdSearcher>false</useColdSearcher>
<!-- Max Warming Searchers
Maximum number of searchers that may be warming in the
background concurrently. An error is returned if this limit
is exceeded.
Recommend values of 1-2 for read-only slaves, higher for
masters w/o cache warming.
-->
<maxWarmingSearchers>5</maxWarmingSearchers>
</query>
<!-- Request Dispatcher
This section contains instructions for how the SolrDispatchFilter
should behave when processing requests for this SolrCore.
handleSelect is a legacy option that affects the behavior of requests
such as /select?qt=XXX
handleSelect="true" will cause the SolrDispatchFilter to process
the request and dispatch the query to a handler specified by the
"qt" param, assuming "/select" isn't already registered.
handleSelect="false" will cause the SolrDispatchFilter to
ignore "/select" requests, resulting in a 404 unless a handler
is explicitly registered with the name "/select"
handleSelect="true" is not recommended for new users, but is the default
for backwards compatibility
-->
<requestDispatcher handleSelect="false" >
<!-- Request Parsing
These settings indicate how Solr Requests may be parsed, and
what restrictions may be placed on the ContentStreams from
those requests
enableRemoteStreaming - enables use of the stream.file
and stream.url parameters for specifying remote streams.
multipartUploadLimitInKB - specifies the max size (in KiB) of
Multipart File Uploads that Solr will allow in a Request.
formdataUploadLimitInKB - specifies the max size (in KiB) of
form data (application/x-www-form-urlencoded) sent via
POST. You can use POST to pass request parameters not
fitting into the URL.
*** WARNING ***
The settings below authorize Solr to fetch remote files, You
should make sure your system has some authentication before
using enableRemoteStreaming="true"
-->
<!-- HTTP Caching
Set HTTP caching related parameters (for proxy caches and clients).
The options below instruct Solr not to output any HTTP Caching
related headers
-->
<httpCaching never304="true" />
<!-- If you include a <cacheControl> directive, it will be used to
generate a Cache-Control header (as well as an Expires header
if the value contains "max-age=")
By default, no Cache-Control header is generated.
You can use the <cacheControl> option even if you have set
never304="true"
-->
<!--
<httpCaching never304="true" >
<cacheControl>max-age=30, public</cacheControl>
</httpCaching>
-->
<!-- To enable Solr to respond with automatically generated HTTP
Caching headers, and to response to Cache Validation requests
correctly, set the value of never304="false"
This will cause Solr to generate Last-Modified and ETag
headers based on the properties of the Index.
The following options can also be specified to affect the
values of these headers...
lastModFrom - the default value is "openTime" which means the
Last-Modified value (and validation against If-Modified-Since
requests) will all be relative to when the current Searcher
was opened. You can change it to lastModFrom="dirLastMod" if
you want the value to exactly correspond to when the physical
index was last modified.
etagSeed="..." is an option you can change to force the ETag
header (and validation against If-None-Match requests) to be
different even if the index has not changed (ie: when making
significant changes to your config file)
(lastModifiedFrom and etagSeed are both ignored if you use
the never304="true" option)
-->
<!--
<httpCaching lastModifiedFrom="openTime"
etagSeed="Solr">
<cacheControl>max-age=30, public</cacheControl>
</httpCaching>
-->
</requestDispatcher>
<!-- Request Handlers
http://wiki.apache.org/solr/SolrRequestHandler
Incoming queries will be dispatched to a specific handler by name
based on the path specified in the request.
Legacy behavior: If the request path uses "/select" but no Request
Handler has that name, and if handleSelect="true" has been specified in
the requestDispatcher, then the Request Handler is dispatched based on
the qt parameter. Handlers without a leading '/' are accessed this way
like so: http://host/app/[core/]select?qt=name If no qt is
given, then the requestHandler that declares default="true" will be
used or the one named "standard".
If a Request Handler is declared with startup="lazy", then it will
not be initialized until the first request that uses it.
-->
<!-- SearchHandler
http://wiki.apache.org/solr/SearchHandler
For processing Search Queries, the primary Request Handler
provided with Solr is "SearchHandler" It delegates to a sequent
of SearchComponents (see below) and supports distributed
queries across multiple shards
-->
<requestHandler name="/select" class="solr.SearchHandler">
<lst name="defaults">
<str name="spellcheck.dictionary">default</str>
<str name="spellcheck.extendedResults">true</str>
<str name="spellcheck.collate">true</str>
</lst>
<arr name="last-components">
<str>spellcheck</str>
</arr>
</requestHandler>
<!-- A request handler that returns indented JSON by default -->
<requestHandler name="/query" class="solr.SearchHandler">
<lst name="defaults">
<str name="echoParams">explicit</str>
<str name="wt">json</str>
<str name="indent">true</str>
<str name="df">text</str>
</lst>
<arr name="last-components">
<str>spellcheck</str>
</arr>
</requestHandler>
<!-- realtime get handler, guaranteed to return the latest stored fields of
any document, without the need to commit or open a new searcher. The
current implementation relies on the updateLog feature being enabled. -->
<requestHandler name="/get" class="solr.RealTimeGetHandler">
<lst name="defaults">
<str name="omitHeader">true</str>
<str name="wt">json</str>
<str name="indent">true</str>
</lst>
</requestHandler>
<requestHandler name="/update" class="solr.UpdateRequestHandler">
</requestHandler>
<requestHandler name="/update/json" class="solr.JsonUpdateRequestHandler">
<lst name="defaults">
<str name="stream.contentType">application/json</str>
</lst>
</requestHandler>
<requestHandler name="/update/csv" class="solr.CSVRequestHandler">
<lst name="defaults">
<str name="stream.contentType">application/csv</str>
</lst>
</requestHandler>
<requestHandler name="/update/extract"
startup="lazy"
class="solr.extraction.ExtractingRequestHandler" >
<lst name="defaults">
<str name="lowernames">true</str>
<str name="uprefix">ignored_</str>
<!-- capture link hrefs but ignore div attributes -->
<str name="captureAttr">true</str>
<str name="fmap.a">links</str>
<str name="fmap.div">ignored_</str>
</lst>
</requestHandler>
<requestHandler name="/analysis/field"
startup="lazy"
class="solr.FieldAnalysisRequestHandler" />
<requestHandler name="/analysis/document"
class="solr.DocumentAnalysisRequestHandler"
startup="lazy" />
<!-- ping/healthcheck -->
<requestHandler name="/admin/ping" class="solr.PingRequestHandler">
<lst name="invariants">
<str name="q">solrpingquery</str>
</lst>
<lst name="defaults">
<str name="echoParams">all</str>
</lst>
</requestHandler>
<requestHandler name="/debug/dump" class="solr.DumpRequestHandler" >
<lst name="defaults">
<str name="echoParams">explicit</str>
<str name="echoHandler">true</str>
</lst>
</requestHandler>
<requestHandler name="/replication" class="solr.ReplicationHandler" >
</requestHandler>
<searchComponent name="spellcheck" class="solr.SpellCheckComponent">
<str name="queryAnalyzerFieldType">textSpell</str>
<!-- Multiple "Spell Checkers" can be declared and used by this
component
-->
<lst name="spellchecker">
<str name="name">default</str>
<!-- change field to textSpell and use copyField in schema.xml
to spellcheck multiple fields -->
<str name="field">textSpell</str>
<str name="buildOnCommit">true</str>
</lst>
<lst name="spellchecker">
<str name="name">example</str>
<str name="field">title_text</str>
<str name="buildOnCommit">true</str>
<str name="classname">solr.DirectSolrSpellChecker</str>
<!-- the spellcheck distance measure used, the default is the internal levenshtein -->
<str name="distanceMeasure">internal</str>
<!-- minimum accuracy needed to be considered a valid spellcheck suggestion -->
<float name="accuracy">0.5</float>
<!-- the maximum #edits we consider when enumerating terms: can be 1 or 2 -->
<int name="maxEdits">2</int>
<!-- the minimum shared prefix when enumerating terms -->
<int name="minPrefix">1</int>
<!-- maximum number of inspections per result. -->
<int name="maxInspections">5</int>
<!-- minimum length of a query term to be considered for correction -->
<int name="minQueryLength">4</int>
<!-- maximum threshold of documents a query term can appear to be considered for correction -->
<float name="maxQueryFrequency">0.01</float>
<!-- uncomment this to require suggestions to occur in 1% of the documents
<float name="thresholdTokenFrequency">.01</float>
-->
</lst>
<!-- a spellchecker that can break or combine words. See "/spell" handler below for usage -->
<lst name="spellchecker">
<str name="name">wordbreak</str>
<str name="classname">solr.WordBreakSolrSpellChecker</str>
<str name="field">name</str>
<str name="combineWords">true</str>
<str name="breakWords">true</str>
<int name="maxChanges">10</int>
</lst>
-->
</searchComponent>
<searchComponent name="tvComponent" class="solr.TermVectorComponent"/>
<searchComponent name="terms" class="solr.TermsComponent"/>
<searchComponent class="solr.HighlightComponent" name="highlight">
<highlighting>
<!-- Configure the standard fragmenter -->
<!-- This could most likely be commented out in the "default" case -->
<fragmenter name="gap"
default="true"
class="solr.highlight.GapFragmenter">
<lst name="defaults">
<int name="hl.fragsize">100</int>
</lst>
</fragmenter>
<!-- A regular-expression-based fragmenter
(for sentence extraction)
-->
<fragmenter name="regex"
class="solr.highlight.RegexFragmenter">
<lst name="defaults">
<!-- slightly smaller fragsizes work better because of slop -->
<int name="hl.fragsize">70</int>
<!-- allow 50% slop on fragment sizes -->
<float name="hl.regex.slop">0.5</float>
<!-- a basic sentence pattern -->
<str name="hl.regex.pattern">[-\w ,/\n\&quot;&apos;]{20,200}</str>
</lst>
</fragmenter>
<!-- Configure the standard formatter -->
<formatter name="html"
default="true"
class="solr.highlight.HtmlFormatter">
<lst name="defaults">
<str name="hl.simple.pre"><![CDATA[<em>]]></str>
<str name="hl.simple.post"><![CDATA[</em>]]></str>
</lst>
</formatter>
<!-- Configure the standard encoder -->
<encoder name="html"
class="solr.highlight.HtmlEncoder" />
<!-- Configure the standard fragListBuilder -->
<fragListBuilder name="simple"
class="solr.highlight.SimpleFragListBuilder"/>
<!-- Configure the single fragListBuilder -->
<fragListBuilder name="single"
class="solr.highlight.SingleFragListBuilder"/>
<!-- Configure the weighted fragListBuilder -->
<fragListBuilder name="weighted"
default="true"
class="solr.highlight.WeightedFragListBuilder"/>
<!-- default tag FragmentsBuilder -->
<fragmentsBuilder name="default"
default="true"
class="solr.highlight.ScoreOrderFragmentsBuilder">
<!--
<lst name="defaults">
<str name="hl.multiValuedSeparatorChar">/</str>
</lst>
-->
</fragmentsBuilder>
<!-- multi-colored tag FragmentsBuilder -->
<fragmentsBuilder name="colored"
class="solr.highlight.ScoreOrderFragmentsBuilder">
<lst name="defaults">
<str name="hl.tag.pre"><![CDATA[
<b style="background:yellow">,<b style="background:lawgreen">,
<b style="background:aquamarine">,<b style="background:magenta">,
<b style="background:palegreen">,<b style="background:coral">,
<b style="background:wheat">,<b style="background:khaki">,
<b style="background:lime">,<b style="background:deepskyblue">]]></str>
<str name="hl.tag.post"><![CDATA[</b>]]></str>
</lst>
</fragmentsBuilder>
<boundaryScanner name="default"
default="true"
class="solr.highlight.SimpleBoundaryScanner">
<lst name="defaults">
<str name="hl.bs.maxScan">10</str>
<str name="hl.bs.chars">.,!? &#9;&#10;&#13;</str>
</lst>
</boundaryScanner>
<boundaryScanner name="breakIterator"
class="solr.highlight.BreakIteratorBoundaryScanner">
<lst name="defaults">
<!-- type should be one of CHARACTER, WORD(default), LINE and SENTENCE -->
<str name="hl.bs.type">WORD</str>
<!-- language and country are used when constructing Locale object. -->
<!-- And the Locale object will be used when getting instance of BreakIterator -->
<str name="hl.bs.language">en</str>
<str name="hl.bs.country">US</str>
</lst>
</boundaryScanner>
</highlighting>
</searchComponent>
<requestHandler class="solr.MoreLikeThisHandler" name="/mlt">
<lst name="defaults">
<str name="mlt.mintf">1</str>
<str name="mlt.mindf">2</str>
</lst>
</requestHandler>
<!-- Admin Handlers - This will register all the standard admin RequestHandlers. -->
<requestHandler name="/admin/" class="solr.admin.AdminHandlers" />
</config>
pizza
history
\ No newline at end of file
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#-----------------------------------------------------------------------
# a couple of test stopwords to test that the words are really being
# configured from this file:
stopworda
stopwordb
#Standard english stop words taken from Lucene's StopAnalyzer
a
an
and
are
as
at
be
but
by
for
if
in
into
is
it
no
not
of
on
or
s
such
t
that
the
their
then
there
these
they
this
to
was
will
with
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#-----------------------------------------------------------------------
#some test synonym mappings unlikely to appear in real input text
aaa => aaaa
bbb => bbbb1 bbbb2
ccc => cccc1,cccc2
a\=>a => b\=>b
a\,a => b\,b
fooaaa,baraaa,bazaaa
# Some synonym groups specific to this example
GB,gib,gigabyte,gigabytes
MB,mib,megabyte,megabytes
Television, Televisions, TV, TVs
#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming
#after us won't split it into two words.
# Synonym mappings can be used for spelling correction too
pixima => pixma
<?xml version="1.0" encoding="UTF-8" ?>
<solr persistent="false">
<cores adminPath="/admin/cores" host="${host:}" hostPort="${jetty.port:}">
<core name="default" instanceDir="." dataDir="default/data"/>
<core name="development" instanceDir="." dataDir="development/data"/>
<core name="test" instanceDir="." dataDir="test/data"/>
</cores>
</solr>
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment