Commit 78a9beae by Mai Hoang Thai Ha

search feature

parent bac18c77
...@@ -66,3 +66,5 @@ gem 'httparty', '~> 0.18.1' ...@@ -66,3 +66,5 @@ gem 'httparty', '~> 0.18.1'
gem 'active_storage_validations', '~> 0.9.5' gem 'active_storage_validations', '~> 0.9.5'
gem 'devise', '~> 4.8' gem 'devise', '~> 4.8'
gem 'jquery-rails', '~> 4.4' gem 'jquery-rails', '~> 4.4'
gem 'rsolr', '~> 2.3'
gem 'rsolr-ext', '~> 1.0', '>= 1.0.3'
GIT GIT
remote: https://github.com/kaminari/kaminari remote: https://github.com/kaminari/kaminari
revision: 45f13dbdaa98be3733ffe6b0e8e948da6919f116 revision: 73b93405b95615b5ad3f53c3dffe419a59890cad
specs: specs:
kaminari (1.2.1) kaminari (1.2.1)
activesupport (>= 4.1.0) activesupport (>= 4.1.0)
...@@ -18,62 +18,62 @@ GIT ...@@ -18,62 +18,62 @@ GIT
GEM GEM
remote: https://rubygems.org/ remote: https://rubygems.org/
specs: specs:
actioncable (6.1.4) actioncable (6.1.4.1)
actionpack (= 6.1.4) actionpack (= 6.1.4.1)
activesupport (= 6.1.4) activesupport (= 6.1.4.1)
nio4r (~> 2.0) nio4r (~> 2.0)
websocket-driver (>= 0.6.1) websocket-driver (>= 0.6.1)
actionmailbox (6.1.4) actionmailbox (6.1.4.1)
actionpack (= 6.1.4) actionpack (= 6.1.4.1)
activejob (= 6.1.4) activejob (= 6.1.4.1)
activerecord (= 6.1.4) activerecord (= 6.1.4.1)
activestorage (= 6.1.4) activestorage (= 6.1.4.1)
activesupport (= 6.1.4) activesupport (= 6.1.4.1)
mail (>= 2.7.1) mail (>= 2.7.1)
actionmailer (6.1.4) actionmailer (6.1.4.1)
actionpack (= 6.1.4) actionpack (= 6.1.4.1)
actionview (= 6.1.4) actionview (= 6.1.4.1)
activejob (= 6.1.4) activejob (= 6.1.4.1)
activesupport (= 6.1.4) activesupport (= 6.1.4.1)
mail (~> 2.5, >= 2.5.4) mail (~> 2.5, >= 2.5.4)
rails-dom-testing (~> 2.0) rails-dom-testing (~> 2.0)
actionpack (6.1.4) actionpack (6.1.4.1)
actionview (= 6.1.4) actionview (= 6.1.4.1)
activesupport (= 6.1.4) activesupport (= 6.1.4.1)
rack (~> 2.0, >= 2.0.9) rack (~> 2.0, >= 2.0.9)
rack-test (>= 0.6.3) rack-test (>= 0.6.3)
rails-dom-testing (~> 2.0) rails-dom-testing (~> 2.0)
rails-html-sanitizer (~> 1.0, >= 1.2.0) rails-html-sanitizer (~> 1.0, >= 1.2.0)
actiontext (6.1.4) actiontext (6.1.4.1)
actionpack (= 6.1.4) actionpack (= 6.1.4.1)
activerecord (= 6.1.4) activerecord (= 6.1.4.1)
activestorage (= 6.1.4) activestorage (= 6.1.4.1)
activesupport (= 6.1.4) activesupport (= 6.1.4.1)
nokogiri (>= 1.8.5) nokogiri (>= 1.8.5)
actionview (6.1.4) actionview (6.1.4.1)
activesupport (= 6.1.4) activesupport (= 6.1.4.1)
builder (~> 3.1) builder (~> 3.1)
erubi (~> 1.4) erubi (~> 1.4)
rails-dom-testing (~> 2.0) rails-dom-testing (~> 2.0)
rails-html-sanitizer (~> 1.1, >= 1.2.0) rails-html-sanitizer (~> 1.1, >= 1.2.0)
active_storage_validations (0.9.5) active_storage_validations (0.9.5)
rails (>= 5.2.0) rails (>= 5.2.0)
activejob (6.1.4) activejob (6.1.4.1)
activesupport (= 6.1.4) activesupport (= 6.1.4.1)
globalid (>= 0.3.6) globalid (>= 0.3.6)
activemodel (6.1.4) activemodel (6.1.4.1)
activesupport (= 6.1.4) activesupport (= 6.1.4.1)
activerecord (6.1.4) activerecord (6.1.4.1)
activemodel (= 6.1.4) activemodel (= 6.1.4.1)
activesupport (= 6.1.4) activesupport (= 6.1.4.1)
activestorage (6.1.4) activestorage (6.1.4.1)
actionpack (= 6.1.4) actionpack (= 6.1.4.1)
activejob (= 6.1.4) activejob (= 6.1.4.1)
activerecord (= 6.1.4) activerecord (= 6.1.4.1)
activesupport (= 6.1.4) activesupport (= 6.1.4.1)
marcel (~> 1.0.0) marcel (~> 1.0.0)
mini_mime (>= 1.1.0) mini_mime (>= 1.1.0)
activesupport (6.1.4) activesupport (6.1.4.1)
concurrent-ruby (~> 1.0, >= 1.0.2) concurrent-ruby (~> 1.0, >= 1.0.2)
i18n (>= 1.6, < 2) i18n (>= 1.6, < 2)
minitest (>= 5.1) minitest (>= 5.1)
...@@ -84,10 +84,10 @@ GEM ...@@ -84,10 +84,10 @@ GEM
ast (2.4.2) ast (2.4.2)
bcrypt (3.1.16) bcrypt (3.1.16)
bindex (0.8.1) bindex (0.8.1)
bootsnap (1.7.5) bootsnap (1.9.1)
msgpack (~> 1.0) msgpack (~> 1.0)
builder (3.2.4) builder (3.2.4)
bullet (6.1.4) bullet (6.1.5)
activesupport (>= 3.0.0) activesupport (>= 3.0.0)
uniform_notifier (~> 1.11) uniform_notifier (~> 1.11)
byebug (11.1.3) byebug (11.1.3)
...@@ -110,9 +110,28 @@ GEM ...@@ -110,9 +110,28 @@ GEM
responders responders
warden (~> 1.2.3) warden (~> 1.2.3)
erubi (1.10.0) erubi (1.10.0)
ffi (1.15.3) faraday (1.8.0)
globalid (0.4.2) faraday-em_http (~> 1.0)
activesupport (>= 4.2.0) faraday-em_synchrony (~> 1.0)
faraday-excon (~> 1.1)
faraday-httpclient (~> 1.0.1)
faraday-net_http (~> 1.0)
faraday-net_http_persistent (~> 1.1)
faraday-patron (~> 1.0)
faraday-rack (~> 1.0)
multipart-post (>= 1.2, < 3)
ruby2_keywords (>= 0.0.4)
faraday-em_http (1.0.0)
faraday-em_synchrony (1.0.0)
faraday-excon (1.1.0)
faraday-httpclient (1.0.1)
faraday-net_http (1.0.1)
faraday-net_http_persistent (1.2.0)
faraday-patron (1.0.0)
faraday-rack (1.0.0)
ffi (1.15.4)
globalid (0.5.2)
activesupport (>= 5.0)
httparty (0.18.1) httparty (0.18.1)
mime-types (~> 3.0) mime-types (~> 3.0)
multi_xml (>= 0.5.2) multi_xml (>= 0.5.2)
...@@ -124,29 +143,30 @@ GEM ...@@ -124,29 +143,30 @@ GEM
rails-dom-testing (>= 1, < 3) rails-dom-testing (>= 1, < 3)
railties (>= 4.2.0) railties (>= 4.2.0)
thor (>= 0.14, < 2.0) thor (>= 0.14, < 2.0)
listen (3.5.1) listen (3.7.0)
rb-fsevent (~> 0.10, >= 0.10.3) rb-fsevent (~> 0.10, >= 0.10.3)
rb-inotify (~> 0.9, >= 0.9.10) rb-inotify (~> 0.9, >= 0.9.10)
loofah (2.10.0) loofah (2.12.0)
crass (~> 1.0.2) crass (~> 1.0.2)
nokogiri (>= 1.5.9) nokogiri (>= 1.5.9)
mail (2.7.1) mail (2.7.1)
mini_mime (>= 0.1.1) mini_mime (>= 0.1.1)
marcel (1.0.1) marcel (1.0.2)
method_source (1.0.0) method_source (1.0.0)
mime-types (3.3.1) mime-types (3.3.1)
mime-types-data (~> 3.2015) mime-types-data (~> 3.2015)
mime-types-data (3.2021.0704) mime-types-data (3.2021.0901)
mini_mime (1.1.0) mini_mime (1.1.1)
minitest (5.14.4) minitest (5.14.4)
msgpack (1.4.2) msgpack (1.4.2)
multi_xml (0.6.0) multi_xml (0.6.0)
multipart-post (2.1.1)
mysql2 (0.5.3) mysql2 (0.5.3)
nio4r (2.5.7) nio4r (2.5.8)
nokogiri (1.11.7-x86_64-linux) nokogiri (1.12.5-x86_64-linux)
racc (~> 1.4) racc (~> 1.4)
orm_adapter (0.5.0) orm_adapter (0.5.0)
parallel (1.20.1) parallel (1.21.0)
parser (3.0.2.0) parser (3.0.2.0)
ast (~> 2.4.1) ast (~> 2.4.1)
pry (0.14.1) pry (0.14.1)
...@@ -155,39 +175,39 @@ GEM ...@@ -155,39 +175,39 @@ GEM
pry-rails (0.3.9) pry-rails (0.3.9)
pry (>= 0.10.4) pry (>= 0.10.4)
public_suffix (4.0.6) public_suffix (4.0.6)
puma (5.3.2) puma (5.5.0)
nio4r (~> 2.0) nio4r (~> 2.0)
racc (1.5.2) racc (1.5.2)
rack (2.2.3) rack (2.2.3)
rack-mini-profiler (2.3.2) rack-mini-profiler (2.3.3)
rack (>= 1.2.0) rack (>= 1.2.0)
rack-proxy (0.7.0) rack-proxy (0.7.0)
rack rack
rack-test (1.1.0) rack-test (1.1.0)
rack (>= 1.0, < 3) rack (>= 1.0, < 3)
rails (6.1.4) rails (6.1.4.1)
actioncable (= 6.1.4) actioncable (= 6.1.4.1)
actionmailbox (= 6.1.4) actionmailbox (= 6.1.4.1)
actionmailer (= 6.1.4) actionmailer (= 6.1.4.1)
actionpack (= 6.1.4) actionpack (= 6.1.4.1)
actiontext (= 6.1.4) actiontext (= 6.1.4.1)
actionview (= 6.1.4) actionview (= 6.1.4.1)
activejob (= 6.1.4) activejob (= 6.1.4.1)
activemodel (= 6.1.4) activemodel (= 6.1.4.1)
activerecord (= 6.1.4) activerecord (= 6.1.4.1)
activestorage (= 6.1.4) activestorage (= 6.1.4.1)
activesupport (= 6.1.4) activesupport (= 6.1.4.1)
bundler (>= 1.15.0) bundler (>= 1.15.0)
railties (= 6.1.4) railties (= 6.1.4.1)
sprockets-rails (>= 2.0.0) sprockets-rails (>= 2.0.0)
rails-dom-testing (2.0.3) rails-dom-testing (2.0.3)
activesupport (>= 4.2.0) activesupport (>= 4.2.0)
nokogiri (>= 1.6) nokogiri (>= 1.6)
rails-html-sanitizer (1.3.0) rails-html-sanitizer (1.4.2)
loofah (~> 2.3) loofah (~> 2.3)
railties (6.1.4) railties (6.1.4.1)
actionpack (= 6.1.4) actionpack (= 6.1.4.1)
activesupport (= 6.1.4) activesupport (= 6.1.4.1)
method_source method_source
rake (>= 0.13) rake (>= 0.13)
thor (~> 1.0) thor (~> 1.0)
...@@ -201,22 +221,28 @@ GEM ...@@ -201,22 +221,28 @@ GEM
actionpack (>= 5.0) actionpack (>= 5.0)
railties (>= 5.0) railties (>= 5.0)
rexml (3.2.5) rexml (3.2.5)
rubocop (1.18.3) rsolr (2.3.0)
builder (>= 2.1.2)
faraday (>= 0.9.0)
rsolr-ext (1.0.3)
rsolr (>= 1.0.2)
rubocop (1.22.0)
parallel (~> 1.10) parallel (~> 1.10)
parser (>= 3.0.0.0) parser (>= 3.0.0.0)
rainbow (>= 2.2.2, < 4.0) rainbow (>= 2.2.2, < 4.0)
regexp_parser (>= 1.8, < 3.0) regexp_parser (>= 1.8, < 3.0)
rexml rexml
rubocop-ast (>= 1.7.0, < 2.0) rubocop-ast (>= 1.12.0, < 2.0)
ruby-progressbar (~> 1.7) ruby-progressbar (~> 1.7)
unicode-display_width (>= 1.4.0, < 3.0) unicode-display_width (>= 1.4.0, < 3.0)
rubocop-ast (1.8.0) rubocop-ast (1.12.0)
parser (>= 3.0.1.1) parser (>= 3.0.1.1)
rubocop-rails (2.11.3) rubocop-rails (2.12.2)
activesupport (>= 4.2.0) activesupport (>= 4.2.0)
rack (>= 1.1) rack (>= 1.1)
rubocop (>= 1.7.0, < 2.0) rubocop (>= 1.7.0, < 2.0)
ruby-progressbar (1.11.0) ruby-progressbar (1.11.0)
ruby2_keywords (0.0.5)
rubyzip (2.3.2) rubyzip (2.3.2)
sass-rails (6.0.0) sass-rails (6.0.0)
sassc-rails (~> 2.1, >= 2.1.1) sassc-rails (~> 2.1, >= 2.1.1)
...@@ -239,7 +265,7 @@ GEM ...@@ -239,7 +265,7 @@ GEM
actionpack (>= 3.1) actionpack (>= 3.1)
railties (>= 3.1) railties (>= 3.1)
slim (>= 3.0, < 5.0) slim (>= 3.0, < 5.0)
spring (2.1.1) spring (3.0.0)
sprockets (4.0.2) sprockets (4.0.2)
concurrent-ruby (~> 1.0) concurrent-ruby (~> 1.0)
rack (> 1, < 3) rack (> 1, < 3)
...@@ -255,7 +281,7 @@ GEM ...@@ -255,7 +281,7 @@ GEM
turbolinks-source (5.2.0) turbolinks-source (5.2.0)
tzinfo (2.0.4) tzinfo (2.0.4)
concurrent-ruby (~> 1.0) concurrent-ruby (~> 1.0)
unicode-display_width (2.0.0) unicode-display_width (2.1.0)
uniform_notifier (1.14.2) uniform_notifier (1.14.2)
warden (1.2.9) warden (1.2.9)
rack (>= 2.0.9) rack (>= 2.0.9)
...@@ -264,11 +290,11 @@ GEM ...@@ -264,11 +290,11 @@ GEM
activemodel (>= 6.0.0) activemodel (>= 6.0.0)
bindex (>= 0.4.0) bindex (>= 0.4.0)
railties (>= 6.0.0) railties (>= 6.0.0)
webdrivers (4.6.0) webdrivers (4.6.1)
nokogiri (~> 1.6) nokogiri (~> 1.6)
rubyzip (>= 1.3.0) rubyzip (>= 1.3.0)
selenium-webdriver (>= 3.0, < 4.0) selenium-webdriver (>= 3.0, < 4.0)
webpacker (5.4.0) webpacker (5.4.3)
activesupport (>= 5.2) activesupport (>= 5.2)
rack-proxy (>= 0.6.1) rack-proxy (>= 0.6.1)
railties (>= 5.2) railties (>= 5.2)
...@@ -302,6 +328,8 @@ DEPENDENCIES ...@@ -302,6 +328,8 @@ DEPENDENCIES
puma (~> 5.0) puma (~> 5.0)
rack-mini-profiler (~> 2.0) rack-mini-profiler (~> 2.0)
rails (~> 6.1.3, >= 6.1.3.2) rails (~> 6.1.3, >= 6.1.3.2)
rsolr (~> 2.3)
rsolr-ext (~> 1.0, >= 1.0.3)
rubocop-rails (~> 2.11, >= 2.11.3) rubocop-rails (~> 2.11, >= 2.11.3)
sass-rails (~> 6.0) sass-rails (~> 6.0)
selenium-webdriver selenium-webdriver
......
...@@ -2,11 +2,21 @@ class JobsController < ApplicationController ...@@ -2,11 +2,21 @@ class JobsController < ApplicationController
before_action :history, only: :show before_action :history, only: :show
def index def index
if job_params.present? @search = params
search solr = Solr.new(@search)
if params[:city_slug]
jobs_query = solr.query_by_city
@name = City.find_by(slug: params[:city_slug]).name
elsif params[:industry_slug]
jobs_query = solr.query_by_industry
@name = Industry.find_by(slug: params[:industry_slug]).name
else else
@jobs = Job.sort_by_date(page: params[:page], per_page: Job::JOB_PER_PAGE) jobs_query = solr.query_all
@name = 'Jobs'
end end
get_jobs(jobs_query)
@jobs = Kaminari.paginate_array(@jobs).page(params[:page]).per(Job::JOB_PER_PAGE)
end end
def show def show
...@@ -23,15 +33,9 @@ class JobsController < ApplicationController ...@@ -23,15 +33,9 @@ class JobsController < ApplicationController
history.update(updated_at: Time.current) history.update(updated_at: Time.current)
end end
def search def get_jobs(query)
if job_params.key?(:model) && job_params.key?(:slug) # search by model jobs_ids = query['docs'].map { |j| j['job_id'] }
model = params[:model].classify.constantize @jobs = Job.includes(:cities, :cities_jobs, :company).find(jobs_ids)
@target = model.find_by(slug: job_params[:slug]) @jobs_count = query['numFound']
@jobs = @target.jobs.sort_by_date(page: params[:page], per_page: Job::JOB_PER_PAGE)
end
end
def job_params
params.permit(:model, :slug, :search)
end end
end end
...@@ -14,16 +14,21 @@ module ApplicationHelper ...@@ -14,16 +14,21 @@ module ApplicationHelper
def show_breadcrumb(list, model) def show_breadcrumb(list, model)
a = list.map do |item| a = list.map do |item|
link_to item.name, job_list_path(model: model, slug: item.slug), class: 'mx-1 text-decoration-none text-info' case model
when :city
link_to item.name, city_jobs_path(city_slug: item.slug), class: 'mx-1 text-decoration-none text-info'
when :industry
link_to item.name, industry_jobs_path(industry_slug: item.slug), class: 'mx-1 text-decoration-none text-info'
end
end end
a.join('|').html_safe a.join('|').html_safe
end end
# breadcrumb def view_search_result
# def show_breadcrumb(job) if params[:search].blank?
# a = link_to 'Home', root_path, class: 'text-decoration-none text-info mx-1 ' @name.to_s
# span = tag.span '>', class: 'mx-1' else
# arr = [a, show_item(job.cities, :city), show_item(job.industries, :industry), job.title] "Totals #{@jobs_count} result for #{params[:search]} in #{@name}"
# arr.join(span).html_safe end
# end end
end end
# Gateway to the Solr core behind job search: indexes Job records and runs the
# queries used by the job listing pages.
#
# The endpoint is read from the SOLR_URL environment variable; when it is not
# set, the address that used to be hard-coded here (DEFAULT_URL) is used.
class Solr
  DEFAULT_URL = 'http://192.168.1.8:8983/solr/VenJob/'
  MATCH_ALL = '*:*'

  # @param params [#[]] request parameters; the query methods read the
  #   :search, :city_slug and :industry_slug keys.
  def initialize(params = { search: MATCH_ALL })
    @solr = RSolr.connect(url: ENV.fetch('SOLR_URL', DEFAULT_URL))
    @params = params
  end

  # Indexes every Job, batch by batch, committing after each batch.
  # A document that Solr rejects is reported on stderr and skipped so one bad
  # record does not abort the whole run.
  def add_db
    Job.includes(:cities, :industries, :company).find_in_batches do |jobs|
      jobs.map { |job| job_document(job) }.each do |document|
        @solr.add(document)
      rescue StandardError => e
        # StandardError only: interrupts and exits must still stop the task.
        warn "[Solr] skipped job #{document[:job_id]}: #{e.class}: #{e.message}"
      end
      @solr.commit
    end
  end

  # Removes every document from the core.
  def delete_db
    @solr.delete_by_query('*:*')
    @solr.commit
  end

  # Searches all jobs for the :search term.
  # @return [Hash] the 'response' part of the Solr reply ('numFound', 'docs').
  def query_all
    send_request(search_query, '')
  end

  # Searches jobs within the city identified by :city_slug.
  # @return [Hash] Solr 'response' hash, or an empty result for an unknown slug.
  def query_by_city
    city = City.find_by(slug: @params[:city_slug])
    return empty_result unless city

    # NOTE(review): documents are indexed with ' / ' rewritten to ', ' in the
    # name, while this filter uses the raw name — confirm both still match.
    send_request(search_query, "cities_name: \"#{escape_str(city.name)}\"")
  end

  # Searches jobs within the industry identified by :industry_slug.
  # @return [Hash] Solr 'response' hash, or an empty result for an unknown slug.
  def query_by_industry
    industry = Industry.find_by(slug: @params[:industry_slug])
    return empty_result unless industry

    send_request(search_query, "industries_name: \"#{escape_str(industry.name)}\"")
  end

  # Runs a select request and returns the 'response' section of the reply.
  # @param q_param [String] main query
  # @param fq_param [String] filter query ('' for none)
  def send_request(q_param, fq_param)
    response = @solr.get 'select', params: {
      q: q_param,
      fq: fq_param,
      # Ask for as many rows as there are jobs so nothing is cut off.
      rows: Job.count
    }
    response['response']
  end

  # Escapes Solr query syntax characters in +str+.
  def escape_str(str)
    RSolr.solr_escape(str)
  end

  private

  # Builds the Solr document for one job.
  def job_document(job)
    {
      job_id: job.id,
      job_title: job.title,
      company_name: job.company.name,
      job_level: job.position,
      min_salary: job.min_salary,
      max_salary: job.max_salary,
      cities_name: display_names(job.cities),
      # ids are taken from the records already loaded by add_db.
      city_id: job.cities.map(&:id),
      industries_name: display_names(job.industries),
      industry_id: job.industries.map(&:id)
    }
  end

  # Names of +records+ with ' / ' separators rewritten as ', '.
  def display_names(records)
    records.map { |record| record.name.gsub(' / ', ', ') }
  end

  # Wildcard query on the "search" field for the user's term. The term is
  # escaped so characters such as '+', ':' or '"' cannot break or alter the
  # query; a blank term (or the '*:*' default) matches every document.
  def search_query
    term = @params[:search].to_s.strip
    return 'search:*' if term.empty? || term == MATCH_ALL

    "search:*#{escape_str(term)}*"
  end

  # Result returned when the requested city/industry does not exist. String
  # keys mirror what send_request returns, so callers can read 'docs' and
  # 'numFound' the same way in both cases.
  def empty_result
    { 'numFound' => 0, 'docs' => [] }
  end
end
...@@ -22,5 +22,5 @@ ...@@ -22,5 +22,5 @@
.col-lg-4.col-md-6.text-center .col-lg-4.col-md-6.text-center
.mt-5 .mt-5
.city-item .city-item
h3.h4.mb-2.see-more-text= link_to city[0], job_list_path(model: :city, slug: city[1]) , class:'text-decoration-none fw-normal text-reset' h3.h4.mb-2.see-more-text= link_to city[0], city_jobs_path(city_slug: city[1]) , class:'text-decoration-none fw-normal text-reset'
p.text-muted.mb-0= pluralize(count, 'job') p.text-muted.mb-0= pluralize(count, 'job')
\ No newline at end of file
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
- @job_quantity_by_industry.each do |name, job_count| - @job_quantity_by_industry.each do |name, job_count|
.col-lg-6.col-md-6.text-center .col-lg-6.col-md-6.text-center
.mt-5 .mt-5
h3.h4.mb-2.see-more-text= link_to name[0], job_list_path(model: :industry, slug: name[1]) , class:'text-decoration-none fw-normal text-reset' h3.h4.mb-2.see-more-text= link_to name[0], industry_jobs_path(industry_slug: name[1]) , class:'text-decoration-none fw-normal text-reset'
p.text-muted.mb-0= pluralize(job_count, 'job') p.text-muted.mb-0= pluralize(job_count, 'job')
hr.divider.my-4 hr.divider.my-4
- provide(:title, 'Job list page') - provide(:title, 'Job list page')
/ search box / search box
= render 'shared/search' = render 'shared/search'
.container .container
h1.mt-5 h3
= @name = view_search_result
hr.my-4 hr.my-4
.no-padding.d-flex.align-items-center.flex-column .no-padding.d-flex.align-items-center.flex-column
.page-info.p-2 .page-info.p-2
...@@ -34,4 +35,4 @@ ...@@ -34,4 +35,4 @@
.page-info.p-2 .page-info.p-2
= page_entries_info @jobs = page_entries_info @jobs
.page-info.p-2 .page-info.p-2
= paginate @jobs = paginate @jobs
\ No newline at end of file \ No newline at end of file
.search.text-center.my-5 .search.text-center.my-5
.container .container
.row -if params[:city_slug]
.col-md-10.mb-md-0.no-padding = form_tag(city_jobs_path(city_slug: params[:city_slug]), method: :get, class: "form-inline") do
span.fa.fa-search.form-control-feedback .row
= search_field_tag :search, params[:search], placeholder: 'Find a job', class: 'form-control rounded-left no-border-radius bg-light h-100' .col-md-10.mb-md-0.no-padding
.col-md-2.mb-md-0.no-padding = search_field_tag :search, params[:search], placeholder: 'Find a job in this location', class: 'form-control rounded-left no-border-radius bg-light h-100'
= button_tag '', class: 'h-100 w-100 btn btn-block btn-lg btn-info', name: nil .col-md-2.mb-md-0.no-padding
| Search = submit_tag "Search", class: "h-100 w-100 btn btn-block btn-lg btn-info"
\ No newline at end of file -elsif params[:industry_slug]
= form_tag(industry_jobs_path(industry_slug: params[:industry_slug]), method: :get, class: "form-inline") do
.row
.col-md-10.mb-md-0.no-padding
= search_field_tag :search, params[:search], placeholder: 'Find a job in this industy', class: 'form-control rounded-left no-border-radius bg-light h-100'
.col-md-2.mb-md-0.no-padding
= submit_tag "Search", class: "h-100 w-100 btn btn-block btn-lg btn-info"
- else
= form_tag(jobs_path, method: :get, class: "form-inline") do
.row
.col-md-10.mb-md-0.no-padding
= search_field_tag :search, params[:search], placeholder: 'Find a job (name, company, position)', class: 'form-control rounded-left no-border-radius bg-light h-100'
.col-md-2.mb-md-0.no-padding
= submit_tag "Search", class: "h-100 w-100 btn btn-block btn-lg btn-info"
\ No newline at end of file
...@@ -7,6 +7,7 @@ ...@@ -7,6 +7,7 @@
h2.text-white h2.text-white
| Total: #{pluralize(@total_job, "job")} | Total: #{pluralize(@total_job, "job")}
= render 'shared/search' = render 'shared/search'
/ , url_options: jobs_path
/ latest job / latest job
.latest-job.mb-5 .latest-job.mb-5
...@@ -36,7 +37,7 @@ ...@@ -36,7 +37,7 @@
.row.align-items-start.mb-3 .row.align-items-start.mb-3
- @top_cities.each do |city, city_jobs| - @top_cities.each do |city, city_jobs|
.col-4.city-item .col-4.city-item
= link_to city[0], job_list_path(model: :city, slug: city[1]), class: 'city-name' = link_to city[0], city_jobs_path(city_slug: city[1]), class: 'city-name'
span.job-count span.job-count
| ( | (
= pluralize(city_jobs, 'job') = pluralize(city_jobs, 'job')
...@@ -50,7 +51,7 @@ ...@@ -50,7 +51,7 @@
.row.align-items-start .row.align-items-start
- @top_industries.each do |industry, industry_jobs| - @top_industries.each do |industry, industry_jobs|
.col-4.industry-item .col-4.industry-item
= link_to industry[0], job_list_path(model: :industry, slug: industry[1]), class: 'industry-name' = link_to industry[0], industry_jobs_path(industry_slug: industry[1]), class: 'industry-name'
span.job-count span.job-count
| ( | (
= pluralize(industry_jobs, 'job') = pluralize(industry_jobs, 'job')
......
...@@ -27,9 +27,14 @@ Rails.application.routes.draw do ...@@ -27,9 +27,14 @@ Rails.application.routes.draw do
resources :cities, only: %i[index] resources :cities, only: %i[index]
resources :industries, only: %i[index] resources :industries, only: %i[index]
resources :favorite_jobs, only: %i[index create destroy] resources :favorite_jobs, only: %i[index create destroy]
resources :jobs, only: %i[index show] resources :jobs, only: %i[index show] do
collection do
get 'city/:city_slug', action: :index, as: :city
get 'industry/:industry_slug', action: :index, as: :industry
end
end
get '/jobs/:model/:slug', to: 'jobs#index', as: :job_list # get '/jobs/:model/:slug', to: 'jobs#index', as: :job_list
get '/my', to: 'users#show', as: :user_profile get '/my', to: 'users#show', as: :user_profile
get '/my/jobs', to: 'applies#index', as: :apply_jobs get '/my/jobs', to: 'applies#index', as: :apply_jobs
get '/apply', to: 'applies#new', as: :apply_job get '/apply', to: 'applies#new', as: :apply_job
......
# Adds the numeric salary bounds (min_salary, max_salary) to the jobs table.
class AddSalaryToJobs < ActiveRecord::Migration[6.1]
  def change
    # Both bounds are plain integer columns, added in this order.
    %i[min_salary max_salary].each do |salary_column|
      add_column :jobs, salary_column, :integer
    end
  end
end
...@@ -10,7 +10,7 @@ ...@@ -10,7 +10,7 @@
# #
# It's strongly recommended that you check this file into your version control system. # It's strongly recommended that you check this file into your version control system.
ActiveRecord::Schema.define(version: 2021_09_14_062751) do ActiveRecord::Schema.define(version: 2021_09_27_111928) do
create_table "active_storage_attachments", charset: "utf8mb4", collation: "utf8mb4_0900_ai_ci", force: :cascade do |t| create_table "active_storage_attachments", charset: "utf8mb4", collation: "utf8mb4_0900_ai_ci", force: :cascade do |t|
t.string "name", null: false t.string "name", null: false
...@@ -141,6 +141,8 @@ ActiveRecord::Schema.define(version: 2021_09_14_062751) do ...@@ -141,6 +141,8 @@ ActiveRecord::Schema.define(version: 2021_09_14_062751) do
t.bigint "company_id", null: false t.bigint "company_id", null: false
t.datetime "created_at", precision: 6, null: false t.datetime "created_at", precision: 6, null: false
t.datetime "updated_at", precision: 6, null: false t.datetime "updated_at", precision: 6, null: false
t.integer "min_salary"
t.integer "max_salary"
t.index ["company_id"], name: "index_jobs_on_company_id" t.index ["company_id"], name: "index_jobs_on_company_id"
end end
......
require 'rsolr'
# Maintenance tasks for the Solr job index. Progress is written to
# log/solr_logs.log.
namespace :solr do
  desc 'solr index data'
  task add_data: :environment do
    logger.info "start at #{Time.current}"
    Solr.new.add_db
    logger.info "Solr index data succesfully at #{Time.current}"
    logger.info '----------------------------------------------'
  end

  desc 'delete data'
  task delete_data: :environment do
    Solr.new.delete_db
    logger.info "delete all data at #{Time.current}"
    logger.info '----------------------------------------------'
  end

  # Logger used by the tasks above. It is memoized so that a single logger
  # (and a single open log file) is reused, instead of a new one being built
  # for every log line.
  def logger
    @solr_logger ||= ActiveSupport::Logger.new('log/solr_logs.log')
  end
end
...@@ -57,6 +57,35 @@ namespace :crawler do ...@@ -57,6 +57,35 @@ namespace :crawler do
expiration_date = default_value.to_time expiration_date = default_value.to_time
end end
end end
if salary.blank?
logger.info 'Remove this job because salary is empty'
next
end
# salary
if salary.include?('USD')
parsed = parse_salary(salary.remove('USD'), 23_000)
if parsed.length == 1
min_salary = parsed[0]
max_salary = parsed[0]
else
min_salary = parsed[0]
max_salary = parsed[1]
end
elsif salary.include?('VND')
parsed = parse_salary(salary.remove('tr', 'VND'), 1_000_000)
if parsed.length == 1
min_salary = parsed[0]
max_salary = parsed[0]
else
min_salary = parsed[0]
max_salary = parsed[1]
end
else
min_salary = 999_999_999
max_salary = 999_999_999
end
# benefits, description, requirement, other_info # benefits, description, requirement, other_info
job_detail_rows = job_page.css('section.job-detail-content div.detail-row') job_detail_rows = job_page.css('section.job-detail-content div.detail-row')
benefits, description, requirement, other_info = '' benefits, description, requirement, other_info = ''
...@@ -80,6 +109,8 @@ namespace :crawler do ...@@ -80,6 +109,8 @@ namespace :crawler do
job_object = Job.find_or_create_by({ title: job_title, job_object = Job.find_or_create_by({ title: job_title,
job_type: job_type, job_type: job_type,
salary: salary, salary: salary,
min_salary: min_salary,
max_salary: max_salary,
experience: experience, experience: experience,
position: level, position: level,
expiration_date: expiration_date, expiration_date: expiration_date,
...@@ -88,11 +119,11 @@ namespace :crawler do ...@@ -88,11 +119,11 @@ namespace :crawler do
requirement: requirement, requirement: requirement,
other_info: other_info, other_info: other_info,
company_id: company_object.id }) company_id: company_object.id })
industry_objects = industries.map { |industry| Industry.find_or_create_by(name: industry) } industry_objects = industries.map { |industry| Industry.find_or_create_by(name: industry, slug: industry.to_slug) }
job_object.industries << industry_objects job_object.industries << industry_objects
# Cities # Cities
cities = job_page.css('.job-detail-content .detail-box .map p a').map(&:text) cities = job_page.css('.job-detail-content .detail-box .map p a').map(&:text)
city_objects = cities.map { |city| City.find_or_create_by(name: city) } city_objects = cities.map { |city| City.find_or_create_by(name: city, slug: city.to_slug) }
job_object.cities << city_objects job_object.cities << city_objects
rescue URI::InvalidURIError => e rescue URI::InvalidURIError => e
puts "[Error] #{e.message}" puts "[Error] #{e.message}"
...@@ -114,7 +145,7 @@ namespace :crawler do ...@@ -114,7 +145,7 @@ namespace :crawler do
list_job = parsed_page.css('div.list-of-working-positions ul.list-jobs li a') list_job = parsed_page.css('div.list-of-working-positions ul.list-jobs li a')
list_job.each do |part| list_job.each do |part|
industry = part.text.squish.strip industry = part.text.squish.strip
Industry.find_or_create_by(name: industry) Industry.find_or_create_by(name: industry, slug: industry.to_slug)
end end
end end
...@@ -131,8 +162,15 @@ namespace :crawler do ...@@ -131,8 +162,15 @@ namespace :crawler do
end end
City.find_or_create_by( City.find_or_create_by(
name: city_name, name: city_name,
region: region region: region,
slug: city_name.to_slug
) )
end end
end end
# Turns a salary range string into integer amounts.
#
# The string is cut at every '-', each piece is read with String#to_i (so only
# its leading integer counts) and the result is scaled by +multiplication+.
#
# @param salary [String] e.g. "10 - 15"
# @param multiplication [Integer] factor applied to every amount
# @return [Array<Integer>] one amount per '-'-separated piece
def parse_salary(salary, multiplication)
  amounts = salary.split('-').map(&:to_i)
  amounts.map { |amount| amount * multiplication }
end
end end
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
Default Solr Home Directory
=============================
This directory is the default Solr home directory which holds
configuration files and Solr indexes (called cores).
Basic Directory Structure
-------------------------
The Solr Home directory typically contains the following...
* solr.xml *
This is the primary configuration file Solr looks for when starting;
it specifies high-level configuration options that apply to all
of your Solr cores, such as cluster-wide SolrCloud settings like
the ZooKeeper client timeout.
In addition, you can also declare Solr cores in this file, however
it is recommended to just use automatic core discovery instead of
listing cores in solr.xml.
If no solr.xml file is found, then Solr assumes that there should be
a single SolrCore named "collection1" and that the "Instance Directory"
for collection1 should be the same as the Solr Home Directory.
For more information about solr.xml, please see:
https://lucene.apache.org/solr/guide/solr-cores-and-solr-xml.html
* Individual SolrCore Instance Directories *
Although solr.xml can be configured to look for SolrCore Instance Directories
in any path, simple sub-directories of the Solr Home Dir using relative paths
are common for many installations.
* Core Discovery *
During startup, Solr will scan sub-directories of Solr home looking for
a specific file named core.properties. If core.properties is found in a
sub-directory (at any depth), Solr will initialize a core using the properties
defined in core.properties. For an example of core.properties, please see:
example/solr/collection1/core.properties
For more information about core discovery, please see:
https://lucene.apache.org/solr/guide/defining-core-properties.html
* A Shared 'lib' Directory *
Although solr.xml can be configured with an optional "sharedLib" attribute
that can point to any path, it is common to use a "./lib" sub-directory of the
Solr Home Directory.
* ZooKeeper Files *
When running SolrCloud with the embedded ZooKeeper option, it is
common to have a "zoo.cfg" file and a "zoo_data" directory in the Solr Home
Directory. Please see the SolrCloud documentation for more details.
https://lucene.apache.org/solr/guide/solrcloud.html
# Set of Catalan contractions for ElisionFilter
# TODO: load this as a resource from the analyzer and sync it in build.xml
d
l
m
n
s
t
# Set of French contractions for ElisionFilter
# TODO: load this as a resource from the analyzer and sync it in build.xml
l
m
t
qu
n
s
j
d
c
jusqu
quoiqu
lorsqu
puisqu
# Set of Irish contractions for ElisionFilter
# TODO: load this as a resource from the analyzer and sync it in build.xml
d
m
b
# Set of Italian contractions for ElisionFilter
# TODO: load this as a resource from the analyzer and sync it in build.xml
c
l
all
dall
dell
nell
sull
coll
pell
gl
agl
dagl
degl
negl
sugl
un
m
t
s
v
d
# Set of Irish hyphenations for StopFilter
# TODO: load this as a resource from the analyzer and sync it in build.xml
h
n
t
# Set of overrides for the dutch stemmer
# TODO: load this as a resource from the analyzer and sync it in build.xml
fiets fiets
bromfiets bromfiets
ei eier
kind kinder
# This file was created by Jacques Savoy and is distributed under the BSD license.
# See http://members.unine.ch/jacques.savoy/clef/index.html.
# Also see http://www.opensource.org/licenses/bsd-license.html
# Cleaned on October 11, 2009 (not normalized, so use before normalization)
# This means that when modifying this list, you might need to add some
# redundant entries, for example containing forms with both أ and ا
من
ومن
منها
منه
في
وفي
فيها
فيه
و
ف
ثم
او
أو
ب
بها
به
ا
أ
اى
اي
أي
أى
لا
ولا
الا
ألا
إلا
لكن
ما
وما
كما
فما
عن
مع
اذا
إذا
ان
أن
إن
انها
أنها
إنها
انه
أنه
إنه
بان
بأن
فان
فأن
وان
وأن
وإن
التى
التي
الذى
الذي
الذين
الى
الي
إلى
إلي
على
عليها
عليه
اما
أما
إما
ايضا
أيضا
كل
وكل
لم
ولم
لن
ولن
هى
هي
هو
وهى
وهي
وهو
فهى
فهي
فهو
انت
أنت
لك
لها
له
هذه
هذا
تلك
ذلك
هناك
كانت
كان
يكون
تكون
وكانت
وكان
غير
بعض
قد
نحو
بين
بينما
منذ
ضمن
حيث
الان
الآن
خلال
بعد
قبل
حتى
عند
عندما
لدى
جميع
# This file was created by Jacques Savoy and is distributed under the BSD license.
# See http://members.unine.ch/jacques.savoy/clef/index.html.
# Also see http://www.opensource.org/licenses/bsd-license.html
а
аз
ако
ала
бе
без
беше
би
бил
била
били
било
близо
бъдат
бъде
бяха
в
вас
ваш
ваша
вероятно
вече
взема
ви
вие
винаги
все
всеки
всички
всичко
всяка
във
въпреки
върху
г
ги
главно
го
д
да
дали
до
докато
докога
дори
досега
доста
е
едва
един
ето
за
зад
заедно
заради
засега
затова
защо
защото
и
из
или
им
има
имат
иска
й
каза
как
каква
какво
както
какъв
като
кога
когато
което
които
кой
който
колко
която
къде
където
към
ли
м
ме
между
мен
ми
мнозина
мога
могат
може
моля
момента
му
н
на
над
назад
най
направи
напред
например
нас
не
него
нея
ни
ние
никой
нито
но
някои
някой
няма
обаче
около
освен
особено
от
отгоре
отново
още
пак
по
повече
повечето
под
поне
поради
после
почти
прави
пред
преди
през
при
пък
първо
с
са
само
се
сега
си
скоро
след
сме
според
сред
срещу
сте
съм
със
също
т
тази
така
такива
такъв
там
твой
те
тези
ти
тн
то
това
тогава
този
той
толкова
точно
трябва
тук
тъй
тя
тях
у
харесва
ч
че
често
чрез
ще
щом
я
# Catalan stopwords from http://github.com/vcl/cue.language (Apache 2 Licensed)
a
abans
ací
ah
així
això
al
als
aleshores
algun
alguna
algunes
alguns
alhora
allà
allí
allò
altra
altre
altres
amb
ambdós
ambdues
apa
aquell
aquella
aquelles
aquells
aquest
aquesta
aquestes
aquests
aquí
baix
cada
cadascú
cadascuna
cadascunes
cadascuns
com
contra
d'un
d'una
d'unes
d'uns
dalt
de
del
dels
des
després
dins
dintre
donat
doncs
durant
e
eh
el
els
em
en
encara
ens
entre
érem
eren
éreu
es
és
esta
està
estàvem
estaven
estàveu
esteu
et
etc
ets
fins
fora
gairebé
ha
han
has
havia
he
hem
heu
hi
ho
i
igual
iguals
ja
l'hi
la
les
li
li'n
llavors
m'he
ma
mal
malgrat
mateix
mateixa
mateixes
mateixos
me
mentre
més
meu
meus
meva
meves
molt
molta
moltes
molts
mon
mons
n'he
n'hi
ne
ni
no
nogensmenys
només
nosaltres
nostra
nostre
nostres
o
oh
oi
on
pas
pel
pels
per
però
perquè
poc
poca
pocs
poques
potser
propi
qual
quals
quan
quant
que
què
quelcom
qui
quin
quina
quines
quins
s'ha
s'han
sa
semblant
semblants
ses
seu
seus
seva
seva
seves
si
sobre
sobretot
sóc
solament
sols
son
són
sons
sota
sou
t'ha
t'han
t'he
ta
tal
també
tampoc
tan
tant
tanta
tantes
teu
teus
teva
teves
ton
tons
tot
tota
totes
tots
un
una
unes
uns
us
va
vaig
vam
van
vas
veu
vosaltres
vostra
vostre
vostres
a
s
k
o
i
u
v
z
dnes
cz
tímto
budeš
budem
byli
jseš
můj
svým
ta
tomto
tohle
tuto
tyto
jej
zda
proč
máte
tato
kam
tohoto
kdo
kteří
mi
nám
tom
tomuto
mít
nic
proto
kterou
byla
toho
protože
asi
ho
naši
napište
re
což
tím
takže
svých
její
svými
jste
aj
tu
tedy
teto
bylo
kde
ke
pravé
ji
nad
nejsou
či
pod
téma
mezi
přes
ty
pak
vám
ani
když
však
neg
jsem
tento
článku
články
aby
jsme
před
pta
jejich
byl
ještě
bez
také
pouze
první
vaše
která
nás
nový
tipy
pokud
může
strana
jeho
své
jiné
zprávy
nové
není
vás
jen
podle
zde
být
více
bude
již
než
který
by
které
co
nebo
ten
tak
při
od
po
jsou
jak
další
ale
si
se
ve
to
jako
za
zpět
ze
do
pro
je
na
atd
atp
jakmile
přičemž
on
ona
ono
oni
ony
my
vy
ji
mne
jemu
tomu
těm
těmu
němu
němuž
jehož
jíž
jelikož
jež
jakož
načež
| From svn.tartarus.org/snowball/trunk/website/algorithms/danish/stop.txt
| This file is distributed under the BSD License.
| See http://snowball.tartarus.org/license.php
| Also see http://www.opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8.
| - This notice was added.
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
| A Danish stop word list. Comments begin with vertical bar. Each stop
| word is at the start of a line.
| This is a ranked list (commonest to rarest) of stopwords derived from
| a large text sample.
og | and
i | in
jeg | I
det | that (dem. pronoun)/it (pers. pronoun)
at | that (in front of a sentence)/to (with infinitive)
en | a/an
den | it (pers. pronoun)/that (dem. pronoun)
til | to/at/for/until/against/by/of/into, more
er | present tense of "to be"
som | who, as
på | on/upon/in/on/at/to/after/of/with/for, on
de | they
med | with/by/in, along
han | he
af | of/by/from/off/for/in/with/on, off
for | at/for/to/from/by/of/ago, in front/before, because
ikke | not
der | who/which, there/those
var | past tense of "to be"
mig | me/myself
sig | oneself/himself/herself/itself/themselves
men | but
et | a/an/one, one (number), someone/somebody/one
har | present tense of "to have"
om | round/about/for/in/a, about/around/down, if
vi | we
min | my
havde | past tense of "to have"
ham | him
hun | she
nu | now
over | over/above/across/by/beyond/past/on/about, over/past
da | then, when/as/since
fra | from/off/since, off, since
du | you
ud | out
sin | his/her/its/one's
dem | them
os | us/ourselves
op | up
man | you/one
hans | his
hvor | where
eller | or
hvad | what
skal | must/shall etc.
selv | myself/yourself/herself/ourselves etc., even
her | here
alle | all/everyone/everybody etc.
vil | will (verb)
blev | past tense of "to stay/to remain/to get/to become"
kunne | could
ind | in
når | when
være | infinitive of "to be"
dog | however/yet/after all
noget | something
ville | would
jo | you know/you see (adv), yes
deres | their/theirs
efter | after/behind/according to/for/by/from, later/afterwards
ned | down
skulle | should
denne | this
end | than
dette | this
mit | my/mine
også | also
under | under/beneath/below/during, below/underneath
have | have
dig | you
anden | other
hende | her
mine | my
alt | everything
meget | much/very, plenty of
sit | his, her, its, one's
sine | his, her, its, one's
vor | our
mod | against
disse | these
hvis | if
din | your/yours
nogle | some
hos | by/at
blive | be/become
mange | many
ad | by/through
bliver | present tense of "to be/to become"
hendes | her/hers
været | been (past participle of "to be")
thi | for (conj)
jer | you
sådan | such, like this/like that
| From svn.tartarus.org/snowball/trunk/website/algorithms/german/stop.txt
| This file is distributed under the BSD License.
| See http://snowball.tartarus.org/license.php
| Also see http://www.opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8.
| - This notice was added.
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
| A German stop word list. Comments begin with vertical bar. Each stop
| word is at the start of a line.
| The number of forms in this list is reduced significantly by passing it
| through the German stemmer.
aber | but
alle | all
allem
allen
aller
alles
als | than, as
also | so
am | an + dem
an | at
ander | other
andere
anderem
anderen
anderer
anderes
anderm
andern
anderr
anders
auch | also
auf | on
aus | out of
bei | by
bin | am
bis | until
bist | art
da | there
damit | with it
dann | then
der | the
den
des
dem
die
das
daß | that
derselbe | the same
derselben
denselben
desselben
demselben
dieselbe
dieselben
dasselbe
dazu | to that
dein | thy
deine
deinem
deinen
deiner
deines
denn | because
derer | of those
dessen | of him
dich | thee
dir | to thee
du | thou
dies | this
diese
diesem
diesen
dieser
dieses
doch | (several meanings)
dort | (over) there
durch | through
ein | a
eine
einem
einen
einer
eines
einig | some
einige
einigem
einigen
einiger
einiges
einmal | once
er | he
ihn | him
ihm | to him
es | it
etwas | something
euer | your
eure
eurem
euren
eurer
eures
für | for
gegen | towards
gewesen | p.p. of sein
hab | have
habe | have
haben | have
hat | has
hatte | had
hatten | had
hier | here
hin | there
hinter | behind
ich | I
mich | me
mir | to me
ihr | you, to her
ihre
ihrem
ihren
ihrer
ihres
euch | to you
im | in + dem
in | in
indem | while
ins | in + das
ist | is
jede | each, every
jedem
jeden
jeder
jedes
jene | that
jenem
jenen
jener
jenes
jetzt | now
kann | can
kein | no
keine
keinem
keinen
keiner
keines
können | can
könnte | could
machen | do
man | one
manche | some, many a
manchem
manchen
mancher
manches
mein | my
meine
meinem
meinen
meiner
meines
mit | with
muss | must
musste | had to
nach | to(wards)
nicht | not
nichts | nothing
noch | still, yet
nun | now
nur | only
ob | whether
oder | or
ohne | without
sehr | very
sein | his
seine
seinem
seinen
seiner
seines
selbst | self
sich | herself
sie | they, she
ihnen | to them
sind | are
so | so
solche | such
solchem
solchen
solcher
solches
soll | shall
sollte | should
sondern | but
sonst | else
über | over
um | about, around
und | and
uns | us
unse
unsem
unsen
unser
unses
unter | under
viel | much
vom | von + dem
von | from
vor | before
während | while
war | was
waren | were
warst | wast
was | what
weg | away, off
weil | because
weiter | further
welche | which
welchem
welchen
welcher
welches
wenn | when
werde | will
werden | will
wie | how
wieder | again
will | want
wir | we
wird | will
wirst | willst
wo | where
wollen | want
wollte | wanted
würde | would
würden | would
zu | to
zum | zu + dem
zur | zu + der
zwar | indeed
zwischen | between
# Lucene Greek Stopwords list
# Note: by default this file is used after GreekLowerCaseFilter,
# so when modifying this file use 'σ' instead of 'ς'
ο
η
το
οι
τα
του
τησ
των
τον
την
και
κι
κ
ειμαι
εισαι
ειναι
ειμαστε
ειστε
στο
στον
στη
στην
μα
αλλα
απο
για
προσ
με
σε
ωσ
παρα
αντι
κατα
μετα
θα
να
δε
δεν
μη
μην
επι
ενω
εαν
αν
τοτε
που
πωσ
ποιοσ
ποια
ποιο
ποιοι
ποιεσ
ποιων
ποιουσ
αυτοσ
αυτη
αυτο
αυτοι
αυτων
αυτουσ
αυτεσ
αυτα
εκεινοσ
εκεινη
εκεινο
εκεινοι
εκεινεσ
εκεινα
εκεινων
εκεινουσ
οπωσ
ομωσ
ισωσ
οσο
οτι
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# a couple of test stopwords to test that the words are really being
# configured from this file:
stopworda
stopwordb
# Standard english stop words taken from Lucene's StopAnalyzer
a
an
and
are
as
at
be
but
by
for
if
in
into
is
it
no
not
of
on
or
such
that
the
their
then
there
these
they
this
to
was
will
with
| From svn.tartarus.org/snowball/trunk/website/algorithms/spanish/stop.txt
| This file is distributed under the BSD License.
| See http://snowball.tartarus.org/license.php
| Also see http://www.opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8.
| - This notice was added.
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
| A Spanish stop word list. Comments begin with vertical bar. Each stop
| word is at the start of a line.
| The following is a ranked list (commonest to rarest) of stopwords
| deriving from a large sample of text.
| Extra words have been added at the end.
de | from, of
la | the, her
que | who, that
el | the
en | in
y | and
a | to
los | the, them
del | de + el
se | himself, from him etc
las | the, them
por | for, by, etc
un | a
para | for
con | with
no | no
una | a
su | his, her
al | a + el
| es from SER
lo | him
como | how
más | more
pero | but
sus | su plural
le | to him, her
ya | already
o | or
| fue from SER
este | this
| ha from HABER
sí | himself etc
porque | because
esta | this
| son from SER
entre | between
| está from ESTAR
cuando | when
muy | very
sin | without
sobre | on
| ser from SER
| tiene from TENER
también | also
me | me
hasta | until
hay | there is/are
donde | where
| han from HABER
quien | whom, that
| están from ESTAR
| estado from ESTAR
desde | from
todo | all
nos | us
durante | during
| estados from ESTAR
todos | all
uno | a
les | to them
ni | nor
contra | against
otros | other
| fueron from SER
ese | that
eso | that
| había from HABER
ante | before
ellos | they
e | and (variant of y)
esto | this
mí | me
antes | before
algunos | some
qué | what?
unos | a
yo | I
otro | other
otras | other
otra | other
él | he
tanto | so much, many
esa | that
estos | these
mucho | much, many
quienes | who
nada | nothing
muchos | many
cual | who
| sea from SER
poco | few
ella | she
estar | to be
| haber from HABER
estas | these
| estaba from ESTAR
| estamos from ESTAR
algunas | some
algo | something
nosotros | we
| other forms
mi | me
mis | mi plural
tú | thou
te | thee
ti | thee
tu | thy
tus | tu plural
ellas | they
nosotras | we
vosotros | you
vosotras | you
os | you
mío | mine
mía |
míos |
mías |
tuyo | thine
tuya |
tuyos |
tuyas |
suyo | his, hers, theirs
suya |
suyos |
suyas |
nuestro | ours
nuestra |
nuestros |
nuestras |
vuestro | yours
vuestra |
vuestros |
vuestras |
esos | those
esas | those
| forms of estar, to be (not including the infinitive):
estoy
estás
está
estamos
estáis
están
esté
estés
estemos
estéis
estén
estaré
estarás
estará
estaremos
estaréis
estarán
estaría
estarías
estaríamos
estaríais
estarían
estaba
estabas
estábamos
estabais
estaban
estuve
estuviste
estuvo
estuvimos
estuvisteis
estuvieron
estuviera
estuvieras
estuviéramos
estuvierais
estuvieran
estuviese
estuvieses
estuviésemos
estuvieseis
estuviesen
estando
estado
estada
estados
estadas
estad
| forms of haber, to have (not including the infinitive):
he
has
ha
hemos
habéis
han
haya
hayas
hayamos
hayáis
hayan
habré
habrás
habrá
habremos
habréis
habrán
habría
habrías
habríamos
habríais
habrían
había
habías
habíamos
habíais
habían
hube
hubiste
hubo
hubimos
hubisteis
hubieron
hubiera
hubieras
hubiéramos
hubierais
hubieran
hubiese
hubieses
hubiésemos
hubieseis
hubiesen
habiendo
habido
habida
habidos
habidas
| forms of ser, to be (not including the infinitive):
soy
eres
es
somos
sois
son
sea
seas
seamos
seáis
sean
seré
serás
será
seremos
seréis
serán
sería
serías
seríamos
seríais
serían
era
eras
éramos
erais
eran
fui
fuiste
fue
fuimos
fuisteis
fueron
fuera
fueras
fuéramos
fuerais
fueran
fuese
fueses
fuésemos
fueseis
fuesen
siendo
sido
| sed also means 'thirst'
| forms of tener, to have (not including the infinitive):
tengo
tienes
tiene
tenemos
tenéis
tienen
tenga
tengas
tengamos
tengáis
tengan
tendré
tendrás
tendrá
tendremos
tendréis
tendrán
tendría
tendrías
tendríamos
tendríais
tendrían
tenía
tenías
teníamos
teníais
tenían
tuve
tuviste
tuvo
tuvimos
tuvisteis
tuvieron
tuviera
tuvieras
tuviéramos
tuvierais
tuvieran
tuviese
tuvieses
tuviésemos
tuvieseis
tuviesen
teniendo
tenido
tenida
tenidos
tenidas
tened
# example set of basque stopwords
al
anitz
arabera
asko
baina
bat
batean
batek
bati
batzuei
batzuek
batzuetan
batzuk
bera
beraiek
berau
berauek
bere
berori
beroriek
beste
bezala
da
dago
dira
ditu
du
dute
edo
egin
ere
eta
eurak
ez
gainera
gu
gutxi
guzti
haiei
haiek
haietan
hainbeste
hala
han
handik
hango
hara
hari
hark
hartan
hau
hauei
hauek
hauetan
hemen
hemendik
hemengo
hi
hona
honek
honela
honetan
honi
hor
hori
horiei
horiek
horietan
horko
horra
horrek
horrela
horretan
horri
hortik
hura
izan
ni
noiz
nola
non
nondik
nongo
nor
nora
ze
zein
zen
zenbait
zenbat
zer
zergatik
ziren
zituen
zu
zuek
zuen
zuten
# This file was created by Jacques Savoy and is distributed under the BSD license.
# See http://members.unine.ch/jacques.savoy/clef/index.html.
# Also see http://www.opensource.org/licenses/bsd-license.html
# Note: by default this file is used after normalization, so when adding entries
# to this file, use the arabic 'ي' instead of 'ی'
انان
نداشته
سراسر
خياه
ايشان
وي
تاكنون
بيشتري
دوم
پس
ناشي
وگو
يا
داشتند
سپس
هنگام
هرگز
پنج
نشان
امسال
ديگر
گروهي
شدند
چطور
ده
و
دو
نخستين
ولي
چرا
چه
وسط
ه
كدام
قابل
يك
رفت
هفت
همچنين
در
هزار
بله
بلي
شايد
اما
شناسي
گرفته
دهد
داشته
دانست
داشتن
خواهيم
ميليارد
وقتيكه
امد
خواهد
جز
اورده
شده
بلكه
خدمات
شدن
برخي
نبود
بسياري
جلوگيري
حق
كردند
نوعي
بعري
نكرده
نظير
نبايد
بوده
بودن
داد
اورد
هست
جايي
شود
دنبال
داده
بايد
سابق
هيچ
همان
انجا
كمتر
كجاست
گردد
كسي
تر
مردم
تان
دادن
بودند
سري
جدا
ندارند
مگر
يكديگر
دارد
دهند
بنابراين
هنگامي
سمت
جا
انچه
خود
دادند
زياد
دارند
اثر
بدون
بهترين
بيشتر
البته
به
براساس
بيرون
كرد
بعضي
گرفت
توي
اي
ميليون
او
جريان
تول
بر
مانند
برابر
باشيم
مدتي
گويند
اكنون
تا
تنها
جديد
چند
بي
نشده
كردن
كردم
گويد
كرده
كنيم
نمي
نزد
روي
قصد
فقط
بالاي
ديگران
اين
ديروز
توسط
سوم
ايم
دانند
سوي
استفاده
شما
كنار
داريم
ساخته
طور
امده
رفته
نخست
بيست
نزديك
طي
كنيد
از
انها
تمامي
داشت
يكي
طريق
اش
چيست
روب
نمايد
گفت
چندين
چيزي
تواند
ام
ايا
با
ان
ايد
ترين
اينكه
ديگري
راه
هايي
بروز
همچنان
پاعين
كس
حدود
مختلف
مقابل
چيز
گيرد
ندارد
ضد
همچون
سازي
شان
مورد
باره
مرسي
خويش
برخوردار
چون
خارج
شش
هنوز
تحت
ضمن
هستيم
گفته
فكر
بسيار
پيش
براي
روزهاي
انكه
نخواهد
بالا
كل
وقتي
كي
چنين
كه
گيري
نيست
است
كجا
كند
نيز
يابد
بندي
حتي
توانند
عقب
خواست
كنند
بين
تمام
همه
ما
باشند
مثل
شد
اري
باشد
اره
طبق
بعد
اگر
صورت
غير
جاي
بيش
ريزي
اند
زيرا
چگونه
بار
لطفا
مي
درباره
من
ديده
همين
گذاري
برداري
علت
گذاشته
هم
فوق
نه
ها
شوند
اباد
همواره
هر
اول
خواهند
چهار
نام
امروز
مان
هاي
قبل
كنم
سعي
تازه
را
هستند
زير
جلوي
عنوان
بود
| From svn.tartarus.org/snowball/trunk/website/algorithms/finnish/stop.txt
| This file is distributed under the BSD License.
| See http://snowball.tartarus.org/license.php
| Also see http://www.opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8.
| - This notice was added.
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
| forms of BE
olla
olen
olet
on
olemme
olette
ovat
ole | negative form
oli
olisi
olisit
olisin
olisimme
olisitte
olisivat
olit
olin
olimme
olitte
olivat
ollut
olleet
en | negation
et
ei
emme
ette
eivät
|Nom Gen Acc Part Iness Elat Illat Adess Ablat Allat Ess Trans
minä minun minut minua minussa minusta minuun minulla minulta minulle | I
sinä sinun sinut sinua sinussa sinusta sinuun sinulla sinulta sinulle | you
hän hänen hänet häntä hänessä hänestä häneen hänellä häneltä hänelle | he she
me meidän meidät meitä meissä meistä meihin meillä meiltä meille | we
te teidän teidät teitä teissä teistä teihin teillä teiltä teille | you
he heidän heidät heitä heissä heistä heihin heillä heiltä heille | they
tämä tämän tätä tässä tästä tähän tallä tältä tälle tänä täksi | this
tuo tuon tuotä tuossa tuosta tuohon tuolla tuolta tuolle tuona tuoksi | that
se sen sitä siinä siitä siihen sillä siltä sille sinä siksi | it
nämä näiden näitä näissä näistä näihin näillä näiltä näille näinä näiksi | these
nuo noiden noita noissa noista noihin noilla noilta noille noina noiksi | those
ne niiden niitä niissä niistä niihin niillä niiltä niille niinä niiksi | they
kuka kenen kenet ketä kenessä kenestä keneen kenellä keneltä kenelle kenenä keneksi| who
ketkä keiden ketkä keitä keissä keistä keihin keillä keiltä keille keinä keiksi | (pl)
mikä minkä minkä mitä missä mistä mihin millä miltä mille minä miksi | which what
mitkä | (pl)
joka jonka jota jossa josta johon jolla jolta jolle jona joksi | who which
jotka joiden joita joissa joista joihin joilla joilta joille joina joiksi | (pl)
| conjunctions
että | that
ja | and
jos | if
koska | because
kuin | than
mutta | but
niin | so
sekä | and
sillä | for
tai | or
vaan | but
vai | or
vaikka | although
| prepositions
kanssa | with
mukaan | according to
noin | about
poikki | across
yli | over, across
| other
kun | when
niin | so
nyt | now
itse | self
| From svn.tartarus.org/snowball/trunk/website/algorithms/french/stop.txt
| This file is distributed under the BSD License.
| See http://snowball.tartarus.org/license.php
| Also see http://www.opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8.
| - This notice was added.
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
| A French stop word list. Comments begin with vertical bar. Each stop
| word is at the start of a line.
au | a + le
aux | a + les
avec | with
ce | this
ces | these
dans | in
de | of
des | de + les
du | de + le
elle | she
en | `of them' etc
et | and
eux | them
il | he
je | I
la | the
le | the
leur | their
lui | him
ma | my (fem)
mais | but
me | me
même | same; as in moi-même (myself) etc
mes | my (pl)
moi | me
mon | my (masc)
ne | not
nos | our (pl)
notre | our
nous | we
on | one
ou | or
par | by
pas | not
pour | for
qu | que before vowel
que | that
qui | who
sa | his, her (fem)
se | oneself
ses | his (pl)
son | his, her (masc)
sur | on
ta | thy (fem)
te | thee
tes | thy (pl)
toi | thee
ton | thy (masc)
tu | thou
un | a
une | a
vos | your (pl)
votre | your
vous | you
| single letter forms
c | c'
d | d'
j | j'
l | l'
à | to, at
m | m'
n | n'
s | s'
t | t'
y | there
| forms of être (not including the infinitive):
été
étée
étées
étés
étant
suis
es
est
sommes
êtes
sont
serai
seras
sera
serons
serez
seront
serais
serait
serions
seriez
seraient
étais
était
étions
étiez
étaient
fus
fut
fûmes
fûtes
furent
sois
soit
soyons
soyez
soient
fusse
fusses
fût
fussions
fussiez
fussent
| forms of avoir (not including the infinitive):
ayant
eu
eue
eues
eus
ai
as
avons
avez
ont
aurai
auras
aura
aurons
aurez
auront
aurais
aurait
aurions
auriez
auraient
avais
avait
avions
aviez
avaient
eut
eûmes
eûtes
eurent
aie
aies
ait
ayons
ayez
aient
eusse
eusses
eût
eussions
eussiez
eussent
| Later additions (from Jean-Christophe Deschamps)
ceci | this
cela | that
celà | that
cet | this
cette | this
ici | here
ils | they
les | the (pl)
leurs | their (pl)
quel | which
quels | which
quelle | which
quelles | which
sans | without
soi | oneself
a
ach
ag
agus
an
aon
ar
arna
as
b'
ba
beirt
bhúr
caoga
ceathair
ceathrar
chomh
chtó
chuig
chun
cois
céad
cúig
cúigear
d'
daichead
dar
de
deich
deichniúr
den
dhá
do
don
dtí
dár
faoi
faoin
faoina
faoinár
fara
fiche
gach
gan
go
gur
haon
hocht
i
iad
idir
in
ina
ins
inár
is
le
leis
lena
lenár
m'
mar
mo
na
nach
naoi
naonúr
níor
nócha
ocht
ochtar
os
roimh
sa
seacht
seachtar
seachtó
seasca
seisear
siad
sibh
sinn
sna
tar
thar
thú
triúr
trí
trína
trínár
tríocha
um
ár
é
éis
í
ó
ón
óna
ónár
# Galician stopwords
a
aínda
alí
aquel
aquela
aquelas
aqueles
aquilo
aquí
ao
aos
as
así
á
ben
cando
che
co
coa
comigo
con
connosco
contigo
convosco
coas
cos
cun
cuns
cunha
cunhas
da
dalgunha
dalgunhas
dalgún
dalgúns
das
de
del
dela
delas
deles
desde
deste
do
dos
dun
duns
dunha
dunhas
e
el
ela
elas
eles
en
era
eran
esa
esas
ese
eses
esta
estar
estaba
está
están
este
estes
estiven
estou
eu
é
facer
foi
foron
fun
había
hai
iso
isto
la
las
lle
lles
lo
los
mais
me
meu
meus
min
miña
miñas
moi
na
nas
neste
nin
no
non
nos
nosa
nosas
noso
nosos
nós
nun
nunha
nuns
nunhas
o
os
ou
ó
ós
para
pero
pode
pois
pola
polas
polo
polos
por
que
se
senón
ser
seu
seus
sexa
sido
sobre
súa
súas
tamén
tan
te
ten
teñen
teño
ter
teu
teus
ti
tido
tiña
tiven
túa
túas
un
unha
unhas
uns
vos
vosa
vosas
voso
vosos
vós
# Also see http://www.opensource.org/licenses/bsd-license.html
# See http://members.unine.ch/jacques.savoy/clef/index.html.
# This file was created by Jacques Savoy and is distributed under the BSD license.
# Note: by default this file also contains forms normalized by HindiNormalizer
# for spelling variation (see section below), such that it can be used whether or
# not you enable that feature. When adding additional entries to this list,
# please add the normalized form as well.
अंदर
अत
अपना
अपनी
अपने
अभी
आदि
आप
इत्यादि
इन
इनका
इन्हीं
इन्हें
इन्हों
इस
इसका
इसकी
इसके
इसमें
इसी
इसे
उन
उनका
उनकी
उनके
उनको
उन्हीं
उन्हें
उन्हों
उस
उसके
उसी
उसे
एक
एवं
एस
ऐसे
और
कई
कर
करता
करते
करना
करने
करें
कहते
कहा
का
काफ़ी
कि
कितना
किन्हें
किन्हों
किया
किर
किस
किसी
किसे
की
कुछ
कुल
के
को
कोई
कौन
कौनसा
गया
घर
जब
जहाँ
जा
जितना
जिन
जिन्हें
जिन्हों
जिस
जिसे
जीधर
जैसा
जैसे
जो
तक
तब
तरह
तिन
तिन्हें
तिन्हों
तिस
तिसे
तो
था
थी
थे
दबारा
दिया
दुसरा
दूसरे
दो
द्वारा
नहीं
ना
निहायत
नीचे
ने
पर
पर
पहले
पूरा
पे
फिर
बनी
बही
बहुत
बाद
बाला
बिलकुल
भी
भीतर
मगर
मानो
मे
में
यदि
यह
यहाँ
यही
या
यिह
ये
रखें
रहा
रहे
ऱ्वासा
लिए
लिये
लेकिन
वर्ग
वह
वह
वहाँ
वहीं
वाले
वुह
वे
वग़ैरह
संग
सकता
सकते
सबसे
सभी
साथ
साबुत
साभ
सारा
से
सो
ही
हुआ
हुई
हुए
है
हैं
हो
होता
होती
होते
होना
होने
# additional normalized forms of the above
अपनि
जेसे
होति
सभि
तिंहों
इंहों
दवारा
इसि
किंहें
थि
उंहों
ओर
जिंहें
वहिं
अभि
बनि
हि
उंहिं
उंहें
हें
वगेरह
एसे
रवासा
कोन
निचे
काफि
उसि
पुरा
भितर
हे
बहि
वहां
कोइ
यहां
जिंहों
तिंहें
किसि
कइ
यहि
इंहिं
जिधर
इंहें
अदि
इतयादि
हुइ
कोनसा
इसकि
दुसरे
जहां
अप
किंहों
उनकि
भि
वरग
हुअ
जेसा
नहिं
| From svn.tartarus.org/snowball/trunk/website/algorithms/hungarian/stop.txt
| This file is distributed under the BSD License.
| See http://snowball.tartarus.org/license.php
| Also see http://www.opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8.
| - This notice was added.
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
| Hungarian stop word list
| prepared by Anna Tordai
a
ahogy
ahol
aki
akik
akkor
alatt
által
általában
amely
amelyek
amelyekben
amelyeket
amelyet
amelynek
ami
amit
amolyan
amíg
amikor
át
abban
ahhoz
annak
arra
arról
az
azok
azon
azt
azzal
azért
aztán
azután
azonban
bár
be
belül
benne
cikk
cikkek
cikkeket
csak
de
e
eddig
egész
egy
egyes
egyetlen
egyéb
egyik
egyre
ekkor
el
elég
ellen
elő
először
előtt
első
én
éppen
ebben
ehhez
emilyen
ennek
erre
ez
ezt
ezek
ezen
ezzel
ezért
és
fel
felé
hanem
hiszen
hogy
hogyan
igen
így
illetve
ill.
ill
ilyen
ilyenkor
ison
ismét
itt
jól
jobban
kell
kellett
keresztül
keressünk
ki
kívül
között
közül
legalább
lehet
lehetett
legyen
lenne
lenni
lesz
lett
maga
magát
majd
majd
már
más
másik
meg
még
mellett
mert
mely
melyek
mi
mit
míg
miért
milyen
mikor
minden
mindent
mindenki
mindig
mint
mintha
mivel
most
nagy
nagyobb
nagyon
ne
néha
nekem
neki
nem
néhány
nélkül
nincs
olyan
ott
össze
ő
ők
őket
pedig
persze
s
saját
sem
semmi
sok
sokat
sokkal
számára
szemben
szerint
szinte
talán
tehát
teljes
tovább
továbbá
több
úgy
ugyanis
új
újabb
újra
után
utána
utolsó
vagy
vagyis
valaki
valami
valamint
való
vagyok
van
vannak
volt
voltam
voltak
voltunk
vissza
vele
viszont
volna
# example set of Armenian stopwords.
այդ
այլ
այն
այս
դու
դուք
եմ
են
ենք
ես
եք
է
էի
էին
էինք
էիր
էիք
էր
ըստ
թ
ի
ին
իսկ
իր
կամ
համար
հետ
հետո
մենք
մեջ
մի
ն
նա
նաև
նրա
նրանք
որ
որը
որոնք
որպես
ու
ում
պիտի
վրա
և
# from appendix D of: A Study of Stemming Effects on Information
# Retrieval in Bahasa Indonesia
ada
adanya
adalah
adapun
agak
agaknya
agar
akan
akankah
akhirnya
aku
akulah
amat
amatlah
anda
andalah
antar
diantaranya
antara
antaranya
diantara
apa
apaan
mengapa
apabila
apakah
apalagi
apatah
atau
ataukah
ataupun
bagai
bagaikan
sebagai
sebagainya
bagaimana
bagaimanapun
sebagaimana
bagaimanakah
bagi
bahkan
bahwa
bahwasanya
sebaliknya
banyak
sebanyak
beberapa
seberapa
begini
beginian
beginikah
beginilah
sebegini
begitu
begitukah
begitulah
begitupun
sebegitu
belum
belumlah
sebelum
sebelumnya
sebenarnya
berapa
berapakah
berapalah
berapapun
betulkah
sebetulnya
biasa
biasanya
bila
bilakah
bisa
bisakah
sebisanya
boleh
bolehkah
bolehlah
buat
bukan
bukankah
bukanlah
bukannya
cuma
percuma
dahulu
dalam
dan
dapat
dari
daripada
dekat
demi
demikian
demikianlah
sedemikian
dengan
depan
di
dia
dialah
dini
diri
dirinya
terdiri
dong
dulu
enggak
enggaknya
entah
entahlah
terhadap
terhadapnya
hal
hampir
hanya
hanyalah
harus
haruslah
harusnya
seharusnya
hendak
hendaklah
hendaknya
hingga
sehingga
ia
ialah
ibarat
ingin
inginkah
inginkan
ini
inikah
inilah
itu
itukah
itulah
jangan
jangankan
janganlah
jika
jikalau
juga
justru
kala
kalau
kalaulah
kalaupun
kalian
kami
kamilah
kamu
kamulah
kan
kapan
kapankah
kapanpun
dikarenakan
karena
karenanya
ke
kecil
kemudian
kenapa
kepada
kepadanya
ketika
seketika
khususnya
kini
kinilah
kiranya
sekiranya
kita
kitalah
kok
lagi
lagian
selagi
lah
lain
lainnya
melainkan
selaku
lalu
melalui
terlalu
lama
lamanya
selama
selama
selamanya
lebih
terlebih
bermacam
macam
semacam
maka
makanya
makin
malah
malahan
mampu
mampukah
mana
manakala
manalagi
masih
masihkah
semasih
masing
mau
maupun
semaunya
memang
mereka
merekalah
meski
meskipun
semula
mungkin
mungkinkah
nah
namun
nanti
nantinya
nyaris
oleh
olehnya
seorang
seseorang
pada
padanya
padahal
paling
sepanjang
pantas
sepantasnya
sepantasnyalah
para
pasti
pastilah
per
pernah
pula
pun
merupakan
rupanya
serupa
saat
saatnya
sesaat
saja
sajalah
saling
bersama
sama
sesama
sambil
sampai
sana
sangat
sangatlah
saya
sayalah
se
sebab
sebabnya
sebuah
tersebut
tersebutlah
sedang
sedangkan
sedikit
sedikitnya
segala
segalanya
segera
sesegera
sejak
sejenak
sekali
sekalian
sekalipun
sesekali
sekaligus
sekarang
sekarang
sekitar
sekitarnya
sela
selain
selalu
seluruh
seluruhnya
semakin
sementara
sempat
semua
semuanya
sendiri
sendirinya
seolah
seperti
sepertinya
sering
seringnya
serta
siapa
siapakah
siapapun
disini
disinilah
sini
sinilah
sesuatu
sesuatunya
suatu
sesudah
sesudahnya
sudah
sudahkah
sudahlah
supaya
tadi
tadinya
tak
tanpa
setelah
telah
tentang
tentu
tentulah
tentunya
tertentu
seterusnya
tapi
tetapi
setiap
tiap
setidaknya
tidak
tidakkah
tidaklah
toh
waduh
wah
wahai
sewaktu
walau
walaupun
wong
yaitu
yakni
yang
| From svn.tartarus.org/snowball/trunk/website/algorithms/italian/stop.txt
| This file is distributed under the BSD License.
| See http://snowball.tartarus.org/license.php
| Also see http://www.opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8.
| - This notice was added.
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
| An Italian stop word list. Comments begin with vertical bar. Each stop
| word is at the start of a line.
ad | a (to) before vowel
al | a + il
allo | a + lo
ai | a + i
agli | a + gli
all | a + l'
agl | a + gl'
alla | a + la
alle | a + le
con | with
col | con + il
coi | con + i (forms collo, cogli etc are now very rare)
da | from
dal | da + il
dallo | da + lo
dai | da + i
dagli | da + gli
dall | da + l'
dagl | da + gl'
dalla | da + la
dalle | da + le
di | of
del | di + il
dello | di + lo
dei | di + i
degli | di + gli
dell | di + l'
degl | di + gl'
della | di + la
delle | di + le
in | in
nel | in + il
nello | in + lo
nei | in + i
negli | in + gli
nell | in + l'
negl | in + gl'
nella | in + la
nelle | in + le
su | on
sul | su + il
sullo | su + lo
sui | su + i
sugli | su + gli
sull | su + l'
sugl | su + gl'
sulla | su + la
sulle | su + le
per | through, by
tra | among
contro | against
io | I
tu | thou
lui | he
lei | she
noi | we
voi | you
loro | they
mio | my
mia |
miei |
mie |
tuo |
tua |
tuoi | thy
tue |
suo |
sua |
suoi | his, her
sue |
nostro | our
nostra |
nostri |
nostre |
vostro | your
vostra |
vostri |
vostre |
mi | me
ti | thee
ci | us, there
vi | you, there
lo | him, the
la | her, the
li | them
le | them, the
gli | to him, the
ne | from there etc
il | the
un | a
uno | a
una | a
ma | but
ed | and
se | if
perché | why, because
anche | also
come | how
dov | where (as dov')
dove | where
che | who, that
chi | who
cui | whom
non | not
più | more
quale | who, that
quanto | how much
quanti |
quanta |
quante |
quello | that
quelli |
quella |
quelle |
questo | this
questi |
questa |
queste |
si | yes
tutto | all
tutti | all
| single letter forms:
a | at
c | as c' for ce or ci
e | and
i | the
l | as l'
o | or
| forms of avere, to have (not including the infinitive):
ho
hai
ha
abbiamo
avete
hanno
abbia
abbiate
abbiano
avrò
avrai
avrà
avremo
avrete
avranno
avrei
avresti
avrebbe
avremmo
avreste
avrebbero
avevo
avevi
aveva
avevamo
avevate
avevano
ebbi
avesti
ebbe
avemmo
aveste
ebbero
avessi
avesse
avessimo
avessero
avendo
avuto
avuta
avuti
avute
| forms of essere, to be (not including the infinitive):
sono
sei
è
siamo
siete
sia
siate
siano
sarò
sarai
sarà
saremo
sarete
saranno
sarei
saresti
sarebbe
saremmo
sareste
sarebbero
ero
eri
era
eravamo
eravate
erano
fui
fosti
fu
fummo
foste
furono
fossi
fosse
fossimo
fossero
essendo
| forms of fare, to do (not including the infinitive, fa, fat-):
faccio
fai
facciamo
fanno
faccia
facciate
facciano
farò
farai
farà
faremo
farete
faranno
farei
faresti
farebbe
faremmo
fareste
farebbero
facevo
facevi
faceva
facevamo
facevate
facevano
feci
facesti
fece
facemmo
faceste
fecero
facessi
facesse
facessimo
facessero
facendo
| forms of stare, to be (not including the infinitive):
sto
stai
sta
stiamo
stanno
stia
stiate
stiano
starò
starai
starà
staremo
starete
staranno
starei
staresti
starebbe
staremmo
stareste
starebbero
stavo
stavi
stava
stavamo
stavate
stavano
stetti
stesti
stette
stemmo
steste
stettero
stessi
stesse
stessimo
stessero
stando
#
# This file defines a stopword set for Japanese.
#
# This set is made up of hand-picked frequent terms from segmented Japanese Wikipedia.
# Punctuation characters and frequent kanji have mostly been left out. See LUCENE-3745
# for frequency lists, etc. that can be useful for making your own set (if desired)
#
# Note that there is an overlap between these stopwords and the terms stopped when used
# in combination with the JapanesePartOfSpeechStopFilter. When editing this file, note
# that comments are not allowed on the same line as stopwords.
#
# Also note that stopping is done in a case-insensitive manner. Change your StopFilter
# configuration if you need case-sensitive stopping. Lastly, note that stopping is done
# using the same character width as the entries in this file. Since this StopFilter is
# normally done after a CJKWidthFilter in your chain, you would usually want your romaji
# entries to be in half-width and your kana entries to be in full-width.
#
ある
いる
する
から
こと
として
れる
など
なっ
ない
この
ため
その
あっ
よう
また
もの
という
あり
まで
られ
なる
これ
によって
により
おり
より
による
なり
られる
において
なかっ
なく
しかし
について
だっ
その後
できる
それ
ので
なお
のみ
でき
における
および
いう
さらに
でも
たり
その他
に関する
たち
ます
なら
に対して
特に
せる
及び
これら
とき
では
にて
ほか
ながら
うち
そして
とともに
ただし
かつて
それぞれ
または
ほど
ものの
に対する
ほとんど
と共に
といった
です
とも
ところ
ここ
##### End of file
# Set of Latvian stopwords from A Stemming Algorithm for Latvian, Karlis Kreslins
# the original list of over 800 forms was refined:
# pronouns, adverbs, interjections were removed
#
# prepositions
aiz
ap
ar
apakš
ārpus
augšpus
bez
caur
dēļ
gar
iekš
iz
kopš
labad
lejpus
līdz
no
otrpus
pa
par
pār
pēc
pie
pirms
pret
priekš
starp
šaipus
uz
viņpus
virs
virspus
zem
apakšpus
# Conjunctions
un
bet
jo
ja
ka
lai
tomēr
tikko
turpretī
arī
kaut
gan
tādēļ
ne
tikvien
vien
ir
te
vai
kamēr
# Particles
ar
diezin
droši
diemžēl
nebūt
ik
it
taču
nu
pat
tiklab
iekšpus
nedz
tik
nevis
turpretim
jeb
iekam
iekām
iekāms
kolīdz
līdzko
tiklīdz
jebšu
tālab
tāpēc
nekā
itin
jau
jel
nezin
tad
tikai
vis
tak
iekams
vien
# modal verbs
būt
biju
biji
bija
bijām
bijāt
esmu
esi
esam
esat
būšu
būsi
būs
būsim
būsiet
tikt
tiku
tiki
tika
tikām
tikāt
tieku
tiec
tiek
tiekam
tiekat
tikšu
tiks
tiksim
tiksiet
tapt
tapi
tapāt
topat
tapšu
tapsi
taps
tapsim
tapsiet
kļūt
kļuvu
kļuvi
kļuva
kļuvām
kļuvāt
kļūstu
kļūsti
kļūst
kļūstam
kļūstat
kļūšu
kļūsi
kļūs
kļūsim
kļūsiet
# verbs
varēt
varēju
varējām
varēšu
varēsim
var
varēji
varējāt
varēsi
varēsiet
varat
varēja
varēs
This diff is collapsed. Click to expand it.
This diff is collapsed. Click to expand it.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment