Commit 1b7a0aa8 by Tô Ngọc Ánh

Merge branch 'solr' into 'master'

Solr

See merge request !14
parents 5441a47c ceafc8f3
Pipeline #1068 failed with stages
in 0 seconds
......@@ -26,7 +26,7 @@
# Ignore master key for decrypting credentials and more.
/config/master.key
/lib/data
/lib/import/csv/data
/public/uploads/
.env
......@@ -70,4 +70,6 @@ gem 'whenever', require: false
gem 'kaminari'
gem 'devise'
gem 'carrierwave', '~> 2.0'
gem 'rsolr'
gem 'settingslogic'
##
......@@ -94,6 +94,8 @@ GEM
railties (>= 3.2)
erubi (1.9.0)
execjs (2.7.0)
faraday (1.0.1)
multipart-post (>= 1.2, < 3)
ffi (1.13.1)
globalid (0.4.2)
activesupport (>= 4.2.0)
......@@ -135,6 +137,7 @@ GEM
mini_portile2 (2.4.0)
minitest (5.14.1)
msgpack (1.3.3)
multipart-post (2.1.1)
mysql2 (0.5.3)
nio4r (2.5.2)
nokogiri (1.10.10)
......@@ -177,6 +180,9 @@ GEM
responders (3.0.1)
actionpack (>= 5.0)
railties (>= 5.0)
rsolr (2.3.0)
builder (>= 2.1.2)
faraday (>= 0.9.0)
ruby-vips (2.0.17)
ffi (~> 1.9)
ruby_dep (1.5.0)
......@@ -195,6 +201,7 @@ GEM
selenium-webdriver (3.142.7)
childprocess (>= 0.5, < 4.0)
rubyzip (>= 1.2.2)
settingslogic (2.0.9)
spring (2.1.0)
spring-watcher-listen (2.0.1)
listen (>= 2.7, < 4.0)
......@@ -250,9 +257,11 @@ DEPENDENCIES
nokogiri
puma (~> 3.11)
rails (~> 5.2.4, >= 5.2.4.3)
rsolr
rubyzip
sass-rails (~> 5.0)
selenium-webdriver
settingslogic
spring
spring-watcher-listen (~> 2.0.0)
turbolinks (~> 5)
......
class JobsController < ApplicationController
before_action :get_data_search_bar, only: %i[index search]
# Lists jobs filtered by a related model (industry/location/company) slug.
# Route: GET jobs/:model/:slug
def index
  @locations = Location.select(:id, :city)
  @industries = Industry.select(:id, :name)
  # SECURITY: params[:model] comes straight from the URL. Calling
  # classify.constantize on raw user input is unsafe reflection — it can
  # instantiate arbitrary classes. Restrict to the models that actually
  # expose slugs (the ones including ConvertSlug).
  model_name = params[:model].to_s.classify
  raise ActiveRecord::RecordNotFound unless %w[Company Industry Location].include?(model_name)
  object = model_name.constantize.find_by_slug(params[:slug])
  # Unknown slug -> 404 instead of NoMethodError on nil below.
  raise ActiveRecord::RecordNotFound if object.nil?
  @keyword = object.try(:name) || object.try(:city)
  params[params[:model]] = object.id
  @jobs = object.jobs.all.includes(:company, :locations).page(params[:page])
end
......@@ -16,10 +17,29 @@ class JobsController < ApplicationController
save_history(@job.id)
end
# Full-text job search backed by Solr; paginates the raw Solr docs with
# Kaminari so the view can treat them like an ActiveRecord page.
def search
  response = SolrServer.new.search(params)['response']
  @paginatable_array = Kaminari
                       .paginate_array(response['docs'], total_count: response['numFound'])
                       .page(params[:page])
  @keyword = set_search_keyword(params)
end
private
# Records (or refreshes) the current user's view history for a job.
# `touch` bumps updated_at in one statement and replaces
# `update_attributes`, which is deprecated and removed in Rails 6.1.
def save_history(job_id)
  history = current_user.histories.find_or_create_by(job_id: job_id)
  history.touch
end
# Loads the industry and location option lists required by the shared
# search bar partial; wired as a before_action for index and search.
def get_data_search_bar
  @locations = Location.select(:id, :city)
  @industries = Industry.select(:id, :name)
end
# Builds the human-readable caption for a search ("dev IT Hanoi"),
# resolving the selected industry/location ids against the lists loaded
# by get_data_search_bar.
# Fix: the old string interpolation left double/trailing spaces whenever
# industry or location was missing; compact+join emits clean spacing.
def set_search_keyword(search_params)
  search_text = search_params[:search].to_s.strip.empty? ? 'All Jobs' : search_params[:search]
  industry_id = search_params[:industry].to_s
  industry_name = @industries.detect { |i| i.id == industry_id.to_i }&.name unless industry_id.empty?
  location_id = search_params[:location].to_s
  city_name = @locations.detect { |l| l.id == location_id.to_i }&.city unless location_id.empty?
  [search_text, industry_name, city_name].compact.join(' ')
end
end
require './lib/common/convert_slug'
class Company < ApplicationRecord
include ConvertSlug
......
require './lib/common/convert_slug'
class Industry < ApplicationRecord
include ConvertSlug
......
......@@ -7,7 +7,8 @@ class Job < ApplicationRecord
NUMBER_LATEST_JOB = 6
WORDS_SHORT_DESCRIPTION = 250
NUMBER_SEARCH_RESULTS = 20
belongs_to :company
has_many :applied_jobs
has_many :histories
......
require './lib/common/convert_slug.rb'
class Location < ApplicationRecord
include ConvertSlug
......
# Application settings backed by config/application.yml (Settingslogic).
# Values are namespaced per Rails environment, so e.g. the development
# block's solr_server is read as Settings.solr_server.
class Settings < Settingslogic
  source "#{Rails.root}/config/application.yml"
  namespace Rails.env
end
$('#favorite-<%= @favorite.job_id %>')
.html("<%= j render 'favorites/link_favorite', job_id: @favorite.job_id %>")
$('#sum-favorited').html("<%= current_user.favorites.size %>")
$('#favorited-<%= @favorite.job_id %>').remove()
var num_child = $('.form-group .form-item').length
if (num_child == 0) {
$('#favorited-<%= @favorite.job_id %>').remove()
if (num_child == 1) {
var prev = $('a[rel=prev]')
if (prev.length > 0) {
window.location = prev.attr('href')
......
<div class='card flex-md-row align-items-center my-2'>
<div class='card-body'>
<%= link_to search_result['title'], job_path(id: search_result['id']), class: 'card-title font-weight-bold text-decoration-none' %>
<p class='card-text'><%= search_result['company'] %></p>
<p class='mb-0'>
<strong>Work place:</strong>
<%= search_result['locations'].join(' | ') %>
</p>
<p><strong>Salary: </strong><%= search_result['salary'] %></p>
<p class='card-text'><%= strip_tags(search_result['description']).truncate(Job::WORDS_SHORT_DESCRIPTION) %></p>
</div>
<div id="favorite-<%= search_result['id'] %>" class='p-2'>
<%= render 'favorites/link_favorite', job_id: search_result['id'] %>
</div>
</div>
......@@ -4,7 +4,7 @@
<div class='content'>
<% if @jobs.any? %>
<div class='message text-center'>
<h3>We found <%= pluralize(@jobs.total_count, 'result') %> for "<%= @keyword %>" </h3>
<h4>We found <%= pluralize(@jobs.total_count, 'result') %> for <strong><%= @keyword %></strong> </h4>
</div>
<hr>
<%= paginate @jobs %>
......
<div class='px-5'>
<%= render 'shared/searchbar', my_class: 'd-flex flex-column flex-md-row' %>
</div>
<div class='content'>
<% if @paginatable_array.any? %>
<div class='message text-center'>
<h4>We found <%= pluralize(@paginatable_array.total_count, 'result') %> for <strong><%= @keyword %></strong></h4>
</div>
<hr>
<%= paginate @paginatable_array %>
<%= render partial: 'jobs/search_result', collection: @paginatable_array %>
<%= paginate @paginatable_array %>
<% else %>
<h4 class="text-center">Sorry! We couldn't find what you were looking for!</h4>
<% end %>
</div>
\ No newline at end of file
<%= form_tag '#', method: :get, class: "mt-4 form-group #{my_class}" do %>
<%= search_field_tag :search, params[:keyword], placeholder: 'Search', class: 'form-control m-2' %>
<%= select_tag :industry, options_from_collection_for_select(@industries, :id, :name, '1'), class: 'form-control m-2' %>
<%= select_tag :location, options_from_collection_for_select(@locations, :id, :city, '1'), class: 'form-control m-2' %>
<%= submit_tag 'Search', class: 'btn btn-outline-success m-2' %>
<%= form_tag search_jobs_path, method: :get, class: "mt-4 form-group #{my_class}", enforce_utf8: false do %>
<%= search_field_tag :search, params[:search], placeholder: 'Search', class: 'form-control m-2' %>
<%= select_tag :industry, options_from_collection_for_select(@industries, :id, :name, params[:industry]), prompt: 'All Industries', class: 'form-control m-2' %>
<%= select_tag :location, options_from_collection_for_select(@locations, :id, :city, params[:location]), prompt: 'All Locations', class: 'form-control m-2' %>
<%= submit_tag 'Search', name: nil, class: 'btn btn-outline-success m-2' %>
<% end %>
......@@ -15,5 +15,8 @@ module VeNJobAnhtn
# Application configuration can go into files in config/initializers
# -- all .rb files in that directory are automatically loaded after loading
# the framework and any gems in your application.
# Auto Load
config.autoload_paths += Dir[Rails.root.join('lib','{import,common,solr}')]
end
end
development:
solr_server: 'http://192.168.1.133:8983/solr/venjob'
......@@ -6,6 +6,7 @@ Rails.application.routes.draw do
get 'industries', to: 'industries#index'
get 'detail/:id', to: 'jobs#show', as: :job
get 'jobs/:model/:slug', to: 'jobs#index', as: :jobs
get 'jobs', to: 'jobs#search', as: :search_jobs
get 'my', to: 'users#my_page', as: :my_page
get 'my/jobs', to: 'applied_jobs#index', as: :my_applied_jobs
......
require 'open-uri'
class Crawler
def initialize(logger)
@logger = logger
end
def crawl_data(page_number, base_link)
crawl_industries_locations
job_links = get_job_links(page_number, base_link)
job_links.each do |link|
next if link.empty?
crawl_job(link)
end
end
def get_job_links(page_number, link)
job_links = []
page_number.times do
document = Nokogiri::HTML(URI.open(link))
jobs_xml = document.xpath('//div/a[@class="job_link"]/@href')
jobs_xml.each { |item| job_links << item.value }
next_page = document.at_css('.next-page a')
break if next_page.nil?
link = next_page[:href]
module Crawler
class Crawler
def initialize(logger)
@logger = logger
end
job_links
end
def crawl_company(company_link)
uri = URI.parse(URI.escape(company_link)) # fix error: uri must be ascii only
document = Nokogiri::HTML(URI.open(uri))
company_name = document.css('.content .name').text
return if company_name.empty?
company_address = document.css('.content p')[1].text
company_description = document.css('.main-about-us').css('.content').text
Company.find_or_create_by(name: company_name) do |company|
company.address = company_address
company.description = company_description
# Entry point: refreshes the industry/location lookup tables, then crawls
# every non-empty job link collected from up to page_number listing pages.
def crawl_data(page_number, base_link)
  crawl_industries_locations
  get_job_links(page_number, base_link)
    .reject(&:empty?)
    .each { |job_link| crawl_job(job_link) }
end
rescue StandardError => e
@logger.error "#{e.message} - Company link: #{uri}"
end
def crawl_job(job_link)
uri = URI.parse(URI.escape(job_link)) # fix error: uri must be ascii only
document = Nokogiri::HTML(URI.open(uri))
job_title = document.at_css('.job-desc p.title').text
return if job_title.empty?
job_company_link = document.at_css('.job-desc a.job-company-name')[:href]
job_company = crawl_company(job_company_link)
return if job_company.nil?
job_location_name = document.css('.map p a').map { |val| val.text.strip }
job_locations = Location.where(city: job_location_name)
job_industry_names = document.at_xpath('//li[./strong/em[contains(@class, "mdi mdi-briefcase")]]').css('p a').map { |val| val.text.strip }
job_industries = Industry.where(name: job_industry_names)
job_salary = document.at_xpath('//li[./strong/i[contains(@class, "fa fa-usd")]]/p').try(:text).try(:strip)
job_level = document.at_xpath('//li[./strong/i[contains(@class, "mdi mdi-account")]]/p').try(:text).try(:strip)
job_experience = document.at_xpath('//li[./strong/i[contains(@class, "fa fa-briefcase")]]/p').try(:text).try(:strip)
job_exp_date = document.at_xpath('//li[./strong/i[contains(@class, "mdi mdi-calendar-check")]]/p').try(:text).try(:strip)
job_description = document.css('.job-detail-content .detail-row').to_s
Job.find_or_create_by(title: job_title,
company_id: job_company.id,
level: job_level,
experience: job_experience,
salary: job_salary,
expiration_date: job_exp_date) do |job|
job.description = job_description
job.industries << job_industries
job.locations << job_locations
# Walks up to page_number listing pages, following the ".next-page a"
# link each time, and returns every job detail URL found.
def get_job_links(page_number, link)
  collected = []
  page_number.times do
    listing = Nokogiri::HTML(URI.open(link))
    collected.concat(listing.xpath('//div/a[@class="job_link"]/@href').map(&:value))
    next_anchor = listing.at_css('.next-page a')
    break if next_anchor.nil?
    link = next_anchor[:href]
  end
  collected
end
rescue StandardError => e
@logger.error "#{e.message} - Job link: #{uri}"
end
def crawl_industries_locations
document = Nokogiri::HTML(URI.open('https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-vi.html'))
industries = document.css('#industry option').map(&:text)
locations = document.css('#location option').map(&:text)
industries.each do |val|
Industry.find_or_create_by(name: val)
# Scrapes one company profile page and persists it.
# Returns the Company record, or nil when the page has no name or any
# error occurred (errors are logged, not raised).
def crawl_company(company_link)
  # URI::DEFAULT_PARSER.escape replaces URI.escape, which was deprecated
  # in Ruby 2.7 and removed in 3.0; links may contain non-ASCII chars.
  uri = URI.parse(URI::DEFAULT_PARSER.escape(company_link))
  document = Nokogiri::HTML(URI.open(uri))
  company_name = document.css('.content .name').text
  return if company_name.empty?
  company_address = document.css('.content p')[1].text
  company_description = document.css('.main-about-us').css('.content').text
  Company.find_or_create_by(name: company_name) do |company|
    company.address = company_address
    company.description = company_description
  end
rescue StandardError => e
  @logger.error "#{e.message} - Company link: #{uri}"
end
locations.take(Location::CITY_VIETNAM_NUMBER).each do |val|
Location.find_or_create_by(city: val) do |location|
location.oversea = false
# Scrapes one job detail page, resolving its company (crawled on demand),
# locations and industries, then persists the Job. Errors are logged and
# swallowed so a bad page does not abort the whole crawl.
def crawl_job(job_link)
  # URI::DEFAULT_PARSER.escape replaces URI.escape, which was deprecated
  # in Ruby 2.7 and removed in 3.0; links may contain non-ASCII chars.
  uri = URI.parse(URI::DEFAULT_PARSER.escape(job_link))
  document = Nokogiri::HTML(URI.open(uri))
  job_title = document.at_css('.job-desc p.title').text
  return if job_title.empty?
  job_company_link = document.at_css('.job-desc a.job-company-name')[:href]
  job_company = crawl_company(job_company_link)
  return if job_company.nil?
  job_location_name = document.css('.map p a').map { |val| val.text.strip }
  job_locations = Location.where(city: job_location_name)
  job_industry_names = document.at_xpath('//li[./strong/em[contains(@class, "mdi mdi-briefcase")]]').css('p a').map { |val| val.text.strip }
  job_industries = Industry.where(name: job_industry_names)
  # Each attribute lives in a sidebar <li> identified only by its icon class.
  job_salary = document.at_xpath('//li[./strong/i[contains(@class, "fa fa-usd")]]/p').try(:text).try(:strip)
  job_level = document.at_xpath('//li[./strong/i[contains(@class, "mdi mdi-account")]]/p').try(:text).try(:strip)
  job_experience = document.at_xpath('//li[./strong/i[contains(@class, "fa fa-briefcase")]]/p').try(:text).try(:strip)
  job_exp_date = document.at_xpath('//li[./strong/i[contains(@class, "mdi mdi-calendar-check")]]/p').try(:text).try(:strip)
  job_description = document.css('.job-detail-content .detail-row').to_s
  Job.find_or_create_by(title: job_title,
                        company_id: job_company.id,
                        level: job_level,
                        experience: job_experience,
                        salary: job_salary,
                        expiration_date: job_exp_date) do |job|
    job.description = job_description
    job.industries << job_industries
    job.locations << job_locations
  end
rescue StandardError => e
  @logger.error "#{e.message} - Job link: #{uri}"
end
locations.last(locations.count - Location::CITY_VIETNAM_NUMBER).each do |val|
Location.find_or_create_by(city: val) do |location|
location.oversea = true
# Seeds the Industry and Location lookup tables from the CareerBuilder
# search form's <select> options. The first CITY_VIETNAM_NUMBER location
# options are domestic cities; everything after them is overseas.
def crawl_industries_locations
  document = Nokogiri::HTML(URI.open('https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-vi.html'))
  document.css('#industry option').map(&:text).each do |industry_name|
    Industry.find_or_create_by(name: industry_name)
  end
  cities = document.css('#location option').map(&:text)
  domestic = cities.take(Location::CITY_VIETNAM_NUMBER)
  foreign = cities.last(cities.count - Location::CITY_VIETNAM_NUMBER)
  domestic.each do |city_name|
    Location.find_or_create_by(city: city_name) { |location| location.oversea = false }
  end
  foreign.each do |city_name|
    Location.find_or_create_by(city: city_name) { |location| location.oversea = true }
  end
end
end
......
require 'csv'
require './lib/common/extract_zip'
class CsvImport
include ExtractZip
module Csv::Services
class CsvImport
include ExtractZip
def initialize(logger)
@logger = logger
end
def initialize(logger)
@logger = logger
end
def import_job(direction)
CSV.foreach("#{direction}/jobs.csv", headers: true).with_index(2) do |row, index|
next if row['category'].blank? || row['category'].match(/^[0-9]+$/).present?
def import_job(direction)
CSV.foreach("#{direction}/jobs.csv", headers: true).with_index(2) do |row, index|
next if row['category'].blank? || row['category'].match(/^[0-9]+$/).present?
title = row['name'].strip
company = Company.find_or_create_by(name: row['company name'].strip) do |c|
c.description = "Contact email: #{row['contact email']}\n"\
"Contact name: #{row['contact name']}\n"\
"Contact phone: #{row['contact phone']}"
c.address = "#{row['company address']}, #{row['company province']}"
end
industry = Industry.find_or_create_by(name: row['category'].strip)
level = row['level'].try(:strip)
salary = row['salary'].try(:strip)
locations_name = row['work place'].tr('"[]', '').split(',')
locations = Location.where(city: locations_name)
locations = locations_name.map { |city| Location.create(oversea: false, city: city) } if locations.empty?
description = "Benefits:\n#{row['benefit']}\n"\
"Descriptions:\n#{row['description']}\n"\
"Requirements:\n#{row['requirement']}"
title = row['name'].strip
company = Company.find_or_create_by(name: row['company name'].strip) do |c|
c.description = "Contact email: #{row['contact email']}\n"\
"Contact name: #{row['contact name']}\n"\
"Contact phone: #{row['contact phone']}"
c.address = "#{row['company address']}, #{row['company province']}"
end
industry = Industry.find_or_create_by(name: row['category'].strip)
level = row['level'].try(:strip)
salary = row['salary'].try(:strip)
locations_name = row['work place'].tr('"[]', '').split(',')
locations = Location.where(city: locations_name)
locations = locations_name.map { |city| Location.create(oversea: false, city: city) } if locations.empty?
description = "Benefits:\n#{row['benefit']}\n"\
"Descriptions:\n#{row['description']}\n"\
"Requirements:\n#{row['requirement']}"
Job.find_or_create_by(title: title, company_id: company.id, level: level, salary: salary) do |job|
job.industries << industry
job.locations << locations
job.description = description
Job.find_or_create_by(title: title, company_id: company.id, level: level, salary: salary) do |job|
job.industries << industry
job.locations << locations
job.description = description
end
puts title
rescue StandardError => e
@logger.error "Job #{index}: #{e.message}"
end
puts title
rescue StandardError => e
@logger.error "Job #{index}: #{e.message}"
end
end
end
# Thin wrapper around an RSolr connection for paginated job searches.
class SolrServer
  def initialize
    @solr = RSolr.connect(url: Settings.solr_server)
  end

  # Runs a paginated 'select' query built from the request params and
  # returns the raw Solr response hash.
  def search(search_params)
    query, filters = set_query_search(search_params)
    @solr.paginate(search_params[:page], Job::NUMBER_SEARCH_RESULTS, 'select',
                   params: { q: query, fq: filters })
  end

  private

  # Builds [main query, filter queries]: keyword matches title or company;
  # industry/location ids become filter queries, '*' when unselected.
  def set_query_search(search_params)
    keyword = escaped_or_wildcard(search_params[:search])
    query = "title:(#{keyword}) OR company:(#{keyword})"
    filters = [
      "industry_ids:#{escaped_or_wildcard(search_params[:industry])}",
      "location_ids:#{escaped_or_wildcard(search_params[:location])}"
    ]
    [query, filters]
  end

  # Solr-escapes a user-supplied value, or '*' (match all) when blank.
  def escaped_or_wildcard(value)
    value.blank? ? '*' : RSolr.solr_escape(value)
  end
end
require './lib/common/ftp'
require './lib/common/csv'
require './lib/common/crawler'
namespace :import_data do
logger = Logger.new('./log/import_data.log')
desc 'crawl industries locations jobs'
task :crawler, %i[page_number link] => [:environment] do |_, args|
args.with_defaults(page_number: 1, link: 'https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-vi.html')
crawler = Crawler.new(logger)
crawler = Crawler::Crawler.new(logger)
crawler.crawl_data(args[:page_number].to_i, args[:link])
end
desc 'Download csv file from FTP and import'
task csv: :environment do
destination_dir = './lib/data'
destination_dir = "#{Rails.root}/lib/import/csv/data"
Dir.mkdir destination_dir unless File.exist?(destination_dir)
ftp = Ftp.new('192.168.1.156', 'training', 'training')
ftp.download_file('jobs.zip', destination_dir)
ftp.close
csv = CsvImport.new(logger)
csv = Csv::Services::CsvImport.new(logger)
csv.extract_zip("#{destination_dir}/jobs.zip", destination_dir)
csv.import_job(destination_dir)
end
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment.