Commit b7c9fd57 by Trịnh Hoàng Phúc

Fix review 13/05/2020

parent db3ba1fb
Pipeline #614 canceled with stages
in 0 seconds
...@@ -16,10 +16,10 @@ class Job < ApplicationRecord ...@@ -16,10 +16,10 @@ class Job < ApplicationRecord
has_and_belongs_to_many :industries has_and_belongs_to_many :industries
has_and_belongs_to_many :cities has_and_belongs_to_many :cities
validate :updated_date_job_cannot_be_greater_than_expiration_date, on: :create validate :updated_date_job_cannot_be_greater_than_expiration_date
validates :title, length: { minimum: 6 } validates :title, length: { minimum: 6 }
validates :title, :updated_date_job, :level, :expiration_date, :salary, :min_salary, :max_salary, presence: true, on: :create validates :title, :updated_date_job, :level, :expiration_date, :salary, :min_salary, :max_salary, presence: true
validates :min_salary, :max_salary, numericality: { only_integer: true } validates :min_salary, :max_salary, numericality: { only_integer: true }
def updated_date_job_cannot_be_greater_than_expiration_date def updated_date_job_cannot_be_greater_than_expiration_date
......
class CrawlerService class CrawlerService
def self.convert_salary salary def self.convert_salary(salary)
if salary == "Cạnh tranh" return [0, 999_999_999] if salary == "Cạnh tranh"
[0, 999999999]
elsif salary.include? "Dưới" vn_salary = salary.tr("^[0-9]{1,2}[.,]\d{1-2}", " ")
max_salary = (salary.gsub("Dưới ","").gsub(" Tr VND","").gsub(",",".").to_f*1000000).to_i .tr(",",".")
[0, max_salary] .split(" ")
elsif salary.include? "Trên" .map { |s| (s.to_f*1_000_000).to_i }
min_salary = (salary.gsub("Trên ","").gsub(" Tr VND","").gsub(",",".").to_f*1000000).to_i
max_salary = 999999999 return [0, vn_salary[0]] if salary.include? "Dưới"
[min_salary, max_salary] return [vn_salary[0], 0] if salary.include? "Trên"
else
range_salary = salary.split("-") [vn_salary[0], vn_salary[1]]
min_salary = (range_salary[0].gsub("$ ","").gsub(" Tr ","").to_f*1000000).to_i end
max_salary = (range_salary[1].gsub(" Tr VND","").gsub(" ","").to_f*1000000).to_i
[min_salary, max_salary] def self.imports(job_attributes, company_attributes, cities, industries)
# Body of CrawlerService.imports. Everything below runs inside one
# ActiveRecord transaction block (the block and the method are closed on the
# rows that follow this span).
ActiveRecord::Base.transaction do
# Guard: abort when any of the four arguments is nil.
# NOTE(review): this raises the root Exception class, so only a
# `rescue Exception` will catch it — confirm that is intended.
raise Exception.new "Not enough data transferred" if job_attributes.nil? || company_attributes.nil? || cities.nil? || industries.nil?
# Find or create the company from its attributes and store its id on the job attributes.
job_attributes[:company_id] = Company.find_or_create_by(company_attributes).id
# Find or create the job using the complete attribute hash as the lookup key.
job = Job.find_or_create_by(job_attributes)
# When the returned job carries error messages, raise them joined with ",".
if job.errors.full_messages.present?
raise Exception.new "#{job.errors.full_messages.join(",")}"
# NOTE(review): unreachable — the raise on the previous line always fires first.
raise ActiveRecord::Rollback
end
# Turn each city title into the City record found or created for that title.
cities = cities.map do |city|
City.find_or_create_by({title: city})
end
# Turn each industry title into the Industry record found or created for that title.
industries = industries.map do |industry|
Industry.find_or_create_by({title: industry})
end
# Append every city record to the job's cities association.
# NOTE(review): the length guard looks redundant, since `each` over an empty
# array does nothing.
if cities.length > 0
cities.each do |city|
job.cities << city
end
end
# Append every industry record to the job's industries association
# (same guard as for cities).
if industries.length > 0
industries.each do |industry|
job.industries << industry
end
end
end end
end end
end end
\ No newline at end of file
# Service wrapper around job lookup/creation.
class JobService
  class << self
    # Delegates to Job.find_or_create_by with the given attributes and
    # returns whatever that call returns.
    def check_exist_or_create_job(job_attributes)
      Job.find_or_create_by(job_attributes)
    end
  end
end
\ No newline at end of file
class AddForeignToCities < ActiveRecord::Migration[6.0] class AddForeignToCities < ActiveRecord::Migration[6.0]
def change def change
add_column :cities, :foreign, :boolean, :default => false add_column :cities, :foreign, :boolean, default: false
end end
end end
class AddColumnsToJobs < ActiveRecord::Migration[6.0] class AddColumnsToJobs < ActiveRecord::Migration[6.0]
def change def change
add_column :jobs, :min_salary, :bigint, :default => 0 add_column :jobs, :min_salary, :bigint, default: 0
add_column :jobs, :max_salary, :bigint, :default => 0 add_column :jobs, :max_salary, :bigint, default: 0
add_column :jobs, :benefit, :text add_column :jobs, :benefit, :text
add_column :jobs, :job_requirements, :text add_column :jobs, :job_requirements, :text
add_column :jobs, :other_information, :text add_column :jobs, :other_information, :text
......
...@@ -20,8 +20,12 @@ namespace :crawler do ...@@ -20,8 +20,12 @@ namespace :crawler do
next next
end end
# Set salary, min-salary, max-salary # Set salary, min-salary, max-salary
if item.at_css(".figure .figcaption .caption .salary").text.include? "USD"
logger.warn "Another template #{item.css(".figure .figcaption .title .job_link @href").text}"
next
end
salary = item.at_css(".figure .figcaption .caption .salary").text.gsub("$ ","") salary = item.at_css(".figure .figcaption .caption .salary").text.gsub("$ ","")
min_salary, max_salary = CrawlerService.convert_salary salary min_salary, max_salary = CrawlerService.convert_salary(salary)
# Job attributes # Job attributes
job_attributes = { job_attributes = {
title: item.at_css(".figure .figcaption .title a @title").text, title: item.at_css(".figure .figcaption .title a @title").text,
...@@ -30,7 +34,7 @@ namespace :crawler do ...@@ -30,7 +34,7 @@ namespace :crawler do
min_salary: min_salary, min_salary: min_salary,
max_salary: max_salary max_salary: max_salary
} }
html_job_detail.css(".search-result-list-detail .tabs #tab-1 .job-detail-content .has-background ul li").each do |ele| html_job_detail.css(".job-detail-content .row .has-background ul li").each do |ele|
type = ele.at_css("strong").text type = ele.at_css("strong").text
case type case type
when "Hết hạn nộp" when "Hết hạn nộp"
...@@ -42,8 +46,8 @@ namespace :crawler do ...@@ -42,8 +46,8 @@ namespace :crawler do
end end
end end
html_job_detail.css(".search-result-list-detail .tabs #tab-1 .job-detail-content .detail-row").each do |ele| html_job_detail.css(".search-result-list-detail .tabs #tab-1 .job-detail-content .detail-row").each do |ele|
if ele.at_css("h3").present? next if ele.at_css(".detail-title").nil?
type = ele.at_css("h3").text type = ele.at_css(".detail-title").text
case type case type
when "Phúc lợi " when "Phúc lợi "
job_attributes[:benefit] = ele.at_css("ul").inner_html.squish job_attributes[:benefit] = ele.at_css("ul").inner_html.squish
...@@ -55,47 +59,27 @@ namespace :crawler do ...@@ -55,47 +59,27 @@ namespace :crawler do
job_attributes[:other_information] = ele.inner_html.squish.gsub("<h3 class=\"detail-title\">Thông tin khác</h3>","") job_attributes[:other_information] = ele.inner_html.squish.gsub("<h3 class=\"detail-title\">Thông tin khác</h3>","")
end end
end end
end next if item.at_css(".figure .image a @href").text == "javascript:void(0);"
if item.at_css(".figure .image a @href").text != "javascript:void(0);"
# Company attributes # Company attributes
html_company_detail = Nokogiri::HTML.parse(open(URI.encode(item.css(".figure .image a @href").text))) html_company_detail = Nokogiri::HTML.parse(open(URI.encode(item.css(".figure .image a @href").text)))
if html_company_detail.at_css(".jobsby-company").present? next if html_company_detail.at_css(".jobsby-company").nil?
company_attributes = { company_attributes = {
title: html_company_detail.at_css(".jobsby-company .company-introduction .company-info .info .content .name").text, title: html_company_detail.at_css(".jobsby-company .company-introduction .company-info .info .content .name").text,
address: html_company_detail.css(".jobsby-company .company-introduction .company-info .info .content p")[1].text, address: html_company_detail.css(".jobsby-company .company-introduction .company-info .info .content p")[1].text,
logo: html_company_detail.at_css(".jobsby-company .company-introduction .company-info .info .img @src").text, logo: html_company_detail.at_css(".jobsby-company .company-introduction .company-info .info .img @src").text,
description: html_company_detail.at_css(".jobsby-company .company-introduction .company-info .info .content ul").inner_html.squish description: html_company_detail.at_css(".jobsby-company .company-introduction .company-info .info .content ul").inner_html.squish
} }
# Check exist or create company
job_attributes[:company_id] = Company.find_or_create_by(company_attributes).id
end
end
# Create job
job = JobService.check_exist_or_create_job job_attributes
if job.errors.full_messages.present?
logger.error "#{job.errors.full_messages.join(",")}"
next
end
# Define cities array # Define cities array
cities = item.css(".figure .figcaption .caption .location ul li").map do |city| cities = item.css(".figure .figcaption .caption .location ul li").map do |city|
city = City.find_or_create_by({title: city.text.squish}) city.text.squish
end
# Create city_job
if cities.length > 0
cities.each do |city|
job.cities << city
end
end end
# Define industries array # Define industries array
industries = html_job_detail.css(".search-result-list-detail .tabs #tab-1 .job-detail-content .detail-box .industry p a").map do |ele| industries = html_job_detail.css(".search-result-list-detail .tabs #tab-1 .job-detail-content .detail-box .industry p a").map do |industry|
industry = Industry.find_or_create_by({title: ele.text.gsub(",","").squish}) industry.text.tr(",","").squish
end
# Create industry_job
if industries.length > 0
industries.each do |industry|
job.industries << industry
end
end end
CrawlerService.imports(job_attributes, company_attributes, cities, industries)
logger.info "Crawl success url : #{item.css(".figure .figcaption .title .job_link @href").text}" logger.info "Crawl success url : #{item.css(".figure .figcaption .title .job_link @href").text}"
rescue Exception => e rescue Exception => e
logger.error e logger.error e
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment