Commit db3ba1fb by Trịnh Hoàng Phúc

Fix review 12/05/2020

parent 079c82d0
Pipeline #612 failed with stages
in 0 seconds
class City < ApplicationRecord class City < ApplicationRecord
validates :title, presence: true validates :title, presence: true, uniqueness: true
has_and_belongs_to_many :jobs has_and_belongs_to_many :jobs
end end
class Industry < ApplicationRecord class Industry < ApplicationRecord
validates :title, presence: true validates :title, presence: true, uniqueness: true
has_and_belongs_to_many :jobs has_and_belongs_to_many :jobs
end end
class Job < ApplicationRecord class Job < ApplicationRecord
validates :title, presence: true scope :by_cities, -> (city_id) {includes(:cities).where("cities.id = ?", city_id).references(:cities)}
scope :by_industries, -> (industry_id) {includes(:industries).where("industries.id = ?", industry_id).references(:industries)}
scope :by_companies, -> (company_id) {where("company_id = #{company_id}")}
EXPORT_CSV_ATTRIBUTES = %w(title updated_date_job level years_of_experience salary expiration_date).freeze
belongs_to :company belongs_to :company
...@@ -12,9 +16,15 @@ class Job < ApplicationRecord ...@@ -12,9 +16,15 @@ class Job < ApplicationRecord
has_and_belongs_to_many :industries has_and_belongs_to_many :industries
has_and_belongs_to_many :cities has_and_belongs_to_many :cities
scope :by_cities, -> (city_id) {includes(:cities).where("cities.id = ?", city_id).references(:cities)} validate :updated_date_job_cannot_be_greater_than_expiration_date, on: :create
scope :by_industries, -> (industry_id) {includes(:industries).where("industries.id = ?", industry_id).references(:industries)}
scope :by_companies, -> (company_id) {where("company_id = #{company_id}")}
EXPORT_CSV_ATTRIBUTES = %w(title updated_date_job level years_of_experience salary expiration_date).freeze validates :title, length: { minimum: 6 }
validates :title, :updated_date_job, :level, :expiration_date, :salary, :min_salary, :max_salary, presence: true, on: :create
validates :min_salary, :max_salary, numericality: { only_integer: true }
def updated_date_job_cannot_be_greater_than_expiration_date
if DateTime.parse(updated_date_job).to_i > DateTime.parse(expiration_date).to_i
errors.add(:updated_date_job, "can't be greater than expiration date")
end
end
end end
# Thin service wrapper around the City model.
class CityService
  # Forwards a batch of cities to City.import and returns its result.
  def import(cities)
    City.import(cities)
  end

  # Returns the first City whose title matches +city_title+ (SQL LIKE),
  # creating a new City with that title when nothing matches.
  def check_exist_or_create_city(city_title)
    matches = City.where("title LIKE ?", city_title)
    return City.create(title: city_title) if matches.length == 0

    matches[0]
  end
end
\ No newline at end of file
# Thin service wrapper around the Company model.
class CompanyService
  # Looks up a Company matching +company_attributes+, creating it when
  # absent (via Company.find_or_create_by), and returns that record's id.
  def check_exist_or_create_company(company_attributes)
    Company.find_or_create_by(company_attributes).id
  end
end
\ No newline at end of file
# Helpers for normalising values scraped by the crawler.
class CrawlerService
  # Multiplier applied to figures quoted in "Tr" (millions of VND).
  VND_PER_MILLION = 1_000_000
  # Sentinel used as the upper bound when a caption gives no maximum.
  OPEN_ENDED_MAX = 999_999_999
  private_constant :VND_PER_MILLION, :OPEN_ENDED_MAX

  # Converts a salary caption into a [min_salary, max_salary] pair of Integers.
  #
  # Recognised captions:
  #   "Cạnh tranh"          -> [0, 999999999]
  #   "Dưới 10 Tr VND"      -> [0, 10000000]
  #   "Trên 20,5 Tr VND"    -> [20500000, 999999999]
  #   "7,5 Tr - 10 Tr VND"  -> [7500000, 10000000]
  #
  # @param salary [String] caption text; a leading "$ " is tolerated
  # @return [Array(Integer, Integer)] lower and upper bound
  # @raise [ArgumentError] when the caption matches none of the formats above
  def self.convert_salary(salary)
    # Scraped text may carry stray surrounding whitespace; without the strip
    # the exact "Cạnh tranh" comparison would silently fall through.
    text = salary.to_s.strip
    return [0, OPEN_ENDED_MAX] if text == "Cạnh tranh"
    return [0, millions_to_vnd(text)] if text.include?("Dưới")
    return [millions_to_vnd(text), OPEN_ENDED_MAX] if text.include?("Trên")

    lower, upper = text.split("-")
    # A caption without a "-" separator has no upper bound to parse; fail
    # with a descriptive error instead of a NoMethodError on nil.
    raise ArgumentError, "unrecognized salary format: #{salary.inspect}" if upper.nil?

    [millions_to_vnd(lower), millions_to_vnd(upper)]
  end

  # Parses the first number in +fragment+ (decimal comma or dot) and scales it
  # from millions to whole units. Text after the number ("Tr VND") is ignored
  # by String#to_f; the "$" marker and the "Dưới"/"Trên" keywords precede the
  # number and therefore have to be removed first.
  def self.millions_to_vnd(fragment)
    number = fragment.delete("$").sub(/Dưới|Trên/, "").tr(",", ".").to_f
    # round rather than truncate so binary float error cannot drop 1 unit.
    (number * VND_PER_MILLION).round
  end
  private_class_method :millions_to_vnd
end
\ No newline at end of file
# Thin service wrapper around the Industry model.
class IndustryService
  # Forwards a batch of industries to Industry.import and returns its result.
  def import(industries)
    Industry.import(industries)
  end

  # Returns the first Industry whose title matches +industry_title+
  # (SQL LIKE), creating a new Industry with that title when none match.
  def check_exist_or_create_industry(industry_title)
    found = Industry.where("title LIKE ?", industry_title)
    found.length == 0 ? Industry.create(title: industry_title) : found[0]
  end
end
\ No newline at end of file
class JobService class JobService
def check_exist_or_create_job job_attributes def self.check_exist_or_create_job job_attributes
job = Job.find_or_create_by(job_attributes) job = Job.find_or_create_by(job_attributes)
return job return job
end end
......
...@@ -5,11 +5,8 @@ namespace :crawler do ...@@ -5,11 +5,8 @@ namespace :crawler do
desc "Crawler Careerbuilder" desc "Crawler Careerbuilder"
task job: :environment do task job: :environment do
# Define exception logger # Define crawler logger
exception_logger = Logger.new("log/exception_logger.log") logger = Logger.new("log/crawler_logger.log")
# Define skip logger
skip_url_logger = Logger.new("log/skip_url_logger.log")
# Loop page # Loop page
(1..2).each do |page| (1..2).each do |page|
...@@ -17,22 +14,14 @@ namespace :crawler do ...@@ -17,22 +14,14 @@ namespace :crawler do
html_jobs = Nokogiri::HTML.parse(URI.open("https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-trang-#{page}-vi.html")) html_jobs = Nokogiri::HTML.parse(URI.open("https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-trang-#{page}-vi.html"))
# Loop item # Loop item
html_jobs.css(".jobs-side-list .job-item").each do |item| html_jobs.css(".jobs-side-list .job-item").each do |item|
html_job_detail = Nokogiri::HTML.parse(URI.open(URI.encode(item.css(".figure .figcaption .title .job_link @href").text)))
if html_job_detail.at_css(".search-result-list-detail .tabs div#tab-1").nil?
logger.warn "Another template #{item.css(".figure .figcaption .title .job_link @href").text}"
next
end
# Set salary, min-salary, max-salary # Set salary, min-salary, max-salary
salary = item.at_css(".figure .figcaption .caption .salary").text.gsub("$ ","") salary = item.at_css(".figure .figcaption .caption .salary").text.gsub("$ ","")
if salary == "Cạnh tranh" min_salary, max_salary = CrawlerService.convert_salary salary
min_salary = 0
max_salary = 999999999
elsif salary.include? "Dưới"
min_salary = 0
max_salary = (salary.gsub("Dưới ","").gsub(" Tr VND","").gsub(",",".").to_f*1000000).to_i
elsif salary.include? "Trên"
min_salary = (salary.gsub("Trên ","").gsub(" Tr VND","").gsub(",",".").to_f*1000000).to_i
max_salary = 999999999
else
range_salary = salary.split("-")
min_salary = (range_salary[0].gsub("$ ","").gsub(" Tr ","").to_f*1000000).to_i
max_salary = (range_salary[1].gsub(" Tr VND","").gsub(" ","").to_f*1000000).to_i
end
# Job attributes # Job attributes
job_attributes = { job_attributes = {
title: item.at_css(".figure .figcaption .title a @title").text, title: item.at_css(".figure .figcaption .title a @title").text,
...@@ -41,11 +30,6 @@ namespace :crawler do ...@@ -41,11 +30,6 @@ namespace :crawler do
min_salary: min_salary, min_salary: min_salary,
max_salary: max_salary max_salary: max_salary
} }
# Define industry ids array
industries = []
html_job_detail = Nokogiri::HTML.parse(URI.open(URI.encode(item.css(".figure .figcaption .title .job_link @href").text)))
if html_job_detail.at_css(".search-result-list-detail .container .no-gutters").present?
html_job_detail.css(".search-result-list-detail .tabs #tab-1 .job-detail-content .has-background ul li").each do |ele| html_job_detail.css(".search-result-list-detail .tabs #tab-1 .job-detail-content .has-background ul li").each do |ele|
type = ele.at_css("strong").text type = ele.at_css("strong").text
case type case type
...@@ -83,17 +67,18 @@ namespace :crawler do ...@@ -83,17 +67,18 @@ namespace :crawler do
description: html_company_detail.at_css(".jobsby-company .company-introduction .company-info .info .content ul").inner_html.squish description: html_company_detail.at_css(".jobsby-company .company-introduction .company-info .info .content ul").inner_html.squish
} }
# Check exist or create company # Check exist or create company
job_attributes[:company_id] = CompanyService.new.check_exist_or_create_company company_attributes job_attributes[:company_id] = Company.find_or_create_by(company_attributes).id
end end
end end
# Create job # Create job
job = JobService.new.check_exist_or_create_job job_attributes job = JobService.check_exist_or_create_job job_attributes
if job.errors.full_messages.present?
logger.error "#{job.errors.full_messages.join(",")}"
next
end
# Define cities array # Define cities array
cities = [] cities = item.css(".figure .figcaption .caption .location ul li").map do |city|
item.css(".figure .figcaption .caption .location ul li").each do |city| city = City.find_or_create_by({title: city.text.squish})
city = city_service.check_exist_or_create_city city.text.squish
cities << city
end end
# Create city_job # Create city_job
if cities.length > 0 if cities.length > 0
...@@ -101,23 +86,19 @@ namespace :crawler do ...@@ -101,23 +86,19 @@ namespace :crawler do
job.cities << city job.cities << city
end end
end end
# Create industry_job # Define industries array
html_job_detail.css(".search-result-list-detail .tabs #tab-1 .job-detail-content .detail-box .industry p a").each do |ele| industries = html_job_detail.css(".search-result-list-detail .tabs #tab-1 .job-detail-content .detail-box .industry p a").map do |ele|
industry = industry_service.check_exist_or_create_industry ele.text.gsub(",","").squish industry = Industry.find_or_create_by({title: ele.text.gsub(",","").squish})
industries << industry
end end
# Create industry_job
if industries.length > 0 if industries.length > 0
industries.each do |industry| industries.each do |industry|
job.industries << industry job.industries << industry
end end
end end
else logger.info "Crawl success url : #{item.css(".figure .figcaption .title .job_link @href").text}"
skip_url_logger.info "another template #{item.at_css(".figure .figcaption .title .job_link @href").text}"
end
rescue Exception => e rescue Exception => e
exception_logger.info e logger.error e
skip_url_logger.info "another template #{item.at_css(".figure .figcaption .title .job_link @href").text}"
next next
end end
end end
...@@ -142,7 +123,7 @@ namespace :crawler do ...@@ -142,7 +123,7 @@ namespace :crawler do
end end
cities = cities_in_country + cities_foreign cities = cities_in_country + cities_foreign
if cities.length > 0 if cities.length > 0
city_service.import cities City.import cities
end end
end end
...@@ -156,15 +137,7 @@ namespace :crawler do ...@@ -156,15 +137,7 @@ namespace :crawler do
} }
end end
if industries.length > 0 if industries.length > 0
industry_service.import industries Industry.import industries
end end
end end
def city_service
@city_service ||= CityService.new
end
def industry_service
@industry_service ||= IndustryService.new
end
end end
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment