Commit db3ba1fb by Trịnh Hoàng Phúc

Fix review 12/05/2020

parent 079c82d0
Pipeline #612 failed with stages
in 0 seconds
# City record, linked many-to-many with jobs.
class City < ApplicationRecord
  # A single declaration covers both rules. Declaring presence a second time
  # registers a second presence validator, so a blank title would be reported
  # twice.
  validates :title, presence: true, uniqueness: true

  has_and_belongs_to_many :jobs
end
# Industry record, linked many-to-many with jobs.
class Industry < ApplicationRecord
  # A single declaration covers both rules. Declaring presence a second time
  # registers a second presence validator, so a blank title would be reported
  # twice.
  validates :title, presence: true, uniqueness: true

  has_and_belongs_to_many :jobs
end
class Job < ApplicationRecord
# Every job needs a title.
validates :title, presence: true

# Jobs linked to the given city id; cities are loaded together with the jobs.
scope :by_cities, ->(city_id) { includes(:cities).where("cities.id = ?", city_id).references(:cities) }
# Jobs linked to the given industry id; industries are loaded together with the jobs.
scope :by_industries, ->(industry_id) { includes(:industries).where("industries.id = ?", industry_id).references(:industries) }
# Jobs belonging to the given company. Written as a hash condition so Active
# Record binds the value instead of it being interpolated into the SQL string.
scope :by_companies, ->(company_id) { where(company_id: company_id) }

# NOTE(review): the name suggests these are the columns written by a CSV
# export; the code that consumes the list is not visible here.
EXPORT_CSV_ATTRIBUTES = %w(title updated_date_job level years_of_experience salary expiration_date).freeze

belongs_to :company
......@@ -12,9 +16,15 @@ class Job < ApplicationRecord
has_and_belongs_to_many :industries
has_and_belongs_to_many :cities

# Jobs linked to the given city id; cities are loaded together with the jobs.
scope :by_cities, ->(city_id) { includes(:cities).where("cities.id = ?", city_id).references(:cities) }
# Jobs linked to the given industry id; industries are loaded together with the jobs.
scope :by_industries, ->(industry_id) { includes(:industries).where("industries.id = ?", industry_id).references(:industries) }
# Jobs belonging to the given company. Written as a hash condition so Active
# Record binds the value instead of it being interpolated into the SQL string.
scope :by_companies, ->(company_id) { where(company_id: company_id) }

# Date-order check, applied only when the record is created.
validate :updated_date_job_cannot_be_greater_than_expiration_date, on: :create

# NOTE(review): the name suggests these are the columns written by a CSV
# export; the code that consumes the list is not visible here.
EXPORT_CSV_ATTRIBUTES = %w(title updated_date_job level years_of_experience salary expiration_date).freeze

validates :title, length: { minimum: 6 }
# These attributes are mandatory on create only.
validates :title, :updated_date_job, :level, :expiration_date, :salary, :min_salary, :max_salary, presence: true, on: :create
validates :min_salary, :max_salary, numericality: { only_integer: true }
# Validation hook: adds an error on updated_date_job when it falls after
# expiration_date.
#
# Either date may still be missing when this runs, because a failed presence
# check does not stop the remaining validations. Missing values are left to
# the presence validation instead of letting DateTime.parse raise on them.
def updated_date_job_cannot_be_greater_than_expiration_date
  return if [updated_date_job, expiration_date].any? { |value| value.to_s.strip.empty? }

  # to_s lets the comparison work whether the attributes arrive as strings or
  # as date/time objects (DateTime.parse only accepts a String).
  posted_at = DateTime.parse(updated_date_job.to_s)
  expires_at = DateTime.parse(expiration_date.to_s)
  return unless posted_at > expires_at

  errors.add(:updated_date_job, "can't be greater than expiration date")
end
end
# Wrapper around the City model for bulk import and lookup-or-create.
class CityService
  # Hands the given collection straight to City.import.
  def import(cities)
    City.import(cities)
  end

  # Returns the first City whose title matches city_title via SQL LIKE,
  # creating one with that title when nothing matches.
  def check_exist_or_create_city(city_title)
    matches = City.where("title LIKE ?", city_title)
    return City.create(title: city_title) if matches.length == 0

    matches[0]
  end
end
\ No newline at end of file
# Resolves a set of company attributes to a company id.
class CompanyService
  # Looks the company up through Company.find_or_create_by and returns the id
  # of the record that call yields.
  def check_exist_or_create_company(company_attributes)
    Company.find_or_create_by(company_attributes).id
  end
end
\ No newline at end of file
# Turns the salary text scraped from a job listing into numeric bounds.
class CrawlerService
  # Maximum reported when a listing states no upper limit.
  OPEN_ENDED_MAX_SALARY = 999_999_999
  # Listing amounts are multiplied by one million (the "Tr" unit).
  UNIT_MULTIPLIER = 1_000_000

  # Converts a salary string into a [min_salary, max_salary] pair of Integers.
  #
  # Recognised shapes:
  #   "Cạnh tranh"          -> [0, OPEN_ENDED_MAX_SALARY]
  #   "Dưới 10 Tr VND"      -> [0, 10_000_000]
  #   "Trên 20 Tr VND"      -> [20_000_000, OPEN_ENDED_MAX_SALARY]
  #   "7,5 Tr - 10 Tr VND"  -> [7_500_000, 10_000_000]
  #
  # @param salary [String] salary text from the listing
  # @return [Array(Integer, Integer)] minimum and maximum salary
  # @raise [ArgumentError] when the text matches none of the shapes above
  def self.convert_salary(salary)
    if salary == "Cạnh tranh"
      [0, OPEN_ENDED_MAX_SALARY]
    elsif salary.include?("Dưới")
      [0, amount_from(salary)]
    elsif salary.include?("Trên")
      [amount_from(salary), OPEN_ENDED_MAX_SALARY]
    else
      lower, upper = salary.split("-")
      # Without a "-" there is no upper bound to read; fail with a clear
      # message instead of a NoMethodError on nil.
      raise ArgumentError, "Unrecognized salary format: #{salary.inspect}" if upper.nil?

      [amount_from(lower), amount_from(upper)]
    end
  end

  # Extracts the numeric amount from one salary fragment and scales it.
  # Every branch goes through here, so a decimal comma ("7,5") is honoured in
  # ranges as well as in the "Dưới"/"Trên" forms.
  def self.amount_from(text)
    digits = text.gsub(/Dưới|Trên|Tr|VND|\$/, "").delete(" ").tr(",", ".")
    # round rather than truncate so float representation error cannot shave a
    # unit off the result.
    (digits.to_f * UNIT_MULTIPLIER).round
  end
  private_class_method :amount_from
end
\ No newline at end of file
# Wrapper around the Industry model for bulk import and lookup-or-create.
class IndustryService
  # Hands the given collection straight to Industry.import.
  def import(industries)
    Industry.import(industries)
  end

  # Returns the first Industry whose title matches industry_title via SQL
  # LIKE, creating one with that title when nothing matches.
  def check_exist_or_create_industry(industry_title)
    matches = Industry.where("title LIKE ?", industry_title)
    return Industry.create(title: industry_title) if matches.length == 0

    matches[0]
  end
end
\ No newline at end of file
class JobService
def check_exist_or_create_job job_attributes
def self.check_exist_or_create_job job_attributes
job = Job.find_or_create_by(job_attributes)
return job
end
......
......@@ -5,11 +5,8 @@ namespace :crawler do
desc "Crawler Careerbuilder"
task job: :environment do
# Define exception logger
exception_logger = Logger.new("log/exception_logger.log")
# Define skip logger
skip_url_logger = Logger.new("log/skip_url_logger.log")
# Define crawler logger
logger = Logger.new("log/crawler_logger.log")
# Loop page
(1..2).each do |page|
......@@ -17,22 +14,14 @@ namespace :crawler do
html_jobs = Nokogiri::HTML.parse(URI.open("https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-trang-#{page}-vi.html"))
# Loop item
html_jobs.css(".jobs-side-list .job-item").each do |item|
html_job_detail = Nokogiri::HTML.parse(URI.open(URI.encode(item.css(".figure .figcaption .title .job_link @href").text)))
if html_job_detail.at_css(".search-result-list-detail .tabs div#tab-1").nil?
logger.warn "Another template #{item.css(".figure .figcaption .title .job_link @href").text}"
next
end
# Set salary, min-salary, max-salary
salary = item.at_css(".figure .figcaption .caption .salary").text.gsub("$ ","")
if salary == "Cạnh tranh"
min_salary = 0
max_salary = 999999999
elsif salary.include? "Dưới"
min_salary = 0
max_salary = (salary.gsub("Dưới ","").gsub(" Tr VND","").gsub(",",".").to_f*1000000).to_i
elsif salary.include? "Trên"
min_salary = (salary.gsub("Trên ","").gsub(" Tr VND","").gsub(",",".").to_f*1000000).to_i
max_salary = 999999999
else
range_salary = salary.split("-")
min_salary = (range_salary[0].gsub("$ ","").gsub(" Tr ","").to_f*1000000).to_i
max_salary = (range_salary[1].gsub(" Tr VND","").gsub(" ","").to_f*1000000).to_i
end
min_salary, max_salary = CrawlerService.convert_salary salary
# Job attributes
job_attributes = {
title: item.at_css(".figure .figcaption .title a @title").text,
......@@ -41,11 +30,6 @@ namespace :crawler do
min_salary: min_salary,
max_salary: max_salary
}
# Define industry ids array
industries = []
html_job_detail = Nokogiri::HTML.parse(URI.open(URI.encode(item.css(".figure .figcaption .title .job_link @href").text)))
if html_job_detail.at_css(".search-result-list-detail .container .no-gutters").present?
html_job_detail.css(".search-result-list-detail .tabs #tab-1 .job-detail-content .has-background ul li").each do |ele|
type = ele.at_css("strong").text
case type
......@@ -83,17 +67,18 @@ namespace :crawler do
description: html_company_detail.at_css(".jobsby-company .company-introduction .company-info .info .content ul").inner_html.squish
}
# Check exist or create company
job_attributes[:company_id] = CompanyService.new.check_exist_or_create_company company_attributes
job_attributes[:company_id] = Company.find_or_create_by(company_attributes).id
end
end
# Create job
job = JobService.new.check_exist_or_create_job job_attributes
job = JobService.check_exist_or_create_job job_attributes
if job.errors.full_messages.present?
logger.error "#{job.errors.full_messages.join(",")}"
next
end
# Define cities array
cities = []
item.css(".figure .figcaption .caption .location ul li").each do |city|
city = city_service.check_exist_or_create_city city.text.squish
cities << city
cities = item.css(".figure .figcaption .caption .location ul li").map do |city|
city = City.find_or_create_by({title: city.text.squish})
end
# Create city_job
if cities.length > 0
......@@ -101,23 +86,19 @@ namespace :crawler do
job.cities << city
end
end
# Create industry_job
html_job_detail.css(".search-result-list-detail .tabs #tab-1 .job-detail-content .detail-box .industry p a").each do |ele|
industry = industry_service.check_exist_or_create_industry ele.text.gsub(",","").squish
industries << industry
# Define industries array
industries = html_job_detail.css(".search-result-list-detail .tabs #tab-1 .job-detail-content .detail-box .industry p a").map do |ele|
industry = Industry.find_or_create_by({title: ele.text.gsub(",","").squish})
end
# Create industry_job
if industries.length > 0
industries.each do |industry|
job.industries << industry
end
end
else
skip_url_logger.info "another template #{item.at_css(".figure .figcaption .title .job_link @href").text}"
end
logger.info "Crawl success url : #{item.css(".figure .figcaption .title .job_link @href").text}"
rescue Exception => e
exception_logger.info e
skip_url_logger.info "another template #{item.at_css(".figure .figcaption .title .job_link @href").text}"
logger.error e
next
end
end
......@@ -142,7 +123,7 @@ namespace :crawler do
end
cities = cities_in_country + cities_foreign
if cities.length > 0
city_service.import cities
City.import cities
end
end
......@@ -156,15 +137,7 @@ namespace :crawler do
}
end
if industries.length > 0
industry_service.import industries
Industry.import industries
end
end
# Builds a CityService on first use and returns that same instance afterwards.
def city_service
  @city_service = CityService.new unless @city_service
  @city_service
end
# Builds an IndustryService on first use and returns that same instance afterwards.
def industry_service
  @industry_service = IndustryService.new unless @industry_service
  @industry_service
end
end
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment