Commit 8d674ad5 by Xuan Trung Le

fix data crawler

parent 5c5c6c7b
......@@ -15,10 +15,10 @@ class Crawler
doc = Nokogiri::HTML(open(link))
if doc.css('#template_vantai').blank? &&
doc.css('#template_1').blank? &&
doc.css('#template_2').blank? &&
doc.css('#template_3').blank? &&
doc.css('#template_4').blank?
doc.css('#template_1').blank? &&
doc.css('#template_2').blank? &&
doc.css('#template_3').blank? &&
doc.css('#template_4').blank?
params = use_template_default(doc, link)
job_details << params
......@@ -31,7 +31,6 @@ class Crawler
params = {}
params_company = {}
description = []
# get job's name
params[:name] = doc.css('.top-job .top-job-info h1').text.strip
params[:updated_date] = doc.css('.datepost').text.split(':')[1].strip
......@@ -77,10 +76,13 @@ class Crawler
# Extracts the company details from a parsed job page.
#
# @param doc [#css] parsed page; must respond to `css(selector)` and return a
#   node list supporting `length`, `text` and `[]` (the caller passes the
#   result of `Nokogiri::HTML`).
# @return [Hash] always contains :name. :location and :description are set
#   only when the company title element is present on the page.
def self.crawl_company_infomation(doc)
  params = {}
  title_nodes = doc.css('.box1Detail .TitleDetailNew span')

  if title_nodes.length > 0
    params[:name] = title_nodes.text
    # The title can exist without a label; skip the location rather than
    # calling `text` on nil.
    first_label = doc.css('.box1Detail .TitleDetailNew label')[0]
    params[:location] = first_label.text if first_label
    params[:description] = doc.css('.desc_company p').text
  end

  # Pages without a company title fall back to the placeholder name
  # ('Bảo mật' is Vietnamese for "confidential"). The fields must not be
  # re-read unguarded after this point, or the fallback is overwritten with
  # an empty string and a missing label raises NoMethodError.
  params[:name] ||= 'Bảo mật'
  params
end
......
......@@ -17,11 +17,23 @@ class Job < ApplicationRecord
experience: item[:experience],
expiry_date: item[:expiry_date],
updated_date: item[:updated_date])
job.city = City.find_or_initialize_by(name: (item[:city] ||= '').split(':')[0]) #TODO fix this line
# City
unless item[:city].blank?
job.city = City.find_or_create_by(name: (item[:city] ||= '').split(':')[0])
end
# Company
job.company = Company.find_or_initialize_by(name: item[:company_name])
job.company.location = item[:company_location]
job.company.description = item[:company_description]
job.company.city = job.city
# Industry
unless item[:industry].blank?
item[:industry].split(',').each do |name|
job.industries << Industry.find_or_create_by(name: name.strip)
end
end
job.save
end
end
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment