Fix code

parent 5204717b
...@@ -2,8 +2,8 @@ ...@@ -2,8 +2,8 @@
<% @job.each do |job| %> <% @job.each do |job| %>
<% if !job.cities.blank? %> <% if !job.cities.blank? %>
<ul> <ul>
<div class="title"><strong><%= (@company.find_by(id: job.company_id)).name %></strong></div> <div class="title"><strong><%= job.title %></strong></div>
<%= job.title %> <div><%= (@company.find_by(id: job.company_id)).name %></div>
<div class="salary"><i class="fas fa-dollar-sign"></i>Lương: <%= job.salary %></div> <div class="salary"><i class="fas fa-dollar-sign"></i>Lương: <%= job.salary %></div>
<div><i class="fas fa-map-marker"></i> <div><i class="fas fa-map-marker"></i>
<% job.cities.each do |location| %> <% job.cities.each do |location| %>
......
class Crontab class Crontab
def find_company def find_company
company_info = Nokogiri::HTML(URI.open("https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-trang-1-vi.html")) company_info = Nokogiri::HTML(URI.open("https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-trang-1-vi.html"))
company_link = company_info.css('div.caption a.company-name').map{ |link| link['href'] } company_link = company_info.css('div.caption a.company-name').map { |link| link['href'] }
company_link.each do |link| company_link.each do |link|
if link.include?('\u2019') next if link == 'javascript:void(0);'
link.gsub!('\u2019',"'")
end
next if link == 'javascript:void(0);'
if link != 'https://careerbuilder.vn/vi/nha-tuyen-dung/hr-vietnam\xE2\x80\x99s-ess-client.35A4EFBA.html' if link != 'https://careerbuilder.vn/vi/nha-tuyen-dung/hr-vietnam\xE2\x80\x99s-ess-client.35A4EFBA.html'
company_page = Nokogiri::HTML(URI.open(URI.parse(URI.escape(link)))) company_page = Nokogiri::HTML(URI.open(URI.parse(URI.escape(link))))
if !(company_page.search('p.name').text).nil? unless (company_page.search('p.name').text).nil?
begin begin
name_company = company_page.search('p.name').text name_company = company_page.search('p.name').text
address_company = company_page.css('div.content p').children[1].text address_company = company_page.css('div.content p').children[1].text
introduction_company = company_page.css('div.main-about-us').text introduction_company = company_page.css('div.main-about-us').text
get_name_company = Company.find_by(name: name_company) get_name_company = Company.find_by(name: name_company)
if get_name_company.nil? if get_name_company.nil?
company = Company.create!(name: name_company, company = Company.create!(name: name_company,
address: address_company, address: address_company,
introduction: introduction_company) introduction: introduction_company)
end end
rescue StandardError => e rescue StandardError => e
puts e puts e
end end
end end
end end
...@@ -29,30 +26,28 @@ class Crontab ...@@ -29,30 +26,28 @@ class Crontab
end end
def find_job def find_job
page_access = Nokogiri::HTML(URI.open("https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-vi.html")) page_access = Nokogiri::HTML(URI.open("https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-vi.html"))
get_link = page_access.css('a.job_link').map{ |link| link['href'] } get_link = page_access.css('a.job_link').map { |link| link['href'] }
get_link.each do |link| get_link.each do |link|
if link.include?('\u2013') page_job = Nokogiri::HTML(URI.open(URI.parse(URI.escape(link))))
link.gsub!('\u2013','–') get_row = page_job.search('div.bg-blue div.row')
end
page_job = Nokogiri::HTML(URI.open(URI.parse(URI.escape(link))))
get_row = page_job.search('div.bg-blue div.row')
if get_row != "" if get_row != ""
get_name_company = page_job.search('div.job-desc a.job-company-name').text.strip get_name_company = page_job.search('div.job-desc a.job-company-name').text.strip
company_table = Company.find_by(name: get_name_company) company_table = Company.find_by(name: get_name_company)
title_job = page_job.search('div.job-desc p').text title_job = page_job.search('div.job-desc p').text
description = page_job.search('div.detail-row') description = page_job.search('div.detail-row')
arr_column = get_row.css('div.has-background').map{ |data| data.text.split(' ').join(' ') } arr_column = get_row.css('div.has-background').map { |data| data.text.split(' ').join(' ') }
job_table = Job.find_by(title: title_job) job_table = Job.find_by(title: title_job)
arr_column.each_with_index do | val, key | arr_column.each do |val|
if !company_table.nil? unless company_table.nil?
job_check = Job.find_by(title: title_job, company_id: company_table.id)
if val.include?('Ngày cập nhật') if val.include?('Ngày cập nhật')
arr_data = val.gsub('Ngày cập nhật ','').split(' ') arr_data = val.gsub('Ngày cập nhật ', '').split(' ')
date = arr_data.first date_update = arr_data.first
elsif val.include?('Lương') && val.include?('Kinh nghiệm') == true && Job.find_by(title: title_job, company_id: company_table.id) == nil elsif val.include?('Lương') && val.include?('Kinh nghiệm') == true && job_check.nil?
arr_sub = ((((val.gsub('Lương ','')).gsub(' Kinh nghiệm ', '*')).gsub(' Cấp bậc ', '*')).gsub(' Hết hạn nộp ', '*')).split('*') arr_sub = val.gsub('Lương ', '').gsub(' Kinh nghiệm ', '*').gsub(' Cấp bậc ', '*').gsub(' Hết hạn nộp ', '*').split('*')
salary = arr_sub[0] salary = arr_sub[0]
experience = arr_sub[1] experience = arr_sub[1]
level = arr_sub[2] level = arr_sub[2]
expiration_date = arr_sub[3] expiration_date = arr_sub[3]
job = Job.create!(title: title_job, job = Job.create!(title: title_job,
level: level, level: level,
...@@ -61,10 +56,10 @@ class Crontab ...@@ -61,10 +56,10 @@ class Crontab
expiration_date: expiration_date, expiration_date: expiration_date,
description: description, description: description,
company_id: company_table.id) company_id: company_table.id)
elsif val.include?('Lương') && val.include?('Kinh nghiệm') == false && Job.find_by(title: title_job, company_id: company_table.id) == nil elsif val.include?('Lương') && val.include?('Kinh nghiệm') == false && job_check.nil?
arr_sub = (((val.gsub('Lương ','')).gsub(' Cấp bậc ', '*')).gsub(' Hết hạn nộp ', '*')).split('*') arr_sub = val.gsub('Lương ', '').gsub(' Cấp bậc ', '*').gsub(' Hết hạn nộp ', '*').split('*')
salary = arr_sub[0] salary = arr_sub[0]
level = arr_sub[1] level = arr_sub[1]
expiration_date = arr_sub[2] expiration_date = arr_sub[2]
job = Job.create!(title: title_job, job = Job.create!(title: title_job,
level: level, level: level,
...@@ -77,24 +72,24 @@ class Crontab ...@@ -77,24 +72,24 @@ class Crontab
end end
end end
if !job_table.nil? && !company_table.nil? if !job_table.nil? && !company_table.nil?
location_rel = get_row.css('div.map p a').children.map{ |location| location.text.strip } location_rel = get_row.css('div.map p a').children.map { |location| location.text.strip }
location_rel.each do |loc| location_rel.each do |loc|
city_table = City.find_by(name: "#{loc}") city_table = City.find_by(name: loc)
if CityJob.find_by(job_id: job_table.id, city_id: city_table.id) == nil if CityJob.find_by(job_id: job_table.id, city_id: city_table.id).nil?
puts "Created #{job_table.id} - #{city_table.id}.#{loc}" puts "Created City #{city_table.id} => #{loc}"
city_jobs = CityJob.create!(job_id: job_table.id, city_id: city_table.id) city_jobs = CityJob.create!(job_id: job_table.id, city_id: city_table.id)
end end
end end
industry_rel = get_row.css('li a').children.map{ |industry| industry.text.strip } industry_rel = get_row.css('li a').children.map { |industry| industry.text.strip }
industry_rel.each do |ind| industry_rel.each do |ind|
industry_table = Industry.find_by(name: "#{ind}") industry_table = Industry.find_by(name: ind)
if IndustryJob.find_by(job_id: job_table.id, industry_id: industry_table.id) == nil if IndustryJob.find_by(job_id: job_table.id, industry_id: industry_table.id).nil?
puts "#{job_table.id} - #{industry_table.id}.#{ind}" puts "Created Industry #{job_table.id} - #{industry_table.id} => #{ind}"
industry_jobs = IndustryJob.create!(job_id: job_table.id, industry_id: industry_table.id) industry_jobs = IndustryJob.create!(job_id: job_table.id, industry_id: industry_table.id)
end end
end end
end end
end end
end end
end end
end end
\ No newline at end of file
...@@ -6,23 +6,23 @@ require 'zip' ...@@ -6,23 +6,23 @@ require 'zip'
action = Crawler.new action = Crawler.new
crontab = Crontab.new crontab = Crontab.new
namespace :import do namespace :import do
desc "crawler data" desc 'crawler data'
task crawler: :environment do task crawler: :environment do
action.crawl_city action.crawl_city
action.crawl_industry action.crawl_industry
action.crawl_company action.crawl_company
action.crawl_job_relationships action.crawl_job_relationships
end end
desc "get file CSV from server" desc 'get file CSV from Server'
task csv_get: :environment do task csv_get: :environment do
action.get_file_csv action.get_file_csv
action.extract_zip('./jobs.zip','.') action.extract_zip('./jobs.zip','.')
end end
desc "Import data from CSV" desc 'Import data from CSV'
task data_csv: :environment do task data_csv: :environment do
action.import_file_csv action.import_file_csv
end end
desc "Crontab" desc 'Crontab'
task auto: :environment do task auto: :environment do
crontab.find_company crontab.find_company
...@@ -33,4 +33,4 @@ namespace :import do ...@@ -33,4 +33,4 @@ namespace :import do
task log: :environment do task log: :environment do
action.logger action.logger
end end
end end
\ No newline at end of file
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment