Fix code

parent 5204717b
...@@ -2,8 +2,8 @@ ...@@ -2,8 +2,8 @@
<% @job.each do |job| %> <% @job.each do |job| %>
<% if !job.cities.blank? %> <% if !job.cities.blank? %>
<ul> <ul>
<div class="title"><strong><%= (@company.find_by(id: job.company_id)).name %></strong></div> <div class="title"><strong><%= job.title %></strong></div>
<%= job.title %> <div><%= (@company.find_by(id: job.company_id)).name %></div>
<div class="salary"><i class="fas fa-dollar-sign"></i>Lương: <%= job.salary %></div> <div class="salary"><i class="fas fa-dollar-sign"></i>Lương: <%= job.salary %></div>
<div><i class="fas fa-map-marker"></i> <div><i class="fas fa-map-marker"></i>
<% job.cities.each do |location| %> <% job.cities.each do |location| %>
......
class Crawler class Crawler
def crawl_city def crawl_city
page = Nokogiri::HTML(URI.open("https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-vi.html")) page = Nokogiri::HTML(URI.open("https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-vi.html"))
get_name = page.search('select#location') get_name = page.search('select#location')
...@@ -14,15 +14,17 @@ ...@@ -14,15 +14,17 @@
end end
end end
end end
# Seeds Industry records from the industry drop-down of the careerbuilder.vn
# job-listing page.
#
# NOTE(review): each row below appears to hold the pre-change text followed by
# the post-change text of a side-by-side diff; within this method the two
# halves differ only in the spacing of `map {` -- confirm before treating
# these rows as runnable Ruby.
#
# Steps, as written:
#   1. Download and parse the listing page with Nokogiri via URI.open.
#   2. Select the node(s) matching `select#industry`.
#   3. Collect the whitespace-stripped text of every `option` beneath them.
#   4. Call `Industry.create!` once per collected text.
#
# There is no rescue in this method, so an error raised by the download or by
# `create!` propagates to the caller.
# NOTE(review): every option text is stored, including any placeholder entry
# the drop-down may contain, and no existing-record lookup is done first --
# confirm that duplicates on repeated runs are acceptable.
# The `industry` local is assigned but not read again inside this method.
def crawl_industry def crawl_industry
page = Nokogiri::HTML(URI.open("https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-vi.html")) page = Nokogiri::HTML(URI.open("https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-vi.html"))
get_name = page.search('select#industry') get_name = page.search('select#industry')
data_industry = get_name.search('option').map{ |p| p.text.strip } data_industry = get_name.search('option').map { |p| p.text.strip }
data_industry.each do |name_industry| data_industry.each do |name_industry|
industry = Industry.create!(name: name_industry) industry = Industry.create!(name: name_industry)
end end
end end
def crawl_company def crawl_company
for n in 1..10 for n in 1..10
company_info = Nokogiri::HTML(URI.open("https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-trang-#{n}-vi.html")) company_info = Nokogiri::HTML(URI.open("https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-trang-#{n}-vi.html"))
...@@ -36,10 +38,10 @@ ...@@ -36,10 +38,10 @@
company_page = Nokogiri::HTML(URI.open(URI.parse(URI.escape(link)))) company_page = Nokogiri::HTML(URI.open(URI.parse(URI.escape(link))))
if !(company_page.search('p.name').text).nil? if !(company_page.search('p.name').text).nil?
begin begin
name_company = company_page.search('p.name').text name_company = company_page.search('p.name').text
address_company = company_page.css('div.content p').children[1].text address_company = company_page.css('div.content p').children[1].text
introduction_company = company_page.css('div.main-about-us').text introduction_company = company_page.css('div.main-about-us').text
get_name_company = Company.find_by(name: name_company) get_name_company = Company.find_by(name: name_company)
if get_name_company.nil? if get_name_company.nil?
company = Company.create!(name: name_company, company = Company.create!(name: name_company,
address: address_company, address: address_company,
...@@ -53,32 +55,31 @@ ...@@ -53,32 +55,31 @@
end end
end end
end end
def crawl_job_relationships def crawl_job_relationships
for n in 1..10 for n in 1..10
page_access = Nokogiri::HTML(URI.open("https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-trang-#{n}-vi.html")) page_access = Nokogiri::HTML(URI.open("https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-trang-#{n}-vi.html"))
get_link = page_access.css('a.job_link').map{ |link| link['href'] } get_link = page_access.css('a.job_link').map { |link| link['href'] }
get_link.each do |link| get_link.each do |link|
if link.include?('\u2013')
link.gsub!('\u2013','–')
end
page_job = Nokogiri::HTML(URI.open(URI.parse(URI.escape(link)))) page_job = Nokogiri::HTML(URI.open(URI.parse(URI.escape(link))))
get_row = page_job.search('div.bg-blue div.row') get_row = page_job.search('div.bg-blue div.row')
if get_row != "" if get_row != ""
get_name_company = page_job.search('div.job-desc a.job-company-name').text.strip get_name_company = page_job.search('div.job-desc a.job-company-name').text.strip
company_table = Company.find_by(name: "#{get_name_company}") company_table = Company.find_by(name: get_name_company)
title_job = page_job.search('div.job-desc p').text title_job = page_job.search('div.job-desc p').text
description = page_job.search('div.detail-row') description = page_job.search('div.detail-row')
arr_column = get_row.css('div.has-background').map{ |data| data.text.split(' ').join(' ') } arr_column = get_row.css('div.has-background').map { |data| data.text.split(' ').join(' ') }
arr_column.each_with_index do | val, key | arr_column.each_with_index do |val, key |
if !company_table.nil? unless company_table.nil?
job_check = Job.find_by(title: title_job, company_id: company_table.id)
if val.include?('Ngày cập nhật') if val.include?('Ngày cập nhật')
arr_data = val.gsub('Ngày cập nhật ','').split(' ') arr_data = val.gsub('Ngày cập nhật ', '').split(' ')
date = arr_data.first date = arr_data.first
elsif val.include?('Lương') && val.include?('Kinh nghiệm') == true && Job.find_by(title: title_job, company_id: company_table.id) == nil elsif val.include?('Lương') && val.include?('Kinh nghiệm') == true && job_check.nil?
arr_sub = ((((val.gsub('Lương ','')).gsub(' Kinh nghiệm ', '*')).gsub(' Cấp bậc ', '*')).gsub(' Hết hạn nộp ', '*')).split('*') arr_sub = val.gsub('Lương ', '').gsub(' Kinh nghiệm ', '*').gsub(' Cấp bậc ', '*').gsub(' Hết hạn nộp ', '*').split('*')
salary = arr_sub[0] salary = arr_sub[0]
experience = arr_sub[1] experience = arr_sub[1]
level = arr_sub[2] level = arr_sub[2]
expiration_date = arr_sub[3] expiration_date = arr_sub[3]
job = Job.create!(title: title_job, job = Job.create!(title: title_job,
level: level, level: level,
...@@ -87,10 +88,10 @@ ...@@ -87,10 +88,10 @@
expiration_date: expiration_date, expiration_date: expiration_date,
description: description, description: description,
company_id: company_table.id) company_id: company_table.id)
elsif val.include?('Lương') && val.include?('Kinh nghiệm') == false && Job.find_by(title: title_job, company_id: company_table.id) == nil elsif val.include?('Lương') && val.include?('Kinh nghiệm') == false && job_check.nil?
arr_sub = (((val.gsub('Lương ','')).gsub(' Cấp bậc ', '*')).gsub(' Hết hạn nộp ', '*')).split('*') arr_sub = val.gsub('Lương ', '').gsub(' Cấp bậc ', '*').gsub(' Hết hạn nộp ', '*').split('*')
salary = arr_sub[0] salary = arr_sub[0]
level = arr_sub[1] level = arr_sub[1]
expiration_date = arr_sub[2] expiration_date = arr_sub[2]
job = Job.create!(title: title_job, job = Job.create!(title: title_job,
level: level, level: level,
...@@ -101,24 +102,23 @@ ...@@ -101,24 +102,23 @@
company_id: company_table.id) company_id: company_table.id)
end end
end end
if !company_table.nil? next if !company_table.nil?
job_table = Job.find_by(title: title_job) job_table = Job.find_by(title: title_job)
if !job_table.nil? unless job_table.nil?
location_rel = get_row.css('div.map p a').children.map{ |location| location.text.strip } location_rel = get_row.css('div.map p a').children.map { |location| location.text.strip }
location_rel.each do |loc| location_rel.each do |loc|
city_table = City.find_by(name: "#{loc}") city_table = City.find_by(name: loc)
if CityJob.find_by(job_id: job_table.id, city_id: city_table.id) == nil if CityJob.find_by(job_id: job_table.id, city_id: city_table.id).nil?
puts "Created City: #{job_table.id} - #{city_table.id}.#{loc}" puts "Created City: #{job_table.id} - #{city_table.id}.#{loc}"
city_jobs = CityJob.create!(job_id: job_table.id, city_id: city_table.id) city_jobs = CityJob.create!(job_id: job_table.id, city_id: city_table.id)
end
end end
industry_rel = get_row.css('li a').children.map{ |industry| industry.text.strip } end
industry_rel.each do |ind| industry_rel = get_row.css('li a').children.map { |industry| industry.text.strip }
industry_table = Industry.find_by(name: "#{ind}") industry_rel.each do |ind|
if IndustryJob.find_by(job_id: job_table.id, industry_id: industry_table.id) == nil industry_table = Industry.find_by(name: ind)
puts "Created Industry: #{job_table.id} - #{industry_table.id}.#{ind}" if IndustryJob.find_by(job_id: job_table.id, industry_id: industry_table.id).nil?
industry_jobs = IndustryJob.create!(job_id: job_table.id, industry_id: industry_table.id) puts "Created Industry: #{job_table.id} - #{industry_table.id}.#{ind}"
end industry_jobs = IndustryJob.create!(job_id: job_table.id, industry_id: industry_table.id)
end end
end end
end end
...@@ -151,51 +151,52 @@ ...@@ -151,51 +151,52 @@
file = "jobs.csv" file = "jobs.csv"
CSV.foreach(file, headers: true) do |row| CSV.foreach(file, headers: true) do |row|
begin begin
company_name = row["company name"].strip company_name = row["company name"]
company_address = row["company address"] company_address = row["company address"]
company_introduction = row["benefit"] company_introduction = row[:benefit]
company_table = Company.find_by(name: "#{company_name}") company_table = Company.find_by(name: company_name)
if company_table == nil if company_table.nil?
company_table = Company.create!(name: company_name, company_table = Company.create!(name: company_name,
address: company_address, address: company_address,
introduction: company_introduction) introduction: company_introduction)
end end
title_job = row["name"].strip title_job = row[:name]
description_job = row["description"] description_job = row[:description]
level = row["level"] level = row[:level]
salary = row["salary"] salary = row[:salary]
if company_table != nil && Job.find_by(title: title_job, level: level, salary: salary, company_id: company_table.id) == nil unless company_table.nil?
job_table = Job.create!(title: title_job, job_table = Job.create!(title: title_job,
description: description_job, description: description_job,
level: level, level: level,
salary: salary, salary: salary,
company_id: company_table.id) company_id: company_table.id)
end puts job_table.id
industry = row["category"].strip end
industry_find = Industry.find_by(name: industry) industry = row[:category]
if industry_find == nil industry_find = Industry.find_by(name: industry)
industry_table = Industry.create!(name: industry) if industry_find.nil?
industry_job_table = IndustryJob.create!(job_id: job_table.id, industry_id: industry_find.id) industry_table = Industry.create!(name: industry)
else industry_job_table = IndustryJob.create!(job_id: job_table.id, industry_id: industry_find.id)
industry_job_table = IndustryJob.create!(job_id: job_table.id, industry_id: industry_find.id) else
end industry_job_table = IndustryJob.create!(job_id: job_table.id, industry_id: industry_find.id)
puts "=========================================" end
puts job_table.id, title_job, industry, salary puts job_table.id, title_job, industry, salary
location_data = row["work place"].strip location_data = row["work place"]
location = (location_data.gsub('["','')).gsub('"]','').strip location = location_data.gsub('["', '').gsub('"]', '')
location_find = City.find_by(name: location) location_find = City.find_by(name: location)
if location_find == nil if location_find.nil?
city_table = City.create!(name: location) city_table = City.create!(name: location)
city_job_table = CityJob.create!(job_id: job_table.id, city_id: location_find.id) city_job_table = CityJob.create!(job_id: job_table.id, city_id: location_find.id)
else else
city_job_table = CityJob.create!(job_id: job_table.id, city_id: location_find.id) city_job_table = CityJob.create!(job_id: job_table.id, city_id: location_find.id)
end end
puts "Location: #{location}" puts "Location: #{location}"
rescue StandardError => e rescue StandardError => e
puts e puts e
end end
end end
end end
def logger def logger
# config.log_level = :info # config.log_level = :info
Rails.logger = Logger.new(STDOUT) Rails.logger = Logger.new(STDOUT)
...@@ -203,4 +204,4 @@ ...@@ -203,4 +204,4 @@
Rails.logger.level = Logger::DEBUG Rails.logger.level = Logger::DEBUG
Rails.logger.datetime_format = "%Y-%m-%d %H:%M:%S" Rails.logger.datetime_format = "%Y-%m-%d %H:%M:%S"
end end
end end
\ No newline at end of file
class Crontab class Crontab
def find_company def find_company
company_info = Nokogiri::HTML(URI.open("https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-trang-1-vi.html")) company_info = Nokogiri::HTML(URI.open("https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-trang-1-vi.html"))
company_link = company_info.css('div.caption a.company-name').map{ |link| link['href'] } company_link = company_info.css('div.caption a.company-name').map { |link| link['href'] }
company_link.each do |link| company_link.each do |link|
if link.include?('\u2019') next if link == 'javascript:void(0);'
link.gsub!('\u2019',"'")
end
next if link == 'javascript:void(0);'
if link != 'https://careerbuilder.vn/vi/nha-tuyen-dung/hr-vietnam\xE2\x80\x99s-ess-client.35A4EFBA.html' if link != 'https://careerbuilder.vn/vi/nha-tuyen-dung/hr-vietnam\xE2\x80\x99s-ess-client.35A4EFBA.html'
company_page = Nokogiri::HTML(URI.open(URI.parse(URI.escape(link)))) company_page = Nokogiri::HTML(URI.open(URI.parse(URI.escape(link))))
if !(company_page.search('p.name').text).nil? unless (company_page.search('p.name').text).nil?
begin begin
name_company = company_page.search('p.name').text name_company = company_page.search('p.name').text
address_company = company_page.css('div.content p').children[1].text address_company = company_page.css('div.content p').children[1].text
introduction_company = company_page.css('div.main-about-us').text introduction_company = company_page.css('div.main-about-us').text
get_name_company = Company.find_by(name: name_company) get_name_company = Company.find_by(name: name_company)
if get_name_company.nil? if get_name_company.nil?
company = Company.create!(name: name_company, company = Company.create!(name: name_company,
address: address_company, address: address_company,
introduction: introduction_company) introduction: introduction_company)
end end
rescue StandardError => e rescue StandardError => e
puts e puts e
end end
end end
end end
...@@ -29,30 +26,28 @@ class Crontab ...@@ -29,30 +26,28 @@ class Crontab
end end
def find_job def find_job
page_access = Nokogiri::HTML(URI.open("https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-vi.html")) page_access = Nokogiri::HTML(URI.open("https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-vi.html"))
get_link = page_access.css('a.job_link').map{ |link| link['href'] } get_link = page_access.css('a.job_link').map { |link| link['href'] }
get_link.each do |link| get_link.each do |link|
if link.include?('\u2013') page_job = Nokogiri::HTML(URI.open(URI.parse(URI.escape(link))))
link.gsub!('\u2013','–') get_row = page_job.search('div.bg-blue div.row')
end
page_job = Nokogiri::HTML(URI.open(URI.parse(URI.escape(link))))
get_row = page_job.search('div.bg-blue div.row')
if get_row != "" if get_row != ""
get_name_company = page_job.search('div.job-desc a.job-company-name').text.strip get_name_company = page_job.search('div.job-desc a.job-company-name').text.strip
company_table = Company.find_by(name: get_name_company) company_table = Company.find_by(name: get_name_company)
title_job = page_job.search('div.job-desc p').text title_job = page_job.search('div.job-desc p').text
description = page_job.search('div.detail-row') description = page_job.search('div.detail-row')
arr_column = get_row.css('div.has-background').map{ |data| data.text.split(' ').join(' ') } arr_column = get_row.css('div.has-background').map { |data| data.text.split(' ').join(' ') }
job_table = Job.find_by(title: title_job) job_table = Job.find_by(title: title_job)
arr_column.each_with_index do | val, key | arr_column.each do |val|
if !company_table.nil? unless company_table.nil?
job_check = Job.find_by(title: title_job, company_id: company_table.id)
if val.include?('Ngày cập nhật') if val.include?('Ngày cập nhật')
arr_data = val.gsub('Ngày cập nhật ','').split(' ') arr_data = val.gsub('Ngày cập nhật ', '').split(' ')
date = arr_data.first date_update = arr_data.first
elsif val.include?('Lương') && val.include?('Kinh nghiệm') == true && Job.find_by(title: title_job, company_id: company_table.id) == nil elsif val.include?('Lương') && val.include?('Kinh nghiệm') == true && job_check.nil?
arr_sub = ((((val.gsub('Lương ','')).gsub(' Kinh nghiệm ', '*')).gsub(' Cấp bậc ', '*')).gsub(' Hết hạn nộp ', '*')).split('*') arr_sub = val.gsub('Lương ', '').gsub(' Kinh nghiệm ', '*').gsub(' Cấp bậc ', '*').gsub(' Hết hạn nộp ', '*').split('*')
salary = arr_sub[0] salary = arr_sub[0]
experience = arr_sub[1] experience = arr_sub[1]
level = arr_sub[2] level = arr_sub[2]
expiration_date = arr_sub[3] expiration_date = arr_sub[3]
job = Job.create!(title: title_job, job = Job.create!(title: title_job,
level: level, level: level,
...@@ -61,10 +56,10 @@ class Crontab ...@@ -61,10 +56,10 @@ class Crontab
expiration_date: expiration_date, expiration_date: expiration_date,
description: description, description: description,
company_id: company_table.id) company_id: company_table.id)
elsif val.include?('Lương') && val.include?('Kinh nghiệm') == false && Job.find_by(title: title_job, company_id: company_table.id) == nil elsif val.include?('Lương') && val.include?('Kinh nghiệm') == false && job_check.nil?
arr_sub = (((val.gsub('Lương ','')).gsub(' Cấp bậc ', '*')).gsub(' Hết hạn nộp ', '*')).split('*') arr_sub = val.gsub('Lương ', '').gsub(' Cấp bậc ', '*').gsub(' Hết hạn nộp ', '*').split('*')
salary = arr_sub[0] salary = arr_sub[0]
level = arr_sub[1] level = arr_sub[1]
expiration_date = arr_sub[2] expiration_date = arr_sub[2]
job = Job.create!(title: title_job, job = Job.create!(title: title_job,
level: level, level: level,
...@@ -77,24 +72,24 @@ class Crontab ...@@ -77,24 +72,24 @@ class Crontab
end end
end end
if !job_table.nil? && !company_table.nil? if !job_table.nil? && !company_table.nil?
location_rel = get_row.css('div.map p a').children.map{ |location| location.text.strip } location_rel = get_row.css('div.map p a').children.map { |location| location.text.strip }
location_rel.each do |loc| location_rel.each do |loc|
city_table = City.find_by(name: "#{loc}") city_table = City.find_by(name: loc)
if CityJob.find_by(job_id: job_table.id, city_id: city_table.id) == nil if CityJob.find_by(job_id: job_table.id, city_id: city_table.id).nil?
puts "Created #{job_table.id} - #{city_table.id}.#{loc}" puts "Created City #{city_table.id} => #{loc}"
city_jobs = CityJob.create!(job_id: job_table.id, city_id: city_table.id) city_jobs = CityJob.create!(job_id: job_table.id, city_id: city_table.id)
end end
end end
industry_rel = get_row.css('li a').children.map{ |industry| industry.text.strip } industry_rel = get_row.css('li a').children.map { |industry| industry.text.strip }
industry_rel.each do |ind| industry_rel.each do |ind|
industry_table = Industry.find_by(name: "#{ind}") industry_table = Industry.find_by(name: ind)
if IndustryJob.find_by(job_id: job_table.id, industry_id: industry_table.id) == nil if IndustryJob.find_by(job_id: job_table.id, industry_id: industry_table.id).nil?
puts "#{job_table.id} - #{industry_table.id}.#{ind}" puts "Created Industry #{job_table.id} - #{industry_table.id} => #{ind}"
industry_jobs = IndustryJob.create!(job_id: job_table.id, industry_id: industry_table.id) industry_jobs = IndustryJob.create!(job_id: job_table.id, industry_id: industry_table.id)
end end
end end
end end
end end
end end
end end
end end
\ No newline at end of file
...@@ -6,23 +6,23 @@ require 'zip' ...@@ -6,23 +6,23 @@ require 'zip'
action = Crawler.new action = Crawler.new
crontab = Crontab.new crontab = Crontab.new
namespace :import do namespace :import do
desc "crawler data" desc 'crawler data'
task crawler: :environment do task crawler: :environment do
action.crawl_city action.crawl_city
action.crawl_industry action.crawl_industry
action.crawl_company action.crawl_company
action.crawl_job_relationships action.crawl_job_relationships
end end
desc "get file CSV from server" desc 'get file CSV from Server'
task csv_get: :environment do task csv_get: :environment do
action.get_file_csv action.get_file_csv
action.extract_zip('./jobs.zip','.') action.extract_zip('./jobs.zip','.')
end end
desc "Import data from CSV" desc 'Import data from CSV'
task data_csv: :environment do task data_csv: :environment do
action.import_file_csv action.import_file_csv
end end
desc "Crontab" desc 'Crontab'
task auto: :environment do task auto: :environment do
crontab.find_company crontab.find_company
...@@ -33,4 +33,4 @@ namespace :import do ...@@ -33,4 +33,4 @@ namespace :import do
task log: :environment do task log: :environment do
action.logger action.logger
end end
end end
\ No newline at end of file
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment