Fix code

parent 5204717b
......@@ -2,8 +2,8 @@
<% @job.each do |job| %>
<% if !job.cities.blank? %>
<ul>
<div class="title"><strong><%= (@company.find_by(id: job.company_id)).name %></strong></div>
<%= job.title %>
<div class="title"><strong><%= job.title %></strong></div>
<div><%= (@company.find_by(id: job.company_id)).name %></div>
<div class="salary"><i class="fas fa-dollar-sign"></i>Lương: <%= job.salary %></div>
<div><i class="fas fa-map-marker"></i>
<% job.cities.each do |location| %>
......
class Crawler
class Crawler
def crawl_city
page = Nokogiri::HTML(URI.open("https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-vi.html"))
get_name = page.search('select#location')
......@@ -14,15 +14,17 @@
end
end
end
# Reads the option labels of the `select#industry` element on the
# CareerBuilder listing page and creates one Industry record per label.
#
# Returns the array of stripped option labels that were processed.
#
# NOTE(review): every label is passed to Industry.create! without checking
# for an existing row, so running this twice presumably duplicates (or
# raises on) existing industries - confirm against the Industry model.
def crawl_industry
  page = Nokogiri::HTML(URI.open("https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-vi.html"))
  # Scrape the labels once; surrounding whitespace is stripped before storing.
  industry_names = page.search('select#industry').search('option').map { |option| option.text.strip }
  industry_names.each { |name_industry| Industry.create!(name: name_industry) }
end
def crawl_company
for n in 1..10
company_info = Nokogiri::HTML(URI.open("https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-trang-#{n}-vi.html"))
......@@ -36,10 +38,10 @@
company_page = Nokogiri::HTML(URI.open(URI.parse(URI.escape(link))))
if !(company_page.search('p.name').text).nil?
begin
name_company = company_page.search('p.name').text
address_company = company_page.css('div.content p').children[1].text
name_company = company_page.search('p.name').text
address_company = company_page.css('div.content p').children[1].text
introduction_company = company_page.css('div.main-about-us').text
get_name_company = Company.find_by(name: name_company)
get_name_company = Company.find_by(name: name_company)
if get_name_company.nil?
company = Company.create!(name: name_company,
address: address_company,
......@@ -53,32 +55,31 @@
end
end
end
def crawl_job_relationships
for n in 1..10
page_access = Nokogiri::HTML(URI.open("https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-trang-#{n}-vi.html"))
get_link = page_access.css('a.job_link').map{ |link| link['href'] }
get_link = page_access.css('a.job_link').map { |link| link['href'] }
get_link.each do |link|
if link.include?('\u2013')
link.gsub!('\u2013','–')
end
page_job = Nokogiri::HTML(URI.open(URI.parse(URI.escape(link))))
get_row = page_job.search('div.bg-blue div.row')
if get_row != ""
get_name_company = page_job.search('div.job-desc a.job-company-name').text.strip
company_table = Company.find_by(name: "#{get_name_company}")
title_job = page_job.search('div.job-desc p').text
description = page_job.search('div.detail-row')
arr_column = get_row.css('div.has-background').map{ |data| data.text.split(' ').join(' ') }
arr_column.each_with_index do | val, key |
if !company_table.nil?
company_table = Company.find_by(name: get_name_company)
title_job = page_job.search('div.job-desc p').text
description = page_job.search('div.detail-row')
arr_column = get_row.css('div.has-background').map { |data| data.text.split(' ').join(' ') }
arr_column.each_with_index do |val, key |
unless company_table.nil?
job_check = Job.find_by(title: title_job, company_id: company_table.id)
if val.include?('Ngày cập nhật')
arr_data = val.gsub('Ngày cập nhật ','').split(' ')
arr_data = val.gsub('Ngày cập nhật ', '').split(' ')
date = arr_data.first
elsif val.include?('Lương') && val.include?('Kinh nghiệm') == true && Job.find_by(title: title_job, company_id: company_table.id) == nil
arr_sub = ((((val.gsub('Lương ','')).gsub(' Kinh nghiệm ', '*')).gsub(' Cấp bậc ', '*')).gsub(' Hết hạn nộp ', '*')).split('*')
salary = arr_sub[0]
experience = arr_sub[1]
level = arr_sub[2]
elsif val.include?('Lương') && val.include?('Kinh nghiệm') == true && job_check.nil?
arr_sub = val.gsub('Lương ', '').gsub(' Kinh nghiệm ', '*').gsub(' Cấp bậc ', '*').gsub(' Hết hạn nộp ', '*').split('*')
salary = arr_sub[0]
experience = arr_sub[1]
level = arr_sub[2]
expiration_date = arr_sub[3]
job = Job.create!(title: title_job,
level: level,
......@@ -87,10 +88,10 @@
expiration_date: expiration_date,
description: description,
company_id: company_table.id)
elsif val.include?('Lương') && val.include?('Kinh nghiệm') == false && Job.find_by(title: title_job, company_id: company_table.id) == nil
arr_sub = (((val.gsub('Lương ','')).gsub(' Cấp bậc ', '*')).gsub(' Hết hạn nộp ', '*')).split('*')
salary = arr_sub[0]
level = arr_sub[1]
elsif val.include?('Lương') && val.include?('Kinh nghiệm') == false && job_check.nil?
arr_sub = val.gsub('Lương ', '').gsub(' Cấp bậc ', '*').gsub(' Hết hạn nộp ', '*').split('*')
salary = arr_sub[0]
level = arr_sub[1]
expiration_date = arr_sub[2]
job = Job.create!(title: title_job,
level: level,
......@@ -101,24 +102,23 @@
company_id: company_table.id)
end
end
if !company_table.nil?
job_table = Job.find_by(title: title_job)
if !job_table.nil?
location_rel = get_row.css('div.map p a').children.map{ |location| location.text.strip }
location_rel.each do |loc|
city_table = City.find_by(name: "#{loc}")
if CityJob.find_by(job_id: job_table.id, city_id: city_table.id) == nil
puts "Created City: #{job_table.id} - #{city_table.id}.#{loc}"
city_jobs = CityJob.create!(job_id: job_table.id, city_id: city_table.id)
end
next if !company_table.nil?
job_table = Job.find_by(title: title_job)
unless job_table.nil?
location_rel = get_row.css('div.map p a').children.map { |location| location.text.strip }
location_rel.each do |loc|
city_table = City.find_by(name: loc)
if CityJob.find_by(job_id: job_table.id, city_id: city_table.id).nil?
puts "Created City: #{job_table.id} - #{city_table.id}.#{loc}"
city_jobs = CityJob.create!(job_id: job_table.id, city_id: city_table.id)
end
industry_rel = get_row.css('li a').children.map{ |industry| industry.text.strip }
industry_rel.each do |ind|
industry_table = Industry.find_by(name: "#{ind}")
if IndustryJob.find_by(job_id: job_table.id, industry_id: industry_table.id) == nil
puts "Created Industry: #{job_table.id} - #{industry_table.id}.#{ind}"
industry_jobs = IndustryJob.create!(job_id: job_table.id, industry_id: industry_table.id)
end
end
industry_rel = get_row.css('li a').children.map { |industry| industry.text.strip }
industry_rel.each do |ind|
industry_table = Industry.find_by(name: ind)
if IndustryJob.find_by(job_id: job_table.id, industry_id: industry_table.id).nil?
puts "Created Industry: #{job_table.id} - #{industry_table.id}.#{ind}"
industry_jobs = IndustryJob.create!(job_id: job_table.id, industry_id: industry_table.id)
end
end
end
......@@ -151,51 +151,52 @@
file = "jobs.csv"
CSV.foreach(file, headers: true) do |row|
begin
company_name = row["company name"].strip
company_address = row["company address"]
company_introduction = row["benefit"]
company_table = Company.find_by(name: "#{company_name}")
if company_table == nil
company_table = Company.create!(name: company_name,
address: company_address,
introduction: company_introduction)
end
title_job = row["name"].strip
description_job = row["description"]
level = row["level"]
salary = row["salary"]
if company_table != nil && Job.find_by(title: title_job, level: level, salary: salary, company_id: company_table.id) == nil
job_table = Job.create!(title: title_job,
description: description_job,
level: level,
salary: salary,
company_id: company_table.id)
end
industry = row["category"].strip
industry_find = Industry.find_by(name: industry)
if industry_find == nil
industry_table = Industry.create!(name: industry)
industry_job_table = IndustryJob.create!(job_id: job_table.id, industry_id: industry_find.id)
else
industry_job_table = IndustryJob.create!(job_id: job_table.id, industry_id: industry_find.id)
end
puts "========================================="
puts job_table.id, title_job, industry, salary
location_data = row["work place"].strip
location = (location_data.gsub('["','')).gsub('"]','').strip
location_find = City.find_by(name: location)
if location_find == nil
city_table = City.create!(name: location)
city_job_table = CityJob.create!(job_id: job_table.id, city_id: location_find.id)
else
city_job_table = CityJob.create!(job_id: job_table.id, city_id: location_find.id)
end
puts "Location: #{location}"
company_name = row["company name"]
company_address = row["company address"]
company_introduction = row[:benefit]
company_table = Company.find_by(name: company_name)
if company_table.nil?
company_table = Company.create!(name: company_name,
address: company_address,
introduction: company_introduction)
end
title_job = row[:name]
description_job = row[:description]
level = row[:level]
salary = row[:salary]
unless company_table.nil?
job_table = Job.create!(title: title_job,
description: description_job,
level: level,
salary: salary,
company_id: company_table.id)
puts job_table.id
end
industry = row[:category]
industry_find = Industry.find_by(name: industry)
if industry_find.nil?
industry_table = Industry.create!(name: industry)
industry_job_table = IndustryJob.create!(job_id: job_table.id, industry_id: industry_find.id)
else
industry_job_table = IndustryJob.create!(job_id: job_table.id, industry_id: industry_find.id)
end
puts job_table.id, title_job, industry, salary
location_data = row["work place"]
location = location_data.gsub('["', '').gsub('"]', '')
location_find = City.find_by(name: location)
if location_find.nil?
city_table = City.create!(name: location)
city_job_table = CityJob.create!(job_id: job_table.id, city_id: location_find.id)
else
city_job_table = CityJob.create!(job_id: job_table.id, city_id: location_find.id)
end
puts "Location: #{location}"
rescue StandardError => e
puts e
puts e
end
end
end
def logger
# config.log_level = :info
Rails.logger = Logger.new(STDOUT)
......@@ -203,4 +204,4 @@
Rails.logger.level = Logger::DEBUG
Rails.logger.datetime_format = "%Y-%m-%d %H:%M:%S"
end
end
\ No newline at end of file
end
class Crontab
def find_company
company_info = Nokogiri::HTML(URI.open("https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-trang-1-vi.html"))
company_link = company_info.css('div.caption a.company-name').map{ |link| link['href'] }
company_link.each do |link|
if link.include?('\u2019')
link.gsub!('\u2019',"'")
end
next if link == 'javascript:void(0);'
company_link = company_info.css('div.caption a.company-name').map { |link| link['href'] }
company_link.each do |link|
next if link == 'javascript:void(0);'
if link != 'https://careerbuilder.vn/vi/nha-tuyen-dung/hr-vietnam\xE2\x80\x99s-ess-client.35A4EFBA.html'
company_page = Nokogiri::HTML(URI.open(URI.parse(URI.escape(link))))
if !(company_page.search('p.name').text).nil?
unless (company_page.search('p.name').text).nil?
begin
name_company = company_page.search('p.name').text
address_company = company_page.css('div.content p').children[1].text
introduction_company = company_page.css('div.main-about-us').text
get_name_company = Company.find_by(name: name_company)
if get_name_company.nil?
company = Company.create!(name: name_company,
address: address_company,
introduction: introduction_company)
company = Company.create!(name: name_company,
address: address_company,
introduction: introduction_company)
end
rescue StandardError => e
puts e
rescue StandardError => e
puts e
end
end
end
......@@ -29,30 +26,28 @@ class Crontab
end
def find_job
page_access = Nokogiri::HTML(URI.open("https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-vi.html"))
get_link = page_access.css('a.job_link').map{ |link| link['href'] }
get_link.each do |link|
if link.include?('\u2013')
link.gsub!('\u2013','–')
end
page_job = Nokogiri::HTML(URI.open(URI.parse(URI.escape(link))))
get_row = page_job.search('div.bg-blue div.row')
get_link = page_access.css('a.job_link').map { |link| link['href'] }
get_link.each do |link|
page_job = Nokogiri::HTML(URI.open(URI.parse(URI.escape(link))))
get_row = page_job.search('div.bg-blue div.row')
if get_row != ""
get_name_company = page_job.search('div.job-desc a.job-company-name').text.strip
company_table = Company.find_by(name: get_name_company)
title_job = page_job.search('div.job-desc p').text
description = page_job.search('div.detail-row')
arr_column = get_row.css('div.has-background').map{ |data| data.text.split(' ').join(' ') }
arr_column = get_row.css('div.has-background').map { |data| data.text.split(' ').join(' ') }
job_table = Job.find_by(title: title_job)
arr_column.each_with_index do | val, key |
if !company_table.nil?
arr_column.each do |val|
unless company_table.nil?
job_check = Job.find_by(title: title_job, company_id: company_table.id)
if val.include?('Ngày cập nhật')
arr_data = val.gsub('Ngày cập nhật ','').split(' ')
date = arr_data.first
elsif val.include?('Lương') && val.include?('Kinh nghiệm') == true && Job.find_by(title: title_job, company_id: company_table.id) == nil
arr_sub = ((((val.gsub('Lương ','')).gsub(' Kinh nghiệm ', '*')).gsub(' Cấp bậc ', '*')).gsub(' Hết hạn nộp ', '*')).split('*')
salary = arr_sub[0]
experience = arr_sub[1]
level = arr_sub[2]
arr_data = val.gsub('Ngày cập nhật ', '').split(' ')
date_update = arr_data.first
elsif val.include?('Lương') && val.include?('Kinh nghiệm') == true && job_check.nil?
arr_sub = val.gsub('Lương ', '').gsub(' Kinh nghiệm ', '*').gsub(' Cấp bậc ', '*').gsub(' Hết hạn nộp ', '*').split('*')
salary = arr_sub[0]
experience = arr_sub[1]
level = arr_sub[2]
expiration_date = arr_sub[3]
job = Job.create!(title: title_job,
level: level,
......@@ -61,10 +56,10 @@ class Crontab
expiration_date: expiration_date,
description: description,
company_id: company_table.id)
elsif val.include?('Lương') && val.include?('Kinh nghiệm') == false && Job.find_by(title: title_job, company_id: company_table.id) == nil
arr_sub = (((val.gsub('Lương ','')).gsub(' Cấp bậc ', '*')).gsub(' Hết hạn nộp ', '*')).split('*')
salary = arr_sub[0]
level = arr_sub[1]
elsif val.include?('Lương') && val.include?('Kinh nghiệm') == false && job_check.nil?
arr_sub = val.gsub('Lương ', '').gsub(' Cấp bậc ', '*').gsub(' Hết hạn nộp ', '*').split('*')
salary = arr_sub[0]
level = arr_sub[1]
expiration_date = arr_sub[2]
job = Job.create!(title: title_job,
level: level,
......@@ -77,24 +72,24 @@ class Crontab
end
end
if !job_table.nil? && !company_table.nil?
location_rel = get_row.css('div.map p a').children.map{ |location| location.text.strip }
location_rel = get_row.css('div.map p a').children.map { |location| location.text.strip }
location_rel.each do |loc|
city_table = City.find_by(name: "#{loc}")
if CityJob.find_by(job_id: job_table.id, city_id: city_table.id) == nil
puts "Created #{job_table.id} - #{city_table.id}.#{loc}"
city_jobs = CityJob.create!(job_id: job_table.id, city_id: city_table.id)
city_table = City.find_by(name: loc)
if CityJob.find_by(job_id: job_table.id, city_id: city_table.id).nil?
puts "Created City #{city_table.id} => #{loc}"
city_jobs = CityJob.create!(job_id: job_table.id, city_id: city_table.id)
end
end
industry_rel = get_row.css('li a').children.map{ |industry| industry.text.strip }
industry_rel = get_row.css('li a').children.map { |industry| industry.text.strip }
industry_rel.each do |ind|
industry_table = Industry.find_by(name: "#{ind}")
if IndustryJob.find_by(job_id: job_table.id, industry_id: industry_table.id) == nil
puts "#{job_table.id} - #{industry_table.id}.#{ind}"
industry_jobs = IndustryJob.create!(job_id: job_table.id, industry_id: industry_table.id)
industry_table = Industry.find_by(name: ind)
if IndustryJob.find_by(job_id: job_table.id, industry_id: industry_table.id).nil?
puts "Created Industry #{job_table.id} - #{industry_table.id} => #{ind}"
industry_jobs = IndustryJob.create!(job_id: job_table.id, industry_id: industry_table.id)
end
end
end
end
end
end
end
\ No newline at end of file
end
......@@ -6,23 +6,23 @@ require 'zip'
action = Crawler.new
crontab = Crontab.new
namespace :import do
desc "crawler data"
desc 'crawler data'
task crawler: :environment do
action.crawl_city
action.crawl_industry
action.crawl_company
action.crawl_job_relationships
end
desc "get file CSV from server"
desc 'get file CSV from Server'
task csv_get: :environment do
action.get_file_csv
action.extract_zip('./jobs.zip','.')
end
desc "Import data from CSV"
desc 'Import data from CSV'
task data_csv: :environment do
action.import_file_csv
end
desc "Crontab"
desc 'Crontab'
task auto: :environment do
crontab.find_company
......@@ -33,4 +33,4 @@ namespace :import do
task log: :environment do
action.logger
end
end
\ No newline at end of file
end
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment