Commit c657a9de by Mai Hoang Thai Ha

fix some bugs

parent ea11d8e6
......@@ -6,25 +6,25 @@ namespace :crawler do
desc 'importjob'
task jobs: :environment do
parsed_page = Nokogiri::HTML(HTTParty.get('https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-vi.html').body)
(1..3).each do |page|
parsed_page = Nokogiri::HTML(HTTParty.get("https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-trang-#{page}-vi.html").body)
jobs_item = parsed_page.css('div.job-item .job_link')
(0..jobs_item.length - 1).each do |item|
job_link = jobs_item[item].attribute('href').text
job_page = Nokogiri::HTML(HTTParty.get(job_link).body)
job_desc = job_page.css('div.job-desc')
jobs_item.each do |item|
job_page = Nokogiri::HTML(HTTParty.get('https://careerbuilder.vn/vi/tim-viec-lam/' +
CGI.escape(item.attribute('href').text.remove('https://careerbuilder.vn/vi/tim-viec-lam/'))).body)
job_detail = job_page.css('section.job-detail-content')
# title - company
title = job_desc.css('h1.title').text
company = job_desc.css('a.job-company-name').text
title = job_page.css('div.job-desc h1.title').text
company = job_page.css('div.job-desc a.job-company-name').text
# info box
info_box = job_detail.css('div.detail-box')
info_box_item = info_box.css('ul li')
city_box = info_box.css('div.map a')
info_box_item = job_detail.css('.detail-box ul li')
# city, update_at, industry, type, salary, experience, level, expiration_date
update_at, industry, type, salary, experience, level, expiration_date = ''
city = city_box.text
(0..info_box_item.length - 1).each do |part|
info = info_box_item[part].text
city = job_detail.css('.detail-box .map a').text
info_box_item.each do |info_item|
info = info_item.text
if info.include?(key = 'Ngày cập nhật')
update_at = info.squish.remove(key).strip
elsif info.include?(key = 'Ngành nghề')
......@@ -41,29 +41,31 @@ namespace :crawler do
expiration_date = info.squish.remove(key).strip
end
end
# benefit
job_detail_row = job_detail.css('div.detail-row')
benefit_list = []
other_info_list = []
benefits = job_detail.css('ul.welfare-list li')
(0..benefits.length - 1).each do |part|
benefit = benefits[part].text.strip
benefits.each do |part|
benefit = part.text.strip
benefit_list << benefit
end
# description, requirement
description, requirement = ''
(0..job_detail_row.length - 1).each do |part|
job_detail_text = job_detail_row[part].text
job_detail_row = job_detail.css('div.detail-row')
job_detail_row.each do |part|
job_detail_text = part.text
if job_detail_text.include?('Mô tả Công việc')
description = job_detail_text.partition('Mô tả Công việc').last.squish.strip
elsif job_detail_text.include?('Yêu Cầu Công Việc')
requirement = job_detail_text.partition('Yêu Cầu Công Việc').last.squish.strip
end
end
# benefit
# other info
other_info = job_detail.css('div.content_fck ul li')
(0..other_info.length - 1).each do |part|
info = other_info[part].text.squish.strip
other_info.each do |part|
info = part.text.squish.strip
other_info_list << info
end
job = {
......@@ -86,14 +88,16 @@ namespace :crawler do
puts job
end
end
end
desc 'crawler industry form CareerBuilder'
task industries: :environment do
parsed_page ||= Nokogiri::HTML(HTTParty.get('https://careerbuilder.vn/tim-viec-lam.html').body)
parsed_page = Nokogiri::HTML(HTTParty.get('https://careerbuilder.vn/tim-viec-lam.html').body)
list_job = parsed_page.css('div.list-of-working-positions ul.list-jobs li a')
industry_list = []
(0..list_job.length - 1).each do |part|
industry = list_job[part].text.squish.strip
list_job.each do |part|
industry = part.text.squish.strip
industry_list << industry
end
p industry_list
......@@ -104,8 +108,9 @@ namespace :crawler do
parsed_page ||= Nokogiri::HTML(HTTParty.get('https://careerbuilder.vn/tim-viec-lam.html').body)
list_location = parsed_page.css('div.main-jobs-by-location ul li')
city_list = []
(0..list_location.length - 1).each do |part|
city_item = list_location[part].text
list_location.each do |part|
city_item = part.text
region = 1
if city_item.include?(key = 'Việc làm tại')
city_item = city_item.remove(key).strip
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment