Commit 2b3e5d23 by Mai Hoang Thai Ha

Add an argument to crawl 5 or all pages, merge branch migration, import data into City and Industry

parent 6aa95db8
This diff is collapsed. Click to expand it.
require 'open-uri'
require 'csv'
require 'zip'
require "rails/all"
# Rake tasks grouped under the `crawler` namespace. The part of the `jobs`
# task visible here decides how many listing pages to crawl from a
# command-line argument and then fetches each listing page and each job
# detail page from careerbuilder.vn.
namespace :crawler do
# NOTE(review): two consecutive `desc` calls precede a single task; the first
# one may be a removed line left over from the diff view -- confirm.
desc 'importjob'
desc 'crawler from CareerBuilder'
# `jobs` declares :environment as its prerequisite.
task jobs: :environment do
# NOTE(review): this loop and the `(1..total_pages)` loop below both bind
# `page`; this line looks like the pre-change version of that loop (the diff's
# +/- markers are not visible here) -- confirm before running.
(1..3).each do |page|
# Number of listing pages to crawl; stays 0 unless an argument sets it below.
total_pages = 0
# Arguments are only inspected when ARGV holds at most one entry.
# NOTE(review): under `rake crawler:jobs TEST`, ARGV presumably also contains
# the task name itself -- verify that this length check matches the intended
# invocation.
if ARGV.length <= 1
ARGV.each do |a|
# Presumably meant to define a no-op task named after the argument so rake
# does not fail when it later treats the argument as a task name -- confirm.
task a.to_sym { ; }
case a
when 'TEST'
# Test mode: crawl a fixed 5 pages.
total_pages = 5
when 'ALL'
# Full mode: derive the page count from the first listing page.
first_page = Nokogiri::HTML(HTTParty.get('https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-vi.html').body)
# Count of `div.job-item` nodes found on that page.
jobs_per_page = first_page.css('div.job-item').count
# First whitespace-separated token of the "job found" text, commas stripped,
# converted to an integer.
total_jobs = first_page.css('.search-result-list .job-found p').text.split(' ').first.gsub(',', '').to_i
# NOTE(review): `round` (not `ceil`) drops a final partial page whenever the
# fractional part is below 0.5, and a zero `jobs_per_page` makes the float
# division non-finite -- confirm whether either case matters here.
total_pages = (total_jobs.to_f / jobs_per_page).round
else
# Any other argument value terminates the process.
exit
end
end
# NOTE(review): `elsif` has no condition on its own line; Ruby will likely
# take the `exit` below as the condition expression. A plain `else` was
# presumably intended -- confirm.
elsif
# exit
exit
end
# Walk listing pages 1..total_pages.
(1..total_pages).each do |page|
parsed_page = Nokogiri::HTML(HTTParty.get("https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-trang-#{page}-vi.html").body)
# Link elements of every job card on this listing page.
jobs_item = parsed_page.css('div.job-item .job_link')
# Print the current page number.
puts page
jobs_item.each do |item|
# Fetch the job detail page: the base prefix is removed from the href, the
# remainder is escaped with CGI.escape, and the prefix is prepended again.
job_page = Nokogiri::HTML(HTTParty.get('https://careerbuilder.vn/vi/tim-viec-lam/' +
CGI.escape(item.attribute('href').text.remove('https://careerbuilder.vn/vi/tim-viec-lam/'))).body)
......@@ -100,7 +122,9 @@ namespace :crawler do
# Tail of a task whose header is collapsed in this diff view: it collects
# industry names into `industry_list` and then stores them.
# Take the node's text, squish and strip it, and append it to the list.
industry = part.text.squish.strip
industry_list << industry
end
# Print the collected list.
p industry_list
# Create one Industry record per collected name.
# NOTE(review): presumably Industry is an ActiveRecord model, in which case
# `create!` raises on a failed save and aborts the remaining inserts; nothing
# visible here guards against duplicates on repeated runs -- confirm.
industry_list.each do |industry|
Industry.create!(name: industry)
end
end
# Description for the next task, whose definition is collapsed in this view.
desc 'crawler city form CareerBuilder'
......@@ -122,6 +146,11 @@ namespace :crawler do
}
# Tail of a task whose header is collapsed in this diff view: each `city`
# value (read below through the :name and :region keys) is appended to
# `city_list`.
city_list << city
end
# Print the collected list.
puts city_list
# Create one City record per collected entry, copying its :name and :region.
# NOTE(review): presumably City is an ActiveRecord model, in which case
# `create!` raises on a failed save; nothing visible here guards against
# duplicates on repeated runs -- confirm.
city_list.each do |city|
City.create!(
name: city[:name],
region: city[:region]
)
end
end
end
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment