Commit 9a32f409 by Mai Hoang Thai Ha

fixed ARGV

parent 8ab617be
require 'open-uri' require 'open-uri'
require 'csv' require 'csv'
require 'zip' require 'zip'
require "rails/all"
namespace :crawler do namespace :crawler do
desc 'crawler from CareerBuilder' desc 'crawler from CareerBuilder'
task jobs: :environment do task jobs: :environment do
ARGV.each { |a| task a.to_sym { ; } }
total_pages = 0 total_pages = 0
if ARGV.length <= 1 if ARGV.length == 1 && ARGV[0] == 'TEST'
ARGV.each do |a|
task a.to_sym { ; }
case a
when 'TEST'
total_pages = 1 total_pages = 1
when 'ALL' elsif ARGV.length == 1 && ARGV[0] == 'ALL'
first_page = Nokogiri::HTML(HTTParty.get('https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-vi.html').body) first_page = Nokogiri::HTML(HTTParty.get('https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-vi.html').body)
jobs_per_page = first_page.css('div.job-item').count jobs_per_page = first_page.css('div.job-item').count
total_jobs = first_page.css('.search-result-list .job-found p').text.split(' ').first.gsub(',', '').to_i total_jobs = first_page.css('.search-result-list .job-found p').text.split(' ').first.gsub(',', '').to_i
...@@ -23,11 +18,6 @@ namespace :crawler do ...@@ -23,11 +18,6 @@ namespace :crawler do
else else
exit exit
end end
end
elsif
# exit
exit
end
(1..total_pages).each do |page| (1..total_pages).each do |page|
parsed_page = Nokogiri::HTML(HTTParty.get("https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-trang-#{page}-vi.html").body) parsed_page = Nokogiri::HTML(HTTParty.get("https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-trang-#{page}-vi.html").body)
jobs_item = parsed_page.css('div.job-item .job_link') jobs_item = parsed_page.css('div.job-item .job_link')
...@@ -38,14 +28,13 @@ namespace :crawler do ...@@ -38,14 +28,13 @@ namespace :crawler do
# title - company # title - company
title = job_page.css('div.job-desc h1.title').text title = job_page.css('div.job-desc h1.title').text
company = job_page.css('div.job-desc a.job-company-name').text company = job_page.css('div.job-desc a.job-company-name').text
Company.create!(name: company)
# info box # info box
info_box_item = job_detail.css('.detail-box ul li') info_box_item = job_detail.css('.detail-box ul li')
# city, update_at, industry, type, salary, experience, level, expiration_date # city, update_at, industry, type, salary, experience, level, expiration_date
job_industries = [] job_industries = []
update_at, job_type, salary, experience, level, expiration_date = '' update_at, job_type, salary, experience, level, expiration_date = ''
job_cities = [] job_cities = []
job_detail.css('.detail-box .map a').each do |part| job_detail.css('.detail-box .map p a').each do |part|
city = part.text city = part.text
job_cities << city job_cities << city
end end
...@@ -96,6 +85,7 @@ namespace :crawler do ...@@ -96,6 +85,7 @@ namespace :crawler do
other_info_list << info other_info_list << info
end end
company = Company.find_or_create_by(name: company)
job = Job.find_or_create_by( job = Job.find_or_create_by(
title: title, title: title,
job_type: job_type, job_type: job_type,
...@@ -108,7 +98,7 @@ namespace :crawler do ...@@ -108,7 +98,7 @@ namespace :crawler do
requirement: requirement, requirement: requirement,
other_info: other_info_list.each { |info| } other_info: other_info_list.each { |info| }
) )
Company.find_or_initialize_by(name: company).jobs << job company.jobs << job
job_industries.each do |industry| job_industries.each do |industry|
industry_id = Industry.find_or_create_by(name: industry) industry_id = Industry.find_or_create_by(name: industry)
job.industries << industry_id job.industries << industry_id
...@@ -132,7 +122,7 @@ namespace :crawler do ...@@ -132,7 +122,7 @@ namespace :crawler do
industry_list << industry industry_list << industry
end end
industry_list.each do |industry| industry_list.each do |industry|
Industry.create!(name: industry) Industry.create(name: industry)
end end
end end
...@@ -156,7 +146,7 @@ namespace :crawler do ...@@ -156,7 +146,7 @@ namespace :crawler do
city_list << city city_list << city
end end
city_list.each do |city| city_list.each do |city|
City.create!( City.create(
name: city[:name], name: city[:name],
region: city[:region] region: city[:region]
) )
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment