Commit 5907329d by Tô Ngọc Ánh

improve code syntax (crawl company, industry, location)

parent b45a5d2a
# ActiveRecord model for a job location.
class Location < ApplicationRecord
  # Size of the leading slice of the crawled location list that
  # crawl_industries_and_locations stores with `oversea: false`; every
  # location after that slice is stored with `oversea: true`.
  CITY_VIETNAM_NUMBER = 70

  # NOTE(review): `through: :locations_jobs` needs a `locations_jobs`
  # association on this model; none is visible here -- confirm it exists.
  has_many :jobs, through: :locations_jobs
end
require "open-uri"
# Rake task: loads the Rails environment, then crawls companies and jobs
# by calling crawl_companies_and_jobs with a page count of 10.
task crawl_companies_jobs: :environment do
  crawl_companies_and_jobs(10)
end
# Rake task: loads the Rails environment, then runs
# crawl_industries_and_locations.
task crawl_industries_locations: :environment do
  crawl_industries_and_locations
end
# Crawls listing pages 1 through +page+ of careerbuilder.vn. For each page
# the company and job links are extracted with get_company_and_job_links and
# handed to crawl_companies and crawl_jobs respectively.
#
# @param page [Integer] number of listing pages to crawl; nothing is crawled
#   when it is zero or negative
def crawl_companies_and_jobs(page)
  # Listing pages are numbered from 1. `page.times` would yield 0..page-1,
  # requesting page 0 and never reaching the last page.
  1.upto(page) do |page_number|
    listing_url = "https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-trang-#{page_number}-vi.html"
    company_links, job_links = get_company_and_job_links(listing_url)
    crawl_companies(company_links)
    crawl_jobs(job_links)
  end
end
...@@ -35,14 +35,23 @@ def crawl_company(company_link) ...@@ -35,14 +35,23 @@ def crawl_company(company_link)
begin begin
document = Nokogiri::HTML(open(company_link)) document = Nokogiri::HTML(open(company_link))
company_name = document.css(".content .name").text company_name = document.css(".content .name").text
return if company_name.empty?
exist = Company.find_by(name: company_name).present? exist = Company.find_by(name: company_name).present?
return if exist || company_name.empty? return if exist
puts company_name puts company_name
company_address = document.css(".content p")[1].text company_address = document.css(".content p")[1].text
company_description = document.css(".main-about-us").css('.content').text company_description = document.css(".main-about-us").css('.content').text
company = Company.create!(name: company_name, address: company_address, description: company_description) company = Company.create!(name: company_name, address: company_address, description: company_description)
rescue => exception rescue => exception
return exception puts exception
end
end
# Crawls every job in +job_links+ by passing each element, in order and
# unchanged, to crawl_job.
#
# @param job_links [#each] collection of job links to crawl
def crawl_jobs(job_links)
  job_links.each { |link| crawl_job(link) }
end
...@@ -69,24 +78,19 @@ def crawl_industries_and_locations ...@@ -69,24 +78,19 @@ def crawl_industries_and_locations
locations = locations_xml.map(&:text) locations = locations_xml.map(&:text)
industries.each do |industry| industries.each do |val|
exist = Industry.find_by(name: industry).present? Industry.find_or_create_by(name: val)
break if exist
puts industry
Industry.create!(name: industry)
end end
locations.take(70).each do |location| locations.take(Location::CITY_VIETNAM_NUMBER).each do |val|
exist = Location.find_by(city: location).present? Location.find_or_create_by(city: val) do |location|
break if exist location.oversea = false
puts location end
Location.create!(oversea: false, city: location )
end end
locations.last(locations.count - 70).each do |location| locations.last(locations.count - Location::CITY_VIETNAM_NUMBER).each do |val|
exist = Location.find_by(city: location).present? Location.find_or_create_by(city: val) do |location|
break if exist location.oversea = true
puts location end
Location.create!(oversea: true, city: location )
end end
end end
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment