Fix associations when crawling city_job and industry_job

parent 7b024ad9
Pipeline #761 failed with stages
in 0 seconds
class TopPagesController < ApplicationController
def index
@total_jobs = Job.all
@total_jobs = Job.ids
@jobs = Job.limit(5).order(created_at: :desc)
@jobs_of_cities = CityJob.limit(9).group('city_id').order('Count(*) DESC').count
@jobs_of_industries = IndustryJob.limit(9).group('industry_id').order('Count(*) DESC').count
......
......@@ -4,8 +4,9 @@ require 'zip'
class Crawler
def initialize(logger)
def initialize(logger, url)
@mylogger = logger
@url = url
@NAME_DOMAIN = '192.168.1.156'
@USERNAME_FTP = 'training'
@PASSWORD_FTP = 'training'
......@@ -14,10 +15,12 @@ class Crawler
# Entry point of the crawl: invokes each crawl stage once, in a fixed order
# (cities, industries, companies, then job relationships).
#
# @return [Object] whatever the last stage, +crawl_job_relationships+, returns
def crawl_city_industry
  stages = %i[crawl_city crawl_industry crawl_company crawl_job_relationships]
  # `send` is used so the stages can be dispatched by name; `last` keeps the
  # return value identical to calling the final stage directly.
  stages.map { |stage| send(stage) }.last
end
def crawl_city
page = Nokogiri::HTML(URI.open("https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-vi.html"))
page = Nokogiri::HTML(URI.open(@url))
get_name = page.search('select#location')
data_city = get_name.search('option').map(&:text).map(&:strip)
......@@ -33,7 +36,7 @@ class Crawler
end
def crawl_industry
page = Nokogiri::HTML(URI.open("https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-vi.html"))
page = Nokogiri::HTML(URI.open(@url))
get_name = page.search('select#industry')
data_industry = get_name.search('option').map { |p| p.text.strip }
......@@ -69,8 +72,8 @@ class Crawler
end
def crawl_job_relationships
# (1..10).each do |n|
page_access = Nokogiri::HTML(URI.open("https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-trang-1-vi.html"))
(1..10).each do |n|
page_access = Nokogiri::HTML(URI.open("https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-trang-#{n}-vi.html"))
get_link = page_access.css('a.job_link').map { |link| link['href'] }
get_link.each do |link|
page_job = Nokogiri::HTML(URI.open(URI.parse(URI.escape(link))))
......@@ -123,7 +126,7 @@ class Crawler
end
end
end
# end
end
end
def get_file_csv
......
......@@ -36,21 +36,19 @@ class InforJob
end
# Links +info_job+ to every City whose name appears in the job page's
# location block.
#
# Names with no matching City row are skipped, and cities already linked to
# the job are not appended a second time.
#
# @param row [#css] parsed job page; queried with the 'div.map p a' selector
# @param info_job [#id, #cities] job record exposing a +cities+ collection
# @return [Object] the +cities+ collection of +info_job+
def create_city_rel(row, info_job)
  city_names = row.css('div.map p a').children.map { |node| node.text.strip }

  # A single lookup for all names: unknown names are simply absent from the
  # result, so there is no nil record to dereference.
  already_linked = info_job.cities.map(&:id)
  new_cities = City.where(name: city_names).reject { |city| already_linked.include?(city.id) }

  # Log only; the append happens exactly once below (never inside the
  # interpolated log string, which would insert the links a second time).
  new_cities.each { |city| puts "Created City: #{info_job.id} - #{city.id}.#{city.name}" }
  info_job.cities << new_cities
end
# Links +info_job+ to every Industry whose name appears in the job page's
# industry list.
#
# Names with no matching Industry row are skipped, and industries already
# linked to the job are not appended a second time.
#
# @param row [#css] parsed job page; queried with the 'li a' selector
# @param info_job [#id, #industries] job record exposing an +industries+ collection
# @return [Object] the +industries+ collection of +info_job+
def create_industry_rel(row, info_job)
  industry_names = row.css('li a').children.map { |node| node.text.strip }

  # A single lookup for all names: unknown names are simply absent from the
  # result, so there is no nil record to dereference.
  already_linked = info_job.industries.map(&:id)
  new_industries = Industry.where(name: industry_names)
                           .reject { |industry| already_linked.include?(industry.id) }

  # Log only; the append happens exactly once below (never inside the
  # interpolated log string, which would insert the links a second time).
  new_industries.each do |industry|
    puts "Created Industry: #{info_job.id} - #{industry.id}.#{industry.name}"
  end
  info_job.industries << new_industries
end
def create_job(title, link_page, row, company_table)
......
......@@ -4,7 +4,7 @@ require 'src/crontab.rb'
namespace :import do
desc 'crawler data'
task crawler: :environment do
  # Build the crawler once, with both arguments. The earlier one-argument
  # construction produced an instance that was immediately overwritten.
  # NOTE(review): `logger` and `url` are not defined in the visible part of
  # this file — confirm they are provided before this task runs.
  action = Crawler.new(logger, url)
  action.crawl_city_industry
end
desc 'Crontab'
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment