Fix associations when crawling city_job and industry_job

parent 7b024ad9
Pipeline #761 failed with stages
in 0 seconds
class TopPagesController < ApplicationController class TopPagesController < ApplicationController
def index def index
@total_jobs = Job.all @total_jobs = Job.ids
@jobs = Job.limit(5).order(created_at: :desc) @jobs = Job.limit(5).order(created_at: :desc)
@jobs_of_cities = CityJob.limit(9).group('city_id').order('Count(*) DESC').count @jobs_of_cities = CityJob.limit(9).group('city_id').order('Count(*) DESC').count
@jobs_of_industries = IndustryJob.limit(9).group('industry_id').order('Count(*) DESC').count @jobs_of_industries = IndustryJob.limit(9).group('industry_id').order('Count(*) DESC').count
......
...@@ -4,8 +4,9 @@ require 'zip' ...@@ -4,8 +4,9 @@ require 'zip'
class Crawler class Crawler
def initialize(logger) def initialize(logger, url)
@mylogger = logger @mylogger = logger
@url = url
@NAME_DOMAIN = '192.168.1.156' @NAME_DOMAIN = '192.168.1.156'
@USERNAME_FTP = 'training' @USERNAME_FTP = 'training'
@PASSWORD_FTP = 'training' @PASSWORD_FTP = 'training'
...@@ -14,10 +15,12 @@ class Crawler ...@@ -14,10 +15,12 @@ class Crawler
def crawl_city_industry def crawl_city_industry
crawl_city crawl_city
crawl_industry crawl_industry
crawl_company
crawl_job_relationships
end end
def crawl_city def crawl_city
page = Nokogiri::HTML(URI.open("https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-vi.html")) page = Nokogiri::HTML(URI.open(@url))
get_name = page.search('select#location') get_name = page.search('select#location')
data_city = get_name.search('option').map(&:text).map(&:strip) data_city = get_name.search('option').map(&:text).map(&:strip)
...@@ -33,7 +36,7 @@ class Crawler ...@@ -33,7 +36,7 @@ class Crawler
end end
def crawl_industry def crawl_industry
page = Nokogiri::HTML(URI.open("https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-vi.html")) page = Nokogiri::HTML(URI.open(@url))
get_name = page.search('select#industry') get_name = page.search('select#industry')
data_industry = get_name.search('option').map { |p| p.text.strip } data_industry = get_name.search('option').map { |p| p.text.strip }
...@@ -69,8 +72,8 @@ class Crawler ...@@ -69,8 +72,8 @@ class Crawler
end end
def crawl_job_relationships def crawl_job_relationships
# (1..10).each do |n| (1..10).each do |n|
page_access = Nokogiri::HTML(URI.open("https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-trang-1-vi.html")) page_access = Nokogiri::HTML(URI.open("https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-trang-#{n}-vi.html"))
get_link = page_access.css('a.job_link').map { |link| link['href'] } get_link = page_access.css('a.job_link').map { |link| link['href'] }
get_link.each do |link| get_link.each do |link|
page_job = Nokogiri::HTML(URI.open(URI.parse(URI.escape(link)))) page_job = Nokogiri::HTML(URI.open(URI.parse(URI.escape(link))))
...@@ -123,7 +126,7 @@ class Crawler ...@@ -123,7 +126,7 @@ class Crawler
end end
end end
end end
# end end
end end
def get_file_csv def get_file_csv
......
...@@ -36,21 +36,19 @@ class InforJob ...@@ -36,21 +36,19 @@ class InforJob
end end
def create_city_rel(row, info_job) def create_city_rel(row, info_job)
location_rel = row.css('div.map p a').children.map(&:text).map(&:strip) location_rel = row.css('div.map p a').children.map { |name_city| name_city.text.strip }
location_rel.each do |loc| city_table = City.where(name: location_rel)
city_table = City.find_by(name: loc)
puts "Created City: #{info_job.id} - #{city_table.id}.#{loc}" puts "#{info_job.cities << city_table}"
CityJob.find_or_create_by!(job_id: info_job.id, city_id: city_table.id) info_job.cities << city_table
end
end end
def create_industry_rel(row, info_job) def create_industry_rel(row, info_job)
industry_rel = row.css('li a').children.map(&:text).map(&:strip) industry_rel = row.css('li a').children.map { |name_industry| name_industry.text.strip }
industry_rel.each do |ind| industry_table = Industry.where(name: industry_rel)
industry_table = Industry.find_by(name: ind)
puts "Created Industry: #{info_job.id} - #{industry_table.id}.#{ind}" puts "#{info_job.industries << industry_table}"
IndustryJob.find_or_create_by!(job_id: info_job.id, industry_id: industry_table.id) info_job.industries << industry_table
end
end end
def create_job(title, link_page, row, company_table) def create_job(title, link_page, row, company_table)
......
...@@ -4,7 +4,7 @@ require 'src/crontab.rb' ...@@ -4,7 +4,7 @@ require 'src/crontab.rb'
namespace :import do namespace :import do
desc 'crawler data' desc 'crawler data'
task crawler: :environment do task crawler: :environment do
action = Crawler.new(logger) action = Crawler.new(logger, url)
action.crawl_city_industry action.crawl_city_industry
end end
desc 'Crontab' desc 'Crontab'
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment