Commit 8fecd429 by Ngô Trung Hưng

fix -part 4

parent 716b0bd9
Pipeline #725 failed with stages
in 0 seconds
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
require 'open-uri' require 'open-uri'
# Crawler data # Crawler data
class InterfaceWeb class Crawler
COMPANY_SECURITY = 1 COMPANY_SECURITY = 1
NUMBER_LINK = 1 NUMBER_LINK = 1
SIZE_LI_INTERFACE_5 = 10 SIZE_LI_INTERFACE_5 = 10
...@@ -92,6 +92,28 @@ class InterfaceWeb ...@@ -92,6 +92,28 @@ class InterfaceWeb
end end
end end
# Crawls job detail pages and hands each one to the parser matching its layout.
#
# The element at index 1 of +link_job_and_companies+ is iterated as the list
# of page URLs. Only the URLs that come before the first entry equal to
# +@@stop_crawl+ are kept, and they are visited in reverse list order.
# Progress and completion messages are written to stdout.
#
# Pages matching none of the three known layouts are not parsed, but are
# still listed in the progress output.
def make_data
  puts 'Please wait for crawl jobs data! . . .'
  job_links = link_job_and_companies[1]
  # Stop collecting at the marker link; everything from it onwards is ignored.
  pending_links = job_links.take_while { |link| @@stop_crawl != link }
  pending_links.reverse_each.with_index do |path, i|
    # URI.escape was removed in Ruby 3.0; URI::DEFAULT_PARSER.escape performs
    # the same percent-encoding and is available on older Rubies as well.
    page = Nokogiri::HTML(URI.open(URI.parse(URI::DEFAULT_PARSER.escape(path))))
    if page.search('.item-blue .detail-box:nth-child(1) ul li:nth-child(1) p').first.present?
      crawl_data_jobs_interface_1(page)
    elsif page.search('section .template-200').text.present?
      crawl_data_jobs_interface_2(page)
    elsif page.search('.DetailJobNew ul li').size == SIZE_LI_INTERFACE_5 &&
          !page.search('.right-col ul li').text.include?('Độ tuổi') # Vietnamese for "age"
      crawl_data_jobs_interface_5(page)
    end
    puts "#{i} - #{path}"
  end
  puts 'Crawler data jobs success!'
end
private private
def add_data(data) def add_data(data)
...@@ -201,28 +223,4 @@ class InterfaceWeb ...@@ -201,28 +223,4 @@ class InterfaceWeb
CityJob.create!(job_id: id_job, city_id: id_cities) CityJob.create!(job_id: id_job, city_id: id_cities)
end end
end end
public
# Crawls job detail pages and hands each one to the parser matching its layout.
#
# The element at index 1 of +link_job_and_companies+ is iterated as the list
# of page URLs. Only the URLs that come before the first entry equal to
# +@@stop_crawl+ are kept, and they are visited in reverse list order.
# Progress and completion messages are written to stdout.
#
# Pages matching none of the three known layouts are not parsed, but are
# still listed in the progress output.
def make_data
  puts 'Please wait for crawl jobs data! . . .'
  job_links = link_job_and_companies[1]
  # Stop collecting at the marker link; everything from it onwards is ignored.
  pending_links = job_links.take_while { |link| @@stop_crawl != link }
  pending_links.reverse_each.with_index do |path, i|
    # URI.escape was removed in Ruby 3.0; URI::DEFAULT_PARSER.escape performs
    # the same percent-encoding and is available on older Rubies as well.
    page = Nokogiri::HTML(URI.open(URI.parse(URI::DEFAULT_PARSER.escape(path))))
    if page.search('.item-blue .detail-box:nth-child(1) ul li:nth-child(1) p').first.present?
      crawl_data_jobs_interface_1(page)
    elsif page.search('section .template-200').text.present?
      crawl_data_jobs_interface_2(page)
    elsif page.search('.DetailJobNew ul li').size == SIZE_LI_INTERFACE_5 &&
          !page.search('.right-col ul li').text.include?('Độ tuổi') # Vietnamese for "age"
      crawl_data_jobs_interface_5(page)
    end
    puts "#{i} - #{path}"
  end
  puts 'Crawler data jobs success!'
end
end end
...@@ -10,7 +10,7 @@ namespace :crawler do ...@@ -10,7 +10,7 @@ namespace :crawler do
company.address = 'Vui lòng xem trong mô tả công việc' company.address = 'Vui lòng xem trong mô tả công việc'
company.short_description = 'Vui lòng xem trong mô tả công việc' company.short_description = 'Vui lòng xem trong mô tả công việc'
end end
cw = InterfaceWeb.new cw = Crawler.new
cw.craw_data_cities cw.craw_data_cities
cw.craw_data_companies cw.craw_data_companies
cw.make_data cw.make_data
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment