Commit 716b0bd9 by Ngô Trung Hưng

fix -part 3

parent 814a4af9
Pipeline #722 canceled with stages
in 0 seconds
......@@ -5,6 +5,7 @@ require 'open-uri'
# Crawler data
class InterfaceWeb
COMPANY_SECURITY = 1
NUMBER_LINK = 1
SIZE_LI_INTERFACE_5 = 10
INTERNATIONAL = 0
DOMESTIC = 1
......@@ -32,10 +33,9 @@ class InterfaceWeb
File.write('tmp/link.txt', website_jobs[0])
data << website_companies << website_jobs
end
# Returns the result of crawl_link, cached in @link_job_and_companies so the
# crawl runs at most once per instance.
#
# NOTE(review): this text comes from a commit diff rendered without +/- markers,
# so both assignments below are shown together. Presumably `crawl_link(2)` is the
# removed line and `crawl_link(NUMBER_LINK)` its replacement - confirm against
# the repository. Read literally, the second `||=` only calls crawl_link when
# the first one left the instance variable nil or false.
def link_job_and_companies
@link_job_and_companies ||= crawl_link(2)
@link_job_and_companies ||= crawl_link(NUMBER_LINK)
end
def self.safe_link(url)
......@@ -55,7 +55,6 @@ class InterfaceWeb
data_list_cities.each_with_index do |val, index|
area = index > RANGE ? INTERNATIONAL : DOMESTIC
City.find_or_create_by(name: val) do |city|
city.name = val
city.area = area
end
end
......@@ -82,7 +81,6 @@ class InterfaceWeb
begin
if name.present? && address.present? && desc.present?
Company.find_or_create_by(name: name.strip) do |company|
company.name = name.strip
company.address = address
company.short_description = desc
end
......@@ -96,100 +94,94 @@ class InterfaceWeb
private
# Persists one crawled job: looks up the company by name, creates the Job row
# with Job.create!, then passes the new job id to make_foreign_industries_table
# and make_foreign_cities_table.
#
# NOTE(review): this span is taken from a commit diff rendered without +/-
# markers, so the positional-argument version and the hash-based
# `add_data(data)` version are interleaved line by line. Presumably the
# `data[...]` lines are the ones that were kept - confirm against the repository.
#
# Keys read from `data` by the hash-based lines: :name, :company_name, :level,
# :exprience, :salary, :created_date, :expiration_date, :description,
# :industry_name, :city_name. (:exprience is spelled this way by the callers
# in this file as well.)
#
# Any StandardError raised here is rescued and printed with puts; it is not
# re-raised.
def add_data(name, company_name, city_name, created_date, expiration_date, salary, industry_name, description, level, exprience)
id_company = Company.find_by name: company_name
def add_data(data)
id_company = Company.find_by name: data[:company_name]
# Fall back to the COMPANY_SECURITY id when no company matches the name.
id_company = id_company.present? ? id_company.id : COMPANY_SECURITY
id_job = Job.create!(name: name,
id_job = Job.create!(name: data[:name],
company_id: id_company,
level: level,
experience: exprience,
salary: salary,
create_date: created_date,
expiration_date: expiration_date,
description: description)
make_foreign_industries_table(industry_name, id_job.id)
make_foreign_cities_table(city_name, id_job.id)
rescue StandardError => e
level: data[:level],
experience: data[:exprience],
salary: data[:salary],
create_date: data[:created_date],
expiration_date: data[:expiration_date],
description: data[:description])
make_foreign_industries_table(data[:industry_name], id_job.id)
make_foreign_cities_table(data[:city_name], id_job.id)
rescue StandardError => e
puts e
end
private
# Extracts the job fields from a page that uses the first of the layouts this
# class handles, then hands them to add_data.
#
# page - object whose #search accepts a CSS selector (presumably a parsed
#        Nokogiri/Mechanize document - TODO confirm against the caller).
#
# NOTE(review): this span is taken from a commit diff rendered without +/-
# markers, so each old local-variable assignment appears next to the
# `data[...]` assignment that presumably replaced it, and both add_data calls
# are shown.
#
# NOTE(review): String#delete! returns nil when it removes nothing, so the
# chained `.split` / `.lstrip` calls below would raise NoMethodError on text
# that has no matching character. The rescue in add_data does not cover this
# method.
#
# NOTE(review): the argument "[\n,\t,\r]" is a character set, not a regexp, so
# besides newline, tab and carriage return it also deletes literal '[', ','
# and ']' - likely unintended; confirm.
def crawl_data_jobs_interface_1(page)
name = page.search('.apply-now-content .job-desc .title').text
company_name = page.search('.apply-now-content .job-desc .job-company-name').text
data = {}
data[:name] = page.search('.apply-now-content .job-desc .title').text
data[:company_name] = page.search('.apply-now-content .job-desc .job-company-name').text
# Collect the text of every location link, one nth-child lookup per link,
# and store the names as a single comma-separated string.
location = []
length = page.search('.detail-box .map p a').size
length.times do |n|
location << page.search(".detail-box .map p a:nth-child(#{n + 1})").text
end
city_name = location.join(',')
created_date = page.search('.item-blue .detail-box:nth-child(1) ul li:nth-child(1) p')[0].text
expiration_date = page.search('.item-blue .detail-box ul li:last')[1].text.delete!("[\n,\t,\r]").split(' ').last
salary = page.search('.item-blue .detail-box:nth-child(1) ul li:nth-child(1) p')[1].text
data[:city_name] = location.join(',')
# The created date and the salary are the first and second <p> matched by the
# same selector.
data[:created_date] = page.search('.item-blue .detail-box:nth-child(1) ul li:nth-child(1) p')[0].text
# Only the last whitespace-separated token of the matched text is kept.
data[:expiration_date] = page.search('.item-blue .detail-box ul li:last')[1].text.delete!("[\n,\t,\r]").split(' ').last
data[:salary] = page.search('.item-blue .detail-box:nth-child(1) ul li:nth-child(1) p')[1].text
# Industry names are split on spaces and re-joined with commas.
industries = page.search('.item-blue .detail-box:nth-child(1) ul li:nth-child(2) a').text
industries = industries.delete!("[\n,\t,\r]").split(' ').select(&:present?)
industry_name = industries.join(',')
description = page.search('.tabs .tab-content .detail-row:nth-child(n)').to_s
data[:industry_name] = industries.join(',')
data[:description] = page.search('.tabs .tab-content .detail-row:nth-child(n)').to_s
# The level is the text after the 'Cấp bậc' label. It is looked for in the
# 3rd <li> of the last detail box first; if that yields nothing, the 2nd <li>
# is tried instead.
get_level = page.search('.item-blue .detail-box:last ul li:nth-child(3)').text.delete!("[\n,\t,\r]").lstrip.split('Cấp bậc')
get_level = get_level[1].to_s.strip
if get_level.blank?
g_level = page.search('.item-blue .detail-box:last ul li:nth-child(2)').text.delete!("[\n,\t,\r]").lstrip.split('Cấp bậc')
level = g_level[1].to_s.strip
data[:level] = g_level[1].to_s.strip
else
g_level = get_level
level = g_level
data[:level] = get_level
end
# The experience is the text after the 'Kinh nghiệm' label in the 2nd <li>.
exp = page.search('.item-blue .detail-box:last ul li:nth-child(2)').text.delete!("[\n,\t,\r]").split('Kinh nghiệm')
exp = exp[1].to_s.strip
exprience = exp
add_data(name, company_name, city_name, created_date, expiration_date, salary, industry_name, description, level, exprience)
data[:exprience] = exp
add_data(data)
end
private
# Extracts the job fields from a page that uses the second of the layouts this
# class handles (selectors under .top-job, .info-workplace, .info and .boxtp),
# then hands them to add_data.
#
# page - object whose #search accepts a CSS selector (presumably a parsed
#        Nokogiri/Mechanize document - TODO confirm against the caller).
#
# The created date is always stored as an empty string for this layout.
#
# NOTE(review): this span is taken from a commit diff rendered without +/-
# markers, so each old local-variable assignment appears next to the
# `data[...]` assignment that presumably replaced it, and both add_data calls
# are shown.
#
# NOTE(review): the `blank?` checks guard against empty text, but
# String#delete! still returns nil when the text is non-blank and contains
# none of the listed characters, in which case the chained call would raise
# NoMethodError.
def crawl_data_jobs_interface_2(page)
name = page.search('.apply-now-content .job-desc .title').text
company_name = page.search('.top-job .top-job-info .tit_company').text
data = {}
data[:name] = page.search('.apply-now-content .job-desc .title').text
data[:company_name] = page.search('.top-job .top-job-info .tit_company').text
# Collect the text of every workplace link, one nth-child lookup per link,
# and store the names as a single comma-separated string.
locations = []
length = page.search('.info-workplace .value a').size
length.times do |n|
locations << page.search(".info-workplace .value a:nth-child(#{n + 1})").text
end
city_name = locations.join(',')
created_date = ''
data[:city_name] = locations.join(',')
data[:created_date] = ''
# Only the last whitespace-separated token of the 4th <li> is kept; an empty
# string is stored when that <li> has no text.
expiration_date = page.search('.info li:nth-child(4)').text
expiration_date = expiration_date.blank? ? '' : expiration_date.delete!("[\n,\t,\r]").split(' ').last
salary = page.search('.info li:nth-child(3)').text.split('Lương').last.strip
industry_name = page.search('.info li:nth-child(5) .value').text
description = page.search('.left-col').to_s
data[:expiration_date] = expiration_date.blank? ? '' : expiration_date.delete!("[\n,\t,\r]").split(' ').last
# The salary is the text after the 'Lương' label in the 3rd <li>.
data[:salary] = page.search('.info li:nth-child(3)').text.split('Lương').last.strip
data[:industry_name] = page.search('.info li:nth-child(5) .value').text
data[:description] = page.search('.left-col').to_s
# The level is the text after the 'Cấp bậc' label, or '' when the <li> is empty.
lv = page.search('.boxtp .info li:nth-child(2)').text
level = lv.blank? ? '' : lv.delete!("[\n,\t,\r]").strip.split('Cấp bậc').last.strip
data[:level] = lv.blank? ? '' : lv.delete!("[\n,\t,\r]").strip.split('Cấp bậc').last.strip
# The experience is the text after the 'Kinh nghiệm' label, or '' when the <li> is empty.
exp = page.search('.info li:nth-child(6)').text
exprience = exp.blank? ? '' : exp.delete!("[\n,\t,\r]").split('Kinh nghiệm').last.strip
add_data(name, company_name, city_name, created_date, expiration_date, salary, industry_name, description, level, exprience)
data[:exprience] = exp.blank? ? '' : exp.delete!("[\n,\t,\r]").split('Kinh nghiệm').last.strip
add_data(data)
end
private
# Extracts the job fields from a page that uses the .info-company /
# .DetailJobNew layout, then hands them to add_data. Most fields are read from
# a fixed <li> position inside .DetailJobNew.
#
# page - object whose #search accepts a CSS selector (presumably a parsed
#        Nokogiri/Mechanize document - TODO confirm against the caller).
#
# The created date is always stored as an empty string for this layout.
#
# NOTE(review): this span is taken from a commit diff rendered without +/-
# markers, so the old local-variable version (ending in the positional
# add_data call) is followed by the `data[...]` version that presumably
# replaced it.
#
# NOTE(review): unlike the other two crawl_data_jobs_interface_* methods,
# the description here is the raw #search result with no `.to_s` - confirm
# whether that is intended.
def crawl_data_jobs_interface_5(page)
name = page.search('.info-company h1').text
company_name = page.search('.info-company .text-job h2').text
city_name = page.search('.DetailJobNew ul li:nth-child(1) a').text
created_date = ''
expiration_date = page.search('.DetailJobNew li:nth-child(9) span').text.strip
salary = page.search('.DetailJobNew li:nth-child(3) span').text.strip
industry_name = page.search('.DetailJobNew li:nth-child(2) span').text.strip
description = page.search('.left-col .detail-row')
level = page.search('.DetailJobNew ul li:nth-child(6) span').text.strip
exprience = page.search('.DetailJobNew li:nth-child(5) span').text.strip
add_data(name, company_name, city_name, created_date, expiration_date, salary, industry_name, description, level, exprience)
data = {}
data[:name] = page.search('.info-company h1').text
data[:company_name] = page.search('.info-company .text-job h2').text
data[:city_name] = page.search('.DetailJobNew ul li:nth-child(1) a').text
data[:created_date] = ''
data[:expiration_date] = page.search('.DetailJobNew li:nth-child(9) span').text.strip
data[:salary] = page.search('.DetailJobNew li:nth-child(3) span').text.strip
data[:industry_name] = page.search('.DetailJobNew li:nth-child(2) span').text.strip
data[:description] = page.search('.left-col .detail-row')
data[:level] = page.search('.DetailJobNew ul li:nth-child(6) span').text.strip
data[:exprience] = page.search('.DetailJobNew li:nth-child(5) span').text.strip
add_data(data)
end
private
def make_foreign_industries_table(data, id_job)
unless data.blank? && id_job.blank?
unless data.blank? && id_job.blank?
content = data.split(',')
content.each do |val|
val.gsub!('&amp;', '&') if val.include?('&amp;')
......@@ -200,16 +192,13 @@ class InterfaceWeb
end
end
private
# Links a job to each city named in a comma-separated string by creating one
# CityJob row per city. A city that City.find_by cannot find by its stripped
# name is created first with area DOMESTIC.
#
# data   - comma-separated city names.
# id_job - id stored as job_id on each CityJob row.
#
# NOTE(review): this span is taken from a commit diff rendered without +/-
# markers, so the old `unless ... end` wrapper and the guard-clause version
# that presumably replaced it are both shown.
#
# NOTE(review): both guards skip the work only when data AND id_job are blank.
# A nil data with a present id_job would reach `data.split` and raise
# NoMethodError - `||` looks like the intended operator; confirm.
def make_foreign_cities_table(data, id_job)
unless data.blank? && id_job.blank?
cities = data.split(',')
cities.each do |city|
data_city = City.find_by name: city.strip
id_cities = data_city.blank? ? City.create!(name: city.strip, area: DOMESTIC).id : data_city.id
CityJob.create!(job_id: id_job, city_id: id_cities)
end
return if data.blank? && id_job.blank?
cities = data.split(',')
cities.each do |city|
data_city = City.find_by name: city.strip
id_cities = data_city.blank? ? City.create!(name: city.strip, area: DOMESTIC).id : data_city.id
CityJob.create!(job_id: id_job, city_id: id_cities)
end
end
......
......@@ -7,7 +7,6 @@ require 'src/interface_web'
namespace :crawler do
task populate: :environment do
Company.find_or_create_by(name: 'Bảo mật') do |company|
company.name = 'Bảo mật'
company.address = 'Vui lòng xem trong mô tả công việc'
company.short_description = 'Vui lòng xem trong mô tả công việc'
end
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment