Commit bf921c4c by Ngô Trung Hưng

..

parent 33c084b5
Pipeline #763 failed with stages
in 0 seconds
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
# Description/Explanation of Person class # Description/Explanation of Person class
class City < ApplicationRecord class City < ApplicationRecord
RANGE = 69
has_many :city_jobs has_many :city_jobs
has_many :jobs, through: :city_jobs has_many :jobs, through: :city_jobs
enum area: { international: 0, domestic: 1 } enum area: { international: 0, domestic: 1 }
......
...@@ -2,5 +2,6 @@ ...@@ -2,5 +2,6 @@
# Description/Explanation of Person class # Description/Explanation of Person class
class Company < ApplicationRecord class Company < ApplicationRecord
COMPANY_SECURITY = 1
has_many :jobs has_many :jobs
end end
...@@ -6,8 +6,6 @@ require 'logger' ...@@ -6,8 +6,6 @@ require 'logger'
# Crawler data # Crawler data
class Base class Base
COMPANY_SECURITY = 1
attr_accessor :job, :page attr_accessor :job, :page
def initialize(page) def initialize(page)
...@@ -54,7 +52,7 @@ class Base ...@@ -54,7 +52,7 @@ class Base
end end
def fill_created_date def fill_created_date
page.search('.item-blue .detail-box:nth-child(1) ul li:nth-child(1) p')[0].text page.search('.item-blue .detail-box:nth-child(1) ul li:nth-child(1) p')[0].try(:text)
end end
def fill_expiration_date def fill_expiration_date
...@@ -74,20 +72,16 @@ class Base ...@@ -74,20 +72,16 @@ class Base
job[:description] = page.search('.tabs .tab-content .detail-row').to_s job[:description] = page.search('.tabs .tab-content .detail-row').to_s
end end
def check def exist_experience?
noname = page.search('//ul//li').text noname = page.search('//ul//li').text
noname.include?('Kinh nghiệm') noname.include?('Kinh nghiệm')
end end
def fill_lever def fill_lever
if check exist_experience? ? page.xpath('//ul//li[position()=3]//p')[1].text.strip : page.xpath('//ul//li[position()=2]//p')[1].text
page.xpath('//ul//li[position()=3]//p')[1].text.strip
else
page.xpath('//ul//li[position()=2]//p')[1].text
end
end end
def fill_experience def fill_experience
check ? page.xpath('//ul//li[position()=2]//p')[1].text.strip : '' exist_experience? ? page.xpath('//ul//li[position()=2]//p')[1].text.strip : ''
end end
end end
...@@ -4,9 +4,6 @@ require 'open-uri' ...@@ -4,9 +4,6 @@ require 'open-uri'
# Crawler data # Crawler data
class Crawler class Crawler
COMPANY_SECURITY = 1
RANGE = 69
attr_accessor :number_link attr_accessor :number_link
def initialize(number_link) def initialize(number_link)
...@@ -48,7 +45,7 @@ class Crawler ...@@ -48,7 +45,7 @@ class Crawler
page = Nokogiri::HTML(URI.open('https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-vi.html')) page = Nokogiri::HTML(URI.open('https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-vi.html'))
locations = page.search('#location option').map(&:text) locations = page.search('#location option').map(&:text)
locations.each_with_index do |val, index| locations.each_with_index do |val, index|
area = index > RANGE ? City.areas['international'] : City.areas['domestic'] area = index > City::RANGE ? City.areas['international'] : City.areas['domestic']
City.find_or_create_by(name: val) { |city| city.area = area } City.find_or_create_by(name: val) { |city| city.area = area }
end end
end end
......
...@@ -45,7 +45,7 @@ class CrawlerJob < Crawler ...@@ -45,7 +45,7 @@ class CrawlerJob < Crawler
end end
def add_data(data) def add_data(data)
id_company = (Company.find_by name: data[:company_name]).try(:id) || COMPANY_SECURITY id_company = (Company.find_by name: data[:company_name]).try(:id) || Company::COMPANY_SECURITY
job = Job.create(name: data[:name], job = Job.create(name: data[:name],
company_id: id_company, company_id: id_company,
level: data[:level], level: data[:level],
...@@ -61,8 +61,6 @@ class CrawlerJob < Crawler ...@@ -61,8 +61,6 @@ class CrawlerJob < Crawler
end end
def create_industry_relation(data, job) def create_industry_relation(data, job)
return if data.blank? && id_job.blank?
industries = data.split(',') industries = data.split(',')
industries.each do |val| industries.each do |val|
val.gsub!('&amp;', '&') if val.include?('&amp;') val.gsub!('&amp;', '&') if val.include?('&amp;')
......
...@@ -10,8 +10,6 @@ class BlueInterface < Base ...@@ -10,8 +10,6 @@ class BlueInterface < Base
page.search('.info-workplace .value a').map(&:text).join(',') page.search('.info-workplace .value a').map(&:text).join(',')
end end
def fill_created_date; end
def fill_expiration_date def fill_expiration_date
page.xpath('//ul//li[position()=4]//div').text page.xpath('//ul//li[position()=4]//div').text
end end
...@@ -28,13 +26,13 @@ class BlueInterface < Base ...@@ -28,13 +26,13 @@ class BlueInterface < Base
page.search('.left-col').to_s page.search('.left-col').to_s
end end
def check def exist_level?
noname = page.xpath('//ul//li[position()=2]/b').last.text noname = page.xpath('//ul//li[position()=2]/b').last.text
noname.include?('Cấp bậc') noname.include?('Cấp bậc')
end end
def fill_lever def fill_lever
check ? page.xpath('//ul//li[position()=2]/div').last.text : '' exist_level? ? page.xpath('//ul//li[position()=2]/div').last.text : ''
end end
def fill_experience def fill_experience
......
...@@ -14,8 +14,6 @@ class GreenInterface < Base ...@@ -14,8 +14,6 @@ class GreenInterface < Base
page.search('.DetailJobNew ul li:nth-child(1) a').text page.search('.DetailJobNew ul li:nth-child(1) a').text
end end
def fill_created_date; end
def fill_expiration_date def fill_expiration_date
page.xpath('//ul//li[last()-1]//span').children[1].text page.xpath('//ul//li[last()-1]//span').children[1].text
end end
...@@ -36,12 +34,12 @@ class GreenInterface < Base ...@@ -36,12 +34,12 @@ class GreenInterface < Base
page.search('.DetailJobNew li:nth-child(2) span').text.strip page.search('.DetailJobNew li:nth-child(2) span').text.strip
end end
def check_exp def exist_experience?
noname = page.search('.DetailJobNew li span').text noname = page.search('.DetailJobNew li span').text
noname.include?('Kinh nghiệm') noname.include?('Kinh nghiệm')
end end
def fill_experience def fill_experience
check_exp ? page.search('.DetailJobNew li:nth-child(5) span').text.strip : '' exist_experience? ? page.search('.DetailJobNew li:nth-child(5) span').text.strip : ''
end end
end end
...@@ -5,7 +5,7 @@ require 'open-uri' ...@@ -5,7 +5,7 @@ require 'open-uri'
# rake task # rake task
namespace :crawler do namespace :crawler do
task populate: :environment do task populate: :environment do
NUMBER_LINK_WILL_BE_CRAWLER = 100 NUMBER_LINK_WILL_BE_CRAWLER = 5
Company.find_or_create_by(name: 'Bảo mật') do |company| Company.find_or_create_by(name: 'Bảo mật') do |company|
company.address = 'Vui lòng xem trong mô tả công việc' company.address = 'Vui lòng xem trong mô tả công việc'
company.short_description = 'Vui lòng xem trong mô tả công việc' company.short_description = 'Vui lòng xem trong mô tả công việc'
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment