Commit 37488f20 by Ngô Trung Hưng

autoload

parent bf921c4c
Pipeline #788 canceled with stages
in 0 seconds
...@@ -10,10 +10,7 @@ module Venjob ...@@ -10,10 +10,7 @@ module Venjob
class Application < Rails::Application class Application < Rails::Application
# Initialize configuration defaults for originally generated Rails version. # Initialize configuration defaults for originally generated Rails version.
config.load_defaults 5.2 config.load_defaults 5.2
config.autoload_paths += [ config.autoload_paths << Rails.root.join('lib')
Rails.root.join('lib/src'),
Rails.root.join('lib/src/base'),
Rails.root.join('lib/src/interface')]
# Settings in config/environments/* take precedence over those specified here. # Settings in config/environments/* take precedence over those specified here.
# Application configuration can go into files in config/initializers # Application configuration can go into files in config/initializers
# -- all .rb files in that directory are automatically loaded after loading # -- all .rb files in that directory are automatically loaded after loading
......
...@@ -5,7 +5,9 @@ require 'open-uri' ...@@ -5,7 +5,9 @@ require 'open-uri'
require 'logger' require 'logger'
# Crawler data # Crawler data
class Base module Src
module Base
class Base
attr_accessor :job, :page attr_accessor :job, :page
def initialize(page) def initialize(page)
...@@ -84,4 +86,6 @@ class Base ...@@ -84,4 +86,6 @@ class Base
def fill_experience def fill_experience
exist_experience? ? page.xpath('//ul//li[position()=2]//p')[1].text.strip : '' exist_experience? ? page.xpath('//ul//li[position()=2]//p')[1].text.strip : ''
end end
end
end
end end
...@@ -3,7 +3,8 @@ ...@@ -3,7 +3,8 @@
require 'open-uri' require 'open-uri'
# Crawler data # Crawler data
class Crawler module Src
class Crawler
attr_accessor :number_link attr_accessor :number_link
def initialize(number_link) def initialize(number_link)
...@@ -62,4 +63,5 @@ class Crawler ...@@ -62,4 +63,5 @@ class Crawler
logger.error "Crawler data companies has error: #{e}" logger.error "Crawler data companies has error: #{e}"
end end
end end
end
end end
# frozen_string_literal: true # frozen_string_literal: true
# Crawler data job # Autoload
class CrawlerJob < Crawler module Src
# Crawler job
class CrawlerJob < Crawler
SIZE_LI = 8 SIZE_LI = 8
def crawl_link def crawl_link
...@@ -33,11 +35,11 @@ class CrawlerJob < Crawler ...@@ -33,11 +35,11 @@ class CrawlerJob < Crawler
parse_data.each do |path| parse_data.each do |path|
page = safe_link(path) page = safe_link(path)
if page.search('.item-blue .detail-box:nth-child(1) ul li:nth-child(1) p')[0].present? if page.search('.item-blue .detail-box:nth-child(1) ul li:nth-child(1) p')[0].present?
@data = RedInterface.new(page).create_data @data = Src::Interface::RedInterface.new(page).create_data
elsif page.search('section .template-200').text.present? elsif page.search('section .template-200').text.present?
@data = BlueInterface.new(page).create_data @data = Src::Interface::BlueInterface.new(page).create_data
elsif page.search('.DetailJobNew ul li').size == SIZE_LI && page.search('.right-col ul li').text.exclude?('Độ tuổi') elsif page.search('.DetailJobNew ul li').size == SIZE_LI && page.search('.right-col ul li').text.exclude?('Độ tuổi')
@data = GreenInterface.new(page).create_data @data = Src::Interface::GreenInterface.new(page).create_data
end end
add_data(@data) add_data(@data)
refresh_first_link refresh_first_link
...@@ -76,4 +78,5 @@ class CrawlerJob < Crawler ...@@ -76,4 +78,5 @@ class CrawlerJob < Crawler
job.cities << city job.cities << city
end end
end end
end
end end
# frozen_string_literal: true # frozen_string_literal: true
# Inherience from base # Inherience from base
class BlueInterface < Base module Src
module Interface
class BlueInterface < Base::Base
def fill_company_name def fill_company_name
page.search('.top-job .top-job-info .tit_company').text page.search('.top-job .top-job-info .tit_company').text
end end
...@@ -38,4 +40,6 @@ class BlueInterface < Base ...@@ -38,4 +40,6 @@ class BlueInterface < Base
def fill_experience def fill_experience
page.xpath('//ul//li[position()=7]/b').text page.xpath('//ul//li[position()=7]/b').text
end end
end
end
end end
# frozen_string_literal: true # frozen_string_literal: true
# ahihi # ahihi
class GreenInterface < Base module Src
module Interface
class GreenInterface < Base::Base
def fill_name def fill_name
page.search('.info-company h1').text page.search('.info-company h1').text
end end
...@@ -42,4 +44,6 @@ class GreenInterface < Base ...@@ -42,4 +44,6 @@ class GreenInterface < Base
def fill_experience def fill_experience
exist_experience? ? page.search('.DetailJobNew li:nth-child(5) span').text.strip : '' exist_experience? ? page.search('.DetailJobNew li:nth-child(5) span').text.strip : ''
end end
end
end
end end
# frozen_string_literal: true # frozen_string_literal: true
# Inherience from base # Inherience from base
class RedInterface < Base module Src
module Interface
class RedInterface < Base::Base
end
end
end end
...@@ -10,9 +10,9 @@ namespace :crawler do ...@@ -10,9 +10,9 @@ namespace :crawler do
company.address = 'Vui lòng xem trong mô tả công việc' company.address = 'Vui lòng xem trong mô tả công việc'
company.short_description = 'Vui lòng xem trong mô tả công việc' company.short_description = 'Vui lòng xem trong mô tả công việc'
end end
cw = Crawler.new(NUMBER_LINK_WILL_BE_CRAWLER) cw = Src::Crawler.new(NUMBER_LINK_WILL_BE_CRAWLER)
cw.craw_data_cities cw.craw_data_cities
cw.craw_data_companies cw.craw_data_companies
CrawlerJob.new(NUMBER_LINK_WILL_BE_CRAWLER).craw_data_jobs Src::CrawlerJob.new(NUMBER_LINK_WILL_BE_CRAWLER).craw_data_jobs
end end
end end
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment