Commit 95480c2e by Thanh Hung Pham

Merge remote-tracking branch 'origin/master'

parents 42dc461e 454004db
......@@ -14,5 +14,7 @@ module VeNJOB
# Settings in config/environments/* take precedence over those specified here.
# Application configuration should go into files in config/initializers
# -- all .rb files in that directory are automatically loaded.
config.autoload_paths += %W[#{config.root}/lib]
config.eager_load_paths += %W[#{config.root}/lib]
end
end
......@@ -24,5 +24,4 @@ set :output, { error: 'log/cron_error_log.log', standard: 'log/cron_log.log' }
every 1.day, at: '12:00 pm' do
rake 'crawler:load'
rake 'import:csv'
end
end
......@@ -5,7 +5,7 @@ class CreateApplies < ActiveRecord::Migration[5.1]
t.references :job, index: true
t.datetime :applied_at
t.string :ip_address
t.string :user_agrent
t.string :user_agent
t.timestamps
end
......
......@@ -17,7 +17,7 @@ ActiveRecord::Schema.define(version: 20170628020034) do
t.bigint "job_id"
t.datetime "applied_at"
t.string "ip_address"
t.string "user_agrent"
t.string "user_agent"
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
t.index ["job_id"], name: "index_applies_on_job_id"
......
......@@ -5,3 +5,5 @@
#
# movies = Movie.create([{ name: 'Star Wars' }, { name: 'Lord of the Rings' }])
# Character.create(name: 'Luke', movie: movies.first)
Area.new(name: 'Viet Nam').save
Area.new(name: 'International').save
......@@ -3,7 +3,7 @@ require 'open-uri'
require 'nokogiri'
require 'logger'
class Careerbuilder
class Crawler::Careerbuilder
attr_reader :domain, :thread_count, :logger
def initialize(domain, thread_count = 1)
......@@ -18,8 +18,7 @@ class Careerbuilder
def crawl
@logger.info('Start crawl')
doc = Nokogiri::HTML(open('http://careerbuilder.vn'))
import_area
doc = Nokogiri::HTML(open(@domain))
import_category(doc)
import_city(doc)
......@@ -86,7 +85,12 @@ class Careerbuilder
company_name = doc.xpath("//div[@class='tit_company']").text.strip # Company name
company_address = doc.xpath("//div[@class='box1Detail']/p[@class='TitleDetailNew']/label[@itemprop='address']/label[@itemprop='addressLocality']").text.strip # Company Address
company_description = doc.xpath("//div[@class='desc_company content_fck']").text.strip # Company description
Company.new(name: company_name, address: company_address, description: company_description).save if Company.where(name: company_name).blank?
company = Company.find_or_create_by(name: company_name)
company.name = company_name
company.address = company_address
company.description = company_description
company.save
# Job Information
job_name = doc.xpath("//div[@class='LeftJobCB']/div[@class='top-job']/div[@class='top-job-info']/h1").text.strip # Job name
......@@ -105,31 +109,28 @@ class Careerbuilder
job_expiry_date = doc.xpath("//ul[@class='DetailJobNew']/li/p[span/text()='Hết hạn nộp: ']/text()").to_s
Job.new(name: job_name, description: job_description,
salary: job_salary,
city: City.find_by_name(job_location),
level: job_level, experience: job_experience, status: 0,
expiry_date: job_expiry_date.to_datetime).save
city = City.find_by_name(job_location)
job = Job.find_or_create_by(name: job_name, city: city, company: company)
job.description = job_description
job.salary = job_salary
job.level = job_level
job.experience = job_experience
job.status = 0
job.expiry_date = job_expiry_date.to_datetime
job.save
job_category.split(',').each do |category|
JobCategory.new(job: Job.find_by_name(job_name), category: Category.find_by_name(category)).save
category = Category.find_by_name(category)
JobCategory.find_or_create_by(job: job, category: category)
end
end
def import_area
Area.new(name: 'Viet Nam').save if Area.where(name: 'Viet Nam').blank?
Area.new(name: 'International').save if Area.where(name: 'International').blank?
rescue StandardError => e
logger.error("[method: ] #{import_category}")
logger.error(e.message)
logger.error(e.backtrace)
end
def import_category(doc)
categories = doc.xpath("//div[@class='s-home2']/div[@id='NewSearchJob3']/form/div[@class='search-horizontal']/div[@class='ui-widget box_multiSelect_industry']/select/option")
categories = categories.slice(1..categories.size - 2)
categories = categories.drop(1)
categories.each do |category|
Category.new(name: category.text.strip).save if Category.where(name: category.text.strip).blank?
Category.find_or_create_by(name: category.text.strip)
end
rescue StandardError => e
logger.error("[method: ] #{import_category}")
......@@ -139,10 +140,10 @@ class Careerbuilder
def import_city(doc)
cities = doc.xpath("//div[@class='s-home2']//div[@id='NewSearchJob3']/form/div[@class='search-horizontal']/div[@class='ui-widget box_multiSelect_location']/select/option").drop(1)
area_id = 1
area = Area.find_by_name('Viet Nam')
cities.each do |city|
area_id = 2 if city.text == 'Angola'
City.new(name: city.text.strip, area: Area.find(area_id)).save if City.where(name: city.text.strip).blank?
area = Area.find_by_name('International') if city.text == 'Angola'
City.find_or_create_by(name: city.text.strip, area: area)
end
rescue StandardError => e
logger.error("[method: ] #{import_city}")
......
namespace :crawler do
desc 'client crawler'
task load: :environment do
require "#{Rails.root}/lib/tasks/careerbuilder"
thread_count = ENV['THREAD_COUNT'] || 1
Careerbuilder.new('http://careerbuilder.vn', thread_count.to_i).crawl
Crawler::Careerbuilder.new('http://careerbuilder.vn', thread_count.to_i).crawl
end
end
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment