Commit f7c44114 by Xuan Trung Le

fix crawler data

parent 65db38d6
...@@ -17,14 +17,10 @@ class Crawler ...@@ -17,14 +17,10 @@ class Crawler
link = URI.escape(link) link = URI.escape(link)
doc = Nokogiri::HTML(open(link), nil, 'utf-8') doc = Nokogiri::HTML(open(link), nil, 'utf-8')
if doc.css('#template_vantai').blank? && if doc.css('#template_vantai, #template_1,
doc.css('#template_1').blank? && #template_2, #template_3, #template_4,
doc.css('#template_2').blank? && #template_5, #template_6, #template_7,
doc.css('#template_3').blank? && #newyear_02').blank?
doc.css('#template_4').blank? &&
doc.css('#template_5').blank? &&
doc.css('#template_6').blank? &&
doc.css('#template_7').blank?
params = use_template_default(doc, link) params = use_template_default(doc, link)
job_details << params job_details << params
...@@ -84,7 +80,7 @@ class Crawler ...@@ -84,7 +80,7 @@ class Crawler
params = {} params = {}
if doc.css('.box1Detail .TitleDetailNew span').length > 0 if doc.css('.box1Detail .TitleDetailNew span').length > 0
params[:name] = doc.css('.box1Detail .TitleDetailNew span').text params[:name] = doc.css('.box1Detail .TitleDetailNew span').text
params[:location] = doc.css('.box1Detail .TitleDetailNew label')[0].text params[:location] = doc.at('.box1Detail .TitleDetailNew label').text
params[:description] = doc.css('.desc_company p').text params[:description] = doc.css('.desc_company p').text
end end
params[:name] ||= 'Bảo mật' params[:name] ||= 'Bảo mật'
......
require "crawler.rb"
class ImportData
def job
end
end
...@@ -9,6 +9,10 @@ class Job < ApplicationRecord ...@@ -9,6 +9,10 @@ class Job < ApplicationRecord
def self.create_new_jobs(arr_jobs) def self.create_new_jobs(arr_jobs)
arr_jobs.each do |item| arr_jobs.each do |item|
job_cities = []
city_name = []
job_industries = []
industry_name = []
job = Job.new(name: item[:name], job = Job.new(name: item[:name],
salary: item[:salary], salary: item[:salary],
description: item[:description], description: item[:description],
...@@ -19,8 +23,16 @@ class Job < ApplicationRecord ...@@ -19,8 +23,16 @@ class Job < ApplicationRecord
updated_date: item[:updated_date]) updated_date: item[:updated_date])
# City # City
unless item[:city].blank? unless item[:city].blank?
item[:city].split(',').each do |name| city_name = item[:city].split(',').map(&:strip)
job.cities << City.find_or_create_by(name: name.strip)
job_cities = City.where(name: city_name)
job_cities.each do |city|
job.cities << city
end
city_name = city_name - job_cities.pluck(:name)
city_name.each do |name|
job.cities << City.create(name: name)
end end
end end
...@@ -32,16 +44,30 @@ class Job < ApplicationRecord ...@@ -32,16 +44,30 @@ class Job < ApplicationRecord
# Industry # Industry
unless item[:industry].blank? unless item[:industry].blank?
item[:industry].split(',').each do |name| industry_name = item[:industry].split(',').map(&:strip)
job.industries << Industry.find_or_create_by(name: name.strip) job_industries = Industry.where(name: industry_name)
job_industries.each do |industry|
job.industries << industry
end
industry_name = industry_name - job_industries.pluck(:name)
industry_name.each do |name|
job.industries << Industry.create(name: name)
end end
end end
puts "Saving #{item[:name]} ......................................" puts "Saving #{item[:name]} ......................................"
job.save
if job.save
puts "Job was successfully created"
else
puts "Error..."
end
end end
end end
def self.filter_link_exist(links) def self.filter_link_exist(links)
return links - Job.all.map{|job| job.original_link} return links - Job.where(original_link: links).pluck(:original_link)
end end
end end
class AddOriginalLinkToJobs < ActiveRecord::Migration[5.1] class AddOriginalLinkToJobs < ActiveRecord::Migration[5.1]
def change def change
add_column :jobs, :original_link, :string add_column :jobs, :original_link, :string, index: true, uniqe: true
end end
end end
class CreateJoinTableJobsCites < ActiveRecord::Migration[5.1] class CreateJoinTableJobsCites < ActiveRecord::Migration[5.1]
def change def change
create_join_table :jobs, :cities do |t| create_join_table :jobs_cities do |t|
t.index [:job_id, :city_id] t.references :job, index: true
t.index [:city_id, :job_id] t.references :city, index: true
end end
end end
end end
class CreateJoinTableCitiesCompanies < ActiveRecord::Migration[5.1] class CreateJoinTableCitiesCompanies < ActiveRecord::Migration[5.1]
def change def change
create_join_table :cities, :companies, id: false do |t| create_join_table :cities_companies, id: false do |t|
t.index [:city_id, :company_id] t.references :company, index: true
t.index [:company_id, :city_id] t.references :city, index: true
end end
end end
end end
ENV["RAILS_ENV"] ||= "production"
require "./app/data/crawler.rb" require "./app/data/crawler.rb"
namespace :data do namespace :data do
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment