Commit f7c44114 by Xuan Trung Le

fix crawler data

parent 65db38d6
......@@ -17,14 +17,10 @@ class Crawler
link = URI.escape(link)
doc = Nokogiri::HTML(open(link), nil, 'utf-8')
if doc.css('#template_vantai').blank? &&
doc.css('#template_1').blank? &&
doc.css('#template_2').blank? &&
doc.css('#template_3').blank? &&
doc.css('#template_4').blank? &&
doc.css('#template_5').blank? &&
doc.css('#template_6').blank? &&
doc.css('#template_7').blank?
if doc.css('#template_vantai, #template_1,
#template_2, #template_3, #template_4,
#template_5, #template_6, #template_7,
#newyear_02').blank?
params = use_template_default(doc, link)
job_details << params
......@@ -84,7 +80,7 @@ class Crawler
params = {}
if doc.css('.box1Detail .TitleDetailNew span').length > 0
params[:name] = doc.css('.box1Detail .TitleDetailNew span').text
params[:location] = doc.css('.box1Detail .TitleDetailNew label')[0].text
params[:location] = doc.at('.box1Detail .TitleDetailNew label').text
params[:description] = doc.css('.desc_company p').text
end
params[:name] ||= 'Bảo mật'
......
require "crawler.rb"
# Placeholder for the data-import entry point.
class ImportData
  # Currently has no body, so calling it does nothing and returns nil.
  def job; end
end
......@@ -9,6 +9,10 @@ class Job < ApplicationRecord
def self.create_new_jobs(arr_jobs)
arr_jobs.each do |item|
job_cities = []
city_name = []
job_industries = []
industry_name = []
job = Job.new(name: item[:name],
salary: item[:salary],
description: item[:description],
......@@ -19,8 +23,16 @@ class Job < ApplicationRecord
updated_date: item[:updated_date])
# City
unless item[:city].blank?
item[:city].split(',').each do |name|
job.cities << City.find_or_create_by(name: name.strip)
city_name = item[:city].split(',').map(&:strip)
job_cities = City.where(name: city_name)
job_cities.each do |city|
job.cities << city
end
city_name = city_name - job_cities.pluck(:name)
city_name.each do |name|
job.cities << City.create(name: name)
end
end
......@@ -32,16 +44,30 @@ class Job < ApplicationRecord
# Industry
unless item[:industry].blank?
item[:industry].split(',').each do |name|
job.industries << Industry.find_or_create_by(name: name.strip)
industry_name = item[:industry].split(',').map(&:strip)
job_industries = Industry.where(name: industry_name)
job_industries.each do |industry|
job.industries << industry
end
industry_name = industry_name - job_industries.pluck(:name)
industry_name.each do |name|
job.industries << Industry.create(name: name)
end
end
puts "Saving #{item[:name]} ......................................"
job.save
if job.save
puts "Job was successfully created"
else
puts "Error..."
end
end
end
# Returns the entries of +links+ that no Job row already stores as its
# original_link.
#
# Only rows whose original_link is one of +links+ are queried, and only that
# column is plucked, rather than loading every Job and comparing in Ruby.
def self.filter_link_exist(links)
  links - Job.where(original_link: links).pluck(:original_link)
end
end
# Adds the original_link string column to jobs and a unique index on it.
class AddOriginalLinkToJobs < ActiveRecord::Migration[5.1]
  def change
    # Add the column exactly once; a second add_column for the same name
    # would fail because the column already exists.
    add_column :jobs, :original_link, :string
    # add_column does not create indexes from index:/unique: options, so the
    # unique index is declared explicitly.
    # NOTE(review): this will fail if jobs already holds duplicate
    # original_link values - confirm the data before running.
    add_index :jobs, :original_link, unique: true
  end
end
# Creates the join table between jobs and cities.
class CreateJoinTableJobsCites < ActiveRecord::Migration[5.1]
  def change
    # create_join_table takes both table names and generates the job_id and
    # city_id columns itself, so only the lookup indexes are declared here.
    create_join_table :jobs, :cities do |t|
      t.index [:job_id, :city_id]
      t.index [:city_id, :job_id]
    end
  end
end
# Creates the join table between cities and companies.
class CreateJoinTableCitiesCompanies < ActiveRecord::Migration[5.1]
  def change
    # create_join_table takes both table names and generates the city_id and
    # company_id columns itself, so only the lookup indexes are declared here.
    create_join_table :cities, :companies, id: false do |t|
      t.index [:city_id, :company_id]
      t.index [:company_id, :city_id]
    end
  end
end
ENV["RAILS_ENV"] ||= "production"
require "./app/data/crawler.rb"
namespace :data do
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment