Commit e1d92db5 by Hung0326 Committed by GitHub

Merge pull request #10 from Hung0326/dev

import data from csv
parents 564970e4 d369638b
...@@ -2,6 +2,13 @@ class Industry < ApplicationRecord ...@@ -2,6 +2,13 @@ class Industry < ApplicationRecord
has_many :industry_jobs has_many :industry_jobs
has_many :jobs, through: :industry_jobs has_many :jobs, through: :industry_jobs
scope :sort_asc, -> { order(name: :asc)} scope :sort_asc, -> { order(name: :asc)}
# Ex:- scope :active, lambda {where(:active => true)}
# Ex:- scope :active, -> {where(:active => true)}

# Ranks industries by the number of jobs attached to each one.
#
# The counts come from a single grouped LEFT JOIN query instead of one COUNT
# query per industry. The LEFT JOIN keeps industries that have no jobs, which
# are reported with a count of 0. Rows that share the same name are counted
# together under that name.
#
# NOTE(review): the 'industries' and 'jobs' table names follow the Rails
# naming convention for Industry/Job — confirm no custom table_name is set.
#
# @return [Array<Array(String, Integer)>] [industry name, job count] pairs,
#   ordered from the most jobs to the fewest
def self.top_hot
  Industry.left_joins(:jobs)
          .group('industries.name')
          .count('jobs.id')
          .sort_by { |_name, job_count| -job_count }
end
end end
...@@ -46,7 +46,7 @@ class Clawler ...@@ -46,7 +46,7 @@ class Clawler
company.address = 'Vui lòng xem trong mô tả công việc' company.address = 'Vui lòng xem trong mô tả công việc'
company.short_description = 'Vui lòng xem trong mô tả công việc' company.short_description = 'Vui lòng xem trong mô tả công việc'
end end
@data = Interface_web.craw_data_companies() @data = InterfaceWeb.craw_data_companies()
puts 'Save info companies to database . . .' puts 'Save info companies to database . . .'
@data[:name].each_with_index do |name, index| @data[:name].each_with_index do |name, index|
if Company.find_by(name: name).blank? if Company.find_by(name: name).blank?
...@@ -62,24 +62,23 @@ class Clawler ...@@ -62,24 +62,23 @@ class Clawler
# FILL DATA JOBS # FILL DATA JOBS
def self.make_jobs def self.make_jobs
Job.update_all(newdata: 0) Job.update_all(newdata: 0)
@data_jobs = Interface_web.make_data() @data_jobs = InterfaceWeb.make_data()
puts 'Save to database . . .' puts 'Save to database . . .'
i = @data_jobs[:name].length @data_jobs[:name].each_with_index do |n,i|
i.times do |n| name = n.to_s
name = @data_jobs[:name][n].to_s company_name = @data_jobs[:company_name][i].to_s.strip
company_name = @data_jobs[:company_name][n].to_s.strip
id_company = Company.find_by name: company_name id_company = Company.find_by name: company_name
if id_company != nil if id_company != nil
id_company = id_company.id id_company = id_company.id
else else
id_company = 1 id_company = 1
end end
level = @data_jobs[:level][n].to_s level = @data_jobs[:level][i].to_s
experience = @data_jobs[:exprience][n].to_s experience = @data_jobs[:exprience][i].to_s
salary = @data_jobs[:salary][n].to_s salary = @data_jobs[:salary][i].to_s
create_date = @data_jobs[:created_date][n].to_s create_date = @data_jobs[:created_date][i].to_s
expiration_date = @data_jobs[:expiration_date][n].to_s expiration_date = @data_jobs[:expiration_date][i].to_s
description = @data_jobs[:description][n].to_s description = @data_jobs[:description][i].to_s
id_job = Job.create!(name: name, id_job = Job.create!(name: name,
company_id: id_company, company_id: id_company,
...@@ -90,8 +89,8 @@ class Clawler ...@@ -90,8 +89,8 @@ class Clawler
expiration_date: expiration_date, expiration_date: expiration_date,
description: description, description: description,
newdata: 1) newdata: 1)
self.make_foreign_industries_table(@data_jobs[:industry_name][n],id_job.id) self.make_foreign_industries_table(@data_jobs[:industry_name][i],id_job.id)
self.make_foreign_cities_table(@data_jobs[:city_name][n],id_job.id) self.make_foreign_cities_table(@data_jobs[:city_name][i],id_job.id)
end end
end end
......
...@@ -2,7 +2,7 @@ require 'net/ftp' ...@@ -2,7 +2,7 @@ require 'net/ftp'
require 'src/unzip' require 'src/unzip'
require 'csv' require 'csv'
class FTP_sever class FtpSever
CONTENT_SERVER_DOMAIN_NAME = '192.168.1.156' CONTENT_SERVER_DOMAIN_NAME = '192.168.1.156'
CONTENT_SERVER_USER_NAME = 'training' CONTENT_SERVER_USER_NAME = 'training'
CONTENT_SERVER_USER_PASSWORD = 'training' CONTENT_SERVER_USER_PASSWORD = 'training'
...@@ -13,9 +13,9 @@ class FTP_sever ...@@ -13,9 +13,9 @@ class FTP_sever
begin begin
extract_zip('./jobs.zip','lib/csv') extract_zip('./jobs.zip','lib/csv')
File.delete('./jobs.zip') if File.exist?('./jobs.zip') File.delete('./jobs.zip') if File.exist?('./jobs.zip')
puts "Unzip done\n" puts "Extract file done"
rescue rescue
puts "File not found\n" puts "File not found"
end end
end end
end end
...@@ -24,16 +24,7 @@ class FTP_sever ...@@ -24,16 +24,7 @@ class FTP_sever
donwload_csv() donwload_csv()
table = CSV.parse(File.read("lib/csv/jobs.csv"), headers: true) table = CSV.parse(File.read("lib/csv/jobs.csv"), headers: true)
end end
# puts table['name']
# puts table['company name'].size
# puts table['company province'].size
##puts table['category'].size
# puts table['company address'].size
# puts table['level'].size
# puts table['salary'].size
# puts table['benefit'].size
# puts table['requirement'].size
# puts table['description'].size
def self.parse_csv_industries(data) def self.parse_csv_industries(data)
puts 'Import data industries . . .' puts 'Import data industries . . .'
industries = [] industries = []
...@@ -74,6 +65,7 @@ class FTP_sever ...@@ -74,6 +65,7 @@ class FTP_sever
company.address = data['company address'][index] company.address = data['company address'][index]
company.short_description = data['benefit'][index] company.short_description = data['benefit'][index]
end end
puts index
rescue => exception rescue => exception
puts '---' puts '---'
end end
...@@ -85,13 +77,14 @@ class FTP_sever ...@@ -85,13 +77,14 @@ class FTP_sever
def self.parse_csv_jobs(data) def self.parse_csv_jobs(data)
Job.update_all(newdata: 0) Job.update_all(newdata: 0)
data['name'].each_with_index do |name, index| data['name'].each_with_index do |name, index|
desc = data['requirement'][index] << '\n' << data['description'][index] desc = (data['requirement'][index]).to_s + '\n' << (data['description'][index]).to_s
id_company = Company.find_by name: data['company name'][index].to_s.strip id_company = Company.find_by name: data['company name'][index].to_s.strip
if id_company.blank? if id_company.blank?
id_company = 1 id_company = 1
else else
id_company = id_company.id id_company = id_company.id
end end
begin
id_job = Job.create!( name: name, id_job = Job.create!( name: name,
company_id: id_company, company_id: id_company,
level: data['level'][index], level: data['level'][index],
...@@ -101,7 +94,36 @@ class FTP_sever ...@@ -101,7 +94,36 @@ class FTP_sever
expiration_date: "", expiration_date: "",
description: desc, description: desc,
newdata: 1 ) newdata: 1 )
make_foreign_cities_table(data['work place'][index],id_job.id)
make_foreign_industries_table(data['category'][index],id_job.id)
puts index
rescue => exception
puts '---------'
end
end
end
# Links a job to the city named in a raw cell value.
#
# The value is stringified, stripped of the characters [ ] and ", and
# trimmed; the result is used as the city name. When no City with that name
# exists, one is created with area 1 (the meaning of that value is not
# visible in this file).
#
# @param data [Object] raw city value; nil becomes the empty string
# @param id_job [Integer] id of the job to link
# @return [CityJob] the join record created for the job/city pair
def self.make_foreign_cities_table(data, id_job)
  city_name = data.to_s.delete("[]\"").strip
  city = City.find_by(name: city_name) || City.create!(name: city_name, area: 1)
  CityJob.create!(job_id: id_job, city_id: city.id)
end
# Links a job to the industry named in a raw category value.
#
# Every "," or "/" separator, together with any whitespace already around it,
# is rewritten to " / ". This way "A,B", "A/B", "A , B" and "A / B" all
# resolve to the same name "A / B"; padding each separator unconditionally
# would turn an already spaced "A / B" into "A  /  B" and create a
# near-duplicate Industry row.
#
# @param data [Object] raw category value; nil becomes the empty string
# @param id_job [Integer] id of the job to link
# @return [IndustryJob] the join record created for the job/industry pair
def self.make_foreign_industries_table(data, id_job)
  industry_name = data.to_s.gsub(%r{\s*[,/]\s*}, ' / ').strip
  industry = Industry.find_by(name: industry_name) || Industry.create!(name: industry_name)
  IndustryJob.create!(industry_id: industry.id, job_id: id_job)
end
def self.import_data_from_csv def self.import_data_from_csv
...@@ -109,7 +131,7 @@ class FTP_sever ...@@ -109,7 +131,7 @@ class FTP_sever
parse_csv_industries(data) parse_csv_industries(data)
parse_csv_cities(data) parse_csv_cities(data)
parse_csv_companies(data) parse_csv_companies(data)
# parse_csv_jobs(data) parse_csv_jobs(data)
end end
end end
\ No newline at end of file
class Interface_web class InterfaceWeb
# func get "n" link company & job # func get "n" link company & job
def self.crawl_link_for_companies_jobs(page) def self.crawl_link_for_companies_jobs(page)
puts "Crawling link on page...\nPLease wait...\n" puts "Crawling link on page...\nPLease wait...\n"
...@@ -226,7 +226,5 @@ class Interface_web ...@@ -226,7 +226,5 @@ class Interface_web
end end
end end
# else # insert "page.search(".DetailJobNew ul li").size == 8" (if want catch interface 4) # else # insert "page.search(".DetailJobNew ul li").size == 8" (if want catch interface 4)
# crawl_data_jobs_interface_3(path) # crawl_data_jobs_interface_3(path)
\ No newline at end of file
require 'src/crawler' require 'src/crawler'
require 'src/ftp' require 'src/ftp'
namespace :db do namespace :crawler do
task populate: :environment do task populate: :environment do
# Clawler.make_industries Clawler.make_industries
# Clawler.make_cities Clawler.make_cities
# Clawler.make_companies Clawler.make_companies
# Clawler.make_jobs Clawler.make_jobs
end end
task csv: :environment do task csv: :environment do
FTP_sever.import_data_from_csv Company.find_or_create_by(name: 'Bảo mật', address: 'Vui lòng xem trong mô tả công việc') do |company|
company.name = 'Bảo mật'
company.address = 'Vui lòng xem trong mô tả công việc'
company.short_description = 'Vui lòng xem trong mô tả công việc'
end
FtpSever.import_data_from_csv
end end
end end
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment