Commit 564970e4 by Ngô Trung Hưng

import_data_from_csv

parent ad336f02
...@@ -21,6 +21,7 @@ gem 'turbolinks', '~> 5' ...@@ -21,6 +21,7 @@ gem 'turbolinks', '~> 5'
# Build JSON APIs with ease. Read more: https://github.com/rails/jbuilder # Build JSON APIs with ease. Read more: https://github.com/rails/jbuilder
gem 'jbuilder', '~> 2.5' gem 'jbuilder', '~> 2.5'
gem 'nokogiri' gem 'nokogiri'
gem 'rubyzip'
# Use Redis adapter to run Action Cable in production # Use Redis adapter to run Action Cable in production
# gem 'redis', '~> 4.0' # gem 'redis', '~> 4.0'
# Use ActiveModel has_secure_password # Use ActiveModel has_secure_password
......
...@@ -239,6 +239,7 @@ DEPENDENCIES ...@@ -239,6 +239,7 @@ DEPENDENCIES
rails (~> 5.2.4, >= 5.2.4.3) rails (~> 5.2.4, >= 5.2.4.3)
rails_12factor rails_12factor
rubocop rubocop
rubyzip
sass-rails (~> 5.0) sass-rails (~> 5.0)
selenium-webdriver selenium-webdriver
spring spring
......
...@@ -14,7 +14,7 @@ default: &default ...@@ -14,7 +14,7 @@ default: &default
encoding: utf8 encoding: utf8
pool: <%= ENV.fetch("RAILS_MAX_THREADS") { 5 } %> pool: <%= ENV.fetch("RAILS_MAX_THREADS") { 5 } %>
username: root username: root
password: '12345678' password: '1'
socket: /var/run/mysqld/mysqld.sock socket: /var/run/mysqld/mysqld.sock
......
File deleted
...@@ -13,10 +13,12 @@ class Clawler ...@@ -13,10 +13,12 @@ class Clawler
data_list_cities << x.gsub(/(^<[\w\D]*>)/, '').gsub(/\n/,'').rstrip data_list_cities << x.gsub(/(^<[\w\D]*>)/, '').gsub(/\n/,'').rstrip
end end
puts "Save data to database... \n------------------------" puts "Save data to database... \n------------------------"
data_list_cities.length.times do |i| data_list_cities.each_with_index do |val, index|
area = i > 69 ? 0 : 1 area = index > 69 ? 0 : 1
name = (data_list_cities[i].to_s) City.find_or_create_by(name: val) do |city|
City.create!(name: name, area: area) city.name = val
city.area = area
end
end end
end end
...@@ -30,21 +32,15 @@ class Clawler ...@@ -30,21 +32,15 @@ class Clawler
data_list_industries << x.gsub(/(^<[\w\D]*>)/, '').gsub(/\n/,'').strip data_list_industries << x.gsub(/(^<[\w\D]*>)/, '').gsub(/\n/,'').strip
end end
puts "Save data to database... \n------------------------" puts "Save data to database... \n------------------------"
data_list_industries.length.times do |i| data_list_industries.each do |val|
name = data_list_industries[i].to_s val.gsub!('&amp;','&') if val.include?('&amp;')
if name.include?('&amp;') Industry.find_or_create_by(name: val) { |industry| industry.name = val }
name.gsub!('&amp;','&')
end
Industry.create!(name: name)
end end
end end
# FILL DATA COMPANIES # FILL DATA COMPANIES
def self.make_companies def self.make_companies
# Company.create!(name: "Bảo mật",
# address: "Vui lòng xem trong mô tả công việc",
# short_description: "Vui lòng xem trong mô tả công việc")
Company.find_or_create_by(name: 'Bảo mật', address: 'Vui lòng xem trong mô tả công việc') do |company| Company.find_or_create_by(name: 'Bảo mật', address: 'Vui lòng xem trong mô tả công việc') do |company|
company.name = 'Bảo mật' company.name = 'Bảo mật'
company.address = 'Vui lòng xem trong mô tả công việc' company.address = 'Vui lòng xem trong mô tả công việc'
...@@ -62,6 +58,7 @@ class Clawler ...@@ -62,6 +58,7 @@ class Clawler
end end
end end
end end
# FILL DATA JOBS # FILL DATA JOBS
def self.make_jobs def self.make_jobs
Job.update_all(newdata: 0) Job.update_all(newdata: 0)
...@@ -130,5 +127,4 @@ class Clawler ...@@ -130,5 +127,4 @@ class Clawler
city_id: id_cities) city_id: id_cities)
end end
end end
end end
require 'net/ftp'
require 'src/unzip'
require 'csv'
class FTP_sever class FTP_sever
CONTENT_SERVER_DOMAIN_NAME = '192.168.1.156' CONTENT_SERVER_DOMAIN_NAME = '192.168.1.156'
CONTENT_SERVER_USER_NAME = 'training' CONTENT_SERVER_USER_NAME = 'training'
...@@ -5,10 +9,107 @@ class FTP_sever ...@@ -5,10 +9,107 @@ class FTP_sever
# Downloads jobs.zip from the content server and unpacks it into lib/csv.
#
# The archive is fetched into the current working directory (Net::FTP's
# default local name for 'jobs.zip'), extracted with extract_zip, and then
# removed whether or not extraction succeeded.
#
# A failed download raises out of this method. A failed extraction is
# reported on stdout and does not raise.
#
# @return [nil]
def self.donwload_csv
  Net::FTP.open(CONTENT_SERVER_DOMAIN_NAME, CONTENT_SERVER_USER_NAME, CONTENT_SERVER_USER_PASSWORD) do |ftp|
    ftp.getbinaryfile('jobs.zip')
    begin
      # extract_zip skips entries whose target file already exists, so a CSV
      # left over from a previous run would otherwise be imported again
      # instead of the freshly downloaded one.
      stale_csv = 'lib/csv/jobs.csv'
      File.delete(stale_csv) if File.exist?(stale_csv)

      extract_zip('./jobs.zip', 'lib/csv')
      puts "Unzip done\n"
    rescue StandardError => e
      # Report what actually went wrong; not every failure is a missing file.
      puts "Unzip failed: #{e.message}\n"
    ensure
      # Never leave the downloaded archive behind, even after a failure.
      File.delete('./jobs.zip') if File.exist?('./jobs.zip')
    end
  end
end
# Fetches the latest archive, then loads the extracted job listing.
#
# Runs donwload_csv first, reads lib/csv/jobs.csv relative to the current
# working directory, and parses it with the first row used as headers.
#
# @return [CSV::Table] parsed rows, addressable by header name
def self.data_csv
  donwload_csv
  raw_csv = File.read("lib/csv/jobs.csv")
  CSV.parse(raw_csv, headers: true)
end
# puts table['name']
# puts table['company name'].size
# puts table['company province'].size
##puts table['category'].size
# puts table['company address'].size
# puts table['level'].size
# puts table['salary'].size
# puts table['benefit'].size
# puts table['requirement'].size
# puts table['description'].size
# Imports every distinct industry named in the CSV "category" column.
#
# Each cell is trimmed, and every ',' or '/' separator (together with any
# whitespace around it) is rewritten as ' / ', so "IT,Software",
# "IT/Software" and "IT / Software" all map to the same industry name.
# Empty cells are skipped, and each distinct name is looked up or created
# once.
#
# @param data [#[]] parsed CSV (e.g. CSV::Table); data['category'] must be
#   an enumerable of cell values, which may be nil
# @return [nil]
def self.parse_csv_industries(data)
  puts 'Import data industries . . .'
  names = data['category'].map do |cell|
    # to_s guards against empty CSV cells, which arrive as nil.
    cell.to_s.strip.gsub(%r{\s*[,/]\s*}, ' / ')
  end
  names.reject(&:empty?).uniq.each do |name|
    Industry.find_or_create_by(name: name)
  end
  puts 'Done parse csv industries'
end
# Imports every distinct city named in the CSV "work place" column.
#
# The characters '[', ']' and '"' are removed from each cell, blank results
# are dropped, and each remaining distinct name is looked up or created.
# Newly created cities get area 1; existing rows are left untouched.
#
# NOTE(review): the stripped characters suggest cells look like ["Name"].
# A cell listing several cities would be stored as a single name; confirm
# against the real CSV whether such cells need splitting.
#
# @param data [#[]] parsed CSV (e.g. CSV::Table); data['work place'] must be
#   an enumerable of cell values, which may be nil
# @return [Array<String>] the cleaned, de-duplicated city names
def self.parse_csv_cities(data)
  puts 'Import data cities . . .'
  names = data['work place'].map { |cell| cell.to_s.delete('[]"') }
  # De-duplicate after the clean-up so '["X"]' and 'X' collapse into one lookup.
  names.reject(&:blank?).uniq.each do |name|
    City.find_or_create_by(name: name) { |city| city.area = 1 }
  end
end
# Imports companies from the CSV, one per row of the "company name" column.
#
# The name is trimmed and rows without a name are skipped. For a company
# that does not exist yet, the address and short description are taken from
# the same row's "company address" and "benefit" cells. A failure on one row
# is reported on stdout and does not stop the import of the remaining rows.
#
# @param data [#[]] parsed CSV (e.g. CSV::Table) providing the columns
#   'company name', 'company address' and 'benefit'
# @return [nil]
def self.parse_csv_companies(data)
  puts 'Import data companies . . .'
  data['company name'].each_with_index do |raw_name, index|
    # Empty CSV cells arrive as nil; skip them instead of raising.
    name = raw_name.to_s.strip
    next if name.empty?

    begin
      Company.find_or_create_by(name: name) do |company|
        company.address = data['company address'][index]
        company.short_description = data['benefit'][index]
      end
    rescue StandardError => e
      # Best effort: keep importing, but say which row failed and why.
      puts "--- could not import company #{name.inspect}: #{e.message}"
    end
  end
  puts 'Done import data companies'
end
# Creates one Job per CSV row after clearing the "new" flag on existing jobs.
#
# Every existing job first gets newdata: 0. Each created job gets
# newdata: 1, the current time as create_date, and a description made of
# the row's "requirement" and "description" cells joined by a newline
# (missing cells are left out). The job is linked to the company whose name
# matches the trimmed "company name" cell.
#
# @param data [#[]] parsed CSV (e.g. CSV::Table) providing the columns
#   'name', 'company name', 'requirement', 'description', 'level', 'salary'
# @return [Object] whatever data['name'].each_with_index returns
# @raise whatever Job.create! raises when a row cannot be saved
def self.parse_csv_jobs(data)
  Job.update_all(newdata: 0)
  data['name'].each_with_index do |name, index|
    # Build a new string: a real newline separator, no in-place mutation of
    # the CSV cell, and no failure when either cell is empty (nil).
    description = [data['requirement'][index], data['description'][index]].compact.join("\n")

    company = Company.find_by(name: data['company name'][index].to_s.strip)
    # NOTE(review): unknown companies fall back to id 1; presumably the
    # placeholder company row. Confirm that this row always exists.
    company_id = company ? company.id : 1

    Job.create!(name: name,
                company_id: company_id,
                level: data['level'][index],
                experience: '',
                salary: data['salary'][index],
                create_date: Time.now,
                expiration_date: '',
                description: description,
                newdata: 1)
  end
end
# Entry point for the CSV import: loads the table via data_csv, then feeds
# it to the industry, city and company importers, in that order.
#
# @return [Object] the result of parse_csv_companies
def self.import_data_from_csv
  csv_table = data_csv
  parse_csv_industries(csv_table)
  parse_csv_cities(csv_table)
  parse_csv_companies(csv_table)
  # The job import step is currently disabled:
  # parse_csv_jobs(csv_table)
end
end end
\ No newline at end of file
...@@ -23,9 +23,9 @@ class Interface_web ...@@ -23,9 +23,9 @@ class Interface_web
data << website_companies << website_jobs data << website_companies << website_jobs
end end
@crawl_link_for_companies_jobs = crawl_link_for_companies_jobs(15) # @crawl_link_for_companies_jobs = crawl_link_for_companies_jobs(3)
def self.get_link_job_and_companies def self.get_link_job_and_companies
@crawl_link_for_companies_jobs ||= crawl_link_for_companies_jobs(15) @crawl_link_for_companies_jobs ||= crawl_link_for_companies_jobs(1)
end end
def self.base_link(url) def self.base_link(url)
...@@ -173,7 +173,6 @@ class Interface_web ...@@ -173,7 +173,6 @@ class Interface_web
end end
def self.crawl_data_jobs_interface_5(page) def self.crawl_data_jobs_interface_5(page)
# page = base_link(url)
@name << page.search(".info-company h1").text @name << page.search(".info-company h1").text
@company_name << page.search(".info-company .text-job h2").text @company_name << page.search(".info-company .text-job h2").text
...@@ -225,9 +224,6 @@ class Interface_web ...@@ -225,9 +224,6 @@ class Interface_web
end end
@data @data
end end
end end
......
require 'zip'
# Unpacks every entry of a zip archive beneath a destination directory.
#
# Entries whose name would resolve outside the destination (for example
# "../x") are skipped, since archive contents are not trusted. Parent
# directories of nested entries are created as needed.
#
# @param file [String] path of the zip archive to read
# @param destination [String] directory to extract into; created if missing
# @param overwrite [Boolean] when false (the default), entries whose target
#   already exists are skipped; when true, existing files are replaced
def extract_zip(file, destination, overwrite: false)
  FileUtils.mkdir_p(destination)
  root = File.expand_path(destination)
  Zip::File.open(file) do |zip_file|
    zip_file.each do |entry|
      target = File.join(destination, entry.name)

      # Zip-slip guard: the resolved path must stay inside the destination.
      resolved = File.expand_path(entry.name, root)
      next unless resolved.start_with?(root + File::SEPARATOR)
      next if File.exist?(target) && !overwrite

      FileUtils.mkdir_p(File.dirname(target))
      # The block tells rubyzip whether an existing target may be replaced.
      zip_file.extract(entry, target) { overwrite }
    end
  end
end
\ No newline at end of file
require 'src/crawler' require 'src/crawler'
require 'src/ftp'
namespace :db do namespace :db do
task populate: :environment do task populate: :environment do
Clawler.make_industries # Clawler.make_industries
Clawler.make_cities # Clawler.make_cities
Clawler.make_companies # Clawler.make_companies
Clawler.make_jobs # Clawler.make_jobs
end
task csv: :environment do
FTP_sever.import_data_from_csv
end end
end end
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment