Commit ee6a23cd by Tô Ngọc Ánh

csv import: prevent datatype error

parent 94c7493b
Pipeline #717 failed with stages
in 0 seconds
...@@ -45,9 +45,9 @@ group :development do ...@@ -45,9 +45,9 @@ group :development do
# Access an interactive console on exception pages or by calling 'console' anywhere in the code. # Access an interactive console on exception pages or by calling 'console' anywhere in the code.
gem 'web-console', '>= 3.3.0' gem 'web-console', '>= 3.3.0'
gem 'listen', '>= 3.0.5', '< 3.2' gem 'listen', '>= 3.0.5', '< 3.2'
gem 'dotenv-rails'
# Spring speeds up development by keeping your application running in the background. Read more: https://github.com/rails/spring # Spring speeds up development by keeping your application running in the background. Read more: https://github.com/rails/spring
gem 'spring' gem 'spring'
gem 'dotenv-rails'
gem 'spring-watcher-listen', '~> 2.0.0' gem 'spring-watcher-listen', '~> 2.0.0'
end end
......
require 'open-uri' require 'open-uri'
@logger ||= Logger.new("#{Rails.root}/log/crawler.log") @logger ||= Logger.new("./log/import_data.log")
namespace :crawl do namespace :crawl do
desc 'crawl industries locations jobs' desc 'crawl industries locations jobs'
...@@ -31,7 +31,7 @@ def get_job_links(page, link) ...@@ -31,7 +31,7 @@ def get_job_links(page, link)
end end
def crawl_company(company_link) def crawl_company(company_link)
uri = URI.parse(CGI.escape(company_link)) # fix error: uri must be ascii only uri = URI.parse(URI.escape(company_link)) # fix error: uri must be ascii only
document = Nokogiri::HTML(URI.open(uri)) document = Nokogiri::HTML(URI.open(uri))
company_name = document.css('.content .name').text company_name = document.css('.content .name').text
return if company_name.empty? return if company_name.empty?
...@@ -48,7 +48,7 @@ rescue StandardError => e ...@@ -48,7 +48,7 @@ rescue StandardError => e
end end
def crawl_job(job_link) def crawl_job(job_link)
uri = URI.parse(CGI.escape(job_link)) # fix error: uri must be ascii only uri = URI.parse(URI.escape(job_link)) # fix error: uri must be ascii only
document = Nokogiri::HTML(URI.open(uri)) document = Nokogiri::HTML(URI.open(uri))
job_title = document.at_css('.job-desc p.title').text job_title = document.at_css('.job-desc p.title').text
return if job_title.empty? return if job_title.empty?
......
...@@ -2,8 +2,10 @@ require 'csv' ...@@ -2,8 +2,10 @@ require 'csv'
require 'zip' require 'zip'
require_relative '../common/ftp' require_relative '../common/ftp'
namespace :ftp_import do @logger ||= Logger.new("./log/import_data.log")
desc 'FTP import csv file'
namespace :csv_import do
desc 'Download csv file from FTP and import'
task csv: :environment do task csv: :environment do
destination_dir = './lib/data' destination_dir = './lib/data'
ftp = Ftp.new('192.168.1.156', 'training', 'training') ftp = Ftp.new('192.168.1.156', 'training', 'training')
...@@ -26,10 +28,10 @@ def extract_zip(file, destination) ...@@ -26,10 +28,10 @@ def extract_zip(file, destination)
end end
def import_job(direction) def import_job(direction)
# i = 0 index = 0
CSV.foreach("#{direction}/jobs.csv", headers: true) do |row| CSV.foreach("#{direction}/jobs.csv", headers: true) do |row|
# i+=1 index += 1
next if row['name'].blank? || !row['category'].is_a?(String) || row['company name'].blank? next if integer?(row['category'])
title = row['name'].strip title = row['name'].strip
company = Company.find_or_create_by(name: row['company name']) do |c| company = Company.find_or_create_by(name: row['company name']) do |c|
...@@ -41,8 +43,7 @@ def import_job(direction) ...@@ -41,8 +43,7 @@ def import_job(direction)
industry = Industry.find_or_create_by(name: row['category'].strip) industry = Industry.find_or_create_by(name: row['category'].strip)
level = row['level'].try(:strip) level = row['level'].try(:strip)
salary = row['salary'].try(:strip) salary = row['salary'].try(:strip)
locations_name = row['work place'].is_a?(Array) ? row['work place'] : row['work place'].split(',') locations_name = row['work place'].tr('"[]', '').split(',')
# byebug if i == 61
locations = Location.where(city: locations_name) locations = Location.where(city: locations_name)
locations = locations_name.map { |city| Location.create(oversea: false, city: city) } if locations.empty? locations = locations_name.map { |city| Location.create(oversea: false, city: city) } if locations.empty?
description = "Benefits:\n#{row['benefit']}\n"\ description = "Benefits:\n#{row['benefit']}\n"\
...@@ -58,5 +59,9 @@ def import_job(direction) ...@@ -58,5 +59,9 @@ def import_job(direction)
end end
rescue StandardError => e rescue StandardError => e
puts e puts e
@logger.error e.message @logger.error "Job #{index}: #{e.message}"
end
# Reports whether +str+ is the textual form of a whole number.
#
# Surrounding whitespace, an optional leading sign and leading zeros are all
# accepted, so values such as " 12 ", "+5" and "007" count as integers.
# The previous round-trip check (`str.to_i.to_s == str`) rejected those
# forms, letting padded numeric cells slip past the caller's guard.
#
# @param str [String, nil] raw cell value; nil and empty strings yield false
# @return [Boolean] true when the value consists solely of an optionally
#   signed run of decimal digits
def integer?(str)
  # \A and \z anchor the whole string so multi-line values cannot match
  # on a single numeric line.
  str.to_s.strip.match?(/\A[+-]?\d+\z/)
end
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment