Commit ee6a23cd by Tô Ngọc Ánh

csv import: prevent datatype error

parent 94c7493b
Pipeline #717 failed with stages
in 0 seconds
......@@ -45,9 +45,9 @@ group :development do
# Access an interactive console on exception pages or by calling 'console' anywhere in the code.
gem 'web-console', '>= 3.3.0'
gem 'listen', '>= 3.0.5', '< 3.2'
gem 'dotenv-rails'
# Spring speeds up development by keeping your application running in the background. Read more: https://github.com/rails/spring
gem 'spring'
gem 'dotenv-rails'
gem 'spring-watcher-listen', '~> 2.0.0'
end
......
require 'open-uri'
@logger ||= Logger.new("#{Rails.root}/log/crawler.log")
@logger ||= Logger.new("./log/import_data.log")
namespace :crawl do
desc 'crawl industries locations jobs'
......@@ -31,7 +31,7 @@ def get_job_links(page, link)
end
def crawl_company(company_link)
uri = URI.parse(CGI.escape(company_link)) # fix error: uri must be ascii only
uri = URI.parse(URI.escape(company_link)) # fix error: uri must be ascii only
document = Nokogiri::HTML(URI.open(uri))
company_name = document.css('.content .name').text
return if company_name.empty?
......@@ -48,7 +48,7 @@ rescue StandardError => e
end
def crawl_job(job_link)
uri = URI.parse(CGI.escape(job_link)) # fix error: uri must be ascii only
uri = URI.parse(URI.escape(job_link)) # fix error: uri must be ascii only
document = Nokogiri::HTML(URI.open(uri))
job_title = document.at_css('.job-desc p.title').text
return if job_title.empty?
......
......@@ -2,8 +2,10 @@ require 'csv'
require 'zip'
require_relative '../common/ftp'
namespace :ftp_import do
desc 'FTP import csv file'
@logger ||= Logger.new("./log/import_data.log")
namespace :csv_import do
desc 'Download csv file from FTP and import'
task csv: :environment do
destination_dir = './lib/data'
ftp = Ftp.new('192.168.1.156', 'training', 'training')
......@@ -26,10 +28,10 @@ def extract_zip(file, destination)
end
def import_job(direction)
# i = 0
index = 0
CSV.foreach("#{direction}/jobs.csv", headers: true) do |row|
# i+=1
next if row['name'].blank? || !row['category'].is_a?(String) || row['company name'].blank?
index += 1
next if integer?(row['category'])
title = row['name'].strip
company = Company.find_or_create_by(name: row['company name']) do |c|
......@@ -41,8 +43,7 @@ def import_job(direction)
industry = Industry.find_or_create_by(name: row['category'].strip)
level = row['level'].try(:strip)
salary = row['salary'].try(:strip)
locations_name = row['work place'].is_a?(Array) ? row['work place'] : row['work place'].split(',')
# byebug if i == 61
locations_name = row['work place'].tr('"[]', '').split(',')
locations = Location.where(city: locations_name)
locations = locations_name.map { |city| Location.create(oversea: false, city: city) } if locations.empty?
description = "Benefits:\n#{row['benefit']}\n"\
......@@ -58,5 +59,9 @@ def import_job(direction)
end
rescue StandardError => e
puts e
@logger.error e.message
@logger.error "Job #{index}: #{e.message}"
end
# Reports whether +str+ is the textual form of a whole number.
#
# Surrounding whitespace, an optional leading sign and leading zeros are all
# accepted, so " 12 ", "+5" and "007" count as integers. The previous
# `to_i.to_s == str` round-trip only recognised the canonical spelling ("12"),
# which let padded numeric values slip past the caller's guard.
#
# @param str [String, nil] raw cell value; nil and non-strings are coerced
#   with +to_s+ before the check
# @return [Boolean] true when the trimmed value consists solely of digits
#   with an optional sign, false otherwise (including nil and "")
def integer?(str)
  # \A and \z anchor the whole string so "12abc" or "1.5" never match.
  str.to_s.strip.match?(/\A[+-]?\d+\z/)
end
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment