Set name variable

parent 53de3765
Pipeline #759 failed with stages
in 0 seconds
require 'net/ftp'
require 'csv'
require 'zip'
class Crawler
def initialize(logger)
......@@ -7,6 +11,11 @@ class Crawler
@PASSWORD_FTP = 'training'
end
# Runs the two reference-data crawls back to back: cities first, then
# industries. The value of the industry crawl is what the caller gets back.
def crawl_city_industry
  crawl_city
  crawl_industry
end
def crawl_city
page = Nokogiri::HTML(URI.open("https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-vi.html"))
get_name = page.search('select#location')
......
class Crontab
def initialize(logger)
require 'net/ftp'
require 'csv'
require 'zip'
class InforJob
# Remembers the collaborators used by the crawl methods.
#
# @param logger [#error] sink for error messages raised while crawling
# @param url [String] listing page the crawl starts from
def initialize(logger, url)
  @mylogger, @url = logger, url
end
# Full crawl: companies are stored first, then jobs (find_job only keeps
# jobs whose company can be looked up, so the order matters).
def crawl_all
  find_company
  find_job
end
# Crawls a company listing page and stores every company that is not in the
# database yet.
#
# For each company link on the listing the company page is fetched, its name,
# address and introduction are scraped, and a Company row is created unless a
# company with that name already exists.
#
# @param url [String] listing page to crawl; defaults to the URL handed to the
#   constructor, so both `find_company` and `find_company(url)` work
# @return [Array] the hrefs found on the listing page
def find_company(url = @url)
  listing = Nokogiri::HTML(URI.open(url))
  links = listing.css('div.caption a.company-name').map { |anchor| anchor['href'] }
  links.each do |link|
    # Anchors without a real target carry no company page.
    next if link.nil? || link == 'javascript:void(0);'

    begin
      # URI.escape was removed in Ruby 3.0; the parser-level escape is the
      # drop-in equivalent. Fetching inside the begin keeps one unreachable
      # page from aborting the whole crawl.
      page = Nokogiri::HTML(URI.open(URI::DEFAULT_PARSER.escape(link)))
      name = page.search('p.name')&.text
      # Skip only this company (a `return` here would end the crawl early).
      next if name.blank?

      address = page.css('div.content p').children[1]&.text
      introduction = page.css('div.main-about-us').text
      puts name
      # Look up by name only; the block runs just for new records, so an
      # edited address or introduction cannot produce a duplicate company.
      Company.find_or_create_by!(name: name) do |company|
        company.address = address
        company.introduction = introduction
      end
    rescue StandardError => e
      @mylogger.error e.message
    end
  end
end
# Inserts one Job row from already-scraped values and returns whatever
# Job.create! returns (it raises when the record cannot be saved).
#
# NOTE(review): a four-argument create_job is also defined further down in
# this file; in Ruby the later definition replaces this one — confirm which
# revision is meant to survive.
def create_job(title_job, level, salary, experience, expiration_date, description, company_id)
  attributes = {
    title: title_job,
    level: level,
    salary: salary,
    experience: experience,
    expiration_date: expiration_date,
    description: description,
    company_id: company_id
  }
  Job.create!(attributes)
end
# Links a job to every city named in its summary row.
#
# City names are read from the anchors under `div.map p`; names with no
# matching City record are ignored. A CityJob row is created only when the
# (job, city) pair is not linked yet.
#
# @param row [#css] summary row of the job posting
# @param info_job [#id] job record to link
# @return [Array<String>] the stripped city names found in the row
def create_city_rel(row, info_job)
  city_names = row.css('div.map p a').children.map { |node| node.text.strip }
  city_names.each do |loc|
    city = City.find_by(name: loc)
    next if city.nil?

    # The block only runs when a new link is created, so "Created" is no
    # longer printed for pairs that already existed.
    CityJob.find_or_create_by!(job_id: info_job.id, city_id: city.id) do
      puts "Created City: #{info_job.id} - #{city.id}.#{loc}"
    end
  end
end
# Links a job to every industry named in its summary row.
#
# Industry names are read from the `li a` anchors of the row; names with no
# matching Industry record are ignored. An IndustryJob row is created only
# when the (job, industry) pair is not linked yet.
#
# @param row [#css] summary row of the job posting
# @param info_job [#id] job record to link
# @return [Array<String>] the stripped industry names found in the row
def create_industry_rel(row, info_job)
  industry_names = row.css('li a').children.map { |node| node.text.strip }
  industry_names.each do |ind|
    industry = Industry.find_by(name: ind)
    next if industry.nil?

    # The block only runs when a new link is created, so "Created" is no
    # longer printed for pairs that already existed.
    IndustryJob.find_or_create_by!(job_id: info_job.id, industry_id: industry.id) do
      puts "Created Industry: #{info_job.id} - #{industry.id}.#{ind}"
    end
  end
end
# Stores a scraped job posting unless it already exists, then links it to its
# cities and industries.
#
# @param title [String] job title taken from the posting page
# @param link_page [#search] parsed posting page (source of the description)
# @param row [#at_xpath, #css] summary row of the posting
# @param company_table [#id] company record that owns the job
def create_job(title, link_page, row, company_table)
  # Identify a job by title + company only. Matching on every scraped column
  # would insert a duplicate whenever salary, description, etc. change.
  # The block (and therefore the field scraping) runs only for new jobs.
  info_job = Job.find_or_create_by!(title: title, company_id: company_table.id) do |job|
    job.level = row.at_xpath('//li[./strong/i[contains(@class, "mdi mdi-account")]]/p').text.strip
    job.salary = row.at_xpath('//li[./strong/i[contains(@class, "fa fa-usd")]]/p').text.strip
    # Experience is the one field the original treated as optional.
    job.experience = row.at_xpath('//li[./strong/i[contains(@class, "fa fa-briefcase")]]/p')&.text&.strip
    job.expiration_date = row.at_xpath('//li[./strong/i[contains(@class, "mdi mdi-calendar-check")]]/p').text.strip
    job.description = link_page.search('div.detail-row').to_s
  end
  create_city_rel(row, info_job)
  create_industry_rel(row, info_job)
end

# Crawls a job listing page and stores every posting whose company is already
# in the database.
#
# Postings without a summary row, without a known company, or without a title
# are skipped. Any error raised while handling one posting is logged and the
# crawl continues with the next link.
#
# @param url [String] listing page to crawl; defaults to the URL handed to the
#   constructor, so both `find_job` and `find_job(url)` work
# @return [Array] the hrefs found on the listing page
def find_job(url = @url)
  listing = Nokogiri::HTML(URI.open(url))
  job_links = listing.css('a.job_link').map { |anchor| anchor['href'] }
  job_links.each do |job_link|
    begin
      # URI.escape was removed in Ruby 3.0; the parser-level escape is the
      # drop-in equivalent.
      link_page = Nokogiri::HTML(URI.open(URI::DEFAULT_PARSER.escape(job_link)))
      row = link_page.search('div.bg-blue div.row')
      next if row.blank?

      name_company = link_page.search('div.job-desc a.job-company-name').text.strip
      company_table = Company.find_by(name: name_company)
      next if company_table.blank?

      title = link_page.search('div.job-desc p').text.strip
      next if title.blank?

      create_job(title, link_page, row, company_table)
    rescue StandardError => e
      # Same error channel as find_company, instead of printing to stdout.
      @mylogger.error e.message
    end
  end
end
end
require 'src/crawler.rb'
require 'src/crontab.rb'
require 'net/ftp'
require 'csv'
require 'zip'
# Rake tasks under the :import namespace that drive the crawler classes.
# NOTE(review): this span appears to interleave two revisions of the task file
# (logger/url exist both as block locals and as methods, both Crontab and
# InforJob are instantiated, and the do/end pairs do not balance) — confirm
# against the repository before changing any code here.
namespace :import do
# Logger writing to log/my.log under the Rails root.
logger ||= Logger.new(Rails.root.join('log','my.log'))
# Listing page handed to the company/job crawl.
url = 'https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-trang-1-vi.html'
desc 'crawler data'
# Runs the Crawler steps in sequence; requires the Rails environment.
task crawler: :environment do
action = Crawler.new(logger)
action.crawl_city
action.crawl_industry
action.crawl_company
action.crawl_job_relationships
# NOTE(review): crawl_city_industry itself calls crawl_city and crawl_industry,
# so with the two explicit calls above both would run twice — presumably the
# explicit calls belong to the older revision; verify.
action.crawl_city_industry
end
desc 'Crontab'
# Crawls companies and jobs from the listing page, then downloads, unpacks and
# imports the jobs CSV through the Crawler helpers.
task auto: :environment do
action = Crawler.new(logger)
crontab = Crontab.new(logger)
crontab.find_company(url)
crontab.find_job(url)
# InforJob receives the URL up front; crawl_all runs find_company then find_job.
crontab = InforJob.new(logger, url)
crontab.crawl_all
action.get_file_csv
action.extract_zip('./jobs.zip', 'lib/csv')
action.import_file_csv(Rails.root.join('lib', 'csv', 'jobs.csv'))
end
# Crawls companies and jobs only, without the CSV import steps.
task find_job: :environment do
crontab = Crontab.new(logger)
crontab.find_company(url)
crontab.find_job(url)
# Builds a new Logger for log/my.log on every call.
def logger
Logger.new(Rails.root.join('log','my.log'))
end
# Listing page the crawl starts from.
def url
'https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-trang-1-vi.html'
end
end
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment