rename variable, create import csv file

parent d58cf3ff
Pipeline #766 canceled with stages
in 0 seconds
......@@ -78,6 +78,14 @@
background-image: linear-gradient(160deg, black, #8c8686);
color: white;
}
.city-name{
.city-list:hover{
background-color: black;
.city-name, .count-job{
text-decoration: none;
color: white;
}
}
.city-name, .count-job{
text-decoration: none;
color: black;
}
# Controller for the landing page.
class TopPagesController < ApplicationController
  # Prepares the data shown on the landing page.
  #
  # Sets:
  #   @total_jobs         - result of Job.count
  #   @jobs               - the five most recently created jobs
  #   @jobs_of_cities     - result of CityJob.top_city
  #   @jobs_of_industries - IndustryJob rows grouped by industry_id and counted,
  #                         largest groups first, at most nine groups
  #
  # Each instance variable is assigned exactly once; earlier assignments that
  # were immediately overwritten (and cost an extra query each) are gone.
  def index
    @total_jobs = Job.count
    @jobs = Job.limit(5).order(created_at: :desc)
    @jobs_of_cities = CityJob.top_city
    @jobs_of_industries = IndustryJob.limit(9).group('industry_id').order('Count(*) DESC').count
  end
end
# Helper methods for page titles.
module ApplicationHelper
  # Returns the page title as a String.
  #
  # page_title - any object; converted with #to_s, so nil becomes "".
  def full_title(page_title)
    page_title.to_s
  end
end
......@@ -2,4 +2,7 @@ class CityJob < ApplicationRecord
belongs_to :city
belongs_to :job
# Counts rows per city_id and keeps the largest groups.
#
# top_n - maximum number of groups to return; defaults to 9, the value that
#         used to be hard-coded, so existing callers are unaffected.
#
# Returns the result of `count` on the limited, grouped, ordered relation.
def self.top_city(top_n = 9)
  limit(top_n).group('city_id').order('Count(*) DESC').count
end
end
# Record that ties one industry to one job.
class IndustryJob < ApplicationRecord
# The industry side of the link.
belongs_to :industry
# The job side of the link.
belongs_to :job
end
<% @jobs_of_cities.each do |city, count_job| %>
<div class="col-4">
<div class="row-table border border-dark rounded">
<div><strong><%= link_to "#{City.find(city).name}", '#', class: "city-name" %></strong></div>
<div class="row-table border border-dark rounded city-list">
<%= link_to '#' do%>
<div class="city-name"><strong><%= City.find(city).name %></strong></div>
<div class="count-job"><%= count_job %></div>
<% end %>
</div>
</div>
<% end %>
\ No newline at end of file
......@@ -10,7 +10,6 @@
<%= city.name %>
<% end %>
</div>
<% job.description.html_safe %>
<button type="button" class="btn btn-primary" id="button-follow">♥ Follow</button>
</div>
</div>
......
<% provide(:title, 'Venjob') %>
<div class="banner-ground">
<div class="top-banner">
<div class="total-job">Having <%= @total_jobs.count %> jobs for you!</div>
<div class="total-job">Having <%= @total_jobs %> jobs for you!</div>
</div>
</div>
<div class="search-bar"><%= render 'layouts/search_bar' %></div>
<br>
<div class="container">
<div class="search-bar"><%= render 'layouts/search_bar' %></div>
<br>
<div class="job-list"><%= render 'layouts/show_jobs' %></div>
</div>
<div class="city-banner">City</div>
<div class="container">
<div class="city-banner rounded">City</div>
<div class="row"><%= render 'layouts/show_cities' %></div>
<div class="all-industry">
<div class="row-table border border-dark rounded">All Cities</div>
<div class="all-city">
<div class="row-table border border-dark rounded"><strong>All Cities</strong></div>
</div>
</div>
<div class="industry-banner">Industry</div>
<div class="container">
<div class="industry-banner rounded">Industry</div>
<div class="row"><%= render 'layouts/show_industries' %></div>
<div class="all-industry">
<div class="row-table border border-dark rounded">All Industries</div>
<div class="row-table border border-dark rounded"><strong>All Industries</strong></div>
</div>
</div>
require 'net/ftp'
require 'csv'
require 'zip'
class Crawler
# Stores the logger, the target URL and the FTP connection settings.
#
# logger - object that receives `error` calls from the crawl methods.
# url    - kept in @url.
def initialize(logger, url)
# The same logger is kept under two names; methods in this class log
# through @mylogger in some places and @logger in others.
@mylogger = logger
@logger = logger
@url = url
# FTP host and credentials passed to Net::FTP.open by get_file_csv.
@NAME_DOMAIN = '192.168.1.156'
@USERNAME_FTP = 'training'
@PASSWORD_FTP = 'training'
end
# Runs the crawl steps one after another, in the order listed below.
def crawl_city_industry
crawl_city
crawl_industry
crawl_company
crawl_job_relationships
crawl_job
end
def crawl_city
......@@ -45,158 +39,86 @@ class Crawler
end
end
def crawl_company
(1..10).each do |n|
company_info = Nokogiri::HTML(URI.open("https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-trang-#{n}-vi.html"))
company_link = company_info.css('div.caption a.company-name').map{ |link| link['href'] }
company_link.each do |link|
next if link == 'javascript:void(0);'
company_page = Nokogiri::HTML(URI.open(URI.parse(URI.escape(link))))
if !(company_page.search('p.name').text).nil?
begin
name_company = company_page.search('p.name').text
address_company = company_page.css('div.content p').children[1].text
introduction_company = company_page.css('div.main-about-us').text
get_name_company = Company.find_by(name: name_company)
if get_name_company.nil?
company = Company.create!(name: name_company,
address: address_company,
introduction: introduction_company)
end
rescue StandardError => e
@mylogger.error "#{e.message}"
end
end
end
# Links +job+ to the known cities named in the job page's location block.
#
# row - object queried with the CSS selector 'div.map p a'; the stripped text
#       of each child node is used as a city name.
# job - record exposing a `cities` collection.
#
# Cities already attached to the job are left out, so crawling the same job a
# second time does not append duplicate links.
#
# Returns the job's cities collection.
def city_relationship(row, job)
  city_names = row.css('div.map p a').children.map { |name_city| name_city.text.strip }
  missing_cities = City.where(name: city_names).to_a - job.cities.to_a
  job.cities << missing_cities
end
# Links +job+ to the known industries named in the job page's summary row.
#
# row - object queried with the CSS selector 'li a'; the stripped text of each
#       child node is used as an industry name.
# job - record exposing an `industries` collection.
#
# Industries already attached to the job are left out, so crawling the same
# job a second time does not append duplicate links.
#
# Returns the job's industries collection.
def industry_relationship(row, job)
  industry_names = row.css('li a').children.map { |name_industry| name_industry.text.strip }
  missing_industries = Industry.where(name: industry_names).to_a - job.industries.to_a
  job.industries << missing_industries
end
def crawl_job_relationships
(1..10).each do |n|
page_access = Nokogiri::HTML(URI.open("https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-trang-#{n}-vi.html"))
get_link = page_access.css('a.job_link').map { |link| link['href'] }
get_link.each do |link|
page_job = Nokogiri::HTML(URI.open(URI.parse(URI.escape(link))))
get_row = page_job.search('div.bg-blue div.row')
if get_row.present?
begin
get_name_company = page_job.search('div.job-desc a.job-company-name').text.strip
company_table = Company.find_by(name: get_name_company)
title_job = page_job.search('div.job-desc p').text
description = page_job.search('div.detail-row')
next if company_table.nil?
job_check = Job.find_by(title: title_job, company_id: company_table.id)
salary = get_row.at_xpath('//li[./strong/i[contains(@class, "fa fa-usd")]]/p').text.strip
experience = get_row.at_xpath('//li[./strong/i[contains(@class, "fa fa-briefcase")]]/p').text.strip
level = get_row.at_xpath('//li[./strong/i[contains(@class, "mdi mdi-account")]]/p').text.strip
expiration_date = get_row.at_xpath('//li[./strong/i[contains(@class, "mdi mdi-calendar-check")]]/p').text.strip
if job_check.blank?
job = Job.create!(title: title_job,
def create_job(title, link_page, row, company)
description = link_page.search('div.detail-row').to_s
salary = row.at_xpath('//li[./strong/i[contains(@class, "fa fa-usd")]]/p').text.strip
experience = row.at_xpath('//li[./strong/i[contains(@class, "fa fa-briefcase")]]/p')&.text&.strip
level = row.at_xpath('//li[./strong/i[contains(@class, "mdi mdi-account")]]/p').text.strip
expiration_date = row.at_xpath('//li[./strong/i[contains(@class, "mdi mdi-calendar-check")]]/p').text.strip
job = Job.find_or_create_by!(title: title,
level: level,
salary: salary,
experience: experience,
expiration_date: expiration_date,
description: description,
company_id: company_table.id)
end
find_job = Job.find_by(title: title_job, company_id: company_table.id)
puts find_job.title
if find_job.present?
location_rel = get_row.css('div.map p a').children.map { |location| location.text.strip }
location_rel.each do |loc|
city_table = City.find_by(name: loc)
next if city_table.nil?
unless CityJob.exists?(job_id: find_job.id, city_id: city_table.id).nil?
puts "Created City: #{find_job.id} - #{city_table.id}.#{loc}"
city_jobs = CityJob.create!(job_id: find_job.id, city_id: city_table.id)
end
end
industry_rel = get_row.css('li a').children.map { |industry| industry.text.strip }
industry_rel.each do |ind|
industry_table = Industry.find_by(name: ind)
next if industry_table.nil?
unless IndustryJob.exists?(job_id: find_job.id, industry_id: industry_table.id)
puts "Created Industry: #{find_job.id} - #{industry_table.id}.#{ind}"
industry_jobs = IndustryJob.create!(job_id: find_job.id, industry_id: industry_table.id)
end
end
end
rescue StandardError => e
@mylogger.error "#{e.message}"
end
end
end
end
end
company_id: company.id)
# Opens an FTP session with the host and credentials held in @NAME_DOMAIN,
# @USERNAME_FTP and @PASSWORD_FTP, and fetches the remote file 'jobs.zip'
# in binary mode.
def get_file_csv
  Net::FTP.open(@NAME_DOMAIN, @USERNAME_FTP, @PASSWORD_FTP) { |session| session.getbinaryfile('jobs.zip') }
end
city_relationship(row, job)
industry_relationship(row, job)
end
# Unpacks every entry of a zip archive into a directory.
#
# file        - path of the zip archive to read.
# destination - directory to extract into; created when missing.
#
# A file left behind by an earlier run is removed before its entry is
# extracted, so a newly downloaded archive always replaces older output
# instead of being silently skipped.
def extract_zip(file, destination)
  FileUtils.mkdir_p(destination)
  Zip::File.open(file) do |zip_file|
    zip_file.each do |entry|
      target = File.join(destination, entry.name)
      # Entries may live in sub-folders that do not exist yet.
      FileUtils.mkdir_p(File.dirname(target))
      FileUtils.rm_f(target) if File.file?(target)
      zip_file.extract(entry, target)
    end
  end
end
def crawl_company
(1..10).each do |n|
info = Nokogiri::HTML(URI.open("https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-trang-#{n}-vi.html"))
links = info.css('div.caption a.company-name').map { |link| link['href'] }
links.each do |link|
next if link == 'javascript:void(0);'
page = Nokogiri::HTML(URI.open(URI.escape(link)))
name = page.search('p.name')&.text
return if name.blank?
def import_file_csv(file)
CSV.foreach(file, headers: true) do |row|
address = page.css('div.content p').children[1]&.text
introduction = page.css('div.main-about-us').text
begin
company_name = row["company name"]
company_address = row["company address"]
company_introduction = row["benefit"]
company_table = Company.find_by(name: company_name)
if company_table.nil?
company_table = Company.create!(name: company_name,
address: company_address,
introduction: company_introduction)
end
title_job = row["name"]
description_job = "#{row["description"]} #{row["requirement"]}"
level = row["level"]
salary = row["salary"]
job_table = Job.find_by(title: title_job)
if !company_table.nil? && job_table.nil?
job_table = Job.create!(title: title_job,
description: description_job,
level: level,
salary: salary,
company_id: company_table.id)
puts job_table.id
end
next if company_table.nil?
find_job = Job.find_by(title: title_job, company_id: company_table.id)
industry = row["category"]
industry_find = Industry.find_by(name: industry)
if industry_find.nil? && find_job.present?
industry_table = Industry.create!(name: industry)
industry_job_table = IndustryJob.create!(job_id: job_table.id, industry_id: industry_find.id)
else
unless IndustryJob.exists?(job_id: find_job.id, industry_id: industry_find.id)
industry_job_table = IndustryJob.create!(job_id: job_table.id, industry_id: industry_find.id)
Company.find_or_create_by!(name: name,
address: address,
introduction: introduction)
rescue StandardError => e
@logger.error e.message
end
end
puts job_table.id, title_job, industry, salary
location_data = row["work place"]
location = location_data.gsub('["', '').gsub('"]', '')
location_find = City.find_by(name: location)
if location_find.nil?
city_table = City.create!(name: location)
city_job_table = CityJob.create!(job_id: job_table.id, city_id: location_find.id)
else
unless CityJob.exists?(job_id: find_job.id, city_id: location_find.id)
city_job_table = CityJob.create!(job_id: job_table.id, city_id: location_find.id)
end
end
# Walks listing pages 1 to 10, opens every job link found on them and passes
# the job to create_job.
#
# A job is skipped when its page has no 'div.bg-blue div.row' summary, when
# no Company with the scraped name exists, or when the title is blank.
# Any StandardError raised while fetching or handling a single job is logged
# once and the loop continues with the next link.
def crawl_job
  (1..10).each do |n|
    listing = Nokogiri::HTML(URI.open("https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-trang-#{n}-vi.html"))
    job_links = listing.css('a.job_link').map { |anchor| anchor['href'] }
    job_links.each do |job_link|
      begin
        # URI.escape no longer exists from Ruby 3.0 on; the default parser
        # provides the same escaping.
        link_page = Nokogiri::HTML(URI.open(URI::DEFAULT_PARSER.escape(job_link)))
        row = link_page.search('div.bg-blue div.row')
        next if row.blank?

        company_name = link_page.search('div.job-desc a.job-company-name').text.strip
        company = Company.find_by(name: company_name)
        next if company.blank?

        title = link_page.search('div.job-desc p').text.strip
        next if title.blank?

        create_job(title, link_page, row, company)
      rescue StandardError => e
        @logger.error e.message
      end
    end
  end
end
end
require 'net/ftp'
require 'csv'
require 'zip'
# Downloads the jobs archive over FTP, unpacks it, and loads jobs.csv into
# the Company, Job, Industry and City models.
class CSVimporter
  # Name of the archive on the FTP server.
  REMOTE_ARCHIVE = 'jobs.zip'

  # logger   - object that receives `error` calls for rows that fail.
  # host     - FTP host name or address.
  # username - FTP login.
  # password - FTP password.
  #
  # The keyword defaults are the values that used to be hard-coded, so
  # `CSVimporter.new(logger)` behaves as before.
  def initialize(logger, host: '192.168.1.156', username: 'training', password: 'training')
    @logger = logger
    @ftp_host = host
    @ftp_username = username
    @ftp_password = password
    @extracting_directory = Rails.root.join('lib', 'csv')
    @archive_path = Rails.root.join('jobs.zip')
    @csv_path = Rails.root.join('lib', 'csv', 'jobs.csv')
  end

  # Runs the whole pipeline: download, extract, import.
  def import
    get_file_csv
    extract_zip
    import_file_csv
  end

  # Downloads the remote archive to the path that extract_zip reads.
  #
  # The local path is passed explicitly: without it the file is written
  # relative to the current working directory, which is not necessarily the
  # application root.
  def get_file_csv
    Net::FTP.open(@ftp_host, @ftp_username, @ftp_password) do |ftp|
      ftp.getbinaryfile(REMOTE_ARCHIVE, @archive_path.to_s)
    end
  end

  # Unpacks every archive entry into the extraction directory.
  #
  # A file left by an earlier run is removed first, so a freshly downloaded
  # archive always replaces the previous jobs.csv instead of being skipped.
  def extract_zip
    FileUtils.mkdir_p(@extracting_directory)
    Zip::File.open(@archive_path) do |zip_file|
      zip_file.each do |entry|
        target = File.join(@extracting_directory, entry.name)
        FileUtils.mkdir_p(File.dirname(target))
        FileUtils.rm_f(target) if File.file?(target)
        zip_file.extract(entry, target)
      end
    end
  end

  # Imports the extracted CSV row by row. A row that raises is logged and
  # skipped; the remaining rows are still processed.
  def import_file_csv
    CSV.foreach(@csv_path, headers: true) do |row|
      begin
        import_row(row)
      rescue StandardError => e
        @logger.error e.message
      end
    end
  end

  private

  # Creates or reuses the company, job, industry and cities for one CSV row.
  def import_row(row)
    company = find_or_create_company(row)
    return if company.nil?

    job = find_or_create_job(row, company)
    return if job.nil?

    attach_industry(job, row['category'])
    attach_cities(job, row['work place'])
  end

  # Looks the company up by name only; address and introduction are filled in
  # just when a new record is created. Matching on every column would create
  # a second company whenever the per-row 'benefit' text differs.
  # Returns nil when the row has no company name.
  def find_or_create_company(row)
    company_name = row['company name']
    return if company_name.blank?

    Company.find_or_create_by!(name: company_name) do |company|
      company.address = row['company address']
      company.introduction = row['benefit']
    end
  end

  # Looks the job up by title within the company; the remaining columns are
  # set only on creation. Returns nil when the row has no job title.
  def find_or_create_job(row, company)
    title_job = row['name']
    return if title_job.blank?

    Job.find_or_create_by!(title: title_job, company_id: company.id) do |job|
      job.description = "#{row['description']} #{row['requirement']}"
      job.level = row['level']
      job.salary = row['salary']
    end
  end

  # Links the job to the named industry, creating the industry when needed
  # and skipping the link when it already exists.
  def attach_industry(job, industry_name)
    return if industry_name.blank?

    industry = Industry.find_or_create_by!(name: industry_name)
    job.industries << industry unless job.industries.include?(industry)
  end

  # Links the job to every city named in the 'work place' column, creating
  # cities when needed and skipping links that already exist.
  def attach_cities(job, work_place)
    city_names(work_place).each do |city_name|
      city = City.find_or_create_by!(name: city_name)
      job.cities << city unless job.cities.include?(city)
    end
  end

  # Turns the raw 'work place' value into a list of city names.
  # NOTE(review): the column appears to hold a bracketed, double-quoted list
  # such as ["Ha Noi"] - confirm against a real jobs.csv. A nil value yields
  # an empty list.
  def city_names(work_place)
    work_place.to_s
              .gsub('["', '')
              .gsub('"]', '')
              .split(/"\s*,\s*"/)
              .map(&:strip)
              .reject(&:empty?)
  end
end
\ No newline at end of file
require 'net/ftp'
require 'csv'
require 'zip'
class InforJob
class JobParser
def initialize(logger, url)
@mylogger = logger
@logger = logger
@url = url
end
......@@ -25,49 +22,46 @@ class InforJob
address = page.css('div.content p').children[1]&.text
introduction = page.css('div.main-about-us').text
begin
puts name
Company.find_or_create_by!(name: name,
address: address,
introduction: introduction)
rescue StandardError => e
@mylogger.error e.message
@logger.error e.message
end
end
end
def create_city_rel(row, info_job)
location_rel = row.css('div.map p a').children.map { |name_city| name_city.text.strip }
city_table = City.where(name: location_rel)
def city_relationship(row, job)
location_relationship = row.css('div.map p a').children.map { |name_city| name_city.text.strip }
cities_relationship = City.where(name: location_relationship)
puts "#{info_job.cities << city_table}"
info_job.cities << city_table
job.cities << cities_relationship
end
def create_industry_rel(row, info_job)
industry_rel = row.css('li a').children.map { |name_industry| name_industry.text.strip }
industry_table = Industry.where(name: industry_rel)
def industry_relationship(row, job)
industry_relationship = row.css('li a').children.map { |name_industry| name_industry.text.strip }
industries_relationship = Industry.where(name: industry_relationship)
puts "#{info_job.industries << industry_table}"
info_job.industries << industry_table
job.industries << industries_relationship
end
def create_job(title, link_page, row, company_table)
def create_job(title, link_page, row, company)
description = link_page.search('div.detail-row').to_s
salary = row.at_xpath('//li[./strong/i[contains(@class, "fa fa-usd")]]/p').text.strip
experience = row.at_xpath('//li[./strong/i[contains(@class, "fa fa-briefcase")]]/p')&.text&.strip
level = row.at_xpath('//li[./strong/i[contains(@class, "mdi mdi-account")]]/p').text.strip
expiration_date = row.at_xpath('//li[./strong/i[contains(@class, "mdi mdi-calendar-check")]]/p').text.strip
info_job = Job.find_or_create_by!(title: title,
job = Job.find_or_create_by!(title: title,
level: level,
salary: salary,
experience: experience,
expiration_date: expiration_date,
description: description,
company_id: company_table.id)
company_id: company.id)
create_city_rel(row, info_job)
create_industry_rel(row, info_job)
city_relationship(row, job)
industry_relationship(row, job)
end
def find_job
......@@ -79,19 +73,18 @@ class InforJob
next if row.blank?
begin
name_company = link_page.search('div.job-desc a.job-company-name').text.strip
company_table = Company.find_by(name: name_company)
next if company_table.blank?
company_name = link_page.search('div.job-desc a.job-company-name').text.strip
company = Company.find_by(name: company_name)
next if company.blank?
title = link_page.search('div.job-desc p').text.strip
next if title.blank?
create_job(title, link_page, row, company_table)
create_job(title, link_page, row, company)
rescue StandardError => e
puts e
# @mylogger.error e.message
@logger.error e.message
end
end
end
end
require 'src/crawler.rb'
require 'src/crontab.rb'
require 'src/jobparser.rb'
require 'src/csvimporter.rb'
namespace :import do
desc 'crawler data'
......@@ -9,12 +10,10 @@ namespace :import do
end
desc 'Crontab'
task auto: :environment do
action = Crawler.new(logger)
crontab = InforJob.new(logger, url)
crontab = JobParser.new(logger, url)
csvimporter = CSVimporter.new(logger)
crontab.crawl_all
action.get_file_csv
action.extract_zip('./jobs.zip', 'lib/csv')
action.import_file_csv(Rails.root.join('lib', 'csv', 'jobs.csv'))
csvimporter.import
end
def logger
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment