Commit e457e016 by Ngo Trung Hung

fix rubocop

parent c874fae5
Pipeline #704 failed with stages
in 0 seconds
# frozen_string_literal: true
# Base controller for the application; mixes in CrawlerHelper and handles missing pages.
class ApplicationController < ActionController::Base
require 'nokogiri'
require 'open-uri'
include CrawlerHelper
def page_not_found
respond_to do |format|
......
# frozen_string_literal: true
# Controller for the job-application flow.
class ApplyJobController < ApplicationController
def new
......
# frozen_string_literal: true
# Controller for city listings.
class CityController < ApplicationController
def index
......
# frozen_string_literal: true
# Controller that renders custom error pages (404/422/500).
class ErrorsController < ApplicationController
def file_not_found
render 'errors/file_not_found'
......
# frozen_string_literal: true
# Controller for the home page.
class HomeController < ApplicationController
add_breadcrumb "Trang chủ", :root_path
def index
......
# frozen_string_literal: true
# Controller for industry listings.
class IndustryController < ApplicationController
def index
......
# frozen_string_literal: true
# Controller for job listings, detail pages, and job search.
class JobController < ApplicationController
def index
end
def detail
......@@ -42,5 +44,4 @@ class JobController < ApplicationController
@fill_data = company.jobs.page(params[:page])
render 'result_data'
end
end
# NOTE(review): scratch controller with an empty index action; appears unused —
# consider deleting before release.
class TestController < ApplicationController
def index
end
end
# frozen_string_literal: true
# View helpers shared across the application.
module ApplicationHelper
def full_title(page_title)
base_title = "VenJob"
base_title = 'VenJob'
if page_title.empty?
base_title
else
......
require 'open-uri'
# Helper mixed into ApplicationController that scrapes job and company data
# from careerbuilder.vn with Nokogiri.
# NOTE(review): several methods call `render`, so they only work when invoked
# from a controller action. URI.escape (used below) was deprecated in Ruby 2.7
# and removed in 3.0 — this code assumes an older Ruby; verify target version.
module CrawlerHelper
# Scrapes industry names from the #industry <select> on the search page and
# renders them as plain text.
def crawl_industries_data
data_list_industries = []
page = Nokogiri::HTML(URI.open('https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-vi.html'))
data = page.search("#industry option")
# Split raw <option> markup per entry, then strip the leading tag and
# newlines, leaving only the option text.
list_industries = data.to_s.split("</option>")
list_industries.each do |x|
data_list_industries << x.gsub(/(^<[\w\D]*>)/, '').gsub(/\n/,'').rstrip
end
render plain: data_list_industries;
end
# Same as crawl_industries_data but for the #location <select> (cities).
def crawl_cities_data
data_list_cities = []
page = Nokogiri::HTML(URI.open('https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-vi.html'))
data = page.search("#location option")
list_cities = data.to_s.split("</option>")
list_cities.each do |x|
data_list_cities << x.gsub(/(^<[\w\D]*>)/, '').gsub(/\n/,'').rstrip
end
render plain: data_list_cities;
end
# Collects relative company-profile and job-detail links from the first
# num_page_will_crawl listing pages. Returns a two-element array:
# [company_paths, job_paths], paths relative to https://careerbuilder.vn/.
def crawl_link_for_companies_jobs
data = []
website_companies = []
website_jobs = []
num_page_will_crawl = 1
num_page_will_crawl.times do |i|
page = Nokogiri::HTML(URI.open("https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-trang-#{i+1}-vi.html"))
# .text concatenates every href into one string; splitting on the site
# origin recovers the individual relative paths.
website_companies << page.search(".figcaption .caption a/@href").text.to_s.split('https://careerbuilder.vn/')
website_jobs << page.search(".figcaption .title .job_link @href").text.to_s.split('https://careerbuilder.vn/')
end
# Flatten the per-page arrays and drop empty fragments left by split.
website_companies = website_companies.join(",")
website_companies = website_companies.split(",")
website_companies = website_companies.select { |val| val != ''}
website_jobs = website_jobs.join(",")
website_jobs = website_jobs.split(",")
website_jobs = website_jobs.select { |val| val != ''}
data << website_companies << website_jobs
end
# Crawls name/address/description for every company link and accumulates them
# in @data_companies. Renders a plain-text summary at the end.
# NOTE(review): method name is misspelled ("craw"); kept for compatibility.
def craw_data_companies
link_crawl = crawl_link_for_companies_jobs()
@data_companies = {}
@data_companies_name = []
@data_companies_address = []
@data_companies_description = []
link_crawl[0].each do |url|
page = Nokogiri::HTML(URI.open(URI.parse(URI.escape("https://careerbuilder.vn/#{url}"))))
# Company pages come in two layouts; detect by whether the .company-info
# name node is present and scrape the matching selectors.
if page.search(".company-info .info .content .name").text == ""
name = page.search(".section-page #cp_company_name").text
address = page.search(".section-page .cp_basic_info_details ul li:nth-child(1)").text
desc = page.search(".cp_aboutus_item .content_fck").text
# Only keep companies with all three fields populated.
if (name != "" && address != "" && desc != "")
@data_companies_name << name.to_s.rstrip
@data_companies_address << address.to_s.rstrip
@data_companies_description << desc
end
else
name = page.search(".company-info .info .content .name").text
address = page.search(".company-info .info .content p:nth-child(3)").text
desc = page.search(".main-about-us .content").text
if (name != "" && address != "" && desc != "")
@data_companies_name << name.to_s.rstrip
@data_companies_address << address.to_s.rstrip
@data_companies_description << desc
end
end
end
@data_companies[:name] = @data_companies_name
@data_companies[:address] = @data_companies_address
# delete! strips the characters [ ] , \n \t \r in place (String#to_s returns
# self). NOTE(review): delete!/strip! return nil when nothing changed — safe
# here only because the results are discarded and val is mutated.
@data_companies_description.each do |val|
val.to_s.delete!("[\n,\t,\r]")
val.strip!
end
@data_companies[:description] = @data_companies_description
# render plain: "#{@data_companies[:name]} -- #{@data_companies[:address]} -- #{@data_companies[:description]}"
render plain: "#{@data_companies[:name]} = #{@data_companies[:name].length} "
# render plain: @data_companies
end
# Fetches and parses one careerbuilder.vn page for the given relative path.
def base_link(url)
Nokogiri::HTML(URI.open(URI.parse(URI.escape("https://careerbuilder.vn/#{url}"))))
end
# page = Nokogiri::HTML(URI.open(URI.parse(URI.escape("https://careerbuilder.vn/vi/tim-viec-lam/ky-su-dau-thau-mep.35B45617.html"))))
# page = Nokogiri::HTML(URI.open(URI.parse(URI.escape("https://careerbuilder.vn/vi/tim-viec-lam/dai-dien-tieu-thu-sales-representative-quang-binh-tp-dong-hoi.35B4572F.html"))))
# page = Nokogiri::HTML(URI.open(URI.parse(URI.escape("https://careerbuilder.vn/vi/tim-viec-lam/tuyen-tai-xe-van-phong-cho-sep-han-quoc-tu-binh-thanh.35B45A41.html"))))
# page = Nokogiri::HTML(URI.open(URI.parse(URI.escape("https://careerbuilder.vn/vi/tim-viec-lam/dai-dien-tieu-thu-sales-representative-quang-nam-phuoc-son-hiep-duc-thang-binh.35B4572D.html"))))
# page = Nokogiri::HTML(URI.open(URI.parse(URI.escape("https://careerbuilder.vn/vi/tim-viec-lam/program-management-executive.35B428B5.html"))))
# page = Nokogiri::HTML(URI.open(URI.parse(URI.escape("https://careerbuilder.vn/vi/tim-viec-lam/nhan-vien-tong-vu-phong-van-va-lam-test-truc-tuyen-nhan-viec-ngay-sau-3-5-ngay-nop-ho-so.35B44E79.html"))))
# Crawler job
# Scrapes one job posting rendered with layout variant 1 (".apply-now-content"
# / ".item-blue" pages) and appends each field to the @* accumulator arrays
# that make_data initialises.
def crawl_data_jobs_interface_1(url)
page = base_link(url)
# page = Nokogiri::HTML(URI.open(URI.parse(URI.escape("https://careerbuilder.vn/#{url}"))))
# page = Nokogiri::HTML(URI.open(URI.parse(URI.escape("https://careerbuilder.vn/vi/tim-viec-lam/ky-su-dau-thau-mep.35B45617.html"))))
@name << page.search(".apply-now-content .job-desc .title").text
@data[:name] = @name
@company_name << page.search(".apply-now-content .job-desc .job-company-name").text
@data[:company_name] = @company_name
@city_name << page.search(".detail-box .map p a").text
@data[:city_name] = @city_name
@created_date << page.search(".item-blue .detail-box:nth-child(1) ul li:nth-child(1) p")[0].text
@data[:created_date] = @created_date
@expiration_date << page.search(".item-blue .detail-box ul li:last")[1].text.delete!("[\n,\t,\r]").split(' ').last
@data[:expiration_date] = @expiration_date
@salary << page.search(".item-blue .detail-box:nth-child(1) ul li:nth-child(1) p")[1].text
@data[:salary] = @salary
industry_name = page.search(".item-blue .detail-box:nth-child(1) ul li:nth-child(2) a").text
industry_name = industry_name.delete!("[\n,\t,\r]").split(' ').select { |v| v != ''}
@industry_name << industry_name.join(',')
@data[:industry_name] = @industry_name
@description << page.search(".tabs .tab-content .detail-row:nth-child(n)").to_s.delete!("[\n,\t,\r]")
@data[:description] = @description
# Some pages put the 'Cấp bậc' (level) label in the 3rd <li>, others in the
# 2nd; probe the 3rd first and fall back.
get_level = page.search(".item-blue .detail-box:last ul li:nth-child(3)").text.delete!("[\n,\t,\r]").lstrip.split('Cấp bậc')
get_level = get_level[1].to_s.strip
if get_level == ""
level = page.search(".item-blue .detail-box:last ul li:nth-child(2)").text.delete!("[\n,\t,\r]").lstrip.split('Cấp bậc')
@level << level[1].to_s.strip
else
level = page.search(".item-blue .detail-box:last ul li:nth-child(3)").text.delete!("[\n,\t,\r]").lstrip.split('Cấp bậc')
@level << level[1].to_s.strip
end
@data[:level] = @level
# 'Kinh nghiệm' = experience; note the accumulator name is misspelled
# ("exprience") throughout — kept for compatibility.
exprience = page.search(".item-blue .detail-box:last ul li:nth-child(2)").text.delete!("[\n,\t,\r]").split('Kinh nghiệm')
exprience = exprience[1].to_s.strip
@exprience << exprience
@data[:exprience] = @exprience
end
# Scrapes one job posting rendered with layout variant 2 (".top-job" /
# ".info-workplace" pages). created_date is not present in this layout, so an
# empty string is pushed to keep the arrays aligned.
def crawl_data_jobs_interface_2(url)
page = base_link(url)
# page = Nokogiri::HTML(URI.open(URI.parse(URI.escape("https://careerbuilder.vn/vi/tim-viec-lam/dai-dien-tieu-thu-sales-representative-quang-binh-tp-dong-hoi.35B4572F.html"))))
# page = Nokogiri::HTML(URI.open(URI.parse(URI.escape("https://careerbuilder.vn/#{url}"))))
#interface1
@name << page.search(".apply-now-content .job-desc .title").text
@data[:name] = @name
@company_name << page.search(".top-job .top-job-info .tit_company").text
@data[:company_name] = @company_name
@city_name << page.search(".info-workplace .value a").text
@data[:city_name] = @city_name
@created_date << ""
@data[:created_date] =@created_date
expiration_date = page.search(".info li:nth-child(4)").text
@expiration_date << expiration_date.to_s.delete!("[\n,\t,\r]").split(' ').last
@data[:expiration_date] = @expiration_date
@salary << page.search(".info li:nth-child(3)").text.split("Lương").last.strip
@data[:salary] = @salary
@industry_name << page.search(".info li:nth-child(5) .value").text
@data[:industry_name] = @industry_name
@description << page.search(".left-col").to_s.delete!("[\n,\t,\r]")
@data[:description] = @description
@level << page.search(".boxtp .info li:nth-child(2)").text.delete!("[\n,\t,\r]").lstrip.split('Cấp bậc').last.strip
@data[:level] = @level
@exprience << page.search(".info li:nth-child(6)").text.delete!("[\n,\t,\r]").split('Kinh nghiệm').last.strip
@data[:exprience] = @exprience
end
# Scrapes one job posting rendered with layout variant 3 (".intro_job" /
# ".DetailJobNew" pages with <p> value nodes).
def crawl_data_jobs_interface_3(url)
page = base_link(url)
@name << page.search(".intro_job h1").text
@data[:name] = @name
@company_name << page.search(".info-company .text-job h2").text
@data[:company_name] = @company_name
@city_name << page.search(".DetailJobNew ul li:nth-child(1) a").text
@data[:city_name] = @city_name
@created_date << ""
@data[:created_date] = @created_date
@expiration_date << page.search(".DetailJobNew .info ul li:nth-child(3) p").text.strip
@data[:expiration_date] = @expiration_date
@salary << page.search(".DetailJobNew .salary ul li:nth-child(3) p").text.strip
@data[:salary] = @salary
@industry_name << page.search(".DetailJobNew .salary ul li:nth-child(2) p").text.strip
@data[:industry_name] = @industry_name
@description << page.search(".content_job .detail-row").to_s.delete!("[\n,\t,\r]")
@data[:description] = @description
@level << page.search(".DetailJobNew .info ul li:nth-child(2) p").text.strip
@data[:level] = @level
@exprience << page.search(".DetailJobNew .info ul li:nth-child(1) p").text.strip
@data[:exprience] = @exprience
end
# Scrapes one job posting rendered with layout variant 4 (".DetailJobNew"
# pages whose <ul> has 8 items; see the dispatch in make_data). Handles two
# sub-layouts depending on whether .zone-company is populated.
def crawl_data_jobs_interface_4(url)
page = base_link(url)
@name << page.search(".info-company h1").text
@data[:name] = @name
if page.search(".zone-company .text-job h2").text == ""
@company_name << page.search(".info-company .text-job h2").text
@industry_name << page.search(".DetailJobNew li:nth-child(3) span").text.strip
else
@company_name << page.search(".zone-company .text-job h2").text.strip
industry_name = page.search(".DetailJobNew li:nth-child(3) span a").text
@industry_name << industry_name.delete!("[\n,\t,\r]").split(' ').select { |v| v != ''}
end
@data[:company_name] = @company_name
@data[:industry_name] = @industry_name
@city_name << page.search(".DetailJobNew ul li:nth-child(1) a").text
@data[:city_name] = @city_name
@created_date << ""
@data[:created_date] = @created_date
@expiration_date << page.search(".DetailJobNew li:nth-child(7) span").text
@data[:expiration_date] = @expiration_date
@salary << page.search(".DetailJobNew li:nth-child(6) span").text
@data[:salary] = @salary
@description << page.search(".left-col").to_s.delete!("[\n,\t,\r]")
@data[:description] = @description
@level << page.search(".DetailJobNew ul li:nth-child(2) span").text
@data[:level] = @level
@exprience << ""
@data[:exprience] = @exprience
end
# Scrapes one job posting rendered with layout variant 5 (".DetailJobNew"
# pages whose <ul> has 10 items; see the dispatch in make_data).
def crawl_data_jobs_interface_5(url)
page = base_link(url)
@name << page.search(".info-company h1").text
@data[:name] = @name
@company_name << page.search(".info-company .text-job h2").text
@data[:company_name] = @company_name
@city_name << page.search(".DetailJobNew ul li:nth-child(1) a").text
@data[:city_name] = @city_name
@created_date << ""
@data[:created_date] = @created_date
@expiration_date << page.search(".DetailJobNew li:nth-child(9) span").text.strip
@data[:expiration_date] = @expiration_date
@salary << page.search(".DetailJobNew li:nth-child(3) span").text.strip
@data[:salary] = @salary
@industry_name << page.search(".DetailJobNew li:nth-child(2) span").text.strip
@data[:industry_name] = @industry_name
@description << page.search(".left-col .detail-row").to_s.delete!("[\n,\t,\r]")
@data[:description] = @description
@level << page.search(".DetailJobNew ul li:nth-child(6) span").text.strip
@data[:level] = @level
@exprience << page.search(".DetailJobNew li:nth-child(5) span").text.strip
@data[:exprience] = @exprience
end
# Entry point: initialises @data and the per-field accumulator arrays, walks
# every job link, sniffs which of the five page layouts it uses (note each
# page is fetched here AND again inside the interface method), and dispatches
# to the matching crawl_data_jobs_interface_* method. Renders @data as text.
def make_data
@data = {}
@name = []
@company_name = []
@level = []
@exprience = []
@salary = []
@created_date = []
@expiration_date = []
@description = []
@industry_name = []
@city_name = []
link_crawl = crawl_link_for_companies_jobs()
link_crawl[1].each do |path|
# debugger
page = Nokogiri::HTML(URI.open(URI.parse(URI.escape("https://careerbuilder.vn/#{path}"))))
# Layout sniffing: variant 1 has .item-blue detail boxes; variant 2 has a
# 6th .info <li>; variants 4/5 are distinguished by .DetailJobNew <li>
# count (8 vs 10); everything else falls through to variant 3.
if page.search(".item-blue .detail-box:nth-child(1) ul li:nth-child(1) p")[0] != nil
crawl_data_jobs_interface_1(path)
elsif page.search(".info li:nth-child(6)").text != ""
crawl_data_jobs_interface_2(path)
elsif page.search(".DetailJobNew ul li").size == 10
crawl_data_jobs_interface_5(path)
elsif page.search(".DetailJobNew ul li").size == 8
crawl_data_jobs_interface_4(path)
else
crawl_data_jobs_interface_3(path)
end
end
render plain: "#{@data}"
# render plain: "#{@data[:company_name]}--#{@data[:company_name].length}"
# name: => #{@data[:name][0]} -- #{@data[:name].length}
# company: => #{@data[:company_name][0]} -- #{@data[:company_name].length}
# level: => #{@data[:level][0]} -- #{@data[:level].length}
# industry: => #{@data[:industry_name][0]} -- #{@data[:industry_name].length}
# exprience: => #{@data[:exprience][0]} -- #{@data[:exprience].length}
# expiration date: => #{@data[:expiration_date][0]} -- #{@data[:expiration_date].length}
# created date: => #{@data[:created_date][0]} -- #{@data[:created_date].length}
# city: => #{@data[:city_name][0]} -- #{@data[:city_name].length}
# salary: => #{@data[:salary][0]} -- #{@data[:salary].length}
# description: => #{@data[:description][0]} -- #{@data[:description].length} "
# page = Nokogiri::HTML(URI.open(URI.parse(URI.escape("https://careerbuilder.vn/vi/tim-viec-lam/truong-tram-y-te-cong-ty.35B44FDF.html"))))
# page = Nokogiri::HTML(URI.open(URI.parse(URI.escape("https://careerbuilder.vn/vi/tim-viec-lam/hr-admin-executive.35B45B43.html"))))
end
end
\ No newline at end of file
# frozen_string_literal: true
# Abstract base class all application models inherit from (Rails convention).
class ApplicationRecord < ActiveRecord::Base
self.abstract_class = true
end
# frozen_string_literal: true
# Join record linking a user to a job they applied for (with name/email/CV).
class AppliedJob < ApplicationRecord
belongs_to :user
belongs_to :job
......
# frozen_string_literal: true
# A city where jobs are located; linked to jobs through city_jobs.
class City < ApplicationRecord
has_many :city_jobs
has_many :jobs, through: :city_jobs
......
# frozen_string_literal: true
# Join model between cities and jobs.
class CityJob < ApplicationRecord
belongs_to :city
belongs_to :job
......
# frozen_string_literal: true
# A company that posts job listings; one company has many jobs.
class Company < ApplicationRecord
has_many :jobs
end
# frozen_string_literal: true
# Join record linking a user to a job they marked as favorite.
class Favorite < ApplicationRecord
belongs_to :user
belongs_to :job
......
# frozen_string_literal: true
# Join record linking a user to a job they viewed.
class History < ApplicationRecord
belongs_to :user
belongs_to :job
......
# frozen_string_literal: true
# A job industry/category; linked to jobs through industry_jobs.
class Industry < ApplicationRecord
has_many :industry_jobs
has_many :jobs, through: :industry_jobs
scope :sort_asc, -> { order(name: :asc)}
scope :sort_asc, -> { order(name: :asc) }
def self.top_hot
hash = {}
......@@ -9,7 +12,7 @@ class Industry < ApplicationRecord
data_industries.each do |val|
hash[val.name] = val.jobs.count
end
hash = hash.select { |k,v| v > 0}
hash = hash.select { |k,v| v > 0 }
hash.sort_by { |k,v| v }.reverse
end
end
# frozen_string_literal: true
# Join model between industries and jobs.
class IndustryJob < ApplicationRecord
belongs_to :industry
belongs_to :job
......
# frozen_string_literal: true
# A job posting, belonging to a company.
class Job < ApplicationRecord
belongs_to :company
......
# frozen_string_literal: true
# An application user, linked to jobs through applied_jobs.
class User < ApplicationRecord
has_many :applied_jobs
has_many :jobs, through: :applied_jobs
......
......@@ -14,7 +14,7 @@ default: &default
encoding: utf8
pool: <%= ENV.fetch("RAILS_MAX_THREADS") { 5 } %>
username: root
password: '1'
password: '12345678'
socket: /var/run/mysqld/mysqld.sock
......
......@@ -16,7 +16,7 @@ port ENV.fetch("PORT") { 3000 }
environment ENV.fetch("RAILS_ENV") { "development" }
# Specifies the `pidfile` that Puma will use.
pidfile ENV.fetch("PIDFILE") { "tmp/pids/server.pid" }
pidfile ENV.fetch('PIDFILE') { "tmp/pids/server.pid" }
# Specifies the number of `workers` to boot in clustered mode.
# Workers are forked webserver processes. If using threads and workers together
......
# frozen_string_literal: true
Rails.application.routes.draw do
  get 'apply_job/create'
  root 'home#index'

  # Custom error pages; `via: :all` so every HTTP verb is handled.
  match '/404', to: 'errors#file_not_found', via: :all
  match '/422', to: 'errors#unprocessable', via: :all
  match '/500', to: 'errors#internal_server_error', via: :all

  # Job-application flow. The permissive constraint lets job_id contain
  # slashes and dots.
  match 'apply', to: 'apply_job#new', via: :get, constraints: { job_id: /.*/ }
  match '/confirm', to: 'apply_job#show', via: 'post'

  # Job browsing and search. Keys may contain slashes, hence /.*/ constraints.
  get 'detail/:id', to: 'job#detail'
  get 'jobs/city/(:key_city)', to: 'job#find_data_by_city', constraints: { key_city: /.*/ }
  get 'jobs/industry/(:key_industry)', to: 'job#find_data_by_industry', constraints: { key_industry: /.*/ }
  get 'jobs/company/(:key_company)', to: 'job#find_data_by_company', constraints: { key_company: /.*/ }

  get 'cities', to: 'city#index'
  get 'industries', to: 'industry#index'
  # For details on the DSL available within this file, see http://guides.rubyonrails.org/routing.html
end
# frozen_string_literal: true
env :PATH, ENV['PATH']

# Re-crawl job data every 10 minutes; route stderr/stdout to log files.
every 10.minutes do
  rake 'crawler:populate', output: { error: 'error.log', standard: 'cron.log' }
end

# Import CSV data daily at 19:00. The previous '19:00pm' mixed 24-hour and
# am/pm notation (and was missing the space before `do`); use unambiguous
# 24-hour time.
every :day, at: '19:00' do
  rake 'csv:import_csv'
end

# Rotate crawler logs daily at 16:30 (was '16:30pm').
every :day, at: '16:30' do
  rake 'crawler:deletelog'
end
# frozen_string_literal: true
%w[
.ruby-version
.rbenv-vars
......
Crawling data location...
.
.
.
Save data to database...
------------------------
Crawl data companies
Crawling link on page...
PLease wait...
Result:
Company: 41 link
Job : 50 link
------------------------
Công ty TNHH SXTM XNK Minh Hiền
Học viện VTC Academy
FE CREDIT
CÔNG TY CỔ PHẦN ĐẦU TƯ CHÂU Á THÁI BÌNH DƯƠNG
Công ty TNHH Two Kings Distribution
CÔNG TY TNHH THƯƠNG MẠI DỊCH VỤ XUẤT NHẬP KHẨU TTH VIỆT NAM
Công Ty Cổ Phần Chuỗi Thực Phẩm TH
Công Ty Cổ Phần Quảng Cáo Thương Mại Sen Vàng
Công ty CP Công trình Giao Thông Đồng Nai
Skretting Vietnam - Nutreco International ( Vietnam)
Công Ty Cổ Phần Hà Nội Foods Việt Nam
Monroe Consulting Group Vietnam
CÔNG TY CỔ PHẦN AP SAIGON PETRO
CÔNG TY TNHH SHR VIỆT NAM
Công Ty Cổ Phần Kinh Doanh Chế Biến Nông Sản Bảo Minh
Cty CP Đầu Tư Xây Dựng Trung Nam
CÔNG TY CỔ PHẦN PHÁT TRIỂN BẤT ĐỘNG SẢN CASA HOLDINGS
CÔNG TY CỔ PHẦN PHÂN BÓN HÀ LAN
Công Ty TNHH Golden Phoenix Vietnam
Công ty TNHH Quốc tế Fleming Việt Nam
Công Ty TNHH Aeon Việt Nam
Công Ty TNHH Halo Group
Công Ty Cổ Phần VNG
Công Ty Cổ Phần Diana Unicharm
Ngân Hàng TMCP Phương Đông - OCB
CÔNG TY TNHH SMC MANUFACTURING (VIỆT NAM)
MegaCEO
CÔNG TY TNHH THỰC PHẨM PHÚC LỘC THỌ
CellphoneS & Điện Thoại Vui - Hệ thống bán lẻ và sửa chữa điện thoại di động toàn quốc
Cty TNHH Thời Trang & Mỹ Phẩm Duy Anh (DAFC)
Công ty TNHH Công nghệ Dược phẩm Lotus
Công Ty CP TM DV SX Hương Thủy
EQuest Education Group (EQG)
CÔNG TY TNHH PHỒN THỊNH - TAE GWANG
N KID Corporation
Công ty TNHH Vector Fabrication (Việt Nam)
Công Ty CP Dược Phẩm Pharmacity
CÔNG TY TNHH AZOOM VIETNAM INC.
Công ty Cổ Phần Ẩm Thực Chảo Đỏ
Công Ty TNHH Dịch Vụ Quảng Cáo MC
Công ty Cổ phần Hướng nghiệp Á Âu
Please wait for crawl jobs data! . . .
0 - https://careerbuilder.vn/vi/tim-viec-lam/sales-administration-officer.35B46C1D.html
1 - https://careerbuilder.vn/vi/tim-viec-lam/nhan-vien-kinh-doanh-dai-dien-thuong-mai.35B46C1C.html
2 - https://careerbuilder.vn/vi/tim-viec-lam/nhan-vien-it.35B46C1B.html
3 - https://careerbuilder.vn/vi/tim-viec-lam/thuc-tap-hanh-chinh-nhan-su.35B46BE3.html
4 - https://careerbuilder.vn/vi/tim-viec-lam/key-account.35B46C1E.html
5 - https://careerbuilder.vn/vi/tim-viec-lam/thuc-tap-bien-tap.35B46C1F.html
6 - https://careerbuilder.vn/vi/tim-viec-lam/thuc-tap-sinh-tuyen-dung-ha-noi-02-ho-chi-minh-02.35B46BD8.html
7 - https://careerbuilder.vn/vi/tim-viec-lam/key-account-saler.35B46C22.html
8 - https://careerbuilder.vn/vi/tim-viec-lam/nhan-vien-telesales-my-pham.35B46C24.html
9 - https://careerbuilder.vn/vi/tim-viec-lam/ban-hang-kenh-key-account.35B46C26.html
10 - https://careerbuilder.vn/vi/tim-viec-lam/thuc-tap-sinh-ke-toan.35B46C28.html
11 - https://careerbuilder.vn/vi/tim-viec-lam/digital-bank-junior-project-manager.35B46C29.html
12 - https://careerbuilder.vn/vi/tim-viec-lam/vtc-academy-nhan-vien-telesales-fulltime.35B46C25.html
13 - https://careerbuilder.vn/vi/tim-viec-lam/nhan-vien-kinh-doanh.35B46C27.html
Crawler data jobs success!
Crawling data location...
.
.
.
Save data to database...
------------------------
Crawl data companies
Crawling link on page...
PLease wait...
Result:
Company: 38 link
Job : 50 link
------------------------
Công ty cổ phần tập đoàn giáo dục Edufit
CÔNG TY TNHH TIALOC VIỆT NAM
Mobicast
Tập đoàn Digital Novaon
Công ty CP Công trình Giao Thông Đồng Nai
Công Ty TNHH Digimall
Công ty Cổ Phần Thảo Mộc Xanh Long An
Công ty TNHH MTV Mrspeedy Việt Nam
Ngân Hàng TMCP Quốc Tế Việt Nam (VIB)
Tổng Công Ty CP Bảo Hiểm Bảo Long
Công ty TNHH SXTM XNK Minh Hiền
Học viện VTC Academy
FE CREDIT
CÔNG TY CỔ PHẦN ĐẦU TƯ CHÂU Á THÁI BÌNH DƯƠNG
Công ty TNHH Two Kings Distribution
CÔNG TY TNHH THƯƠNG MẠI DỊCH VỤ XUẤT NHẬP KHẨU TTH VIỆT NAM
Công Ty Cổ Phần Chuỗi Thực Phẩm TH
Công Ty Cổ Phần Quảng Cáo Thương Mại Sen Vàng
Skretting Vietnam - Nutreco International ( Vietnam)
Công Ty Cổ Phần Hà Nội Foods Việt Nam
Monroe Consulting Group Vietnam
CÔNG TY CỔ PHẦN AP SAIGON PETRO
CÔNG TY TNHH SHR VIỆT NAM
Công Ty Cổ Phần Kinh Doanh Chế Biến Nông Sản Bảo Minh
Cty CP Đầu Tư Xây Dựng Trung Nam
CÔNG TY CỔ PHẦN PHÁT TRIỂN BẤT ĐỘNG SẢN CASA HOLDINGS
CÔNG TY CỔ PHẦN PHÂN BÓN HÀ LAN
Công Ty TNHH Golden Phoenix Vietnam
Công ty TNHH Quốc tế Fleming Việt Nam
Công Ty TNHH Aeon Việt Nam
Công Ty TNHH Halo Group
Công Ty Cổ Phần VNG
Công Ty Cổ Phần Diana Unicharm
Ngân Hàng TMCP Phương Đông - OCB
CÔNG TY TNHH SMC MANUFACTURING (VIỆT NAM)
MegaCEO
CÔNG TY TNHH THỰC PHẨM PHÚC LỘC THỌ
Please wait for crawl jobs data! . . .
0 - https://careerbuilder.vn/vi/tim-viec-lam/nhan-vien-van-thu.35B46C23.html
1 - https://careerbuilder.vn/vi/tim-viec-lam/nhan-vien-ban-bao-hiem-con-nguoi.35B46B6C.html
2 - https://careerbuilder.vn/vi/tim-viec-lam/quan-ly-khach-hang-cao-cap-quan-ly-khach-hang-doanh-nghiep.35B46C1A.html
3 - https://careerbuilder.vn/vi/tim-viec-lam/nhan-vien-qc-nganh-in-an.35B46C20.html
4 - https://careerbuilder.vn/vi/tim-viec-lam/senior-business-development-executive.35B447EB.html
5 - https://careerbuilder.vn/vi/tim-viec-lam/nhan-vien-qc-nganh-nhua.35B46C08.html
6 - https://careerbuilder.vn/vi/tim-viec-lam/ke-toan-vien-kiem-admin-accountant-cum-administrator.35B46C2A.html
7 - https://careerbuilder.vn/vi/tim-viec-lam/tai-xe.35B46C2B.html
8 - https://careerbuilder.vn/vi/tim-viec-lam/truong-phong-cham-soc-khach-hang.35B46C2C.html
9 - https://careerbuilder.vn/vi/tim-viec-lam/lap-trinh-vien.35B46C2E.html
10 - https://careerbuilder.vn/vi/tim-viec-lam/project-sales-executive.35B46C2D.html
11 - https://careerbuilder.vn/vi/tim-viec-lam/lap-trinh-vien-trung-tam-nghien-cuu-va-phat-trien-san-pham.35B46C31.html
12 - https://careerbuilder.vn/vi/tim-viec-lam/truong-ban-esl.35B46C30.html
Crawler data jobs success!
Crawling data location...
.
.
.
Save data to database...
------------------------
Crawl data companies
Crawling link on page...
PLease wait...
Result:
Company: 35 link
Job : 50 link
------------------------
Học viện VTC Academy
Ngân Hàng TMCP Tiên Phong
CÔNG TY TNHH TIALOC VIỆT NAM
CÔNG TY CỔ PHẦN ĐẦU TƯ CHÂU Á THÁI BÌNH DƯƠNG
Công ty cổ phần tập đoàn giáo dục Edufit
Mobicast
Tập đoàn Digital Novaon
Công ty CP Công trình Giao Thông Đồng Nai
Công Ty TNHH Digimall
Công ty Cổ Phần Thảo Mộc Xanh Long An
Công ty TNHH MTV Mrspeedy Việt Nam
Ngân Hàng TMCP Quốc Tế Việt Nam (VIB)
Tổng Công Ty CP Bảo Hiểm Bảo Long
Công ty TNHH SXTM XNK Minh Hiền
FE CREDIT
Công ty TNHH Two Kings Distribution
CÔNG TY TNHH THƯƠNG MẠI DỊCH VỤ XUẤT NHẬP KHẨU TTH VIỆT NAM
Công Ty Cổ Phần Chuỗi Thực Phẩm TH
Công Ty Cổ Phần Quảng Cáo Thương Mại Sen Vàng
Skretting Vietnam - Nutreco International ( Vietnam)
Công Ty Cổ Phần Hà Nội Foods Việt Nam
Monroe Consulting Group Vietnam
CÔNG TY CỔ PHẦN AP SAIGON PETRO
CÔNG TY TNHH SHR VIỆT NAM
Công Ty Cổ Phần Kinh Doanh Chế Biến Nông Sản Bảo Minh
Cty CP Đầu Tư Xây Dựng Trung Nam
CÔNG TY CỔ PHẦN PHÁT TRIỂN BẤT ĐỘNG SẢN CASA HOLDINGS
CÔNG TY CỔ PHẦN PHÂN BÓN HÀ LAN
Công Ty TNHH Golden Phoenix Vietnam
Công ty TNHH Quốc tế Fleming Việt Nam
Công Ty TNHH Aeon Việt Nam
Công Ty TNHH Halo Group
Công Ty Cổ Phần VNG
Công Ty Cổ Phần Diana Unicharm
Please wait for crawl jobs data! . . .
Crawling data location...
.
.
.
Save data to database...
------------------------
Crawl data companies
Crawling link on page...
PLease wait...
Result:
Company: 34 link
Job : 50 link
------------------------
Công Ty Cổ Phần Quảng Cáo Thương Mại Sen Vàng
# frozen_string_literal: true
# This file is auto-generated from the current state of the database. Instead
# of editing this file, please use the migrations feature of Active Record to
# incrementally modify your database, and then regenerate this schema definition.
......@@ -12,93 +13,92 @@
# NOTE(review): schema.rb is auto-generated by Active Record — hand-applied
# quoting changes will be overwritten by the next `db:migrate`. Each table was
# declared twice in this chunk (pre- and post-rubocop copies); the duplicates
# are removed below, keeping a single definition per table.
ActiveRecord::Schema.define(version: 2020_07_20_035021) do
  create_table 'applied_jobs', options: 'ENGINE=InnoDB DEFAULT CHARSET=utf8', force: :cascade do |t|
    t.bigint 'user_id'
    t.bigint 'job_id'
    t.string 'name'
    t.string 'email'
    t.text 'cv'
    t.datetime 'created_at', null: false
    t.datetime 'updated_at', null: false
    t.index ['job_id'], name: 'index_applied_jobs_on_job_id'
    t.index ['user_id'], name: 'index_applied_jobs_on_user_id'
  end

  create_table 'cities', options: 'ENGINE=InnoDB DEFAULT CHARSET=utf8', force: :cascade do |t|
    t.string 'name'
    t.boolean 'area'
    t.datetime 'created_at', null: false
    t.datetime 'updated_at', null: false
  end

  create_table 'city_jobs', options: 'ENGINE=InnoDB DEFAULT CHARSET=utf8', force: :cascade do |t|
    t.bigint 'job_id'
    t.bigint 'city_id'
    t.datetime 'created_at', null: false
    t.datetime 'updated_at', null: false
    t.index ['city_id'], name: 'index_city_jobs_on_city_id'
    t.index ['job_id'], name: 'index_city_jobs_on_job_id'
  end

  create_table 'companies', options: 'ENGINE=InnoDB DEFAULT CHARSET=utf8', force: :cascade do |t|
    t.string 'name'
    t.string 'address'
    t.text 'short_description'
    t.datetime 'created_at', null: false
    t.datetime 'updated_at', null: false
  end

  create_table 'favorites', options: 'ENGINE=InnoDB DEFAULT CHARSET=utf8', force: :cascade do |t|
    t.integer 'user_id'
    t.integer 'job_id'
    t.datetime 'created_at', null: false
    t.datetime 'updated_at', null: false
  end

  create_table 'histories', options: 'ENGINE=InnoDB DEFAULT CHARSET=utf8', force: :cascade do |t|
    t.integer 'user_id'
    t.integer 'job_id'
    t.datetime 'created_at', null: false
    t.datetime 'updated_at', null: false
  end

  create_table 'industries', options: 'ENGINE=InnoDB DEFAULT CHARSET=utf8', force: :cascade do |t|
    t.string 'name'
    t.datetime 'created_at', null: false
    t.datetime 'updated_at', null: false
  end

  create_table 'industry_jobs', options: 'ENGINE=InnoDB DEFAULT CHARSET=utf8', force: :cascade do |t|
    t.bigint 'industry_id'
    t.bigint 'job_id'
    t.datetime 'created_at', null: false
    t.datetime 'updated_at', null: false
    t.index ['industry_id'], name: 'index_industry_jobs_on_industry_id'
    t.index ['job_id'], name: 'index_industry_jobs_on_job_id'
  end

  create_table 'jobs', options: 'ENGINE=InnoDB DEFAULT CHARSET=utf8', force: :cascade do |t|
    t.string 'name'
    t.integer 'company_id'
    t.string 'level'
    t.string 'experience'
    t.string 'salary'
    t.datetime 'create_date'
    t.datetime 'expiration_date'
    t.text 'description'
    t.datetime 'created_at', null: false
    t.datetime 'updated_at', null: false
    t.boolean 'newdata'
  end

  create_table 'users', options: 'ENGINE=InnoDB DEFAULT CHARSET=utf8', force: :cascade do |t|
    t.string 'email'
    t.string 'name'
    t.string 'password_digest'
    t.text 'cv'
    t.boolean 'admin', default: false
    t.datetime 'created_at', null: false
    t.datetime 'updated_at', null: false
  end
end
# frozen_string_literal: true
# This file should contain all the record creation needed to seed the database with its default values.
# The data can then be loaded with the rails db:seed command (or created alongside the database with db:setup).
#
......
rake aborted!
ActiveRecord::StatementInvalid: Mysql2::Error::ConnectionError: MySQL server has gone away: SELECT `industries`.* FROM `industries` WHERE `industries`.`name` = 'Ngân hàng' LIMIT 1
/home/hungnt/hungnt_venjob/lib/src/interface_web.rb:186:in `block in make_foreign_industries_table'
/home/hungnt/hungnt_venjob/lib/src/interface_web.rb:184:in `each'
/home/hungnt/hungnt_venjob/lib/src/interface_web.rb:184:in `make_foreign_industries_table'
/home/hungnt/hungnt_venjob/lib/src/interface_web.rb:98:in `add_data'
/home/hungnt/hungnt_venjob/lib/src/interface_web.rb:130:in `crawl_data_jobs_interface_1'
/home/hungnt/hungnt_venjob/lib/src/interface_web.rb:212:in `block in make_data'
/home/hungnt/hungnt_venjob/lib/src/interface_web.rb:209:in `each'
/home/hungnt/hungnt_venjob/lib/src/interface_web.rb:209:in `each_with_index'
/home/hungnt/hungnt_venjob/lib/src/interface_web.rb:209:in `make_data'
/home/hungnt/hungnt_venjob/lib/tasks/crawler.rake:9:in `block (2 levels) in <main>'
/home/hungnt/.rbenv/versions/2.6.6/bin/bundle:23:in `load'
/home/hungnt/.rbenv/versions/2.6.6/bin/bundle:23:in `<main>'
Caused by:
Mysql2::Error::ConnectionError: MySQL server has gone away
/home/hungnt/hungnt_venjob/lib/src/interface_web.rb:186:in `block in make_foreign_industries_table'
/home/hungnt/hungnt_venjob/lib/src/interface_web.rb:184:in `each'
/home/hungnt/hungnt_venjob/lib/src/interface_web.rb:184:in `make_foreign_industries_table'
/home/hungnt/hungnt_venjob/lib/src/interface_web.rb:98:in `add_data'
/home/hungnt/hungnt_venjob/lib/src/interface_web.rb:130:in `crawl_data_jobs_interface_1'
/home/hungnt/hungnt_venjob/lib/src/interface_web.rb:212:in `block in make_data'
/home/hungnt/hungnt_venjob/lib/src/interface_web.rb:209:in `each'
/home/hungnt/hungnt_venjob/lib/src/interface_web.rb:209:in `each_with_index'
/home/hungnt/hungnt_venjob/lib/src/interface_web.rb:209:in `make_data'
/home/hungnt/hungnt_venjob/lib/tasks/crawler.rake:9:in `block (2 levels) in <main>'
/home/hungnt/.rbenv/versions/2.6.6/bin/bundle:23:in `load'
/home/hungnt/.rbenv/versions/2.6.6/bin/bundle:23:in `<main>'
Tasks: TOP => crawler:populate
(See full trace by running task with --trace)
rake aborted!
SocketError: Failed to open TCP connection to careerbuilder.vn:443 (getaddrinfo: Temporary failure in name resolution)
/home/hungnt/venjob/lib/src/interface_web.rb:40:in `craw_data_cities'
/home/hungnt/venjob/lib/tasks/crawler.rake:7:in `block (2 levels) in <main>'
/home/hungnt/.rbenv/versions/2.6.6/bin/bundle:23:in `load'
/home/hungnt/.rbenv/versions/2.6.6/bin/bundle:23:in `<main>'
Caused by:
SocketError: getaddrinfo: Temporary failure in name resolution
/home/hungnt/venjob/lib/src/interface_web.rb:40:in `craw_data_cities'
/home/hungnt/venjob/lib/tasks/crawler.rake:7:in `block (2 levels) in <main>'
/home/hungnt/.rbenv/versions/2.6.6/bin/bundle:23:in `load'
/home/hungnt/.rbenv/versions/2.6.6/bin/bundle:23:in `<main>'
Tasks: TOP => crawler:populate
(See full trace by running task with --trace)
rake aborted!
Net::OpenTimeout: execution expired
/home/hungnt/venjob/lib/src/interface_web.rb:40:in `craw_data_cities'
/home/hungnt/venjob/lib/tasks/crawler.rake:7:in `block (2 levels) in <main>'
/home/hungnt/.rbenv/versions/2.6.6/bin/bundle:23:in `load'
/home/hungnt/.rbenv/versions/2.6.6/bin/bundle:23:in `<main>'
Tasks: TOP => crawler:populate
(See full trace by running task with --trace)
rake aborted!
Net::OpenTimeout: execution expired
/home/hungnt/venjob/lib/src/interface_web.rb:40:in `craw_data_cities'
/home/hungnt/venjob/lib/tasks/crawler.rake:7:in `block (2 levels) in <main>'
/home/hungnt/.rbenv/versions/2.6.6/bin/bundle:23:in `load'
/home/hungnt/.rbenv/versions/2.6.6/bin/bundle:23:in `<main>'
Tasks: TOP => crawler:populate
(See full trace by running task with --trace)
rake aborted!
Net::OpenTimeout: execution expired
/home/hungnt/venjob/lib/src/interface_web.rb:40:in `craw_data_cities'
/home/hungnt/venjob/lib/tasks/crawler.rake:7:in `block (2 levels) in <main>'
/home/hungnt/.rbenv/versions/2.6.6/bin/bundle:23:in `load'
/home/hungnt/.rbenv/versions/2.6.6/bin/bundle:23:in `<main>'
Tasks: TOP => crawler:populate
(See full trace by running task with --trace)
rake aborted!
Net::OpenTimeout: execution expired
/home/hungnt/venjob/lib/src/interface_web.rb:40:in `craw_data_cities'
/home/hungnt/venjob/lib/tasks/crawler.rake:7:in `block (2 levels) in <main>'
/home/hungnt/.rbenv/versions/2.6.6/bin/bundle:23:in `load'
/home/hungnt/.rbenv/versions/2.6.6/bin/bundle:23:in `<main>'
Tasks: TOP => crawler:populate
(See full trace by running task with --trace)
rake aborted!
ArgumentError: You should not use the `match` method in your router without specifying an HTTP method.
If you want to expose your action to both GET and POST, add `via: [:get, :post]` option.
If you want to expose your action to GET, use `get` in the router:
Instead of: match "controller#action"
Do: get "controller#action"
/home/hungnt/venjob/config/routes.rb:4:in `block in <main>'
/home/hungnt/venjob/config/routes.rb:1:in `<main>'
/home/hungnt/venjob/config/environment.rb:5:in `<main>'
/home/hungnt/.rbenv/versions/2.6.6/bin/bundle:23:in `load'
/home/hungnt/.rbenv/versions/2.6.6/bin/bundle:23:in `<main>'
Tasks: TOP => crawler:populate => environment
(See full trace by running task with --trace)
rake aborted!
NoMethodError: undefined method `rescue_from' for #<ActionDispatch::Routing::Mapper:0x000055ef745fa5a0>
Did you mean? rescue
/home/hungnt/venjob/config/routes.rb:6:in `block in <main>'
/home/hungnt/venjob/config/routes.rb:1:in `<main>'
/home/hungnt/venjob/config/environment.rb:5:in `<main>'
/home/hungnt/.rbenv/versions/2.6.6/bin/bundle:23:in `load'
/home/hungnt/.rbenv/versions/2.6.6/bin/bundle:23:in `<main>'
Tasks: TOP => crawler:populate => environment
(See full trace by running task with --trace)
rake aborted!
SyntaxError: /home/hungnt/venjob/config/routes.rb:6: syntax error, unexpected tIDENTIFIER, expecting end
...to: "errors#unprocessable'" via: :all
... ^~~
/home/hungnt/venjob/config/environment.rb:5:in `<main>'
/home/hungnt/.rbenv/versions/2.6.6/bin/bundle:23:in `load'
/home/hungnt/.rbenv/versions/2.6.6/bin/bundle:23:in `<main>'
Tasks: TOP => crawler:populate => environment
(See full trace by running task with --trace)
rake aborted!
SocketError: Failed to open TCP connection to careerbuilder.vn:443 (getaddrinfo: Temporary failure in name resolution)
/home/hungnt/venjob/lib/src/interface_web.rb:63:in `block in craw_data_companies'
/home/hungnt/venjob/lib/src/interface_web.rb:62:in `each'
/home/hungnt/venjob/lib/src/interface_web.rb:62:in `each_with_index'
/home/hungnt/venjob/lib/src/interface_web.rb:62:in `craw_data_companies'
/home/hungnt/venjob/lib/tasks/crawler.rake:8:in `block (2 levels) in <main>'
/home/hungnt/.rbenv/versions/2.6.6/bin/bundle:23:in `load'
/home/hungnt/.rbenv/versions/2.6.6/bin/bundle:23:in `<main>'
Caused by:
SocketError: getaddrinfo: Temporary failure in name resolution
/home/hungnt/venjob/lib/src/interface_web.rb:63:in `block in craw_data_companies'
/home/hungnt/venjob/lib/src/interface_web.rb:62:in `each'
/home/hungnt/venjob/lib/src/interface_web.rb:62:in `each_with_index'
/home/hungnt/venjob/lib/src/interface_web.rb:62:in `craw_data_companies'
/home/hungnt/venjob/lib/tasks/crawler.rake:8:in `block (2 levels) in <main>'
/home/hungnt/.rbenv/versions/2.6.6/bin/bundle:23:in `load'
/home/hungnt/.rbenv/versions/2.6.6/bin/bundle:23:in `<main>'
Tasks: TOP => crawler:populate
(See full trace by running task with --trace)
rake aborted!
SocketError: Failed to open TCP connection to careerbuilder.vn:443 (getaddrinfo: Temporary failure in name resolution)
/home/hungnt/venjob/lib/src/interface_web.rb:63:in `block in craw_data_companies'
/home/hungnt/venjob/lib/src/interface_web.rb:62:in `each'
/home/hungnt/venjob/lib/src/interface_web.rb:62:in `each_with_index'
/home/hungnt/venjob/lib/src/interface_web.rb:62:in `craw_data_companies'
/home/hungnt/venjob/lib/tasks/crawler.rake:8:in `block (2 levels) in <main>'
/home/hungnt/.rbenv/versions/2.6.6/bin/bundle:23:in `load'
/home/hungnt/.rbenv/versions/2.6.6/bin/bundle:23:in `<main>'
Caused by:
SocketError: getaddrinfo: Temporary failure in name resolution
/home/hungnt/venjob/lib/src/interface_web.rb:63:in `block in craw_data_companies'
/home/hungnt/venjob/lib/src/interface_web.rb:62:in `each'
/home/hungnt/venjob/lib/src/interface_web.rb:62:in `each_with_index'
/home/hungnt/venjob/lib/src/interface_web.rb:62:in `craw_data_companies'
/home/hungnt/venjob/lib/tasks/crawler.rake:8:in `block (2 levels) in <main>'
/home/hungnt/.rbenv/versions/2.6.6/bin/bundle:23:in `load'
/home/hungnt/.rbenv/versions/2.6.6/bin/bundle:23:in `<main>'
Tasks: TOP => crawler:populate
(See full trace by running task with --trace)
rake aborted!
SyntaxError: /home/hungnt/hungnt_venjob/config/routes.rb:9: syntax error, unexpected tIDENTIFIER, expecting end
...tch 'apply?job_id=(:job_id)' to: 'job#new_apply', via: 'get'
... ^~
/home/hungnt/hungnt_venjob/config/environment.rb:5:in `<main>'
/home/hungnt/.rbenv/versions/2.6.6/bin/bundle:23:in `load'
/home/hungnt/.rbenv/versions/2.6.6/bin/bundle:23:in `<main>'
Tasks: TOP => crawler:populate => environment
(See full trace by running task with --trace)
rake aborted!
TypeError: no implicit conversion of URI::Generic into String
/home/hungnt/hungnt_venjob/lib/src/interface_web.rb:225:in `block in make_data'
/home/hungnt/hungnt_venjob/lib/src/interface_web.rb:224:in `each'
/home/hungnt/hungnt_venjob/lib/src/interface_web.rb:224:in `each_with_index'
/home/hungnt/hungnt_venjob/lib/src/interface_web.rb:224:in `make_data'
/home/hungnt/hungnt_venjob/lib/tasks/crawler.rake:9:in `block (2 levels) in <main>'
/home/hungnt/.rbenv/versions/2.6.6/bin/bundle:23:in `load'
/home/hungnt/.rbenv/versions/2.6.6/bin/bundle:23:in `<main>'
Tasks: TOP => crawler:populate
(See full trace by running task with --trace)
=> 2020-07-26 20:23:43 +0700 IMPORT SUCCESS
\ No newline at end of file
......@@ -4,7 +4,6 @@ require 'open-uri'
# Description/Explanation of Person class
class InterfaceWeb
def self.crawl_link_for_companies_jobs(page)
puts "Crawling link on page...\nPLease wait...\n"
data = []
......@@ -13,22 +12,18 @@ class InterfaceWeb
file = File.readlines('tmp/link.txt', 'r') if File.exist?('tmp/link.txt')
@@stop_crawl = file.blank? ? '' : file.join
page.times do |i|
page = Nokogiri::HTML(URI.open("https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-trang-#{i+1}-vi.html"))
link_companies = page.search(".figcaption .caption @href")
page = Nokogiri::HTML(URI.open("https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-trang-#{i + 1}-vi.html"))
link_companies = page.search('.figcaption .caption @href')
website_companies += link_companies.map(&:value).uniq
link_jobs = page.search(".figcaption .title .job_link @href")
link_jobs = page.search('.figcaption .title .job_link @href')
website_jobs += link_jobs.map(&:value)
break if website_jobs.include?(@@stop_crawl)
end
website_companies = website_companies.select { |val| val.present? && val != "javascript:void(0);"}
website_jobs = website_jobs.select { |val| val.present?}
website_companies = website_companies.select { |val| val.present? && val != 'javascript:void(0);' }
website_jobs = website_jobs.select(&:present?)
puts "Result:\nCompany: #{website_companies.length} link\nJob : #{website_jobs.length} link\n------------------------"
File.write("tmp/link.txt", "#{website_jobs[0]}")
File.write('tmp/link.txt', website_jobs[0])
data << website_companies << website_jobs
end
......@@ -44,12 +39,12 @@ class InterfaceWeb
page = Nokogiri::HTML(URI.open('https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-vi.html'))
puts "Crawling data location... \n. \n. \n."
data_list_cities = []
data = page.search("#location option")
list_cities = data.to_s.split("</option>")
data = page.search('#location option')
list_cities = data.to_s.split('</option>')
list_cities.each do |x|
data_list_cities << x.gsub(/(^<[\w\D]*>)/, '').gsub(/\n/,'').rstrip
data_list_cities << x.gsub(/(^<[\w\D]*>)/, '').gsub(/\n/, '').rstrip
end
puts "Save data to database... \n------------------------"
puts 'Save data to database...'
data_list_cities.each_with_index do |val, index|
area = index > 69 ? 0 : 1
City.find_or_create_by(name: val) do |city|
......@@ -63,23 +58,22 @@ class InterfaceWeb
def self.craw_data_companies
puts 'Crawl data companies'
link_crawl = get_link_job_and_companies
link_crawl[0].each_with_index do |url,i|
link_crawl[0].each do |url|
page = Nokogiri::HTML(URI.open(URI.parse(URI.escape(url))))
name = ''
address = ''
desc = ''
if page.search(".company-info .info .content .name").text == ""
name = page.search(".section-page #cp_company_name").text
address = page.search(".section-page .cp_basic_info_details ul li:nth-child(1)").text
desc = page.search(".cp_aboutus_item .content_fck").text
if page.search('.company-info .info .content .name').text == ''
name = page.search('.section-page #cp_company_name').text
address = page.search('.section-page .cp_basic_info_details ul li:nth-child(1)').text
desc = page.search('.cp_aboutus_item .content_fck').text
else
name = page.search(".company-info .info .content .name").text
address = page.search(".company-info .info .content p:nth-child(3)").text
desc = page.search(".main-about-us .content").text
name = page.search('.company-info .info .content .name').text
address = page.search('.company-info .info .content p:nth-child(3)').text
desc = page.search('.main-about-us .content').text
end
begin
if (name.present? && address.present? && desc.present?)
if name.present? && address.present? && desc.present?
Company.find_or_create_by(name: name.strip) do |company|
company.name = name.strip
company.address = address
......@@ -87,8 +81,8 @@ class InterfaceWeb
end
puts name
end
rescue => exception
@logger.error "#{exception.message} - Link: #{url}"
rescue StandardError => e
@logger.error "#{e.message} - Link: #{url}"
end
end
end
......@@ -108,103 +102,101 @@ class InterfaceWeb
description: description)
make_foreign_industries_table(industry_name, id_job.id)
make_foreign_cities_table(city_name, id_job.id)
rescue => exception
@logger_jobs.error "#{exception.message} - Job: #{name} - Company_id: #{id_company}"
rescue StandardError => e
@logger_jobs.error "#{e.message} - Job: #{name} - Company_id: #{id_company}"
end
end
def self.crawl_data_jobs_interface_1(page)
name = page.search(".apply-now-content .job-desc .title").text
company_name = page.search(".apply-now-content .job-desc .job-company-name").text
name = page.search('.apply-now-content .job-desc .title').text
company_name = page.search('.apply-now-content .job-desc .job-company-name').text
location = []
length = page.search(".detail-box .map p a").size
length = page.search('.detail-box .map p a').size
length.times do |n|
location << page.search(".detail-box .map p a:nth-child(#{n+1})").text
location << page.search(".detail-box .map p a:nth-child(#{n + 1})").text
end
city_name = location.join(',')
created_date = page.search(".item-blue .detail-box:nth-child(1) ul li:nth-child(1) p")[0].text
expiration_date = page.search(".item-blue .detail-box ul li:last")[1].text.delete!("[\n,\t,\r]").split(' ').last
salary = page.search(".item-blue .detail-box:nth-child(1) ul li:nth-child(1) p")[1].text
industries = page.search(".item-blue .detail-box:nth-child(1) ul li:nth-child(2) a").text
industries = industries.delete!("[\n,\t,\r]").split(' ').select { |v| v != ''}
created_date = page.search('.item-blue .detail-box:nth-child(1) ul li:nth-child(1) p')[0].text
expiration_date = page.search('.item-blue .detail-box ul li:last')[1].text.delete!("[\n,\t,\r]").split(' ').last
salary = page.search('.item-blue .detail-box:nth-child(1) ul li:nth-child(1) p')[1].text
industries = page.search('.item-blue .detail-box:nth-child(1) ul li:nth-child(2) a').text
industries = industries.delete!("[\n,\t,\r]").split(' ').select(&:present?)
industry_name = industries.join(',')
description = page.search(".tabs .tab-content .detail-row:nth-child(n)").to_s
get_level = page.search(".item-blue .detail-box:last ul li:nth-child(3)").text.delete!("[\n,\t,\r]").lstrip.split('Cấp bậc')
description = page.search('.tabs .tab-content .detail-row:nth-child(n)').to_s
get_level = page.search('.item-blue .detail-box:last ul li:nth-child(3)').text.delete!("[\n,\t,\r]").lstrip.split('Cấp bậc')
get_level = get_level[1].to_s.strip
if get_level.blank?
g_level = page.search(".item-blue .detail-box:last ul li:nth-child(2)").text.delete!("[\n,\t,\r]").lstrip.split('Cấp bậc')
g_level = page.search('.item-blue .detail-box:last ul li:nth-child(2)').text.delete!("[\n,\t,\r]").lstrip.split('Cấp bậc')
level = g_level[1].to_s.strip
else
g_level = get_level
level = g_level
end
exp = page.search(".item-blue .detail-box:last ul li:nth-child(2)").text.delete!("[\n,\t,\r]").split('Kinh nghiệm')
exp = page.search('.item-blue .detail-box:last ul li:nth-child(2)').text.delete!("[\n,\t,\r]").split('Kinh nghiệm')
exp = exp[1].to_s.strip
exprience = exp
add_data(name, company_name, city_name, created_date, expiration_date, salary, industry_name, description, level, exprience)
end
def self.crawl_data_jobs_interface_2(page)
name = page.search(".apply-now-content .job-desc .title").text
company_name = page.search(".top-job .top-job-info .tit_company").text
name = page.search('.apply-now-content .job-desc .title').text
company_name = page.search('.top-job .top-job-info .tit_company').text
location = []
length = page.search(".info-workplace .value a").size
length = page.search('.info-workplace .value a').size
length.times do |n|
location << page.search(".info-workplace .value a:nth-child(#{n+1})").text
location << page.search(".info-workplace .value a:nth-child(#{n + 1})").text
end
city_name = location.join(',')
created_date = ""
expiration_date = page.search(".info li:nth-child(4)").text
if expiration_date.blank?
expiration_date = ""
created_date = ''
expiration_date = page.search('.info li:nth-child(4)').text
expiration_date = if expiration_date.blank?
''
else
expiration_date = expiration_date.to_s.delete!("[\n,\t,\r]").split(' ').last
end
salary = page.search(".info li:nth-child(3)").text.split("Lương").last.strip
industry_name = page.search(".info li:nth-child(5) .value").text
description = page.search(".left-col").to_s
lv = page.search(".boxtp .info li:nth-child(2)").text
if lv.blank?
level = ""
expiration_date.to_s.delete!("[\n,\t,\r]").split(' ').last
end
salary = page.search('.info li:nth-child(3)').text.split('Lương').last.strip
industry_name = page.search('.info li:nth-child(5) .value').text
description = page.search('.left-col').to_s
lv = page.search('.boxtp .info li:nth-child(2)').text
level = if lv.blank?
''
else
level = lv.delete!("[\n,\t,\r]").strip.split('Cấp bậc').last.strip
lv.delete!("[\n,\t,\r]").strip.split('Cấp bậc').last.strip
end
exp = page.search(".info li:nth-child(6)").text
if exp.blank?
exprience = ""
exp = page.search('.info li:nth-child(6)').text
exprience = if exp.blank?
''
else
exprience = exp.delete!("[\n,\t,\r]").split('Kinh nghiệm').last.strip
exp.delete!("[\n,\t,\r]").split('Kinh nghiệm').last.strip
end
add_data(name, company_name, city_name, created_date, expiration_date, salary, industry_name, description, level, exprience)
end
def self.crawl_data_jobs_interface_5(page)
name = page.search(".info-company h1").text
company_name = page.search(".info-company .text-job h2").text
city_name = page.search(".DetailJobNew ul li:nth-child(1) a").text
created_date = ""
expiration_date = page.search(".DetailJobNew li:nth-child(9) span").text.strip
salary = page.search(".DetailJobNew li:nth-child(3) span").text.strip
industry_name = page.search(".DetailJobNew li:nth-child(2) span").text.strip
description = page.search(".left-col .detail-row")
level = page.search(".DetailJobNew ul li:nth-child(6) span").text.strip
exprience = page.search(".DetailJobNew li:nth-child(5) span").text.strip
name = page.search('.info-company h1').text
company_name = page.search('.info-company .text-job h2').text
city_name = page.search('.DetailJobNew ul li:nth-child(1) a').text
created_date = ''
expiration_date = page.search('.DetailJobNew li:nth-child(9) span').text.strip
salary = page.search('.DetailJobNew li:nth-child(3) span').text.strip
industry_name = page.search('.DetailJobNew li:nth-child(2) span').text.strip
description = page.search('.left-col .detail-row')
level = page.search('.DetailJobNew ul li:nth-child(6) span').text.strip
exprience = page.search('.DetailJobNew li:nth-child(5) span').text.strip
add_data(name, company_name, city_name, created_date, expiration_date, salary, industry_name, description, level, exprience)
end
def self.make_foreign_industries_table(data,id_job)
def self.make_foreign_industries_table(data, id_job)
content = data.split(',')
content.each do |val|
val.gsub!('&amp;','&') if val.include?('&amp;')
id_industry = Industry.find_by name: (val.strip)
id_industry = Industry.find_by name: val.strip
id_industry = id_industry.blank? ? Industry.create!(name: val.strip).id : id_industry.id
IndustryJob.create!(industry_id: id_industry, job_id: id_job)
end
end
def self.make_foreign_cities_table(data,id_job)
def self.make_foreign_cities_table(data, id_job)
cities = data.split(',')
cities.each do |city|
id_cities = City.find_by name: city.strip
......@@ -222,7 +214,7 @@ class InterfaceWeb
arr_link << val
end
arr_link.reverse!.each_with_index do |path, i|
page = Nokogiri::HTML(URI.open(URI.parse(CGI.escape(path))))
page = Nokogiri::HTML(URI.open(URI.parse(URI.escape(path))))
if !page.search('.item-blue .detail-box:nth-child(1) ul li:nth-child(1) p')[0].nil?
crawl_data_jobs_interface_1(page)
elsif page.search('section .template-200').text != ''
......
# frozen_string_literal: true
require 'zip'
# Description/Explanation of Person class
def extract_zip(file, destination)
FileUtils.mkdir_p(destination)
Zip::File.open(file) do |zip_file|
......
# frozen_string_literal: true
require 'open-uri'
require 'logger'
require 'src/interface_web'
# Description/Explanation of Person class
namespace :crawler do
task populate: :environment do
InterfaceWeb.craw_data_cities()
InterfaceWeb.craw_data_companies()
InterfaceWeb.make_data()
File.open('log/crawler.log','a') do |f|
InterfaceWeb.craw_data_cities
InterfaceWeb.craw_data_companies
InterfaceWeb.make_data
File.open('log/crawler.log', 'a') do |f|
f.puts "#{Time.now} - INFO: Crawler data don't have error!"
end
end
task deletelog: :environment do
File.open('cron.log','w') { |file| File.truncate(file,0) }
File.open('cron.log', 'w') { |file| File.truncate(file, 0) }
end
end
# frozen_string_literal: true
require 'src/ftp'
# Description/Explanation of Person class
namespace :csv do
task import_csv: :environment do
File.write('importcsv.log', "=> #{Time.now} IMPORT SUCCESS")
File.write('importcsv.log', "=> #{Time.now} IMPORT CSV SUCCESS")
Company.find_or_create_by(name: 'Bảo mật', address: 'Vui lòng xem trong mô tả công việc') do |company|
company.name = 'Bảo mật'
company.address = 'Vui lòng xem trong mô tả công việc'
......
# frozen_string_literal: true
namespace :user do
task fakedata: :environment do
User.create!(email: 'trunghung5055@gmail.com',
......
require "test_helper"
# frozen_string_literal: true
require 'test_helper'
# Description/Explanation of Person class
class ApplicationSystemTestCase < ActionDispatch::SystemTestCase
driven_by :selenium, using: :chrome, screen_size: [1400, 1400]
end
# frozen_string_literal: true
require 'test_helper'
# Description/Explanation of Person class
class ApplyJobControllerTest < ActionDispatch::IntegrationTest
test "should get new" do
test 'should get new' do
get apply_job_new_url
assert_response :success
end
test "should get show" do
test 'should get show' do
get apply_job_show_url
assert_response :success
end
test "should get create" do
test 'should get create' do
get apply_job_create_url
assert_response :success
end
end
# frozen_string_literal: true
require 'test_helper'
# Description/Explanation of Person class
class CityControllerTest < ActionDispatch::IntegrationTest
test "should get index" do
test 'should get index' do
get city_index_url
assert_response :success
end
end
# frozen_string_literal: true
require 'test_helper'
# Description/Explanation of Person class
class ErrorsControllerTest < ActionDispatch::IntegrationTest
test "should get file_not_found" do
test 'should get file_not_found' do
get errors_file_not_found_url
assert_response :success
end
test "should get unprocessable" do
test 'should get unprocessable' do
get errors_unprocessable_url
assert_response :success
end
test "should get internal_server_error" do
test 'should get internal_server_error' do
get errors_internal_server_error_url
assert_response :success
end
end
# frozen_string_literal: true
require 'test_helper'
# Description/Explanation of Person class
class HomeControllerTest < ActionDispatch::IntegrationTest
# test "the truth" do
# assert true
......
# frozen_string_literal: true
require 'test_helper'
# Description/Explanation of Person class
class IndustryControllerTest < ActionDispatch::IntegrationTest
test "should get index" do
test 'should get index' do
get industry_index_url
assert_response :success
end
end
# frozen_string_literal: true
require 'test_helper'
# Description/Explanation of Person class
class JobControllerTest < ActionDispatch::IntegrationTest
test "should get index" do
test 'should get index' do
get job_index_url
assert_response :success
end
test "should get detail" do
test 'should get detail' do
get job_detail_url
assert_response :success
end
end
# frozen_string_literal: true
require 'test_helper'
# Description/Explanation of Person class
class TestControllerTest < ActionDispatch::IntegrationTest
test "should get index" do
test 'should get index' do
get test_index_url
assert_response :success
end
end
# frozen_string_literal: true
require 'test_helper'
# Description/Explanation of Person class
class AppliedJobTest < ActiveSupport::TestCase
# test "the truth" do
# assert true
......
# frozen_string_literal: true
require 'test_helper'
# Description/Explanation of Person class
class CityJobTest < ActiveSupport::TestCase
# test "the truth" do
# assert true
......
# frozen_string_literal: true
require 'test_helper'
# Description/Explanation of Person class
class CityTest < ActiveSupport::TestCase
# test "the truth" do
# assert true
......
# frozen_string_literal: true
require 'test_helper'
# Description/Explanation of Person class
class CompanyTest < ActiveSupport::TestCase
# test "the truth" do
# assert true
......
# frozen_string_literal: true
require 'test_helper'
# Description/Explanation of Person class
class FavoriteTest < ActiveSupport::TestCase
# test "the truth" do
# assert true
......
# frozen_string_literal: true
require 'test_helper'
# Description/Explanation of Person class
class HistoryTest < ActiveSupport::TestCase
# test "the truth" do
# assert true
......
# frozen_string_literal: true
require 'test_helper'
# Description/Explanation of Person class
class IndustryJobTest < ActiveSupport::TestCase
# test "the truth" do
# assert true
......
# frozen_string_literal: true
require 'test_helper'
# Description/Explanation of Person class
class IndustryTest < ActiveSupport::TestCase
# test "the truth" do
# assert true
......
# frozen_string_literal: true
require 'test_helper'
# Description/Explanation of Person class
class JobTest < ActiveSupport::TestCase
# test "the truth" do
# assert true
......
# frozen_string_literal: true
require 'test_helper'
# Description/Explanation of Person class
class UserTest < ActiveSupport::TestCase
# test "the truth" do
# assert true
......
# frozen_string_literal: true
ENV['RAILS_ENV'] ||= 'test'
require_relative '../config/environment'
require 'rails/test_help'
# Description/Explanation of Person class
class ActiveSupport::TestCase
# Setup all fixtures in test/fixtures/*.yml for all tests in alphabetical order.
fixtures :all
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment