Commit 784ae619 by Van Hau Le

Merge branch 'page' into 'master'

Page

See merge request !1
parents 363e2ef7 c4f83312
Pipeline #859 canceled with stages
in 0 seconds
,phuocht,devops-OptiPlex-3010,23.07.2020 08:12,file:///home/phuocht/.config/libreoffice/4;
\ No newline at end of file
...@@ -27,13 +27,13 @@ gem 'jbuilder', '~> 2.5' ...@@ -27,13 +27,13 @@ gem 'jbuilder', '~> 2.5'
# gem 'redis', '~> 4.0' # gem 'redis', '~> 4.0'
# Use ActiveModel has_secure_password # Use ActiveModel has_secure_password
# gem 'bcrypt', '~> 3.1.7' # gem 'bcrypt', '~> 3.1.7'
gem 'whenever', require: false
# Use ActiveStorage variant # Use ActiveStorage variant
# gem 'mini_magick', '~> 4.8' # gem 'mini_magick', '~> 4.8'
gem 'rubocop', '~> 0.88.0', require: false
# Use Capistrano for deployment # Use Capistrano for deployment
# gem 'capistrano-rails', group: :development # gem 'capistrano-rails', group: :development
gem 'will_paginate', '3.3.0'
# Reduces boot times through caching; required in config/boot.rb # Reduces boot times through caching; required in config/boot.rb
gem 'bootsnap', '>= 1.1.0', require: false gem 'bootsnap', '>= 1.1.0', require: false
...@@ -44,6 +44,7 @@ end ...@@ -44,6 +44,7 @@ end
group :development do group :development do
# Access an interactive console on exception pages or by calling 'console' anywhere in the code. # Access an interactive console on exception pages or by calling 'console' anywhere in the code.
gem 'pry'
gem 'web-console', '>= 3.3.0' gem 'web-console', '>= 3.3.0'
gem 'listen', '>= 3.0.5', '< 3.2' gem 'listen', '>= 3.0.5', '< 3.2'
# Spring speeds up development by keeping your application running in the background. Read more: https://github.com/rails/spring # Spring speeds up development by keeping your application running in the background. Read more: https://github.com/rails/spring
......
...@@ -47,6 +47,7 @@ GEM ...@@ -47,6 +47,7 @@ GEM
archive-zip (0.12.0) archive-zip (0.12.0)
io-like (~> 0.3.0) io-like (~> 0.3.0)
arel (9.0.0) arel (9.0.0)
ast (2.4.1)
bindex (0.8.1) bindex (0.8.1)
bootsnap (1.4.6) bootsnap (1.4.6)
msgpack (~> 1.0) msgpack (~> 1.0)
...@@ -64,6 +65,8 @@ GEM ...@@ -64,6 +65,8 @@ GEM
chromedriver-helper (2.1.1) chromedriver-helper (2.1.1)
archive-zip (~> 0.10) archive-zip (~> 0.10)
nokogiri (~> 1.8) nokogiri (~> 1.8)
chronic (0.10.2)
coderay (1.1.3)
coffee-rails (4.2.2) coffee-rails (4.2.2)
coffee-script (>= 2.2.0) coffee-script (>= 2.2.0)
railties (>= 4.0.0) railties (>= 4.0.0)
...@@ -125,6 +128,12 @@ GEM ...@@ -125,6 +128,12 @@ GEM
nokogiri (1.10.10) nokogiri (1.10.10)
mini_portile2 (~> 2.4.0) mini_portile2 (~> 2.4.0)
ntlm-http (0.1.1) ntlm-http (0.1.1)
parallel (1.19.2)
parser (2.7.1.4)
ast (~> 2.4.1)
pry (0.13.1)
coderay (~> 1.1)
method_source (~> 1.0)
public_suffix (4.0.5) public_suffix (4.0.5)
puma (3.12.6) puma (3.12.6)
rack (2.2.3) rack (2.2.3)
...@@ -154,11 +163,25 @@ GEM ...@@ -154,11 +163,25 @@ GEM
method_source method_source
rake (>= 0.8.7) rake (>= 0.8.7)
thor (>= 0.19.0, < 2.0) thor (>= 0.19.0, < 2.0)
rainbow (3.0.0)
rake (13.0.1) rake (13.0.1)
rb-fsevent (0.10.4) rb-fsevent (0.10.4)
rb-inotify (0.10.1) rb-inotify (0.10.1)
ffi (~> 1.0) ffi (~> 1.0)
regexp_parser (1.7.1) regexp_parser (1.7.1)
rexml (3.2.4)
rubocop (0.88.0)
parallel (~> 1.10)
parser (>= 2.7.1.1)
rainbow (>= 2.2.2, < 4.0)
regexp_parser (>= 1.7)
rexml
rubocop-ast (>= 0.1.0, < 1.0)
ruby-progressbar (~> 1.7)
unicode-display_width (>= 1.4.0, < 2.0)
rubocop-ast (0.2.0)
parser (>= 2.7.0.1)
ruby-progressbar (1.10.1)
ruby_dep (1.5.0) ruby_dep (1.5.0)
rubyzip (2.3.0) rubyzip (2.3.0)
sass (3.7.4) sass (3.7.4)
...@@ -199,6 +222,7 @@ GEM ...@@ -199,6 +222,7 @@ GEM
unf (0.1.4) unf (0.1.4)
unf_ext unf_ext
unf_ext (0.0.7.7) unf_ext (0.0.7.7)
unicode-display_width (1.7.0)
web-console (3.7.0) web-console (3.7.0)
actionview (>= 5.0) actionview (>= 5.0)
activemodel (>= 5.0) activemodel (>= 5.0)
...@@ -208,6 +232,9 @@ GEM ...@@ -208,6 +232,9 @@ GEM
websocket-driver (0.7.3) websocket-driver (0.7.3)
websocket-extensions (>= 0.1.0) websocket-extensions (>= 0.1.0)
websocket-extensions (0.1.5) websocket-extensions (0.1.5)
whenever (1.0.0)
chronic (>= 0.6.3)
will_paginate (3.3.0)
xpath (3.2.0) xpath (3.2.0)
nokogiri (~> 1.8) nokogiri (~> 1.8)
...@@ -224,8 +251,10 @@ DEPENDENCIES ...@@ -224,8 +251,10 @@ DEPENDENCIES
listen (>= 3.0.5, < 3.2) listen (>= 3.0.5, < 3.2)
mechanize (~> 2.7.6) mechanize (~> 2.7.6)
mysql2 (~> 0.5.3) mysql2 (~> 0.5.3)
pry
puma (~> 3.11) puma (~> 3.11)
rails (~> 5.2.4, >= 5.2.4.3) rails (~> 5.2.4, >= 5.2.4.3)
rubocop (~> 0.88.0)
sass-rails (~> 5.0) sass-rails (~> 5.0)
selenium-webdriver selenium-webdriver
spring spring
...@@ -234,6 +263,8 @@ DEPENDENCIES ...@@ -234,6 +263,8 @@ DEPENDENCIES
tzinfo-data tzinfo-data
uglifier (>= 1.3.0) uglifier (>= 1.3.0)
web-console (>= 3.3.0) web-console (>= 3.3.0)
whenever
will_paginate (= 3.3.0)
RUBY VERSION RUBY VERSION
ruby 2.6.6p146 ruby 2.6.6p146
......
// Jump navigation: clicking the "Việt Nam" / "International" tiles scrolls the
// window to the matching section banner.
//
// Handlers are delegated from `document`, so they keep working when the tiles
// are inserted after this script has run (e.g. a body swap without a full
// page reload).

// Scrolls the window to the top of the first element matching `targetSelector`.
// Does nothing when no such element is on the page (jQuery's .offset() returns
// undefined for an empty set, which would otherwise raise a TypeError).
function scrollToSection(targetSelector) {
  var position = $(targetSelector).offset();
  if (position) {
    $(window).scrollTop(position.top);
  }
}

$(document).on('click', 'div.viet-nam', function () {
  scrollToSection('.vietnam-area');
});

$(document).on('click', 'div.international', function () {
  scrollToSection('.international-area');
});
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
html { html {
overflow-y: scroll; overflow-y: scroll;
padding: 0px;
margin: 0px;
} }
body {
padding-top: 60px;
}
section { section {
overflow: auto; overflow: auto;
} }
...@@ -48,7 +44,7 @@ p { ...@@ -48,7 +44,7 @@ p {
font-size: 1.1em; font-size: 1.1em;
line-height: 1.7em; line-height: 1.7em;
} }
#logo{ .footer-text{
height: 200px; height: 100px;
width: 100px; position: relative;
} }
.city-banner{
background: linear-gradient(to top, #33ccff 0%, #006600 100%);
font-size: 38px;
text-align: center;
color: white;
font-style: italic;
font-family: Bookman, URW Bookman L, serif;
}
.city-name {
margin: 10px;
padding: 10px;
text-align: center;
}
.remove-decoration {
&:hover{
a {
text-decoration: none;
}
}
}
.viet-nam, .international {
cursor: pointer;
}
// Place all the styles related to the Top_pages controller here.
// They will automatically be included in application.css.
// You can use Sass (SCSS) here: http://sass-lang.com/
.float-right {
position: absolute;
bottom: 0;
right: 5%;
}
.top-page{
text-align: center;
width: 100%;
height: 100%;
}
.total-job{
position: absolute;
color: white;
top: 40%;
right: 30%;
font-size: 60px;
}
.banner-ground{
position: relative;
width: 100%;
height: 50vh;
}
.top-banner {
background-image: url('banner.jpg');
width: 100%;
height: 100%;
background-repeat: no-repeat;
background-size: cover;
font-style: italic;
font-family: Bookman, URW Bookman L, serif;
}
.salary{
color: green;
}
.job-details{
position: relative;
padding: 15px;
margin: 10px;
}
#button-follow{
position: absolute;
top: 20%;
right: 5%;
}
.row-table{
color: black;
padding: 15px;
margin: 15px;
text-align: center;
}
.city-banner{
background: linear-gradient(to top, #33ccff 0%, #006600 100%);
font-size: 38px;
text-align: center;
color: white;
font-style: italic;
font-family: Bookman, URW Bookman L, serif;
}
.industry-banner{
background: linear-gradient(to top, #33ccff 0%, #006666 100%);
font-size: 38px;
text-align: center;
color: white;
font-style: italic;
font-family: Bookman, URW Bookman L, serif;
}
.search-bar{
size: 50;
}
.custom_footer {
width: 100%;
height: 150px;
background-image: linear-gradient(160deg, black, #8c8686);
color: white;
}
.city-list:hover, .industry-list:hover{
background-color: black;
.city-name, .count-job, .industry-name{
color: white;
}
}
.city-list, .industry-list{
&:hover {
a {
text-decoration: none;
}
}
}
.city-name, .count-job, .industry-name{
text-decoration: none;
color: black;
}
.search-bar{
padding: 20px;
}
.all-city:hover, .all-industry:hover{
.all-cities, .all-industries{
color: blue;
}
}
.all-city, .all-industry{
&:hover{
a {
text-decoration: none;
}
}
}
# Serves the city listing, split into Vietnamese and international cities.
class CitiesController < ApplicationController
  # Builds both city lists from the same job-count ordering (City.all_city),
  # narrowed by the City.vietnam / City.international scopes.
  def index
    ranked_cities = City.all_city

    @cities_vietnam = ranked_cities.vietnam
    @cities_international = ranked_cities.international
  end
end
# Controller for industry pages. It defines no actions of its own yet; it only
# inherits from ApplicationController.
class IndustriesController < ApplicationController
end
class JobsController < ApplicationController class JobsController < ApplicationController
def index
end
end end
# Serves the landing page: total job count, the latest jobs, the search-bar
# option lists and the top cities/industries.
class TopPagesController < ApplicationController
  # Number of most recent jobs shown on the landing page.
  LATEST_JOBS_LIMIT = 5

  # Assigns everything the landing page templates read:
  # - @cities / @industries: all records, for the search-bar drop-downs
  # - @total_jobs: overall job count shown in the banner
  # - @jobs: the newest jobs by created_at
  # - @jobs_of_cities / @jobs_of_industries: City.top_city / Industry.top_industry
  def index
    @cities = City.all
    @industries = Industry.all
    @total_jobs = Job.count
    # The job list template reads job.cities and job.company_name for every
    # job; eager-load both associations to avoid one query per job.
    @jobs = Job.includes(:company, :cities)
               .order(created_at: :desc)
               .limit(LATEST_JOBS_LIMIT)
    @jobs_of_cities = City.top_city
    @jobs_of_industries = Industry.top_industry
  end
end
module ApplicationHelper module ApplicationHelper
def full_title(page_title)
page_title.to_s
end
end end
class City < ApplicationRecord class City < ApplicationRecord
has_many :city_jobs has_many :city_jobs
has_many :jobs, through: :city_jobs has_many :jobs, through: :city_jobs
scope :all_city, -> { joins(:jobs).group(:city_id).order('count(job_id) DESC') }
scope :vietnam, -> { where('location = 1') }
scope :international, -> { where('location = 0') }
def self.top_city
joins(:jobs).group(:city_id).order('count(job_id) DESC').limit(9)
end
end end
class CityJob < ApplicationRecord class CityJob < ApplicationRecord
belongs_to :city belongs_to :city
belongs_to :job belongs_to :job
end end
class Industry < ApplicationRecord class Industry < ApplicationRecord
has_many :industry_jobs has_many :industry_jobs
has_many :jobs, through: :industry_jobs has_many :jobs, through: :industry_jobs
def self.top_industry
joins(:jobs).group(:industry_id).order('count(job_id) DESC').limit(9)
end
end end
class IndustryJob < ApplicationRecord class IndustryJob < ApplicationRecord
belongs_to :industry belongs_to :industry
belongs_to :job belongs_to :job
end end
...@@ -14,4 +14,9 @@ class Job < ApplicationRecord ...@@ -14,4 +14,9 @@ class Job < ApplicationRecord
has_many :histories has_many :histories
has_many :users, through: :histories has_many :users, through: :histories
def company_name
@company_name ||= company&.name
end
end end
<% @cities_international.each do |city| %>
<div class="col-3 remove-decoration">
<%= link_to '#' do %>
<div class="border border-dark rounded city-name">
<div>
<strong><%= city.name %></strong>
</div>
<div>
<%= city.jobs.count %>
</div>
</div>
<% end %>
</div>
<% end %>
<% @cities_vietnam.each do |city| %>
<div class="col-3 remove-decoration">
<%= link_to '#' do %>
<div class="border border-dark rounded city-name">
<div>
<strong><%= city.name %></strong>
</div>
<div>
<%= city.jobs.count %>
</div>
</div>
<% end %>
</div>
<% end %>
<% provide(:title, 'All Cities') %>
<div class="container">
<div class="all-cities">
<div class="city-banner rounded">City list</div>
<div class="row">
<div class="col-6">
<div class="row-table border border-dark rounded viet-nam">Việt Nam</div>
</div>
<div class="col-6">
<div class="row-table border border-dark rounded international">International</div>
</div>
</div>
</div>
<div class="city-banner rounded vietnam-area">Việt Nam</div>
<div class="city-vietnam">
<div class="row">
<%= render 'cities/cities_vietnam' %>
</div>
</div>
<div class="city-banner rounded international-area">International</div>
<div class="city-international">
<div class="row">
<%= render 'cities/cities_international' %>
</div>
</div>
</div>
<div class="custom_footer">
<div class="footer-text">
<div class="float-right">
Copyright © ZIGExN VeNtura 2020
</div>
</div>
</div>
<header class="navbar navbar-expand-lg navbar-dark bg-dark">
<div class="collapse navbar-collapse">
<div class="container-fluid">
<%= link_to image_tag("logo_venjob.png", alt: "Logo", id: "logo_venjob", width: 120), root_path %>
<nav>
<ul class="navbar-nav mr-auto">
<li><%= link_to "Login", '#' , class: "nav-item nav-link" %></li>
<li><%= link_to "Register", '#', class: "nav-item nav-link" %></li>
<li><%= link_to "Favorite", '#', class: "nav-item nav-link" %></li>
<li><%= link_to "History", '#', class: "nav-item nav-link" %></li>
</ul>
</nav>
</div>
</div>
</header>
<%# Search bar: free-text box plus city and industry drop-downs.
    Reads @cities and @industries, which the rendering action must assign. %>
<div class="search-bar">
  <div class="row">
    <div class="col-sm">
      <input class="form-control mr-sm-2" type="search" placeholder="Search" aria-label="Search">
    </div>
    <div class="col-sm">
      <select class="form-control" id="exampleFormControlSelect1">
        <option>--None</option>
        <%# Plain "<%" tag: "<%=" would also print the collection returned by #each. %>
        <% @cities.each do |city| %>
          <option><%= city.name %></option>
        <% end %>
      </select>
    </div>
    <div class="col-sm">
      <%# Distinct id: element ids must be unique within a document. %>
      <select class="form-control" id="exampleFormControlSelect2">
        <option>--None</option>
        <% @industries.each do |industry| %>
          <option><%= industry.name %></option>
        <% end %>
      </select>
    </div>
    <%# "col-sm-2" is the Bootstrap grid class; "col-2-sm" matched nothing. %>
    <div class="col-sm-2">
      <button class="btn btn-primary" type="submit">Search</button>
    </div>
  </div>
</div>
<% @jobs_of_cities.each do |city| %>
<div class="col-4">
<div class="row-table border border-dark rounded city-list">
<%= link_to '#' do %>
<div class="city-name"><strong><%= city.name %></strong></div>
<div class="count-job"><%= city.jobs.count %></div>
<% end %>
</div>
</div>
<% end %>
<% @jobs_of_industries.each do |industry| %>
<div class="col-4">
<div class="row-table border border-dark rounded industry-list">
<%= link_to '#' do %>
<div class="industry-name"><strong><%= industry.name %></strong></div>
<div class="count-job"><%= industry.jobs.count %></div>
<% end %>
</div>
</div>
<% end %>
<% @jobs.each do |job| %>
<% if job.cities.present? %>
<div class="border border-dark rounded">
<div class="job-details">
<div class="title"><strong><%= job.title %></strong></div>
<div><%= job.company_name %></div>
<div class="salary">Salary: <%= job.salary %></div>
<div>
<% job.cities.each do |city| %>
<%= city.name %>
<% end %>
</div>
<button type="button" class="btn btn-primary" id="button-follow">♥ Follow</button>
</div>
</div>
<br>
<% end %>
<% end %>
<!DOCTYPE html> <!DOCTYPE html>
<html> <html>
<head> <head>
<title>Venjob</title> <title><%= full_title(yield(:title)) %></title>
<%= csrf_meta_tags %> <%= csrf_meta_tags %>
<%= csp_meta_tag %> <%= csp_meta_tag %>
...@@ -12,23 +12,8 @@ ...@@ -12,23 +12,8 @@
<%= javascript_include_tag 'bootstrap', media: 'all', 'data-turbolinks-track': 'reload' %> <%= javascript_include_tag 'bootstrap', media: 'all', 'data-turbolinks-track': 'reload' %>
</head> </head>
<body> <body>
<header class="navbar navbar-expand-lg navbar-dark bg-dark"> <%= render "layouts/header" %>
<div class="collapse navbar-collapse">
<div class="container">
<%= link_to image_tag("logo_venjob.png", alt: "Logo", id: "logo_venjob", width: 120, left: 0), '#'%>
<nav>
<ul class="navbar-nav mr-auto">
<li><%= link_to "Login", '#' %></li>
<li><%= link_to "Register", '#' %></li>
<li><%= link_to "Favorite", '#' %></li>
<li><%= link_to "History", '#' %></li>
</ul>
</nav>
</div>
</div>
</header>
<div class="container">
<%= yield %> <%= yield %>
</div> <%= render "layouts/footer" %>
</body> </body>
</html> </html>
<% provide(:title, 'Venjob') %>
<div class="banner-ground">
<div class="top-banner">
<div class="total-job">Having <%= @total_jobs %> jobs for you!</div>
</div>
</div>
<div class="container">
<div class="search-bar"><%= render 'layouts/search_bar' %></div>
<div class="job-list"><%= render 'layouts/show_jobs' %></div>
<div class="city-banner rounded">City</div>
<div class="row"><%= render 'layouts/show_cities' %></div>
<div class="all-city">
<%= link_to '#' do %>
<div class="row-table border border-dark rounded all-cities">
<strong>All Cities</strong>
</div>
<% end %>
</div>
<div class="industry-banner rounded">Industry</div>
<div class="row"><%= render 'layouts/show_industries' %></div>
<div class="all-industry">
<%= link_to '#' do %>
<div class="row-table border border-dark rounded all-industries">
<strong>All Industries</strong>
</div>
<% end %>
</div>
</div>
Rails.application.routes.draw do Rails.application.routes.draw do
# For details on the DSL available within this file, see http://guides.rubyonrails.org/routing.html # For details on the DSL available within this file, see http://guides.rubyonrails.org/routing.html
resources :jobs resources :jobs
resources :top_pages
resources :industries
resources :cities
root to: "top_pages#index"
end end
# Schedule definition for the `whenever` gem.

# Pass the current PATH value through to the generated schedule.
env :PATH, ENV['PATH']
# Run the import:auto rake task every 20 minutes.
every 20.minutes do
rake 'import:auto'
end
...@@ -10,7 +10,7 @@ ...@@ -10,7 +10,7 @@
# #
# It's strongly recommended that you check this file into your version control system. # It's strongly recommended that you check this file into your version control system.
ActiveRecord::Schema.define(version: 2020_07_15_035356) do ActiveRecord::Schema.define(version: 2020_07_23_071239) do
create_table "cities", options: "ENGINE=InnoDB DEFAULT CHARSET=utf8", force: :cascade do |t| create_table "cities", options: "ENGINE=InnoDB DEFAULT CHARSET=utf8", force: :cascade do |t|
t.string "name" t.string "name"
...@@ -83,6 +83,11 @@ ActiveRecord::Schema.define(version: 2020_07_15_035356) do ...@@ -83,6 +83,11 @@ ActiveRecord::Schema.define(version: 2020_07_15_035356) do
t.datetime "updated_at", null: false t.datetime "updated_at", null: false
end end
create_table "top_pages", options: "ENGINE=InnoDB DEFAULT CHARSET=utf8", force: :cascade do |t|
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
end
create_table "users", options: "ENGINE=InnoDB DEFAULT CHARSET=utf8", force: :cascade do |t| create_table "users", options: "ENGINE=InnoDB DEFAULT CHARSET=utf8", force: :cascade do |t|
t.string "name" t.string "name"
t.string "email" t.string "email"
......
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
class Crawler
class Crawler
def initialize(logger, url)
@logger = logger
@url = url
end
def crawl_city_industry
crawl_city
crawl_industry
crawl_company
crawl_job
end
def crawl_city def crawl_city
page = Nokogiri::HTML(URI.open("https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-vi.html")) page = Nokogiri::HTML(URI.open(@url))
get_name = page.search('select#location') get_name = page.search('select#location')
data_city = get_name.search('option').map(&:text).map(&:strip) data_city = get_name.search('option').map(&:text).map(&:strip)
data_city.each do |name_city| data_city.each do |name_city|
if City.find_by(id: 70) if City.find_by(id: 70)
city = City.create!(name: name_city, city = City.create!(name: name_city,
location: 0) location: 0)
else else
city = City.create!(name: name_city, city = City.create!(name: name_city,
location: 1) location: 1)
end end
end end
end end
def crawl_industry def crawl_industry
page = Nokogiri::HTML(URI.open("https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-vi.html")) page = Nokogiri::HTML(URI.open(@url))
get_name = page.search('select#industry') get_name = page.search('select#industry')
data_industry = get_name.search('option').map{ |p| p.text.strip } data_industry = get_name.search('option').map { |p| p.text.strip }
data_industry.each do |name_industry| data_industry.each do |name_industry|
industry = Industry.create!(name: name_industry) industry = Industry.create!(name: name_industry)
end end
end end
def crawl_company
for n in 1..10
company_info = Nokogiri::HTML(URI.open("https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-trang-#{n}-vi.html")) def city_relationship(row, job)
company_link = company_info.css('div.caption a.company-name').map{ |link| link['href'] } location_relationship = row.css('div.map p a').children.map { |name_city| name_city.text.strip }
company_link.each do |link| cities_relationship = City.where(name: location_relationship)
if link.include?('\u2019')
link.gsub!('\u2019',"'") job.cities << cities_relationship
end
if link == 'javascript:void(0);'
next
elsif link != 'https://careerbuilder.vn/vi/nha-tuyen-dung/hr-vietnam\xE2\x80\x99s-ess-client.35A4EFBA.html'
company_page = Nokogiri::HTML(URI.open(URI.parse(URI.escape(link))))
if company_page.search('p.name').text != nil
begin
name_company = company_page.search('p.name').text
address_company = company_page.css('div.content p').children[1].text
introduction_company = company_page.css('div.main-about-us').text
get_name_company = Company.find_by(name: "#{name_company}")
if get_name_company == nil
company = Company.create!(name: name_company,
address: address_company,
introduction: introduction_company)
end
rescue StandardError => e
puts e
end
end
end
end
end
end end
def crawl_job_relationships
for n in 1..10 def industry_relationship(row, job)
page_access = Nokogiri::HTML(URI.open("https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-trang-#{n}-vi.html")) industry_relationship = row.css('li a').children.map { |name_industry| name_industry.text.strip }
get_link = page_access.css('a.job_link').map{ |link| link['href'] } industries_relationship = Industry.where(name: industry_relationship)
get_link.each do |link|
if link.include?('\u2013') job.industries << industries_relationship
link.gsub!('\u2013','–')
end
page_job = Nokogiri::HTML(URI.open(URI.parse(URI.escape(link))))
get_row = page_job.search('div.bg-blue div.row')
if get_row != ""
get_name_company = page_job.search('div.job-desc a.job-company-name').text.strip
company_table = Company.find_by(name: "#{get_name_company}")
title_job = page_job.search('div.job-desc p').text
description = page_job.search('div.detail-row')
arr_column = get_row.css('div.has-background').map{ |data| data.text.split(' ').join(' ') }
arr_column.each_with_index do | val, key |
if company_table != nil
if val.include?('Ngày cập nhật')
arr_data = val.gsub('Ngày cập nhật ','').split(' ')
date = arr_data.first
elsif val.include?('Lương') && val.include?('Kinh nghiệm') == true
arr_sub = ((((val.gsub('Lương ','')).gsub(' Kinh nghiệm ', '*')).gsub(' Cấp bậc ', '*')).gsub(' Hết hạn nộp ', '*')).split('*')
salary = arr_sub[0]
experience = arr_sub[1]
level = arr_sub[2]
expiration_date = arr_sub[3]
job = Job.create!(title: title_job,
level: level,
salary: salary,
experience: experience,
expiration_date: expiration_date,
description: description,
company_id: company_table.id)
elsif val.include?('Lương') && val.include?('Kinh nghiệm') == false
arr_sub = (((val.gsub('Lương ','')).gsub(' Cấp bậc ', '*')).gsub(' Hết hạn nộp ', '*')).split('*')
salary = arr_sub[0]
level = arr_sub[1]
expiration_date = arr_sub[2]
job = Job.create!(title: title_job,
level: level,
salary: salary,
experience: 'Không có',
expiration_date: expiration_date,
description: description,
company_id: company_table.id)
end
end
end
job_table = Job.find_by(title: "#{title_job}")
if job_table != nil
location_rel = get_row.css('div.map p a').children.map{ |location| location.text.strip }
location_rel.each do |loc|
puts "#{job_table.id} - #{loc}"
city_table = City.find_by(name: "#{loc}")
city_jobs = CityJob.create!(job_id: job_table.id, city_id: city_table.id)
end
industry_rel = get_row.css('li a').children.map{ |industry| industry.text.strip }
industry_rel.each do |ind|
puts "#{job_table.id} - #{ind}"
industry_table = Industry.find_by(name: "#{ind}")
industry_jobs = IndustryJob.create!(job_id: job_table.id, industry_id: industry_table.id)
end
end
end
end
end
end end
def get_file_csv def create_job(title, link_page, row, company)
Net::FTP.open('192.168.1.156', 'training', 'training') do |ftp| description = link_page.search('div.detail-row').to_s
files = ftp.list salary = row.at_xpath('//li[./strong/i[contains(@class, "fa fa-usd")]]/p').text.strip
puts "list out files in root directory:" experience = row.at_xpath('//li[./strong/i[contains(@class, "fa fa-briefcase")]]/p')&.text&.strip
puts files level = row.at_xpath('//li[./strong/i[contains(@class, "mdi mdi-account")]]/p').text.strip
ftp.getbinaryfile('jobs.zip') expiration_date = row.at_xpath('//li[./strong/i[contains(@class, "mdi mdi-calendar-check")]]/p').text.strip
end
job = Job.find_or_create_by!(title: title,
level: level,
salary: salary,
experience: experience,
expiration_date: expiration_date,
description: description,
company_id: company.id)
city_relationship(row, job)
industry_relationship(row, job)
end end
def extract_zip(file, destination) def crawl_company
FileUtils.mkdir_p(destination) (1..10).each do |n|
Zip::File.open(file) do |zip_file| info = Nokogiri::HTML(URI.open("https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-trang-#{n}-vi.html"))
zip_file.each do |f| links = info.css('div.caption a.company-name').map { |link| link['href'] }
fpath = File.join(destination, f.name) links.each do |link|
zip_file.extract(f, fpath) unless File.exist?(fpath) next if link == 'javascript:void(0);'
page = Nokogiri::HTML(URI.open(URI.escape(link)))
name = page.search('p.name')&.text
next if name.blank?
address = page.css('div.content p').children[1]&.text
introduction = page.css('div.main-about-us').text
begin
Company.find_or_create_by!(name: name,
address: address,
introduction: introduction)
rescue StandardError => e
@logger.error e.message
end
end end
end end
end end
def import_file_csv def crawl_job
file = "jobs.csv" (1..10).each do |n|
CSV.foreach(file, headers: true) do |row| info = Nokogiri::HTML(URI.open("https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-trang-#{n}-vi.html"))
begin links = info.css('a.job_link').map { |link| link['href'] }
company_name = row[5].strip links.each do |link|
company_address = row[2] link_page = Nokogiri::HTML(URI.open(URI.escape(link)))
company_introduction = row[0] row = link_page.search('div.bg-blue div.row')
company_table = Company.find_by(name: "#{company_name}") next if row.blank?
if company_table == nil
company_table = Company.create!(name: company_name,
address: company_address,
introduction: company_introduction)
end
title_job = row[9].strip
description_job = row[7]
level = row[8]
salary = row[11]
if company_table != nil
job_table = Job.create!(title: title_job,
description: description_job,
level: level,
salary: salary,
company_id: company_table.id)
end
industry = row[1].strip
industry_find = Industry.find_by(name: industry)
if industry_find == nil
industry_table = Industry.create!(name: industry)
industry_job_table = IndustryJob.create!(job_id: job_table.id, industry_id: industry_find.id)
elsif industry_find != nil
industry_job_table = IndustryJob.create!(job_id: job_table.id, industry_id: industry_find.id)
end
puts "========================================="
puts job_table.id, title_job, industry, salary
location_data = row[16].strip
location = (location_data.gsub('["','')).gsub('"]','').strip
location_find = City.find_by(name: location)
if location_find != nil
city_job_table = CityJob.create!(job_id: job_table.id, city_id: location_find.id)
end
puts "Location: #{location}"
rescue StandardError => e begin
puts e company_name = link_page.search('div.job-desc a.job-company-name').text.strip
company = Company.find_by(name: company_name)
next if company.blank?
title = link_page.search('div.job-desc p').text.strip
next if title.blank?
create_job(title, link_page, row, company)
rescue StandardError => e
@logger.error e.message
end
end end
end end
end end
end
\ No newline at end of file end
require 'net/ftp'
require 'csv'
require 'zip'
# Downloads jobs.zip from the FTP server, unpacks it under lib/csv and imports
# companies, jobs and their industry/city links from lib/csv/jobs.csv.
class CSVImporter
  # NOTE(review): host and credentials are hard-coded in source; consider
  # moving them to environment variables or Rails credentials.
  NAME_DOMAIN = '192.168.1.156'.freeze
  FTP_USERNAME = 'training'.freeze
  FTP_PASSWORD = 'training'.freeze

  # @param logger [#error] receives the message of every row that fails to import
  def initialize(logger)
    @logger = logger
    @extracting_directory = Rails.root.join('lib', 'csv')
    @zip_directory = Rails.root.join('jobs.zip')
    @importer = Rails.root.join('lib', 'csv', 'jobs.csv')
  end

  # Runs the whole pipeline: download, unpack, import.
  def import
    get_file_csv
    extract_zip
    import_file_csv
  end

  # Downloads jobs.zip from the FTP server to the path extract_zip reads.
  def get_file_csv
    # The constants are FTP_USERNAME / FTP_PASSWORD; the former references
    # (USERNAME_FTP / PASSWORD_FTP) raised NameError.
    Net::FTP.open(NAME_DOMAIN, FTP_USERNAME, FTP_PASSWORD) do |ftp|
      # Explicit local path so the download does not depend on the process's
      # current working directory.
      ftp.getbinaryfile('jobs.zip', @zip_directory.to_s)
    end
  end

  # Unpacks every entry of the downloaded archive into the extraction directory.
  def extract_zip
    FileUtils.mkdir_p(@extracting_directory)
    Zip::File.open(@zip_directory) do |zip_file|
      zip_file.each do |entry|
        target = File.join(@extracting_directory, entry.name)
        # The block tells rubyzip to overwrite an existing file; skipping
        # existing files meant a re-run kept importing the old CSV.
        zip_file.extract(entry, target) { true }
      end
    end
  end

  # Imports every CSV row; a failing row is logged and does not stop the run.
  def import_file_csv
    CSV.foreach(@importer, headers: true) { |row| import_row(row) }
  end

  private

  # Creates (or finds) the company and job described by one CSV row and links
  # the job to its industry and city. Rows without a company name or job name
  # are skipped.
  def import_row(row)
    company_name = row['company name']
    return if company_name.blank?

    company = Company.find_or_create_by!(name: company_name,
                                         address: row['company address'],
                                         introduction: row['benefit'])
    title_job = row['name']
    return if title_job.blank?

    job = Job.find_or_create_by!(title: title_job,
                                 description: "#{row['description']} #{row['requirement']}",
                                 level: row['level'],
                                 salary: row['salary'],
                                 company_id: company.id)

    # Industry and city are linked independently: an unknown industry must not
    # prevent the city link from being created.
    attach_missing(job.industries, Industry, row['category'])
    # to_s guards against a missing "work place" column value.
    location = row['work place'].to_s.gsub('["', '').gsub('"]', '')
    attach_missing(job.cities, City, location)
  rescue StandardError => e
    @logger.error e.message
  end

  # Appends to `association` the `model` records named `name` that are not
  # linked yet, so importing the same row twice creates no duplicate join rows.
  def attach_missing(association, model, name)
    return if name.blank?

    missing = model.where(name: name).to_a - association.to_a
    association << missing unless missing.empty?
  end
end
# Crawls one job-listing page: first the companies linked from it, then the
# jobs linked from it (jobs are only stored for companies already known).
class JobParser
  # @param logger [#error] receives the message of every link that fails
  # @param url [String] address of the listing page to crawl
  def initialize(logger, url)
    @logger = logger
    @url = url
  end

  # Crawls companies first so that find_job can resolve them by name.
  def crawl_all
    find_company
    find_job
  end

  # Visits every company link on the listing page and stores the company.
  # A link that cannot be fetched or parsed is logged and skipped instead of
  # aborting the remaining links.
  def find_company
    listing = fetch_page(@url)
    links = listing.css('div.caption a.company-name').map { |anchor| anchor['href'] }
    links.each do |link|
      next if link.blank? || link == 'javascript:void(0);'

      begin
        page = fetch_page(escape_link(link))
        name = page.search('p.name').text
        next if name.blank?

        address = page.css('div.content p').children[1]&.text
        introduction = page.css('div.main-about-us').text
        Company.find_or_create_by!(name: name,
                                   address: address,
                                   introduction: introduction)
      rescue StandardError => e
        @logger.error e.message
      end
    end
  end

  # Links `job` to the cities whose names appear in the row's map section.
  def city_relationship(row, job)
    city_names = row.css('div.map p a').children.map { |node| node.text.strip }
    attach_missing(job.cities, City.where(name: city_names))
  end

  # Links `job` to the industries whose names appear in the row's list links.
  def industry_relationship(row, job)
    industry_names = row.css('li a').children.map { |node| node.text.strip }
    attach_missing(job.industries, Industry.where(name: industry_names))
  end

  # Builds the job from the detail page and links its cities and industries.
  # Raises NoMethodError when the salary, level or expiration node is missing;
  # find_job rescues and logs that.
  def create_job(title, link_page, row, company)
    description = link_page.search('div.detail-row').to_s
    # NOTE(review): these XPath expressions start with "//", which searches the
    # whole document rather than only `row` - confirm that is intended.
    salary = row.at_xpath('//li[./strong/i[contains(@class, "fa fa-usd")]]/p').text.strip
    experience = row.at_xpath('//li[./strong/i[contains(@class, "fa fa-briefcase")]]/p')&.text&.strip
    level = row.at_xpath('//li[./strong/i[contains(@class, "mdi mdi-account")]]/p').text.strip
    expiration_date = row.at_xpath('//li[./strong/i[contains(@class, "mdi mdi-calendar-check")]]/p').text.strip
    job = Job.find_or_create_by!(title: title,
                                 level: level,
                                 salary: salary,
                                 experience: experience,
                                 expiration_date: expiration_date,
                                 description: description,
                                 company_id: company.id)
    city_relationship(row, job)
    industry_relationship(row, job)
  end

  # Visits every job link on the listing page and stores the job when its
  # company is already known. A failing link is logged and skipped.
  def find_job
    listing = fetch_page(@url)
    links = listing.css('a.job_link').map { |anchor| anchor['href'] }
    links.each do |link|
      next if link.blank?

      begin
        link_page = fetch_page(escape_link(link))
        row = link_page.search('div.bg-blue div.row')
        next if row.blank?

        company_name = link_page.search('div.job-desc a.job-company-name').text.strip
        company = Company.find_by(name: company_name)
        next if company.blank?

        title = link_page.search('div.job-desc p').text.strip
        next if title.blank?

        create_job(title, link_page, row, company)
      rescue StandardError => e
        @logger.error e.message
      end
    end
  end

  private

  # Downloads and parses one page; the block form closes the handle afterwards.
  def fetch_page(url)
    URI.open(url) { |io| Nokogiri::HTML(io) }
  end

  # Percent-encodes unsafe characters in a scraped href. Same escaping as the
  # deprecated URI.escape, which was removed in Ruby 3.0.
  def escape_link(link)
    URI::DEFAULT_PARSER.escape(link)
  end

  # Appends only the records not linked yet, so crawling the same job again
  # creates no duplicate join rows.
  def attach_missing(association, records)
    association << (records.to_a - association.to_a)
  end
end
require 'src/crawler.rb' require 'src/crawler.rb'
require 'net/ftp' require 'src/job_parser.rb'
require 'csv' require 'src/csv_importer.rb'
require 'zip'
action = Crawler.new
namespace :import do namespace :import do
desc "crawler data" desc 'crawler data'
task crawler: :environment do task crawler: :environment do
action.crawl_city action = Crawler.new(logger, url).crawl_city_industry
action.crawl_industry
action.crawl_company
action.crawl_job_relationships
end end
desc "get file CSV from server" desc 'Crontab'
task csv_get: :environment do task auto: :environment do
action.get_file_csv crontab = JobParser.new(logger, url)
action.extract_zip('./jobs.zip','.') csv_importer = CSVImporter.new(logger)
crontab.crawl_all
csv_importer.import
end end
desc "Import data from CSV"
task data_csv: :environment do def logger
action.import_file_csv Logger.new(Rails.root.join('log','crawling.log'))
end end
end
\ No newline at end of file def url
'https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-trang-1-vi.html'.freeze
end
end
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment