Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
V
VeNJob
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
1
Merge Requests
1
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Tô Ngọc Ánh
VeNJob
Commits
e24f4ced
Commit
e24f4ced
authored
Sep 03, 2020
by
Tô Ngọc Ánh
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
add autoload sub folder lib
parent
7f9aec69
Pipeline
#1062
canceled with stages
in 0 seconds
Changes
12
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
12 changed files
with
166 additions
and
172 deletions
+166
-172
.gitignore
+1
-1
app/controllers/jobs_controller.rb
+6
-8
app/models/company.rb
+0
-2
app/models/industry.rb
+0
-2
app/models/location.rb
+0
-2
app/views/shared/_searchbar.html.erb
+2
-2
config/application.rb
+3
-0
lib/common/solr_server.rb
+0
-21
lib/import/crawler/crawler.rb
+97
-95
lib/import/csv/services/csv_import.rb
+33
-32
lib/solr/solr_server.rb
+21
-0
lib/tasks/import_data.rake
+3
-7
No files found.
.gitignore
View file @
e24f4ced
...
...
@@ -26,7 +26,7 @@
# Ignore master key for decrypting credentials and more.
/config/master.key
/lib/data
/lib/
import/csv/
data
/public/uploads/
.env
app/controllers/jobs_controller.rb
View file @
e24f4ced
require
'./lib/common/solr_server.rb'
class
JobsController
<
ApplicationController
before_action
:get_data_search_bar
,
only:
%i[index search]
...
...
@@ -21,9 +19,9 @@ class JobsController < ApplicationController
def
search
solr
=
SolrServer
.
new
results
=
solr
.
search
(
params
[
:search
],
params
[
:industry
],
params
[
:location
],
params
[
:page
]
)
results
=
solr
.
search
(
params
)
@paginatable_array
=
Kaminari
.
paginate_array
(
results
[
'response'
][
'docs'
],
total_count:
results
[
'response'
][
'numFound'
]).
page
(
params
[
:page
])
@keyword
=
set_search_keyword
(
params
[
:search
],
params
[
:industry
],
params
[
:location
]
)
@keyword
=
set_search_keyword
(
params
)
end
private
...
...
@@ -38,10 +36,10 @@ class JobsController < ApplicationController
@industries
=
Industry
.
select
(
:id
,
:name
)
end
def
set_search_keyword
(
keyword
,
industry_id
,
location_id
)
search_text
=
keyword
.
blank?
?
'All Jobs'
:
keyword
industry_name
=
@industries
.
detect
{
|
v
|
v
.
id
==
industry_id
.
to_i
}.
try
(
:name
)
unless
industry_id
.
blank?
city_name
=
@locations
.
detect
{
|
v
|
v
.
id
==
location_id
.
to_i
}.
try
(
:city
)
unless
location_id
.
blank?
def
set_search_keyword
(
search_params
)
search_text
=
search_params
[
:search
].
blank?
?
'All Jobs'
:
search_params
[
:search
]
industry_name
=
@industries
.
detect
{
|
v
|
v
.
id
==
search_params
[
:industry_id
].
to_i
}.
try
(
:name
)
unless
search_params
[
:industry_id
]
.
blank?
city_name
=
@locations
.
detect
{
|
v
|
v
.
id
==
search_params
[
:location_id
].
to_i
}.
try
(
:city
)
unless
search_params
[
:location_id
]
.
blank?
"
#{
search_text
}
#{
industry_name
}
#{
city_name
}
"
end
end
app/models/company.rb
View file @
e24f4ced
require
'./lib/common/convert_slug'
class
Company
<
ApplicationRecord
include
ConvertSlug
...
...
app/models/industry.rb
View file @
e24f4ced
require
'./lib/common/convert_slug'
class
Industry
<
ApplicationRecord
include
ConvertSlug
...
...
app/models/location.rb
View file @
e24f4ced
require
'./lib/common/convert_slug.rb'
class
Location
<
ApplicationRecord
include
ConvertSlug
...
...
app/views/shared/_searchbar.html.erb
View file @
e24f4ced
<%=
form_tag
search_jobs_path
,
method: :get
,
class:
"mt-4 form-group
#{
my_class
}
"
,
enforce_utf8:
false
do
%>
<%=
search_field_tag
:search
,
params
[
:search
],
placeholder:
'Search'
,
class:
'form-control m-2'
%>
<%=
select_tag
:industry
,
options_from_collection_for_select
(
@industries
,
:id
,
:name
,
params
[
:industry
]),
prompt:
'All Industries'
,
class:
'form-control m-2'
%>
<%=
select_tag
:location
,
options_from_collection_for_select
(
@locations
,
:id
,
:city
,
params
[
:location
]),
prompt:
'All Locations'
,
class:
'form-control m-2'
%>
<%=
select_tag
:industry
_id
,
options_from_collection_for_select
(
@industries
,
:id
,
:name
,
params
[
:industry_id
]),
prompt:
'All Industries'
,
class:
'form-control m-2'
%>
<%=
select_tag
:location
_id
,
options_from_collection_for_select
(
@locations
,
:id
,
:city
,
params
[
:location_id
]),
prompt:
'All Locations'
,
class:
'form-control m-2'
%>
<%=
submit_tag
'Search'
,
name:
nil
,
class:
'btn btn-outline-success m-2'
%>
<%
end
%>
config/application.rb
View file @
e24f4ced
...
...
@@ -15,5 +15,8 @@ module VeNJobAnhtn
# Application configuration can go into files in config/initializers
# -- all .rb files in that directory are automatically loaded after loading
# the framework and any gems in your application.
# Auto Load
config
.
autoload_paths
+=
Dir
[
Rails
.
root
.
join
(
'lib'
,
'{import,common,solr}'
)]
end
end
lib/common/solr_server.rb
deleted
100644 → 0
View file @
7f9aec69
# Wrapper around an RSolr connection that runs paginated job searches.
class SolrServer
  # Opens the connection to the Solr endpoint configured in Settings.solr_server.
  def initialize
    @solr = RSolr.connect(url: Settings.solr_server)
  end

  # Runs a paginated query against the 'select' request handler.
  #
  # @param keyword [String, nil] free-text keyword; blank matches everything
  # @param industry_id [String, Integer, nil] industry filter; blank matches any
  # @param location_id [String, Integer, nil] location filter; blank matches any
  # @param page [String, Integer, nil] page number handed to RSolr's paginate
  # @return the response object produced by RSolr's paginate
  def search(keyword, industry_id, location_id, page)
    query, fq = set_query_search(keyword, industry_id, location_id)
    @solr.paginate(page, Job::NUMBER_SEARCH_RESULTS, 'select', params: { q: query, fq: fq })
  end

  private

  # Builds the main query string and the filter queries.
  #
  # @return [Array(String, Array<String>)] the q string and the fq list
  def set_query_search(keyword, industry_id, location_id)
    industry_id = escape_or_wildcard(industry_id)
    location_id = escape_or_wildcard(location_id)
    keyword = escape_or_wildcard(keyword)

    # The keyword is grouped in parentheses so every term of a multi-word
    # keyword is matched against the named field; without the grouping only
    # the first term is bound to title/company.
    query = "title:(#{keyword})^5 OR company:(#{keyword})"
    fq = ["industry_ids:#{industry_id}", "location_ids:#{location_id}"]
    [query, fq]
  end

  # Returns "*" for a blank value, otherwise the Solr-escaped value.
  # The value is converted with to_s first because RSolr.solr_escape expects
  # a String and would fail on an Integer or Array.
  def escape_or_wildcard(value)
    value.blank? ? '*' : RSolr.solr_escape(value.to_s)
  end
end
lib/
common
/crawler.rb
→
lib/
import/crawler
/crawler.rb
View file @
e24f4ced
require
'open-uri'
class
Crawler
def
initialize
(
logger
)
@logger
=
logger
end
def
crawl_data
(
page_number
,
base_link
)
crawl_industries_locations
job_links
=
get_job_links
(
page_number
,
base_link
)
job_links
.
each
do
|
link
|
next
if
link
.
empty?
crawl_job
(
link
)
end
end
def
get_job_links
(
page_number
,
link
)
job_links
=
[]
page_number
.
times
do
document
=
Nokogiri
::
HTML
(
URI
.
open
(
link
))
jobs_xml
=
document
.
xpath
(
'//div/a[@class="job_link"]/@href'
)
jobs_xml
.
each
{
|
item
|
job_links
<<
item
.
value
}
next_page
=
document
.
at_css
(
'.next-page a'
)
break
if
next_page
.
nil?
link
=
next_page
[
:href
]
module
Crawler
class
Crawler
def
initialize
(
logger
)
@logger
=
logger
end
job_links
end
def
crawl_company
(
company_link
)
uri
=
URI
.
parse
(
URI
.
escape
(
company_link
))
# fix error: uri must be ascii only
document
=
Nokogiri
::
HTML
(
URI
.
open
(
uri
))
company_name
=
document
.
css
(
'.content .name'
).
text
return
if
company_name
.
empty?
company_address
=
document
.
css
(
'.content p'
)[
1
].
text
company_description
=
document
.
css
(
'.main-about-us'
).
css
(
'.content'
).
text
Company
.
find_or_create_by
(
name:
company_name
)
do
|
company
|
company
.
address
=
company_address
company
.
description
=
company_description
def
crawl_data
(
page_number
,
base_link
)
crawl_industries_locations
job_links
=
get_job_links
(
page_number
,
base_link
)
job_links
.
each
do
|
link
|
next
if
link
.
empty?
crawl_job
(
link
)
end
end
rescue
StandardError
=>
e
@logger
.
error
"
#{
e
.
message
}
- Company link:
#{
uri
}
"
end
def
crawl_job
(
job_link
)
uri
=
URI
.
parse
(
URI
.
escape
(
job_link
))
# fix error: uri must be ascii only
document
=
Nokogiri
::
HTML
(
URI
.
open
(
uri
))
job_title
=
document
.
at_css
(
'.job-desc p.title'
).
text
return
if
job_title
.
empty?
job_company_link
=
document
.
at_css
(
'.job-desc a.job-company-name'
)[
:href
]
job_company
=
crawl_company
(
job_company_link
)
return
if
job_company
.
nil?
job_location_name
=
document
.
css
(
'.map p a'
).
map
{
|
val
|
val
.
text
.
strip
}
job_locations
=
Location
.
where
(
city:
job_location_name
)
job_industry_names
=
document
.
at_xpath
(
'//li[./strong/em[contains(@class, "mdi mdi-briefcase")]]'
).
css
(
'p a'
).
map
{
|
val
|
val
.
text
.
strip
}
job_industries
=
Industry
.
where
(
name:
job_industry_names
)
job_salary
=
document
.
at_xpath
(
'//li[./strong/i[contains(@class, "fa fa-usd")]]/p'
).
try
(
:text
).
try
(
:strip
)
job_level
=
document
.
at_xpath
(
'//li[./strong/i[contains(@class, "mdi mdi-account")]]/p'
).
try
(
:text
).
try
(
:strip
)
job_experience
=
document
.
at_xpath
(
'//li[./strong/i[contains(@class, "fa fa-briefcase")]]/p'
).
try
(
:text
).
try
(
:strip
)
job_exp_date
=
document
.
at_xpath
(
'//li[./strong/i[contains(@class, "mdi mdi-calendar-check")]]/p'
).
try
(
:text
).
try
(
:strip
)
job_description
=
document
.
css
(
'.job-detail-content .detail-row'
).
to_s
Job
.
find_or_create_by
(
title:
job_title
,
company_id:
job_company
.
id
,
level:
job_level
,
experience:
job_experience
,
salary:
job_salary
,
expiration_date:
job_exp_date
)
do
|
job
|
job
.
description
=
job_description
job
.
industries
<<
job_industries
job
.
locations
<<
job_locations
def
get_job_links
(
page_number
,
link
)
job_links
=
[]
page_number
.
times
do
document
=
Nokogiri
::
HTML
(
URI
.
open
(
link
))
jobs_xml
=
document
.
xpath
(
'//div/a[@class="job_link"]/@href'
)
jobs_xml
.
each
{
|
item
|
job_links
<<
item
.
value
}
next_page
=
document
.
at_css
(
'.next-page a'
)
break
if
next_page
.
nil?
link
=
next_page
[
:href
]
end
job_links
end
rescue
StandardError
=>
e
@logger
.
error
"
#{
e
.
message
}
- Job link:
#{
uri
}
"
end
def
crawl_industries_locations
document
=
Nokogiri
::
HTML
(
URI
.
open
(
'https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-vi.html'
))
industries
=
document
.
css
(
'#industry option'
).
map
(
&
:text
)
locations
=
document
.
css
(
'#location option'
).
map
(
&
:text
)
industries
.
each
do
|
val
|
Industry
.
find_or_create_by
(
name:
val
)
def
crawl_company
(
company_link
)
uri
=
URI
.
parse
(
URI
.
escape
(
company_link
))
# fix error: uri must be ascii only
document
=
Nokogiri
::
HTML
(
URI
.
open
(
uri
))
company_name
=
document
.
css
(
'.content .name'
).
text
return
if
company_name
.
empty?
company_address
=
document
.
css
(
'.content p'
)[
1
].
text
company_description
=
document
.
css
(
'.main-about-us'
).
css
(
'.content'
).
text
Company
.
find_or_create_by
(
name:
company_name
)
do
|
company
|
company
.
address
=
company_address
company
.
description
=
company_description
end
rescue
StandardError
=>
e
@logger
.
error
"
#{
e
.
message
}
- Company link:
#{
uri
}
"
end
locations
.
take
(
Location
::
CITY_VIETNAM_NUMBER
).
each
do
|
val
|
Location
.
find_or_create_by
(
city:
val
)
do
|
location
|
location
.
oversea
=
false
def
crawl_job
(
job_link
)
uri
=
URI
.
parse
(
URI
.
escape
(
job_link
))
# fix error: uri must be ascii only
document
=
Nokogiri
::
HTML
(
URI
.
open
(
uri
))
job_title
=
document
.
at_css
(
'.job-desc p.title'
).
text
return
if
job_title
.
empty?
job_company_link
=
document
.
at_css
(
'.job-desc a.job-company-name'
)[
:href
]
job_company
=
crawl_company
(
job_company_link
)
return
if
job_company
.
nil?
job_location_name
=
document
.
css
(
'.map p a'
).
map
{
|
val
|
val
.
text
.
strip
}
job_locations
=
Location
.
where
(
city:
job_location_name
)
job_industry_names
=
document
.
at_xpath
(
'//li[./strong/em[contains(@class, "mdi mdi-briefcase")]]'
).
css
(
'p a'
).
map
{
|
val
|
val
.
text
.
strip
}
job_industries
=
Industry
.
where
(
name:
job_industry_names
)
job_salary
=
document
.
at_xpath
(
'//li[./strong/i[contains(@class, "fa fa-usd")]]/p'
).
try
(
:text
).
try
(
:strip
)
job_level
=
document
.
at_xpath
(
'//li[./strong/i[contains(@class, "mdi mdi-account")]]/p'
).
try
(
:text
).
try
(
:strip
)
job_experience
=
document
.
at_xpath
(
'//li[./strong/i[contains(@class, "fa fa-briefcase")]]/p'
).
try
(
:text
).
try
(
:strip
)
job_exp_date
=
document
.
at_xpath
(
'//li[./strong/i[contains(@class, "mdi mdi-calendar-check")]]/p'
).
try
(
:text
).
try
(
:strip
)
job_description
=
document
.
css
(
'.job-detail-content .detail-row'
).
to_s
Job
.
find_or_create_by
(
title:
job_title
,
company_id:
job_company
.
id
,
level:
job_level
,
experience:
job_experience
,
salary:
job_salary
,
expiration_date:
job_exp_date
)
do
|
job
|
job
.
description
=
job_description
job
.
industries
<<
job_industries
job
.
locations
<<
job_locations
end
rescue
StandardError
=>
e
@logger
.
error
"
#{
e
.
message
}
- Job link:
#{
uri
}
"
end
locations
.
last
(
locations
.
count
-
Location
::
CITY_VIETNAM_NUMBER
).
each
do
|
val
|
Location
.
find_or_create_by
(
city:
val
)
do
|
location
|
location
.
oversea
=
true
def
crawl_industries_locations
document
=
Nokogiri
::
HTML
(
URI
.
open
(
'https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-vi.html'
))
industries
=
document
.
css
(
'#industry option'
).
map
(
&
:text
)
locations
=
document
.
css
(
'#location option'
).
map
(
&
:text
)
industries
.
each
do
|
val
|
Industry
.
find_or_create_by
(
name:
val
)
end
locations
.
take
(
Location
::
CITY_VIETNAM_NUMBER
).
each
do
|
val
|
Location
.
find_or_create_by
(
city:
val
)
do
|
location
|
location
.
oversea
=
false
end
end
locations
.
last
(
locations
.
count
-
Location
::
CITY_VIETNAM_NUMBER
).
each
do
|
val
|
Location
.
find_or_create_by
(
city:
val
)
do
|
location
|
location
.
oversea
=
true
end
end
end
end
...
...
lib/
common/csv
.rb
→
lib/
import/csv/services/csv_import
.rb
View file @
e24f4ced
require
'csv'
require
'./lib/common/extract_zip'
class
CsvImport
include
ExtractZip
module
Csv::Services
class
CsvImport
include
ExtractZip
def
initialize
(
logger
)
@logger
=
logger
end
def
initialize
(
logger
)
@logger
=
logger
end
def
import_job
(
direction
)
CSV
.
foreach
(
"
#{
direction
}
/jobs.csv"
,
headers:
true
).
with_index
(
2
)
do
|
row
,
index
|
next
if
row
[
'category'
].
blank?
||
row
[
'category'
].
match
(
/^[0-9]+$/
).
present?
def
import_job
(
direction
)
CSV
.
foreach
(
"
#{
direction
}
/jobs.csv"
,
headers:
true
).
with_index
(
2
)
do
|
row
,
index
|
next
if
row
[
'category'
].
blank?
||
row
[
'category'
].
match
(
/^[0-9]+$/
).
present?
title
=
row
[
'name'
].
strip
company
=
Company
.
find_or_create_by
(
name:
row
[
'company name'
].
strip
)
do
|
c
|
c
.
description
=
"Contact email:
#{
row
[
'contact email'
]
}
\n
"
\
"Contact name:
#{
row
[
'contact name'
]
}
\n
"
\
"Contact phone:
#{
row
[
'contact phone'
]
}
"
c
.
address
=
"
#{
row
[
'company address'
]
}
,
#{
row
[
'company province'
]
}
"
end
industry
=
Industry
.
find_or_create_by
(
name:
row
[
'category'
].
strip
)
level
=
row
[
'level'
].
try
(
:strip
)
salary
=
row
[
'salary'
].
try
(
:strip
)
locations_name
=
row
[
'work place'
].
tr
(
'"[]'
,
''
).
split
(
','
)
locations
=
Location
.
where
(
city:
locations_name
)
locations
=
locations_name
.
map
{
|
city
|
Location
.
create
(
oversea:
false
,
city:
city
)
}
if
locations
.
empty?
description
=
"Benefits:
\n
#{
row
[
'benefit'
]
}
\n
"
\
"Descriptions:
\n
#{
row
[
'description'
]
}
\n
"
\
"Requirements:
\n
#{
row
[
'requirement'
]
}
"
title
=
row
[
'name'
].
strip
company
=
Company
.
find_or_create_by
(
name:
row
[
'company name'
].
strip
)
do
|
c
|
c
.
description
=
"Contact email:
#{
row
[
'contact email'
]
}
\n
"
\
"Contact name:
#{
row
[
'contact name'
]
}
\n
"
\
"Contact phone:
#{
row
[
'contact phone'
]
}
"
c
.
address
=
"
#{
row
[
'company address'
]
}
,
#{
row
[
'company province'
]
}
"
end
industry
=
Industry
.
find_or_create_by
(
name:
row
[
'category'
].
strip
)
level
=
row
[
'level'
].
try
(
:strip
)
salary
=
row
[
'salary'
].
try
(
:strip
)
locations_name
=
row
[
'work place'
].
tr
(
'"[]'
,
''
).
split
(
','
)
locations
=
Location
.
where
(
city:
locations_name
)
locations
=
locations_name
.
map
{
|
city
|
Location
.
create
(
oversea:
false
,
city:
city
)
}
if
locations
.
empty?
description
=
"Benefits:
\n
#{
row
[
'benefit'
]
}
\n
"
\
"Descriptions:
\n
#{
row
[
'description'
]
}
\n
"
\
"Requirements:
\n
#{
row
[
'requirement'
]
}
"
Job
.
find_or_create_by
(
title:
title
,
company_id:
company
.
id
,
level:
level
,
salary:
salary
)
do
|
job
|
job
.
industries
<<
industry
job
.
locations
<<
locations
job
.
description
=
description
Job
.
find_or_create_by
(
title:
title
,
company_id:
company
.
id
,
level:
level
,
salary:
salary
)
do
|
job
|
job
.
industries
<<
industry
job
.
locations
<<
locations
job
.
description
=
description
end
puts
title
rescue
StandardError
=>
e
@logger
.
error
"Job
#{
index
}
:
#{
e
.
message
}
"
end
puts
title
rescue
StandardError
=>
e
@logger
.
error
"Job
#{
index
}
:
#{
e
.
message
}
"
end
end
end
lib/solr/solr_server.rb
0 → 100644
View file @
e24f4ced
# Wrapper around an RSolr connection that runs paginated job searches.
class SolrServer
  # Opens the connection to the Solr endpoint configured in Settings.solr_server.
  def initialize
    @solr = RSolr.connect(url: Settings.solr_server)
  end

  # Runs a paginated query against the 'select' request handler.
  #
  # @param search_params [#[]] hash-like object read through the keys
  #   :search, :industry_id, :location_id and :page
  # @return the response object produced by RSolr's paginate
  def search(search_params)
    query, fq = set_query_search(search_params)
    @solr.paginate(search_params[:page], Job::NUMBER_SEARCH_RESULTS, 'select', params: { q: query, fq: fq })
  end

  private

  # Builds the main query string and the filter queries from the search params.
  #
  # @param search_params [#[]] see #search
  # @return [Array(String, Array<String>)] the q string and the fq list
  def set_query_search(search_params)
    industry_id = escape_or_wildcard(search_params[:industry_id])
    location_id = escape_or_wildcard(search_params[:location_id])
    keyword = escape_or_wildcard(search_params[:search])

    # Title matches are boosted above company matches (^4 vs ^2).
    query = "title:(#{keyword})^4 OR company:(#{keyword})^2"
    fq = ["industry_ids:#{industry_id}", "location_ids:#{location_id}"]
    [query, fq]
  end

  # Returns "*" for a blank value, otherwise the Solr-escaped value.
  # The value is converted with to_s first because RSolr.solr_escape expects
  # a String and would fail on an Integer or Array.
  def escape_or_wildcard(value)
    value.blank? ? '*' : RSolr.solr_escape(value.to_s)
  end
end
lib/tasks/import_data.rake
View file @
e24f4ced
require
'./lib/common/ftp'
require
'./lib/common/csv'
require
'./lib/common/crawler'
namespace
:import_data
do
logger
=
Logger
.
new
(
'./log/import_data.log'
)
desc
'crawl industries locations jobs'
task
:crawler
,
%i[page_number link]
=>
[
:environment
]
do
|
_
,
args
|
args
.
with_defaults
(
page_number:
1
,
link:
'https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-vi.html'
)
crawler
=
Crawler
.
new
(
logger
)
crawler
=
Crawler
::
Crawler
.
new
(
logger
)
crawler
.
crawl_data
(
args
[
:page_number
].
to_i
,
args
[
:link
])
end
desc
'Download csv file from FTP and import'
task
csv: :environment
do
destination_dir
=
'./lib/data'
destination_dir
=
"
#{
Rails
.
root
}
/lib/import/csv/data"
Dir
.
mkdir
destination_dir
unless
File
.
exist?
(
destination_dir
)
ftp
=
Ftp
.
new
(
'192.168.1.156'
,
'training'
,
'training'
)
ftp
.
download_file
(
'jobs.zip'
,
destination_dir
)
ftp
.
close
csv
=
CsvImport
.
new
(
logger
)
csv
=
Csv
::
Services
::
Csv
Import
.
new
(
logger
)
csv
.
extract_zip
(
"
#{
destination_dir
}
/jobs.zip"
,
destination_dir
)
csv
.
import_job
(
destination_dir
)
end
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment