Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
V
venjob
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Huỳnh Thiên Phước
venjob
Commits
6ab472c3
Commit
6ab472c3
authored
Jul 28, 2020
by
Huỳnh Thiên Phước
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Fix code
parent
5204717b
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
131 additions
and
138 deletions
+131
-138
app/views/top_pages/show.html.erb
+2
-2
lib/src/crawler.rb
+85
-85
lib/src/crontab.rb
+39
-45
lib/tasks/crawler_import.rake
+5
-6
No files found.
app/views/top_pages/show.html.erb
View file @
6ab472c3
...
@@ -2,8 +2,8 @@
...
@@ -2,8 +2,8 @@
<%
@job
.
each
do
|
job
|
%>
<%
@job
.
each
do
|
job
|
%>
<%
if
!
job
.
cities
.
blank?
%>
<%
if
!
job
.
cities
.
blank?
%>
<ul>
<ul>
<div
class=
"title"
><strong>
<%=
(
@company
.
find_by
(
id:
job
.
company_id
)).
nam
e
%>
</strong></div>
<div
class=
"title"
><strong>
<%=
job
.
titl
e
%>
</strong></div>
<
%=
job
.
title
%
>
<
div>
<%=
(
@company
.
find_by
(
id:
job
.
company_id
)).
name
%>
</div
>
<div
class=
"salary"
><i
class=
"fas fa-dollar-sign"
></i>
Lương:
<%=
job
.
salary
%>
</div>
<div
class=
"salary"
><i
class=
"fas fa-dollar-sign"
></i>
Lương:
<%=
job
.
salary
%>
</div>
<div><i
class=
"fas fa-map-marker"
></i>
<div><i
class=
"fas fa-map-marker"
></i>
<%
job
.
cities
.
each
do
|
location
|
%>
<%
job
.
cities
.
each
do
|
location
|
%>
...
...
lib/src/crawler.rb
View file @
6ab472c3
class
Crawler
class
Crawler
def
crawl_city
def
crawl_city
page
=
Nokogiri
::
HTML
(
URI
.
open
(
"https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-vi.html"
))
page
=
Nokogiri
::
HTML
(
URI
.
open
(
"https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-vi.html"
))
get_name
=
page
.
search
(
'select#location'
)
get_name
=
page
.
search
(
'select#location'
)
...
@@ -14,15 +14,17 @@
...
@@ -14,15 +14,17 @@
end
end
end
end
end
end
def
crawl_industry
def
crawl_industry
page
=
Nokogiri
::
HTML
(
URI
.
open
(
"https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-vi.html"
))
page
=
Nokogiri
::
HTML
(
URI
.
open
(
"https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-vi.html"
))
get_name
=
page
.
search
(
'select#industry'
)
get_name
=
page
.
search
(
'select#industry'
)
data_industry
=
get_name
.
search
(
'option'
).
map
{
|
p
|
p
.
text
.
strip
}
data_industry
=
get_name
.
search
(
'option'
).
map
{
|
p
|
p
.
text
.
strip
}
data_industry
.
each
do
|
name_industry
|
data_industry
.
each
do
|
name_industry
|
industry
=
Industry
.
create!
(
name:
name_industry
)
industry
=
Industry
.
create!
(
name:
name_industry
)
end
end
end
end
def
crawl_company
def
crawl_company
for
n
in
1
..
10
for
n
in
1
..
10
company_info
=
Nokogiri
::
HTML
(
URI
.
open
(
"https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-trang-
#{
n
}
-vi.html"
))
company_info
=
Nokogiri
::
HTML
(
URI
.
open
(
"https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-trang-
#{
n
}
-vi.html"
))
...
@@ -36,10 +38,10 @@
...
@@ -36,10 +38,10 @@
company_page
=
Nokogiri
::
HTML
(
URI
.
open
(
URI
.
parse
(
URI
.
escape
(
link
))))
company_page
=
Nokogiri
::
HTML
(
URI
.
open
(
URI
.
parse
(
URI
.
escape
(
link
))))
if
!
(
company_page
.
search
(
'p.name'
).
text
).
nil?
if
!
(
company_page
.
search
(
'p.name'
).
text
).
nil?
begin
begin
name_company
=
company_page
.
search
(
'p.name'
).
text
name_company
=
company_page
.
search
(
'p.name'
).
text
address_company
=
company_page
.
css
(
'div.content p'
).
children
[
1
].
text
address_company
=
company_page
.
css
(
'div.content p'
).
children
[
1
].
text
introduction_company
=
company_page
.
css
(
'div.main-about-us'
).
text
introduction_company
=
company_page
.
css
(
'div.main-about-us'
).
text
get_name_company
=
Company
.
find_by
(
name:
name_company
)
get_name_company
=
Company
.
find_by
(
name:
name_company
)
if
get_name_company
.
nil?
if
get_name_company
.
nil?
company
=
Company
.
create!
(
name:
name_company
,
company
=
Company
.
create!
(
name:
name_company
,
address:
address_company
,
address:
address_company
,
...
@@ -53,32 +55,31 @@
...
@@ -53,32 +55,31 @@
end
end
end
end
end
end
def
crawl_job_relationships
def
crawl_job_relationships
for
n
in
1
..
10
for
n
in
1
..
10
page_access
=
Nokogiri
::
HTML
(
URI
.
open
(
"https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-trang-
#{
n
}
-vi.html"
))
page_access
=
Nokogiri
::
HTML
(
URI
.
open
(
"https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-trang-
#{
n
}
-vi.html"
))
get_link
=
page_access
.
css
(
'a.job_link'
).
map
{
|
link
|
link
[
'href'
]
}
get_link
=
page_access
.
css
(
'a.job_link'
).
map
{
|
link
|
link
[
'href'
]
}
get_link
.
each
do
|
link
|
get_link
.
each
do
|
link
|
if
link
.
include?
(
'\u2013'
)
link
.
gsub!
(
'\u2013'
,
'–'
)
end
page_job
=
Nokogiri
::
HTML
(
URI
.
open
(
URI
.
parse
(
URI
.
escape
(
link
))))
page_job
=
Nokogiri
::
HTML
(
URI
.
open
(
URI
.
parse
(
URI
.
escape
(
link
))))
get_row
=
page_job
.
search
(
'div.bg-blue div.row'
)
get_row
=
page_job
.
search
(
'div.bg-blue div.row'
)
if
get_row
!=
""
if
get_row
!=
""
get_name_company
=
page_job
.
search
(
'div.job-desc a.job-company-name'
).
text
.
strip
get_name_company
=
page_job
.
search
(
'div.job-desc a.job-company-name'
).
text
.
strip
company_table
=
Company
.
find_by
(
name:
"
#{
get_name_company
}
"
)
company_table
=
Company
.
find_by
(
name:
get_name_company
)
title_job
=
page_job
.
search
(
'div.job-desc p'
).
text
title_job
=
page_job
.
search
(
'div.job-desc p'
).
text
description
=
page_job
.
search
(
'div.detail-row'
)
description
=
page_job
.
search
(
'div.detail-row'
)
arr_column
=
get_row
.
css
(
'div.has-background'
).
map
{
|
data
|
data
.
text
.
split
(
' '
).
join
(
' '
)
}
arr_column
=
get_row
.
css
(
'div.has-background'
).
map
{
|
data
|
data
.
text
.
split
(
' '
).
join
(
' '
)
}
arr_column
.
each_with_index
do
|
val
,
key
|
arr_column
.
each_with_index
do
|
val
,
key
|
if
!
company_table
.
nil?
unless
company_table
.
nil?
job_check
=
Job
.
find_by
(
title:
title_job
,
company_id:
company_table
.
id
)
if
val
.
include?
(
'Ngày cập nhật'
)
if
val
.
include?
(
'Ngày cập nhật'
)
arr_data
=
val
.
gsub
(
'Ngày cập nhật '
,
''
).
split
(
' '
)
arr_data
=
val
.
gsub
(
'Ngày cập nhật '
,
''
).
split
(
' '
)
date
=
arr_data
.
first
date
=
arr_data
.
first
elsif
val
.
include?
(
'Lương'
)
&&
val
.
include?
(
'Kinh nghiệm'
)
==
true
&&
Job
.
find_by
(
title:
title_job
,
company_id:
company_table
.
id
)
==
nil
elsif
val
.
include?
(
'Lương'
)
&&
val
.
include?
(
'Kinh nghiệm'
)
==
true
&&
job_check
.
nil?
arr_sub
=
((((
val
.
gsub
(
'Lương '
,
''
)).
gsub
(
' Kinh nghiệm '
,
'*'
)).
gsub
(
' Cấp bậc '
,
'*'
)).
gsub
(
' Hết hạn nộp '
,
'*'
)
).
split
(
'*'
)
arr_sub
=
val
.
gsub
(
'Lương '
,
''
).
gsub
(
' Kinh nghiệm '
,
'*'
).
gsub
(
' Cấp bậc '
,
'*'
).
gsub
(
' Hết hạn nộp '
,
'*'
).
split
(
'*'
)
salary
=
arr_sub
[
0
]
salary
=
arr_sub
[
0
]
experience
=
arr_sub
[
1
]
experience
=
arr_sub
[
1
]
level
=
arr_sub
[
2
]
level
=
arr_sub
[
2
]
expiration_date
=
arr_sub
[
3
]
expiration_date
=
arr_sub
[
3
]
job
=
Job
.
create!
(
title:
title_job
,
job
=
Job
.
create!
(
title:
title_job
,
level:
level
,
level:
level
,
...
@@ -87,10 +88,10 @@
...
@@ -87,10 +88,10 @@
expiration_date:
expiration_date
,
expiration_date:
expiration_date
,
description:
description
,
description:
description
,
company_id:
company_table
.
id
)
company_id:
company_table
.
id
)
elsif
val
.
include?
(
'Lương'
)
&&
val
.
include?
(
'Kinh nghiệm'
)
==
false
&&
Job
.
find_by
(
title:
title_job
,
company_id:
company_table
.
id
)
==
nil
elsif
val
.
include?
(
'Lương'
)
&&
val
.
include?
(
'Kinh nghiệm'
)
==
false
&&
job_check
.
nil?
arr_sub
=
(((
val
.
gsub
(
'Lương '
,
''
)).
gsub
(
' Cấp bậc '
,
'*'
)).
gsub
(
' Hết hạn nộp '
,
'*'
)
).
split
(
'*'
)
arr_sub
=
val
.
gsub
(
'Lương '
,
''
).
gsub
(
' Cấp bậc '
,
'*'
).
gsub
(
' Hết hạn nộp '
,
'*'
).
split
(
'*'
)
salary
=
arr_sub
[
0
]
salary
=
arr_sub
[
0
]
level
=
arr_sub
[
1
]
level
=
arr_sub
[
1
]
expiration_date
=
arr_sub
[
2
]
expiration_date
=
arr_sub
[
2
]
job
=
Job
.
create!
(
title:
title_job
,
job
=
Job
.
create!
(
title:
title_job
,
level:
level
,
level:
level
,
...
@@ -101,24 +102,23 @@
...
@@ -101,24 +102,23 @@
company_id:
company_table
.
id
)
company_id:
company_table
.
id
)
end
end
end
end
if
!
company_table
.
nil?
next
if
!
company_table
.
nil?
job_table
=
Job
.
find_by
(
title:
title_job
)
job_table
=
Job
.
find_by
(
title:
title_job
)
if
!
job_table
.
nil?
unless
job_table
.
nil?
location_rel
=
get_row
.
css
(
'div.map p a'
).
children
.
map
{
|
location
|
location
.
text
.
strip
}
location_rel
=
get_row
.
css
(
'div.map p a'
).
children
.
map
{
|
location
|
location
.
text
.
strip
}
location_rel
.
each
do
|
loc
|
location_rel
.
each
do
|
loc
|
city_table
=
City
.
find_by
(
name:
"
#{
loc
}
"
)
city_table
=
City
.
find_by
(
name:
loc
)
if
CityJob
.
find_by
(
job_id:
job_table
.
id
,
city_id:
city_table
.
id
)
==
nil
if
CityJob
.
find_by
(
job_id:
job_table
.
id
,
city_id:
city_table
.
id
).
nil?
puts
"Created City:
#{
job_table
.
id
}
-
#{
city_table
.
id
}
.
#{
loc
}
"
puts
"Created City:
#{
job_table
.
id
}
-
#{
city_table
.
id
}
.
#{
loc
}
"
city_jobs
=
CityJob
.
create!
(
job_id:
job_table
.
id
,
city_id:
city_table
.
id
)
city_jobs
=
CityJob
.
create!
(
job_id:
job_table
.
id
,
city_id:
city_table
.
id
)
end
end
end
industry_rel
=
get_row
.
css
(
'li a'
).
children
.
map
{
|
industry
|
industry
.
text
.
strip
}
end
industry_rel
.
each
do
|
ind
|
industry_rel
=
get_row
.
css
(
'li a'
).
children
.
map
{
|
industry
|
industry
.
text
.
strip
}
industry_table
=
Industry
.
find_by
(
name:
"
#{
ind
}
"
)
industry_rel
.
each
do
|
ind
|
if
IndustryJob
.
find_by
(
job_id:
job_table
.
id
,
industry_id:
industry_table
.
id
)
==
nil
industry_table
=
Industry
.
find_by
(
name:
ind
)
puts
"Created Industry:
#{
job_table
.
id
}
-
#{
industry_table
.
id
}
.
#{
ind
}
"
if
IndustryJob
.
find_by
(
job_id:
job_table
.
id
,
industry_id:
industry_table
.
id
).
nil?
industry_jobs
=
IndustryJob
.
create!
(
job_id:
job_table
.
id
,
industry_id:
industry_table
.
id
)
puts
"Created Industry:
#{
job_table
.
id
}
-
#{
industry_table
.
id
}
.
#{
ind
}
"
end
industry_jobs
=
IndustryJob
.
create!
(
job_id:
job_table
.
id
,
industry_id:
industry_table
.
id
)
end
end
end
end
end
end
...
@@ -151,51 +151,52 @@
...
@@ -151,51 +151,52 @@
file
=
"jobs.csv"
file
=
"jobs.csv"
CSV
.
foreach
(
file
,
headers:
true
)
do
|
row
|
CSV
.
foreach
(
file
,
headers:
true
)
do
|
row
|
begin
begin
company_name
=
row
[
"company name"
].
strip
company_name
=
row
[
"company name"
]
company_address
=
row
[
"company address"
]
company_address
=
row
[
"company address"
]
company_introduction
=
row
[
"benefit"
]
company_introduction
=
row
[
:benefit
]
company_table
=
Company
.
find_by
(
name:
"
#{
company_name
}
"
)
company_table
=
Company
.
find_by
(
name:
company_name
)
if
company_table
==
nil
if
company_table
.
nil?
company_table
=
Company
.
create!
(
name:
company_name
,
company_table
=
Company
.
create!
(
name:
company_name
,
address:
company_address
,
address:
company_address
,
introduction:
company_introduction
)
introduction:
company_introduction
)
end
end
title_job
=
row
[
"name"
].
strip
title_job
=
row
[
:name
]
description_job
=
row
[
"description"
]
description_job
=
row
[
:description
]
level
=
row
[
"level"
]
level
=
row
[
:level
]
salary
=
row
[
"salary"
]
salary
=
row
[
:salary
]
if
company_table
!=
nil
&&
Job
.
find_by
(
title:
title_job
,
level:
level
,
salary:
salary
,
company_id:
company_table
.
id
)
==
nil
unless
company_table
.
nil?
job_table
=
Job
.
create!
(
title:
title_job
,
job_table
=
Job
.
create!
(
title:
title_job
,
description:
description_job
,
description:
description_job
,
level:
level
,
level:
level
,
salary:
salary
,
salary:
salary
,
company_id:
company_table
.
id
)
company_id:
company_table
.
id
)
en
d
puts
job_table
.
i
d
industry
=
row
[
"category"
].
strip
end
industry_find
=
Industry
.
find_by
(
name:
industry
)
industry
=
row
[
:category
]
if
industry_find
==
nil
industry_find
=
Industry
.
find_by
(
name:
industry
)
i
ndustry_table
=
Industry
.
create!
(
name:
industry
)
i
f
industry_find
.
nil?
industry_job_table
=
IndustryJob
.
create!
(
job_id:
job_table
.
id
,
industry_id:
industry_find
.
id
)
industry_table
=
Industry
.
create!
(
name:
industry
)
else
industry_job_table
=
IndustryJob
.
create!
(
job_id:
job_table
.
id
,
industry_id:
industry_find
.
id
)
industry_job_table
=
IndustryJob
.
create!
(
job_id:
job_table
.
id
,
industry_id:
industry_find
.
id
)
else
end
industry_job_table
=
IndustryJob
.
create!
(
job_id:
job_table
.
id
,
industry_id:
industry_find
.
id
)
puts
"========================================="
end
puts
job_table
.
id
,
title_job
,
industry
,
salary
puts
job_table
.
id
,
title_job
,
industry
,
salary
location_data
=
row
[
"work place"
].
strip
location_data
=
row
[
"work place"
]
location
=
(
location_data
.
gsub
(
'["'
,
''
)).
gsub
(
'"]'
,
''
).
strip
location
=
location_data
.
gsub
(
'["'
,
''
).
gsub
(
'"]'
,
''
)
location_find
=
City
.
find_by
(
name:
location
)
location_find
=
City
.
find_by
(
name:
location
)
if
location_find
==
nil
if
location_find
.
nil?
city_table
=
City
.
create!
(
name:
location
)
city_table
=
City
.
create!
(
name:
location
)
city_job_table
=
CityJob
.
create!
(
job_id:
job_table
.
id
,
city_id:
location_find
.
id
)
city_job_table
=
CityJob
.
create!
(
job_id:
job_table
.
id
,
city_id:
location_find
.
id
)
else
else
city_job_table
=
CityJob
.
create!
(
job_id:
job_table
.
id
,
city_id:
location_find
.
id
)
city_job_table
=
CityJob
.
create!
(
job_id:
job_table
.
id
,
city_id:
location_find
.
id
)
end
end
puts
"Location:
#{
location
}
"
puts
"Location:
#{
location
}
"
rescue
StandardError
=>
e
rescue
StandardError
=>
e
puts
e
puts
e
end
end
end
end
end
end
def
logger
def
logger
# config.log_level = :info
# config.log_level = :info
Rails
.
logger
=
Logger
.
new
(
STDOUT
)
Rails
.
logger
=
Logger
.
new
(
STDOUT
)
...
@@ -203,4 +204,4 @@
...
@@ -203,4 +204,4 @@
Rails
.
logger
.
level
=
Logger
::
DEBUG
Rails
.
logger
.
level
=
Logger
::
DEBUG
Rails
.
logger
.
datetime_format
=
"%Y-%m-%d %H:%M:%S"
Rails
.
logger
.
datetime_format
=
"%Y-%m-%d %H:%M:%S"
end
end
end
end
\ No newline at end of file
lib/src/crontab.rb
View file @
6ab472c3
class
Crontab
class
Crontab
def
find_company
def
find_company
company_info
=
Nokogiri
::
HTML
(
URI
.
open
(
"https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-trang-1-vi.html"
))
company_info
=
Nokogiri
::
HTML
(
URI
.
open
(
"https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-trang-1-vi.html"
))
company_link
=
company_info
.
css
(
'div.caption a.company-name'
).
map
{
|
link
|
link
[
'href'
]
}
company_link
=
company_info
.
css
(
'div.caption a.company-name'
).
map
{
|
link
|
link
[
'href'
]
}
company_link
.
each
do
|
link
|
company_link
.
each
do
|
link
|
if
link
.
include?
(
'\u2019'
)
next
if
link
==
'javascript:void(0);'
link
.
gsub!
(
'\u2019'
,
"'"
)
end
next
if
link
==
'javascript:void(0);'
if
link
!=
'https://careerbuilder.vn/vi/nha-tuyen-dung/hr-vietnam\xE2\x80\x99s-ess-client.35A4EFBA.html'
if
link
!=
'https://careerbuilder.vn/vi/nha-tuyen-dung/hr-vietnam\xE2\x80\x99s-ess-client.35A4EFBA.html'
company_page
=
Nokogiri
::
HTML
(
URI
.
open
(
URI
.
parse
(
URI
.
escape
(
link
))))
company_page
=
Nokogiri
::
HTML
(
URI
.
open
(
URI
.
parse
(
URI
.
escape
(
link
))))
if
!
(
company_page
.
search
(
'p.name'
).
text
).
nil?
unless
(
company_page
.
search
(
'p.name'
).
text
).
nil?
begin
begin
name_company
=
company_page
.
search
(
'p.name'
).
text
name_company
=
company_page
.
search
(
'p.name'
).
text
address_company
=
company_page
.
css
(
'div.content p'
).
children
[
1
].
text
address_company
=
company_page
.
css
(
'div.content p'
).
children
[
1
].
text
introduction_company
=
company_page
.
css
(
'div.main-about-us'
).
text
introduction_company
=
company_page
.
css
(
'div.main-about-us'
).
text
get_name_company
=
Company
.
find_by
(
name:
name_company
)
get_name_company
=
Company
.
find_by
(
name:
name_company
)
if
get_name_company
.
nil?
if
get_name_company
.
nil?
company
=
Company
.
create!
(
name:
name_company
,
company
=
Company
.
create!
(
name:
name_company
,
address:
address_company
,
address:
address_company
,
introduction:
introduction_company
)
introduction:
introduction_company
)
end
end
rescue
StandardError
=>
e
rescue
StandardError
=>
e
puts
e
puts
e
end
end
end
end
end
end
...
@@ -29,30 +26,28 @@ class Crontab
...
@@ -29,30 +26,28 @@ class Crontab
end
end
def
find_job
def
find_job
page_access
=
Nokogiri
::
HTML
(
URI
.
open
(
"https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-vi.html"
))
page_access
=
Nokogiri
::
HTML
(
URI
.
open
(
"https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-vi.html"
))
get_link
=
page_access
.
css
(
'a.job_link'
).
map
{
|
link
|
link
[
'href'
]
}
get_link
=
page_access
.
css
(
'a.job_link'
).
map
{
|
link
|
link
[
'href'
]
}
get_link
.
each
do
|
link
|
get_link
.
each
do
|
link
|
if
link
.
include?
(
'\u2013'
)
page_job
=
Nokogiri
::
HTML
(
URI
.
open
(
URI
.
parse
(
URI
.
escape
(
link
))))
link
.
gsub!
(
'\u2013'
,
'–'
)
get_row
=
page_job
.
search
(
'div.bg-blue div.row'
)
end
page_job
=
Nokogiri
::
HTML
(
URI
.
open
(
URI
.
parse
(
URI
.
escape
(
link
))))
get_row
=
page_job
.
search
(
'div.bg-blue div.row'
)
if
get_row
!=
""
if
get_row
!=
""
get_name_company
=
page_job
.
search
(
'div.job-desc a.job-company-name'
).
text
.
strip
get_name_company
=
page_job
.
search
(
'div.job-desc a.job-company-name'
).
text
.
strip
company_table
=
Company
.
find_by
(
name:
get_name_company
)
company_table
=
Company
.
find_by
(
name:
get_name_company
)
title_job
=
page_job
.
search
(
'div.job-desc p'
).
text
title_job
=
page_job
.
search
(
'div.job-desc p'
).
text
description
=
page_job
.
search
(
'div.detail-row'
)
description
=
page_job
.
search
(
'div.detail-row'
)
arr_column
=
get_row
.
css
(
'div.has-background'
).
map
{
|
data
|
data
.
text
.
split
(
' '
).
join
(
' '
)
}
arr_column
=
get_row
.
css
(
'div.has-background'
).
map
{
|
data
|
data
.
text
.
split
(
' '
).
join
(
' '
)
}
job_table
=
Job
.
find_by
(
title:
title_job
)
job_table
=
Job
.
find_by
(
title:
title_job
)
arr_column
.
each_with_index
do
|
val
,
key
|
arr_column
.
each
do
|
val
|
if
!
company_table
.
nil?
unless
company_table
.
nil?
job_check
=
Job
.
find_by
(
title:
title_job
,
company_id:
company_table
.
id
)
if
val
.
include?
(
'Ngày cập nhật'
)
if
val
.
include?
(
'Ngày cập nhật'
)
arr_data
=
val
.
gsub
(
'Ngày cập nhật '
,
''
).
split
(
' '
)
arr_data
=
val
.
gsub
(
'Ngày cập nhật '
,
''
).
split
(
' '
)
date
=
arr_data
.
first
date
_update
=
arr_data
.
first
elsif
val
.
include?
(
'Lương'
)
&&
val
.
include?
(
'Kinh nghiệm'
)
==
true
&&
Job
.
find_by
(
title:
title_job
,
company_id:
company_table
.
id
)
==
nil
elsif
val
.
include?
(
'Lương'
)
&&
val
.
include?
(
'Kinh nghiệm'
)
==
true
&&
job_check
.
nil?
arr_sub
=
((((
val
.
gsub
(
'Lương '
,
''
)).
gsub
(
' Kinh nghiệm '
,
'*'
)).
gsub
(
' Cấp bậc '
,
'*'
)).
gsub
(
' Hết hạn nộp '
,
'*'
)
).
split
(
'*'
)
arr_sub
=
val
.
gsub
(
'Lương '
,
''
).
gsub
(
' Kinh nghiệm '
,
'*'
).
gsub
(
' Cấp bậc '
,
'*'
).
gsub
(
' Hết hạn nộp '
,
'*'
).
split
(
'*'
)
salary
=
arr_sub
[
0
]
salary
=
arr_sub
[
0
]
experience
=
arr_sub
[
1
]
experience
=
arr_sub
[
1
]
level
=
arr_sub
[
2
]
level
=
arr_sub
[
2
]
expiration_date
=
arr_sub
[
3
]
expiration_date
=
arr_sub
[
3
]
job
=
Job
.
create!
(
title:
title_job
,
job
=
Job
.
create!
(
title:
title_job
,
level:
level
,
level:
level
,
...
@@ -61,10 +56,10 @@ class Crontab
...
@@ -61,10 +56,10 @@ class Crontab
expiration_date:
expiration_date
,
expiration_date:
expiration_date
,
description:
description
,
description:
description
,
company_id:
company_table
.
id
)
company_id:
company_table
.
id
)
elsif
val
.
include?
(
'Lương'
)
&&
val
.
include?
(
'Kinh nghiệm'
)
==
false
&&
Job
.
find_by
(
title:
title_job
,
company_id:
company_table
.
id
)
==
nil
elsif
val
.
include?
(
'Lương'
)
&&
val
.
include?
(
'Kinh nghiệm'
)
==
false
&&
job_check
.
nil?
arr_sub
=
(((
val
.
gsub
(
'Lương '
,
''
)).
gsub
(
' Cấp bậc '
,
'*'
)).
gsub
(
' Hết hạn nộp '
,
'*'
)
).
split
(
'*'
)
arr_sub
=
val
.
gsub
(
'Lương '
,
''
).
gsub
(
' Cấp bậc '
,
'*'
).
gsub
(
' Hết hạn nộp '
,
'*'
).
split
(
'*'
)
salary
=
arr_sub
[
0
]
salary
=
arr_sub
[
0
]
level
=
arr_sub
[
1
]
level
=
arr_sub
[
1
]
expiration_date
=
arr_sub
[
2
]
expiration_date
=
arr_sub
[
2
]
job
=
Job
.
create!
(
title:
title_job
,
job
=
Job
.
create!
(
title:
title_job
,
level:
level
,
level:
level
,
...
@@ -77,24 +72,24 @@ class Crontab
...
@@ -77,24 +72,24 @@ class Crontab
end
end
end
end
if
!
job_table
.
nil?
&&
!
company_table
.
nil?
if
!
job_table
.
nil?
&&
!
company_table
.
nil?
location_rel
=
get_row
.
css
(
'div.map p a'
).
children
.
map
{
|
location
|
location
.
text
.
strip
}
location_rel
=
get_row
.
css
(
'div.map p a'
).
children
.
map
{
|
location
|
location
.
text
.
strip
}
location_rel
.
each
do
|
loc
|
location_rel
.
each
do
|
loc
|
city_table
=
City
.
find_by
(
name:
"
#{
loc
}
"
)
city_table
=
City
.
find_by
(
name:
loc
)
if
CityJob
.
find_by
(
job_id:
job_table
.
id
,
city_id:
city_table
.
id
)
==
nil
if
CityJob
.
find_by
(
job_id:
job_table
.
id
,
city_id:
city_table
.
id
)
.
nil?
puts
"Created
#{
job_table
.
id
}
-
#{
city_table
.
id
}
.
#{
loc
}
"
puts
"Created City
#{
city_table
.
id
}
=>
#{
loc
}
"
city_jobs
=
CityJob
.
create!
(
job_id:
job_table
.
id
,
city_id:
city_table
.
id
)
city_jobs
=
CityJob
.
create!
(
job_id:
job_table
.
id
,
city_id:
city_table
.
id
)
end
end
end
end
industry_rel
=
get_row
.
css
(
'li a'
).
children
.
map
{
|
industry
|
industry
.
text
.
strip
}
industry_rel
=
get_row
.
css
(
'li a'
).
children
.
map
{
|
industry
|
industry
.
text
.
strip
}
industry_rel
.
each
do
|
ind
|
industry_rel
.
each
do
|
ind
|
industry_table
=
Industry
.
find_by
(
name:
"
#{
ind
}
"
)
industry_table
=
Industry
.
find_by
(
name:
ind
)
if
IndustryJob
.
find_by
(
job_id:
job_table
.
id
,
industry_id:
industry_table
.
id
)
==
nil
if
IndustryJob
.
find_by
(
job_id:
job_table
.
id
,
industry_id:
industry_table
.
id
)
.
nil?
puts
"
#{
job_table
.
id
}
-
#{
industry_table
.
id
}
.
#{
ind
}
"
puts
"Created Industry
#{
job_table
.
id
}
-
#{
industry_table
.
id
}
=>
#{
ind
}
"
industry_jobs
=
IndustryJob
.
create!
(
job_id:
job_table
.
id
,
industry_id:
industry_table
.
id
)
industry_jobs
=
IndustryJob
.
create!
(
job_id:
job_table
.
id
,
industry_id:
industry_table
.
id
)
end
end
end
end
end
end
end
end
end
end
end
end
end
end
\ No newline at end of file
lib/tasks/crawler_import.rake
View file @
6ab472c3
...
@@ -6,23 +6,23 @@ require 'zip'
...
@@ -6,23 +6,23 @@ require 'zip'
action
=
Crawler
.
new
action
=
Crawler
.
new
crontab
=
Crontab
.
new
crontab
=
Crontab
.
new
namespace
:import
do
namespace
:import
do
desc
"crawler data"
desc
'crawler data'
task
crawler: :environment
do
task
crawler: :environment
do
action
.
crawl_city
action
.
crawl_city
action
.
crawl_industry
action
.
crawl_industry
action
.
crawl_company
action
.
crawl_company
action
.
crawl_job_relationships
action
.
crawl_job_relationships
end
end
desc
"get file CSV from server"
desc
'get file CSV from Server'
task
csv_get: :environment
do
task
csv_get: :environment
do
action
.
get_file_csv
action
.
get_file_csv
action
.
extract_zip
(
'./jobs.zip'
,
'.'
)
action
.
extract_zip
(
'./jobs.zip'
,
'.'
)
end
end
desc
"Import data from CSV"
desc
'Import data from CSV'
task
data_csv: :environment
do
task
data_csv: :environment
do
action
.
import_file_csv
action
.
import_file_csv
end
end
desc
"Crontab"
desc
'Crontab'
task
auto: :environment
do
task
auto: :environment
do
crontab
.
find_company
crontab
.
find_company
...
@@ -33,4 +33,4 @@ namespace :import do
...
@@ -33,4 +33,4 @@ namespace :import do
task
log: :environment
do
task
log: :environment
do
action
.
logger
action
.
logger
end
end
end
end
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment