Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
V
venjob
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Huỳnh Thiên Phước
venjob
Commits
6ab472c3
Commit
6ab472c3
authored
Jul 28, 2020
by
Huỳnh Thiên Phước
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Fix code
parent
5204717b
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
67 additions
and
71 deletions
+67
-71
app/views/top_pages/show.html.erb
+2
-2
lib/src/crawler.rb
+40
-39
lib/src/crontab.rb
+21
-26
lib/tasks/crawler_import.rake
+4
-4
No files found.
app/views/top_pages/show.html.erb
View file @
6ab472c3
...
@@ -2,8 +2,8 @@
...
@@ -2,8 +2,8 @@
<%
@job
.
each
do
|
job
|
%>
<%
@job
.
each
do
|
job
|
%>
<%
if
!
job
.
cities
.
blank?
%>
<%
if
!
job
.
cities
.
blank?
%>
<ul>
<ul>
<div
class=
"title"
><strong>
<%=
(
@company
.
find_by
(
id:
job
.
company_id
)).
nam
e
%>
</strong></div>
<div
class=
"title"
><strong>
<%=
job
.
titl
e
%>
</strong></div>
<
%=
job
.
title
%
>
<
div>
<%=
(
@company
.
find_by
(
id:
job
.
company_id
)).
name
%>
</div
>
<div
class=
"salary"
><i
class=
"fas fa-dollar-sign"
></i>
Lương:
<%=
job
.
salary
%>
</div>
<div
class=
"salary"
><i
class=
"fas fa-dollar-sign"
></i>
Lương:
<%=
job
.
salary
%>
</div>
<div><i
class=
"fas fa-map-marker"
></i>
<div><i
class=
"fas fa-map-marker"
></i>
<%
job
.
cities
.
each
do
|
location
|
%>
<%
job
.
cities
.
each
do
|
location
|
%>
...
...
lib/src/crawler.rb
View file @
6ab472c3
class
Crawler
class
Crawler
def
crawl_city
def
crawl_city
page
=
Nokogiri
::
HTML
(
URI
.
open
(
"https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-vi.html"
))
page
=
Nokogiri
::
HTML
(
URI
.
open
(
"https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-vi.html"
))
get_name
=
page
.
search
(
'select#location'
)
get_name
=
page
.
search
(
'select#location'
)
...
@@ -14,15 +14,17 @@
...
@@ -14,15 +14,17 @@
end
end
end
end
end
end
def
crawl_industry
def
crawl_industry
page
=
Nokogiri
::
HTML
(
URI
.
open
(
"https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-vi.html"
))
page
=
Nokogiri
::
HTML
(
URI
.
open
(
"https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-vi.html"
))
get_name
=
page
.
search
(
'select#industry'
)
get_name
=
page
.
search
(
'select#industry'
)
data_industry
=
get_name
.
search
(
'option'
).
map
{
|
p
|
p
.
text
.
strip
}
data_industry
=
get_name
.
search
(
'option'
).
map
{
|
p
|
p
.
text
.
strip
}
data_industry
.
each
do
|
name_industry
|
data_industry
.
each
do
|
name_industry
|
industry
=
Industry
.
create!
(
name:
name_industry
)
industry
=
Industry
.
create!
(
name:
name_industry
)
end
end
end
end
def
crawl_company
def
crawl_company
for
n
in
1
..
10
for
n
in
1
..
10
company_info
=
Nokogiri
::
HTML
(
URI
.
open
(
"https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-trang-
#{
n
}
-vi.html"
))
company_info
=
Nokogiri
::
HTML
(
URI
.
open
(
"https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-trang-
#{
n
}
-vi.html"
))
...
@@ -53,29 +55,28 @@
...
@@ -53,29 +55,28 @@
end
end
end
end
end
end
def
crawl_job_relationships
def
crawl_job_relationships
for
n
in
1
..
10
for
n
in
1
..
10
page_access
=
Nokogiri
::
HTML
(
URI
.
open
(
"https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-trang-
#{
n
}
-vi.html"
))
page_access
=
Nokogiri
::
HTML
(
URI
.
open
(
"https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-trang-
#{
n
}
-vi.html"
))
get_link
=
page_access
.
css
(
'a.job_link'
).
map
{
|
link
|
link
[
'href'
]
}
get_link
=
page_access
.
css
(
'a.job_link'
).
map
{
|
link
|
link
[
'href'
]
}
get_link
.
each
do
|
link
|
get_link
.
each
do
|
link
|
if
link
.
include?
(
'\u2013'
)
link
.
gsub!
(
'\u2013'
,
'–'
)
end
page_job
=
Nokogiri
::
HTML
(
URI
.
open
(
URI
.
parse
(
URI
.
escape
(
link
))))
page_job
=
Nokogiri
::
HTML
(
URI
.
open
(
URI
.
parse
(
URI
.
escape
(
link
))))
get_row
=
page_job
.
search
(
'div.bg-blue div.row'
)
get_row
=
page_job
.
search
(
'div.bg-blue div.row'
)
if
get_row
!=
""
if
get_row
!=
""
get_name_company
=
page_job
.
search
(
'div.job-desc a.job-company-name'
).
text
.
strip
get_name_company
=
page_job
.
search
(
'div.job-desc a.job-company-name'
).
text
.
strip
company_table
=
Company
.
find_by
(
name:
"
#{
get_name_company
}
"
)
company_table
=
Company
.
find_by
(
name:
get_name_company
)
title_job
=
page_job
.
search
(
'div.job-desc p'
).
text
title_job
=
page_job
.
search
(
'div.job-desc p'
).
text
description
=
page_job
.
search
(
'div.detail-row'
)
description
=
page_job
.
search
(
'div.detail-row'
)
arr_column
=
get_row
.
css
(
'div.has-background'
).
map
{
|
data
|
data
.
text
.
split
(
' '
).
join
(
' '
)
}
arr_column
=
get_row
.
css
(
'div.has-background'
).
map
{
|
data
|
data
.
text
.
split
(
' '
).
join
(
' '
)
}
arr_column
.
each_with_index
do
|
val
,
key
|
arr_column
.
each_with_index
do
|
val
,
key
|
if
!
company_table
.
nil?
unless
company_table
.
nil?
job_check
=
Job
.
find_by
(
title:
title_job
,
company_id:
company_table
.
id
)
if
val
.
include?
(
'Ngày cập nhật'
)
if
val
.
include?
(
'Ngày cập nhật'
)
arr_data
=
val
.
gsub
(
'Ngày cập nhật '
,
''
).
split
(
' '
)
arr_data
=
val
.
gsub
(
'Ngày cập nhật '
,
''
).
split
(
' '
)
date
=
arr_data
.
first
date
=
arr_data
.
first
elsif
val
.
include?
(
'Lương'
)
&&
val
.
include?
(
'Kinh nghiệm'
)
==
true
&&
Job
.
find_by
(
title:
title_job
,
company_id:
company_table
.
id
)
==
nil
elsif
val
.
include?
(
'Lương'
)
&&
val
.
include?
(
'Kinh nghiệm'
)
==
true
&&
job_check
.
nil?
arr_sub
=
((((
val
.
gsub
(
'Lương '
,
''
)).
gsub
(
' Kinh nghiệm '
,
'*'
)).
gsub
(
' Cấp bậc '
,
'*'
)).
gsub
(
' Hết hạn nộp '
,
'*'
)
).
split
(
'*'
)
arr_sub
=
val
.
gsub
(
'Lương '
,
''
).
gsub
(
' Kinh nghiệm '
,
'*'
).
gsub
(
' Cấp bậc '
,
'*'
).
gsub
(
' Hết hạn nộp '
,
'*'
).
split
(
'*'
)
salary
=
arr_sub
[
0
]
salary
=
arr_sub
[
0
]
experience
=
arr_sub
[
1
]
experience
=
arr_sub
[
1
]
level
=
arr_sub
[
2
]
level
=
arr_sub
[
2
]
...
@@ -87,8 +88,8 @@
...
@@ -87,8 +88,8 @@
expiration_date:
expiration_date
,
expiration_date:
expiration_date
,
description:
description
,
description:
description
,
company_id:
company_table
.
id
)
company_id:
company_table
.
id
)
elsif
val
.
include?
(
'Lương'
)
&&
val
.
include?
(
'Kinh nghiệm'
)
==
false
&&
Job
.
find_by
(
title:
title_job
,
company_id:
company_table
.
id
)
==
nil
elsif
val
.
include?
(
'Lương'
)
&&
val
.
include?
(
'Kinh nghiệm'
)
==
false
&&
job_check
.
nil?
arr_sub
=
(((
val
.
gsub
(
'Lương '
,
''
)).
gsub
(
' Cấp bậc '
,
'*'
)).
gsub
(
' Hết hạn nộp '
,
'*'
)
).
split
(
'*'
)
arr_sub
=
val
.
gsub
(
'Lương '
,
''
).
gsub
(
' Cấp bậc '
,
'*'
).
gsub
(
' Hết hạn nộp '
,
'*'
).
split
(
'*'
)
salary
=
arr_sub
[
0
]
salary
=
arr_sub
[
0
]
level
=
arr_sub
[
1
]
level
=
arr_sub
[
1
]
expiration_date
=
arr_sub
[
2
]
expiration_date
=
arr_sub
[
2
]
...
@@ -101,21 +102,21 @@
...
@@ -101,21 +102,21 @@
company_id:
company_table
.
id
)
company_id:
company_table
.
id
)
end
end
end
end
if
!
company_table
.
nil?
next
if
!
company_table
.
nil?
job_table
=
Job
.
find_by
(
title:
title_job
)
job_table
=
Job
.
find_by
(
title:
title_job
)
if
!
job_table
.
nil?
unless
job_table
.
nil?
location_rel
=
get_row
.
css
(
'div.map p a'
).
children
.
map
{
|
location
|
location
.
text
.
strip
}
location_rel
=
get_row
.
css
(
'div.map p a'
).
children
.
map
{
|
location
|
location
.
text
.
strip
}
location_rel
.
each
do
|
loc
|
location_rel
.
each
do
|
loc
|
city_table
=
City
.
find_by
(
name:
"
#{
loc
}
"
)
city_table
=
City
.
find_by
(
name:
loc
)
if
CityJob
.
find_by
(
job_id:
job_table
.
id
,
city_id:
city_table
.
id
)
==
nil
if
CityJob
.
find_by
(
job_id:
job_table
.
id
,
city_id:
city_table
.
id
).
nil?
puts
"Created City:
#{
job_table
.
id
}
-
#{
city_table
.
id
}
.
#{
loc
}
"
puts
"Created City:
#{
job_table
.
id
}
-
#{
city_table
.
id
}
.
#{
loc
}
"
city_jobs
=
CityJob
.
create!
(
job_id:
job_table
.
id
,
city_id:
city_table
.
id
)
city_jobs
=
CityJob
.
create!
(
job_id:
job_table
.
id
,
city_id:
city_table
.
id
)
end
end
end
end
industry_rel
=
get_row
.
css
(
'li a'
).
children
.
map
{
|
industry
|
industry
.
text
.
strip
}
industry_rel
=
get_row
.
css
(
'li a'
).
children
.
map
{
|
industry
|
industry
.
text
.
strip
}
industry_rel
.
each
do
|
ind
|
industry_rel
.
each
do
|
ind
|
industry_table
=
Industry
.
find_by
(
name:
"
#{
ind
}
"
)
industry_table
=
Industry
.
find_by
(
name:
ind
)
if
IndustryJob
.
find_by
(
job_id:
job_table
.
id
,
industry_id:
industry_table
.
id
)
==
nil
if
IndustryJob
.
find_by
(
job_id:
job_table
.
id
,
industry_id:
industry_table
.
id
).
nil?
puts
"Created Industry:
#{
job_table
.
id
}
-
#{
industry_table
.
id
}
.
#{
ind
}
"
puts
"Created Industry:
#{
job_table
.
id
}
-
#{
industry_table
.
id
}
.
#{
ind
}
"
industry_jobs
=
IndustryJob
.
create!
(
job_id:
job_table
.
id
,
industry_id:
industry_table
.
id
)
industry_jobs
=
IndustryJob
.
create!
(
job_id:
job_table
.
id
,
industry_id:
industry_table
.
id
)
end
end
...
@@ -126,7 +127,6 @@
...
@@ -126,7 +127,6 @@
end
end
end
end
end
end
end
def
get_file_csv
def
get_file_csv
Net
::
FTP
.
open
(
'192.168.1.156'
,
'training'
,
'training'
)
do
|
ftp
|
Net
::
FTP
.
open
(
'192.168.1.156'
,
'training'
,
'training'
)
do
|
ftp
|
...
@@ -151,40 +151,40 @@
...
@@ -151,40 +151,40 @@
file
=
"jobs.csv"
file
=
"jobs.csv"
CSV
.
foreach
(
file
,
headers:
true
)
do
|
row
|
CSV
.
foreach
(
file
,
headers:
true
)
do
|
row
|
begin
begin
company_name
=
row
[
"company name"
].
strip
company_name
=
row
[
"company name"
]
company_address
=
row
[
"company address"
]
company_address
=
row
[
"company address"
]
company_introduction
=
row
[
"benefit"
]
company_introduction
=
row
[
:benefit
]
company_table
=
Company
.
find_by
(
name:
"
#{
company_name
}
"
)
company_table
=
Company
.
find_by
(
name:
company_name
)
if
company_table
==
nil
if
company_table
.
nil?
company_table
=
Company
.
create!
(
name:
company_name
,
company_table
=
Company
.
create!
(
name:
company_name
,
address:
company_address
,
address:
company_address
,
introduction:
company_introduction
)
introduction:
company_introduction
)
end
end
title_job
=
row
[
"name"
].
strip
title_job
=
row
[
:name
]
description_job
=
row
[
"description"
]
description_job
=
row
[
:description
]
level
=
row
[
"level"
]
level
=
row
[
:level
]
salary
=
row
[
"salary"
]
salary
=
row
[
:salary
]
if
company_table
!=
nil
&&
Job
.
find_by
(
title:
title_job
,
level:
level
,
salary:
salary
,
company_id:
company_table
.
id
)
==
nil
unless
company_table
.
nil?
job_table
=
Job
.
create!
(
title:
title_job
,
job_table
=
Job
.
create!
(
title:
title_job
,
description:
description_job
,
description:
description_job
,
level:
level
,
level:
level
,
salary:
salary
,
salary:
salary
,
company_id:
company_table
.
id
)
company_id:
company_table
.
id
)
puts
job_table
.
id
end
end
industry
=
row
[
"category"
].
strip
industry
=
row
[
:category
]
industry_find
=
Industry
.
find_by
(
name:
industry
)
industry_find
=
Industry
.
find_by
(
name:
industry
)
if
industry_find
==
nil
if
industry_find
.
nil?
industry_table
=
Industry
.
create!
(
name:
industry
)
industry_table
=
Industry
.
create!
(
name:
industry
)
industry_job_table
=
IndustryJob
.
create!
(
job_id:
job_table
.
id
,
industry_id:
industry_find
.
id
)
industry_job_table
=
IndustryJob
.
create!
(
job_id:
job_table
.
id
,
industry_id:
industry_find
.
id
)
else
else
industry_job_table
=
IndustryJob
.
create!
(
job_id:
job_table
.
id
,
industry_id:
industry_find
.
id
)
industry_job_table
=
IndustryJob
.
create!
(
job_id:
job_table
.
id
,
industry_id:
industry_find
.
id
)
end
end
puts
"========================================="
puts
job_table
.
id
,
title_job
,
industry
,
salary
puts
job_table
.
id
,
title_job
,
industry
,
salary
location_data
=
row
[
"work place"
].
strip
location_data
=
row
[
"work place"
]
location
=
(
location_data
.
gsub
(
'["'
,
''
)).
gsub
(
'"]'
,
''
).
strip
location
=
location_data
.
gsub
(
'["'
,
''
).
gsub
(
'"]'
,
''
)
location_find
=
City
.
find_by
(
name:
location
)
location_find
=
City
.
find_by
(
name:
location
)
if
location_find
==
nil
if
location_find
.
nil?
city_table
=
City
.
create!
(
name:
location
)
city_table
=
City
.
create!
(
name:
location
)
city_job_table
=
CityJob
.
create!
(
job_id:
job_table
.
id
,
city_id:
location_find
.
id
)
city_job_table
=
CityJob
.
create!
(
job_id:
job_table
.
id
,
city_id:
location_find
.
id
)
else
else
...
@@ -196,6 +196,7 @@
...
@@ -196,6 +196,7 @@
end
end
end
end
end
end
def
logger
def
logger
# config.log_level = :info
# config.log_level = :info
Rails
.
logger
=
Logger
.
new
(
STDOUT
)
Rails
.
logger
=
Logger
.
new
(
STDOUT
)
...
...
lib/src/crontab.rb
View file @
6ab472c3
class
Crontab
class
Crontab
def
find_company
def
find_company
company_info
=
Nokogiri
::
HTML
(
URI
.
open
(
"https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-trang-1-vi.html"
))
company_info
=
Nokogiri
::
HTML
(
URI
.
open
(
"https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-trang-1-vi.html"
))
company_link
=
company_info
.
css
(
'div.caption a.company-name'
).
map
{
|
link
|
link
[
'href'
]
}
company_link
=
company_info
.
css
(
'div.caption a.company-name'
).
map
{
|
link
|
link
[
'href'
]
}
company_link
.
each
do
|
link
|
company_link
.
each
do
|
link
|
if
link
.
include?
(
'\u2019'
)
link
.
gsub!
(
'\u2019'
,
"'"
)
end
next
if
link
==
'javascript:void(0);'
next
if
link
==
'javascript:void(0);'
if
link
!=
'https://careerbuilder.vn/vi/nha-tuyen-dung/hr-vietnam\xE2\x80\x99s-ess-client.35A4EFBA.html'
if
link
!=
'https://careerbuilder.vn/vi/nha-tuyen-dung/hr-vietnam\xE2\x80\x99s-ess-client.35A4EFBA.html'
company_page
=
Nokogiri
::
HTML
(
URI
.
open
(
URI
.
parse
(
URI
.
escape
(
link
))))
company_page
=
Nokogiri
::
HTML
(
URI
.
open
(
URI
.
parse
(
URI
.
escape
(
link
))))
if
!
(
company_page
.
search
(
'p.name'
).
text
).
nil?
unless
(
company_page
.
search
(
'p.name'
).
text
).
nil?
begin
begin
name_company
=
company_page
.
search
(
'p.name'
).
text
name_company
=
company_page
.
search
(
'p.name'
).
text
address_company
=
company_page
.
css
(
'div.content p'
).
children
[
1
].
text
address_company
=
company_page
.
css
(
'div.content p'
).
children
[
1
].
text
...
@@ -29,11 +26,8 @@ class Crontab
...
@@ -29,11 +26,8 @@ class Crontab
end
end
def
find_job
def
find_job
page_access
=
Nokogiri
::
HTML
(
URI
.
open
(
"https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-vi.html"
))
page_access
=
Nokogiri
::
HTML
(
URI
.
open
(
"https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-vi.html"
))
get_link
=
page_access
.
css
(
'a.job_link'
).
map
{
|
link
|
link
[
'href'
]
}
get_link
=
page_access
.
css
(
'a.job_link'
).
map
{
|
link
|
link
[
'href'
]
}
get_link
.
each
do
|
link
|
get_link
.
each
do
|
link
|
if
link
.
include?
(
'\u2013'
)
link
.
gsub!
(
'\u2013'
,
'–'
)
end
page_job
=
Nokogiri
::
HTML
(
URI
.
open
(
URI
.
parse
(
URI
.
escape
(
link
))))
page_job
=
Nokogiri
::
HTML
(
URI
.
open
(
URI
.
parse
(
URI
.
escape
(
link
))))
get_row
=
page_job
.
search
(
'div.bg-blue div.row'
)
get_row
=
page_job
.
search
(
'div.bg-blue div.row'
)
if
get_row
!=
""
if
get_row
!=
""
...
@@ -41,15 +35,16 @@ class Crontab
...
@@ -41,15 +35,16 @@ class Crontab
company_table
=
Company
.
find_by
(
name:
get_name_company
)
company_table
=
Company
.
find_by
(
name:
get_name_company
)
title_job
=
page_job
.
search
(
'div.job-desc p'
).
text
title_job
=
page_job
.
search
(
'div.job-desc p'
).
text
description
=
page_job
.
search
(
'div.detail-row'
)
description
=
page_job
.
search
(
'div.detail-row'
)
arr_column
=
get_row
.
css
(
'div.has-background'
).
map
{
|
data
|
data
.
text
.
split
(
' '
).
join
(
' '
)
}
arr_column
=
get_row
.
css
(
'div.has-background'
).
map
{
|
data
|
data
.
text
.
split
(
' '
).
join
(
' '
)
}
job_table
=
Job
.
find_by
(
title:
title_job
)
job_table
=
Job
.
find_by
(
title:
title_job
)
arr_column
.
each_with_index
do
|
val
,
key
|
arr_column
.
each
do
|
val
|
if
!
company_table
.
nil?
unless
company_table
.
nil?
job_check
=
Job
.
find_by
(
title:
title_job
,
company_id:
company_table
.
id
)
if
val
.
include?
(
'Ngày cập nhật'
)
if
val
.
include?
(
'Ngày cập nhật'
)
arr_data
=
val
.
gsub
(
'Ngày cập nhật '
,
''
).
split
(
' '
)
arr_data
=
val
.
gsub
(
'Ngày cập nhật '
,
''
).
split
(
' '
)
date
=
arr_data
.
first
date
_update
=
arr_data
.
first
elsif
val
.
include?
(
'Lương'
)
&&
val
.
include?
(
'Kinh nghiệm'
)
==
true
&&
Job
.
find_by
(
title:
title_job
,
company_id:
company_table
.
id
)
==
nil
elsif
val
.
include?
(
'Lương'
)
&&
val
.
include?
(
'Kinh nghiệm'
)
==
true
&&
job_check
.
nil?
arr_sub
=
((((
val
.
gsub
(
'Lương '
,
''
)).
gsub
(
' Kinh nghiệm '
,
'*'
)).
gsub
(
' Cấp bậc '
,
'*'
)).
gsub
(
' Hết hạn nộp '
,
'*'
)
).
split
(
'*'
)
arr_sub
=
val
.
gsub
(
'Lương '
,
''
).
gsub
(
' Kinh nghiệm '
,
'*'
).
gsub
(
' Cấp bậc '
,
'*'
).
gsub
(
' Hết hạn nộp '
,
'*'
).
split
(
'*'
)
salary
=
arr_sub
[
0
]
salary
=
arr_sub
[
0
]
experience
=
arr_sub
[
1
]
experience
=
arr_sub
[
1
]
level
=
arr_sub
[
2
]
level
=
arr_sub
[
2
]
...
@@ -61,8 +56,8 @@ class Crontab
...
@@ -61,8 +56,8 @@ class Crontab
expiration_date:
expiration_date
,
expiration_date:
expiration_date
,
description:
description
,
description:
description
,
company_id:
company_table
.
id
)
company_id:
company_table
.
id
)
elsif
val
.
include?
(
'Lương'
)
&&
val
.
include?
(
'Kinh nghiệm'
)
==
false
&&
Job
.
find_by
(
title:
title_job
,
company_id:
company_table
.
id
)
==
nil
elsif
val
.
include?
(
'Lương'
)
&&
val
.
include?
(
'Kinh nghiệm'
)
==
false
&&
job_check
.
nil?
arr_sub
=
(((
val
.
gsub
(
'Lương '
,
''
)).
gsub
(
' Cấp bậc '
,
'*'
)).
gsub
(
' Hết hạn nộp '
,
'*'
)
).
split
(
'*'
)
arr_sub
=
val
.
gsub
(
'Lương '
,
''
).
gsub
(
' Cấp bậc '
,
'*'
).
gsub
(
' Hết hạn nộp '
,
'*'
).
split
(
'*'
)
salary
=
arr_sub
[
0
]
salary
=
arr_sub
[
0
]
level
=
arr_sub
[
1
]
level
=
arr_sub
[
1
]
expiration_date
=
arr_sub
[
2
]
expiration_date
=
arr_sub
[
2
]
...
@@ -77,19 +72,19 @@ class Crontab
...
@@ -77,19 +72,19 @@ class Crontab
end
end
end
end
if
!
job_table
.
nil?
&&
!
company_table
.
nil?
if
!
job_table
.
nil?
&&
!
company_table
.
nil?
location_rel
=
get_row
.
css
(
'div.map p a'
).
children
.
map
{
|
location
|
location
.
text
.
strip
}
location_rel
=
get_row
.
css
(
'div.map p a'
).
children
.
map
{
|
location
|
location
.
text
.
strip
}
location_rel
.
each
do
|
loc
|
location_rel
.
each
do
|
loc
|
city_table
=
City
.
find_by
(
name:
"
#{
loc
}
"
)
city_table
=
City
.
find_by
(
name:
loc
)
if
CityJob
.
find_by
(
job_id:
job_table
.
id
,
city_id:
city_table
.
id
)
==
nil
if
CityJob
.
find_by
(
job_id:
job_table
.
id
,
city_id:
city_table
.
id
)
.
nil?
puts
"Created
#{
job_table
.
id
}
-
#{
city_table
.
id
}
.
#{
loc
}
"
puts
"Created City
#{
city_table
.
id
}
=>
#{
loc
}
"
city_jobs
=
CityJob
.
create!
(
job_id:
job_table
.
id
,
city_id:
city_table
.
id
)
city_jobs
=
CityJob
.
create!
(
job_id:
job_table
.
id
,
city_id:
city_table
.
id
)
end
end
end
end
industry_rel
=
get_row
.
css
(
'li a'
).
children
.
map
{
|
industry
|
industry
.
text
.
strip
}
industry_rel
=
get_row
.
css
(
'li a'
).
children
.
map
{
|
industry
|
industry
.
text
.
strip
}
industry_rel
.
each
do
|
ind
|
industry_rel
.
each
do
|
ind
|
industry_table
=
Industry
.
find_by
(
name:
"
#{
ind
}
"
)
industry_table
=
Industry
.
find_by
(
name:
ind
)
if
IndustryJob
.
find_by
(
job_id:
job_table
.
id
,
industry_id:
industry_table
.
id
)
==
nil
if
IndustryJob
.
find_by
(
job_id:
job_table
.
id
,
industry_id:
industry_table
.
id
)
.
nil?
puts
"
#{
job_table
.
id
}
-
#{
industry_table
.
id
}
.
#{
ind
}
"
puts
"Created Industry
#{
job_table
.
id
}
-
#{
industry_table
.
id
}
=>
#{
ind
}
"
industry_jobs
=
IndustryJob
.
create!
(
job_id:
job_table
.
id
,
industry_id:
industry_table
.
id
)
industry_jobs
=
IndustryJob
.
create!
(
job_id:
job_table
.
id
,
industry_id:
industry_table
.
id
)
end
end
end
end
...
...
lib/tasks/crawler_import.rake
View file @
6ab472c3
...
@@ -6,23 +6,23 @@ require 'zip'
...
@@ -6,23 +6,23 @@ require 'zip'
action
=
Crawler
.
new
action
=
Crawler
.
new
crontab
=
Crontab
.
new
crontab
=
Crontab
.
new
namespace
:import
do
namespace
:import
do
desc
"crawler data"
desc
'crawler data'
task
crawler: :environment
do
task
crawler: :environment
do
action
.
crawl_city
action
.
crawl_city
action
.
crawl_industry
action
.
crawl_industry
action
.
crawl_company
action
.
crawl_company
action
.
crawl_job_relationships
action
.
crawl_job_relationships
end
end
desc
"get file CSV from server"
desc
'get file CSV from Server'
task
csv_get: :environment
do
task
csv_get: :environment
do
action
.
get_file_csv
action
.
get_file_csv
action
.
extract_zip
(
'./jobs.zip'
,
'.'
)
action
.
extract_zip
(
'./jobs.zip'
,
'.'
)
end
end
desc
"Import data from CSV"
desc
'Import data from CSV'
task
data_csv: :environment
do
task
data_csv: :environment
do
action
.
import_file_csv
action
.
import_file_csv
end
end
desc
"Crontab"
desc
'Crontab'
task
auto: :environment
do
task
auto: :environment
do
crontab
.
find_company
crontab
.
find_company
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment