Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
V
venjob
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Huỳnh Thiên Phước
venjob
Commits
6ab472c3
Commit
6ab472c3
authored
Jul 28, 2020
by
Huỳnh Thiên Phước
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Fix code
parent
5204717b
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
131 additions
and
138 deletions
+131
-138
app/views/top_pages/show.html.erb
+2
-2
lib/src/crawler.rb
+85
-85
lib/src/crontab.rb
+39
-45
lib/tasks/crawler_import.rake
+5
-6
No files found.
app/views/top_pages/show.html.erb
View file @
6ab472c3
...
...
@@ -2,8 +2,8 @@
<%
@job
.
each
do
|
job
|
%>
<%
if
!
job
.
cities
.
blank?
%>
<ul>
<div
class=
"title"
><strong>
<%=
(
@company
.
find_by
(
id:
job
.
company_id
)).
nam
e
%>
</strong></div>
<
%=
job
.
title
%
>
<div
class=
"title"
><strong>
<%=
job
.
titl
e
%>
</strong></div>
<
div>
<%=
(
@company
.
find_by
(
id:
job
.
company_id
)).
name
%>
</div
>
<div
class=
"salary"
><i
class=
"fas fa-dollar-sign"
></i>
Lương:
<%=
job
.
salary
%>
</div>
<div><i
class=
"fas fa-map-marker"
></i>
<%
job
.
cities
.
each
do
|
location
|
%>
...
...
lib/src/crawler.rb
View file @
6ab472c3
class
Crawler
class
Crawler
def
crawl_city
page
=
Nokogiri
::
HTML
(
URI
.
open
(
"https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-vi.html"
))
get_name
=
page
.
search
(
'select#location'
)
...
...
@@ -14,15 +14,17 @@
end
end
end
def
crawl_industry
page
=
Nokogiri
::
HTML
(
URI
.
open
(
"https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-vi.html"
))
get_name
=
page
.
search
(
'select#industry'
)
data_industry
=
get_name
.
search
(
'option'
).
map
{
|
p
|
p
.
text
.
strip
}
data_industry
=
get_name
.
search
(
'option'
).
map
{
|
p
|
p
.
text
.
strip
}
data_industry
.
each
do
|
name_industry
|
industry
=
Industry
.
create!
(
name:
name_industry
)
end
end
def
crawl_company
for
n
in
1
..
10
company_info
=
Nokogiri
::
HTML
(
URI
.
open
(
"https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-trang-
#{
n
}
-vi.html"
))
...
...
@@ -36,10 +38,10 @@
company_page
=
Nokogiri
::
HTML
(
URI
.
open
(
URI
.
parse
(
URI
.
escape
(
link
))))
if
!
(
company_page
.
search
(
'p.name'
).
text
).
nil?
begin
name_company
=
company_page
.
search
(
'p.name'
).
text
address_company
=
company_page
.
css
(
'div.content p'
).
children
[
1
].
text
name_company
=
company_page
.
search
(
'p.name'
).
text
address_company
=
company_page
.
css
(
'div.content p'
).
children
[
1
].
text
introduction_company
=
company_page
.
css
(
'div.main-about-us'
).
text
get_name_company
=
Company
.
find_by
(
name:
name_company
)
get_name_company
=
Company
.
find_by
(
name:
name_company
)
if
get_name_company
.
nil?
company
=
Company
.
create!
(
name:
name_company
,
address:
address_company
,
...
...
@@ -53,32 +55,31 @@
end
end
end
def
crawl_job_relationships
for
n
in
1
..
10
page_access
=
Nokogiri
::
HTML
(
URI
.
open
(
"https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-trang-
#{
n
}
-vi.html"
))
get_link
=
page_access
.
css
(
'a.job_link'
).
map
{
|
link
|
link
[
'href'
]
}
get_link
=
page_access
.
css
(
'a.job_link'
).
map
{
|
link
|
link
[
'href'
]
}
get_link
.
each
do
|
link
|
if
link
.
include?
(
'\u2013'
)
link
.
gsub!
(
'\u2013'
,
'–'
)
end
page_job
=
Nokogiri
::
HTML
(
URI
.
open
(
URI
.
parse
(
URI
.
escape
(
link
))))
get_row
=
page_job
.
search
(
'div.bg-blue div.row'
)
if
get_row
!=
""
get_name_company
=
page_job
.
search
(
'div.job-desc a.job-company-name'
).
text
.
strip
company_table
=
Company
.
find_by
(
name:
"
#{
get_name_company
}
"
)
title_job
=
page_job
.
search
(
'div.job-desc p'
).
text
description
=
page_job
.
search
(
'div.detail-row'
)
arr_column
=
get_row
.
css
(
'div.has-background'
).
map
{
|
data
|
data
.
text
.
split
(
' '
).
join
(
' '
)
}
arr_column
.
each_with_index
do
|
val
,
key
|
if
!
company_table
.
nil?
company_table
=
Company
.
find_by
(
name:
get_name_company
)
title_job
=
page_job
.
search
(
'div.job-desc p'
).
text
description
=
page_job
.
search
(
'div.detail-row'
)
arr_column
=
get_row
.
css
(
'div.has-background'
).
map
{
|
data
|
data
.
text
.
split
(
' '
).
join
(
' '
)
}
arr_column
.
each_with_index
do
|
val
,
key
|
unless
company_table
.
nil?
job_check
=
Job
.
find_by
(
title:
title_job
,
company_id:
company_table
.
id
)
if
val
.
include?
(
'Ngày cập nhật'
)
arr_data
=
val
.
gsub
(
'Ngày cập nhật '
,
''
).
split
(
' '
)
arr_data
=
val
.
gsub
(
'Ngày cập nhật '
,
''
).
split
(
' '
)
date
=
arr_data
.
first
elsif
val
.
include?
(
'Lương'
)
&&
val
.
include?
(
'Kinh nghiệm'
)
==
true
&&
Job
.
find_by
(
title:
title_job
,
company_id:
company_table
.
id
)
==
nil
arr_sub
=
((((
val
.
gsub
(
'Lương '
,
''
)).
gsub
(
' Kinh nghiệm '
,
'*'
)).
gsub
(
' Cấp bậc '
,
'*'
)).
gsub
(
' Hết hạn nộp '
,
'*'
)
).
split
(
'*'
)
salary
=
arr_sub
[
0
]
experience
=
arr_sub
[
1
]
level
=
arr_sub
[
2
]
elsif
val
.
include?
(
'Lương'
)
&&
val
.
include?
(
'Kinh nghiệm'
)
==
true
&&
job_check
.
nil?
arr_sub
=
val
.
gsub
(
'Lương '
,
''
).
gsub
(
' Kinh nghiệm '
,
'*'
).
gsub
(
' Cấp bậc '
,
'*'
).
gsub
(
' Hết hạn nộp '
,
'*'
).
split
(
'*'
)
salary
=
arr_sub
[
0
]
experience
=
arr_sub
[
1
]
level
=
arr_sub
[
2
]
expiration_date
=
arr_sub
[
3
]
job
=
Job
.
create!
(
title:
title_job
,
level:
level
,
...
...
@@ -87,10 +88,10 @@
expiration_date:
expiration_date
,
description:
description
,
company_id:
company_table
.
id
)
elsif
val
.
include?
(
'Lương'
)
&&
val
.
include?
(
'Kinh nghiệm'
)
==
false
&&
Job
.
find_by
(
title:
title_job
,
company_id:
company_table
.
id
)
==
nil
arr_sub
=
(((
val
.
gsub
(
'Lương '
,
''
)).
gsub
(
' Cấp bậc '
,
'*'
)).
gsub
(
' Hết hạn nộp '
,
'*'
)
).
split
(
'*'
)
salary
=
arr_sub
[
0
]
level
=
arr_sub
[
1
]
elsif
val
.
include?
(
'Lương'
)
&&
val
.
include?
(
'Kinh nghiệm'
)
==
false
&&
job_check
.
nil?
arr_sub
=
val
.
gsub
(
'Lương '
,
''
).
gsub
(
' Cấp bậc '
,
'*'
).
gsub
(
' Hết hạn nộp '
,
'*'
).
split
(
'*'
)
salary
=
arr_sub
[
0
]
level
=
arr_sub
[
1
]
expiration_date
=
arr_sub
[
2
]
job
=
Job
.
create!
(
title:
title_job
,
level:
level
,
...
...
@@ -101,24 +102,23 @@
company_id:
company_table
.
id
)
end
end
if
!
company_table
.
nil?
job_table
=
Job
.
find_by
(
title:
title_job
)
if
!
job_table
.
nil?
location_rel
=
get_row
.
css
(
'div.map p a'
).
children
.
map
{
|
location
|
location
.
text
.
strip
}
location_rel
.
each
do
|
loc
|
city_table
=
City
.
find_by
(
name:
"
#{
loc
}
"
)
if
CityJob
.
find_by
(
job_id:
job_table
.
id
,
city_id:
city_table
.
id
)
==
nil
puts
"Created City:
#{
job_table
.
id
}
-
#{
city_table
.
id
}
.
#{
loc
}
"
city_jobs
=
CityJob
.
create!
(
job_id:
job_table
.
id
,
city_id:
city_table
.
id
)
end
next
if
!
company_table
.
nil?
job_table
=
Job
.
find_by
(
title:
title_job
)
unless
job_table
.
nil?
location_rel
=
get_row
.
css
(
'div.map p a'
).
children
.
map
{
|
location
|
location
.
text
.
strip
}
location_rel
.
each
do
|
loc
|
city_table
=
City
.
find_by
(
name:
loc
)
if
CityJob
.
find_by
(
job_id:
job_table
.
id
,
city_id:
city_table
.
id
).
nil?
puts
"Created City:
#{
job_table
.
id
}
-
#{
city_table
.
id
}
.
#{
loc
}
"
city_jobs
=
CityJob
.
create!
(
job_id:
job_table
.
id
,
city_id:
city_table
.
id
)
end
industry_rel
=
get_row
.
css
(
'li a'
).
children
.
map
{
|
industry
|
industry
.
text
.
strip
}
industry_rel
.
each
do
|
ind
|
industry_table
=
Industry
.
find_by
(
name:
"
#{
ind
}
"
)
if
IndustryJob
.
find_by
(
job_id:
job_table
.
id
,
industry_id:
industry_table
.
id
)
==
nil
puts
"Created Industry:
#{
job_table
.
id
}
-
#{
industry_table
.
id
}
.
#{
ind
}
"
industry_jobs
=
IndustryJob
.
create!
(
job_id:
job_table
.
id
,
industry_id:
industry_table
.
id
)
end
end
industry_rel
=
get_row
.
css
(
'li a'
).
children
.
map
{
|
industry
|
industry
.
text
.
strip
}
industry_rel
.
each
do
|
ind
|
industry_table
=
Industry
.
find_by
(
name:
ind
)
if
IndustryJob
.
find_by
(
job_id:
job_table
.
id
,
industry_id:
industry_table
.
id
).
nil?
puts
"Created Industry:
#{
job_table
.
id
}
-
#{
industry_table
.
id
}
.
#{
ind
}
"
industry_jobs
=
IndustryJob
.
create!
(
job_id:
job_table
.
id
,
industry_id:
industry_table
.
id
)
end
end
end
...
...
@@ -151,51 +151,52 @@
file
=
"jobs.csv"
CSV
.
foreach
(
file
,
headers:
true
)
do
|
row
|
begin
company_name
=
row
[
"company name"
].
strip
company_address
=
row
[
"company address"
]
company_introduction
=
row
[
"benefit"
]
company_table
=
Company
.
find_by
(
name:
"
#{
company_name
}
"
)
if
company_table
==
nil
company_table
=
Company
.
create!
(
name:
company_name
,
address:
company_address
,
introduction:
company_introduction
)
end
title_job
=
row
[
"name"
].
strip
description_job
=
row
[
"description"
]
level
=
row
[
"level"
]
salary
=
row
[
"salary"
]
if
company_table
!=
nil
&&
Job
.
find_by
(
title:
title_job
,
level:
level
,
salary:
salary
,
company_id:
company_table
.
id
)
==
nil
job_table
=
Job
.
create!
(
title:
title_job
,
description:
description_job
,
level:
level
,
salary:
salary
,
company_id:
company_table
.
id
)
en
d
industry
=
row
[
"category"
].
strip
industry_find
=
Industry
.
find_by
(
name:
industry
)
if
industry_find
==
nil
i
ndustry_table
=
Industry
.
create!
(
name:
industry
)
industry_job_table
=
IndustryJob
.
create!
(
job_id:
job_table
.
id
,
industry_id:
industry_find
.
id
)
else
industry_job_table
=
IndustryJob
.
create!
(
job_id:
job_table
.
id
,
industry_id:
industry_find
.
id
)
end
puts
"========================================="
puts
job_table
.
id
,
title_job
,
industry
,
salary
location_data
=
row
[
"work place"
].
strip
location
=
(
location_data
.
gsub
(
'["'
,
''
)).
gsub
(
'"]'
,
''
).
strip
location_find
=
City
.
find_by
(
name:
location
)
if
location_find
==
nil
city_table
=
City
.
create!
(
name:
location
)
city_job_table
=
CityJob
.
create!
(
job_id:
job_table
.
id
,
city_id:
location_find
.
id
)
else
city_job_table
=
CityJob
.
create!
(
job_id:
job_table
.
id
,
city_id:
location_find
.
id
)
end
puts
"Location:
#{
location
}
"
company_name
=
row
[
"company name"
]
company_address
=
row
[
"company address"
]
company_introduction
=
row
[
:benefit
]
company_table
=
Company
.
find_by
(
name:
company_name
)
if
company_table
.
nil?
company_table
=
Company
.
create!
(
name:
company_name
,
address:
company_address
,
introduction:
company_introduction
)
end
title_job
=
row
[
:name
]
description_job
=
row
[
:description
]
level
=
row
[
:level
]
salary
=
row
[
:salary
]
unless
company_table
.
nil?
job_table
=
Job
.
create!
(
title:
title_job
,
description:
description_job
,
level:
level
,
salary:
salary
,
company_id:
company_table
.
id
)
puts
job_table
.
i
d
end
industry
=
row
[
:category
]
industry_find
=
Industry
.
find_by
(
name:
industry
)
i
f
industry_find
.
nil?
industry_table
=
Industry
.
create!
(
name:
industry
)
industry_job_table
=
IndustryJob
.
create!
(
job_id:
job_table
.
id
,
industry_id:
industry_find
.
id
)
else
industry_job_table
=
IndustryJob
.
create!
(
job_id:
job_table
.
id
,
industry_id:
industry_find
.
id
)
end
puts
job_table
.
id
,
title_job
,
industry
,
salary
location_data
=
row
[
"work place"
]
location
=
location_data
.
gsub
(
'["'
,
''
).
gsub
(
'"]'
,
''
)
location_find
=
City
.
find_by
(
name:
location
)
if
location_find
.
nil?
city_table
=
City
.
create!
(
name:
location
)
city_job_table
=
CityJob
.
create!
(
job_id:
job_table
.
id
,
city_id:
location_find
.
id
)
else
city_job_table
=
CityJob
.
create!
(
job_id:
job_table
.
id
,
city_id:
location_find
.
id
)
end
puts
"Location:
#{
location
}
"
rescue
StandardError
=>
e
puts
e
puts
e
end
end
end
def
logger
# config.log_level = :info
Rails
.
logger
=
Logger
.
new
(
STDOUT
)
...
...
@@ -203,4 +204,4 @@
Rails
.
logger
.
level
=
Logger
::
DEBUG
Rails
.
logger
.
datetime_format
=
"%Y-%m-%d %H:%M:%S"
end
end
\ No newline at end of file
end
lib/src/crontab.rb
View file @
6ab472c3
class
Crontab
def
find_company
company_info
=
Nokogiri
::
HTML
(
URI
.
open
(
"https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-trang-1-vi.html"
))
company_link
=
company_info
.
css
(
'div.caption a.company-name'
).
map
{
|
link
|
link
[
'href'
]
}
company_link
.
each
do
|
link
|
if
link
.
include?
(
'\u2019'
)
link
.
gsub!
(
'\u2019'
,
"'"
)
end
next
if
link
==
'javascript:void(0);'
company_link
=
company_info
.
css
(
'div.caption a.company-name'
).
map
{
|
link
|
link
[
'href'
]
}
company_link
.
each
do
|
link
|
next
if
link
==
'javascript:void(0);'
if
link
!=
'https://careerbuilder.vn/vi/nha-tuyen-dung/hr-vietnam\xE2\x80\x99s-ess-client.35A4EFBA.html'
company_page
=
Nokogiri
::
HTML
(
URI
.
open
(
URI
.
parse
(
URI
.
escape
(
link
))))
if
!
(
company_page
.
search
(
'p.name'
).
text
).
nil?
unless
(
company_page
.
search
(
'p.name'
).
text
).
nil?
begin
name_company
=
company_page
.
search
(
'p.name'
).
text
address_company
=
company_page
.
css
(
'div.content p'
).
children
[
1
].
text
introduction_company
=
company_page
.
css
(
'div.main-about-us'
).
text
get_name_company
=
Company
.
find_by
(
name:
name_company
)
if
get_name_company
.
nil?
company
=
Company
.
create!
(
name:
name_company
,
address:
address_company
,
introduction:
introduction_company
)
company
=
Company
.
create!
(
name:
name_company
,
address:
address_company
,
introduction:
introduction_company
)
end
rescue
StandardError
=>
e
puts
e
rescue
StandardError
=>
e
puts
e
end
end
end
...
...
@@ -29,30 +26,28 @@ class Crontab
end
def
find_job
page_access
=
Nokogiri
::
HTML
(
URI
.
open
(
"https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-vi.html"
))
get_link
=
page_access
.
css
(
'a.job_link'
).
map
{
|
link
|
link
[
'href'
]
}
get_link
.
each
do
|
link
|
if
link
.
include?
(
'\u2013'
)
link
.
gsub!
(
'\u2013'
,
'–'
)
end
page_job
=
Nokogiri
::
HTML
(
URI
.
open
(
URI
.
parse
(
URI
.
escape
(
link
))))
get_row
=
page_job
.
search
(
'div.bg-blue div.row'
)
get_link
=
page_access
.
css
(
'a.job_link'
).
map
{
|
link
|
link
[
'href'
]
}
get_link
.
each
do
|
link
|
page_job
=
Nokogiri
::
HTML
(
URI
.
open
(
URI
.
parse
(
URI
.
escape
(
link
))))
get_row
=
page_job
.
search
(
'div.bg-blue div.row'
)
if
get_row
!=
""
get_name_company
=
page_job
.
search
(
'div.job-desc a.job-company-name'
).
text
.
strip
company_table
=
Company
.
find_by
(
name:
get_name_company
)
title_job
=
page_job
.
search
(
'div.job-desc p'
).
text
description
=
page_job
.
search
(
'div.detail-row'
)
arr_column
=
get_row
.
css
(
'div.has-background'
).
map
{
|
data
|
data
.
text
.
split
(
' '
).
join
(
' '
)
}
arr_column
=
get_row
.
css
(
'div.has-background'
).
map
{
|
data
|
data
.
text
.
split
(
' '
).
join
(
' '
)
}
job_table
=
Job
.
find_by
(
title:
title_job
)
arr_column
.
each_with_index
do
|
val
,
key
|
if
!
company_table
.
nil?
arr_column
.
each
do
|
val
|
unless
company_table
.
nil?
job_check
=
Job
.
find_by
(
title:
title_job
,
company_id:
company_table
.
id
)
if
val
.
include?
(
'Ngày cập nhật'
)
arr_data
=
val
.
gsub
(
'Ngày cập nhật '
,
''
).
split
(
' '
)
date
=
arr_data
.
first
elsif
val
.
include?
(
'Lương'
)
&&
val
.
include?
(
'Kinh nghiệm'
)
==
true
&&
Job
.
find_by
(
title:
title_job
,
company_id:
company_table
.
id
)
==
nil
arr_sub
=
((((
val
.
gsub
(
'Lương '
,
''
)).
gsub
(
' Kinh nghiệm '
,
'*'
)).
gsub
(
' Cấp bậc '
,
'*'
)).
gsub
(
' Hết hạn nộp '
,
'*'
)
).
split
(
'*'
)
salary
=
arr_sub
[
0
]
experience
=
arr_sub
[
1
]
level
=
arr_sub
[
2
]
arr_data
=
val
.
gsub
(
'Ngày cập nhật '
,
''
).
split
(
' '
)
date
_update
=
arr_data
.
first
elsif
val
.
include?
(
'Lương'
)
&&
val
.
include?
(
'Kinh nghiệm'
)
==
true
&&
job_check
.
nil?
arr_sub
=
val
.
gsub
(
'Lương '
,
''
).
gsub
(
' Kinh nghiệm '
,
'*'
).
gsub
(
' Cấp bậc '
,
'*'
).
gsub
(
' Hết hạn nộp '
,
'*'
).
split
(
'*'
)
salary
=
arr_sub
[
0
]
experience
=
arr_sub
[
1
]
level
=
arr_sub
[
2
]
expiration_date
=
arr_sub
[
3
]
job
=
Job
.
create!
(
title:
title_job
,
level:
level
,
...
...
@@ -61,10 +56,10 @@ class Crontab
expiration_date:
expiration_date
,
description:
description
,
company_id:
company_table
.
id
)
elsif
val
.
include?
(
'Lương'
)
&&
val
.
include?
(
'Kinh nghiệm'
)
==
false
&&
Job
.
find_by
(
title:
title_job
,
company_id:
company_table
.
id
)
==
nil
arr_sub
=
(((
val
.
gsub
(
'Lương '
,
''
)).
gsub
(
' Cấp bậc '
,
'*'
)).
gsub
(
' Hết hạn nộp '
,
'*'
)
).
split
(
'*'
)
salary
=
arr_sub
[
0
]
level
=
arr_sub
[
1
]
elsif
val
.
include?
(
'Lương'
)
&&
val
.
include?
(
'Kinh nghiệm'
)
==
false
&&
job_check
.
nil?
arr_sub
=
val
.
gsub
(
'Lương '
,
''
).
gsub
(
' Cấp bậc '
,
'*'
).
gsub
(
' Hết hạn nộp '
,
'*'
).
split
(
'*'
)
salary
=
arr_sub
[
0
]
level
=
arr_sub
[
1
]
expiration_date
=
arr_sub
[
2
]
job
=
Job
.
create!
(
title:
title_job
,
level:
level
,
...
...
@@ -77,24 +72,24 @@ class Crontab
end
end
if
!
job_table
.
nil?
&&
!
company_table
.
nil?
location_rel
=
get_row
.
css
(
'div.map p a'
).
children
.
map
{
|
location
|
location
.
text
.
strip
}
location_rel
=
get_row
.
css
(
'div.map p a'
).
children
.
map
{
|
location
|
location
.
text
.
strip
}
location_rel
.
each
do
|
loc
|
city_table
=
City
.
find_by
(
name:
"
#{
loc
}
"
)
if
CityJob
.
find_by
(
job_id:
job_table
.
id
,
city_id:
city_table
.
id
)
==
nil
puts
"Created
#{
job_table
.
id
}
-
#{
city_table
.
id
}
.
#{
loc
}
"
city_jobs
=
CityJob
.
create!
(
job_id:
job_table
.
id
,
city_id:
city_table
.
id
)
city_table
=
City
.
find_by
(
name:
loc
)
if
CityJob
.
find_by
(
job_id:
job_table
.
id
,
city_id:
city_table
.
id
)
.
nil?
puts
"Created City
#{
city_table
.
id
}
=>
#{
loc
}
"
city_jobs
=
CityJob
.
create!
(
job_id:
job_table
.
id
,
city_id:
city_table
.
id
)
end
end
industry_rel
=
get_row
.
css
(
'li a'
).
children
.
map
{
|
industry
|
industry
.
text
.
strip
}
industry_rel
=
get_row
.
css
(
'li a'
).
children
.
map
{
|
industry
|
industry
.
text
.
strip
}
industry_rel
.
each
do
|
ind
|
industry_table
=
Industry
.
find_by
(
name:
"
#{
ind
}
"
)
if
IndustryJob
.
find_by
(
job_id:
job_table
.
id
,
industry_id:
industry_table
.
id
)
==
nil
puts
"
#{
job_table
.
id
}
-
#{
industry_table
.
id
}
.
#{
ind
}
"
industry_jobs
=
IndustryJob
.
create!
(
job_id:
job_table
.
id
,
industry_id:
industry_table
.
id
)
industry_table
=
Industry
.
find_by
(
name:
ind
)
if
IndustryJob
.
find_by
(
job_id:
job_table
.
id
,
industry_id:
industry_table
.
id
)
.
nil?
puts
"Created Industry
#{
job_table
.
id
}
-
#{
industry_table
.
id
}
=>
#{
ind
}
"
industry_jobs
=
IndustryJob
.
create!
(
job_id:
job_table
.
id
,
industry_id:
industry_table
.
id
)
end
end
end
end
end
end
end
\ No newline at end of file
end
lib/tasks/crawler_import.rake
View file @
6ab472c3
...
...
@@ -6,23 +6,23 @@ require 'zip'
action
=
Crawler
.
new
crontab
=
Crontab
.
new
namespace
:import
do
desc
"crawler data"
desc
'crawler data'
task
crawler: :environment
do
action
.
crawl_city
action
.
crawl_industry
action
.
crawl_company
action
.
crawl_job_relationships
end
desc
"get file CSV from server"
desc
'get file CSV from Server'
task
csv_get: :environment
do
action
.
get_file_csv
action
.
extract_zip
(
'./jobs.zip'
,
'.'
)
end
desc
"Import data from CSV"
desc
'Import data from CSV'
task
data_csv: :environment
do
action
.
import_file_csv
end
desc
"Crontab"
desc
'Crontab'
task
auto: :environment
do
crontab
.
find_company
...
...
@@ -33,4 +33,4 @@ namespace :import do
task
log: :environment
do
action
.
logger
end
end
\ No newline at end of file
end
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment