Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
V
venjob
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Huỳnh Thiên Phước
venjob
Commits
59fdc24d
Commit
59fdc24d
authored
Jul 22, 2020
by
Huynh Thien Phuoc
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Created crawler
parent
43b8d194
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
38 additions
and
283 deletions
+38
-283
jobs.csv
+0
-0
lib/src/crawler.rb
+37
-280
lib/tasks/crawler_import.rake
+1
-3
No files found.
jobs.csv
0 → 100644
View file @
59fdc24d
This source diff could not be displayed because it is too large. You can
view the blob
instead.
lib/src/crawler.rb
View file @
59fdc24d
...
@@ -54,7 +54,7 @@
...
@@ -54,7 +54,7 @@
end
end
end
end
end
end
def
crawl_job
def
crawl_job
_relationships
for
n
in
1
..
10
for
n
in
1
..
10
page_access
=
Nokogiri
::
HTML
(
URI
.
open
(
"https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-trang-
#{
n
}
-vi.html"
))
page_access
=
Nokogiri
::
HTML
(
URI
.
open
(
"https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-trang-
#{
n
}
-vi.html"
))
get_link
=
page_access
.
css
(
'a.job_link'
).
map
{
|
link
|
link
[
'href'
]
}
get_link
=
page_access
.
css
(
'a.job_link'
).
map
{
|
link
|
link
[
'href'
]
}
...
@@ -62,306 +62,63 @@
...
@@ -62,306 +62,63 @@
if
link
.
include?
(
'\u2013'
)
if
link
.
include?
(
'\u2013'
)
link
.
gsub!
(
'\u2013'
,
'–'
)
link
.
gsub!
(
'\u2013'
,
'–'
)
end
end
pagecompany
=
Nokogiri
::
HTML
(
URI
.
open
(
URI
.
parse
(
URI
.
escape
(
link
))))
page_job
=
Nokogiri
::
HTML
(
URI
.
open
(
URI
.
parse
(
URI
.
escape
(
link
))))
get_row
=
pagecompany
.
search
(
'div.bg-blue div.row'
)
get_row
=
page_job
.
search
(
'div.bg-blue div.row'
)
if
get_row
!=
""
if
get_row
!=
""
length_page
=
get_row
.
css
(
'li p'
).
children
.
length
get_name_company
=
page_job
.
search
(
'div.job-desc a.job-company-name'
).
text
.
strip
location_length
=
get_row
.
search
(
'div.map a'
).
children
.
length
title
=
pagecompany
.
search
(
'div.job-desc p'
).
text
get_name_company
=
pagecompany
.
search
(
'div.job-desc a.job-company-name'
).
text
.
strip
description
=
pagecompany
.
search
(
'div.detail-row'
)
industry
=
get_row
.
css
(
'li a'
).
children
.
text
.
split
(
' '
).
join
(
' '
)
company_table
=
Company
.
find_by
(
name:
"
#{
get_name_company
}
"
)
company_table
=
Company
.
find_by
(
name:
"
#{
get_name_company
}
"
)
if
length_page
.
to_i
==
11
||
length_page
.
to_i
==
9
||
length_page
.
to_i
==
13
title_job
=
page_job
.
search
(
'div.job-desc p'
).
text
if
location_length
==
3
description
=
page_job
.
search
(
'div.detail-row'
)
date
=
get_row
.
css
(
'p'
).
children
[(
location_length
)
-
1
].
text
arr_column
=
get_row
.
css
(
'div.has-background'
).
map
{
|
data
|
data
.
text
.
split
(
' '
).
join
(
' '
)
}
salary
=
get_row
.
css
(
'p'
).
children
[(
length_page
.
to_i
)
-
2
].
text
.
split
(
' '
).
join
(
' '
)
arr_column
.
each_with_index
do
|
val
,
key
|
experience
=
get_row
.
css
(
'p'
).
children
[(
length_page
.
to_i
)
-
1
].
text
.
split
(
' '
).
join
(
' '
)
level
=
get_row
.
css
(
'p'
).
children
[(
length_page
.
to_i
)].
text
.
split
(
' '
).
join
(
' '
)
expiration_date
=
get_row
.
css
(
'p'
).
children
[(
length_page
.
to_i
)
+
1
].
text
.
split
(
' '
).
join
(
' '
)
if
company_table
!=
nil
if
company_table
!=
nil
job
=
Job
.
create!
(
title:
title
,
if
val
.
include?
(
'Ngày cập nhật'
)
description:
description
,
arr_data
=
val
.
gsub
(
'Ngày cập nhật '
,
''
).
split
(
' '
)
date
=
arr_data
.
first
elsif
val
.
include?
(
'Lương'
)
&&
val
.
include?
(
'Kinh nghiệm'
)
==
true
arr_sub
=
((((
val
.
gsub
(
'Lương '
,
''
)).
gsub
(
' Kinh nghiệm '
,
'*'
)).
gsub
(
' Cấp bậc '
,
'*'
)).
gsub
(
' Hết hạn nộp '
,
'*'
)).
split
(
'*'
)
salary
=
arr_sub
[
0
]
experience
=
arr_sub
[
1
]
level
=
arr_sub
[
2
]
expiration_date
=
arr_sub
[
3
]
job
=
Job
.
create!
(
title:
title_job
,
level:
level
,
level:
level
,
salary:
salary
,
salary:
salary
,
experience:
experience
,
experience:
experience
,
expiration_date:
expiration_date
,
expiration_date:
expiration_date
,
company_id:
company_table
.
id
)
end
elsif
location_length
==
2
date
=
get_row
.
css
(
'p'
).
children
[(
location_length
)
-
1
].
text
salary
=
get_row
.
css
(
'p'
).
children
[(
length_page
.
to_i
)
-
3
].
text
.
split
(
' '
).
join
(
' '
)
experience
=
get_row
.
css
(
'p'
).
children
[(
length_page
.
to_i
)
-
2
].
text
.
split
(
' '
).
join
(
' '
)
level
=
get_row
.
css
(
'p'
).
children
[(
length_page
.
to_i
)
-
1
].
text
.
split
(
' '
).
join
(
' '
)
expiration_date
=
get_row
.
css
(
'p'
).
children
[(
length_page
.
to_i
)].
text
.
split
(
' '
).
join
(
' '
)
if
company_table
!=
nil
job
=
Job
.
create!
(
title:
title
,
description:
description
,
description:
description
,
level:
level
,
salary:
salary
,
experience:
experience
,
expiration_date:
expiration_date
,
company_id:
company_table
.
id
)
company_id:
company_table
.
id
)
end
elsif
val
.
include?
(
'Lương'
)
&&
val
.
include?
(
'Kinh nghiệm'
)
==
false
end
arr_sub
=
(((
val
.
gsub
(
'Lương '
,
''
)).
gsub
(
' Cấp bậc '
,
'*'
)).
gsub
(
' Hết hạn nộp '
,
'*'
)).
split
(
'*'
)
salary
=
arr_sub
[
0
]
elsif
length_page
.
to_i
==
10
||
length_page
.
to_i
==
12
||
length_page
.
to_i
==
8
level
=
arr_sub
[
1
]
if
location_length
==
3
expiration_date
=
arr_sub
[
2
]
date
=
get_row
.
css
(
'p'
).
children
[(
location_length
)
-
1
].
text
job
=
Job
.
create!
(
title:
title_job
,
salary
=
get_row
.
css
(
'p'
).
children
[(
length_page
.
to_i
)
-
1
].
text
.
split
(
' '
).
join
(
' '
)
level
=
get_row
.
css
(
'p'
).
children
[(
length_page
.
to_i
)].
text
.
split
(
' '
).
join
(
' '
)
expiration_date
=
get_row
.
css
(
'p'
).
children
[(
length_page
.
to_i
)
+
1
].
text
.
split
(
' '
).
join
(
' '
)
if
company_table
!=
nil
job
=
Job
.
create!
(
title:
title
,
description:
description
,
level:
level
,
level:
level
,
salary:
salary
,
salary:
salary
,
experience:
experience
,
experience:
'Không có'
,
expiration_date:
expiration_date
,
expiration_date:
expiration_date
,
company_id:
company_table
.
id
)
end
elsif
location_length
==
2
date
=
get_row
.
css
(
'p'
).
children
[(
location_length
)
-
1
].
text
salary
=
get_row
.
css
(
'p'
).
children
[(
length_page
.
to_i
)
-
2
].
text
.
split
(
' '
).
join
(
' '
)
level
=
get_row
.
css
(
'p'
).
children
[(
length_page
.
to_i
)
-
1
].
text
.
split
(
' '
).
join
(
' '
)
expiration_date
=
get_row
.
css
(
'p'
).
children
[(
length_page
.
to_i
)].
text
.
split
(
' '
).
join
(
' '
)
if
company_table
!=
nil
job
=
Job
.
create!
(
title:
title
,
description:
description
,
description:
description
,
level:
level
,
salary:
salary
,
experience:
experience
,
expiration_date:
expiration_date
,
company_id:
company_table
.
id
)
company_id:
company_table
.
id
)
end
end
end
end
end
end
job_table
=
Job
.
find_by
(
title:
"
#{
title_job
}
"
)
if
job_table
!=
nil
location_rel
=
get_row
.
css
(
'div.map p a'
).
children
.
map
{
|
location
|
location
.
text
.
strip
}
location_rel
.
each
do
|
loc
|
puts
"
#{
job_table
.
id
}
-
#{
loc
}
"
city_table
=
City
.
find_by
(
name:
"
#{
loc
}
"
)
city_jobs
=
CityJob
.
create!
(
job_id:
job_table
.
id
,
city_id:
city_table
.
id
)
end
end
industry_rel
=
get_row
.
css
(
'li a'
).
children
.
map
{
|
industry
|
industry
.
text
.
strip
}
industry_rel
.
each
do
|
ind
|
puts
"
#{
job_table
.
id
}
-
#{
ind
}
"
industry_table
=
Industry
.
find_by
(
name:
"
#{
ind
}
"
)
industry_jobs
=
IndustryJob
.
create!
(
job_id:
job_table
.
id
,
industry_id:
industry_table
.
id
)
end
end
end
end
end
end
def
crawl_city_job
for
n
in
1
..
10
page_access
=
Nokogiri
::
HTML
(
URI
.
open
(
"https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-trang-
#{
n
}
-vi.html"
))
get_link
=
page_access
.
css
(
'a.job_link'
).
map
{
|
link
|
link
[
'href'
]
}
get_link
.
each
do
|
link
|
if
link
.
include?
(
'\u2013'
)
link
.
gsub!
(
'\u2013'
,
'–'
)
end
pagecompany
=
Nokogiri
::
HTML
(
URI
.
open
(
URI
.
parse
(
URI
.
escape
(
link
))))
get_row
=
pagecompany
.
search
(
'div.bg-blue div.row'
)
if
get_row
!=
""
begin
length_page
=
get_row
.
css
(
'li p'
).
children
.
length
location_length
=
get_row
.
search
(
'div.map a'
).
children
.
length
title
=
pagecompany
.
search
(
'div.job-desc p'
).
text
.
strip
get_name_company
=
pagecompany
.
search
(
'div.job-desc a.job-company-name'
).
text
.
strip
exp_exist
=
get_row
.
css
(
'div.has-background li strong'
).
text
.
include?
(
'Kinh nghiệm'
)
company_table
=
Company
.
find_by
(
name:
"
#{
get_name_company
}
"
)
job_table
=
Job
.
find_by
(
title:
"
#{
title
}
"
,
company_id:
"
#{
company_table
.
id
}
"
)
if
length_page
.
to_i
==
11
||
length_page
.
to_i
==
9
||
length_page
.
to_i
==
13
&&
exp_exist
==
true
&&
company_table
.
id
!=
nil
if
location_length
==
3
location
=
get_row
.
search
(
'div.map a'
).
children
[
0
].
text
.
strip
location1
=
get_row
.
search
(
'div.map a'
).
children
[
1
].
text
.
strip
city_table
=
City
.
find_by
(
name:
"
#{
location
}
"
)
city_table1
=
City
.
find_by
(
name:
"
#{
location1
}
"
)
if
city_table
!=
nil
&&
job_table
!=
nil
city_job_relationship
=
CityJob
.
create!
(
job_id:
job_table
.
id
,
city_id:
city_table
.
id
)
elsif
city_table1
!=
nil
&&
job_table
!=
nil
city_job_relationship
=
CityJob
.
create!
(
job_id:
job_table
.
id
,
city_id:
city_table1
.
id
)
end
elsif
location_length
==
2
location
=
get_row
.
search
(
'div.map a'
).
children
.
text
.
strip
city_table
=
City
.
find_by
(
name:
"
#{
location
}
"
)
if
city_table
!=
nil
&&
job_table
!=
nil
city_job_relationship
=
CityJob
.
create!
(
job_id:
job_table
.
id
,
city_id:
city_table
.
id
)
end
end
elsif
length_page
.
to_i
==
10
||
length_page
.
to_i
==
12
||
length_page
.
to_i
==
8
&&
exp_exist
==
false
&&
company_table
.
id
!=
nil
if
location_length
==
3
location
=
get_row
.
search
(
'div.map a'
).
children
[
0
].
text
.
strip
location1
=
get_row
.
search
(
'div.map a'
).
children
[
1
].
text
.
strip
city_table
=
City
.
find_by
(
name:
"
#{
location
}
"
)
city_table1
=
City
.
find_by
(
name:
"
#{
location1
}
"
)
if
city_table
!=
nil
&&
job_table
!=
nil
city_job_relationship
=
CityJob
.
create!
(
job_id:
job_table
.
id
,
city_id:
city_table
.
id
)
elsif
city_table1
!=
nil
&&
job_table
!=
nil
city_job_relationship
=
CityJob
.
create!
(
job_id:
job_table
.
id
,
city_id:
city_table1
.
id
)
end
elsif
location_length
==
2
location
=
get_row
.
search
(
'div.map a'
).
children
.
text
.
strip
city_table
=
City
.
find_by
(
name:
"
#{
location
}
"
)
if
city_table
!=
nil
&&
job_table
!=
nil
city_job_relationship
=
CityJob
.
create!
(
job_id:
job_table
.
id
,
city_id:
city_table
.
id
)
end
end
end
rescue
StandardError
=>
e
puts
e
end
end
end
end
end
def
crawl_industry_job
for
n
in
1
..
10
page_access
=
Nokogiri
::
HTML
(
URI
.
open
(
"https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-trang-
#{
n
}
-vi.html"
))
get_link
=
page_access
.
css
(
'a.job_link'
).
map
{
|
link
|
link
[
'href'
]
}
get_link
.
each
do
|
link
|
if
link
.
include?
(
'\u2013'
)
link
.
gsub!
(
'\u2013'
,
'–'
)
end
pagecompany
=
Nokogiri
::
HTML
(
URI
.
open
(
URI
.
parse
(
URI
.
escape
(
link
))))
get_row
=
pagecompany
.
search
(
'div.bg-blue div.row'
)
if
get_row
!=
""
begin
length_page
=
get_row
.
css
(
'li p'
).
children
.
length
location_length
=
get_row
.
search
(
'div.map a'
).
children
.
length
title
=
pagecompany
.
search
(
'div.job-desc p'
).
text
exp_exist
=
get_row
.
css
(
'div.has-background li strong'
).
text
.
include?
(
'Kinh nghiệm'
)
industry_length
=
get_row
.
css
(
'li a'
).
children
.
length
get_name_company
=
pagecompany
.
search
(
'div.job-desc a.job-company-name'
).
text
.
strip
company_table
=
Company
.
find_by
(
name:
"
#{
get_name_company
}
"
)
job_table
=
Job
.
find_by
(
title:
"
#{
title
}
"
,
company_id:
"
#{
company_table
.
id
}
"
)
if
company_table
.
id
!=
nil
&&
job_table
.
id
!=
nil
if
length_page
.
to_i
==
11
||
length_page
.
to_i
==
9
||
length_page
.
to_i
==
13
&&
exp_exist
==
true
if
location_length
==
3
if
industry_length
==
3
industry
=
get_row
.
css
(
'li a'
).
children
[
0
].
text
.
split
(
' '
).
join
(
' '
)
find_ind
=
Industry
.
find_by
(
name:
"
#{
industry
}
"
)
industry1
=
get_row
.
css
(
'li a'
).
children
[
1
].
text
.
split
(
' '
).
join
(
' '
)
find_ind1
=
Industry
.
find_by
(
name:
"
#{
industry1
}
"
)
industry2
=
get_row
.
css
(
'li a'
).
children
[
2
].
text
.
split
(
' '
).
join
(
' '
)
find_ind2
=
Industry
.
find_by
(
name:
"
#{
industry2
}
"
)
if
find_ind
!=
nil
industry_job_relationship
=
IndustryJob
.
create!
(
job_id:
"
#{
job_table
.
id
}
"
,
industry_id:
"
#{
find_ind
.
id
}
"
)
elsif
find_ind1
!=
nil
industry_job_relationship
=
IndustryJob
.
create!
(
job_id:
"
#{
job_table
.
id
}
"
,
industry_id:
"
#{
find_ind1
.
id
}
"
)
elsif
find_ind2
!=
nil
industry_job_relationship
=
IndustryJob
.
create!
(
job_id:
"
#{
job_table
.
id
}
"
,
industry_id:
"
#{
find_ind2
.
id
}
"
)
end
elsif
industry_length
==
2
industry
=
get_row
.
css
(
'li a'
).
children
[
0
].
text
.
split
(
' '
).
join
(
' '
)
find_ind
=
Industry
.
find_by
(
name:
"
#{
industry
}
"
)
industry1
=
get_row
.
css
(
'li a'
).
children
[
1
].
text
.
split
(
' '
).
join
(
' '
)
find_ind1
=
Industry
.
find_by
(
name:
"
#{
industry1
}
"
)
if
find_ind
!=
nil
industry_job_relationship
=
IndustryJob
.
create!
(
job_id:
"
#{
job_table
.
id
}
"
,
industry_id:
find_ind
.
id
)
elsif
find_ind1
!=
nil
industry_job_relationship
=
IndustryJob
.
create!
(
job_id:
"
#{
job_table
.
id
}
"
,
industry_id:
"
#{
find_ind1
.
id
}
"
)
end
elsif
industry_length
==
1
industry
=
get_row
.
css
(
'li a'
).
children
[
0
].
text
.
split
(
' '
).
join
(
' '
)
find_ind
=
Industry
.
find_by
(
name:
"
#{
industry
}
"
)
industry_job_relationship
=
IndustryJob
.
create!
(
job_id:
"
#{
job_table
.
id
}
"
,
industry_id:
find_ind
.
id
)
end
elsif
location_length
==
2
if
industry_length
==
3
industry
=
get_row
.
css
(
'li a'
).
children
[
0
].
text
.
split
(
' '
).
join
(
' '
)
find_ind
=
Industry
.
find_by
(
name:
"
#{
industry
}
"
)
industry1
=
get_row
.
css
(
'li a'
).
children
[
1
].
text
.
split
(
' '
).
join
(
' '
)
find_ind1
=
Industry
.
find_by
(
name:
"
#{
industry1
}
"
)
industry2
=
get_row
.
css
(
'li a'
).
children
[
2
].
text
.
split
(
' '
).
join
(
' '
)
find_ind2
=
Industry
.
find_by
(
name:
"
#{
industry2
}
"
)
if
find_ind
!=
nil
industry_job_relationship
=
IndustryJob
.
create!
(
job_id:
"
#{
job_table
.
id
}
"
,
industry_id:
find_ind
.
id
)
elsif
find_ind1
!=
nil
industry_job_relationship
=
IndustryJob
.
create!
(
job_id:
"
#{
job_table
.
id
}
"
,
industry_id:
"
#{
find_ind1
.
id
}
"
)
elsif
find_ind2
!=
nil
industry_job_relationship
=
IndustryJob
.
create!
(
job_id:
"
#{
job_table
.
id
}
"
,
industry_id:
"
#{
find_ind2
.
id
}
"
)
end
elsif
industry_length
==
2
industry
=
get_row
.
css
(
'li a'
).
children
[
0
].
text
.
split
(
' '
).
join
(
' '
)
find_ind
=
Industry
.
find_by
(
name:
"
#{
industry
}
"
)
industry1
=
get_row
.
css
(
'li a'
).
children
[
1
].
text
.
split
(
' '
).
join
(
' '
)
find_ind1
=
Industry
.
find_by
(
name:
"
#{
industry1
}
"
)
if
find_ind
!=
nil
industry_job_relationship
=
IndustryJob
.
create!
(
job_id:
"
#{
job_table
.
id
}
"
,
industry_id:
find_ind
.
id
)
elsif
find_ind1
!=
nil
industry_job_relationship
=
IndustryJob
.
create!
(
job_id:
"
#{
job_table
.
id
}
"
,
industry_id:
"
#{
find_ind1
.
id
}
"
)
end
elsif
industry_length
==
1
industry
=
get_row
.
css
(
'li a'
).
children
[
0
].
text
.
split
(
' '
).
join
(
' '
)
find_ind
=
Industry
.
find_by
(
name:
"
#{
industry
}
"
)
industry_job_relationship
=
IndustryJob
.
create!
(
job_id:
"
#{
job_table
.
id
}
"
,
industry_id:
find_ind
.
id
)
end
end
elsif
length_page
.
to_i
==
10
||
length_page
.
to_i
==
12
||
length_page
.
to_i
==
8
&&
exp_exist
==
false
if
location_length
==
3
if
industry_length
==
3
industry
=
get_row
.
css
(
'li a'
).
children
[
0
].
text
.
split
(
' '
).
join
(
' '
)
find_ind
=
Industry
.
find_by
(
name:
"
#{
industry
}
"
)
industry1
=
get_row
.
css
(
'li a'
).
children
[
1
].
text
.
split
(
' '
).
join
(
' '
)
find_ind1
=
Industry
.
find_by
(
name:
"
#{
industry1
}
"
)
industry2
=
get_row
.
css
(
'li a'
).
children
[
2
].
text
.
split
(
' '
).
join
(
' '
)
find_ind2
=
Industry
.
find_by
(
name:
"
#{
industry2
}
"
)
if
find_ind
!=
nil
industry_job_relationship
=
IndustryJob
.
create!
(
job_id:
"
#{
job_table
.
id
}
"
,
industry_id:
"
#{
find_ind
.
id
}
"
)
elsif
find_ind1
!=
nil
industry_job_relationship
=
IndustryJob
.
create!
(
job_id:
"
#{
job_table
.
id
}
"
,
industry_id:
"
#{
find_ind1
.
id
}
"
)
elsif
find_ind2
!=
nil
industry_job_relationship
=
IndustryJob
.
create!
(
job_id:
"
#{
job_table
.
id
}
"
,
industry_id:
"
#{
find_ind2
.
id
}
"
)
end
elsif
industry_length
==
2
industry
=
get_row
.
css
(
'li a'
).
children
[
0
].
text
.
split
(
' '
).
join
(
' '
)
find_ind
=
Industry
.
find_by
(
name:
"
#{
industry
}
"
)
industry1
=
get_row
.
css
(
'li a'
).
children
[
1
].
text
.
split
(
' '
).
join
(
' '
)
find_ind1
=
Industry
.
find_by
(
name:
"
#{
industry1
}
"
)
if
find_ind
!=
nil
industry_job_relationship
=
IndustryJob
.
create!
(
job_id:
"
#{
job_table
.
id
}
"
,
industry_id:
"
#{
find_ind
.
id
}
"
)
elsif
find_ind1
!=
nil
industry_job_relationship
=
IndustryJob
.
create!
(
job_id:
"
#{
job_table
.
id
}
"
,
industry_id:
"
#{
find_ind1
.
id
}
"
)
end
elsif
industry_length
==
1
industry
=
get_row
.
css
(
'li a'
).
children
[
0
].
text
.
split
(
' '
).
join
(
' '
)
find_ind
=
Industry
.
find_by
(
name:
"
#{
industry
}
"
)
industry_job_relationship
=
IndustryJob
.
create!
(
job_id:
"
#{
job_table
.
id
}
"
,
industry_id:
"
#{
find_ind
.
id
}
"
)
end
elsif
location_length
==
2
if
industry_length
==
3
industry
=
get_row
.
css
(
'li a'
).
children
[
0
].
text
.
split
(
' '
).
join
(
' '
)
find_ind
=
Industry
.
find_by
(
name:
"
#{
industry
}
"
)
industry1
=
get_row
.
css
(
'li a'
).
children
[
1
].
text
.
split
(
' '
).
join
(
' '
)
find_ind1
=
Industry
.
find_by
(
name:
"
#{
industry1
}
"
)
industry2
=
get_row
.
css
(
'li a'
).
children
[
2
].
text
.
split
(
' '
).
join
(
' '
)
find_ind2
=
Industry
.
find_by
(
name:
"
#{
industry2
}
"
)
if
find_ind
!=
nil
industry_job_relationship
=
IndustryJob
.
create!
(
job_id:
"
#{
job_table
.
id
}
"
,
industry_id:
"
#{
find_ind
.
id
}
"
)
elsif
find_ind1
!=
nil
industry_job_relationship
=
IndustryJob
.
create!
(
job_id:
"
#{
job_table
.
id
}
"
,
industry_id:
"
#{
find_ind1
.
id
}
"
)
elsif
find_ind2
!=
nil
industry_job_relationship
=
IndustryJob
.
create!
(
job_id:
"
#{
job_table
.
id
}
"
,
industry_id:
"
#{
find_ind2
.
id
}
"
)
end
elsif
industry_length
==
2
industry
=
get_row
.
css
(
'li a'
).
children
[
0
].
text
.
split
(
' '
).
join
(
' '
)
find_ind
=
Industry
.
find_by
(
name:
"
#{
industry
}
"
)
industry1
=
get_row
.
css
(
'li a'
).
children
[
1
].
text
.
split
(
' '
).
join
(
' '
)
find_ind1
=
Industry
.
find_by
(
name:
"
#{
industry1
}
"
)
if
find_ind
!=
nil
industry_job_relationship
=
IndustryJob
.
create!
(
job_id:
"
#{
job_table
.
id
}
"
,
industry_id:
"
#{
find_ind
.
id
}
"
)
elsif
find_ind1
!=
nil
industry_job_relationship
=
IndustryJob
.
create!
(
job_id:
"
#{
job_table
.
id
}
"
,
industry_id:
"
#{
find_ind1
.
id
}
"
)
end
elsif
industry_length
==
1
industry
=
get_row
.
css
(
'li a'
).
children
[
0
].
text
.
split
(
' '
).
join
(
' '
)
find_ind
=
Industry
.
find_by
(
name:
"
#{
industry
}
"
)
industry_job_relationship
=
IndustryJob
.
create!
(
job_id:
"
#{
job_table
.
id
}
"
,
industry_id:
"
#{
find_ind
.
id
}
"
)
end
end
end
end
end
end
rescue
StandardError
=>
e
puts
e
end
end
end
end
end
end
...
...
lib/tasks/crawler_import.rake
View file @
59fdc24d
...
@@ -7,9 +7,7 @@ namespace :import do
...
@@ -7,9 +7,7 @@ namespace :import do
crawl
.
crawl_city
crawl
.
crawl_city
crawl
.
crawl_industry
crawl
.
crawl_industry
crawl
.
crawl_company
crawl
.
crawl_company
crawl
.
crawl_job
crawl
.
crawl_job_relationships
crawl
.
crawl_city_job
crawl
.
crawl_industry_job
end
end
task
csv_get: :environment
do
task
csv_get: :environment
do
crawl
.
get_file_csv
crawl
.
get_file_csv
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment