Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
V
Venjob_HungNT
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Ngô Trung Hưng
Venjob_HungNT
Commits
f5d71986
Commit
f5d71986
authored
Jul 22, 2020
by
Ngo Trung Hung
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fix crawler
parent
81cfe475
Show whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
84 additions
and
149 deletions
+84
-149
app/assets/stylesheets/custom.scss
+3
-3
app/models/city.rb
+1
-1
app/views/shared/_block_cities_hot.html.erb
+3
-3
app/views/shared/_box_five_job.html.erb
+1
-3
app/views/shared/_box_nine_city.html.erb
+1
-1
lib/src/crawler.rb
+16
-12
lib/src/interface_web.rb
+56
-123
lib/tasks/crawler.rake
+3
-3
No files found.
app/assets/stylesheets/custom.scss
View file @
f5d71986
...
@@ -687,7 +687,6 @@ $main-color: #221f20;
...
@@ -687,7 +687,6 @@ $main-color: #221f20;
.box_info_salary
{
.box_info_salary
{
font-size
:
13px
;
font-size
:
13px
;
color
:
#008563
;
color
:
#008563
;
font-weight
:
600
;
transform
:
translateY
(
-2px
);
transform
:
translateY
(
-2px
);
}
}
.box_btn_favotite
{
.box_btn_favotite
{
...
@@ -818,7 +817,7 @@ $main-color: #221f20;
...
@@ -818,7 +817,7 @@ $main-color: #221f20;
}
}
.box_text_five_jobs.box_padding_city
{
.box_text_five_jobs.box_padding_city
{
background-
color
:
#da6d2e
;
background-
image
:
linear-gradient
(
to
right
,
#86cb49
,
#169b74
,
#86cb49
);
color
:
white
;
color
:
white
;
font-weight
:
600
;
font-weight
:
600
;
margin-bottom
:
0px
!
important
;
margin-bottom
:
0px
!
important
;
...
@@ -849,12 +848,13 @@ $main-color: #221f20;
...
@@ -849,12 +848,13 @@ $main-color: #221f20;
cursor
:
pointer
;
cursor
:
pointer
;
color
:
white
;
color
:
white
;
text-align
:
center
;
text-align
:
center
;
font-size
:
1
8
px
;
font-size
:
1
7
px
;
font-weight
:
bold
;
font-weight
:
bold
;
}
}
.ct_jobs_count
{
.ct_jobs_count
{
cursor
:
pointer
;
cursor
:
pointer
;
font-size
:
16px
;
text-align
:
center
;
text-align
:
center
;
color
:
#999
;
color
:
#999
;
}
}
...
...
app/models/city.rb
View file @
f5d71986
...
@@ -6,7 +6,7 @@ class City < ApplicationRecord
...
@@ -6,7 +6,7 @@ class City < ApplicationRecord
hash
=
{}
hash
=
{}
data_cities
=
City
.
all
data_cities
=
City
.
all
data_cities
.
each
do
|
val
|
data_cities
.
each
do
|
val
|
hash
[
val
.
id
]
=
val
.
jobs
.
count
hash
[
val
.
name
]
=
val
.
jobs
.
count
end
end
hash
.
sort_by
{
|
k
,
v
|
v
}.
reverse
hash
.
sort_by
{
|
k
,
v
|
v
}.
reverse
end
end
...
...
app/views/shared/_block_cities_hot.html.erb
View file @
f5d71986
<div
class=
"box_info_city"
>
<div
class=
"box_info_city"
>
<div
class=
"ct_name"
>
<div
class=
"ct_name"
>
<%
data
=
City
.
find
(
k
)
%>
<%=
link_to
name_city
,
'#'
,
class:
'link_ct'
%>
<%=
link_to
data
.
name
,
'#'
,
class:
'link_ct'
%>
</div>
</div>
<div
class=
"ct_jobs_count"
>
<div
class=
"ct_jobs_count"
>
<%=
link_to
"
#{
v
}
công việc"
,
'#'
,
class:
'link_ct'
%>
<%=
link_to
"
#{
jobs_count
}
công việc"
,
'#'
,
class:
'link_ct'
%>
</div>
</div>
</div>
</div>
\ No newline at end of file
app/views/shared/_box_five_job.html.erb
View file @
f5d71986
...
@@ -6,11 +6,9 @@
...
@@ -6,11 +6,9 @@
<%
@five_jobs
.
each
do
|
val
|
%>
<%
@five_jobs
.
each
do
|
val
|
%>
<div
class=
"box_jobs"
>
<div
class=
"box_jobs"
>
<div
class=
"col-sm-12 d-block d-sm-none"
>
<div
class=
"col-sm-12 d-block d-sm-none"
>
<button
type=
"submit"
class=
"btn_favorite_outline xs"
>
<button
type=
"submit"
class=
"btn_favorite_outline xs"
>
<i
class=
"far fa-heart"
></i>
<i
class=
"far fa-heart"
></i>
</button>
</button>
</div>
</div>
<div
class=
"row"
>
<div
class=
"row"
>
<div
class=
"col-sm-10 col-md-9 col-lg-10"
>
<div
class=
"col-sm-10 col-md-9 col-lg-10"
>
...
@@ -31,7 +29,7 @@
...
@@ -31,7 +29,7 @@
<%=
dt
.
join
(
''
).
chomp
(
'| '
)
%>
<%=
dt
.
join
(
''
).
chomp
(
'| '
)
%>
</h5>
</h5>
</div>
</div>
<h5
class=
"box_info_salary"
><i
class=
"fas fa-dollar-sign"
></i>
Lương:
<%=
val
.
salary
%>
</h5>
<h5
class=
"box_info_salary"
><i
class=
"fas fa-dollar-sign"
></i>
Lương:
<%=
val
.
salary
%>
</h5>
<div
class=
"coc"
>
<div
class=
"coc"
>
<h5
class=
"box_info_des"
>
<%=
strip_tags
(
val
.
description
)
%>
</h5>
<h5
class=
"box_info_des"
>
<%=
strip_tags
(
val
.
description
)
%>
</h5>
</div>
</div>
...
...
app/views/shared/_box_nine_city.html.erb
View file @
f5d71986
...
@@ -6,7 +6,7 @@
...
@@ -6,7 +6,7 @@
<div
class=
"row no-gutters"
>
<div
class=
"row no-gutters"
>
<%
@top_city
.
each
do
|
k
,
v
|
%>
<%
@top_city
.
each
do
|
k
,
v
|
%>
<div
class=
"col-lg-4 col-md-4 col-sm-6 col-xs-12"
>
<div
class=
"col-lg-4 col-md-4 col-sm-6 col-xs-12"
>
<%=
render
'shared/block_cities_hot'
,
k:
k
,
v
:
v
%>
<%=
render
'shared/block_cities_hot'
,
name_city:
k
,
jobs_count
:
v
%>
</div>
</div>
<%
end
%>
<%
end
%>
<div
class=
"col-lg-4 col-md-4 col-sm-6 col-xs-12"
>
<div
class=
"col-lg-4 col-md-4 col-sm-6 col-xs-12"
>
...
...
lib/src/crawler.rb
View file @
f5d71986
...
@@ -12,7 +12,7 @@ class Clawler
...
@@ -12,7 +12,7 @@ class Clawler
list_cities
.
each
do
|
x
|
list_cities
.
each
do
|
x
|
data_list_cities
<<
x
.
gsub
(
/(^<[\w\D]*>)/
,
''
).
gsub
(
/\n/
,
''
).
rstrip
data_list_cities
<<
x
.
gsub
(
/(^<[\w\D]*>)/
,
''
).
gsub
(
/\n/
,
''
).
rstrip
end
end
puts
"Save data to database...
\n
"
puts
"Save data to database...
\n
------------------------
"
data_list_cities
.
length
.
times
do
|
i
|
data_list_cities
.
length
.
times
do
|
i
|
area
=
i
>
69
?
0
:
1
area
=
i
>
69
?
0
:
1
name
=
(
data_list_cities
[
i
].
to_s
)
name
=
(
data_list_cities
[
i
].
to_s
)
...
@@ -29,7 +29,7 @@ class Clawler
...
@@ -29,7 +29,7 @@ class Clawler
list_industries
.
each
do
|
x
|
list_industries
.
each
do
|
x
|
data_list_industries
<<
x
.
gsub
(
/(^<[\w\D]*>)/
,
''
).
gsub
(
/\n/
,
''
).
strip
data_list_industries
<<
x
.
gsub
(
/(^<[\w\D]*>)/
,
''
).
gsub
(
/\n/
,
''
).
strip
end
end
puts
"Save data to database...
\n
"
puts
"Save data to database...
\n
------------------------
"
data_list_industries
.
length
.
times
do
|
i
|
data_list_industries
.
length
.
times
do
|
i
|
name
=
data_list_industries
[
i
].
to_s
name
=
data_list_industries
[
i
].
to_s
if
name
.
include?
(
'&'
)
if
name
.
include?
(
'&'
)
...
@@ -39,23 +39,27 @@ class Clawler
...
@@ -39,23 +39,27 @@ class Clawler
end
end
end
end
# FILL DATA COMPANIES
# FILL DATA COMPANIES
def
self
.
make_companies
def
self
.
make_companies
Company
.
create!
(
name:
"Bảo mật"
,
# Company.create!(name: "Bảo mật",
address:
"Vui lòng xem trong mô tả công việc"
,
# address: "Vui lòng xem trong mô tả công việc",
short_description:
"Vui lòng xem trong mô tả công việc"
)
# short_description: "Vui lòng xem trong mô tả công việc")
Company
.
find_or_create_by
(
name:
'Bảo mật'
,
address:
'Vui lòng xem trong mô tả công việc'
)
do
|
company
|
company
.
name
=
'Bảo mật'
company
.
address
=
'Vui lòng xem trong mô tả công việc'
company
.
short_description
=
'Vui lòng xem trong mô tả công việc'
end
@data
=
Interface_web
.
craw_data_companies
()
@data
=
Interface_web
.
craw_data_companies
()
puts
'Save info companies to database . . .'
puts
'Save info companies to database . . .'
i
=
0
@data
[
:name
].
each_with_index
do
|
name
,
index
|
@data
[
:name
].
each
do
|
n
|
if
Company
.
find_by
(
name:
name
).
blank?
if
Company
.
find_by
(
name:
n
).
blank?
address
=
@data
[
:address
][
index
]
address
=
@data
[
:address
][
i
]
short_description
=
@data
[
:description
][
index
]
short_description
=
@data
[
:description
][
i
]
Company
.
create!
(
name:
name
,
Company
.
create!
(
name:
n
,
address:
address
,
address:
address
,
short_description:
short_description
)
short_description:
short_description
)
end
end
i
+=
1
end
end
end
end
# FILL DATA JOBS
# FILL DATA JOBS
...
...
lib/src/interface_web.rb
View file @
f5d71986
class
Interface_web
class
Interface_web
# func get "n" link company & job
# func get "n" link company & job
debugger
def
self
.
crawl_link_for_companies_jobs
(
page
)
def
self
.
crawl_link_for_companies_jobs
(
page
)
puts
"Crawling link on page...
\n
PLease wait...
\n
"
puts
"Crawling link on page...
\n
PLease wait...
\n
"
data
=
[]
data
=
[]
...
@@ -20,14 +19,13 @@ class Interface_web
...
@@ -20,14 +19,13 @@ class Interface_web
website_jobs
=
website_jobs
.
join
(
","
)
website_jobs
=
website_jobs
.
join
(
","
)
website_jobs
=
website_jobs
.
split
(
","
)
website_jobs
=
website_jobs
.
split
(
","
)
website_jobs
=
website_jobs
.
select
{
|
val
|
val
!=
''
}
website_jobs
=
website_jobs
.
select
{
|
val
|
val
!=
''
}
puts
"Result:
\n
Company:
#{
website_companies
.
length
}
link
\n
Job :
#{
website_jobs
}
link
"
puts
"Result:
\n
Company:
#{
website_companies
.
length
}
link
\n
Job :
#{
website_jobs
.
length
}
link
\n
------------------------
"
data
<<
website_companies
<<
website_jobs
data
<<
website_companies
<<
website_jobs
end
end
@crawl_link_for_companies_jobs
=
crawl_link_for_companies_jobs
(
15
)
@crawl_link_for_companies_jobs
=
crawl_link_for_companies_jobs
(
1
)
def
self
.
get_link_job_and_companies
def
self
.
get_link_job_and_companies
@crawl_link_for_companies_jobs
||=
crawl_link_for_companies_jobs
(
1
)
@crawl_link_for_companies_jobs
||=
crawl_link_for_companies_jobs
(
1
5
)
end
end
def
self
.
base_link
(
url
)
def
self
.
base_link
(
url
)
...
@@ -36,13 +34,12 @@ class Interface_web
...
@@ -36,13 +34,12 @@ class Interface_web
def
self
.
craw_data_companies
def
self
.
craw_data_companies
link_crawl
=
get_link_job_and_companies
link_crawl
=
get_link_job_and_companies
@
data_companies
=
{}
data_companies
=
{}
@
data_companies_name
=
[]
data_companies_name
=
[]
@
data_companies_address
=
[]
data_companies_address
=
[]
@
data_companies_description
=
[]
data_companies_description
=
[]
puts
'Crawl data companies'
puts
'Crawl data companies'
@current_company
=
0
link_crawl
[
0
].
each_with_index
do
|
url
,
i
|
link_crawl
[
0
].
each
do
|
url
|
page
=
base_link
(
url
)
page
=
base_link
(
url
)
name
=
''
name
=
''
address
=
''
address
=
''
...
@@ -57,40 +54,37 @@ class Interface_web
...
@@ -57,40 +54,37 @@ class Interface_web
desc
=
page
.
search
(
".main-about-us .content"
).
text
desc
=
page
.
search
(
".main-about-us .content"
).
text
end
end
if
(
name
!=
""
&&
address
!=
""
&&
desc
!=
""
)
if
(
name
.
present?
&&
address
.
present?
&&
desc
.
present?
)
@data_companies_name
<<
name
.
to_s
.
strip
data_companies_name
<<
name
.
to_s
.
strip
@data_companies_address
<<
address
.
to_s
.
strip
data_companies_address
<<
address
.
to_s
.
strip
@data_companies_description
<<
desc
data_companies_description
<<
desc
@current_company
+=
1
end
end
puts
"
Crawling
#{
@current_company
}
"
puts
"
Process company
#{
i
+
1
}
. . .
\n
------------------------
"
end
end
@data_companies
[
:name
]
=
@
data_companies_name
data_companies
[
:name
]
=
data_companies_name
@data_companies
[
:address
]
=
@
data_companies_address
data_companies
[
:address
]
=
data_companies_address
@
data_companies_description
.
each
do
|
val
|
data_companies_description
.
each
do
|
val
|
val
.
to_s
.
delete!
(
"[
\n
,
\t
,
\r
]"
)
val
.
to_s
.
delete!
(
"[
\n
,
\t
,
\r
]"
)
val
.
strip!
val
.
strip!
end
end
@data_companies
[
:description
]
=
@
data_companies_description
data_companies
[
:description
]
=
data_companies_description
@
data_companies
data_companies
end
end
def
self
.
add_data
def
self
.
add_data
(
name
,
company_name
,
city_name
,
created_date
,
expiration_date
,
salary
,
industry_name
,
description
,
level
,
exprience
)
@data
[
:name
]
=
@
name
@data
[
:name
]
=
name
@data
[
:company_name
]
=
@
company_name
@data
[
:company_name
]
=
company_name
@data
[
:city_name
]
=
@
city_name
@data
[
:city_name
]
=
city_name
@data
[
:created_date
]
=
@
created_date
@data
[
:created_date
]
=
created_date
@data
[
:expiration_date
]
=
@
expiration_date
@data
[
:expiration_date
]
=
expiration_date
@data
[
:salary
]
=
@
salary
@data
[
:salary
]
=
salary
@data
[
:industry_name
]
=
@
industry_name
@data
[
:industry_name
]
=
industry_name
@data
[
:description
]
=
@
description
@data
[
:description
]
=
description
@data
[
:level
]
=
@
level
@data
[
:level
]
=
level
@data
[
:exprience
]
=
@
exprience
@data
[
:exprience
]
=
exprience
end
end
def
self
.
crawl_data_jobs_interface_1
(
url
)
def
self
.
crawl_data_jobs_interface_1
(
page
)
page
=
base_link
(
url
)
@name
<<
page
.
search
(
".apply-now-content .job-desc .title"
).
text
@name
<<
page
.
search
(
".apply-now-content .job-desc .title"
).
text
@company_name
<<
page
.
search
(
".apply-now-content .job-desc .job-company-name"
).
text
@company_name
<<
page
.
search
(
".apply-now-content .job-desc .job-company-name"
).
text
...
@@ -109,31 +103,30 @@ class Interface_web
...
@@ -109,31 +103,30 @@ class Interface_web
@salary
<<
page
.
search
(
".item-blue .detail-box:nth-child(1) ul li:nth-child(1) p"
)[
1
].
text
@salary
<<
page
.
search
(
".item-blue .detail-box:nth-child(1) ul li:nth-child(1) p"
)[
1
].
text
industr
y_name
=
page
.
search
(
".item-blue .detail-box:nth-child(1) ul li:nth-child(2) a"
).
text
industr
ies
=
page
.
search
(
".item-blue .detail-box:nth-child(1) ul li:nth-child(2) a"
).
text
industr
y_name
=
industry_name
.
delete!
(
"[
\n
,
\t
,
\r
]"
).
split
(
' '
).
select
{
|
v
|
v
!=
''
}
industr
ies
=
industries
.
delete!
(
"[
\n
,
\t
,
\r
]"
).
split
(
' '
).
select
{
|
v
|
v
!=
''
}
@industry_name
<<
industr
y_name
.
join
(
','
)
@industry_name
<<
industr
ies
.
join
(
','
)
@description
<<
page
.
search
(
".tabs .tab-content .detail-row:nth-child(n)"
).
to_s
.
delete!
(
"[
\n
,
\t
,
\r
]"
)
@description
<<
page
.
search
(
".tabs .tab-content .detail-row:nth-child(n)"
).
to_s
.
delete!
(
"[
\n
,
\t
,
\r
]"
)
get_level
=
page
.
search
(
".item-blue .detail-box:last ul li:nth-child(3)"
).
text
.
delete!
(
"[
\n
,
\t
,
\r
]"
).
lstrip
.
split
(
'Cấp bậc'
)
get_level
=
page
.
search
(
".item-blue .detail-box:last ul li:nth-child(3)"
).
text
.
delete!
(
"[
\n
,
\t
,
\r
]"
).
lstrip
.
split
(
'Cấp bậc'
)
get_level
=
get_level
[
1
].
to_s
.
strip
get_level
=
get_level
[
1
].
to_s
.
strip
if
get_level
==
""
if
get_level
==
""
level
=
page
.
search
(
".item-blue .detail-box:last ul li:nth-child(2)"
).
text
.
delete!
(
"[
\n
,
\t
,
\r
]"
).
lstrip
.
split
(
'Cấp bậc'
)
g_
level
=
page
.
search
(
".item-blue .detail-box:last ul li:nth-child(2)"
).
text
.
delete!
(
"[
\n
,
\t
,
\r
]"
).
lstrip
.
split
(
'Cấp bậc'
)
@level
<<
level
[
1
].
to_s
.
strip
@level
<<
g_
level
[
1
].
to_s
.
strip
else
else
level
=
page
.
search
(
".item-blue .detail-box:last ul li:nth-child(3)"
).
text
.
delete!
(
"[
\n
,
\t
,
\r
]"
).
lstrip
.
split
(
'Cấp bậc'
)
g_
level
=
page
.
search
(
".item-blue .detail-box:last ul li:nth-child(3)"
).
text
.
delete!
(
"[
\n
,
\t
,
\r
]"
).
lstrip
.
split
(
'Cấp bậc'
)
@level
<<
level
[
1
].
to_s
.
strip
@level
<<
g_
level
[
1
].
to_s
.
strip
end
end
exp
rience
=
page
.
search
(
".item-blue .detail-box:last ul li:nth-child(2)"
).
text
.
delete!
(
"[
\n
,
\t
,
\r
]"
).
split
(
'Kinh nghiệm'
)
exp
=
page
.
search
(
".item-blue .detail-box:last ul li:nth-child(2)"
).
text
.
delete!
(
"[
\n
,
\t
,
\r
]"
).
split
(
'Kinh nghiệm'
)
exp
rience
=
exprience
[
1
].
to_s
.
strip
exp
=
exp
[
1
].
to_s
.
strip
@exprience
<<
exp
rience
@exprience
<<
exp
add_data
()
add_data
(
@name
,
@company_name
,
@city_name
,
@created_date
,
@expiration_date
,
@salary
,
@industry_name
,
@description
,
@level
,
@exprience
)
end
end
def
self
.
crawl_data_jobs_interface_2
(
url
)
def
self
.
crawl_data_jobs_interface_2
(
page
)
page
=
base_link
(
url
)
@name
<<
page
.
search
(
".apply-now-content .job-desc .title"
).
text
@name
<<
page
.
search
(
".apply-now-content .job-desc .title"
).
text
...
@@ -169,11 +162,6 @@ class Interface_web
...
@@ -169,11 +162,6 @@ class Interface_web
@level
<<
lv
.
delete!
(
"[
\n
,
\t
,
\r
]"
).
strip
.
split
(
'Cấp bậc'
).
last
.
strip
@level
<<
lv
.
delete!
(
"[
\n
,
\t
,
\r
]"
).
strip
.
split
(
'Cấp bậc'
).
last
.
strip
end
end
# if exp == ""
# @exprience << ""
# else
# @exprience << exp.delete!("[\n,\t,\r]").split('Kinh nghiệm').last.strip
# end
exp
=
page
.
search
(
".info li:nth-child(6)"
).
text
exp
=
page
.
search
(
".info li:nth-child(6)"
).
text
if
exp
.
blank?
if
exp
.
blank?
@exprience
<<
""
@exprience
<<
""
...
@@ -181,68 +169,11 @@ class Interface_web
...
@@ -181,68 +169,11 @@ class Interface_web
@exprience
<<
exp
.
delete!
(
"[
\n
,
\t
,
\r
]"
).
split
(
'Kinh nghiệm'
).
last
.
strip
@exprience
<<
exp
.
delete!
(
"[
\n
,
\t
,
\r
]"
).
split
(
'Kinh nghiệm'
).
last
.
strip
end
end
add_data
()
add_data
(
@name
,
@company_name
,
@city_name
,
@created_date
,
@expiration_date
,
@salary
,
@industry_name
,
@description
,
@level
,
@exprience
)
end
def
self
.
crawl_data_jobs_interface_3
(
url
)
page
=
base_link
(
url
)
@name
<<
page
.
search
(
".intro_job h1"
).
text
@company_name
<<
page
.
search
(
".info-company .text-job h2"
).
text
@city_name
<<
page
.
search
(
".DetailJobNew ul li:nth-child(1) a"
).
text
@created_date
<<
""
@expiration_date
<<
page
.
search
(
".DetailJobNew .info ul li:nth-child(3) p"
).
text
.
strip
@salary
<<
page
.
search
(
".DetailJobNew .salary ul li:nth-child(3) p"
).
text
.
strip
@industry_name
<<
page
.
search
(
".DetailJobNew .salary ul li:nth-child(2) p"
).
text
.
strip
@description
<<
page
.
search
(
".content_job .detail-row"
).
to_s
.
delete!
(
"[
\n
,
\t
,
\r
]"
)
@level
<<
page
.
search
(
".DetailJobNew .info ul li:nth-child(2) p"
).
text
.
strip
@exprience
<<
page
.
search
(
".DetailJobNew .info ul li:nth-child(1) p"
).
text
.
strip
add_data
()
end
def
self
.
crawl_data_jobs_interface_4
(
url
)
page
=
base_link
(
url
)
@name
<<
page
.
search
(
".info-company h1"
).
text
if
page
.
search
(
".zone-company .text-job h2"
).
text
==
""
@company_name
<<
page
.
search
(
".info-company .text-job h2"
).
text
industry_name
=
page
.
search
(
".DetailJobNew li:nth-child(3) span"
).
text
.
strip
@industry_name
<<
industry_name
.
delete!
(
"[
\n
,
\t
,
\r
]"
).
split
(
' '
).
select
{
|
v
|
v
!=
''
}
else
@company_name
<<
page
.
search
(
".zone-company .text-job h2"
).
text
.
strip
industry_name
=
page
.
search
(
".DetailJobNew li:nth-child(3) span a"
).
text
@industry_name
<<
industry_name
.
delete!
(
"[
\n
,
\t
,
\r
]"
).
split
(
' '
).
select
{
|
v
|
v
!=
''
}
end
@city_name
<<
page
.
search
(
".DetailJobNew ul li:nth-child(1) a"
).
text
@created_date
<<
""
@expiration_date
<<
page
.
search
(
".DetailJobNew li:nth-child(7) span"
).
text
@salary
<<
page
.
search
(
".DetailJobNew li:nth-child(6) span"
).
text
@description
<<
page
.
search
(
".left-col"
).
to_s
.
delete!
(
"[
\n
,
\t
,
\r
]"
)
@level
<<
page
.
search
(
".DetailJobNew ul li:nth-child(2) span"
).
text
@exprience
<<
""
add_data
()
end
end
def
self
.
crawl_data_jobs_interface_5
(
url
)
def
self
.
crawl_data_jobs_interface_5
(
page
)
page
=
base_link
(
url
)
#
page = base_link(url)
@name
<<
page
.
search
(
".info-company h1"
).
text
@name
<<
page
.
search
(
".info-company h1"
).
text
@company_name
<<
page
.
search
(
".info-company .text-job h2"
).
text
@company_name
<<
page
.
search
(
".info-company .text-job h2"
).
text
...
@@ -263,7 +194,7 @@ class Interface_web
...
@@ -263,7 +194,7 @@ class Interface_web
@exprience
<<
page
.
search
(
".DetailJobNew li:nth-child(5) span"
).
text
.
strip
@exprience
<<
page
.
search
(
".DetailJobNew li:nth-child(5) span"
).
text
.
strip
add_data
()
add_data
(
@name
,
@company_name
,
@city_name
,
@created_date
,
@expiration_date
,
@salary
,
@industry_name
,
@description
,
@level
,
@exprience
)
end
end
def
self
.
make_data
def
self
.
make_data
...
@@ -281,24 +212,25 @@ class Interface_web
...
@@ -281,24 +212,25 @@ class Interface_web
@city_name
=
[]
@city_name
=
[]
link_crawl
=
get_link_job_and_companies
link_crawl
=
get_link_job_and_companies
total_jobs
=
link_crawl
[
1
].
length
link_crawl
[
1
].
each_with_index
do
|
path
,
i
|
current_job
=
1
link_crawl
[
1
].
each
do
|
path
|
page
=
Nokogiri
::
HTML
(
URI
.
open
(
URI
.
parse
(
URI
.
escape
(
"https://careerbuilder.vn/
#{
path
}
"
))))
page
=
Nokogiri
::
HTML
(
URI
.
open
(
URI
.
parse
(
URI
.
escape
(
"https://careerbuilder.vn/
#{
path
}
"
))))
if
page
.
search
(
".item-blue .detail-box:nth-child(1) ul li:nth-child(1) p"
)[
0
]
!=
nil
if
page
.
search
(
".item-blue .detail-box:nth-child(1) ul li:nth-child(1) p"
)[
0
]
!=
nil
crawl_data_jobs_interface_1
(
pa
th
)
crawl_data_jobs_interface_1
(
pa
ge
)
elsif
page
.
search
(
"section .template-200"
).
text
!=
""
elsif
page
.
search
(
"section .template-200"
).
text
!=
""
crawl_data_jobs_interface_2
(
pa
th
)
crawl_data_jobs_interface_2
(
pa
ge
)
elsif
(
page
.
search
(
".DetailJobNew ul li"
).
size
==
10
&&
!
page
.
search
(
'.right-col ul li'
).
text
.
include?
(
'Độ tuổi'
))
elsif
(
page
.
search
(
".DetailJobNew ul li"
).
size
==
10
&&
!
page
.
search
(
'.right-col ul li'
).
text
.
include?
(
'Độ tuổi'
))
crawl_data_jobs_interface_5
(
pa
th
)
crawl_data_jobs_interface_5
(
pa
ge
)
end
end
puts
"Process:
#{
current_job
}
/
#{
total_jobs
}
"
puts
"Process:
#{
i
+
1
}
/
#{
link_crawl
[
1
].
length
}
"
current_job
+=
1
end
end
@data
@data
end
end
end
end
# else # insert "page.search(".DetailJobNew ul li").size == 8" (if want catch interface 4)
# else # insert "page.search(".DetailJobNew ul li").size == 8" (if want catch interface 4)
# crawl_data_jobs_interface_3(path)
# crawl_data_jobs_interface_3(path)
\ No newline at end of file
lib/tasks/crawler.rake
View file @
f5d71986
require
'src/crawler'
require
'src/crawler'
namespace
:db
do
namespace
:db
do
task
populate: :environment
do
task
populate: :environment
do
#
Clawler.make_industries
Clawler
.
make_industries
#
Clawler.make_cities
Clawler
.
make_cities
Clawler
.
make_companies
Clawler
.
make_companies
#
Clawler.make_jobs
Clawler
.
make_jobs
end
end
end
end
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment