Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
V
VeNJOB
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Nguyen Ngoc Nghia
VeNJOB
Commits
37105061
Commit
37105061
authored
Mar 02, 2020
by
nnnghia98
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
refactoring code
parent
6e007fe3
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
51 additions
and
47 deletions
+51
-47
app/services/crawl_data.rb
+24
-26
app/services/job_html.rb
+27
-21
No files found.
app/services/crawl_data.rb
View file @
37105061
...
...
@@ -7,6 +7,7 @@ class CrawlData
def
crawl_web
page
=
Nokogiri
::
HTML
.
parse
(
open
(
Settings
.
crawl
.
base_url
,
ssl_verify_mode:
nil
))
total_job
=
page
.
css
(
"div.ais-stats h1.col-sm-10 span"
).
text
.
gsub
(
","
,
""
).
to_f
return
if
total_job
==
0
total_page
=
(
total_job
/
Settings
.
crawl
.
jobs_per_page
).
floor
crawl_job_title_logger
=
ActiveSupport
::
Logger
.
new
(
"log/crawl_data.log"
)
crawl_job_title_logger
.
info
"Crawl at
#{
Time
.
current
}
"
...
...
@@ -18,31 +19,31 @@ class CrawlData
job_page
=
Nokogiri
::
HTML
.
parse
(
open
(
URI
.
encode
(
job_url
)))
# Job code
job_code
=
job_url
.
split
(
"/"
).
last
.
split
(
"."
)[
-
2
]
job
=
JobHtml
.
new
(
job_page
).
parse_job
# Company code
company_code
=
job_url
.
split
(
"/"
).
last
.
split
(
"-"
).
last
.
split
(
"."
)[
-
2
].
strip
next
if
job_page
.
css
(
".LeftJobCB"
).
nil?
||
job
[
:workplace
].
blank?
next
if
job_page
.
css
(
".LeftJobCB"
).
nil?
# Job code
job_code
=
job_url
.
split
(
"/"
).
last
.
split
(
"."
)[
-
2
]
||
""
job
=
JobHtml
.
new
(
job_page
).
parse_job
# Company code
company_code
=
job_page
.
css
(
".viewmorejob a @href"
).
present?
?
job_page
.
css
(
".viewmorejob a @href"
).
text
.
split
(
"/"
).
last
.
split
(
"-"
)[
-
2
].
strip
:
""
crawl_job_title_logger
.
info
"
#{
job
[
:title
]
}
"
next
if
job
[
:workplace
].
nil?
job
[
:workplace
].
each
do
|
city_name
|
city_id
=
city_id
(
city_name
)
company_id
=
company_id
(
company_code
,
job
[
:company_name
],
job
[
:company_address
],
job
[
:company_description
])
job_id
=
job_id
(
job_code
,
job
[
:title
],
job
[
:salary
],
city_id
=
get_city
(
city_name
).
id
company_id
=
get_company
(
company_code
,
job
[
:company_name
],
job
[
:company_address
],
job
[
:company_description
]).
id
job_id
=
get_job
(
job_code
,
job
[
:title
],
job
[
:salary
],
job
[
:description
],
job
[
:requirement
],
job
[
:level
],
job
[
:post_date
],
job
[
:expiration_date
],
company_id
)
job
[
:expiration_date
],
company_id
)
.
id
CityJob
.
find_or_create_by!
(
job_id:
job_id
,
city_id:
city_id
)
job
[
:industries
].
each
do
|
job_industry
|
industry_id
=
industry_id
(
job_industry
.
strip
)
job_industry
=
job_industry
.
strip
industry_id
=
get_industry
(
job_industry
).
id
IndustryJob
.
find_or_create_by!
(
industry_id:
industry_id
,
job_id:
job_id
)
end
end
...
...
@@ -50,28 +51,25 @@ class CrawlData
end
end
def
company_id
(
code
,
name
,
address
,
description
)
def
get_company
(
code
,
name
,
address
,
description
)
company
=
Company
.
find_or_initialize_by
(
code:
code
)
company
.
update
(
name:
name
,
address:
address
,
description:
description
)
company
.
id
company
end
def
industry_id
(
name
)
def
get_industry
(
name
)
industry
=
Industry
.
find_or_create_by!
(
name:
name
)
industry
.
id
industry
end
def
city_id
(
name
)
def
get_city
(
name
)
name
=
name
.
strip
City
.
find_or_create_by
(
name:
name
,
region:
"Việt Nam"
)
.
id
City
.
find_or_create_by
(
name:
name
,
region:
"Việt Nam"
)
end
def
job_id
(
code
=
nil
,
title
,
salary
,
description
,
requirement
,
level
,
post_date
,
expiration_date
,
company_id
)
if
expiration_date
.
nil?
job
=
Job
.
find_or_initialize_by
(
title:
job_title
,
company_id:
company_id
)
else
job
=
Job
.
find_or_initialize_by
(
code:
code
)
end
def
get_job
(
code
=
nil
,
title
,
salary
,
description
,
requirement
,
level
,
post_date
,
expiration_date
,
company_id
)
attrs
=
expiration_date
.
nil?
?
{
title:
job_title
,
company_id:
company_id
}
:
{
code:
code
}
job
=
Job
.
find_or_initialize_by
attrs
job
.
update
(
code:
code
,
title:
title
,
...
...
@@ -82,6 +80,6 @@ class CrawlData
expiration_date:
expiration_date
,
level:
level
,
company_id:
company_id
)
job
.
id
job
end
end
app/services/job_html.rb
View file @
37105061
...
...
@@ -6,61 +6,67 @@ class JobHtml
def
parse_job
get_job_info
get_job_detail
job
=
{
title:
get_title
,
salary:
@job_salary
,
level:
@job_level
,
{
title:
get_title
,
salary:
get_job_info
[
:salary
]
,
level:
get_job_info
[
:level
]
,
post_date:
get_post_date
,
description:
@job_description
,
requirement:
@job_requirement
,
expiration_date:
@job_expiration_date
,
workplace:
@job_workplace
,
level:
@job_level
,
industries:
@job_industries
,
description:
get_job_detail
[
:description
],
requirement:
get_job_detail
[
:requirement
],
expiration_date:
get_job_info
[
:expiration_date
],
workplace:
get_job_info
[
:workplace
],
industries:
get_job_info
[
:industries
],
company_name:
get_company_name
,
company_address:
get_company_address
,
company_description:
get_company_description
}
return
job
end
private
def
get_title
@
job_title
=
@
html_data
.
css
(
".top-job-info h1"
).
text
.
strip
@html_data
.
css
(
".top-job-info h1"
).
text
.
strip
end
def
get_post_date
@
job_post_date
=
@
html_data
.
css
(
".datepost span"
).
text
@html_data
.
css
(
".datepost span"
).
text
end
def
get_job_info
info_container
=
@html_data
.
css
(
".DetailJobNew li p"
)
job_info
=
{}
job_info
=
(
0
..
info_container
.
count
-
1
).
map
do
|
info_part
|
(
0
..
info_container
.
count
-
1
).
each
do
|
info_part
|
info
=
info_container
[
info_part
].
text
case
when
info
.
include?
(
"Nơi làm việc"
)
@job_workplace
=
info
.
gsub
(
"/[
\r\n
]+/"
,
""
).
partition
(
":"
).
last
.
split
(
","
)
job_info
[
:workplace
]
=
info
.
gsub
(
"/[
\r\n
]+/"
,
""
).
partition
(
":"
).
last
.
split
(
","
)
||
[]
when
info
.
include?
(
"Lương"
)
@job_salary
=
info
.
gsub
(
"/[
\r\n
]+/"
,
""
).
partition
(
":"
).
last
.
strip
job_info
[
:salary
]
=
info
.
gsub
(
"/[
\r\n
]+/"
,
""
).
partition
(
":"
).
last
.
strip
when
info
.
include?
(
"Cấp bậc"
)
@job_level
=
info
.
gsub
(
"/[
\r\n
]+/"
,
""
).
partition
(
":"
).
last
.
strip
job_info
[
:level
]
=
info
.
gsub
(
"/[
\r\n
]+/"
,
""
).
partition
(
":"
).
last
.
strip
when
info
.
include?
(
"Hết hạn nộp"
)
@job_expiration_date
=
info
.
gsub
(
"/[
\r\n
]+/"
,
""
).
partition
(
":"
).
last
.
strip
job_info
[
:expiration_date
]
=
info
.
gsub
(
"/[
\r\n
]+/"
,
""
).
partition
(
":"
).
last
.
strip
when
info
.
include?
(
"Ngành nghề"
)
@job_industries
=
info
.
gsub
(
"/[
\r\n
]+/"
,
""
).
partition
(
":"
).
last
.
split
(
","
)
job_info
[
:industries
]
=
info
.
gsub
(
"/[
\r\n
]+/"
,
""
).
partition
(
":"
).
last
.
split
(
","
)
end
end
return
job_info
end
def
get_job_detail
detail_container
=
@html_data
.
css
(
"div.MarBot20"
)
job_detail
=
{}
job_detail
=
(
0
..
detail_container
.
count
-
1
).
map
do
|
detail_part
|
(
0
..
detail_container
.
count
-
1
).
map
do
|
detail_part
|
detail
=
detail_container
[
detail_part
].
text
if
detail
.
include?
(
"Mô tả Công việc"
)
@job_description
=
detail
.
partition
(
"Mô tả Công việc"
).
last
job_detail
[
:description
]
=
detail
.
partition
(
"Mô tả Công việc"
).
last
elsif
detail
.
include?
(
"Yêu Cầu Công Việc"
)
@job_requirement
=
detail
.
partition
(
"Yêu Cầu Công Việc"
).
last
job_detail
[
:requirement
]
=
detail
.
partition
(
"Yêu Cầu Công Việc"
).
last
end
end
return
job_detail
end
def
get_company_name
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment