Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
V
ven-job
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Trịnh Hoàng Phúc
ven-job
Commits
b7c9fd57
Commit
b7c9fd57
authored
May 13, 2020
by
Trịnh Hoàng Phúc
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Fix review 13/05/2020
parent
db3ba1fb
Pipeline
#614
canceled with stages
in 0 seconds
Changes
6
Pipelines
1
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
61 additions
and
59 deletions
+61
-59
app/models/job.rb
+2
-2
app/services/crawler_service.rb
+40
-15
app/services/job_service.rb
+0
-7
db/migrate/20200423044651_add_foreign_to_cities.rb
+1
-1
db/migrate/20200511055632_add_columns_to_jobs.rb
+2
-2
lib/tasks/crawler.rake
+16
-32
No files found.
app/models/job.rb
View file @
b7c9fd57
...
...
@@ -16,10 +16,10 @@ class Job < ApplicationRecord
has_and_belongs_to_many
:industries
has_and_belongs_to_many
:cities
validate
:updated_date_job_cannot_be_greater_than_expiration_date
,
on: :create
validate
:updated_date_job_cannot_be_greater_than_expiration_date
validates
:title
,
length:
{
minimum:
6
}
validates
:title
,
:updated_date_job
,
:level
,
:expiration_date
,
:salary
,
:min_salary
,
:max_salary
,
presence:
true
,
on: :create
validates
:title
,
:updated_date_job
,
:level
,
:expiration_date
,
:salary
,
:min_salary
,
:max_salary
,
presence:
true
validates
:min_salary
,
:max_salary
,
numericality:
{
only_integer:
true
}
def
updated_date_job_cannot_be_greater_than_expiration_date
...
...
app/services/crawler_service.rb
View file @
b7c9fd57
class
CrawlerService
def
self
.
convert_salary
salary
if
salary
==
"Cạnh tranh"
[
0
,
999999999
]
elsif
salary
.
include?
"Dưới"
max_salary
=
(
salary
.
gsub
(
"Dưới "
,
""
).
gsub
(
" Tr VND"
,
""
).
gsub
(
","
,
"."
).
to_f
*
1000000
).
to_i
[
0
,
max_salary
]
elsif
salary
.
include?
"Trên"
min_salary
=
(
salary
.
gsub
(
"Trên "
,
""
).
gsub
(
" Tr VND"
,
""
).
gsub
(
","
,
"."
).
to_f
*
1000000
).
to_i
max_salary
=
999999999
[
min_salary
,
max_salary
]
else
range_salary
=
salary
.
split
(
"-"
)
min_salary
=
(
range_salary
[
0
].
gsub
(
"$ "
,
""
).
gsub
(
" Tr "
,
""
).
to_f
*
1000000
).
to_i
max_salary
=
(
range_salary
[
1
].
gsub
(
" Tr VND"
,
""
).
gsub
(
" "
,
""
).
to_f
*
1000000
).
to_i
[
min_salary
,
max_salary
]
def
self
.
convert_salary
(
salary
)
return
[
0
,
999_999_999
]
if
salary
==
"Cạnh tranh"
vn_salary
=
salary
.
tr
(
"^[0-9]{1,2}[.,]
\d
{1-2}"
,
" "
)
.
tr
(
","
,
"."
)
.
split
(
" "
)
.
map
{
|
s
|
(
s
.
to_f
*
1_000_000
).
to_i
}
return
[
0
,
vn_salary
[
0
]]
if
salary
.
include?
"Dưới"
return
[
vn_salary
[
0
],
0
]
if
salary
.
include?
"Trên"
[
vn_salary
[
0
],
vn_salary
[
1
]]
end
def
self
.
imports
(
job_attributes
,
company_attributes
,
cities
,
industries
)
ActiveRecord
::
Base
.
transaction
do
raise
Exception
.
new
"Not enough data transferred"
if
job_attributes
.
nil?
||
company_attributes
.
nil?
||
cities
.
nil?
||
industries
.
nil?
job_attributes
[
:company_id
]
=
Company
.
find_or_create_by
(
company_attributes
).
id
job
=
Job
.
find_or_create_by
(
job_attributes
)
if
job
.
errors
.
full_messages
.
present?
raise
Exception
.
new
"
#{
job
.
errors
.
full_messages
.
join
(
","
)
}
"
raise
ActiveRecord
::
Rollback
end
cities
=
cities
.
map
do
|
city
|
City
.
find_or_create_by
({
title:
city
})
end
industries
=
industries
.
map
do
|
industry
|
Industry
.
find_or_create_by
({
title:
industry
})
end
if
cities
.
length
>
0
cities
.
each
do
|
city
|
job
.
cities
<<
city
end
end
if
industries
.
length
>
0
industries
.
each
do
|
industry
|
job
.
industries
<<
industry
end
end
end
end
end
\ No newline at end of file
app/services/job_service.rb
deleted
100644 → 0
View file @
db3ba1fb
class
JobService
def
self
.
check_exist_or_create_job
job_attributes
job
=
Job
.
find_or_create_by
(
job_attributes
)
return
job
end
end
\ No newline at end of file
db/migrate/20200423044651_add_foreign_to_cities.rb
View file @
b7c9fd57
class
AddForeignToCities
<
ActiveRecord
::
Migration
[
6.0
]
def
change
add_column
:cities
,
:foreign
,
:boolean
,
:default
=>
false
add_column
:cities
,
:foreign
,
:boolean
,
default:
false
end
end
db/migrate/20200511055632_add_columns_to_jobs.rb
View file @
b7c9fd57
class
AddColumnsToJobs
<
ActiveRecord
::
Migration
[
6.0
]
def
change
add_column
:jobs
,
:min_salary
,
:bigint
,
:default
=>
0
add_column
:jobs
,
:max_salary
,
:bigint
,
:default
=>
0
add_column
:jobs
,
:min_salary
,
:bigint
,
default:
0
add_column
:jobs
,
:max_salary
,
:bigint
,
default:
0
add_column
:jobs
,
:benefit
,
:text
add_column
:jobs
,
:job_requirements
,
:text
add_column
:jobs
,
:other_information
,
:text
...
...
lib/tasks/crawler.rake
View file @
b7c9fd57
...
...
@@ -20,8 +20,12 @@ namespace :crawler do
next
end
# Set salary, min-salary, max-salary
if
item
.
at_css
(
".figure .figcaption .caption .salary"
).
text
.
include?
"USD"
logger
.
warn
"Another template
#{
item
.
css
(
".figure .figcaption .title .job_link @href"
).
text
}
"
next
end
salary
=
item
.
at_css
(
".figure .figcaption .caption .salary"
).
text
.
gsub
(
"$ "
,
""
)
min_salary
,
max_salary
=
CrawlerService
.
convert_salary
salary
min_salary
,
max_salary
=
CrawlerService
.
convert_salary
(
salary
)
# Job attributes
job_attributes
=
{
title:
item
.
at_css
(
".figure .figcaption .title a @title"
).
text
,
...
...
@@ -30,7 +34,7 @@ namespace :crawler do
min_salary:
min_salary
,
max_salary:
max_salary
}
html_job_detail
.
css
(
".
search-result-list-detail .tabs #tab-1 .job-detail-content
.has-background ul li"
).
each
do
|
ele
|
html_job_detail
.
css
(
".
job-detail-content .row
.has-background ul li"
).
each
do
|
ele
|
type
=
ele
.
at_css
(
"strong"
).
text
case
type
when
"Hết hạn nộp"
...
...
@@ -42,8 +46,8 @@ namespace :crawler do
end
end
html_job_detail
.
css
(
".search-result-list-detail .tabs #tab-1 .job-detail-content .detail-row"
).
each
do
|
ele
|
if
ele
.
at_css
(
"h3"
).
present
?
type
=
ele
.
at_css
(
"h3
"
).
text
next
if
ele
.
at_css
(
".detail-title"
).
nil
?
type
=
ele
.
at_css
(
".detail-title
"
).
text
case
type
when
"Phúc lợi "
job_attributes
[
:benefit
]
=
ele
.
at_css
(
"ul"
).
inner_html
.
squish
...
...
@@ -55,47 +59,27 @@ namespace :crawler do
job_attributes
[
:other_information
]
=
ele
.
inner_html
.
squish
.
gsub
(
"<h3 class=
\"
detail-title
\"
>Thông tin khác</h3>"
,
""
)
end
end
end
if
item
.
at_css
(
".figure .image a @href"
).
text
!=
"javascript:void(0);"
next
if
item
.
at_css
(
".figure .image a @href"
).
text
==
"javascript:void(0);"
# Company attributes
html_company_detail
=
Nokogiri
::
HTML
.
parse
(
open
(
URI
.
encode
(
item
.
css
(
".figure .image a @href"
).
text
)))
if
html_company_detail
.
at_css
(
".jobsby-company"
).
present
?
next
if
html_company_detail
.
at_css
(
".jobsby-company"
).
nil
?
company_attributes
=
{
title:
html_company_detail
.
at_css
(
".jobsby-company .company-introduction .company-info .info .content .name"
).
text
,
address:
html_company_detail
.
css
(
".jobsby-company .company-introduction .company-info .info .content p"
)[
1
].
text
,
logo:
html_company_detail
.
at_css
(
".jobsby-company .company-introduction .company-info .info .img @src"
).
text
,
description:
html_company_detail
.
at_css
(
".jobsby-company .company-introduction .company-info .info .content ul"
).
inner_html
.
squish
}
# Check exist or create company
job_attributes
[
:company_id
]
=
Company
.
find_or_create_by
(
company_attributes
).
id
end
end
# Create job
job
=
JobService
.
check_exist_or_create_job
job_attributes
if
job
.
errors
.
full_messages
.
present?
logger
.
error
"
#{
job
.
errors
.
full_messages
.
join
(
","
)
}
"
next
end
# Defind cities array
cities
=
item
.
css
(
".figure .figcaption .caption .location ul li"
).
map
do
|
city
|
city
=
City
.
find_or_create_by
({
title:
city
.
text
.
squish
})
end
# Create city_job
if
cities
.
length
>
0
cities
.
each
do
|
city
|
job
.
cities
<<
city
end
city
.
text
.
squish
end
# Defind industries array
industries
=
html_job_detail
.
css
(
".search-result-list-detail .tabs #tab-1 .job-detail-content .detail-box .industry p a"
).
map
do
|
ele
|
industry
=
Industry
.
find_or_create_by
({
title:
ele
.
text
.
gsub
(
","
,
""
).
squish
})
end
# Create industry_job
if
industries
.
length
>
0
industries
.
each
do
|
industry
|
job
.
industries
<<
industry
end
industries
=
html_job_detail
.
css
(
".search-result-list-detail .tabs #tab-1 .job-detail-content .detail-box .industry p a"
).
map
do
|
industry
|
industry
.
text
.
tr
(
","
,
""
).
squish
end
CrawlerService
.
imports
(
job_attributes
,
company_attributes
,
cities
,
industries
)
logger
.
info
"Crawl success url :
#{
item
.
css
(
".figure .figcaption .title .job_link @href"
).
text
}
"
rescue
Exception
=>
e
logger
.
error
e
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment