Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
V
VeNJOB
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Nguyen Hoang Mai Phuong
VeNJOB
Commits
6ba9bc64
Commit
6ba9bc64
authored
Jul 23, 2021
by
Nguyen Hoang Mai Phuong
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fix crawler
parent
fe91b8cf
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
23 additions
and
17 deletions
+23
-17
config/schedule.rb
+1
-1
db/migrate/20210723035105_change_companies.rb
+5
-0
db/schema.rb
+2
-2
lib/tasks/crawler.rake
+15
-14
No files found.
config/schedule.rb
View file @
6ba9bc64
every
1
.
day
,
at:
'08:
18
am'
do
every
1
.
day
,
at:
'08:
00
am'
do
rake
'crawler:all'
end
db/migrate/20210723035105_change_companies.rb
0 → 100644
View file @
6ba9bc64
# Changes the type of companies.address from string to text.
#
# Written with explicit `up`/`down` instead of `change` because
# `change_column` cannot be reversed automatically: Rails does not know the
# previous column type, so rolling back a `change`-based version raises
# ActiveRecord::IrreversibleMigration.
class ChangeCompanies < ActiveRecord::Migration[6.1]
  # Applies the migration: address becomes a text column.
  def up
    change_column :companies, :address, :text
  end

  # Reverts the migration: address goes back to a string column.
  # NOTE(review): values longer than the string column limit may be rejected
  # or truncated by the database on rollback -- confirm before rolling back
  # against real data.
  def down
    change_column :companies, :address, :string
  end
end
db/schema.rb
View file @
6ba9bc64
...
...
@@ -10,7 +10,7 @@
#
# It's strongly recommended that you check this file into your version control system.
ActiveRecord
::
Schema
.
define
(
version:
2021_07_2
0_145646
)
do
ActiveRecord
::
Schema
.
define
(
version:
2021_07_2
3_035105
)
do
create_table
"apply_jobs"
,
charset:
"utf8mb4"
,
collation:
"utf8mb4_0900_ai_ci"
,
force: :cascade
do
|
t
|
t
.
bigint
"user_id"
,
null:
false
...
...
@@ -44,7 +44,7 @@ ActiveRecord::Schema.define(version: 2021_07_20_145646) do
t
.
text
"description"
t
.
datetime
"created_at"
,
precision:
6
,
null:
false
t
.
datetime
"updated_at"
,
precision:
6
,
null:
false
t
.
string
"address"
t
.
text
"address"
t
.
text
"overview"
end
...
...
lib/tasks/crawler.rake
View file @
6ba9bc64
...
...
@@ -4,6 +4,8 @@ require 'logger'
namespace
:crawler
do
desc
'Crawl Jobs and Companies'
task
jobs: :environment
do
logger
=
Logger
.
new
(
"
#{
Rails
.
root
}
/log/crawler_jobs.log"
)
logger
.
info
"Start crawler job at:
#{
Time
.
current
}
"
base_url
=
Nokogiri
::
HTML
(
URI
.
open
(
'https://careerbuilder.vn/'
))
job_page
=
base_url
.
css
(
'div.menu div.dropdown-menu ul li a'
)[
0
].
attributes
[
'href'
].
value
parse_job_page
=
Nokogiri
::
HTML
(
URI
.
open
(
job_page
))
...
...
@@ -28,8 +30,8 @@ namespace :crawler do
company_name
=
company
.
css
(
'div.company-info div.content p.name'
)
next
if
company_name
.
nil?
logger
=
Logger
.
new
(
"
#{
Rails
.
root
}
/log/crawler_jobs.log
"
)
logger
.
info
(
"Link company:
#{
company_page
}
.to_s"
)
logger
.
info
(
"Link company:
#{
company_page
}
"
)
company_info
=
company
.
css
(
'div.company-info div.content'
)
address
=
company_info
.
css
(
'p'
)[
1
].
try
(
:text
)
description
=
company_info
.
css
(
'ul li'
).
text
...
...
@@ -51,22 +53,22 @@ namespace :crawler do
next
if
title
.
nil?
logger
.
info
(
"Link job:
#{
job_detail_page
}
"
)
salary
,
experience
,
type
,
level
,
expired_at
=
''
detail_content
=
detail_job
.
css
(
'div.
col-lg-4 col-sm-6 item-blue
ul li'
)
detail_content
=
detail_job
.
css
(
'div.
row div.detail-box.has-background
ul li'
)
detail_content
.
each
do
|
content
|
case
content
.
css
(
'strong'
).
text
when
'Lương'
salary
=
content
.
css
(
'p'
).
text
when
'Kinh nghiệm'
puts
content
.
css
(
'p'
).
text
experience
=
content
.
css
(
'p'
).
text
when
'Hình thức'
puts
content
.
css
(
'p'
).
text
type
=
content
.
css
(
'p'
).
text
when
'Cấp bậc'
level
=
content
.
css
(
'p'
).
text
when
'Hết hạn nộp'
expired_at
=
content
.
css
(
'p'
).
text
end
end
benefits
,
overview
,
requirement
,
other_requirement
=
''
detail_require
=
detail_job
.
css
(
'div.detail-row'
)
...
...
@@ -97,26 +99,25 @@ namespace :crawler do
company_id:
Company
.
find_by
(
name:
company_name
.
text
).
id
)
job_industries
=
[]
industries
=
detail_job
.
css
(
'div.detail-box.has-background ul li p a'
)
industries
.
each
do
|
industry
|
name
=
industry
.
text
.
squish
industry_name
=
Industry
.
find_or_create_by
(
name:
name
)
job
.
industries
<<
industry_name
job_industries
<<
Industry
.
find__by
(
name:
name
)
end
job
.
industries
<<
job_industries
job_cities
=
[]
location
=
detail_job
.
css
(
'div.map p a'
)
location
.
each
do
|
city
|
name
=
city
.
text
city_name
=
City
.
find_or_create_by
(
name:
name
)
job
.
cities
<<
city_name
job_cities
<<
City
.
find_by
(
name:
name
)
end
job
.
cities
<<
job_cities
unless
job_cities
.
nil?
end
page
+=
1
end
logger
.
info
"End crawler job at:
#{
Time
.
current
}
"
end
desc
'Crawl Industries'
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment