Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
V
venjob_thanhnd
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
3
Merge Requests
3
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
thanhnd
venjob_thanhnd
Commits
6f382fd2
Commit
6f382fd2
authored
Feb 17, 2020
by
thanhnd
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
cleaning folder and edit crawler.rake
parent
eeedca0f
Pipeline
#459
canceled with stages
in 0 seconds
Changes
1
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
20 additions
and
19 deletions
+20
-19
lib/tasks/crawler.rake
+20
-19
No files found.
lib/tasks/crawler.rake
View file @
6f382fd2
...
@@ -32,39 +32,40 @@ namespace :crawler do
...
@@ -32,39 +32,40 @@ namespace :crawler do
job
=
Nokogiri
::
HTML
(
open
(
uri
))
job
=
Nokogiri
::
HTML
(
open
(
uri
))
title
=
job
.
css
(
'html body.jobseeker_site.jobdetail-standard div#uni_wrapper div#uni_container div.MyJobDetail div.MyJobLeft div.LeftJobCB div.top-job div.top-job-info h1'
)
title
=
job
.
css
(
'div.top-job div.top-job-info h1'
)
company_name
=
job
.
css
(
'html body.jobseeker_site.jobdetail-standard div#uni_wrapper div#uni_container div.MyJobDetail div.MyJobLeft div.LeftJobCB div.top-job div.top-job-info div.tit_company'
)
company_name
=
job
.
css
(
'div.top-job div.top-job-info div.tit_company'
)
updated_date
=
job
.
css
(
'html body.jobseeker_site.jobdetail-standard div#uni_wrapper div#uni_container div.MyJobDetail div.MyJobLeft div.LeftJobCB div.datepost span'
)
updated_date
=
job
.
css
(
'div.datepost span'
)
location
=
job
.
css
(
'html body.jobseeker_site.jobdetail-standard div#uni_wrapper div#uni_container div.MyJobDetail div.MyJobLeft div.LeftJobCB div#showScroll.box2Detail ul.DetailJobNew li[1].bgLine1 p[1].fl_left b a[2]'
)
location
=
job
.
css
(
'div#showScroll.box2Detail ul.DetailJobNew li[1].bgLine1 p[1].fl_left b a[2]'
)
experience
=
job
.
css
(
'html body.jobseeker_site.jobdetail-standard div#uni_wrapper div#uni_container div.MyJobDetail div.MyJobLeft div.LeftJobCB div#showScroll.box2Detail ul.DetailJobNew li[2].bgLine2 p[1].fl_left > text()'
)
experience
=
job
.
css
(
'div#showScroll.box2Detail ul.DetailJobNew li[2].bgLine2 p[1].fl_left > text()'
)
industry
=
job
.
css
(
'html body.jobseeker_site.jobdetail-standard div#uni_wrapper div#uni_container div.MyJobDetail div.MyJobLeft div.LeftJobCB div#showScroll.box2Detail ul.DetailJobNew li[3].bgLine1 p[1].fl_left b'
)
industry
=
job
.
css
(
'div#showScroll.box2Detail ul.DetailJobNew li[3].bgLine1 p[1].fl_left b'
)
level
=
job
.
css
(
'html body.jobseeker_site.jobdetail-standard div#uni_wrapper div#uni_container div.MyJobDetail div.MyJobLeft div.LeftJobCB div#showScroll.box2Detail ul.DetailJobNew li.bgLine1 p.fl_right label'
)
level
=
job
.
css
(
'div#showScroll.box2Detail ul.DetailJobNew li.bgLine1 p.fl_right label'
)
salary
=
job
.
css
(
'html body.jobseeker_site.jobdetail-standard div#uni_wrapper div#uni_container div.MyJobDetail div.MyJobLeft div.LeftJobCB div#showScroll.box2Detail ul.DetailJobNew li.bgLine2 p.fl_right label'
)
salary
=
job
.
css
(
'div#showScroll.box2Detail ul.DetailJobNew li.bgLine2 p.fl_right label'
)
deadline
=
job
.
css
(
'html body.jobseeker_site.jobdetail-standard div#uni_wrapper div#uni_container div.MyJobDetail div.MyJobLeft div.LeftJobCB div#showScroll.box2Detail ul.DetailJobNew li[3].bgLine1 p[2].fl_right > text()'
)
deadline
=
job
.
css
(
'div#showScroll.box2Detail ul.DetailJobNew li[3].bgLine1 p[2].fl_right > text()'
)
description
=
job
.
css
(
'html body.jobseeker_site.jobdetail-standard div#uni_wrapper div#uni_container div.MyJobDetail div.MyJobLeft div.LeftJobCB div.MarBot20'
)
description
=
job
.
css
(
'div.MarBot20'
)
address
=
job
.
css
(
'html body.jobseeker_site.jobdetail-standard div#uni_wrapper div#uni_container div.MyJobDetail div.MyJobLeft div.LeftJobCB div.box1Detail p.TitleDetailNew label label'
)
address
=
job
.
css
(
'div.box1Detail p.TitleDetailNew label label'
)
company_intro
=
job
.
css
(
'html body.jobseeker_site.jobdetail-standard div#uni_wrapper div#uni_container div.MyJobDetail div.MyJobLeft div.LeftJobCB div.desc_company.content_fck span#emp_more'
)
company_intro
=
job
.
css
(
'div.desc_company.content_fck span#emp_more'
)
#skip if field blank
#skip if field blank
next
if
industry
.
text
.
blank?
next
if
industry
.
text
.
blank?
#insert data to City table:
#insert data to City table:
City
.
create
(
area_id:
area
.
id
,
city_name:
"
#{
location
.
text
.
gsub!
(
","
,
""
)
}
"
,
city_description:
""
)
# puts location.text
city_name
=
location
.
text
.
gsub
(
","
,
""
)
City
.
find_or_create_by
(
area_id:
area
.
id
,
city_name:
city_name
,
city_description:
""
)
#insert data to Industry table
#insert data to Industry table
Industry
.
find_or_create_by
(
industry_name:
"
#{
industry
.
text
}
"
,
industry_description:
""
)
Industry
.
find_or_create_by
(
industry_name:
industry
.
text
,
industry_description:
""
)
#insert data to Companies table
#insert data to Companies table
Company
.
find_or_create_by
(
company_name:
"
#{
company_name
.
text
}
"
,
company_description:
"
#{
company_intro
.
text
}
"
,
address:
"
#{
address
.
text
}
"
)
Company
.
find_or_create_by
(
company_name:
"
#{
company_name
.
text
}
"
,
company_description:
"
#{
company_intro
.
text
}
"
,
address:
"
#{
address
.
text
}
"
)
#insert data to Jobs table
#insert data to Jobs table
city
=
City
.
find_
or_create_by
(
area_id:
area
.
id
,
city_name:
"
#{
location
.
text
}
"
)
city
=
City
.
find_
by
(
area_id:
area
.
id
,
city_name:
city_name
)
industryid
=
Industry
.
find_
or_create_by
(
industry_name:
"
#{
industry
.
text
}
"
)
industryid
=
Industry
.
find_
by
(
industry_name:
industry
.
text
)
companyid
=
Company
.
find_
or_create_by
(
company_name:
"
#{
company_name
.
text
}
"
)
companyid
=
Company
.
find_
by
(
company_name:
company_name
.
text
)
Job
.
find_or_create_by
(
area_id:
area
.
id
,
city_id:
city
.
id
,
industry_id:
industryid
.
id
,
company_id:
companyid
.
id
,
job_name:
"
#{
title
.
text
}
"
,
salary:
"
#{
salary
.
text
}
"
,
deadline:
"
#{
deadline
.
text
}
"
,
level:
"
#{
level
.
text
}
"
,
experience:
"
#{
experience
.
text
.
strip
}
"
,
last_updated:
"
#{
updated_date
.
text
.
strip
}
"
,
description:
"description.text"
)
Job
.
find_or_create_by
(
area_id:
area
.
id
,
city_id:
city
.
id
,
industry_id:
industryid
.
id
,
company_id:
companyid
.
id
,
job_name:
"
#{
title
.
text
}
"
,
salary:
"
#{
salary
.
text
}
"
,
deadline:
"
#{
deadline
.
text
}
"
,
level:
"
#{
level
.
text
}
"
,
experience:
"
#{
experience
.
text
.
strip
}
"
,
last_updated:
"
#{
updated_date
.
text
.
strip
}
"
,
description:
"description.text"
)
list_url
=
nextpage
[
0
][
"href"
]
list_url
=
nextpage
[
0
][
"href"
]
end
end
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment