Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
V
venjob
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Huỳnh Thiên Phước
venjob
Commits
67e63523
Commit
67e63523
authored
Jul 31, 2020
by
Huỳnh Thiên Phước
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Collapse code
parent
c18c0034
Pipeline
#745
failed with stages
in 0 seconds
Changes
8
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
84 additions
and
68 deletions
+84
-68
app/assets/stylesheets/top_pages.scss
+3
-1
app/controllers/top_pages_controller.rb
+1
-0
app/views/layouts/_search_bar.html.erb
+0
-0
app/views/layouts/_show_cities.html.erb
+1
-1
app/views/layouts/_show_industries.html.erb
+1
-1
app/views/top_pages/index.html.erb
+2
-0
lib/src/crontab.rb
+73
-61
lib/tasks/crawler_import.rake
+3
-4
No files found.
app/assets/stylesheets/top_pages.scss
View file @
67e63523
...
@@ -52,7 +52,6 @@
...
@@ -52,7 +52,6 @@
padding
:
15px
;
padding
:
15px
;
margin
:
15px
;
margin
:
15px
;
text-align
:
center
;
text-align
:
center
;
border
:
1px
solid
red
;
}
}
.city-banner
{
.city-banner
{
background
:
linear-gradient
(
to
top
,
#33ccff
0%
,
#006600
100%
);
background
:
linear-gradient
(
to
top
,
#33ccff
0%
,
#006600
100%
);
...
@@ -70,3 +69,6 @@
...
@@ -70,3 +69,6 @@
font-style
:
italic
;
font-style
:
italic
;
font-family
:
Bookman
,
URW
Bookman
L
,
serif
;
font-family
:
Bookman
,
URW
Bookman
L
,
serif
;
}
}
.search-bar
{
size
:
50
;
}
app/controllers/top_pages_controller.rb
View file @
67e63523
...
@@ -4,5 +4,6 @@ class TopPagesController < ApplicationController
...
@@ -4,5 +4,6 @@ class TopPagesController < ApplicationController
@jobs
=
Job
.
limit
(
5
).
order
(
created_at: :desc
)
@jobs
=
Job
.
limit
(
5
).
order
(
created_at: :desc
)
@jobs_of_cities
=
CityJob
.
limit
(
9
).
group
(
'city_id'
).
order
(
'Count(*) DESC'
).
count
@jobs_of_cities
=
CityJob
.
limit
(
9
).
group
(
'city_id'
).
order
(
'Count(*) DESC'
).
count
@jobs_of_industries
=
IndustryJob
.
limit
(
9
).
group
(
'industry_id'
).
order
(
'Count(*) DESC'
).
count
@jobs_of_industries
=
IndustryJob
.
limit
(
9
).
group
(
'industry_id'
).
order
(
'Count(*) DESC'
).
count
end
end
end
end
app/views/layouts/_search_bar.html.erb
0 → 100644
View file @
67e63523
app/views/layouts/_show_cities.html.erb
View file @
67e63523
<%
@jobs_of_cities
.
each
do
|
city
,
count_job
|
%>
<%
@jobs_of_cities
.
each
do
|
city
,
count_job
|
%>
<div
class=
"col-4"
>
<div
class=
"col-4"
>
<div
class=
"row-table"
>
<div
class=
"row-table
border border-dark rounded
"
>
<div
class=
"city-name"
><strong>
<%=
City
.
find
(
city
).
name
%>
</strong></div>
<div
class=
"city-name"
><strong>
<%=
City
.
find
(
city
).
name
%>
</strong></div>
<div
class=
"count-job"
>
<%=
count_job
%>
</div>
<div
class=
"count-job"
>
<%=
count_job
%>
</div>
</div>
</div>
...
...
app/views/layouts/_show_industries.html.erb
View file @
67e63523
<%
@jobs_of_industries
.
each
do
|
industry
,
count_job
|
%>
<%
@jobs_of_industries
.
each
do
|
industry
,
count_job
|
%>
<div
class=
"col-4"
>
<div
class=
"col-4"
>
<div
class=
"row-table"
>
<div
class=
"row-table
border border-dark rounded
"
>
<div
class=
"industry-name"
><strong>
<%=
Industry
.
find
(
industry
).
name
%>
</strong></div>
<div
class=
"industry-name"
><strong>
<%=
Industry
.
find
(
industry
).
name
%>
</strong></div>
<div
class=
"count-job"
>
<%=
count_job
%>
</div>
<div
class=
"count-job"
>
<%=
count_job
%>
</div>
</div>
</div>
...
...
app/views/top_pages/index.html.erb
View file @
67e63523
...
@@ -5,6 +5,8 @@
...
@@ -5,6 +5,8 @@
</div>
</div>
</div>
</div>
<div
class=
"container"
>
<div
class=
"container"
>
<div
class=
"search-bar"
>
<%=
render
'layouts/search_bar'
%>
</div>
<br>
<div
class=
"job-list"
>
<%=
render
'layouts/show_jobs'
%>
</div>
<div
class=
"job-list"
>
<%=
render
'layouts/show_jobs'
%>
</div>
</div>
</div>
<div
class=
"city-banner"
>
City
</div>
<div
class=
"city-banner"
>
City
</div>
...
...
lib/src/crontab.rb
View file @
67e63523
class
Crontab
class
Crontab
attr_accessor
def
initialize
(
logger
)
def
initialize
(
logger
)
@mylogger
=
logger
@mylogger
=
logger
end
end
def
find_company
def
find_company
(
url
)
company_info
=
Nokogiri
::
HTML
(
URI
.
open
(
"https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-trang-1-vi.html"
))
company_info
=
Nokogiri
::
HTML
(
URI
.
open
(
url
))
company_links
=
company_info
.
css
(
'div.caption a.company-name'
).
map
{
|
link
|
link
[
'href'
]
}
company_links
=
company_info
.
css
(
'div.caption a.company-name'
).
map
{
|
link
|
link
[
'href'
]
}
company_links
.
each
do
|
link
|
company_links
.
each
do
|
link
|
next
if
link
==
'javascript:void(0);'
next
if
link
==
'javascript:void(0);'
company_page
=
Nokogiri
::
HTML
(
URI
.
open
(
URI
.
parse
(
URI
.
escape
(
link
))))
company_page
=
Nokogiri
::
HTML
(
URI
.
open
(
URI
.
parse
(
URI
.
escape
(
link
))))
if
(
company_page
.
search
(
'p.name'
).
text
).
present?
name_company
=
company_page
.
search
(
'p.name'
)
&
.
text
begin
address_company
=
company_page
.
css
(
'div.content p'
).
children
[
1
]
&
.
text
name_company
=
company_page
.
search
(
'p.name'
).
text
introduction_company
=
company_page
.
css
(
'div.main-about-us'
).
text
address_company
=
company_page
.
css
(
'div.content p'
).
children
[
1
].
text
next
if
name_company
.
blank?
introduction_company
=
company_page
.
css
(
'div.main-about-us'
).
text
begin
get_name_company
=
Company
.
find_by
(
name:
name_company
)
get_name_company
=
Company
.
find_by
(
name:
name_company
)
if
get_name_company
.
nil?
if
get_name_company
.
nil?
company
=
Company
.
create!
(
name:
name_company
,
company
=
Company
.
create!
(
name:
name_company
,
address:
address_company
,
address:
address_company
,
introduction:
introduction_company
)
introduction:
introduction_company
)
end
rescue
StandardError
=>
e
@mylogger
.
error
"
#{
e
.
message
}
"
end
end
rescue
StandardError
=>
e
@mylogger
.
error
"
#{
e
.
message
}
"
end
end
end
def
create_job
(
title_job
,
level
,
salary
,
experience
,
expiration_date
,
description
,
company_id
)
Job
.
create!
(
title:
title_job
,
level:
level
,
salary:
salary
,
experience:
experience
,
expiration_date:
expiration_date
,
description:
description
,
company_id:
company_id
)
end
def
create_city_rel
(
get_row
,
job_find
)
location_rel
=
get_row
.
css
(
'div.map p a'
).
children
.
map
{
|
location
|
location
.
text
.
strip
}
puts
job_find
location_rel
.
each
do
|
loc
|
city_table
=
City
.
find_by
(
name:
loc
)
next
if
city_table
.
nil?
unless
CityJob
.
exists?
(
job_id:
job_find
.
id
,
city_id:
city_table
.
id
)
puts
"Created City:
#{
job_find
.
id
}
-
#{
city_table
.
id
}
.
#{
loc
}
"
city_jobs
=
CityJob
.
create!
(
job_id:
job_find
.
id
,
city_id:
city_table
.
id
)
end
end
end
def
create_industry_rel
(
get_row
,
job_find
)
industry_rel
=
get_row
.
css
(
'li a'
).
children
.
map
{
|
industry
|
industry
.
text
.
strip
}
puts
job_find
industry_rel
.
each
do
|
ind
|
industry_table
=
Industry
.
find_by
(
name:
ind
)
next
if
industry_table
.
nil?
unless
IndustryJob
.
exists?
(
job_id:
job_find
.
id
,
industry_id:
industry_table
.
id
)
puts
"Created Industry:
#{
job_find
.
id
}
-
#{
industry_table
.
id
}
.
#{
ind
}
"
industry_jobs
=
IndustryJob
.
create!
(
job_id:
job_find
.
id
,
industry_id:
industry_table
.
id
)
end
end
end
end
end
end
def
find_job
page_access
=
Nokogiri
::
HTML
(
URI
.
open
(
"https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-vi.html"
))
def
find_job
(
url
)
page_access
=
Nokogiri
::
HTML
(
URI
.
open
(
url
))
get_link
=
page_access
.
css
(
'a.job_link'
).
map
{
|
link
|
link
[
'href'
]
}
get_link
=
page_access
.
css
(
'a.job_link'
).
map
{
|
link
|
link
[
'href'
]
}
get_link
.
each
do
|
link
|
get_link
.
each
do
|
link
|
page_job
=
Nokogiri
::
HTML
(
URI
.
open
(
URI
.
parse
(
URI
.
escape
(
link
))))
link_
page_job
=
Nokogiri
::
HTML
(
URI
.
open
(
URI
.
parse
(
URI
.
escape
(
link
))))
get_row
=
page_job
.
search
(
'div.bg-blue div.row'
)
get_row
=
link_
page_job
.
search
(
'div.bg-blue div.row'
)
if
get_row
.
present?
if
get_row
.
present?
begin
begin
get_name_company
=
page_job
.
search
(
'div.job-desc a.job-company-name'
).
text
.
strip
get_name_company
=
link_page_job
.
search
(
'div.job-desc a.job-company-name'
).
text
.
strip
title_job
=
link_page_job
.
search
(
'div.job-desc p'
).
text
.
strip
description
=
link_page_job
.
search
(
'div.detail-row'
)
salary
=
get_row
.
at_xpath
(
'//li[./strong/i[contains(@class, "fa fa-usd")]]/p'
).
text
.
strip
experience
=
get_row
.
at_xpath
(
'//li[./strong/i[contains(@class, "fa fa-briefcase")]]/p'
)
&
.
text
&
.
strip
level
=
get_row
.
at_xpath
(
'//li[./strong/i[contains(@class, "mdi mdi-account")]]/p'
).
text
.
strip
expiration_date
=
get_row
.
at_xpath
(
'//li[./strong/i[contains(@class, "mdi mdi-calendar-check")]]/p'
).
text
.
strip
company_table
=
Company
.
find_by
(
name:
get_name_company
)
company_table
=
Company
.
find_by
(
name:
get_name_company
)
title_job
=
page_job
.
search
(
'div.job-desc p'
).
text
description
=
page_job
.
search
(
'div.detail-row'
)
next
if
company_table
.
nil?
next
if
company_table
.
nil?
job_check
=
Job
.
find_by
(
title:
title_job
,
company_id:
company_table
.
id
)
job_check
=
Job
.
exists?
(
title:
title_job
,
company_id:
company_table
.
id
)
salary
=
get_row
.
at_xpath
(
'//li[./strong/i[contains(@class, "fa fa-usd")]]/p'
).
text
.
strip
unless
job_check
==
false
experience
=
get_row
.
at_xpath
(
'//li[./strong/i[contains(@class, "fa fa-briefcase")]]/p'
).
text
.
strip
create_job
(
title_job
,
level
,
salary
,
experience
,
expiration_date
,
description
,
company_table
.
id
)
level
=
get_row
.
at_xpath
(
'//li[./strong/i[contains(@class, "mdi mdi-account")]]/p'
).
text
.
strip
expiration_date
=
get_row
.
at_xpath
(
'//li[./strong/i[contains(@class, "mdi mdi-calendar-check")]]/p'
).
text
.
strip
if
job_check
.
nil?
job
=
Job
.
create!
(
title:
title_job
,
level:
level
,
salary:
salary
,
experience:
experience
,
expiration_date:
expiration_date
,
description:
description
,
company_id:
company_table
.
id
)
end
find_job
=
Job
.
find_by
(
title:
title_job
,
company_id:
company_table
.
id
)
puts
find_job
.
title
if
find_job
.
present?
location_rel
=
get_row
.
css
(
'div.map p a'
).
children
.
map
{
|
location
|
location
.
text
.
strip
}
location_rel
.
each
do
|
loc
|
city_table
=
City
.
find_by
(
name:
loc
)
next
if
city_table
.
nil?
unless
CityJob
.
exists?
(
job_id:
find_job
.
id
,
city_id:
city_table
.
id
)
puts
"Created City:
#{
find_job
.
id
}
-
#{
city_table
.
id
}
.
#{
loc
}
"
city_jobs
=
CityJob
.
create!
(
job_id:
find_job
.
id
,
city_id:
city_table
.
id
)
end
end
industry_rel
=
get_row
.
css
(
'li a'
).
children
.
map
{
|
industry
|
industry
.
text
.
strip
}
industry_rel
.
each
do
|
ind
|
industry_table
=
Industry
.
find_by
(
name:
ind
)
next
if
industry_table
.
nil?
unless
IndustryJob
.
exists?
(
job_id:
find_job
.
id
,
industry_id:
industry_table
.
id
)
puts
"Created Industry:
#{
find_job
.
id
}
-
#{
industry_table
.
id
}
.
#{
ind
}
"
industry_jobs
=
IndustryJob
.
create!
(
job_id:
find_job
.
id
,
industry_id:
industry_table
.
id
)
end
end
end
end
rescue
StandardError
=>
e
next
if
job_check
==
false
# @mylogger.error "#{e.message}"
job_find
=
Job
.
find_by
(
title:
title_job
,
company_id:
company_table
.
id
)
puts
e
create_city_rel
(
get_row
,
job_find
)
create_industry_rel
(
get_row
,
job_find
)
rescue
StandardError
=>
e
@mylogger
.
error
"
#{
e
.
message
}
"
end
end
end
end
end
end
...
...
lib/tasks/crawler_import.rake
View file @
67e63523
...
@@ -4,10 +4,9 @@ require 'net/ftp'
...
@@ -4,10 +4,9 @@ require 'net/ftp'
require
'csv'
require
'csv'
require
'zip'
require
'zip'
namespace
:import
do
namespace
:import
do
logger
||=
Logger
.
new
(
Rails
.
root
.
join
(
'log'
,
'my.log'
))
logger
||=
Logger
.
new
(
Rails
.
root
.
join
(
'log'
,
'my.log'
))
url
=
'https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-trang-1-vi.html'
desc
'crawler data'
desc
'crawler data'
task
crawler: :environment
do
task
crawler: :environment
do
action
=
Crawler
.
new
(
logger
)
action
=
Crawler
.
new
(
logger
)
...
@@ -20,8 +19,8 @@ namespace :import do
...
@@ -20,8 +19,8 @@ namespace :import do
task
auto: :environment
do
task
auto: :environment
do
action
=
Crawler
.
new
(
logger
)
action
=
Crawler
.
new
(
logger
)
crontab
=
Crontab
.
new
(
logger
)
crontab
=
Crontab
.
new
(
logger
)
crontab
.
find_company
crontab
.
find_company
(
url
)
crontab
.
find_job
crontab
.
find_job
(
url
)
action
.
get_file_csv
action
.
get_file_csv
action
.
extract_zip
(
'./jobs.zip'
,
'lib/csv'
)
action
.
extract_zip
(
'./jobs.zip'
,
'lib/csv'
)
action
.
import_file_csv
(
Rails
.
root
.
join
(
'lib'
,
'csv'
,
'jobs.csv'
))
action
.
import_file_csv
(
Rails
.
root
.
join
(
'lib'
,
'csv'
,
'jobs.csv'
))
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment