Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
V
venjob
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Huỳnh Thiên Phước
venjob
Commits
0bdc8e73
Commit
0bdc8e73
authored
Aug 03, 2020
by
Huỳnh Thiên Phước
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
rename variable, create import csv file
parent
d58cf3ff
Pipeline
#766
canceled with stages
in 0 seconds
Changes
12
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
12 changed files
with
230 additions
and
218 deletions
+230
-218
app/assets/stylesheets/top_pages.scss
+11
-4
app/controllers/top_pages_controller.rb
+2
-2
app/helpers/application_helper.rb
+1
-1
app/models/city_job.rb
+3
-0
app/models/industry_job.rb
+1
-0
app/views/layouts/_show_cities.html.erb
+7
-4
app/views/layouts/_show_jobs.html.erb
+0
-1
app/views/top_pages/index.html.erb
+16
-17
lib/src/crawler.rb
+69
-147
lib/src/csvimporter.rb
+86
-0
lib/src/jobparser.rb
+29
-36
lib/tasks/crawler_import.rake
+5
-6
No files found.
app/assets/stylesheets/top_pages.scss
View file @
0bdc8e73
...
...
@@ -78,6 +78,14 @@
background-image
:
linear-gradient
(
160deg
,
black
,
#8c8686
);
color
:
white
;
}
.city-name
{
}
\ No newline at end of file
.city-list
:hover
{
background-color
:
black
;
.city-name
,
.count-job
{
text-decoration
:
none
;
color
:
white
;
}
}
.city-name
,
.count-job
{
text-decoration
:
none
;
color
:
black
;
}
app/controllers/top_pages_controller.rb
View file @
0bdc8e73
# Controller for the public landing page.
class TopPagesController < ApplicationController
  # Loads everything the landing page renders:
  #   @total_jobs         - total number of Job rows
  #   @jobs               - the 5 most recently created jobs
  #   @jobs_of_cities     - result of CityJob.top_city
  #   @jobs_of_industries - grouped job count for up to 9 industries, busiest first
  #
  # Each instance variable is assigned exactly once. The earlier duplicate
  # assignments (Job.ids and an inline copy of the city query) were overwritten
  # before use and only cost extra database round-trips.
  def index
    @total_jobs = Job.count
    @jobs = Job.limit(5).order(created_at: :desc)
    @jobs_of_cities = CityJob.top_city
    @jobs_of_industries = IndustryJob.limit(9)
                                     .group('industry_id')
                                     .order('Count(*) DESC')
                                     .count
  end
end
app/helpers/application_helper.rb
View file @
0bdc8e73
# View helpers shared by all templates.
module ApplicationHelper
  # Returns the page title as a String.
  #
  # @param page_title [#to_s] the title provided by the view; nil yields ""
  # @return [String]
  #
  # The previous body also built an interpolated copy of the title and threw it
  # away before returning; only the meaningful expression is kept.
  def full_title(page_title)
    page_title.to_s
  end
end
app/models/city_job.rb
View file @
0bdc8e73
...
...
@@ -2,4 +2,7 @@ class CityJob < ApplicationRecord
belongs_to
:city
belongs_to
:job
# Grouped job count per city for the busiest cities, ordered by count descending.
#
# @param max_cities [Integer] how many cities to return (defaults to 9, the
#   value that used to be hard-coded)
# @return the result of the grouped +count+; the cities partial iterates it as
#   (city_id, job_count) pairs
def self.top_city(max_cities = 9)
  limit(max_cities).group('city_id').order('Count(*) DESC').count
end
end
app/models/industry_job.rb
View file @
0bdc8e73
# Join model: each row ties one Job to one Industry.
class IndustryJob < ApplicationRecord
  belongs_to :industry
  belongs_to :job
end
app/views/layouts/_show_cities.html.erb
View file @
0bdc8e73
<%
@jobs_of_cities
.
each
do
|
city
,
count_job
|
%>
<div
class=
"col-4"
>
<div
class=
"row-table border border-dark rounded"
>
<div><strong>
<%=
link_to
"
#{
City
.
find
(
city
).
name
}
"
,
'#'
,
class:
"city-name"
%>
</strong></div>
<div
class=
"count-job"
>
<%=
count_job
%>
</div>
<div
class=
"row-table border border-dark rounded city-list"
>
<%=
link_to
'#'
do
%>
<div
class=
"city-name"
><strong>
<%=
City
.
find
(
city
).
name
%>
</strong></div>
<div
class=
"count-job"
>
<%=
count_job
%>
</div>
<%
end
%>
</div>
</div>
</div>
<%
end
%>
\ No newline at end of file
app/views/layouts/_show_jobs.html.erb
View file @
0bdc8e73
...
...
@@ -10,7 +10,6 @@
<%=
city
.
name
%>
<%
end
%>
</div>
<%
job
.
description
.
html_safe
%>
<button
type=
"button"
class=
"btn btn-primary"
id=
"button-follow"
>
♥ Follow
</button>
</div>
</div>
...
...
app/views/top_pages/index.html.erb
View file @
0bdc8e73
<%
provide
(
:title
,
'Venjob'
)
%>
<div
class=
"banner-ground"
>
<div
class=
"top-banner"
>
<div
class=
"total-job"
>
Having
<%=
@total_jobs
.
count
%>
jobs for you!
</div>
<div
class=
"total-job"
>
Having
<%=
@total_jobs
%>
jobs for you!
</div>
</div>
</div>
<div
class=
"search-bar"
>
<%=
render
'layouts/search_bar'
%>
</div>
<br>
<div
class=
"container"
>
<div
class=
"search-bar"
>
<%=
render
'layouts/search_bar'
%>
</div>
<br>
<div
class=
"job-list"
>
<%=
render
'layouts/show_jobs'
%>
</div>
</div>
<div
class=
"city-banner"
>
City
</div>
<div
class=
"container"
>
<div
class=
"row"
>
<%=
render
'layouts/show_cities'
%>
</div>
<div
class=
"all-industry"
>
<div
class=
"row-table border border-dark rounded"
>
All Cities
</div>
</div>
</div>
<div
class=
"industry-banner"
>
Industry
</div>
<div
class=
"container"
>
<div
class=
"row"
>
<%=
render
'layouts/show_industries'
%>
</div>
<div
class=
"all-industry"
>
<div
class=
"row-table border border-dark rounded"
>
All Industries
</div>
</div>
<div
class=
"city-banner rounded"
>
City
</div>
<div
class=
"row"
>
<%=
render
'layouts/show_cities'
%>
</div>
<div
class=
"all-city"
>
<div
class=
"row-table border border-dark rounded"
><strong>
All Cities
</strong></div>
</div>
<div
class=
"industry-banner rounded"
>
Industry
</div>
<div
class=
"row"
>
<%=
render
'layouts/show_industries'
%>
</div>
<div
class=
"all-industry"
>
<div
class=
"row-table border border-dark rounded"
><strong>
All Industries
</strong></div>
</div>
</div>
lib/src/crawler.rb
View file @
0bdc8e73
require
'net/ftp'
require
'csv'
require
'zip'
class
Crawler
def
initialize
(
logger
,
url
)
@
my
logger
=
logger
@logger
=
logger
@url
=
url
@NAME_DOMAIN
=
'192.168.1.156'
@USERNAME_FTP
=
'training'
@PASSWORD_FTP
=
'training'
end
def
crawl_city_industry
crawl_city
crawl_industry
crawl_company
crawl_job
_relationships
crawl_job
end
def
crawl_city
...
...
@@ -45,158 +39,86 @@ class Crawler
end
end
def
city_relationship
(
row
,
job
)
location_relationship
=
row
.
css
(
'div.map p a'
).
children
.
map
{
|
name_city
|
name_city
.
text
.
strip
}
cities_relationship
=
City
.
where
(
name:
location_relationship
)
job
.
cities
<<
cities_relationship
end
def
industry_relationship
(
row
,
job
)
industry_relationship
=
row
.
css
(
'li a'
).
children
.
map
{
|
name_industry
|
name_industry
.
text
.
strip
}
industries_relationship
=
Industry
.
where
(
name:
industry_relationship
)
job
.
industries
<<
industries_relationship
end
def
create_job
(
title
,
link_page
,
row
,
company
)
description
=
link_page
.
search
(
'div.detail-row'
).
to_s
salary
=
row
.
at_xpath
(
'//li[./strong/i[contains(@class, "fa fa-usd")]]/p'
).
text
.
strip
experience
=
row
.
at_xpath
(
'//li[./strong/i[contains(@class, "fa fa-briefcase")]]/p'
)
&
.
text
&
.
strip
level
=
row
.
at_xpath
(
'//li[./strong/i[contains(@class, "mdi mdi-account")]]/p'
).
text
.
strip
expiration_date
=
row
.
at_xpath
(
'//li[./strong/i[contains(@class, "mdi mdi-calendar-check")]]/p'
).
text
.
strip
job
=
Job
.
find_or_create_by!
(
title:
title
,
level:
level
,
salary:
salary
,
experience:
experience
,
expiration_date:
expiration_date
,
description:
description
,
company_id:
company
.
id
)
city_relationship
(
row
,
job
)
industry_relationship
(
row
,
job
)
end
def
crawl_company
(
1
..
10
).
each
do
|
n
|
company_info
=
Nokogiri
::
HTML
(
URI
.
open
(
"https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-trang-
#{
n
}
-vi.html"
))
company_link
=
company_info
.
css
(
'div.caption a.company-name'
).
map
{
|
link
|
link
[
'href'
]
}
company_link
.
each
do
|
link
|
next
if
link
==
'javascript:void(0);'
company_page
=
Nokogiri
::
HTML
(
URI
.
open
(
URI
.
parse
(
URI
.
escape
(
link
))))
if
!
(
company_page
.
search
(
'p.name'
).
text
).
nil?
begin
name_company
=
company_page
.
search
(
'p.name'
).
text
address_company
=
company_page
.
css
(
'div.content p'
).
children
[
1
].
text
introduction_company
=
company_page
.
css
(
'div.main-about-us'
).
text
get_name_company
=
Company
.
find_by
(
name:
name_company
)
if
get_name_company
.
nil?
company
=
Company
.
create!
(
name:
name_company
,
address:
address_company
,
introduction:
introduction_company
)
end
rescue
StandardError
=>
e
@mylogger
.
error
"
#{
e
.
message
}
"
end
info
=
Nokogiri
::
HTML
(
URI
.
open
(
"https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-trang-
#{
n
}
-vi.html"
))
links
=
info
.
css
(
'div.caption a.company-name'
).
map
{
|
link
|
link
[
'href'
]
}
links
.
each
do
|
link
|
next
if
link
==
'javascript:void(0);'
page
=
Nokogiri
::
HTML
(
URI
.
open
(
URI
.
escape
(
link
)))
name
=
page
.
search
(
'p.name'
)
&
.
text
return
if
name
.
blank?
address
=
page
.
css
(
'div.content p'
).
children
[
1
]
&
.
text
introduction
=
page
.
css
(
'div.main-about-us'
).
text
begin
Company
.
find_or_create_by!
(
name:
name
,
address:
address
,
introduction:
introduction
)
rescue
StandardError
=>
e
@logger
.
error
e
.
message
end
end
end
end
def
crawl_job
_relationships
def
crawl_job
(
1
..
10
).
each
do
|
n
|
page_access
=
Nokogiri
::
HTML
(
URI
.
open
(
"https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-trang-
#{
n
}
-vi.html"
))
get_link
=
page_access
.
css
(
'a.job_link'
).
map
{
|
link
|
link
[
'href'
]
}
get_link
.
each
do
|
link
|
page_job
=
Nokogiri
::
HTML
(
URI
.
open
(
URI
.
parse
(
URI
.
escape
(
link
))))
get_row
=
page_job
.
search
(
'div.bg-blue div.row'
)
if
get_row
.
present?
begin
get_name_company
=
page_job
.
search
(
'div.job-desc a.job-company-name'
).
text
.
strip
company_table
=
Company
.
find_by
(
name:
get_name_company
)
title_job
=
page_job
.
search
(
'div.job-desc p'
).
text
description
=
page_job
.
search
(
'div.detail-row'
)
next
if
company_table
.
nil?
job_check
=
Job
.
find_by
(
title:
title_job
,
company_id:
company_table
.
id
)
salary
=
get_row
.
at_xpath
(
'//li[./strong/i[contains(@class, "fa fa-usd")]]/p'
).
text
.
strip
experience
=
get_row
.
at_xpath
(
'//li[./strong/i[contains(@class, "fa fa-briefcase")]]/p'
).
text
.
strip
level
=
get_row
.
at_xpath
(
'//li[./strong/i[contains(@class, "mdi mdi-account")]]/p'
).
text
.
strip
expiration_date
=
get_row
.
at_xpath
(
'//li[./strong/i[contains(@class, "mdi mdi-calendar-check")]]/p'
).
text
.
strip
if
job_check
.
blank?
job
=
Job
.
create!
(
title:
title_job
,
level:
level
,
salary:
salary
,
experience:
experience
,
expiration_date:
expiration_date
,
description:
description
,
company_id:
company_table
.
id
)
end
find_job
=
Job
.
find_by
(
title:
title_job
,
company_id:
company_table
.
id
)
puts
find_job
.
title
if
find_job
.
present?
location_rel
=
get_row
.
css
(
'div.map p a'
).
children
.
map
{
|
location
|
location
.
text
.
strip
}
location_rel
.
each
do
|
loc
|
city_table
=
City
.
find_by
(
name:
loc
)
next
if
city_table
.
nil?
unless
CityJob
.
exists?
(
job_id:
find_job
.
id
,
city_id:
city_table
.
id
).
nil?
puts
"Created City:
#{
find_job
.
id
}
-
#{
city_table
.
id
}
.
#{
loc
}
"
city_jobs
=
CityJob
.
create!
(
job_id:
find_job
.
id
,
city_id:
city_table
.
id
)
end
end
industry_rel
=
get_row
.
css
(
'li a'
).
children
.
map
{
|
industry
|
industry
.
text
.
strip
}
industry_rel
.
each
do
|
ind
|
industry_table
=
Industry
.
find_by
(
name:
ind
)
next
if
industry_table
.
nil?
unless
IndustryJob
.
exists?
(
job_id:
find_job
.
id
,
industry_id:
industry_table
.
id
)
puts
"Created Industry:
#{
find_job
.
id
}
-
#{
industry_table
.
id
}
.
#{
ind
}
"
industry_jobs
=
IndustryJob
.
create!
(
job_id:
find_job
.
id
,
industry_id:
industry_table
.
id
)
end
end
end
rescue
StandardError
=>
e
@mylogger
.
error
"
#{
e
.
message
}
"
end
end
end
end
end
info
=
Nokogiri
::
HTML
(
URI
.
open
(
"https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-trang-
#{
n
}
-vi.html"
))
link
=
info
.
css
(
'a.job_link'
).
map
{
|
link
|
link
[
'href'
]
}
link
.
each
do
|
link
|
link_page
=
Nokogiri
::
HTML
(
URI
.
open
(
URI
.
escape
(
link
)))
row
=
link_page
.
search
(
'div.bg-blue div.row'
)
next
if
row
.
blank?
def
get_file_csv
Net
::
FTP
.
open
(
@NAME_DOMAIN
,
@USERNAME_FTP
,
@PASSWORD_FTP
)
do
|
ftp
|
ftp
.
getbinaryfile
(
'jobs.zip'
)
end
end
begin
company_name
=
link_page
.
search
(
'div.job-desc a.job-company-name'
).
text
.
strip
company
=
Company
.
find_by
(
name:
company_name
)
next
if
company
.
blank?
def
extract_zip
(
file
,
destination
)
FileUtils
.
mkdir_p
(
destination
)
Zip
::
File
.
open
(
file
)
do
|
zip_file
|
zip_file
.
each
do
|
f
|
fpath
=
File
.
join
(
destination
,
f
.
name
)
zip_file
.
extract
(
f
,
fpath
)
unless
File
.
exist?
(
fpath
)
end
end
end
title
=
link_page
.
search
(
'div.job-desc p'
).
text
.
strip
next
if
title
.
blank?
create_job
(
title
,
link_page
,
row
,
company
)
def
import_file_csv
(
file
)
CSV
.
foreach
(
file
,
headers:
true
)
do
|
row
|
begin
company_name
=
row
[
"company name"
]
company_address
=
row
[
"company address"
]
company_introduction
=
row
[
"benefit"
]
company_table
=
Company
.
find_by
(
name:
company_name
)
if
company_table
.
nil?
company_table
=
Company
.
create!
(
name:
company_name
,
address:
company_address
,
introduction:
company_introduction
)
rescue
StandardError
=>
e
@logger
.
error
e
.
message
end
title_job
=
row
[
"name"
]
description_job
=
"
#{
row
[
"description"
]
}
#{
row
[
"requirement"
]
}
"
level
=
row
[
"level"
]
salary
=
row
[
"salary"
]
job_table
=
Job
.
find_by
(
title:
title_job
)
if
!
company_table
.
nil?
&&
job_table
.
nil?
job_table
=
Job
.
create!
(
title:
title_job
,
description:
description_job
,
level:
level
,
salary:
salary
,
company_id:
company_table
.
id
)
puts
job_table
.
id
end
next
if
company_table
.
nil?
find_job
=
Job
.
find_by
(
title:
title_job
,
company_id:
company_table
.
id
)
industry
=
row
[
"category"
]
industry_find
=
Industry
.
find_by
(
name:
industry
)
if
industry_find
.
nil?
&&
find_job
.
present?
industry_table
=
Industry
.
create!
(
name:
industry
)
industry_job_table
=
IndustryJob
.
create!
(
job_id:
job_table
.
id
,
industry_id:
industry_find
.
id
)
else
unless
IndustryJob
.
exists?
(
job_id:
find_job
.
id
,
industry_id:
industry_find
.
id
)
industry_job_table
=
IndustryJob
.
create!
(
job_id:
job_table
.
id
,
industry_id:
industry_find
.
id
)
end
end
puts
job_table
.
id
,
title_job
,
industry
,
salary
location_data
=
row
[
"work place"
]
location
=
location_data
.
gsub
(
'["'
,
''
).
gsub
(
'"]'
,
''
)
location_find
=
City
.
find_by
(
name:
location
)
if
location_find
.
nil?
city_table
=
City
.
create!
(
name:
location
)
city_job_table
=
CityJob
.
create!
(
job_id:
job_table
.
id
,
city_id:
location_find
.
id
)
else
unless
CityJob
.
exists?
(
job_id:
find_job
.
id
,
city_id:
location_find
.
id
)
city_job_table
=
CityJob
.
create!
(
job_id:
job_table
.
id
,
city_id:
location_find
.
id
)
end
end
rescue
StandardError
=>
e
@mylogger
.
error
"
#{
e
.
message
}
"
end
end
end
end
lib/src/csvimporter.rb
0 → 100644
View file @
0bdc8e73
require
'net/ftp'
require
'csv'
require
'zip'
# Fetches the jobs archive from the FTP server, unpacks it and loads the
# contained jobs.csv into the Company, Job, Industry and City tables.
#
# Usage (see the import:auto rake task): CSVimporter.new(logger).import
class CSVimporter
  # FTP connection settings. The defaults are the values that used to be
  # hard-coded; each can be overridden through the environment.
  FTP_HOST = ENV.fetch('FTP_HOST', '192.168.1.156')
  FTP_USERNAME = ENV.fetch('FTP_USERNAME', 'training')
  FTP_PASSWORD = ENV.fetch('FTP_PASSWORD', 'training')

  # Name of the archive on the FTP server.
  REMOTE_ARCHIVE = 'jobs.zip'

  # @param logger [#error] receives the message of every row-level failure
  def initialize(logger)
    @logger = logger
    @extracting_directory = Rails.root.join('lib', 'csv')
    @zip_directory = Rails.root.join('jobs.zip')
    @importer = Rails.root.join('lib', 'csv', 'jobs.csv')
  end

  # Runs the whole pipeline: download, extract, import.
  def import
    get_file_csv
    extract_zip
    import_file_csv
  end

  # Downloads the archive to @zip_directory.
  #
  # The local path is passed explicitly: without it Net::FTP saves the file
  # under its base name in the current working directory, which is only the
  # place extract_zip reads from when the process happens to run in Rails.root.
  def get_file_csv
    Net::FTP.open(FTP_HOST, FTP_USERNAME, FTP_PASSWORD) do |ftp|
      ftp.getbinaryfile(REMOTE_ARCHIVE, @zip_directory.to_s)
    end
  end

  # Unpacks every entry of the downloaded archive into @extracting_directory.
  #
  # Existing files are replaced so that a fresh download actually refreshes
  # jobs.csv (previously an already-extracted file was kept forever). Entries
  # whose name would resolve outside the destination are logged and skipped.
  def extract_zip
    destination = File.expand_path(@extracting_directory.to_s)
    FileUtils.mkdir_p(destination)

    Zip::File.open(@zip_directory.to_s) do |zip_file|
      zip_file.each do |entry|
        target = File.expand_path(entry.name, destination)
        unless target.start_with?(destination + File::SEPARATOR)
          @logger.error "Skipped zip entry outside #{destination}: #{entry.name}"
          next
        end

        FileUtils.mkdir_p(File.dirname(target))
        FileUtils.rm_f(target) if File.file?(target)
        zip_file.extract(entry, target)
      end
    end
  end

  # Imports @importer row by row. A failure in one row is logged through
  # @logger and does not stop the remaining rows.
  def import_file_csv
    CSV.foreach(@importer, headers: true) do |row|
      begin
        import_row(row)
      rescue StandardError => e
        @logger.error e.message
      end
    end
  end

  private

  # Imports one CSV row: company, job, then the industry and city links.
  # Rows without a company name or a job title are skipped.
  def import_row(row)
    company = find_or_create_company(row)
    return if company.nil?

    job = find_or_create_job(row, company)
    return if job.nil?

    link_industry(job, row['category'])
    link_cities(job, row['work place'])
  end

  # Looks the company up by name only. Address and introduction are set only
  # when the record is new, so a differing "benefit" text on a later row no
  # longer creates a duplicate company.
  def find_or_create_company(row)
    company_name = row['company name']
    return if company_name.to_s.strip.empty?

    Company.find_or_create_by!(name: company_name) do |company|
      company.address = row['company address']
      company.introduction = row['benefit']
    end
  end

  # Looks the job up by title within the company; the remaining columns are
  # only written when the job is created.
  def find_or_create_job(row, company)
    title_job = row['name']
    return if title_job.to_s.strip.empty?

    Job.find_or_create_by!(title: title_job, company_id: company.id) do |job|
      job.description = "#{row['description']}#{row['requirement']}"
      job.level = row['level']
      job.salary = row['salary']
    end
  end

  # Attaches the named industry to the job, creating the industry when needed
  # and leaving an existing link untouched.
  def link_industry(job, industry_name)
    return if industry_name.to_s.strip.empty?

    industry = Industry.find_or_create_by!(name: industry_name)
    job.industries << industry unless job.industries.include?(industry)
  end

  # Attaches every city named in the "work place" column to the job.
  def link_cities(job, location_data)
    parse_locations(location_data).each do |city_name|
      city = City.find_or_create_by!(name: city_name)
      job.cities << city unless job.cities.include?(city)
    end
  end

  # Turns the raw "work place" value into a list of city names.
  # NOTE(review): the value appears to look like ["City A","City B"] - confirm
  # against a real jobs.csv. Names are stripped, as the crawler does for the
  # names it scrapes. A missing value yields an empty list.
  def parse_locations(location_data)
    location_data.to_s
                 .gsub('["', '')
                 .gsub('"]', '')
                 .split(/"\s*,\s*"/)
                 .map(&:strip)
                 .reject(&:empty?)
  end
end
\ No newline at end of file
lib/src/
crontab
.rb
→
lib/src/
jobparser
.rb
View file @
0bdc8e73
require
'net/ftp'
require
'csv'
require
'zip'
class
InforJob
class
JobParser
def
initialize
(
logger
,
url
)
@
my
logger
=
logger
@logger
=
logger
@url
=
url
end
...
...
@@ -18,56 +15,53 @@ class InforJob
links
=
info
.
css
(
'div.caption a.company-name'
).
map
{
|
link
|
link
[
'href'
]
}
links
.
each
do
|
link
|
next
if
link
==
'javascript:void(0);'
page
=
Nokogiri
::
HTML
(
URI
.
open
(
URI
.
escape
(
link
)))
name
=
page
.
search
(
'p.name'
)
&
.
text
page
=
Nokogiri
::
HTML
(
URI
.
open
(
URI
.
escape
(
link
)))
name
=
page
.
search
(
'p.name'
)
&
.
text
return
if
name
.
blank?
address
=
page
.
css
(
'div.content p'
).
children
[
1
]
&
.
text
introduction
=
page
.
css
(
'div.main-about-us'
).
text
begin
puts
name
Company
.
find_or_create_by!
(
name:
name
,
address:
address
,
introduction:
introduction
)
rescue
StandardError
=>
e
@
my
logger
.
error
e
.
message
@logger
.
error
e
.
message
end
end
end
def
c
reate_city_rel
(
row
,
info_
job
)
location_rel
=
row
.
css
(
'div.map p a'
).
children
.
map
{
|
name_city
|
name_city
.
text
.
strip
}
cit
y_table
=
City
.
where
(
name:
location_rel
)
def
c
ity_relationship
(
row
,
job
)
location_rel
ationship
=
row
.
css
(
'div.map p a'
).
children
.
map
{
|
name_city
|
name_city
.
text
.
strip
}
cit
ies_relationship
=
City
.
where
(
name:
location_relationship
)
puts
"
#{
info_job
.
cities
<<
city_table
}
"
info_job
.
cities
<<
city_table
job
.
cities
<<
cities_relationship
end
def
create_industry_rel
(
row
,
info_
job
)
industry_rel
=
row
.
css
(
'li a'
).
children
.
map
{
|
name_industry
|
name_industry
.
text
.
strip
}
industr
y_table
=
Industry
.
where
(
name:
industry_rel
)
def
industry_relationship
(
row
,
job
)
industry_rel
ationship
=
row
.
css
(
'li a'
).
children
.
map
{
|
name_industry
|
name_industry
.
text
.
strip
}
industr
ies_relationship
=
Industry
.
where
(
name:
industry_relationship
)
puts
"
#{
info_job
.
industries
<<
industry_table
}
"
info_job
.
industries
<<
industry_table
job
.
industries
<<
industries_relationship
end
def
create_job
(
title
,
link_page
,
row
,
company
_table
)
def
create_job
(
title
,
link_page
,
row
,
company
)
description
=
link_page
.
search
(
'div.detail-row'
).
to_s
salary
=
row
.
at_xpath
(
'//li[./strong/i[contains(@class, "fa fa-usd")]]/p'
).
text
.
strip
experience
=
row
.
at_xpath
(
'//li[./strong/i[contains(@class, "fa fa-briefcase")]]/p'
)
&
.
text
&
.
strip
level
=
row
.
at_xpath
(
'//li[./strong/i[contains(@class, "mdi mdi-account")]]/p'
).
text
.
strip
expiration_date
=
row
.
at_xpath
(
'//li[./strong/i[contains(@class, "mdi mdi-calendar-check")]]/p'
).
text
.
strip
info_
job
=
Job
.
find_or_create_by!
(
title:
title
,
level:
level
,
salary:
salary
,
experience:
experience
,
expiration_date:
expiration_date
,
description:
description
,
company_id:
company_table
.
id
)
job
=
Job
.
find_or_create_by!
(
title:
title
,
level:
level
,
salary:
salary
,
experience:
experience
,
expiration_date:
expiration_date
,
description:
description
,
company_id:
company
.
id
)
c
reate_city_rel
(
row
,
info_
job
)
create_industry_rel
(
row
,
info_
job
)
c
ity_relationship
(
row
,
job
)
industry_relationship
(
row
,
job
)
end
def
find_job
...
...
@@ -79,19 +73,18 @@ class InforJob
next
if
row
.
blank?
begin
name_company
=
link_page
.
search
(
'div.job-desc a.job-company-name'
).
text
.
strip
company
_table
=
Company
.
find_by
(
name:
name_company
)
next
if
company
_table
.
blank?
company_name
=
link_page
.
search
(
'div.job-desc a.job-company-name'
).
text
.
strip
company
=
Company
.
find_by
(
name:
company_name
)
next
if
company
.
blank?
title
=
link_page
.
search
(
'div.job-desc p'
).
text
.
strip
next
if
title
.
blank?
create_job
(
title
,
link_page
,
row
,
company
_table
)
create_job
(
title
,
link_page
,
row
,
company
)
rescue
StandardError
=>
e
puts
e
# @mylogger.error e.message
@logger
.
error
e
.
message
end
end
end
end
lib/tasks/crawler_import.rake
View file @
0bdc8e73
require
'src/crawler.rb'
require
'src/crontab.rb'
require
'src/jobparser.rb'
require
'src/csvimporter.rb'
namespace
:import
do
desc
'crawler data'
...
...
@@ -9,12 +10,10 @@ namespace :import do
end
desc
'Crontab'
task
auto: :environment
do
action
=
Crawler
.
new
(
logger
)
c
rontab
=
InforJob
.
new
(
logger
,
url
)
crontab
=
JobParser
.
new
(
logger
,
url
)
c
svimporter
=
CSVimporter
.
new
(
logger
)
crontab
.
crawl_all
action
.
get_file_csv
action
.
extract_zip
(
'./jobs.zip'
,
'lib/csv'
)
action
.
import_file_csv
(
Rails
.
root
.
join
(
'lib'
,
'csv'
,
'jobs.csv'
))
csvimporter
.
import
end
def
logger
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment