Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
V
venjob_nth
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
3
Merge Requests
3
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Ngô Trung Hưng
venjob_nth
Commits
37488f20
Commit
37488f20
authored
Aug 04, 2020
by
Ngô Trung Hưng
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
autoload
parent
bf921c4c
Pipeline
#788
canceled with stages
in 0 seconds
Changes
8
Pipelines
1
Show whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
34 additions
and
16 deletions
+34
-16
config/application.rb
+1
-4
lib/src/base/base.rb
+5
-1
lib/src/crawler.rb
+3
-1
lib/src/crawler_job.rb
+8
-5
lib/src/interface/blue_interface.rb
+5
-1
lib/src/interface/green_interface.rb
+5
-1
lib/src/interface/red_interface.rb
+5
-1
lib/tasks/crawler.rake
+2
-2
No files found.
config/application.rb
View file @
37488f20
...
...
@@ -10,10 +10,7 @@ module Venjob
class
Application
<
Rails
::
Application
# Initialize configuration defaults for originally generated Rails version.
config
.
load_defaults
5.2
config
.
autoload_paths
+=
[
Rails
.
root
.
join
(
'lib/src'
),
Rails
.
root
.
join
(
'lib/src/base'
),
Rails
.
root
.
join
(
'lib/src/interface'
)]
config
.
autoload_paths
<<
Rails
.
root
.
join
(
'lib'
)
# Settings in config/environments/* take precedence over those specified here.
# Application configuration can go into files in config/initializers
# -- all .rb files in that directory are automatically loaded after loading
...
...
lib/src/base/base.rb
View file @
37488f20
...
...
@@ -5,7 +5,9 @@ require 'open-uri'
require
'logger'
# Crawler data
class
Base
module
Src
module
Base
class
Base
attr_accessor
:job
,
:page
def
initialize
(
page
)
...
...
@@ -84,4 +86,6 @@ class Base
def
fill_experience
exist_experience?
?
page
.
xpath
(
'//ul//li[position()=2]//p'
)[
1
].
text
.
strip
:
''
end
end
end
end
lib/src/crawler.rb
View file @
37488f20
...
...
@@ -3,7 +3,8 @@
require
'open-uri'
# Crawler data
class
Crawler
module
Src
class
Crawler
attr_accessor
:number_link
def
initialize
(
number_link
)
...
...
@@ -62,4 +63,5 @@ class Crawler
logger
.
error
"Crawler data companies has error:
#{
e
}
"
end
end
end
end
lib/src/crawler_job.rb
View file @
37488f20
# frozen_string_literal: true
# Crawler data job
class
CrawlerJob
<
Crawler
# Autoload
module
Src
# Crawler job
class
CrawlerJob
<
Crawler
SIZE_LI
=
8
def
crawl_link
...
...
@@ -33,11 +35,11 @@ class CrawlerJob < Crawler
parse_data
.
each
do
|
path
|
page
=
safe_link
(
path
)
if
page
.
search
(
'.item-blue .detail-box:nth-child(1) ul li:nth-child(1) p'
)[
0
].
present?
@data
=
RedInterface
.
new
(
page
).
create_data
@data
=
Src
::
Interface
::
RedInterface
.
new
(
page
).
create_data
elsif
page
.
search
(
'section .template-200'
).
text
.
present?
@data
=
BlueInterface
.
new
(
page
).
create_data
@data
=
Src
::
Interface
::
BlueInterface
.
new
(
page
).
create_data
elsif
page
.
search
(
'.DetailJobNew ul li'
).
size
==
SIZE_LI
&&
page
.
search
(
'.right-col ul li'
).
text
.
exclude?
(
'Độ tuổi'
)
@data
=
GreenInterface
.
new
(
page
).
create_data
@data
=
Src
::
Interface
::
GreenInterface
.
new
(
page
).
create_data
end
add_data
(
@data
)
refresh_first_link
...
...
@@ -76,4 +78,5 @@ class CrawlerJob < Crawler
job
.
cities
<<
city
end
end
end
end
lib/src/interface/blue_interface.rb
View file @
37488f20
# frozen_string_literal: true
# Inherience from base
class
BlueInterface
<
Base
module
Src
module
Interface
class
BlueInterface
<
Base
::
Base
def
fill_company_name
page
.
search
(
'.top-job .top-job-info .tit_company'
).
text
end
...
...
@@ -38,4 +40,6 @@ class BlueInterface < Base
def
fill_experience
page
.
xpath
(
'//ul//li[position()=7]/b'
).
text
end
end
end
end
lib/src/interface/green_interface.rb
View file @
37488f20
# frozen_string_literal: true
# ahihi
class
GreenInterface
<
Base
module
Src
module
Interface
class
GreenInterface
<
Base
::
Base
def
fill_name
page
.
search
(
'.info-company h1'
).
text
end
...
...
@@ -42,4 +44,6 @@ class GreenInterface < Base
def
fill_experience
exist_experience?
?
page
.
search
(
'.DetailJobNew li:nth-child(5) span'
).
text
.
strip
:
''
end
end
end
end
lib/src/interface/red_interface.rb
View file @
37488f20
# frozen_string_literal: true
# Inherience from base
class
RedInterface
<
Base
module
Src
module
Interface
class
RedInterface
<
Base
::
Base
end
end
end
lib/tasks/crawler.rake
View file @
37488f20
...
...
@@ -10,9 +10,9 @@ namespace :crawler do
company
.
address
=
'Vui lòng xem trong mô tả công việc'
company
.
short_description
=
'Vui lòng xem trong mô tả công việc'
end
cw
=
Crawler
.
new
(
NUMBER_LINK_WILL_BE_CRAWLER
)
cw
=
Src
::
Crawler
.
new
(
NUMBER_LINK_WILL_BE_CRAWLER
)
cw
.
craw_data_cities
cw
.
craw_data_companies
CrawlerJob
.
new
(
NUMBER_LINK_WILL_BE_CRAWLER
).
craw_data_jobs
Src
::
CrawlerJob
.
new
(
NUMBER_LINK_WILL_BE_CRAWLER
).
craw_data_jobs
end
end
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment