Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
V
Venjob_HungNT
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Ngô Trung Hưng
Venjob_HungNT
Commits
2dfce383
Commit
2dfce383
authored
Jul 23, 2020
by
Ngo Trung Hung
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fix
parent
6905c99b
Show whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
36 additions
and
52 deletions
+36
-52
app/assets/stylesheets/custom.scss
+6
-4
app/views/shared/_box_five_job.html.erb
+1
-1
config/database.yml
+1
-1
config/environments/development.rb
+1
-0
config/schedule.rb
+6
-1
crawl_time.txt
+2
-0
lib/src/ftp.rb
+5
-4
lib/src/interface_web.rb
+10
-37
lib/tasks/crawler.rake
+3
-4
lib/tasks/import_csv.rake
+1
-0
No files found.
app/assets/stylesheets/custom.scss
View file @
2dfce383
...
...
@@ -14,7 +14,7 @@ $main-color: #23303D;
.cus_header
{
height
:
52px
;
width
:
100%
;
background-
color
:
$main-color
;
background-
image
:
linear-gradient
(
30deg
,
#434343
,
#000000
)
;
position
:
fixed
;
border-bottom
:
1px
solid
#333
;
// box-shadow: 0px 2px 4px 2px #999;
...
...
@@ -204,6 +204,7 @@ $main-color: #23303D;
// footer
.custom_footer
{
margin-top
:
30px
;
width
:
100%
;
height
:
120px
;
background-color
:
$main-color
;
...
...
@@ -239,7 +240,7 @@ $main-color: #23303D;
.slogan_text
{
text-align
:
center
;
font-family
:
'Raleway'
,
sans-serif
;
font-size
:
35
px
;
font-size
:
40
px
;
color
:
#eaeaea
;
transform
:
scale
(
1
.35
);
font-weight
:
700
;
...
...
@@ -610,7 +611,7 @@ $main-color: #23303D;
span
{
font-family
:
'Raleway'
,
sans-serif
;
font-size
:
30px
;
font-weight
:
2
00
;
font-weight
:
4
00
;
}
}
...
...
@@ -817,7 +818,8 @@ $main-color: #23303D;
}
.box_text_five_jobs.box_padding_city
{
background-image
:
linear-gradient
(
to
right
,
#86cb49
,
#169b74
,
#86cb49
);
// background-image: linear-gradient(to right, #86cb49, #169b74, #86cb49);
background-image
:
linear-gradient
(
to
right
,
#cc2b5e
,
#753a88
);
color
:
white
;
font-weight
:
600
;
margin-bottom
:
0px
!
important
;
...
...
app/views/shared/_box_five_job.html.erb
View file @
2dfce383
...
...
@@ -14,8 +14,8 @@
<div
class=
"col-sm-10 col-md-9 col-lg-10"
>
<div
class=
"box_info"
>
<div
class=
"lol"
>
<!--
<%=
image_tag
'trophy'
,
class:
'img_job_name'
%>
-->
<%=
link_to
val
.
name
,
'#'
,
class:
'job_name'
%>
<%=
image_tag
'trophy'
,
class:
'img_job_name'
%>
</div>
<div
class=
"cop"
>
<h5
class=
"box_info_copany_name"
><i
class=
"far fa-building"
></i>
<%=
val
.
company
.
name
%>
</h5>
...
...
config/database.yml
View file @
2dfce383
...
...
@@ -14,7 +14,7 @@ default: &default
encoding
:
utf8
pool
:
<%= ENV.fetch("RAILS_MAX_THREADS") { 5 } %>
username
:
root
password
:
'
1'
password
:
'
1
2345678
'
socket
:
/var/run/mysqld/mysqld.sock
...
...
config/environments/development.rb
View file @
2dfce383
Rails
.
application
.
configure
do
# Settings specified here will take precedence over those in config/application.rb.
# In the development environment your application's code is reloaded on
# every request. This slows down response time but is perfect for development
# since you don't have to restart the web server when you make code changes.
...
...
config/schedule.rb
View file @
2dfce383
env
:PATH
,
ENV
[
'PATH'
]
every
3
.
minutes
do
every
10
.
minutes
do
rake
"crawler:populate"
end
every
:day
,
at:
"19:00pm"
do
rake
"csv:import_csv"
end
crawl_time.txt
0 → 100644
View file @
2dfce383
=> 2020-07-23 22:40:02 +0700
\ No newline at end of file
lib/src/ftp.rb
View file @
2dfce383
...
...
@@ -21,16 +21,17 @@ class FtpSever
end
def
self
.
data_csv
#
donwload_csv()
donwload_csv
()
table
=
CSV
.
parse
(
File
.
read
(
"lib/csv/jobs.csv"
),
headers:
true
)
end
def
self
.
parse_csv_industries
(
data
)
puts
'Import data industries . . .'
industries
=
[]
data
[
'category'
].
each
do
|
val
|
industries
<<
val
.
strip
end
# data['category'].each do |val|
# industries << val.strip
# end
industries
+=
data
[
'category'
].
map
(
&
:strip
)
industries
.
each
do
|
val
|
val
.
gsub!
(
','
,
'/'
)
if
val
.
include?
(
','
)
val
.
gsub!
(
'/'
,
' / '
)
...
...
lib/src/interface_web.rb
View file @
2dfce383
...
...
@@ -9,7 +9,6 @@ class InterfaceWeb
file
=
File
.
readlines
(
'tmp/link.txt'
,
'r'
)
if
File
.
exist?
(
'tmp/link.txt'
)
@@stop_crawl
=
file
.
blank?
?
''
:
file
.
join
File
.
delete
(
'tmp/link.txt'
)
if
File
.
exist?
(
'tmp/link.txt'
)
page
.
times
do
|
i
|
page
=
Nokogiri
::
HTML
(
URI
.
open
(
"https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-trang-
#{
i
+
1
}
-vi.html"
))
...
...
@@ -24,12 +23,13 @@ class InterfaceWeb
website_jobs
=
website_jobs
.
select
{
|
val
|
val
.
present?
}
puts
"Result:
\n
Company:
#{
website_companies
.
length
}
link
\n
Job :
#{
website_jobs
.
length
}
link
\n
------------------------"
File
.
open
(
"tmp/link.txt"
,
"w+"
)
{
|
f
|
f
.
write
(
website_jobs
[
0
])}
File
.
write
(
"tmp/link.txt"
,
"
#{
website_jobs
[
0
]
}
"
)
data
<<
website_companies
<<
website_jobs
end
def
self
.
get_link_job_and_companies
@crawl_link_for_companies_jobs
||=
crawl_link_for_companies_jobs
(
1
0
)
@crawl_link_for_companies_jobs
||=
crawl_link_for_companies_jobs
(
1
)
end
def
self
.
safe_link
(
url
)
...
...
@@ -85,7 +85,6 @@ class InterfaceWeb
end
def
self
.
add_data
(
name
,
company_name
,
city_name
,
created_date
,
expiration_date
,
salary
,
industry_name
,
description
,
level
,
exprience
)
Job
.
update_all
(
newdata:
0
)
id_company
=
Company
.
find_by
name:
company_name
id_company
=
id_company
.
present?
?
id_company
.
id
:
1
id_job
=
Job
.
create!
(
name:
name
,
...
...
@@ -95,36 +94,27 @@ class InterfaceWeb
salary:
salary
,
create_date:
created_date
,
expiration_date:
expiration_date
,
description:
description
,
newdata:
1
)
description:
description
)
make_foreign_industries_table
(
industry_name
,
id_job
.
id
)
make_foreign_cities_table
(
city_name
,
id_job
.
id
)
end
def
self
.
crawl_data_jobs_interface_1
(
page
)
name
=
page
.
search
(
".apply-now-content .job-desc .title"
).
text
company_name
=
page
.
search
(
".apply-now-content .job-desc .job-company-name"
).
text
location
=
[]
length
=
page
.
search
(
".detail-box .map p a"
).
size
length
.
times
do
|
n
|
location
<<
page
.
search
(
".detail-box .map p a:nth-child(
#{
n
+
1
}
)"
).
text
end
city_name
=
location
.
join
(
','
)
created_date
=
page
.
search
(
".item-blue .detail-box:nth-child(1) ul li:nth-child(1) p"
)[
0
].
text
expiration_date
=
page
.
search
(
".item-blue .detail-box ul li:last"
)[
1
].
text
.
delete!
(
"[
\n
,
\t
,
\r
]"
).
split
(
' '
).
last
salary
=
page
.
search
(
".item-blue .detail-box:nth-child(1) ul li:nth-child(1) p"
)[
1
].
text
industries
=
page
.
search
(
".item-blue .detail-box:nth-child(1) ul li:nth-child(2) a"
).
text
industries
=
industries
.
delete!
(
"[
\n
,
\t
,
\r
]"
).
split
(
' '
).
select
{
|
v
|
v
!=
''
}
industry_name
=
industries
.
join
(
','
)
description
=
page
.
search
(
".tabs .tab-content .detail-row:nth-child(n)"
).
to_s
.
delete!
(
"[
\n
,
\t
,
\r
]"
)
get_level
=
page
.
search
(
".item-blue .detail-box:last ul li:nth-child(3)"
).
text
.
delete!
(
"[
\n
,
\t
,
\r
]"
).
lstrip
.
split
(
'Cấp bậc'
)
get_level
=
get_level
[
1
].
to_s
.
strip
if
get_level
==
""
...
...
@@ -134,49 +124,37 @@ class InterfaceWeb
g_level
=
get_level
level
=
g_level
[
1
].
to_s
.
strip
end
exp
=
page
.
search
(
".item-blue .detail-box:last ul li:nth-child(2)"
).
text
.
delete!
(
"[
\n
,
\t
,
\r
]"
).
split
(
'Kinh nghiệm'
)
exp
=
exp
[
1
].
to_s
.
strip
exprience
=
exp
add_data
(
name
,
company_name
,
city_name
,
created_date
,
expiration_date
,
salary
,
industry_name
,
description
,
level
,
exprience
)
end
def
self
.
crawl_data_jobs_interface_2
(
page
)
name
=
page
.
search
(
".apply-now-content .job-desc .title"
).
text
company_name
=
page
.
search
(
".top-job .top-job-info .tit_company"
).
text
location
=
[]
length
=
page
.
search
(
".info-workplace .value a"
).
size
length
.
times
do
|
n
|
location
<<
page
.
search
(
".info-workplace .value a:nth-child(
#{
n
+
1
}
)"
).
text
end
city_name
=
location
.
join
(
','
)
created_date
=
""
expiration_date
=
page
.
search
(
".info li:nth-child(4)"
).
text
if
expiration_date
.
blank?
expiration_date
=
""
else
expiration_date
=
expiration_date
.
to_s
.
delete!
(
"[
\n
,
\t
,
\r
]"
).
split
(
' '
).
last
end
salary
=
page
.
search
(
".info li:nth-child(3)"
).
text
.
split
(
"Lương"
).
last
.
strip
industry_name
=
page
.
search
(
".info li:nth-child(5) .value"
).
text
description
=
page
.
search
(
".left-col"
).
to_s
.
delete!
(
"[
\n
,
\t
,
\r
]"
)
lv
=
page
.
search
(
".boxtp .info li:nth-child(2)"
).
text
if
lv
.
blank?
level
=
""
else
level
=
lv
.
delete!
(
"[
\n
,
\t
,
\r
]"
).
strip
.
split
(
'Cấp bậc'
).
last
.
strip
end
exp
=
page
.
search
(
".info li:nth-child(6)"
).
text
if
exp
.
blank?
exprience
=
""
...
...
@@ -189,25 +167,15 @@ class InterfaceWeb
def
self
.
crawl_data_jobs_interface_5
(
page
)
name
=
page
.
search
(
".info-company h1"
).
text
company_name
=
page
.
search
(
".info-company .text-job h2"
).
text
city_name
=
page
.
search
(
".DetailJobNew ul li:nth-child(1) a"
).
text
created_date
=
""
expiration_date
=
page
.
search
(
".DetailJobNew li:nth-child(9) span"
).
text
.
strip
salary
=
page
.
search
(
".DetailJobNew li:nth-child(3) span"
).
text
.
strip
industry_name
=
page
.
search
(
".DetailJobNew li:nth-child(2) span"
).
text
.
strip
description
=
page
.
search
(
".left-col .detail-row"
).
to_s
.
delete!
(
"[
\n
,
\t
,
\r
]"
)
level
=
page
.
search
(
".DetailJobNew ul li:nth-child(6) span"
).
text
.
strip
exprience
=
page
.
search
(
".DetailJobNew li:nth-child(5) span"
).
text
.
strip
add_data
(
name
,
company_name
,
city_name
,
created_date
,
expiration_date
,
salary
,
industry_name
,
description
,
level
,
exprience
)
end
...
...
@@ -233,7 +201,12 @@ class InterfaceWeb
def
self
.
make_data
puts
'Please wait for crawl jobs data! . . .'
link_crawl
=
get_link_job_and_companies
link_crawl
[
1
].
each_with_index
do
|
path
,
i
|
arr_link
=
[]
link_crawl
[
1
].
each
do
|
val
|
break
if
@@stop_crawl
==
val
arr_link
<<
val
end
arr_link
.
reverse
.
each_with_index
do
|
path
,
i
|
break
if
@@stop_crawl
==
path
page
=
Nokogiri
::
HTML
(
URI
.
open
(
URI
.
parse
(
URI
.
escape
(
path
))))
if
page
.
search
(
".item-blue .detail-box:nth-child(1) ul li:nth-child(1) p"
)[
0
]
!=
nil
...
...
lib/tasks/crawler.rake
View file @
2dfce383
require
'open-uri'
require
'logger'
require
'src/interface_web'
namespace
:crawler
do
task
populate: :environment
do
File
.
write
(
'q.txt'
,
"hello
#{
Time
.
now
}
"
)
InterfaceWeb
.
craw_data_cities
()
InterfaceWeb
.
craw_data_companies
()
InterfaceWeb
.
make_data
()
File
.
open
(
'log/crawler.log'
,
'a'
)
do
|
f
|
f
.
puts
"
#{
Time
.
now
}
- INFO: OK"
end
task
do: :environment
do
File
.
write
(
'oo.txt'
,
"hello
#{
Time
.
now
}
"
)
end
end
lib/tasks/import_csv.rake
View file @
2dfce383
require
'src/ftp'
namespace
:csv
do
task
import_csv: :environment
do
File
.
write
(
'importcsv.log'
,
"=>
#{
Time
.
now
}
IMPORT SUCCESS"
)
Company
.
find_or_create_by
(
name:
'Bảo mật'
,
address:
'Vui lòng xem trong mô tả công việc'
)
do
|
company
|
company
.
name
=
'Bảo mật'
company
.
address
=
'Vui lòng xem trong mô tả công việc'
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment