Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
V
venjob_thanhnd
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
3
Merge Requests
3
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
thanhnd
venjob_thanhnd
Commits
05f0132b
Commit
05f0132b
authored
Feb 17, 2020
by
thanhnd
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fix comment's Toi-san
parent
6250ed61
Pipeline
#462
canceled with stages
in 0 seconds
Changes
5
Pipelines
1
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
28 additions
and
125 deletions
+28
-125
.rake_tasks~
+0
-67
config/database.yml
+0
-26
insert_areas.rb
+0
-4
lib/tasks/crawler.rake
+21
-28
lib/tasks/insert.rake
+7
-0
No files found.
.rake_tasks~
deleted
100644 → 0
View file @
6250ed61
about
action_mailbox:ingress:exim
action_mailbox:ingress:postfix
action_mailbox:ingress:qmail
action_mailbox:install
action_text:install
active_storage:install
app:template
app:update
assets:clean[keep]
assets:clobber
assets:environment
assets:precompile
cache_digests:dependencies
cache_digests:nested_dependencies
db:create
db:drop
db:environment:set
db:fixtures:load
db:migrate
db:migrate:status
db:prepare
db:rollback
db:schema:cache:clear
db:schema:cache:dump
db:schema:dump
db:schema:load
db:seed
db:seed:replant
db:setup
db:structure:dump
db:structure:load
db:version
log:clear
middleware
restart
secret
stats
test
test:db
test:system
time:zones[country_or_offset]
tmp:clear
tmp:create
webpacker
webpacker:binstubs
webpacker:check_binstubs
webpacker:check_node
webpacker:check_yarn
webpacker:clean[keep]
webpacker:clobber
webpacker:compile
webpacker:info
webpacker:install
webpacker:install:angular
webpacker:install:coffee
webpacker:install:elm
webpacker:install:erb
webpacker:install:react
webpacker:install:stimulus
webpacker:install:svelte
webpacker:install:typescript
webpacker:install:vue
webpacker:verify_install
webpacker:yarn_install
yarn:install
zeitwerk:check
config/database.yml
View file @
05f0132b
# SQLite. Versions 3.8.0 and up are supported.
# gem install sqlite3
#
# Ensure the SQLite 3 gem is defined in your Gemfile
# gem 'sqlite3'
#
#default: &default
# adapter: sqlite3
# pool: <%= ENV.fetch("RAILS_MAX_THREADS") { 5 } %>
# timeout: 5000
#development:
# <<: *default
# database: db/development.sqlite3
# Warning: The database defined as "test" will be erased and
# re-generated from your development database when you run "rake".
# Do not set this db to the same as development or production.
#test:
# <<: *default
# database: db/test.sqlite3
#production:
# <<: *default
# database: db/production.sqlite3
default
:
&default
adapter
:
mysql2
encoding
:
unicode
...
...
insert_areas.rb
deleted
100644 → 0
View file @
6250ed61
# Inserts one placeholder row through the Area model.
#
# The previous body read `Area, create(...)`. A bare comma-separated
# expression list is not valid Ruby, so the file could not be parsed at all;
# the call is now the intended `Area.create`.
#
# NOTE(review): the rake tasks in this listing address the same model with
# `area_name:` / `area_description:` — confirm that `name:` / `description:`
# are real attributes of Area before relying on this helper.
#
# @return [Object] whatever `Area.create` returns
def insert_area
  Area.create(name: 'aadd', description: 'adfad')
end
\ No newline at end of file
lib/tasks/crawler.rake
View file @
05f0132b
...
...
@@ -3,7 +3,6 @@ require 'uri'
namespace
:crawler
do
task
crawl: :environment
do
insert_data_area
crawl_data
end
...
...
@@ -16,41 +15,40 @@ namespace :crawler do
#get link for next page
np
=
Nokogiri
::
HTML
(
open
(
list_url
))
nextpage
=
np
.
css
(
'div.paginationTwoStatus a.right'
)
#nextpage = np.css('div.paginationTwoStatus a.right')
nextpage
=
np
.
css
(
'.paginationTwoStatus .right'
)
puts
nextpage
[
0
][
"href"
]
#get all links in one page
links
=
page
.
css
(
'span.jobtitle h3.job a'
)
#links = page.css('span.jobtitle h3.job a')
links
=
page
.
css
(
'.jobtitle .job a'
)
area
=
Area
.
find
(
1
)
links
.
each
do
|
link
|
url
=
"
#{
link
[
'href'
]
}
"
url
.
force_encoding
(
'ASCII-8BIT'
)
uri
=
URI
::
encode
(
url
)
url
=
link
[
'href'
]
uri
=
URI
::
encode
(
url
)
job
=
Nokogiri
::
HTML
(
open
(
uri
))
title
=
job
.
css
(
'
div.top-job div
.top-job-info h1'
)
company_name
=
job
.
css
(
'
div.top-job div
.top-job-info div.tit_company'
)
updated_date
=
job
.
css
(
'
div
.datepost span'
)
location
=
job
.
css
(
'
div#showScroll.box2Detail ul
.DetailJobNew li[1].bgLine1 p[1].fl_left b a[2]'
)
experience
=
job
.
css
(
'
div#showScroll.box2Detail ul
.DetailJobNew li[2].bgLine2 p[1].fl_left > text()'
)
industry
=
job
.
css
(
'
div#showScroll.box2Detail ul
.DetailJobNew li[3].bgLine1 p[1].fl_left b'
)
level
=
job
.
css
(
'
div#showScroll.box2Detail ul.DetailJobNew li.bgLine1 p
.fl_right label'
)
salary
=
job
.
css
(
'
div#showScroll.box2Detail ul.DetailJobNew li.bgLine2 p
.fl_right label'
)
deadline
=
job
.
css
(
'
div#showScroll.box2Detail ul
.DetailJobNew li[3].bgLine1 p[2].fl_right > text()'
)
description
=
job
.
css
(
'
div
.MarBot20'
)
address
=
job
.
css
(
'
div.box1Detail p
.TitleDetailNew label label'
)
company_intro
=
job
.
css
(
'
div
.desc_company.content_fck span#emp_more'
)
title
=
job
.
css
(
'.top-job-info h1'
)
company_name
=
job
.
css
(
'.top-job-info div.tit_company'
)
updated_date
=
job
.
css
(
'.datepost span'
)
location
=
job
.
css
(
'
#showScroll
.DetailJobNew li[1].bgLine1 p[1].fl_left b a[2]'
)
experience
=
job
.
css
(
'
#showScroll
.DetailJobNew li[2].bgLine2 p[1].fl_left > text()'
)
industry
=
job
.
css
(
'
#showScroll
.DetailJobNew li[3].bgLine1 p[1].fl_left b'
)
level
=
job
.
css
(
'
#showScroll .DetailJobNew .bgLine1
.fl_right label'
)
salary
=
job
.
css
(
'
#showScroll .DetailJobNew .bgLine2
.fl_right label'
)
deadline
=
job
.
css
(
'
#showScroll
.DetailJobNew li[3].bgLine1 p[2].fl_right > text()'
)
description
=
job
.
css
(
'.MarBot20'
)
address
=
job
.
css
(
'
.box1Detail
.TitleDetailNew label label'
)
company_intro
=
job
.
css
(
'.desc_company.content_fck span#emp_more'
)
#skip if field blank
next
if
industry
.
text
.
blank?
#insert data to City table:
# puts location.text
city_name
=
location
.
text
.
gsub
(
","
,
""
)
City
.
find_or_create_by
(
area_id:
area
.
id
,
city_name:
city_name
,
city_description:
""
)
...
...
@@ -58,23 +56,18 @@ namespace :crawler do
Industry
.
find_or_create_by
(
industry_name:
industry
.
text
,
industry_description:
""
)
#insert data to Companies table
Company
.
find_or_create_by
(
company_name:
"
#{
company_name
.
text
}
"
,
company_description:
"
#{
company_intro
.
text
}
"
,
address:
"
#{
address
.
text
}
"
)
Company
.
find_or_create_by
(
company_name:
company_name
.
text
,
company_description:
company_intro
.
text
,
address:
address
.
text
)
#insert data to Jobs table
city
=
City
.
find_by
(
area_id:
area
.
id
,
city_name:
city_name
)
industryid
=
Industry
.
find_by
(
industry_name:
industry
.
text
)
companyid
=
Company
.
find_by
(
company_name:
company_name
.
text
)
Job
.
find_or_create_by
(
area_id:
area
.
id
,
city_id:
city
.
id
,
industry_id:
industryid
.
id
,
company_id:
companyid
.
id
,
job_name:
"
#{
title
.
text
}
"
,
salary:
"
#{
salary
.
text
}
"
,
deadline:
"
#{
deadline
.
text
}
"
,
level:
"
#{
level
.
text
}
"
,
experience:
"
#{
experience
.
text
.
strip
}
"
,
last_updated:
"
#{
updated_date
.
text
.
strip
}
"
,
description:
"description.text"
)
Job
.
find_or_create_by
(
area_id:
area
.
id
,
city_id:
city
.
id
,
industry_id:
industryid
.
id
,
company_id:
companyid
.
id
,
job_name:
title
.
text
,
salary:
salary
.
text
,
deadline:
deadline
.
text
,
level:
level
.
text
,
experience:
experience
.
text
.
strip
,
last_updated:
updated_date
.
text
.
strip
,
description:
description
.
text
)
list_url
=
nextpage
[
0
][
"href"
]
end
end
end
# Makes sure the two fixed Area rows are present by calling
# Area.find_or_create_by once per row, in the listed order.
#
# @return [Object] the result of the last Area.find_or_create_by call
def insert_data_area
  seed_rows = [
    { area_name: "Viet Nam", area_description: "VN" },
    { area_name: "Nuoc Ngoai", area_description: "NN" }
  ]
  # map + last keeps the return value equal to the final lookup/creation.
  seed_rows.map { |attributes| Area.find_or_create_by(attributes) }.last
end
end
lib/tasks/insert.rake
0 → 100644
View file @
05f0132b
# Rake namespace holding tasks that insert fixed reference rows.
namespace :insert do
  # `rake insert:areas_table` runs the :environment prerequisite first, then
  # calls Area.find_or_create_by for each (name, description) pair below.
  task areas_table: :environment do
    [
      ["Viet Nam", "VN"],
      ["Nuoc Ngoai", "NN"]
    ].each do |area_name, area_description|
      Area.find_or_create_by(area_name: area_name, area_description: area_description)
    end
  end
end
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment