Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
V
Venjob_HungNT
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Ngô Trung Hưng
Venjob_HungNT
Commits
d369638b
Commit
d369638b
authored
Jul 22, 2020
by
Ngô Trung Hưng
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
import data from csv
parent
564970e4
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
72 additions
and
41 deletions
+72
-41
app/models/industry.rb
+9
-2
lib/src/crawler.rb
+13
-14
lib/src/ftp.rb
+38
-15
lib/src/interface_web.rb
+1
-4
lib/tasks/crawler.rake
+11
-6
No files found.
app/models/industry.rb
View file @
d369638b
...
@@ -2,6 +2,13 @@ class Industry < ApplicationRecord
...
@@ -2,6 +2,13 @@ class Industry < ApplicationRecord
has_many
:industry_jobs
has_many
:industry_jobs
has_many
:jobs
,
through: :industry_jobs
has_many
:jobs
,
through: :industry_jobs
scope
:sort_asc
,
->
{
order
(
name: :asc
)}
scope
:sort_asc
,
->
{
order
(
name: :asc
)}
# Ex:- scope :active, lambda {where(:active => true)}
# Ex:- scope :active, -> {where(:active => true)}
def
self
.
top_hot
hash
=
{}
data_industries
=
Industry
.
all
data_industries
.
each
do
|
val
|
hash
[
val
.
name
]
=
val
.
jobs
.
count
end
hash
.
sort_by
{
|
k
,
v
|
v
}.
reverse
end
end
end
lib/src/crawler.rb
View file @
d369638b
...
@@ -46,7 +46,7 @@ class Clawler
...
@@ -46,7 +46,7 @@ class Clawler
company
.
address
=
'Vui lòng xem trong mô tả công việc'
company
.
address
=
'Vui lòng xem trong mô tả công việc'
company
.
short_description
=
'Vui lòng xem trong mô tả công việc'
company
.
short_description
=
'Vui lòng xem trong mô tả công việc'
end
end
@data
=
Interface
_w
eb
.
craw_data_companies
()
@data
=
Interface
W
eb
.
craw_data_companies
()
puts
'Save info companies to database . . .'
puts
'Save info companies to database . . .'
@data
[
:name
].
each_with_index
do
|
name
,
index
|
@data
[
:name
].
each_with_index
do
|
name
,
index
|
if
Company
.
find_by
(
name:
name
).
blank?
if
Company
.
find_by
(
name:
name
).
blank?
...
@@ -62,24 +62,23 @@ class Clawler
...
@@ -62,24 +62,23 @@ class Clawler
# FILL DATA JOBS
# FILL DATA JOBS
def
self
.
make_jobs
def
self
.
make_jobs
Job
.
update_all
(
newdata:
0
)
Job
.
update_all
(
newdata:
0
)
@data_jobs
=
Interface
_w
eb
.
make_data
()
@data_jobs
=
Interface
W
eb
.
make_data
()
puts
'Save to database . . .'
puts
'Save to database . . .'
i
=
@data_jobs
[
:name
].
length
@data_jobs
[
:name
].
each_with_index
do
|
n
,
i
|
i
.
times
do
|
n
|
name
=
n
.
to_s
name
=
@data_jobs
[
:name
][
n
].
to_s
company_name
=
@data_jobs
[
:company_name
][
i
].
to_s
.
strip
company_name
=
@data_jobs
[
:company_name
][
n
].
to_s
.
strip
id_company
=
Company
.
find_by
name:
company_name
id_company
=
Company
.
find_by
name:
company_name
if
id_company
!=
nil
if
id_company
!=
nil
id_company
=
id_company
.
id
id_company
=
id_company
.
id
else
else
id_company
=
1
id_company
=
1
end
end
level
=
@data_jobs
[
:level
][
n
].
to_s
level
=
@data_jobs
[
:level
][
i
].
to_s
experience
=
@data_jobs
[
:exprience
][
n
].
to_s
experience
=
@data_jobs
[
:exprience
][
i
].
to_s
salary
=
@data_jobs
[
:salary
][
n
].
to_s
salary
=
@data_jobs
[
:salary
][
i
].
to_s
create_date
=
@data_jobs
[
:created_date
][
n
].
to_s
create_date
=
@data_jobs
[
:created_date
][
i
].
to_s
expiration_date
=
@data_jobs
[
:expiration_date
][
n
].
to_s
expiration_date
=
@data_jobs
[
:expiration_date
][
i
].
to_s
description
=
@data_jobs
[
:description
][
n
].
to_s
description
=
@data_jobs
[
:description
][
i
].
to_s
id_job
=
Job
.
create!
(
name:
name
,
id_job
=
Job
.
create!
(
name:
name
,
company_id:
id_company
,
company_id:
id_company
,
...
@@ -90,8 +89,8 @@ class Clawler
...
@@ -90,8 +89,8 @@ class Clawler
expiration_date:
expiration_date
,
expiration_date:
expiration_date
,
description:
description
,
description:
description
,
newdata:
1
)
newdata:
1
)
self
.
make_foreign_industries_table
(
@data_jobs
[
:industry_name
][
n
],
id_job
.
id
)
self
.
make_foreign_industries_table
(
@data_jobs
[
:industry_name
][
i
],
id_job
.
id
)
self
.
make_foreign_cities_table
(
@data_jobs
[
:city_name
][
n
],
id_job
.
id
)
self
.
make_foreign_cities_table
(
@data_jobs
[
:city_name
][
i
],
id_job
.
id
)
end
end
end
end
...
...
lib/src/ftp.rb
View file @
d369638b
...
@@ -2,7 +2,7 @@ require 'net/ftp'
...
@@ -2,7 +2,7 @@ require 'net/ftp'
require
'src/unzip'
require
'src/unzip'
require
'csv'
require
'csv'
class
F
TP_s
ever
class
F
tpS
ever
CONTENT_SERVER_DOMAIN_NAME
=
'192.168.1.156'
CONTENT_SERVER_DOMAIN_NAME
=
'192.168.1.156'
CONTENT_SERVER_USER_NAME
=
'training'
CONTENT_SERVER_USER_NAME
=
'training'
CONTENT_SERVER_USER_PASSWORD
=
'training'
CONTENT_SERVER_USER_PASSWORD
=
'training'
...
@@ -13,9 +13,9 @@ class FTP_sever
...
@@ -13,9 +13,9 @@ class FTP_sever
begin
begin
extract_zip
(
'./jobs.zip'
,
'lib/csv'
)
extract_zip
(
'./jobs.zip'
,
'lib/csv'
)
File
.
delete
(
'./jobs.zip'
)
if
File
.
exist?
(
'./jobs.zip'
)
File
.
delete
(
'./jobs.zip'
)
if
File
.
exist?
(
'./jobs.zip'
)
puts
"
Unzip done
\n
"
puts
"
Extract file done
"
rescue
rescue
puts
"File not found
\n
"
puts
"File not found"
end
end
end
end
end
end
...
@@ -24,16 +24,7 @@ class FTP_sever
...
@@ -24,16 +24,7 @@ class FTP_sever
donwload_csv
()
donwload_csv
()
table
=
CSV
.
parse
(
File
.
read
(
"lib/csv/jobs.csv"
),
headers:
true
)
table
=
CSV
.
parse
(
File
.
read
(
"lib/csv/jobs.csv"
),
headers:
true
)
end
end
# puts table['name']
# puts table['company name'].size
# puts table['company province'].size
##puts table['category'].size
# puts table['company address'].size
# puts table['level'].size
# puts table['salary'].size
# puts table['benefit'].size
# puts table['requirement'].size
# puts table['description'].size
def
self
.
parse_csv_industries
(
data
)
def
self
.
parse_csv_industries
(
data
)
puts
'Import data industries . . .'
puts
'Import data industries . . .'
industries
=
[]
industries
=
[]
...
@@ -74,6 +65,7 @@ class FTP_sever
...
@@ -74,6 +65,7 @@ class FTP_sever
company
.
address
=
data
[
'company address'
][
index
]
company
.
address
=
data
[
'company address'
][
index
]
company
.
short_description
=
data
[
'benefit'
][
index
]
company
.
short_description
=
data
[
'benefit'
][
index
]
end
end
puts
index
rescue
=>
exception
rescue
=>
exception
puts
'---'
puts
'---'
end
end
...
@@ -85,13 +77,14 @@ class FTP_sever
...
@@ -85,13 +77,14 @@ class FTP_sever
def
self
.
parse_csv_jobs
(
data
)
def
self
.
parse_csv_jobs
(
data
)
Job
.
update_all
(
newdata:
0
)
Job
.
update_all
(
newdata:
0
)
data
[
'name'
].
each_with_index
do
|
name
,
index
|
data
[
'name'
].
each_with_index
do
|
name
,
index
|
desc
=
data
[
'requirement'
][
index
]
<<
'\n'
<<
data
[
'description'
][
index
]
desc
=
(
data
[
'requirement'
][
index
]).
to_s
+
'\n'
<<
(
data
[
'description'
][
index
]).
to_s
id_company
=
Company
.
find_by
name:
data
[
'company name'
][
index
].
to_s
.
strip
id_company
=
Company
.
find_by
name:
data
[
'company name'
][
index
].
to_s
.
strip
if
id_company
.
blank?
if
id_company
.
blank?
id_company
=
1
id_company
=
1
else
else
id_company
=
id_company
.
id
id_company
=
id_company
.
id
end
end
begin
id_job
=
Job
.
create!
(
name:
name
,
id_job
=
Job
.
create!
(
name:
name
,
company_id:
id_company
,
company_id:
id_company
,
level:
data
[
'level'
][
index
],
level:
data
[
'level'
][
index
],
...
@@ -101,7 +94,36 @@ class FTP_sever
...
@@ -101,7 +94,36 @@ class FTP_sever
expiration_date:
""
,
expiration_date:
""
,
description:
desc
,
description:
desc
,
newdata:
1
)
newdata:
1
)
make_foreign_cities_table
(
data
[
'work place'
][
index
],
id_job
.
id
)
make_foreign_industries_table
(
data
[
'category'
][
index
],
id_job
.
id
)
puts
index
rescue
=>
exception
puts
'---------'
end
end
end
def
self
.
make_foreign_cities_table
(
data
,
id_job
)
data
=
data
.
to_s
.
delete
(
"[]
\"
"
)
id_cities
=
City
.
find_by
name:
data
.
strip
if
id_cities
.
blank?
id_cities
=
City
.
create!
(
name:
data
.
strip
,
area:
1
).
id
else
id_cities
=
id_cities
.
id
end
CityJob
.
create!
(
job_id:
id_job
,
city_id:
id_cities
)
end
def
self
.
make_foreign_industries_table
(
data
,
id_job
)
data
=
data
.
to_s
.
gsub
(
','
,
'/'
).
gsub
(
'/'
,
' / '
)
id_industry
=
Industry
.
find_by
name:
(
data
.
strip
)
if
id_industry
.
blank?
id_industry
=
Industry
.
create!
(
name:
data
.
strip
).
id
else
id_industry
=
id_industry
.
id
end
end
IndustryJob
.
create!
(
industry_id:
id_industry
,
job_id:
id_job
)
end
end
def
self
.
import_data_from_csv
def
self
.
import_data_from_csv
...
@@ -109,7 +131,7 @@ class FTP_sever
...
@@ -109,7 +131,7 @@ class FTP_sever
parse_csv_industries
(
data
)
parse_csv_industries
(
data
)
parse_csv_cities
(
data
)
parse_csv_cities
(
data
)
parse_csv_companies
(
data
)
parse_csv_companies
(
data
)
#
parse_csv_jobs(data)
parse_csv_jobs
(
data
)
end
end
end
end
\ No newline at end of file
lib/src/interface_web.rb
View file @
d369638b
class
Interface
_w
eb
class
Interface
W
eb
# func get "n" link company & job
# func get "n" link company & job
def
self
.
crawl_link_for_companies_jobs
(
page
)
def
self
.
crawl_link_for_companies_jobs
(
page
)
puts
"Crawling link on page...
\n
PLease wait...
\n
"
puts
"Crawling link on page...
\n
PLease wait...
\n
"
...
@@ -226,7 +226,5 @@ class Interface_web
...
@@ -226,7 +226,5 @@ class Interface_web
end
end
end
end
# else # insert "page.search(".DetailJobNew ul li").size == 8" (if want catch interface 4)
# else # insert "page.search(".DetailJobNew ul li").size == 8" (if want catch interface 4)
# crawl_data_jobs_interface_3(path)
# crawl_data_jobs_interface_3(path)
\ No newline at end of file
lib/tasks/crawler.rake
View file @
d369638b
require
'src/crawler'
require
'src/crawler'
require
'src/ftp'
require
'src/ftp'
namespace
:
db
do
namespace
:
crawler
do
task
populate: :environment
do
task
populate: :environment
do
#
Clawler.make_industries
Clawler
.
make_industries
#
Clawler.make_cities
Clawler
.
make_cities
#
Clawler.make_companies
Clawler
.
make_companies
#
Clawler.make_jobs
Clawler
.
make_jobs
end
end
task
csv: :environment
do
task
csv: :environment
do
FTP_sever
.
import_data_from_csv
Company
.
find_or_create_by
(
name:
'Bảo mật'
,
address:
'Vui lòng xem trong mô tả công việc'
)
do
|
company
|
company
.
name
=
'Bảo mật'
company
.
address
=
'Vui lòng xem trong mô tả công việc'
company
.
short_description
=
'Vui lòng xem trong mô tả công việc'
end
FtpSever
.
import_data_from_csv
end
end
end
end
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment