Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
V
Venjob_HungNT
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Ngô Trung Hưng
Venjob_HungNT
Commits
d369638b
Commit
d369638b
authored
Jul 22, 2020
by
Ngô Trung Hưng
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
import data from csv
parent
564970e4
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
83 additions
and
52 deletions
+83
-52
app/models/industry.rb
+9
-2
lib/src/crawler.rb
+13
-14
lib/src/ftp.rb
+48
-25
lib/src/interface_web.rb
+1
-4
lib/tasks/crawler.rake
+12
-7
No files found.
app/models/industry.rb
View file @
d369638b
...
...
@@ -2,6 +2,13 @@ class Industry < ApplicationRecord
has_many
:industry_jobs
has_many
:jobs
,
through: :industry_jobs
scope
:sort_asc
,
->
{
order
(
name: :asc
)}
# Ex:- scope :active, lambda {where(:active => true)}
# Ex:- scope :active, -> {where(:active => true)}
# Ranks industries by the number of jobs attached to each one.
#
# The tally is keyed by industry name, so industries that share a name
# collapse into a single entry (the last one read wins).
#
# @return [Array<Array>] +[name, job_count]+ pairs, highest count first
def self.top_hot
  job_totals = Industry.all.each_with_object({}) do |industry, totals|
    # Counted through the +jobs+ association of each industry.
    totals[industry.name] = industry.jobs.count
  end

  # Ascending sort followed by reverse keeps the original tie ordering.
  job_totals.sort_by { |_name, total| total }.reverse
end
end
lib/src/crawler.rb
View file @
d369638b
...
...
@@ -46,7 +46,7 @@ class Clawler
company
.
address
=
'Vui lòng xem trong mô tả công việc'
company
.
short_description
=
'Vui lòng xem trong mô tả công việc'
end
@data
=
Interface
_w
eb
.
craw_data_companies
()
@data
=
Interface
W
eb
.
craw_data_companies
()
puts
'Save info companies to database . . .'
@data
[
:name
].
each_with_index
do
|
name
,
index
|
if
Company
.
find_by
(
name:
name
).
blank?
...
...
@@ -62,24 +62,23 @@ class Clawler
# FILL DATA JOBS
def
self
.
make_jobs
Job
.
update_all
(
newdata:
0
)
@data_jobs
=
Interface
_w
eb
.
make_data
()
@data_jobs
=
Interface
W
eb
.
make_data
()
puts
'Save to database . . .'
i
=
@data_jobs
[
:name
].
length
i
.
times
do
|
n
|
name
=
@data_jobs
[
:name
][
n
].
to_s
company_name
=
@data_jobs
[
:company_name
][
n
].
to_s
.
strip
@data_jobs
[
:name
].
each_with_index
do
|
n
,
i
|
name
=
n
.
to_s
company_name
=
@data_jobs
[
:company_name
][
i
].
to_s
.
strip
id_company
=
Company
.
find_by
name:
company_name
if
id_company
!=
nil
id_company
=
id_company
.
id
else
id_company
=
1
end
level
=
@data_jobs
[
:level
][
n
].
to_s
experience
=
@data_jobs
[
:exprience
][
n
].
to_s
salary
=
@data_jobs
[
:salary
][
n
].
to_s
create_date
=
@data_jobs
[
:created_date
][
n
].
to_s
expiration_date
=
@data_jobs
[
:expiration_date
][
n
].
to_s
description
=
@data_jobs
[
:description
][
n
].
to_s
level
=
@data_jobs
[
:level
][
i
].
to_s
experience
=
@data_jobs
[
:exprience
][
i
].
to_s
salary
=
@data_jobs
[
:salary
][
i
].
to_s
create_date
=
@data_jobs
[
:created_date
][
i
].
to_s
expiration_date
=
@data_jobs
[
:expiration_date
][
i
].
to_s
description
=
@data_jobs
[
:description
][
i
].
to_s
id_job
=
Job
.
create!
(
name:
name
,
company_id:
id_company
,
...
...
@@ -90,8 +89,8 @@ class Clawler
expiration_date:
expiration_date
,
description:
description
,
newdata:
1
)
self
.
make_foreign_industries_table
(
@data_jobs
[
:industry_name
][
n
],
id_job
.
id
)
self
.
make_foreign_cities_table
(
@data_jobs
[
:city_name
][
n
],
id_job
.
id
)
self
.
make_foreign_industries_table
(
@data_jobs
[
:industry_name
][
i
],
id_job
.
id
)
self
.
make_foreign_cities_table
(
@data_jobs
[
:city_name
][
i
],
id_job
.
id
)
end
end
...
...
lib/src/ftp.rb
View file @
d369638b
...
...
@@ -2,7 +2,7 @@ require 'net/ftp'
require
'src/unzip'
require
'csv'
class
F
TP_s
ever
class
F
tpS
ever
CONTENT_SERVER_DOMAIN_NAME
=
'192.168.1.156'
CONTENT_SERVER_USER_NAME
=
'training'
CONTENT_SERVER_USER_PASSWORD
=
'training'
...
...
@@ -13,9 +13,9 @@ class FTP_sever
begin
extract_zip
(
'./jobs.zip'
,
'lib/csv'
)
File
.
delete
(
'./jobs.zip'
)
if
File
.
exist?
(
'./jobs.zip'
)
puts
"
Unzip done
\n
"
puts
"
Extract file done
"
rescue
puts
"File not found
\n
"
puts
"File not found"
end
end
end
...
...
@@ -24,16 +24,7 @@ class FTP_sever
donwload_csv
()
table
=
CSV
.
parse
(
File
.
read
(
"lib/csv/jobs.csv"
),
headers:
true
)
end
# puts table['name']
# puts table['company name'].size
# puts table['company province'].size
##puts table['category'].size
# puts table['company address'].size
# puts table['level'].size
# puts table['salary'].size
# puts table['benefit'].size
# puts table['requirement'].size
# puts table['description'].size
def
self
.
parse_csv_industries
(
data
)
puts
'Import data industries . . .'
industries
=
[]
...
...
@@ -73,7 +64,8 @@ class FTP_sever
company
.
name
=
name
.
strip
company
.
address
=
data
[
'company address'
][
index
]
company
.
short_description
=
data
[
'benefit'
][
index
]
end
end
puts
index
rescue
=>
exception
puts
'---'
end
...
...
@@ -85,31 +77,61 @@ class FTP_sever
# Imports one Job per CSV row and links it to its city and industry.
#
# Every existing job is first flagged +newdata: 0+; each imported job is
# created with +newdata: 1+.
#
# A row that fails (invalid record, failed link creation, ...) is reported
# and skipped so one bad row does not abort the whole import.
#
# @param data [#[]] column-oriented CSV data; +data['name']+ must return the
#   list of job names and every other column is indexed by the same position
# @return [Object] whatever +data['name'].each_with_index+ returns
def self.parse_csv_jobs(data)
  Job.update_all(newdata: 0)

  data['name'].each_with_index do |name, index|
    # Double-quoted so "\n" is a real newline; nil cells interpolate as ''.
    desc = "#{data['requirement'][index]}\n#{data['description'][index]}"

    company = Company.find_by(name: data['company name'][index].to_s.strip)
    # NOTE(review): unknown companies fall back to id 1 -- presumably the
    # placeholder company created before the import; confirm that row exists.
    id_company = company&.id || 1

    begin
      job = Job.create!(
        name: name,
        company_id: id_company,
        level: data['level'][index],
        experience: "",
        salary: data['salary'][index],
        create_date: Time.now,
        expiration_date: "",
        description: desc,
        newdata: 1
      )
      make_foreign_cities_table(data['work place'][index], job.id)
      make_foreign_industries_table(data['category'][index], job.id)
      puts index
    rescue StandardError => e
      # Keep the import going, but say which row failed and why.
      warn "Skipped job row #{index} (#{name}): #{e.class}: #{e.message}"
      puts '---------'
    end
  end
end
# Links a job to a city, creating the city when no city with that name exists.
#
# @param data [#to_s] raw city value; square brackets and double quotes are
#   removed and surrounding whitespace is stripped before the lookup
# @param id_job [Object] value stored as +job_id+ on the join record
# @return [CityJob] the record returned by +CityJob.create!+
def self.make_foreign_cities_table(data, id_job)
  city_name = data.to_s.delete("[]\"").strip

  # New cities are created with area 1 -- NOTE(review): the meaning of that
  # area value is not visible in this file; confirm it is the right default.
  city = City.find_by(name: city_name) || City.create!(name: city_name, area: 1)

  CityJob.create!(job_id: id_job, city_id: city.id)
end
# Links a job to an industry, creating the industry when no industry with
# that name exists.
#
# The raw value is normalised before the lookup: every comma becomes a slash,
# then every slash is padded to ' / ', and the result is stripped.
#
# @param data [#to_s] raw industry/category value
# @param id_job [Object] value stored as +job_id+ on the join record
# @return [IndustryJob] the record returned by +IndustryJob.create!+
def self.make_foreign_industries_table(data, id_job)
  # NOTE(review): a value that already contains ' / ' ends up with doubled
  # spaces around the slash; confirm the stored industry names match.
  industry_name = data.to_s.gsub(',', '/').gsub('/', ' / ').strip

  industry = Industry.find_by(name: industry_name) || Industry.create!(name: industry_name)

  IndustryJob.create!(industry_id: industry.id, job_id: id_job)
end
# Runs the whole CSV import: fetches the parsed table from +data_csv+ and
# feeds it to each importer in turn.
#
# Jobs are imported last; +parse_csv_jobs+ looks companies up by name.
#
# @return [Object] the return value of +parse_csv_jobs+
def self.import_data_from_csv
  csv_table = data_csv

  parse_csv_industries(csv_table)
  parse_csv_cities(csv_table)
  parse_csv_companies(csv_table)
  parse_csv_jobs(csv_table)
end
end
\ No newline at end of file
lib/src/interface_web.rb
View file @
d369638b
class
Interface
_w
eb
class
Interface
W
eb
# func get "n" link company & job
def
self
.
crawl_link_for_companies_jobs
(
page
)
puts
"Crawling link on page...
\n
PLease wait...
\n
"
...
...
@@ -226,7 +226,5 @@ class Interface_web
end
end
# else # insert "page.search(".DetailJobNew ul li").size == 8" (if want catch interface 4)
# crawl_data_jobs_interface_3(path)
\ No newline at end of file
lib/tasks/crawler.rake
View file @
d369638b
require
'src/crawler'
require
'src/ftp'
namespace
:
db
do
namespace
:
crawler
do
task
populate: :environment
do
#
Clawler.make_industries
#
Clawler.make_cities
#
Clawler.make_companies
#
Clawler.make_jobs
Clawler
.
make_industries
Clawler
.
make_cities
Clawler
.
make_companies
Clawler
.
make_jobs
end
task
csv: :environment
do
FTP_sever
.
import_data_from_csv
task
csv: :environment
do
Company
.
find_or_create_by
(
name:
'Bảo mật'
,
address:
'Vui lòng xem trong mô tả công việc'
)
do
|
company
|
company
.
name
=
'Bảo mật'
company
.
address
=
'Vui lòng xem trong mô tả công việc'
company
.
short_description
=
'Vui lòng xem trong mô tả công việc'
end
FtpSever
.
import_data_from_csv
end
end
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment