Commit 363e2ef7 by Huynh Thien Phuoc

Merge branch 'crawler'

parents d31e14f4 aa11d545
Pipeline #786 canceled with stages
in 0 seconds
,phuocht,devops-OptiPlex-3010,23.07.2020 08:12,file:///home/phuocht/.config/libreoffice/4;
\ No newline at end of file
//= link_tree ../images
//= link_directory ../javascripts .js
//= link_directory ../stylesheets .css
//= link bootstrap.js
//= link bootstrap.css
\ No newline at end of file
......@@ -12,5 +12,6 @@
//
//= require rails-ujs
//= require activestorage
//= require jquery-3.5.1.slim.min.js
//= require turbolinks
//= require_tree .
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
/* Base page rules: element-level defaults plus a small typography section. */
/* Reserve the vertical scrollbar at all times. */
html {
overflow-y: scroll;
}
/* NOTE(review): the 60px top padding presumably leaves room for a fixed navbar — confirm against the layout. */
body {
padding-top: 60px;
}
section {
overflow: auto;
}
/* Text areas may only be resized vertically. */
textarea {
resize: vertical;
}
/* Utility class: centers inline content. */
.center {
text-align: center;
}
/* Headings inside .center use a smaller bottom margin than the global h1 rule below (10px vs 30px). */
.center h1 {
margin-bottom: 10px;
}
/* typography */
h1, h2, h3, h4, h5, h6 {
line-height: 1;
}
h1 {
font-size: 3em;
letter-spacing: -2px;
margin-bottom: 30px;
text-align: center;
}
/* h2 is styled as a light grey, normal-weight subtitle. */
h2 {
font-size: 1.2em;
letter-spacing: -1px;
margin-bottom: 30px;
text-align: center;
font-weight: normal;
color: #999;
}
p {
font-size: 1.1em;
line-height: 1.7em;
}
/* NOTE(review): this targets id "logo"; verify it matches the id actually given to the logo image in the templates. */
#logo{
height: 200px;
width: 100px;
}
# Controller for the jobs resource.
class JobsController < ApplicationController
  # Empty action: it assigns no state of its own.
  def index; end
end
......@@ -7,9 +7,28 @@
<%= stylesheet_link_tag 'application', media: 'all', 'data-turbolinks-track': 'reload' %>
<%= javascript_include_tag 'application', 'data-turbolinks-track': 'reload' %>
</head>
<%= stylesheet_link_tag 'bootstrap', media: 'all', 'data-turbolinks-track': 'reload' %>
<%= javascript_include_tag 'bootstrap', media: 'all', 'data-turbolinks-track': 'reload' %>
</head>
<body>
<%= yield %>
<header class="navbar navbar-expand-lg navbar-dark bg-dark">
<div class="collapse navbar-collapse">
<div class="container">
<%= link_to image_tag("logo_venjob.png", alt: "Logo", id: "logo_venjob", width: 120, left: 0), '#'%>
<nav>
<ul class="navbar-nav mr-auto">
<li><%= link_to "Login", '#' %></li>
<li><%= link_to "Register", '#' %></li>
<li><%= link_to "Favorite", '#' %></li>
<li><%= link_to "History", '#' %></li>
</ul>
</nav>
</div>
</div>
</header>
<div class="container">
<%= yield %>
</div>
</body>
</html>
......@@ -12,3 +12,4 @@ Rails.application.config.assets.paths << Rails.root.join('node_modules')
# application.js, application.css, and all non-JS/CSS in the app/assets
# folder are already added.
# Rails.application.config.assets.precompile += %w( admin.js admin.css )
Rails.application.config.assets.precompile = ["manifest.js"]
Rails.application.routes.draw do
  # Routing DSL reference: http://guides.rubyonrails.org/routing.html
  # Declares the resourceful routes for both resources in one call,
  # keeping the original declaration order.
  resources :crawlerdata, :jobs
end
......@@ -77,9 +77,9 @@
date = arr_data.first
elsif val.include?('Lương') && val.include?('Kinh nghiệm') == true
arr_sub = ((((val.gsub('Lương ','')).gsub(' Kinh nghiệm ', '*')).gsub(' Cấp bậc ', '*')).gsub(' Hết hạn nộp ', '*')).split('*')
salary = arr_sub[0]
experience = arr_sub[1]
level =arr_sub[2]
salary = arr_sub[0]
experience = arr_sub[1]
level = arr_sub[2]
expiration_date = arr_sub[3]
job = Job.create!(title: title_job,
level: level,
......@@ -89,9 +89,9 @@
description: description,
company_id: company_table.id)
elsif val.include?('Lương') && val.include?('Kinh nghiệm') == false
arr_sub = (((val.gsub('Lương ','')).gsub(' Cấp bậc ', '*')).gsub(' Hết hạn nộp ', '*')).split('*')
salary = arr_sub[0]
level =arr_sub[1]
arr_sub = (((val.gsub('Lương ','')).gsub(' Cấp bậc ', '*')).gsub(' Hết hạn nộp ', '*')).split('*')
salary = arr_sub[0]
level = arr_sub[1]
expiration_date = arr_sub[2]
job = Job.create!(title: title_job,
level: level,
......@@ -122,11 +122,71 @@
end
end
end
# Downloads the jobs archive from the FTP server into the current working
# directory, printing the server's directory listing first.
#
# Every parameter is optional and defaults to the value that used to be
# hard-coded, so existing zero-argument callers behave exactly as before.
# NOTE(review): the default credentials are still embedded in source; consider
# supplying them from configuration at the call site.
#
# @param host [String] FTP server address
# @param user [String] FTP login name
# @param password [String] FTP password
# @param remote_file [String] file to fetch; it is saved locally under its base name
def get_file_csv(host: '192.168.1.156', user: 'training', password: 'training', remote_file: 'jobs.zip')
  Net::FTP.open(host, user, password) do |ftp|
    files = ftp.list
    puts "list out files in root directory:"
    puts files
    ftp.getbinaryfile(remote_file)
  end
end
# Extracts every entry of a zip archive into +destination+.
#
# Entries whose target file already exists are left untouched. Entries whose
# name would resolve outside +destination+ (for example "../x") are skipped
# with a warning instead of being written, and parent directories of nested
# entries are created before extraction.
#
# @param file [String] path to the zip archive
# @param destination [String] directory to extract into (created if missing)
def extract_zip(file, destination)
  FileUtils.mkdir_p(destination)
  root = File.expand_path(destination)
  # Prefix used for the containment check; avoids doubling the separator when root is "/".
  prefix = root.end_with?(File::SEPARATOR) ? root : "#{root}#{File::SEPARATOR}"

  Zip::File.open(file) do |zip_file|
    zip_file.each do |entry|
      fpath = File.join(destination, entry.name)
      resolved = File.expand_path(fpath)

      # Guard against path traversal ("zip slip"): never write outside destination.
      unless resolved == root || resolved.start_with?(prefix)
        warn "Skipping zip entry outside destination: #{entry.name}"
        next
      end

      next if File.exist?(fpath)

      # Nested entries may appear before (or without) their directory entry.
      FileUtils.mkdir_p(File.dirname(fpath))
      zip_file.extract(entry, fpath)
    end
  end
end
# Imports companies, jobs and their industry/city links from "jobs.csv" in the
# current working directory.
#
# Columns are read by position (0-based): 0 company introduction, 1 industry,
# 2 company address, 5 company name, 7 job description, 8 level, 9 job title,
# 11 salary, 16 location.
#
# Rows are processed independently: any StandardError raised while handling a
# row is printed and the import continues with the next row.
def import_file_csv
  file = "jobs.csv"
  CSV.foreach(file, headers: true) do |row|
    begin
      # Reuse the company when one with this name exists, otherwise create it.
      company_name = row[5].strip
      company = Company.find_by(name: company_name) ||
                Company.create!(name: company_name,
                                address: row[2],
                                introduction: row[0])

      title_job = row[9].strip
      salary = row[11]
      job = Job.create!(title: title_job,
                        description: row[7],
                        level: row[8],
                        salary: salary,
                        company_id: company.id)

      # Link the job to its industry whether the industry already existed or
      # was just created (previously a freshly created industry was never
      # linked because the nil lookup result was dereferenced).
      industry_name = row[1].strip
      industry = Industry.find_by(name: industry_name) || Industry.create!(name: industry_name)
      IndustryJob.create!(job_id: job.id, industry_id: industry.id)

      puts "========================================="
      puts job.id, title_job, industry_name, salary

      # The location cell looks like ["City"]; strip the wrapper before lookup.
      location = row[16].strip.gsub('["', '').gsub('"]', '').strip
      city = City.find_by(name: location)
      # Unknown cities are not created; the job is simply left without a city link.
      CityJob.create!(job_id: job.id, city_id: city.id) if city
      puts "Location: #{location}"
    rescue StandardError => e
      puts e
    end
  end
end
end
\ No newline at end of file
# Rake tasks in the :import namespace for crawling job data and loading it from a CSV archive.
require 'src/crawler.rb'
require 'net/ftp'
# NOTE(review): two Crawler instances are created here (`crawl` and `action`) and
# the tasks below call both; this looks like the old and new lines of a diff
# shown together — confirm which set is intended before relying on it.
crawl = Crawler.new
require 'csv'
require 'zip'
action = Crawler.new
namespace :import do
desc "crawler data"
task crawler: :environment do
# Runs the four crawl steps in order: city, industry, company, job relationships.
crawl.crawl_city
crawl.crawl_industry
crawl.crawl_company
crawl.crawl_job_relationships
# NOTE(review): same four steps again on the second instance; as written every step runs twice.
action.crawl_city
action.crawl_industry
action.crawl_company
action.crawl_job_relationships
end
desc "get file CSV from server"
task csv_get: :environment do
# NOTE(review): get_file_csv is invoked on both instances, i.e. twice per task run.
crawl.get_file_csv
action.get_file_csv
# Unpacks ./jobs.zip into the current directory.
action.extract_zip('./jobs.zip','.')
end
desc "Import data from CSV"
task data_csv: :environment do
action.import_file_csv
end
end
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment