Commit 2dfce383 by Ngo Trung Hung

fix

parent 6905c99b
...@@ -14,7 +14,7 @@ $main-color: #23303D; ...@@ -14,7 +14,7 @@ $main-color: #23303D;
.cus_header { .cus_header {
height: 52px; height: 52px;
width: 100%; width: 100%;
background-color: $main-color; background-image: linear-gradient(30deg, #434343, #000000);
position: fixed; position: fixed;
border-bottom: 1px solid #333; border-bottom: 1px solid #333;
// box-shadow: 0px 2px 4px 2px #999; // box-shadow: 0px 2px 4px 2px #999;
...@@ -204,6 +204,7 @@ $main-color: #23303D; ...@@ -204,6 +204,7 @@ $main-color: #23303D;
// footer // footer
.custom_footer { .custom_footer {
margin-top: 30px;
width: 100%; width: 100%;
height: 120px; height: 120px;
background-color: $main-color; background-color: $main-color;
...@@ -239,7 +240,7 @@ $main-color: #23303D; ...@@ -239,7 +240,7 @@ $main-color: #23303D;
.slogan_text { .slogan_text {
text-align: center; text-align: center;
font-family: 'Raleway', sans-serif; font-family: 'Raleway', sans-serif;
font-size: 35px; font-size: 40px;
color: #eaeaea; color: #eaeaea;
transform: scale(1.35); transform: scale(1.35);
font-weight: 700; font-weight: 700;
...@@ -610,7 +611,7 @@ $main-color: #23303D; ...@@ -610,7 +611,7 @@ $main-color: #23303D;
span { span {
font-family: 'Raleway', sans-serif; font-family: 'Raleway', sans-serif;
font-size: 30px; font-size: 30px;
font-weight: 200; font-weight: 400;
} }
} }
...@@ -817,7 +818,8 @@ $main-color: #23303D; ...@@ -817,7 +818,8 @@ $main-color: #23303D;
} }
.box_text_five_jobs.box_padding_city { .box_text_five_jobs.box_padding_city {
background-image: linear-gradient(to right, #86cb49, #169b74, #86cb49); // background-image: linear-gradient(to right, #86cb49, #169b74, #86cb49);
background-image: linear-gradient(to right, #cc2b5e, #753a88);
color: white; color: white;
font-weight: 600; font-weight: 600;
margin-bottom: 0px !important; margin-bottom: 0px !important;
...@@ -825,7 +827,7 @@ $main-color: #23303D; ...@@ -825,7 +827,7 @@ $main-color: #23303D;
.box_info_city { .box_info_city {
width: 100%; width: 100%;
height: 100px; height: 100px;
background-color: rgb(41, 41, 41); background-color: rgb(41, 41, 41);
border: 1px solid rgba($color: #c0c0c0, $alpha: 0.3); border: 1px solid rgba($color: #c0c0c0, $alpha: 0.3);
padding-top: 25px; padding-top: 25px;
transition: 0.2s; transition: 0.2s;
......
...@@ -14,8 +14,8 @@ ...@@ -14,8 +14,8 @@
<div class="col-sm-10 col-md-9 col-lg-10"> <div class="col-sm-10 col-md-9 col-lg-10">
<div class="box_info"> <div class="box_info">
<div class="lol"> <div class="lol">
<!-- <%= image_tag 'trophy',class: 'img_job_name' %> -->
<%= link_to val.name, '#', class: 'job_name' %> <%= link_to val.name, '#', class: 'job_name' %>
<%= image_tag 'trophy',class: 'img_job_name' %>
</div> </div>
<div class="cop"> <div class="cop">
<h5 class="box_info_copany_name"><i class="far fa-building"></i> <%= val.company.name %></h5> <h5 class="box_info_copany_name"><i class="far fa-building"></i> <%= val.company.name %></h5>
......
...@@ -14,7 +14,7 @@ default: &default ...@@ -14,7 +14,7 @@ default: &default
encoding: utf8 encoding: utf8
pool: <%= ENV.fetch("RAILS_MAX_THREADS") { 5 } %> pool: <%= ENV.fetch("RAILS_MAX_THREADS") { 5 } %>
username: root username: root
password: '1' password: '12345678'
socket: /var/run/mysqld/mysqld.sock socket: /var/run/mysqld/mysqld.sock
......
Rails.application.configure do Rails.application.configure do
# Settings specified here will take precedence over those in config/application.rb. # Settings specified here will take precedence over those in config/application.rb.
# In the development environment your application's code is reloaded on # In the development environment your application's code is reloaded on
# every request. This slows down response time but is perfect for development # every request. This slows down response time but is perfect for development
# since you don't have to restart the web server when you make code changes. # since you don't have to restart the web server when you make code changes.
......
env :PATH, ENV['PATH'] env :PATH, ENV['PATH']
every 3.minutes do every 10.minutes do
rake "crawler:populate" rake "crawler:populate"
end end
every :day, at: "19:00pm"do
rake "csv:import_csv"
end
=> 2020-07-23 22:40:02 +0700
\ No newline at end of file
...@@ -21,16 +21,17 @@ class FtpSever ...@@ -21,16 +21,17 @@ class FtpSever
end end
def self.data_csv def self.data_csv
# donwload_csv() donwload_csv()
table = CSV.parse(File.read("lib/csv/jobs.csv"), headers: true) table = CSV.parse(File.read("lib/csv/jobs.csv"), headers: true)
end end
def self.parse_csv_industries(data) def self.parse_csv_industries(data)
puts 'Import data industries . . .' puts 'Import data industries . . .'
industries = [] industries = []
data['category'].each do |val| # data['category'].each do |val|
industries << val.strip # industries << val.strip
end # end
industries += data['category'].map(&:strip)
industries.each do |val| industries.each do |val|
val.gsub!(',','/') if val.include?(',') val.gsub!(',','/') if val.include?(',')
val.gsub!('/',' / ') val.gsub!('/',' / ')
......
...@@ -8,8 +8,7 @@ class InterfaceWeb ...@@ -8,8 +8,7 @@ class InterfaceWeb
website_jobs = [] website_jobs = []
file = File.readlines('tmp/link.txt', 'r') if File.exist?('tmp/link.txt') file = File.readlines('tmp/link.txt', 'r') if File.exist?('tmp/link.txt')
@@stop_crawl = file.blank? ? '' : file.join @@stop_crawl = file.blank? ? '' : file.join
File.delete('tmp/link.txt') if File.exist?('tmp/link.txt')
page.times do |i| page.times do |i|
page = Nokogiri::HTML(URI.open("https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-trang-#{i+1}-vi.html")) page = Nokogiri::HTML(URI.open("https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-trang-#{i+1}-vi.html"))
...@@ -24,12 +23,13 @@ class InterfaceWeb ...@@ -24,12 +23,13 @@ class InterfaceWeb
website_jobs = website_jobs.select { |val| val.present?} website_jobs = website_jobs.select { |val| val.present?}
puts "Result:\nCompany: #{website_companies.length} link\nJob : #{website_jobs.length} link\n------------------------" puts "Result:\nCompany: #{website_companies.length} link\nJob : #{website_jobs.length} link\n------------------------"
File.open("tmp/link.txt", "w+") {|f| f.write(website_jobs[0])}
File.write("tmp/link.txt", "#{website_jobs[0]}")
data << website_companies << website_jobs data << website_companies << website_jobs
end end
def self.get_link_job_and_companies def self.get_link_job_and_companies
@crawl_link_for_companies_jobs ||= crawl_link_for_companies_jobs(10) @crawl_link_for_companies_jobs ||= crawl_link_for_companies_jobs(1)
end end
def self.safe_link(url) def self.safe_link(url)
...@@ -84,8 +84,7 @@ class InterfaceWeb ...@@ -84,8 +84,7 @@ class InterfaceWeb
end end
end end
def self.add_data(name, company_name, city_name, created_date, expiration_date, salary, industry_name, description, level, exprience) def self.add_data(name, company_name, city_name, created_date, expiration_date, salary, industry_name, description, level, exprience)
Job.update_all(newdata: 0)
id_company = Company.find_by name: company_name id_company = Company.find_by name: company_name
id_company = id_company.present? ? id_company.id : 1 id_company = id_company.present? ? id_company.id : 1
id_job = Job.create!(name: name, id_job = Job.create!(name: name,
...@@ -95,36 +94,27 @@ class InterfaceWeb ...@@ -95,36 +94,27 @@ class InterfaceWeb
salary: salary, salary: salary,
create_date: created_date, create_date: created_date,
expiration_date: expiration_date, expiration_date: expiration_date,
description: description, description: description)
newdata: 1)
make_foreign_industries_table(industry_name, id_job.id) make_foreign_industries_table(industry_name, id_job.id)
make_foreign_cities_table(city_name, id_job.id) make_foreign_cities_table(city_name, id_job.id)
end end
def self.crawl_data_jobs_interface_1(page) def self.crawl_data_jobs_interface_1(page)
name = page.search(".apply-now-content .job-desc .title").text name = page.search(".apply-now-content .job-desc .title").text
company_name = page.search(".apply-now-content .job-desc .job-company-name").text company_name = page.search(".apply-now-content .job-desc .job-company-name").text
location = [] location = []
length = page.search(".detail-box .map p a").size length = page.search(".detail-box .map p a").size
length.times do |n| length.times do |n|
location << page.search(".detail-box .map p a:nth-child(#{n+1})").text location << page.search(".detail-box .map p a:nth-child(#{n+1})").text
end end
city_name = location.join(',') city_name = location.join(',')
created_date = page.search(".item-blue .detail-box:nth-child(1) ul li:nth-child(1) p")[0].text created_date = page.search(".item-blue .detail-box:nth-child(1) ul li:nth-child(1) p")[0].text
expiration_date = page.search(".item-blue .detail-box ul li:last")[1].text.delete!("[\n,\t,\r]").split(' ').last expiration_date = page.search(".item-blue .detail-box ul li:last")[1].text.delete!("[\n,\t,\r]").split(' ').last
salary = page.search(".item-blue .detail-box:nth-child(1) ul li:nth-child(1) p")[1].text salary = page.search(".item-blue .detail-box:nth-child(1) ul li:nth-child(1) p")[1].text
industries = page.search(".item-blue .detail-box:nth-child(1) ul li:nth-child(2) a").text industries = page.search(".item-blue .detail-box:nth-child(1) ul li:nth-child(2) a").text
industries = industries.delete!("[\n,\t,\r]").split(' ').select { |v| v != ''} industries = industries.delete!("[\n,\t,\r]").split(' ').select { |v| v != ''}
industry_name = industries.join(',') industry_name = industries.join(',')
description = page.search(".tabs .tab-content .detail-row:nth-child(n)").to_s.delete!("[\n,\t,\r]") description = page.search(".tabs .tab-content .detail-row:nth-child(n)").to_s.delete!("[\n,\t,\r]")
get_level = page.search(".item-blue .detail-box:last ul li:nth-child(3)").text.delete!("[\n,\t,\r]").lstrip.split('Cấp bậc') get_level = page.search(".item-blue .detail-box:last ul li:nth-child(3)").text.delete!("[\n,\t,\r]").lstrip.split('Cấp bậc')
get_level = get_level[1].to_s.strip get_level = get_level[1].to_s.strip
if get_level == "" if get_level == ""
...@@ -134,49 +124,37 @@ class InterfaceWeb ...@@ -134,49 +124,37 @@ class InterfaceWeb
g_level = get_level g_level = get_level
level = g_level[1].to_s.strip level = g_level[1].to_s.strip
end end
exp = page.search(".item-blue .detail-box:last ul li:nth-child(2)").text.delete!("[\n,\t,\r]").split('Kinh nghiệm') exp = page.search(".item-blue .detail-box:last ul li:nth-child(2)").text.delete!("[\n,\t,\r]").split('Kinh nghiệm')
exp = exp[1].to_s.strip exp = exp[1].to_s.strip
exprience = exp exprience = exp
add_data(name, company_name, city_name, created_date, expiration_date, salary, industry_name, description, level, exprience) add_data(name, company_name, city_name, created_date, expiration_date, salary, industry_name, description, level, exprience)
end end
def self.crawl_data_jobs_interface_2(page) def self.crawl_data_jobs_interface_2(page)
name = page.search(".apply-now-content .job-desc .title").text name = page.search(".apply-now-content .job-desc .title").text
company_name = page.search(".top-job .top-job-info .tit_company").text company_name = page.search(".top-job .top-job-info .tit_company").text
location = [] location = []
length = page.search(".info-workplace .value a").size length = page.search(".info-workplace .value a").size
length.times do |n| length.times do |n|
location << page.search(".info-workplace .value a:nth-child(#{n+1})").text location << page.search(".info-workplace .value a:nth-child(#{n+1})").text
end end
city_name = location.join(',') city_name = location.join(',')
created_date = "" created_date = ""
expiration_date = page.search(".info li:nth-child(4)").text expiration_date = page.search(".info li:nth-child(4)").text
if expiration_date.blank? if expiration_date.blank?
expiration_date = "" expiration_date = ""
else else
expiration_date = expiration_date.to_s.delete!("[\n,\t,\r]").split(' ').last expiration_date = expiration_date.to_s.delete!("[\n,\t,\r]").split(' ').last
end end
salary = page.search(".info li:nth-child(3)").text.split("Lương").last.strip salary = page.search(".info li:nth-child(3)").text.split("Lương").last.strip
industry_name = page.search(".info li:nth-child(5) .value").text industry_name = page.search(".info li:nth-child(5) .value").text
description = page.search(".left-col").to_s.delete!("[\n,\t,\r]") description = page.search(".left-col").to_s.delete!("[\n,\t,\r]")
lv = page.search(".boxtp .info li:nth-child(2)").text lv = page.search(".boxtp .info li:nth-child(2)").text
if lv.blank? if lv.blank?
level = "" level = ""
else else
level = lv.delete!("[\n,\t,\r]").strip.split('Cấp bậc').last.strip level = lv.delete!("[\n,\t,\r]").strip.split('Cấp bậc').last.strip
end end
exp = page.search(".info li:nth-child(6)").text exp = page.search(".info li:nth-child(6)").text
if exp.blank? if exp.blank?
exprience = "" exprience = ""
...@@ -189,25 +167,15 @@ class InterfaceWeb ...@@ -189,25 +167,15 @@ class InterfaceWeb
def self.crawl_data_jobs_interface_5(page) def self.crawl_data_jobs_interface_5(page)
name = page.search(".info-company h1").text name = page.search(".info-company h1").text
company_name = page.search(".info-company .text-job h2").text company_name = page.search(".info-company .text-job h2").text
city_name = page.search(".DetailJobNew ul li:nth-child(1) a").text city_name = page.search(".DetailJobNew ul li:nth-child(1) a").text
created_date = "" created_date = ""
expiration_date = page.search(".DetailJobNew li:nth-child(9) span").text.strip expiration_date = page.search(".DetailJobNew li:nth-child(9) span").text.strip
salary = page.search(".DetailJobNew li:nth-child(3) span").text.strip salary = page.search(".DetailJobNew li:nth-child(3) span").text.strip
industry_name = page.search(".DetailJobNew li:nth-child(2) span").text.strip industry_name = page.search(".DetailJobNew li:nth-child(2) span").text.strip
description = page.search(".left-col .detail-row").to_s.delete!("[\n,\t,\r]") description = page.search(".left-col .detail-row").to_s.delete!("[\n,\t,\r]")
level = page.search(".DetailJobNew ul li:nth-child(6) span").text.strip level = page.search(".DetailJobNew ul li:nth-child(6) span").text.strip
exprience = page.search(".DetailJobNew li:nth-child(5) span").text.strip exprience = page.search(".DetailJobNew li:nth-child(5) span").text.strip
add_data(name, company_name, city_name, created_date, expiration_date, salary, industry_name, description, level, exprience) add_data(name, company_name, city_name, created_date, expiration_date, salary, industry_name, description, level, exprience)
end end
...@@ -233,7 +201,12 @@ class InterfaceWeb ...@@ -233,7 +201,12 @@ class InterfaceWeb
def self.make_data def self.make_data
puts 'Please wait for crawl jobs data! . . .' puts 'Please wait for crawl jobs data! . . .'
link_crawl = get_link_job_and_companies link_crawl = get_link_job_and_companies
link_crawl[1].each_with_index do |path,i| arr_link = []
link_crawl[1].each do |val|
break if @@stop_crawl == val
arr_link << val
end
arr_link.reverse.each_with_index do |path,i|
break if @@stop_crawl == path break if @@stop_crawl == path
page = Nokogiri::HTML(URI.open(URI.parse(URI.escape(path)))) page = Nokogiri::HTML(URI.open(URI.parse(URI.escape(path))))
if page.search(".item-blue .detail-box:nth-child(1) ul li:nth-child(1) p")[0] != nil if page.search(".item-blue .detail-box:nth-child(1) ul li:nth-child(1) p")[0] != nil
......
require 'open-uri' require 'open-uri'
require 'logger'
require 'src/interface_web' require 'src/interface_web'
namespace :crawler do namespace :crawler do
task populate: :environment do task populate: :environment do
File.write('q.txt', "hello #{Time.now}")
InterfaceWeb.craw_data_cities() InterfaceWeb.craw_data_cities()
InterfaceWeb.craw_data_companies() InterfaceWeb.craw_data_companies()
InterfaceWeb.make_data() InterfaceWeb.make_data()
end File.open('log/crawler.log','a') do |f|
f.puts "#{Time.now} - INFO: OK"
task do: :environment do end
File.write('oo.txt', "hello #{Time.now}")
end end
end end
require 'src/ftp' require 'src/ftp'
namespace :csv do namespace :csv do
task import_csv: :environment do task import_csv: :environment do
File.write('importcsv.log', "=> #{Time.now} IMPORT SUCCESS")
Company.find_or_create_by(name: 'Bảo mật', address: 'Vui lòng xem trong mô tả công việc') do |company| Company.find_or_create_by(name: 'Bảo mật', address: 'Vui lòng xem trong mô tả công việc') do |company|
company.name = 'Bảo mật' company.name = 'Bảo mật'
company.address = 'Vui lòng xem trong mô tả công việc' company.address = 'Vui lòng xem trong mô tả công việc'
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment