Commit e806339f by Hung0326 Committed by GitHub

Merge pull request #12 from Hung0326/dev

fix bug
parents 99841a3b a93ab3c6
Pipeline #693 canceled with stages
in 0 seconds
...@@ -22,6 +22,7 @@ gem 'turbolinks', '~> 5' ...@@ -22,6 +22,7 @@ gem 'turbolinks', '~> 5'
gem 'jbuilder', '~> 2.5' gem 'jbuilder', '~> 2.5'
gem 'nokogiri' gem 'nokogiri'
gem 'rubyzip' gem 'rubyzip'
gem 'whenever'
# Use Redis adapter to run Action Cable in production # Use Redis adapter to run Action Cable in production
# gem 'redis', '~> 4.0' # gem 'redis', '~> 4.0'
# Use ActiveModel has_secure_password # Use ActiveModel has_secure_password
......
...@@ -65,6 +65,7 @@ GEM ...@@ -65,6 +65,7 @@ GEM
chromedriver-helper (2.1.1) chromedriver-helper (2.1.1)
archive-zip (~> 0.10) archive-zip (~> 0.10)
nokogiri (~> 1.8) nokogiri (~> 1.8)
chronic (0.10.2)
coderay (1.1.3) coderay (1.1.3)
coffee-rails (4.2.2) coffee-rails (4.2.2)
coffee-script (>= 2.2.0) coffee-script (>= 2.2.0)
...@@ -217,6 +218,8 @@ GEM ...@@ -217,6 +218,8 @@ GEM
websocket-driver (0.7.3) websocket-driver (0.7.3)
websocket-extensions (>= 0.1.0) websocket-extensions (>= 0.1.0)
websocket-extensions (0.1.5) websocket-extensions (0.1.5)
whenever (1.0.0)
chronic (>= 0.6.3)
xpath (3.2.0) xpath (3.2.0)
nokogiri (~> 1.8) nokogiri (~> 1.8)
...@@ -249,6 +252,7 @@ DEPENDENCIES ...@@ -249,6 +252,7 @@ DEPENDENCIES
tzinfo-data tzinfo-data
uglifier (>= 1.3.0) uglifier (>= 1.3.0)
web-console (>= 3.3.0) web-console (>= 3.3.0)
whenever
RUBY VERSION RUBY VERSION
ruby 2.6.6p146 ruby 2.6.6p146
......
...@@ -9,7 +9,7 @@ body { ...@@ -9,7 +9,7 @@ body {
} }
$breakpoint-tablet: 992px; $breakpoint-tablet: 992px;
$main-color: #221f20; $main-color: #23303D;
// Header // Header
.cus_header { .cus_header {
height: 52px; height: 52px;
......
...@@ -14,7 +14,7 @@ default: &default ...@@ -14,7 +14,7 @@ default: &default
encoding: utf8 encoding: utf8
pool: <%= ENV.fetch("RAILS_MAX_THREADS") { 5 } %> pool: <%= ENV.fetch("RAILS_MAX_THREADS") { 5 } %>
username: root username: root
password: '12345678' password: '1'
socket: /var/run/mysqld/mysqld.sock socket: /var/run/mysqld/mysqld.sock
......
# Whenever schedule entry: run the crawler:populate rake task on the raw cron
# expression below (minute 53, hour 13, day-of-month 23, month 7, any weekday).
every('53 13 23 7 *') { rake "crawler:populate" }
\ No newline at end of file
require 'open-uri'
require 'src/interface_web'
# Crawls careerbuilder.vn and stores cities, industries, companies and jobs
# through the application's ActiveRecord models.
class Clawler
  # Page that is searched for the `#location` and `#industry` option lists.
  LISTING_URL = 'https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-vi.html'.freeze

  # Company id used for a job whose company name matches no Company row.
  FALLBACK_COMPANY_ID = 1

  # Location options positioned after this index are saved with area 0,
  # the ones up to and including it with area 1.
  LAST_AREA_ONE_INDEX = 69

  # FILL DATA CITIES
  # Saves one City per `#location` option found on the listing page.
  #
  # @return [Array<String>] the option labels that were read from the page
  def self.make_cities
    puts "Crawling data location... \n. \n. \n."
    names = option_labels('#location option').map(&:rstrip)
    puts "Save data to database... \n------------------------"
    names.each_with_index do |name, index|
      # Blank fragments are skipped without shifting the index used for `area`.
      next if name.empty?

      area = index > LAST_AREA_ONE_INDEX ? 0 : 1
      City.find_or_create_by(name: name) { |city| city.area = area }
    end
  end

  # FILL DATA INDUSTRIES
  # Saves one Industry per `#industry` option found on the listing page.
  #
  # @return [Array<String>] the option labels that were read from the page
  def self.make_industries
    puts "Crawling data industries... \n. \n. \n."
    # The labels come from raw markup, so "&amp;" is turned back into "&".
    names = option_labels('#industry option').map { |label| label.strip.gsub('&amp;', '&') }
    puts "Save data to database... \n------------------------"
    names.each do |name|
      next if name.empty?

      Industry.find_or_create_by(name: name)
    end
  end

  # FILL DATA COMPANIES
  # Makes sure the 'Bảo mật' company exists, then saves every crawled company
  # whose name is not stored yet.
  def self.make_companies
    # NOTE(review): presumably this is the row FALLBACK_COMPANY_ID points at,
    # which only holds when it is the first company ever inserted -- confirm.
    Company.find_or_create_by(name: 'Bảo mật', address: 'Vui lòng xem trong mô tả công việc') do |company|
      company.short_description = 'Vui lòng xem trong mô tả công việc'
    end
    data = InterfaceWeb.craw_data_companies
    puts 'Save info companies to database . . .'
    data[:name].each_with_index do |name, index|
      next if Company.exists?(name: name)

      Company.create!(name: name,
                      address: data[:address][index],
                      short_description: data[:description][index])
    end
  end

  # FILL DATA JOBS
  # Clears the `newdata` flag on every stored job, then saves each crawled job
  # with `newdata: 1` together with its industry and city links.
  #
  # @raise [ActiveRecord::RecordInvalid] when a job or link row fails validation
  def self.make_jobs
    Job.update_all(newdata: 0)
    data = InterfaceWeb.make_data
    puts 'Save to database . . .'
    data[:name].each_with_index do |job_name, i|
      company = Company.find_by(name: data[:company_name][i].to_s.strip)
      job = Job.create!(name: job_name.to_s,
                        company_id: company ? company.id : FALLBACK_COMPANY_ID,
                        level: data[:level][i].to_s,
                        # :exprience is the (misspelled) key InterfaceWeb produces.
                        experience: data[:exprience][i].to_s,
                        salary: data[:salary][i].to_s,
                        create_date: data[:created_date][i].to_s,
                        expiration_date: data[:expiration_date][i].to_s,
                        description: data[:description][i].to_s,
                        newdata: 1)
      make_foreign_industries_table(data[:industry_name][i], job.id)
      make_foreign_cities_table(data[:city_name][i], job.id)
    end
  end

  # Links a job to every industry named in a comma-separated list, creating
  # industries that do not exist yet.
  #
  # @param data [String, nil] comma-separated industry names
  # @param id_job [Integer] id of the job to link
  def self.make_foreign_industries_table(data, id_job)
    split_names(data).each do |name|
      industry = Industry.find_or_create_by!(name: name)
      IndustryJob.create!(industry_id: industry.id, job_id: id_job)
    end
  end

  # Links a job to every city named in a comma-separated list, creating
  # missing cities with area 1.
  #
  # @param data [String, nil] comma-separated city names
  # @param id_job [Integer] id of the job to link
  def self.make_foreign_cities_table(data, id_job)
    split_names(data).each do |name|
      city = City.find_or_create_by!(name: name) { |record| record.area = 1 }
      CityJob.create!(job_id: id_job, city_id: city.id)
    end
  end

  # Parsed listing page. Fetched on first use instead of at class-load time,
  # so requiring this file no longer performs an HTTP request.
  def self.page
    @page ||= Nokogiri::HTML(URI.open(LISTING_URL))
  end

  # Text of every option matched by +selector+: the markup is split on the
  # closing tag, then the opening tag and any newlines are removed.
  def self.option_labels(selector)
    page.search(selector).to_s.split('</option>').map do |chunk|
      chunk.gsub(/(^<[\w\D]*>)/, '').gsub(/\n/, '')
    end
  end

  # Splits a comma-separated list into stripped, non-empty names; nil yields [].
  def self.split_names(data)
    data.to_s.split(',').map(&:strip).reject(&:empty?)
  end

  private_class_method :page, :option_labels, :split_names
end
...@@ -41,12 +41,9 @@ class FtpSever ...@@ -41,12 +41,9 @@ class FtpSever
def self.parse_csv_cities(data) def self.parse_csv_cities(data)
puts 'Import data cities . . .' puts 'Import data cities . . .'
arr_city = '' cities = data['work place'].uniq.select { |val| val.present? }
cities = data['work place'].select { |val| val.present? } cities = cities.map{ |val| val.delete("[]\"") }
cities.uniq! cities.each do |val|
arr_city = cities.map{ |val| val.delete("[]\"") }
arr_city.each do |val|
if !val.blank? if !val.blank?
City.find_or_create_by(name: val) do |city| City.find_or_create_by(name: val) do |city|
city.name = val city.name = val
...@@ -78,11 +75,7 @@ class FtpSever ...@@ -78,11 +75,7 @@ class FtpSever
data['name'].each_with_index do |name, index| data['name'].each_with_index do |name, index|
desc = (data['requirement'][index]).to_s + '\n' << (data['description'][index]).to_s desc = (data['requirement'][index]).to_s + '\n' << (data['description'][index]).to_s
id_company = Company.find_by name: data['company name'][index].to_s.strip id_company = Company.find_by name: data['company name'][index].to_s.strip
if id_company.blank? id_company = id_company.blank? ? 1 : id_company.id
id_company = 1
else
id_company = id_company.id
end
begin begin
id_job = Job.create!( name: name, id_job = Job.create!( name: name,
company_id: id_company, company_id: id_company,
...@@ -104,25 +97,16 @@ class FtpSever ...@@ -104,25 +97,16 @@ class FtpSever
def self.make_foreign_cities_table(data,id_job) def self.make_foreign_cities_table(data,id_job)
data = data.to_s.delete("[]\"") data = data.to_s.delete("[]\"")
id_cities = City.find_by name: data.strip id_city = City.find_by name: data.strip
if id_cities.blank? id_city = id_city.blank? ? City.create!(name: data.strip, area: 1).id : id_city.id
id_cities = City.create!(name: data.strip, area: 1).id CityJob.create!(job_id: id_job, city_id: id_city)
else
id_cities = id_cities.id
end
CityJob.create!(job_id: id_job, city_id: id_cities)
end end
def self.make_foreign_industries_table(data,id_job) def self.make_foreign_industries_table(data,id_job)
data = data.to_s.gsub(',','/').gsub('/',' / ') data = data.to_s.gsub(',','/').gsub('/',' / ')
id_industry = Industry.find_by name: (data.strip) id_industry = Industry.find_by name: (data.strip)
if id_industry.blank? id_industry = id_industry.blank? ? Industry.create!(name: data.strip).id : id_industry.id
id_industry = Industry.create!(name: data.strip).id IndustryJob.create!(industry_id: id_industry, job_id: id_job)
else
id_industry = id_industry.id
end
IndustryJob.create!(industry_id: id_industry,
job_id: id_job)
end end
def self.import_data_from_csv def self.import_data_from_csv
......
require 'open-uri'
class InterfaceWeb class InterfaceWeb
# func get "n" link company & job
def self.crawl_link_for_companies_jobs(page) def self.crawl_link_for_companies_jobs(page)
puts "Crawling link on page...\nPLease wait...\n" puts "Crawling link on page...\nPLease wait...\n"
data = [] data = []
website_companies = [] website_companies = []
website_jobs = [] website_jobs = []
file = File.readlines('tmp/link.txt', 'r') if File.exist?('tmp/link.txt')
@@stop_crawl = file.blank? ? '' : file.join
File.delete('tmp/link.txt') if File.exist?('tmp/link.txt')
page.times do |i| page.times do |i|
page = Nokogiri::HTML(URI.open("https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-trang-#{i+1}-vi.html")) page = Nokogiri::HTML(URI.open("https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-trang-#{i+1}-vi.html"))
website_companies << page.search(".figcaption .caption a/@href").text.to_s.split('https://careerbuilder.vn/') link_companies = page.search(".figcaption .caption @href")
website_jobs << page.search(".figcaption .title .job_link @href").text.to_s.split('https://careerbuilder.vn/') website_companies += link_companies.map(&:value).uniq
link_jobs = page.search(".figcaption .title .job_link @href")
website_jobs += link_jobs.map(&:value)
break if website_jobs.include?(@@stop_crawl)
end end
website_companies = website_companies.join(",") website_companies = website_companies.select { |val| val.present? && val != "javascript:void(0);"}
website_companies = website_companies.split(",").uniq! website_jobs = website_jobs.select { |val| val.present?}
website_companies = website_companies.select { |val| val != ''}
website_jobs = website_jobs.join(",")
website_jobs = website_jobs.split(",")
website_jobs = website_jobs.select { |val| val != ''}
puts "Result:\nCompany: #{website_companies.length} link\nJob : #{website_jobs.length} link\n------------------------" puts "Result:\nCompany: #{website_companies.length} link\nJob : #{website_jobs.length} link\n------------------------"
File.open("tmp/link.txt", "w+") {|f| f.write(website_jobs[0])}
data << website_companies << website_jobs data << website_companies << website_jobs
end end
# @crawl_link_for_companies_jobs = crawl_link_for_companies_jobs(3)
def self.get_link_job_and_companies def self.get_link_job_and_companies
@crawl_link_for_companies_jobs ||= crawl_link_for_companies_jobs(1) @crawl_link_for_companies_jobs ||= crawl_link_for_companies_jobs(10)
end end
def self.base_link(url) def self.safe_link(url)
Nokogiri::HTML(URI.open(URI.parse(URI.escape("https://careerbuilder.vn/#{url}")))) Nokogiri::HTML(URI.parse(URI.escape(url)))
end
def self.craw_data_cities
page = Nokogiri::HTML(URI.open('https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-vi.html'))
puts "Crawling data location... \n. \n. \n."
data_list_cities = []
data = page.search("#location option")
list_cities = data.to_s.split("</option>")
list_cities.each do |x|
data_list_cities << x.gsub(/(^<[\w\D]*>)/, '').gsub(/\n/,'').rstrip
end
puts "Save data to database... \n------------------------"
data_list_cities.each_with_index do |val, index|
area = index > 69 ? 0 : 1
City.find_or_create_by(name: val) do |city|
city.name = val
city.area = area
end
end
end end
def self.craw_data_companies def self.craw_data_companies
link_crawl = get_link_job_and_companies
data_companies = {}
data_companies_name = []
data_companies_address = []
data_companies_description = []
puts 'Crawl data companies' puts 'Crawl data companies'
link_crawl = get_link_job_and_companies
link_crawl[0].each_with_index do |url,i| link_crawl[0].each_with_index do |url,i|
page = base_link(url) page = Nokogiri::HTML(URI.open(URI.parse(URI.escape(url))))
name = '' name = ''
address = '' address = ''
desc = '' desc = ''
...@@ -55,157 +74,179 @@ class InterfaceWeb ...@@ -55,157 +74,179 @@ class InterfaceWeb
end end
if (name.present? && address.present? && desc.present?) if (name.present? && address.present? && desc.present?)
data_companies_name << name.to_s.strip Company.find_or_create_by(name: name.strip) do |company|
data_companies_address << address.to_s.strip company.name = name.strip
data_companies_description << desc company.address = address
company.short_description = desc
end end
puts "Process company #{i+1}. . .\n------------------------" puts name
end end
data_companies[:name] = data_companies_name
data_companies[:address] = data_companies_address
data_companies_description.each do |val|
val.to_s.delete!("[\n,\t,\r]")
val.strip!
end end
data_companies[:description] = data_companies_description
data_companies
end end
def self.add_data(name, company_name, city_name, created_date, expiration_date, salary, industry_name, description, level, exprience) def self.add_data(name, company_name, city_name, created_date, expiration_date, salary, industry_name, description, level, exprience)
@data[:name] = name Job.update_all(newdata: 0)
@data[:company_name] = company_name id_company = Company.find_by name: company_name
@data[:city_name] = city_name id_company = id_company.present? ? id_company.id : 1
@data[:created_date] = created_date id_job = Job.create!(name: name,
@data[:expiration_date] = expiration_date company_id: id_company,
@data[:salary] = salary level: level,
@data[:industry_name] = industry_name experience: exprience,
@data[:description] = description salary: salary,
@data[:level] = level create_date: created_date,
@data[:exprience] = exprience expiration_date: expiration_date,
description: description,
newdata: 1)
make_foreign_industries_table(industry_name, id_job.id)
make_foreign_cities_table(city_name, id_job.id)
end end
def self.crawl_data_jobs_interface_1(page) def self.crawl_data_jobs_interface_1(page)
@name << page.search(".apply-now-content .job-desc .title").text name = page.search(".apply-now-content .job-desc .title").text
@company_name << page.search(".apply-now-content .job-desc .job-company-name").text company_name = page.search(".apply-now-content .job-desc .job-company-name").text
location = [] location = []
length = page.search(".detail-box .map p a").size length = page.search(".detail-box .map p a").size
length.times do |n| length.times do |n|
location << page.search(".detail-box .map p a:nth-child(#{n+1})").text location << page.search(".detail-box .map p a:nth-child(#{n+1})").text
end end
@city_name << location.join(',') city_name = location.join(',')
location.clear
@created_date << page.search(".item-blue .detail-box:nth-child(1) ul li:nth-child(1) p")[0].text created_date = page.search(".item-blue .detail-box:nth-child(1) ul li:nth-child(1) p")[0].text
@expiration_date << page.search(".item-blue .detail-box ul li:last")[1].text.delete!("[\n,\t,\r]").split(' ').last expiration_date = page.search(".item-blue .detail-box ul li:last")[1].text.delete!("[\n,\t,\r]").split(' ').last
@salary << page.search(".item-blue .detail-box:nth-child(1) ul li:nth-child(1) p")[1].text salary = page.search(".item-blue .detail-box:nth-child(1) ul li:nth-child(1) p")[1].text
industries = page.search(".item-blue .detail-box:nth-child(1) ul li:nth-child(2) a").text industries = page.search(".item-blue .detail-box:nth-child(1) ul li:nth-child(2) a").text
industries = industries.delete!("[\n,\t,\r]").split(' ').select { |v| v != ''} industries = industries.delete!("[\n,\t,\r]").split(' ').select { |v| v != ''}
@industry_name << industries.join(',') industry_name = industries.join(',')
description = page.search(".tabs .tab-content .detail-row:nth-child(n)").to_s.delete!("[\n,\t,\r]")
@description << page.search(".tabs .tab-content .detail-row:nth-child(n)").to_s.delete!("[\n,\t,\r]") get_level = page.search(".item-blue .detail-box:last ul li:nth-child(3)").text.delete!("[\n,\t,\r]").lstrip.split('Cấp bậc')
block_industries_hotlstrip.split('Cấp bậc') get_level = get_level[1].to_s.strip
@level << g_level[1].to_s.strip if get_level == ""
g_level = page.search(".item-blue .detail-box:last ul li:nth-child(2)").text.delete!("[\n,\t,\r]").lstrip.split('Cấp bậc')
level = g_level[1].to_s.strip
else
g_level = get_level
level = g_level[1].to_s.strip
end end
exp = page.search(".item-blue .detail-box:last ul li:nth-child(2)").text.delete!("[\n,\t,\r]").split('Kinh nghiệm') exp = page.search(".item-blue .detail-box:last ul li:nth-child(2)").text.delete!("[\n,\t,\r]").split('Kinh nghiệm')
exp = exp[1].to_s.strip exp = exp[1].to_s.strip
@exprience << exp exprience = exp
add_data(@name, @company_name, @city_name, @created_date, @expiration_date, @salary, @industry_name, @description, @level, @exprience) add_data(name, company_name, city_name, created_date, expiration_date, salary, industry_name, description, level, exprience)
end end
def self.crawl_data_jobs_interface_2(page) def self.crawl_data_jobs_interface_2(page)
@name << page.search(".apply-now-content .job-desc .title").text name = page.search(".apply-now-content .job-desc .title").text
@company_name << page.search(".top-job .top-job-info .tit_company").text company_name = page.search(".top-job .top-job-info .tit_company").text
location = [] location = []
length = page.search(".info-workplace .value a").size length = page.search(".info-workplace .value a").size
length.times do |n| length.times do |n|
location << page.search(".info-workplace .value a:nth-child(#{n+1})").text location << page.search(".info-workplace .value a:nth-child(#{n+1})").text
end end
@city_name << location.join(',') city_name = location.join(',')
location.clear
@created_date << "" created_date = ""
expiration_date = page.search(".info li:nth-child(4)").text expiration_date = page.search(".info li:nth-child(4)").text
if expiration_date.blank? if expiration_date.blank?
@expiration_date << "" expiration_date = ""
else else
@expiration_date << expiration_date.to_s.delete!("[\n,\t,\r]").split(' ').last expiration_date = expiration_date.to_s.delete!("[\n,\t,\r]").split(' ').last
end end
@salary << page.search(".info li:nth-child(3)").text.split("Lương").last.strip salary = page.search(".info li:nth-child(3)").text.split("Lương").last.strip
@industry_name << page.search(".info li:nth-child(5) .value").text industry_name = page.search(".info li:nth-child(5) .value").text
@description << page.search(".left-col").to_s.delete!("[\n,\t,\r]") description = page.search(".left-col").to_s.delete!("[\n,\t,\r]")
lv = page.search(".boxtp .info li:nth-child(2)").text lv = page.search(".boxtp .info li:nth-child(2)").text
if lv.blank? if lv.blank?
@level << "" level = ""
else else
@level << lv.delete!("[\n,\t,\r]").strip.split('Cấp bậc').last.strip level = lv.delete!("[\n,\t,\r]").strip.split('Cấp bậc').last.strip
end end
exp = page.search(".info li:nth-child(6)").text exp = page.search(".info li:nth-child(6)").text
if exp.blank? if exp.blank?
@exprience << "" exprience = ""
else else
@exprience << exp.delete!("[\n,\t,\r]").split('Kinh nghiệm').last.strip exprience = exp.delete!("[\n,\t,\r]").split('Kinh nghiệm').last.strip
end end
add_data(@name, @company_name, @city_name, @created_date, @expiration_date, @salary, @industry_name, @description, @level, @exprience) add_data(name, company_name, city_name, created_date, expiration_date, salary, industry_name, description, level, exprience)
end end
def self.crawl_data_jobs_interface_5(page) def self.crawl_data_jobs_interface_5(page)
@name << page.search(".info-company h1").text name = page.search(".info-company h1").text
@company_name << page.search(".info-company .text-job h2").text company_name = page.search(".info-company .text-job h2").text
@city_name << page.search(".DetailJobNew ul li:nth-child(1) a").text city_name = page.search(".DetailJobNew ul li:nth-child(1) a").text
@created_date << "" created_date = ""
@expiration_date << page.search(".DetailJobNew li:nth-child(9) span").text.strip expiration_date = page.search(".DetailJobNew li:nth-child(9) span").text.strip
@salary << page.search(".DetailJobNew li:nth-child(3) span").text.strip salary = page.search(".DetailJobNew li:nth-child(3) span").text.strip
@industry_name << page.search(".DetailJobNew li:nth-child(2) span").text.strip industry_name = page.search(".DetailJobNew li:nth-child(2) span").text.strip
@description << page.search(".left-col .detail-row").to_s.delete!("[\n,\t,\r]") description = page.search(".left-col .detail-row").to_s.delete!("[\n,\t,\r]")
@level << page.search(".DetailJobNew ul li:nth-child(6) span").text.strip level = page.search(".DetailJobNew ul li:nth-child(6) span").text.strip
@exprience << page.search(".DetailJobNew li:nth-child(5) span").text.strip exprience = page.search(".DetailJobNew li:nth-child(5) span").text.strip
add_data(@name, @company_name, @city_name, @created_date, @expiration_date, @salary, @industry_name, @description, @level, @exprience) add_data(name, company_name, city_name, created_date, expiration_date, salary, industry_name, description, level, exprience)
end
def self.make_foreign_industries_table(data,id_job)
content = data.split(',')
content.each do |val|
val.gsub!('&amp;','&') if val.include?('&amp;')
id_industry = Industry.find_by name: (val.strip)
id_industry = id_industry.blank? ? Industry.create!(name: val.strip).id : id_industry.id
IndustryJob.create!(industry_id: id_industry, job_id: id_job)
end
end
def self.make_foreign_cities_table(data,id_job)
cities = data.split(',')
cities.each do |city|
id_cities = City.find_by name: city.strip
id_cities = id_cities.blank? ? City.create!(name: city.strip, area: 1).id : id_cities.id
CityJob.create!(job_id: id_job, city_id: id_cities)
end
end end
def self.make_data def self.make_data
puts 'Please wait for crawl jobs data! . . .' puts 'Please wait for crawl jobs data! . . .'
@data = {} name = ''
@name = [] company_name = ''
@company_name = [] level = ''
@level = [] exprience = ''
@exprience = [] salary = ''
@salary = [] created_date = ''
@created_date = [] expiration_date = ''
@expiration_date = [] description = ''
@description = [] industry_name = ''
@industry_name = [] city_name = ''
@city_name = []
link_crawl = get_link_job_and_companies link_crawl = get_link_job_and_companies
link_crawl[1].each_with_index do |path,i| link_crawl[1].each_with_index do |path,i|
page = Nokogiri::HTML(URI.open(URI.parse(URI.escape("https://careerbuilder.vn/#{path}")))) break if @@stop_crawl == path
page = Nokogiri::HTML(URI.open(URI.parse(URI.escape(path))))
if page.search(".item-blue .detail-box:nth-child(1) ul li:nth-child(1) p")[0] != nil if page.search(".item-blue .detail-box:nth-child(1) ul li:nth-child(1) p")[0] != nil
crawl_data_jobs_interface_1(page) crawl_data_jobs_interface_1(page)
elsif page.search("section .template-200").text != "" elsif page.search("section .template-200").text != ""
...@@ -213,11 +254,7 @@ class InterfaceWeb ...@@ -213,11 +254,7 @@ class InterfaceWeb
elsif (page.search(".DetailJobNew ul li").size == 10 && !page.search('.right-col ul li').text.include?('Độ tuổi')) elsif (page.search(".DetailJobNew ul li").size == 10 && !page.search('.right-col ul li').text.include?('Độ tuổi'))
crawl_data_jobs_interface_5(page) crawl_data_jobs_interface_5(page)
end end
puts "Process: #{i+1}/#{link_crawl[1].length}" puts "#{i} - #{path}"
end end
@data
end end
end end
\ No newline at end of file
# else # insert "page.search(".DetailJobNew ul li").size == 8" (if want catch interface 4)
# crawl_data_jobs_interface_3(path)
\ No newline at end of file
require 'src/crawler' require 'open-uri'
require 'src/ftp' require 'src/interface_web'
namespace :crawler do namespace :crawler do
task populate: :environment do task populate: :environment do
Clawler.make_industries InterfaceWeb.craw_data_companies()
Clawler.make_cities InterfaceWeb.make_data()
Clawler.make_companies
Clawler.make_jobs
end
task csv: :environment do
Company.find_or_create_by(name: 'Bảo mật', address: 'Vui lòng xem trong mô tả công việc') do |company|
company.name = 'Bảo mật'
company.address = 'Vui lòng xem trong mô tả công việc'
company.short_description = 'Vui lòng xem trong mô tả công việc'
end
FtpSever.import_data_from_csv
end end
end end
require 'src/ftp'
namespace :csv do
  # `desc` makes the task show up in `rake -T`.
  desc "Ensure the 'Bảo mật' company exists, then run FtpSever.import_data_from_csv"
  task import_csv: :environment do
    # name and address are already applied from the lookup hash when the row
    # is created, so the block only has to fill in the remaining column.
    Company.find_or_create_by(name: 'Bảo mật', address: 'Vui lòng xem trong mô tả công việc') do |company|
      company.short_description = 'Vui lòng xem trong mô tả công việc'
    end
    FtpSever.import_data_from_csv
  end
end
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment