Commit 81cfe475 by Ngo Trung Hung

fix

parent 9387da00
...@@ -14,7 +14,7 @@ default: &default ...@@ -14,7 +14,7 @@ default: &default
encoding: utf8 encoding: utf8
pool: <%= ENV.fetch("RAILS_MAX_THREADS") { 5 } %> pool: <%= ENV.fetch("RAILS_MAX_THREADS") { 5 } %>
username: root username: root
password: '1' password: '12345678'
socket: /var/run/mysqld/mysqld.sock socket: /var/run/mysqld/mysqld.sock
......
require 'open-uri' require 'open-uri'
require 'src/interface_web' require 'src/interface_web'
class Clawler class Clawler
@page = Nokogiri::HTML(URI.open('https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-vi.html')) @@page = Nokogiri::HTML(URI.open('https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-vi.html'))
# PILL DATA CITIES # PILL DATA CITIES
def self.make_cities def self.make_cities
@data_list_cities = [] puts "Crawling data location... \n. \n. \n."
data = @page.search("#location option") data_list_cities = []
data = @@page.search("#location option")
list_cities = data.to_s.split("</option>") list_cities = data.to_s.split("</option>")
list_cities.each do |x| list_cities.each do |x|
@data_list_cities << x.gsub(/(^<[\w\D]*>)/, '').gsub(/\n/,'').rstrip data_list_cities << x.gsub(/(^<[\w\D]*>)/, '').gsub(/\n/,'').rstrip
end end
puts "Save data to database... \n"
@data_list_cities.length.times do |i| data_list_cities.length.times do |i|
area = i > 69 ? 0 : 1 area = i > 69 ? 0 : 1
name = (@data_list_cities[i].to_s) name = (data_list_cities[i].to_s)
City.create!(name: name, area: area) City.create!(name: name, area: area)
end end
end end
#PIL DATA INDUSTRIES #PIL DATA INDUSTRIES
def self.make_industries def self.make_industries
@data_list_industries = [] puts "Crawling data industries... \n. \n. \n."
data = @page.search("#industry option") data_list_industries = []
data = @@page.search("#industry option")
list_industries = data.to_s.split("</option>") list_industries = data.to_s.split("</option>")
list_industries.each do |x| list_industries.each do |x|
@data_list_industries << x.gsub(/(^<[\w\D]*>)/, '').gsub(/\n/,'').strip data_list_industries << x.gsub(/(^<[\w\D]*>)/, '').gsub(/\n/,'').strip
end end
puts "Save data to database... \n"
@data_list_industries.length.times do |i| data_list_industries.length.times do |i|
name = @data_list_industries[i].to_s name = data_list_industries[i].to_s
if name.include?('&amp;') if name.include?('&amp;')
name.gsub!('&amp;','&') name.gsub!('&amp;','&')
end end
......
class Interface_web class Interface_web
# func get "n" link company & job # func get "n" link company & job
debugger
def self.crawl_link_for_companies_jobs(page) def self.crawl_link_for_companies_jobs(page)
puts "Crawling link on page...\nPLease wait...\n"
data = [] data = []
website_companies = [] website_companies = []
website_jobs = [] website_jobs = []
...@@ -18,12 +20,14 @@ class Interface_web ...@@ -18,12 +20,14 @@ class Interface_web
website_jobs = website_jobs.join(",") website_jobs = website_jobs.join(",")
website_jobs = website_jobs.split(",") website_jobs = website_jobs.split(",")
website_jobs = website_jobs.select { |val| val != ''} website_jobs = website_jobs.select { |val| val != ''}
puts "Result:\nCompany: #{website_companies.length} link\nJob : #{website_jobs} link"
data << website_companies << website_jobs data << website_companies << website_jobs
end end
@crawl_link_for_companies_jobs = crawl_link_for_companies_jobs(5)
@crawl_link_for_companies_jobs = crawl_link_for_companies_jobs(1)
def self.get_link_job_and_companies def self.get_link_job_and_companies
@crawl_link_for_companies_jobs ||= crawl_link_for_companies_jobs(5) @crawl_link_for_companies_jobs ||= crawl_link_for_companies_jobs(1)
end end
def self.base_link(url) def self.base_link(url)
......
...@@ -2,8 +2,8 @@ require 'src/crawler' ...@@ -2,8 +2,8 @@ require 'src/crawler'
namespace :db do namespace :db do
task populate: :environment do task populate: :environment do
# Clawler.make_industries # Clawler.make_industries
Clawler.make_cities # Clawler.make_cities
Clawler.make_companies Clawler.make_companies
Clawler.make_jobs # Clawler.make_jobs
end end
end end
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment