Commit 81cfe475 by Ngo Trung Hung

fix

parent 9387da00
......@@ -14,7 +14,7 @@ default: &default
encoding: utf8
pool: <%= ENV.fetch("RAILS_MAX_THREADS") { 5 } %>
username: root
password: '1'
password: '12345678'
socket: /var/run/mysqld/mysqld.sock
......
require 'open-uri'
require 'src/interface_web'
class Clawler
@page = Nokogiri::HTML(URI.open('https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-vi.html'))
@@page = Nokogiri::HTML(URI.open('https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-vi.html'))
# Crawl city data (the page's "#location" options) and save each entry as a City record
def self.make_cities
@data_list_cities = []
data = @page.search("#location option")
puts "Crawling data location... \n. \n. \n."
data_list_cities = []
data = @@page.search("#location option")
list_cities = data.to_s.split("</option>")
list_cities.each do |x|
@data_list_cities << x.gsub(/(^<[\w\D]*>)/, '').gsub(/\n/,'').rstrip
data_list_cities << x.gsub(/(^<[\w\D]*>)/, '').gsub(/\n/,'').rstrip
end
@data_list_cities.length.times do |i|
puts "Save data to database... \n"
data_list_cities.length.times do |i|
area = i > 69 ? 0 : 1
name = (@data_list_cities[i].to_s)
name = (data_list_cities[i].to_s)
City.create!(name: name, area: area)
end
end
# Crawl industry data (the page's "#industry" options)
def self.make_industries
@data_list_industries = []
data = @page.search("#industry option")
puts "Crawling data industries... \n. \n. \n."
data_list_industries = []
data = @@page.search("#industry option")
list_industries = data.to_s.split("</option>")
list_industries.each do |x|
@data_list_industries << x.gsub(/(^<[\w\D]*>)/, '').gsub(/\n/,'').strip
data_list_industries << x.gsub(/(^<[\w\D]*>)/, '').gsub(/\n/,'').strip
end
@data_list_industries.length.times do |i|
name = @data_list_industries[i].to_s
puts "Save data to database... \n"
data_list_industries.length.times do |i|
name = data_list_industries[i].to_s
if name.include?('&amp;')
name.gsub!('&amp;','&')
end
......
class Interface_web
# Function to get "n" links for companies and jobs
debugger
def self.crawl_link_for_companies_jobs(page)
puts "Crawling link on page...\nPLease wait...\n"
data = []
website_companies = []
website_jobs = []
......@@ -18,12 +20,14 @@ class Interface_web
website_jobs = website_jobs.join(",")
website_jobs = website_jobs.split(",")
website_jobs = website_jobs.select { |val| val != ''}
puts "Result:\nCompany: #{website_companies.length} link\nJob : #{website_jobs} link"
data << website_companies << website_jobs
end
@crawl_link_for_companies_jobs = crawl_link_for_companies_jobs(5)
@crawl_link_for_companies_jobs = crawl_link_for_companies_jobs(1)
def self.get_link_job_and_companies
@crawl_link_for_companies_jobs ||= crawl_link_for_companies_jobs(5)
@crawl_link_for_companies_jobs ||= crawl_link_for_companies_jobs(1)
end
def self.base_link(url)
......
......@@ -2,8 +2,8 @@ require 'src/crawler'
namespace :db do
task populate: :environment do
# Clawler.make_industries
Clawler.make_cities
# Clawler.make_cities
Clawler.make_companies
Clawler.make_jobs
# Clawler.make_jobs
end
end
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment