Commit 661293a4 by Xuan Trung Le

Implement data crawler for CareerBuilder

parent fe41d8a4
...@@ -11,5 +11,4 @@ ...@@ -11,5 +11,4 @@
// about supported directives.
//
//= require rails-ujs
//= require turbolinks
//= require_tree .
// Place all the behaviors and hooks related to the matching controller here.
// All this logic will automatically be available in application.js.
// Place all the styles related to the datas controller here.
// They will automatically be included in application.css.
// You can use Sass (SCSS) here: http://sass-lang.com/
# Crawls job postings from careerbuilder.vn on each request and hands the
# parsed postings to the index view.
class DatasController < ApplicationController
  require 'nokogiri'
  require 'open-uri'

  BASE_CAREERBUILDER_URL = "https://careerbuilder.vn".freeze
  LIST_URL = "#{BASE_CAREERBUILDER_URL}/viec-lam".freeze

  # Number of job links taken from the listing page per crawl.
  JOBS_PER_CRAWL = 5

  # Upper-cased label of an employment-detail row => key the value is stored under.
  DETAIL_KEYS = {
    'NƠI LÀM VIỆC' => :city,
    'CẤP BẬC'      => :level,
    'KINH NGHIỆM'  => :experience,
    'LƯƠNG'        => :salary,
    'NGÀNH NGHỀ'   => :industry,
    'HẾT HẠN NỘP'  => :expiry_date
  }.freeze

  # GET datas/index (also the root route): exposes the crawled postings as @datas.
  def index
    @datas = crawl_data
  end

  # Fetches the first JOBS_PER_CRAWL job pages linked from the listing page
  # and parses each into a hash.
  #
  # @return [Array<Hash>] one hash per job; see #parse_job for the keys
  def crawl_data
    get_link.first(JOBS_PER_CRAWL).map do |link|
      Rails.logger.info("Fetching #{link}...")
      parse_job(fetch_document(link))
    end
  end

  # Collects the job-detail links found on one listing page.
  #
  # @param page [Integer] listing page number (defaults to the first page)
  # @return [Array<String>] unique, non-nil href values in document order
  def get_link(page = 1)
    listing = fetch_document("#{LIST_URL}/tat-ca-viec-lam-trang-#{page}-vi.html")
    listing.css('.brief .jobtitle .job a').map { |a| a['href'] }.compact.uniq
  end

  private

  # Downloads +url+ and parses it as HTML.
  #
  # Goes through URI#open (open-uri) instead of Kernel#open: Kernel#open runs
  # a shell command when given a string starting with "|", and it no longer
  # opens URLs on Ruby 3. URI.join also resolves a relative href against the
  # site root. The block form closes the downloaded IO once parsing is done.
  #
  # @param url [String] absolute URL, or a path relative to BASE_CAREERBUILDER_URL
  # @return [Nokogiri::HTML::Document]
  def fetch_document(url)
    URI.join(BASE_CAREERBUILDER_URL, url).open { |io| Nokogiri::HTML(io) }
  end

  # Extracts the fields of a single job page.
  #
  # @param doc [Nokogiri::HTML::Document] parsed job-detail page
  # @return [Hash] :name, :company_name, :updated_date, plus whichever
  #   DETAIL_KEYS values and :description the page provides
  def parse_job(doc)
    # Named +job+ rather than +params+ so the controller's request params
    # are not shadowed.
    job = {
      name:         doc.css('.top-job .top-job-info h1').text,
      company_name: doc.css('.top-job .top-job-info .tit_company').text,
      updated_date: doc.css('.top-job .datepost').text
    }

    # Employment information: each node is expected to read "LABEL: value".
    doc.css('.MyJobLeft .box2Detail .DetailJobNew li').children.each do |node|
      # Split on the first colon only so a value containing ":" stays intact.
      label, value = node.text.gsub(/\t|\n/, '').split(':', 2)
      next if label.blank?

      key = DETAIL_KEYS[label.strip.upcase]
      job[key] = value.to_s.strip.presence if key
    end

    # Job description: keep every child node, not just the last one.
    description = doc.css('.MarBot20').children.map(&:to_html).join
    job[:description] = description unless description.empty?

    job
  end
end
# Helper namespace paired by name with DatasController; it defines no methods.
module DatasHelper; end
# Application routes: the datas index page (also served at the root path)
# and the Devise routes for users.
Rails.application.routes.draw do
  get 'datas/index'
  devise_for :users
  # For details on the DSL available within this file, see http://guides.rubyonrails.org/routing.html
  root 'datas#index'
end
require 'test_helper'
# Integration test for the datas index page.
class DatasControllerTest < ActionDispatch::IntegrationTest
  # Requests the index URL and expects a successful response.
  # NOTE(review): the index action crawls careerbuilder.vn, so this test
  # appears to depend on network access — confirm.
  test 'should get index' do
    get(datas_index_url)
    assert_response(:success)
  end
end
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment