Commit ee42f340 by Thanh Hung Pham

Feature crawler data

parent 0ee72cca
......@@ -28,7 +28,8 @@ gem 'jbuilder', '~> 2.5'
# Use Redis adapter to run Action Cable in production
# gem 'redis', '~> 3.0'
# Use ActiveModel has_secure_password
# gem 'bcrypt', '~> 3.1.7'
gem 'bcrypt', '~> 3.1.7'
gem 'whenever'
# Use Capistrano for deployment
# gem 'capistrano-rails', group: :development
......@@ -48,6 +49,8 @@ group :development do
# Spring speeds up development by keeping your application running in the background. Read more: https://github.com/rails/spring
gem 'spring'
gem 'spring-watcher-listen', '~> 2.0.0'
gem 'better_errors'
gem 'binding_of_caller'
end
# Windows does not include zoneinfo files, so bundle the tzinfo-data gem
......
......@@ -41,7 +41,14 @@ GEM
addressable (2.5.1)
public_suffix (~> 2.0, >= 2.0.2)
arel (8.0.0)
bcrypt (3.1.11)
better_errors (2.1.1)
coderay (>= 1.0.0)
erubis (>= 2.6.6)
rack (>= 0.9.0)
bindex (0.5.0)
binding_of_caller (0.7.2)
debug_inspector (>= 0.0.1)
builder (3.2.3)
byebug (9.0.6)
capybara (2.14.3)
......@@ -53,6 +60,8 @@ GEM
xpath (~> 2.0)
childprocess (0.7.0)
ffi (~> 1.0, >= 1.0.11)
chronic (0.10.2)
coderay (1.1.1)
coffee-rails (4.2.2)
coffee-script (>= 2.2.0)
railties (>= 4.0.0)
......@@ -61,7 +70,9 @@ GEM
execjs
coffee-script-source (1.12.2)
concurrent-ruby (1.0.5)
debug_inspector (0.0.3)
erubi (1.6.0)
erubis (2.7.0)
execjs (2.7.0)
ffi (1.9.18)
globalid (0.4.0)
......@@ -163,6 +174,8 @@ GEM
websocket-driver (0.6.5)
websocket-extensions (>= 0.1.0)
websocket-extensions (0.1.2)
whenever (0.9.7)
chronic (>= 0.6.3)
xpath (2.1.0)
nokogiri (~> 1.3)
......@@ -170,6 +183,9 @@ PLATFORMS
ruby
DEPENDENCIES
bcrypt (~> 3.1.7)
better_errors
binding_of_caller
byebug
capybara (~> 2.13)
coffee-rails (~> 4.2)
......@@ -186,6 +202,7 @@ DEPENDENCIES
tzinfo-data
uglifier (>= 1.3.0)
web-console (>= 3.3.0)
whenever
BUNDLED WITH
1.15.1
# A job application: links one user to one job. Both associations are declared
# without `optional`, unlike the ones on Job.
class Apply < ApplicationRecord
  belongs_to(:user)
  belongs_to(:job)
end
# A geographic grouping of cities (the crawler creates 'Viet Nam' and
# 'International').
class Area < ApplicationRecord
  # Collection associations take the plural name, so the reader is
  # `area.cities` (previously declared as the singular `:city`).
  has_many :cities
end
# A job category (industry) that jobs are linked to through JobCategory rows.
class Category < ApplicationRecord
  # Collection associations take the plural name, so the reader is
  # `category.job_categories` (previously declared as `:job_category`).
  has_many :job_categories
end
# A city that belongs to an area and is referenced by jobs.
class City < ApplicationRecord
  belongs_to :area
  # Collection associations take the plural name, so the reader is
  # `city.jobs` (previously declared as the singular `:job`).
  has_many :jobs
end
# A company that jobs can reference through jobs.company_id.
class Company < ApplicationRecord
  # Collection associations take the plural name, so the reader is
  # `company.jobs` (previously declared as the singular `:job`).
  has_many :jobs
end
# A contact person (name, email, phone) that jobs can reference.
class Contact < ApplicationRecord
  # Collection associations take the plural name, so the reader is
  # `contact.jobs` (previously declared as the singular `:job`).
  has_many :jobs
end
# Join record between a user and a job (favorites table).
class Favorite < ApplicationRecord
  belongs_to(:user)
  belongs_to(:job)
end
# Join record between a user and a job (histories table).
class History < ApplicationRecord
  belongs_to(:user)
  belongs_to(:job)
end
# A job posting. Every parent association is optional, so a job can be saved
# when the city, company, job type or contact is unknown.
class Job < ApplicationRecord
  belongs_to :city, optional: true
  belongs_to :company, optional: true
  belongs_to :job_type, optional: true
  belongs_to :contact, optional: true
  # Collection associations take the plural name, so the reader is
  # `job.job_categories` (previously declared as `:job_category`).
  has_many :job_categories
end
# Join record that attaches one category to one job.
class JobCategory < ApplicationRecord
  belongs_to(:job)
  belongs_to(:category)
end
# A type of job that jobs can reference through jobs.job_type_id.
class JobType < ApplicationRecord
  # Collection associations take the plural name, so the reader is
  # `job_type.jobs` (previously declared as the singular `:job`).
  has_many :jobs
end
# Active Record model for users. It declares no associations, validations or
# authentication helpers.
# NOTE(review): the Gemfile enables bcrypt, but this model does not call
# has_secure_password and the users table defines a plain `password` string
# column rather than `password_digest` - confirm how passwords are meant to be
# stored before any sign-up code is added.
class User < ApplicationRecord
end
......@@ -14,7 +14,7 @@ default: &default
encoding: utf8
pool: <%= ENV.fetch("RAILS_MAX_THREADS") { 5 } %>
username: root
# Read the password from the environment so no credential is committed;
# falls back to an empty password when DATABASE_PASSWORD is not set.
password: <%= ENV.fetch("DATABASE_PASSWORD") { "" } %>
socket: /var/run/mysqld/mysqld.sock
development:
......
# Use this file to easily define all of your cron jobs.
#
# It's helpful, but not entirely necessary to understand cron before proceeding.
# http://en.wikipedia.org/wiki/Cron
# Example:
#
# set :output, "/path/to/my/cron_log.log"
#
# every 2.hours do
# command "/usr/bin/some_great_command"
# runner "MyModel.some_method"
# rake "some:great:rake:task"
# end
#
# every 4.days do
# runner "AnotherModel.prune_old_records"
# end
# Learn more: http://github.com/javan/whenever
# Run the job crawler once a day at noon. The previous entry called the
# placeholder `MyModel.task_to_run`, which is not defined in this application.
every 1.day, at: '12:00 pm' do
  rake 'crawler_data:load_page'
end
# Creates the areas table: a name column plus the timestamp columns.
class CreateAreas < ActiveRecord::Migration[5.1]
  def change
    create_table(:areas) do |table|
      table.string :name
      table.timestamps
    end
  end
end
# Creates the categories table: a name column plus the timestamp columns.
class CreateCategories < ActiveRecord::Migration[5.1]
  def change
    create_table(:categories) do |table|
      table.string :name
      table.timestamps
    end
  end
end
# Creates the job_types table: a name column plus the timestamp columns.
class CreateJobTypes < ActiveRecord::Migration[5.1]
  def change
    create_table(:job_types) do |table|
      table.string :name
      table.timestamps
    end
  end
end
# Creates the companies table. Columns are declared in the original order:
# name, address, description, district, province, then timestamps.
class CreateCompanies < ActiveRecord::Migration[5.1]
  def change
    create_table(:companies) do |table|
      table.string :name, :address
      table.text   :description
      table.string :district, :province
      table.timestamps
    end
  end
end
# Creates the contacts table: name, email and phone strings plus timestamps.
class CreateContacts < ActiveRecord::Migration[5.1]
  def change
    create_table(:contacts) do |table|
      table.string :name, :email, :phone
      table.timestamps
    end
  end
end
# Creates the users table. Column order matches the original declaration.
# NOTE(review): `password` is a plain string column; confirm whether a hashed
# `password_digest` column was intended, since the Gemfile enables bcrypt.
# NOTE(review): the `activated` and `admin` booleans have no default, so they
# are NULL until explicitly set.
class CreateUsers < ActiveRecord::Migration[5.1]
  def change
    create_table(:users) do |table|
      table.string   :email, :password, :fullname, :reset_digest
      table.datetime :reset_sent_at
      table.string   :activation_digest
      table.boolean  :activated
      table.datetime :activated_at
      table.boolean  :admin
      table.string   :cv_name
      table.timestamps
    end
  end
end
# Creates the cities table: a name and an indexed reference to an area.
class CreateCities < ActiveRecord::Migration[5.1]
  def change
    create_table(:cities) do |table|
      table.string     :name
      table.belongs_to :area, index: true
      table.timestamps
    end
  end
end
# Creates the jobs table with indexed references to city, company, job_type
# and contact. Column order matches the original declaration.
class CreateJobs < ActiveRecord::Migration[5.1]
  def change
    create_table(:jobs) do |table|
      table.string     :name
      table.text       :description
      table.belongs_to :city, index: true
      table.string     :salary
      table.belongs_to :company, index: true
      table.text       :benefit
      table.string     :level
      table.text       :requirement
      table.belongs_to :job_type, index: true
      table.belongs_to :contact, index: true
      table.datetime   :expiry_date
      table.string     :experience
      table.integer    :status
      table.timestamps
    end
  end
end
# Creates the job_categories join table with indexed references to a job and
# a category.
class CreateJobCategories < ActiveRecord::Migration[5.1]
  def change
    create_table(:job_categories) do |table|
      table.belongs_to :job, index: true
      table.belongs_to :category, index: true
      table.timestamps
    end
  end
end
# Creates the favorites join table with indexed references to a user and a job.
class CreateFavorites < ActiveRecord::Migration[5.1]
  def change
    create_table(:favorites) do |table|
      table.belongs_to :user, index: true
      table.belongs_to :job, index: true
      table.timestamps
    end
  end
end
# Creates the histories join table with indexed references to a user and a job.
class CreateHistories < ActiveRecord::Migration[5.1]
  def change
    create_table(:histories) do |table|
      table.belongs_to :user, index: true
      table.belongs_to :job, index: true
      table.timestamps
    end
  end
end
# Creates the applies table: indexed references to a user and a job, plus the
# time and client details of the application.
# NOTE(review): `user_agrent` looks like a typo for `user_agent`. The name is
# kept unchanged here because schema.rb and the fixtures use the same spelling.
class CreateApplies < ActiveRecord::Migration[5.1]
  def change
    create_table(:applies) do |table|
      table.belongs_to :user, index: true
      table.belongs_to :job, index: true
      table.datetime   :applied_at
      table.string     :ip_address, :user_agrent
      table.timestamps
    end
  end
end
# This file is auto-generated from the current state of the database. Instead
# of editing this file, please use the migrations feature of Active Record to
# incrementally modify your database, and then regenerate this schema definition.
#
# Note that this schema.rb definition is the authoritative source for your
# database schema. If you need to create the application database on another
# system, you should be using db:schema:load, not running all the migrations
# from scratch. The latter is a flawed and unsustainable approach (the more migrations
# you'll amass, the slower it'll run and the greater likelihood for issues).
#
# It's strongly recommended that you check this file into your version control system.
ActiveRecord::Schema.define(version: 20170628020034) do
create_table "applies", force: :cascade, options: "ENGINE=InnoDB DEFAULT CHARSET=utf8" do |t|
t.bigint "user_id"
t.bigint "job_id"
t.datetime "applied_at"
t.string "ip_address"
t.string "user_agrent"
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
t.index ["job_id"], name: "index_applies_on_job_id"
t.index ["user_id"], name: "index_applies_on_user_id"
end
create_table "areas", force: :cascade, options: "ENGINE=InnoDB DEFAULT CHARSET=utf8" do |t|
t.string "name"
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
end
create_table "categories", force: :cascade, options: "ENGINE=InnoDB DEFAULT CHARSET=utf8" do |t|
t.string "name"
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
end
create_table "cities", force: :cascade, options: "ENGINE=InnoDB DEFAULT CHARSET=utf8" do |t|
t.string "name"
t.bigint "area_id"
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
t.index ["area_id"], name: "index_cities_on_area_id"
end
create_table "companies", force: :cascade, options: "ENGINE=InnoDB DEFAULT CHARSET=utf8" do |t|
t.string "name"
t.string "address"
t.text "description"
t.string "district"
t.string "province"
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
end
create_table "contacts", force: :cascade, options: "ENGINE=InnoDB DEFAULT CHARSET=utf8" do |t|
t.string "name"
t.string "email"
t.string "phone"
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
end
create_table "favorites", force: :cascade, options: "ENGINE=InnoDB DEFAULT CHARSET=utf8" do |t|
t.bigint "user_id"
t.bigint "job_id"
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
t.index ["job_id"], name: "index_favorites_on_job_id"
t.index ["user_id"], name: "index_favorites_on_user_id"
end
create_table "histories", force: :cascade, options: "ENGINE=InnoDB DEFAULT CHARSET=utf8" do |t|
t.bigint "user_id"
t.bigint "job_id"
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
t.index ["job_id"], name: "index_histories_on_job_id"
t.index ["user_id"], name: "index_histories_on_user_id"
end
create_table "job_categories", force: :cascade, options: "ENGINE=InnoDB DEFAULT CHARSET=utf8" do |t|
t.bigint "job_id"
t.bigint "category_id"
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
t.index ["category_id"], name: "index_job_categories_on_category_id"
t.index ["job_id"], name: "index_job_categories_on_job_id"
end
create_table "job_types", force: :cascade, options: "ENGINE=InnoDB DEFAULT CHARSET=utf8" do |t|
t.string "name"
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
end
create_table "jobs", force: :cascade, options: "ENGINE=InnoDB DEFAULT CHARSET=utf8" do |t|
t.string "name"
t.text "description"
t.bigint "city_id"
t.string "salary"
t.bigint "company_id"
t.text "benefit"
t.string "level"
t.text "requirement"
t.bigint "job_type_id"
t.bigint "contact_id"
t.datetime "expiry_date"
t.string "experience"
t.integer "status"
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
t.index ["city_id"], name: "index_jobs_on_city_id"
t.index ["company_id"], name: "index_jobs_on_company_id"
t.index ["contact_id"], name: "index_jobs_on_contact_id"
t.index ["job_type_id"], name: "index_jobs_on_job_type_id"
end
create_table "users", force: :cascade, options: "ENGINE=InnoDB DEFAULT CHARSET=utf8" do |t|
t.string "email"
t.string "password"
t.string "fullname"
t.string "reset_digest"
t.datetime "reset_sent_at"
t.string "activation_digest"
t.boolean "activated"
t.datetime "activated_at"
t.boolean "admin"
t.string "cv_name"
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
end
end
require 'nokogiri'
require 'open-uri'
require 'rake'
# Rake tasks that scrape careerbuilder.vn and store areas, categories, cities,
# companies and jobs through the application's Active Record models.
#
# Methods defined with `def` inside a `namespace` block are not scoped to the
# namespace; they become private methods on Object. The helpers added here
# therefore carry a `crawler_` prefix to avoid name clashes.
namespace :crawler_data do
  # Loads the home page, imports the reference data found in its search form,
  # then crawls the job listing linked from the navigation menu.
  task load_page: :environment do
    home = crawler_fetch_document(crawler_base_url)
    import_area
    import_category(home)
    import_city(home)
    new_jobs_link = home.css('div.logo_nav li.hasmenu li a').first
    unless new_jobs_link && new_jobs_link['href']
      raise 'crawler_data: link to the job listing was not found on the home page'
    end
    import_job(crawler_absolute_url(new_jobs_link['href']))
  end

  # Root URL of the crawled site; also the base for resolving relative links.
  def crawler_base_url
    'http://careerbuilder.vn'
  end

  # Percent-encodes characters that are not allowed in a URI (spaces,
  # non-ASCII bytes, ...). This is the same escaping the obsolete URI.encode
  # performed, so an href that is already percent-encoded gets encoded again,
  # exactly as before.
  #
  # @param href [String] raw href attribute value
  # @return [String] escaped URL
  def crawler_escape_url(href)
    URI::RFC2396_Parser.new.escape(href)
  end

  # Resolves +href+ against +base+. An href that is already absolute is
  # returned unchanged.
  #
  # @param href [String] absolute or relative link
  # @param base [String] URL that relative links are resolved against
  # @return [String] absolute URL
  # @raise [URI::InvalidURIError] when href is not a valid URI reference
  def crawler_absolute_url(href, base = crawler_base_url)
    URI.join(base, href).to_s
  end

  # Downloads +url+ and parses it as HTML.
  #
  # The URL is opened through URI#open rather than Kernel#open: links come
  # from a remote page, and Kernel#open would run a command for a string that
  # starts with "|" or open a local file for a bare path.
  #
  # @param url [String] http(s) URL
  # @return [Nokogiri::HTML::Document]
  def crawler_fetch_document(url)
    URI.parse(url.to_s).open { |io| Nokogiri::HTML(io) }
  end

  # Ensures the two areas that cities are assigned to exist.
  def import_area
    ['Viet Nam', 'International'].each { |name| Area.find_or_create_by(name: name) }
  end

  # Creates a Category for every <option> of the industry select box, skipping
  # the first and the last option as the original code did (presumably
  # placeholder entries - TODO confirm against the live markup).
  #
  # @param doc [Nokogiri::HTML::Document] parsed home page
  def import_category(doc)
    options = doc.css('div.s-home2 div#NewSearchJob3 div.box_multiSelect_industry option').to_a
    # Array() turns the nil returned for an empty option list into [].
    Array(options[1..-2]).each do |option|
      Category.find_or_create_by(name: option.text)
    end
  end

  # Creates a City for every <option> of the location select box except the
  # first. Options up to 'Angola' go to the 'Viet Nam' area; 'Angola' and all
  # later options go to 'International'.
  #
  # Areas are looked up by name instead of by the hard-coded ids 1 and 2, so
  # the import no longer depends on the primary keys the database assigned.
  #
  # @param doc [Nokogiri::HTML::Document] parsed home page
  def import_city(doc)
    options = doc.css('div.s-home2 div#NewSearchJob3 div.box_multiSelect_location option').drop(1)
    area = Area.find_or_create_by(name: 'Viet Nam')
    options.each do |option|
      area = Area.find_or_create_by(name: 'International') if option.text == 'Angola'
      City.create_with(area: area).find_or_create_by(name: option.text)
    end
  end

  # Crawls up to 10 listing pages starting at +url+ and imports every job
  # linked from them. Stops early when a page has no "next page" link.
  #
  # NOTE(review): every run inserts a new Job row for each listed job; nothing
  # here de-duplicates jobs across runs.
  #
  # @param url [String] URL of the first listing page
  def import_job(url)
    10.times do
      listing = crawler_fetch_document(url)
      listing.encoding = 'utf-8'
      listing.css('div.gird_standard dl dd h3 a').each do |link|
        href = link['href']
        next if href.nil? || href.strip.empty?
        crawler_import_job_page(crawler_absolute_url(crawler_escape_url(href)))
      end
      next_link = listing.css('div.paginationTwoStatus a.right').first
      next_href = next_link && next_link['href']
      break unless next_href # last page reached
      url = crawler_absolute_url(next_href)
    end
  end
  # The method was originally named `inport_job` (typo); keep that name working.
  alias inport_job import_job

  # Imports one job detail page: the company, the job and its category links.
  #
  # @param url [String] absolute URL of the job detail page
  def crawler_import_job_page(url)
    page = crawler_fetch_document(url)
    company = crawler_import_company(page)
    details = crawler_job_details(page)
    job = Job.create(
      name: page.css('div.LeftJobCB h1').text,
      description: page.css('div.MarBot20').text,
      salary: details[:salary],
      city: City.find_by(name: details[:location]),
      company: company,
      level: details[:level],
      experience: details[:experience],
      # NOTE(review): stored through Active Record's datetime cast of the
      # scraped text - confirm the date format the site uses.
      expiry_date: details[:expiry_date],
      status: 0
    )
    return unless job.persisted?

    # Link the record that was just saved rather than re-finding a job by
    # name, which could pick an older job with the same title.
    details.fetch(:categories, []).each do |category_name|
      category = Category.find_by(name: category_name)
      JobCategory.create(job: job, category: category) if category
    end
  end

  # Finds the company named on the job page, creating it on first sight.
  #
  # @param page [Nokogiri::HTML::Document] parsed job detail page
  # @return [Company]
  def crawler_import_company(page)
    Company.create_with(
      address: page.css("div.box1Detail label[itemprop='addressLocality']").text,
      description: page.css('div.desc_company').text
    ).find_or_create_by(name: page.css('div.tit_company').text)
  end

  # Reads the labelled rows of the job detail list into a fresh hash, so a
  # value that is missing on one page cannot be inherited from the previously
  # imported job (the original kept these values in instance variables).
  #
  # @param page [Nokogiri::HTML::Document] parsed job detail page
  # @return [Hash] any of :location, :level, :experience, :salary,
  #   :categories (Array<String>) and :expiry_date
  def crawler_job_details(page)
    page.css('ul.DetailJobNew span').each_with_object({}) do |label, details|
      row = label.parent
      case label.text.strip
      when 'Nơi làm việc:' # job location
        details[:location] = row.css("b[itemprop='jobLocation']").text.strip
      when 'Cấp bậc:' # job level
        details[:level] = row.css("label[itemprop='occupationalCategory']").text
      when 'Kinh nghiệm:' # job experience
        details[:experience] = row.children.last.text
      when 'Lương:' # job salary: amount followed by currency
        amount = row.css("label[itemprop='baseSalary']").text
        currency = row.css("label[itemprop='salaryCurrency']").text
        details[:salary] = "#{amount}#{currency}"
      when 'Ngành nghề:' # job categories
        details[:categories] = crawler_category_names(row.css("b a[itemprop='industry']").map(&:text))
      when 'Hết hạn nộp:' # job expiry date
        details[:expiry_date] = row.children.last.text.strip
      end
    end
  end

  # Splits scraped category texts into clean names. Each text may hold one
  # name or several comma-separated names; surrounding whitespace, blank
  # entries and duplicates are removed so the names match Category rows.
  #
  # @param texts [Array<String>] text of each category link
  # @return [Array<String>]
  def crawler_category_names(texts)
    texts.flat_map { |text| text.split(',') }.map(&:strip).reject(&:empty?).uniq
  end
end
# Read about fixtures at http://api.rubyonrails.org/classes/ActiveRecord/FixtureSet.html
one:
user: one
job: one
applied_at: 2017-06-28 09:00:34
ip_address: MyString
user_agrent: MyString
two:
user: two
job: two
applied_at: 2017-06-28 09:00:34
ip_address: MyString
user_agrent: MyString
# Read about fixtures at http://api.rubyonrails.org/classes/ActiveRecord/FixtureSet.html
one:
name: MyString
two:
name: MyString
# Read about fixtures at http://api.rubyonrails.org/classes/ActiveRecord/FixtureSet.html
one:
name: MyString
two:
name: MyString
# Read about fixtures at http://api.rubyonrails.org/classes/ActiveRecord/FixtureSet.html
one:
name: MyString
area: one
two:
name: MyString
area: two
# Read about fixtures at http://api.rubyonrails.org/classes/ActiveRecord/FixtureSet.html
one:
name: MyString
address: MyString
description: MyString
district: MyString
province: MyString
two:
name: MyString
address: MyString
description: MyString
district: MyString
province: MyString
# Read about fixtures at http://api.rubyonrails.org/classes/ActiveRecord/FixtureSet.html
one:
name: MyString
email: MyString
phone: MyString
two:
name: MyString
email: MyString
phone: MyString
# Read about fixtures at http://api.rubyonrails.org/classes/ActiveRecord/FixtureSet.html
one:
user: one
job: one
two:
user: two
job: two
# Read about fixtures at http://api.rubyonrails.org/classes/ActiveRecord/FixtureSet.html
one:
user: one
job: one
two:
user: two
job: two
# Read about fixtures at http://api.rubyonrails.org/classes/ActiveRecord/FixtureSet.html
one:
job: one
category: one
two:
job: two
category: two
# Read about fixtures at http://api.rubyonrails.org/classes/ActiveRecord/FixtureSet.html
one:
name: MyString
two:
name: MyString
# Read about fixtures at http://api.rubyonrails.org/classes/ActiveRecord/FixtureSet.html
one:
name: MyString
description: MyString
city: one
salary: MyString
company: one
benefit: MyString
level: MyString
requirement: MyString
job_type: one
contact: one
expiry_date: 2017-06-28 08:53:37
experience: MyString
status: 1
two:
name: MyString
description: MyString
city: two
salary: MyString
company: two
benefit: MyString
level: MyString
requirement: MyString
job_type: two
contact: two
expiry_date: 2017-06-28 08:53:37
experience: MyString
status: 1
# Read about fixtures at http://api.rubyonrails.org/classes/ActiveRecord/FixtureSet.html
one:
email: MyString
password: MyString
fullname: MyString
reset_digest: MyString
reset_sent_at: 2017-06-28 08:47:36
activation_digest: MyString
activated: false
activated_at: 2017-06-28 08:47:36
admin: false
cv_name: MyString
two:
email: MyString
password: MyString
fullname: MyString
reset_digest: MyString
reset_sent_at: 2017-06-28 08:47:36
activation_digest: MyString
activated: false
activated_at: 2017-06-28 08:47:36
admin: false
cv_name: MyString
require 'test_helper'
# Test case for Apply (per the class name). It defines no tests yet; only a
# commented-out placeholder test is present.
class ApplyTest < ActiveSupport::TestCase
# test "the truth" do
# assert true
# end
end
require 'test_helper'
# Test case for Area (per the class name). It defines no tests yet; only a
# commented-out placeholder test is present.
class AreaTest < ActiveSupport::TestCase
# test "the truth" do
# assert true
# end
end
require 'test_helper'
# Test case for Category (per the class name). It defines no tests yet; only a
# commented-out placeholder test is present.
class CategoryTest < ActiveSupport::TestCase
# test "the truth" do
# assert true
# end
end
require 'test_helper'
# Test case for City (per the class name). It defines no tests yet; only a
# commented-out placeholder test is present.
class CityTest < ActiveSupport::TestCase
# test "the truth" do
# assert true
# end
end
require 'test_helper'
# Test case for Company (per the class name). It defines no tests yet; only a
# commented-out placeholder test is present.
class CompanyTest < ActiveSupport::TestCase
# test "the truth" do
# assert true
# end
end
require 'test_helper'
# Test case for Contact (per the class name). It defines no tests yet; only a
# commented-out placeholder test is present.
class ContactTest < ActiveSupport::TestCase
# test "the truth" do
# assert true
# end
end
require 'test_helper'
# Test case for Favorite (per the class name). It defines no tests yet; only a
# commented-out placeholder test is present.
class FavoriteTest < ActiveSupport::TestCase
# test "the truth" do
# assert true
# end
end
require 'test_helper'
# Test case for History (per the class name). It defines no tests yet; only a
# commented-out placeholder test is present.
class HistoryTest < ActiveSupport::TestCase
# test "the truth" do
# assert true
# end
end
require 'test_helper'
# Test case for JobCategory (per the class name). It defines no tests yet; only
# a commented-out placeholder test is present.
class JobCategoryTest < ActiveSupport::TestCase
# test "the truth" do
# assert true
# end
end
require 'test_helper'
# Test case for Job (per the class name). It defines no tests yet; only a
# commented-out placeholder test is present.
class JobTest < ActiveSupport::TestCase
# test "the truth" do
# assert true
# end
end
require 'test_helper'
# Test case for JobType (per the class name). It defines no tests yet; only a
# commented-out placeholder test is present.
class JobTypeTest < ActiveSupport::TestCase
# test "the truth" do
# assert true
# end
end
require 'test_helper'
# Test case for User (per the class name). It defines no tests yet; only a
# commented-out placeholder test is present.
class UserTest < ActiveSupport::TestCase
# test "the truth" do
# assert true
# end
end
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment