Commit 3ddf4fbe by Ba Toi Dang

Merge branch 'crawl_data_rake' into 'master'

Crawl data rake

See merge request !1
parents d3f43332 35782d33
Pipeline #471 failed with stages
in 0 seconds
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
...@@ -52,3 +52,5 @@ end ...@@ -52,3 +52,5 @@ end
# Windows does not include zoneinfo files, so bundle the tzinfo-data gem # Windows does not include zoneinfo files, so bundle the tzinfo-data gem
gem 'tzinfo-data', platforms: [:mingw, :mswin, :x64_mingw, :jruby] gem 'tzinfo-data', platforms: [:mingw, :mswin, :x64_mingw, :jruby]
#mysql2
gem 'mysql2'
...@@ -62,8 +62,8 @@ GEM ...@@ -62,8 +62,8 @@ GEM
bootsnap (1.4.5) bootsnap (1.4.5)
msgpack (~> 1.0) msgpack (~> 1.0)
builder (3.2.4) builder (3.2.4)
byebug (11.1.0) byebug (11.1.1)
capybara (3.30.0) capybara (3.31.0)
addressable addressable
mini_mime (>= 0.1.3) mini_mime (>= 0.1.3)
nokogiri (~> 1.8) nokogiri (~> 1.8)
...@@ -72,16 +72,16 @@ GEM ...@@ -72,16 +72,16 @@ GEM
regexp_parser (~> 1.5) regexp_parser (~> 1.5)
xpath (~> 3.2) xpath (~> 3.2)
childprocess (3.0.0) childprocess (3.0.0)
concurrent-ruby (1.1.5) concurrent-ruby (1.1.6)
crass (1.0.6) crass (1.0.6)
erubi (1.9.0) erubi (1.9.0)
ffi (1.12.1) ffi (1.12.2)
globalid (0.4.2) globalid (0.4.2)
activesupport (>= 4.2.0) activesupport (>= 4.2.0)
i18n (1.8.2) i18n (1.8.2)
concurrent-ruby (~> 1.0) concurrent-ruby (~> 1.0)
jbuilder (2.9.1) jbuilder (2.10.0)
activesupport (>= 4.2.0) activesupport (>= 5.0.0)
listen (3.1.5) listen (3.1.5)
rb-fsevent (~> 0.9, >= 0.9.4) rb-fsevent (~> 0.9, >= 0.9.4)
rb-inotify (~> 0.9, >= 0.9.7) rb-inotify (~> 0.9, >= 0.9.7)
...@@ -94,18 +94,19 @@ GEM ...@@ -94,18 +94,19 @@ GEM
marcel (0.3.3) marcel (0.3.3)
mimemagic (~> 0.3.2) mimemagic (~> 0.3.2)
method_source (0.9.2) method_source (0.9.2)
mimemagic (0.3.3) mimemagic (0.3.4)
mini_mime (1.0.2) mini_mime (1.0.2)
mini_portile2 (2.4.0) mini_portile2 (2.4.0)
minitest (5.14.0) minitest (5.14.0)
msgpack (1.3.1) msgpack (1.3.3)
mysql2 (0.5.3)
nio4r (2.5.2) nio4r (2.5.2)
nokogiri (1.10.7) nokogiri (1.10.8)
mini_portile2 (~> 2.4.0) mini_portile2 (~> 2.4.0)
public_suffix (4.0.3) public_suffix (4.0.3)
puma (4.3.1) puma (4.3.1)
nio4r (~> 2.0) nio4r (~> 2.0)
rack (2.1.1) rack (2.2.2)
rack-proxy (0.6.5) rack-proxy (0.6.5)
rack rack
rack-test (1.1.0) rack-test (1.1.0)
...@@ -142,7 +143,7 @@ GEM ...@@ -142,7 +143,7 @@ GEM
ffi (~> 1.0) ffi (~> 1.0)
regexp_parser (1.6.0) regexp_parser (1.6.0)
ruby_dep (1.5.0) ruby_dep (1.5.0)
rubyzip (2.0.0) rubyzip (2.2.0)
sass-rails (6.0.0) sass-rails (6.0.0)
sassc-rails (~> 2.1, >= 2.1.1) sassc-rails (~> 2.1, >= 2.1.1)
sassc (2.2.1) sassc (2.2.1)
...@@ -205,6 +206,7 @@ DEPENDENCIES ...@@ -205,6 +206,7 @@ DEPENDENCIES
capybara (>= 2.15) capybara (>= 2.15)
jbuilder (~> 2.7) jbuilder (~> 2.7)
listen (>= 3.0.5, < 3.2) listen (>= 3.0.5, < 3.2)
mysql2
puma (~> 4.1) puma (~> 4.1)
rails (~> 6.0.2, >= 6.0.2.1) rails (~> 6.0.2, >= 6.0.2.1)
sass-rails (>= 6) sass-rails (>= 6)
......
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
# Join record tying one user to one job (both associations are declared
# with belongs_to).
class AppliedJob < ApplicationRecord
  belongs_to :job
  belongs_to :user
end
# A region that owns cities and jobs. An area_name must be present.
class Area < ApplicationRecord
  has_many :cities
  has_many :jobs

  validates :area_name, presence: true
end
# A city inside an area; it owns jobs and must have a city_name.
class City < ApplicationRecord
  belongs_to :area
  has_many :jobs

  validates :city_name, presence: true
end
# An employer that owns jobs; company_name must be present.
class Company < ApplicationRecord
  has_many :jobs

  validates :company_name, presence: true
end
File mode changed from 100644 to 100755
# An industry category, linked to jobs through industry_jobs records.
# industry_name must be present.
class Industry < ApplicationRecord
  has_many :industry_jobs

  validates :industry_name, presence: true
end
# Join record between an industry and a job.
class IndustryJob < ApplicationRecord
  belongs_to :industry
  # Each industry_jobs row stores a single job_id, so the row belongs to one
  # job. The previous `has_many :jobs` would have required an
  # industry_job_id column on jobs, and Job already declares
  # `has_many :industry_jobs` as the inverse side.
  belongs_to :job
end
# A job posting. It belongs to an area, a city and a company, and must have
# a job_name.
class Job < ApplicationRecord
  has_many :industry_jobs
  has_many :saved_jobs
  has_many :applied_jobs
  # NOTE(review): has_and_belongs_to_many relies on a jobs_users join table;
  # confirm that table exists before using this association.
  has_and_belongs_to_many :users

  belongs_to :area
  belongs_to :city
  belongs_to :company

  validates :job_name, presence: true
end
# Join record marking a job as saved by a user (both sides are declared
# with belongs_to).
class SavedJob < ApplicationRecord
  belongs_to :job
  belongs_to :user
end
# An account in the application; fname and lname are mandatory.
class User < ApplicationRecord
  has_many :applied_jobs
  # Added so the user side mirrors SavedJob's `belongs_to :user`; without it
  # a user's saved jobs could not be reached from the user record.
  has_many :saved_jobs
  # NOTE(review): has_and_belongs_to_many relies on a jobs_users join table;
  # confirm that table exists before using this association.
  has_and_belongs_to_many :jobs

  validates_presence_of :fname
  validates_presence_of :lname
end
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
# SQLite. Versions 3.8.0 and up are supported.
# gem install sqlite3
#
# Ensure the SQLite 3 gem is defined in your Gemfile
# gem 'sqlite3'
#
default: &default default: &default
adapter: sqlite3 adapter: mysql2
pool: <%= ENV.fetch("RAILS_MAX_THREADS") { 5 } %> encoding: unicode
timeout: 5000
development: development:
<<: *default adapter: mysql2
database: db/development.sqlite3 host: localhost
database: venjob
# Warning: The database defined as "test" will be erased and username: zigexn
# re-generated from your development database when you run "rake". password: 123
# Do not set this db to the same as development or production. host: localhost
test: # socket: /var/lib/mysql/mysql.sock
<<: *default encoding: utf8
database: db/test.sqlite3 pool: 5
production: production:
<<: *default adapter: mysql2
database: db/production.sqlite3 host: localhost
database: venjob
username: zigexn
password: 123
host: localhost
socket: /tmp/mysql.sock
encoding: utf8
staging:
adapter: mysql2
host: localhost
database: venjob
username: zigexn
password: 123
host: localhost
socket: /tmp/mysql.sock
encoding: utf8
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
# Creates the areas table: two string columns plus timestamps.
class CreateAreas < ActiveRecord::Migration[6.0]
  def change
    create_table :areas do |table|
      table.string :area_name, :area_description
      table.timestamps
    end
  end
end
# Creates the cities table and an index on city_name.
class CreateCities < ActiveRecord::Migration[6.0]
  def change
    create_table :cities do |table|
      table.integer :area_id
      table.string :city_name, :city_description
      table.timestamps
    end

    add_index :cities, :city_name
  end
end
# Creates the companies table and an index on email.
class CreateCompanies < ActiveRecord::Migration[6.0]
  def change
    create_table :companies do |table|
      table.string :company_name
      table.text :company_description
      # Column order is kept identical to the original definition.
      table.string :address, :phone_number, :website, :email, :size
      table.timestamps
    end

    add_index :companies, :email
  end
end
# Creates the industries table (text name, string description, timestamps).
class CreateIndustries < ActiveRecord::Migration[6.0]
  def change
    create_table :industries do |table|
      table.text :industry_name
      table.string :industry_description
      table.timestamps
    end
  end
end
# Creates the industry_jobs table holding industry_id / job_id pairs.
class CreateIndustryJobs < ActiveRecord::Migration[6.0]
  def change
    create_table :industry_jobs do |table|
      table.integer :industry_id, :job_id
      table.timestamps
    end
  end
end
# Creates the jobs table: four integer id columns followed by the posting's
# descriptive fields and timestamps.
class CreateJobs < ActiveRecord::Migration[6.0]
  def change
    create_table :jobs do |table|
      table.integer :area_id, :city_id, :industry_id, :company_id
      table.text :job_name
      table.string :salary
      table.datetime :deadline
      table.string :level, :experience
      table.datetime :last_updated
      table.text :description
      table.timestamps
    end
  end
end
# Creates the users table: identity fields, a role, the various digest
# columns with their timestamps, and record timestamps.
class CreateUsers < ActiveRecord::Migration[6.0]
  def change
    create_table :users do |table|
      table.string :email, :fname, :lname
      table.integer :role
      table.string :remember_digest, :activation_digest
      table.boolean :activated
      table.datetime :activated_at
      table.string :reset_digest
      table.datetime :reset_sent_at
      table.string :password_digest
      table.timestamps
    end
  end
end
# Creates the saved_jobs table holding user_id / job_id pairs.
class CreateSavedJobs < ActiveRecord::Migration[6.0]
  def change
    create_table :saved_jobs do |table|
      table.integer :user_id, :job_id
      table.timestamps
    end
  end
end
# Creates the applied_jobs table holding user_id / job_id pairs.
class CreateAppliedJobs < ActiveRecord::Migration[6.0]
  def change
    create_table :applied_jobs do |table|
      table.integer :user_id, :job_id
      table.timestamps
    end
  end
end
# This file is auto-generated from the current state of the database. Instead
# of editing this file, please use the migrations feature of Active Record to
# incrementally modify your database, and then regenerate this schema definition.
#
# This file is the source Rails uses to define your schema when running `rails
# db:schema:load`. When creating a new database, `rails db:schema:load` tends to
# be faster and is potentially less error prone than running all of your
# migrations from scratch. Old migrations may fail to apply correctly if those
# migrations use external dependencies or application code.
#
# It's strongly recommended that you check this file into your version control system.
# Schema snapshot at version 2020_02_04_084755. Every table is declared with
# InnoDB / utf8 options and `force: :cascade`.
ActiveRecord::Schema.define(version: 2020_02_04_084755) do
# user_id / job_id pairs for job applications.
create_table "applied_jobs", options: "ENGINE=InnoDB DEFAULT CHARSET=utf8", force: :cascade do |t|
t.integer "user_id"
t.integer "job_id"
t.datetime "created_at", precision: 6, null: false
t.datetime "updated_at", precision: 6, null: false
end
# Areas: name and description.
create_table "areas", options: "ENGINE=InnoDB DEFAULT CHARSET=utf8", force: :cascade do |t|
t.string "area_name"
t.string "area_description"
t.datetime "created_at", precision: 6, null: false
t.datetime "updated_at", precision: 6, null: false
end
# Cities: reference an area by area_id; indexed on city_name.
create_table "cities", options: "ENGINE=InnoDB DEFAULT CHARSET=utf8", force: :cascade do |t|
t.integer "area_id"
t.string "city_name"
t.string "city_description"
t.datetime "created_at", precision: 6, null: false
t.datetime "updated_at", precision: 6, null: false
t.index ["city_name"], name: "index_cities_on_city_name"
end
# Companies: profile and contact columns; indexed on email.
create_table "companies", options: "ENGINE=InnoDB DEFAULT CHARSET=utf8", force: :cascade do |t|
t.string "company_name"
t.text "company_description"
t.string "address"
t.string "phone_number"
t.string "website"
t.string "email"
t.string "size"
t.datetime "created_at", precision: 6, null: false
t.datetime "updated_at", precision: 6, null: false
t.index ["email"], name: "index_companies_on_email"
end
# Industries: note that industry_name is a text column, not a string.
create_table "industries", options: "ENGINE=InnoDB DEFAULT CHARSET=utf8", force: :cascade do |t|
t.text "industry_name"
t.string "industry_description"
t.datetime "created_at", precision: 6, null: false
t.datetime "updated_at", precision: 6, null: false
end
# industry_id / job_id pairs.
create_table "industry_jobs", options: "ENGINE=InnoDB DEFAULT CHARSET=utf8", force: :cascade do |t|
t.integer "industry_id"
t.integer "job_id"
t.datetime "created_at", precision: 6, null: false
t.datetime "updated_at", precision: 6, null: false
end
# Jobs: integer references to area, city, industry and company plus the
# posting details.
create_table "jobs", options: "ENGINE=InnoDB DEFAULT CHARSET=utf8", force: :cascade do |t|
t.integer "area_id"
t.integer "city_id"
t.integer "industry_id"
t.integer "company_id"
t.text "job_name"
t.string "salary"
t.datetime "deadline"
t.string "level"
t.string "experience"
t.datetime "last_updated"
t.text "description"
t.datetime "created_at", precision: 6, null: false
t.datetime "updated_at", precision: 6, null: false
end
# user_id / job_id pairs for saved jobs.
create_table "saved_jobs", options: "ENGINE=InnoDB DEFAULT CHARSET=utf8", force: :cascade do |t|
t.integer "user_id"
t.integer "job_id"
t.datetime "created_at", precision: 6, null: false
t.datetime "updated_at", precision: 6, null: false
end
# Users: identity columns, role, and digest columns with related timestamps.
create_table "users", options: "ENGINE=InnoDB DEFAULT CHARSET=utf8", force: :cascade do |t|
t.string "email"
t.string "fname"
t.string "lname"
t.integer "role"
t.string "remember_digest"
t.string "activation_digest"
t.boolean "activated"
t.datetime "activated_at"
t.string "reset_digest"
t.datetime "reset_sent_at"
t.string "password_digest"
t.datetime "created_at", precision: 6, null: false
t.datetime "updated_at", precision: 6, null: false
end
end
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
require 'addressable/uri'
require 'open-uri'
require 'uri'
namespace :crawler do
  # Entry point: `rails crawler:crawl`. Loads the Rails environment and runs
  # the crawl.
  task crawl: :environment do
    crawl_data
  end

  # Walks the careerbuilder.vn job listing page by page, following the
  # "next page" link until a page has none, and stores every job together
  # with its city, industry and company via find_or_create_by.
  #
  # Looks up Area id 1 once up front, so that record must already exist
  # (presumably seeded by the insert:areas_table task; confirm).
  # Network and parsing errors are not rescued and abort the crawl.
  def crawl_data
    base_careerbuilder_url = "https://careerbuilder.vn"
    list_url = "#{base_careerbuilder_url}/viec-lam/tat-ca-viec-lam-vi.html"

    # URI.open with a block closes the underlying handle once parsing is done;
    # Kernel#open on URLs is deprecated and gone in Ruby 3.
    fetch_document = ->(url) { URI.open(url) { |io| Nokogiri::HTML(io) } }

    # Loop-invariant, so resolved once instead of once per listing page.
    area = Area.find(1)

    until list_url.blank?
      # One request per listing page serves both the job links and the pager.
      page = fetch_document.call(list_url)

      # The last page has no "next" link; a nil here ends the loop instead of
      # raising NoMethodError on nil.
      next_link = page.css('.paginationTwoStatus .right').first
      next_url = next_link && next_link["href"]
      puts next_url if next_url

      page.css('.jobtitle .job a').each do |link|
        url = link['href']
        next if url.blank?

        # Addressable normalizes non-ASCII characters without double-encoding
        # existing escapes; URI::encode is obsolete and removed in Ruby 3.
        uri = Addressable::URI.parse(url).normalize.to_s
        job = fetch_document.call(uri)

        job_detail = job.css('#showScroll .DetailJobNew')
        title = job.css('.top-job-info h1')
        company_name = job.css('.top-job-info .tit_company')
        updated_date = job.css('.datepost span')
        location = job_detail.css('li[1].bgLine1 p[1].fl_left b a[2]')
        experience = job_detail.css('li[2].bgLine2 p[1].fl_left > text()')
        industry = job_detail.css('li[3].bgLine1 p[1].fl_left b')
        level = job_detail.css('.bgLine1 .fl_right label')
        salary = job_detail.css('.bgLine2 .fl_right label')
        deadline = job_detail.css('li[3].bgLine1 p[2].fl_right > text()')
        description = job.css('.MarBot20')
        address = job.css('.box1Detail .TitleDetailNew label label')
        company_intro = job.css('#emp_more')

        # Skip postings where the industry field could not be read.
        next if industry.text.blank?

        city_name = location.text.gsub(",", "")
        city = City.find_or_create_by(area_id: area.id, city_name: city_name, city_description: "")

        industry_record = Industry.find_or_create_by(industry_name: industry.text, industry_description: "")

        company = Company.find_or_create_by(
          company_name: company_name.text,
          company_description: company_intro.text,
          address: address.text
        )

        Job.find_or_create_by(
          area_id: area.id,
          city_id: city.id,
          industry_id: industry_record.id,
          company_id: company.id,
          job_name: title.text,
          salary: salary.text,
          deadline: deadline.text,
          level: level.text,
          experience: experience.text.strip,
          last_updated: updated_date.text.strip,
          description: description.text
        )
      end

      list_url = next_url
    end
  end
end
namespace :insert do
  # Seeds the two areas; find_or_create_by keeps re-runs from adding
  # duplicate rows.
  task areas_table: :environment do
    [["Viet Nam", "VN"], ["Nuoc Ngoai", "NN"]].each do |name, description|
      Area.find_or_create_by(area_name: name, area_description: description)
    end
  end
end
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
require "test_helper"
# Base class for system tests; configures the Selenium driver with Chrome
# and a 1400x1400 screen.
class ApplicationSystemTestCase < ActionDispatch::SystemTestCase
driven_by :selenium, using: :chrome, screen_size: [1400, 1400]
end
require "test_helper"
# Placeholder connection test: it defines no active tests, only the
# commented-out example below.
class ApplicationCable::ConnectionTest < ActionCable::Connection::TestCase
# test "connects with cookies" do
# cookies.signed[:user_id] = 42
#
# connect
#
# assert_equal connection.user_id, "42"
# end
end
# Test bootstrap: selects the test environment, loads the application and
# configures the shared test case base class.

# Default to the test environment unless RAILS_ENV is already set.
ENV['RAILS_ENV'] ||= 'test'
require_relative '../config/environment'
require 'rails/test_help'
class ActiveSupport::TestCase
# Run tests in parallel with specified workers
parallelize(workers: :number_of_processors)
# Setup all fixtures in test/fixtures/*.yml for all tests in alphabetical order.
fixtures :all
# Add more helper methods to be used by all tests here...
end
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment