Commit 9b293818 by Trịnh Hoàng Phúc

Fix review 11/05/2020

parent ff55254b
Pipeline #609 failed with stages
in 0 seconds
# City record that jobs can be attached to.
#
# A city is only valid when its +title+ is present. Cities and jobs are
# related many-to-many via +has_and_belongs_to_many+.
class City < ApplicationRecord
  # Reject records with a blank or missing title.
  validates :title, presence: true

  # Many-to-many link to jobs.
  has_and_belongs_to_many :jobs
end
# Company record that owns job postings.
#
# A company is only valid when its +title+ is present, and it may have any
# number of jobs.
class Company < ApplicationRecord
  # Reject records with a blank or missing title.
  validates :title, presence: true

  # One-to-many link to the jobs of this company.
  has_many :jobs
end
# Industry record that jobs can be classified under.
#
# An industry is only valid when its +title+ is present. Industries and jobs
# are related many-to-many via +has_and_belongs_to_many+.
class Industry < ApplicationRecord
  # Reject records with a blank or missing title.
  validates :title, presence: true

  # Many-to-many link to jobs.
  has_and_belongs_to_many :jobs
end
class Job < ApplicationRecord
validates :title, presence: true
belongs_to :company
has_many :applies
......
# Extends the +jobs+ table with two bigint salary bounds (defaulting to 0)
# and three text columns for additional job details.
class AddColumnsToJobs < ActiveRecord::Migration[6.0]
  # Adds the new columns; every step is a plain +add_column+ call, issued in
  # the order: min_salary, max_salary, benefit, job_requirements,
  # other_information.
  def change
    # Salary bounds are bigint columns with a default of 0.
    %i[min_salary max_salary].each do |column|
      add_column :jobs, column, :bigint, default: 0
    end

    # Detail fields are text columns with no default.
    %i[benefit job_requirements other_information].each do |column|
      add_column :jobs, column, :text
    end
  end
end
......@@ -10,9 +10,9 @@
#
# It's strongly recommended that you check this file into your version control system.
ActiveRecord::Schema.define(version: 2020_04_23_044651) do
ActiveRecord::Schema.define(version: 2020_05_11_055632) do
create_table "admins", options: "ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci", force: :cascade do |t|
create_table "admins", options: "ENGINE=InnoDB DEFAULT CHARSET=utf8mb4", force: :cascade do |t|
t.string "email", default: "", null: false
t.string "encrypted_password", default: "", null: false
t.string "reset_password_token"
......@@ -24,7 +24,7 @@ ActiveRecord::Schema.define(version: 2020_04_23_044651) do
t.index ["reset_password_token"], name: "index_admins_on_reset_password_token", unique: true
end
create_table "applies", options: "ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci", force: :cascade do |t|
create_table "applies", options: "ENGINE=InnoDB DEFAULT CHARSET=utf8mb4", force: :cascade do |t|
t.bigint "user_id", null: false
t.bigint "job_id", null: false
t.datetime "created_at", precision: 6, null: false
......@@ -33,21 +33,21 @@ ActiveRecord::Schema.define(version: 2020_04_23_044651) do
t.index ["user_id"], name: "index_applies_on_user_id"
end
create_table "cities", options: "ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci", force: :cascade do |t|
create_table "cities", options: "ENGINE=InnoDB DEFAULT CHARSET=utf8mb4", force: :cascade do |t|
t.string "title"
t.datetime "created_at", precision: 6, null: false
t.datetime "updated_at", precision: 6, null: false
t.boolean "foreign", default: false
end
create_table "cities_jobs", id: false, options: "ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci", force: :cascade do |t|
create_table "cities_jobs", id: false, options: "ENGINE=InnoDB DEFAULT CHARSET=utf8mb4", force: :cascade do |t|
t.bigint "city_id", null: false
t.bigint "job_id", null: false
t.index ["city_id", "job_id"], name: "index_cities_jobs_on_city_id_and_job_id"
t.index ["job_id", "city_id"], name: "index_cities_jobs_on_job_id_and_city_id"
end
create_table "companies", options: "ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci", force: :cascade do |t|
create_table "companies", options: "ENGINE=InnoDB DEFAULT CHARSET=utf8mb4", force: :cascade do |t|
t.string "title"
t.string "address"
t.string "logo"
......@@ -56,7 +56,7 @@ ActiveRecord::Schema.define(version: 2020_04_23_044651) do
t.datetime "updated_at", precision: 6, null: false
end
create_table "favorites", options: "ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci", force: :cascade do |t|
create_table "favorites", options: "ENGINE=InnoDB DEFAULT CHARSET=utf8mb4", force: :cascade do |t|
t.bigint "user_id", null: false
t.bigint "job_id", null: false
t.datetime "created_at", precision: 6, null: false
......@@ -65,20 +65,20 @@ ActiveRecord::Schema.define(version: 2020_04_23_044651) do
t.index ["user_id"], name: "index_favorites_on_user_id"
end
create_table "industries", options: "ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci", force: :cascade do |t|
create_table "industries", options: "ENGINE=InnoDB DEFAULT CHARSET=utf8mb4", force: :cascade do |t|
t.string "title"
t.datetime "created_at", precision: 6, null: false
t.datetime "updated_at", precision: 6, null: false
end
create_table "industries_jobs", id: false, options: "ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci", force: :cascade do |t|
create_table "industries_jobs", id: false, options: "ENGINE=InnoDB DEFAULT CHARSET=utf8mb4", force: :cascade do |t|
t.bigint "industry_id", null: false
t.bigint "job_id", null: false
t.index ["industry_id", "job_id"], name: "index_industries_jobs_on_industry_id_and_job_id"
t.index ["job_id", "industry_id"], name: "index_industries_jobs_on_job_id_and_industry_id"
end
create_table "jobs", options: "ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci", force: :cascade do |t|
create_table "jobs", options: "ENGINE=InnoDB DEFAULT CHARSET=utf8mb4", force: :cascade do |t|
t.string "title"
t.string "updated_date_job"
t.string "level"
......@@ -89,10 +89,15 @@ ActiveRecord::Schema.define(version: 2020_04_23_044651) do
t.bigint "company_id"
t.datetime "created_at", precision: 6, null: false
t.datetime "updated_at", precision: 6, null: false
t.bigint "min_salary", default: 0
t.bigint "max_salary", default: 0
t.text "benefit"
t.text "job_requirements"
t.text "other_information"
t.index ["company_id"], name: "index_jobs_on_company_id"
end
create_table "users", options: "ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci", force: :cascade do |t|
create_table "users", options: "ENGINE=InnoDB DEFAULT CHARSET=utf8mb4", force: :cascade do |t|
t.string "email", default: "", null: false
t.string "encrypted_password", default: "", null: false
t.string "reset_password_token"
......
......@@ -6,148 +6,160 @@ namespace :crawler do
task job: :environment do
# Define exception logger
exception_logger = ActiveSupport::Logger.new("log/exception_logger.log")
exception_logger = Logger.new("log/exception_logger.log")
# Define skip logger
skip_url_logger = ActiveSupport::Logger.new("log/skip_url_logger.log")
skip_url_logger = Logger.new("log/skip_url_logger.log")
# Loop page
(10..12).each do |page|
(1..2).each do |page|
# Fetch and parse HTML document
html_jobs = Nokogiri::HTML.parse(open("https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-trang-#{page}-vi.html"))
html_jobs = Nokogiri::HTML.parse(URI.open("https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-trang-#{page}-vi.html"))
# Loop item
html_jobs.css(".jobs-side-list .job-item").each do |item|
# Set salary, min-salary, max-salary
salary = item.at_css(".figure .figcaption .caption .salary").text.gsub("$ ","")
if salary == "Cạnh tranh"
min_salary = 0
max_salary = 999999999
elsif salary.include? "Dưới"
min_salary = 0
max_salary = (salary.gsub("Dưới ","").gsub(" Tr VND","").gsub(",",".").to_f*1000000).to_i
elsif salary.include? "Trên"
min_salary = (salary.gsub("Trên ","").gsub(" Tr VND","").gsub(",",".").to_f*1000000).to_i
max_salary = 999999999
else
range_salary = salary.split("-")
min_salary = (range_salary[0].gsub("$ ","").gsub(" Tr ","").to_f*1000000).to_i
max_salary = (range_salary[1].gsub(" Tr VND","").gsub(" ","").to_f*1000000).to_i
end
# Job attributes
job_attributes = {
title: item.css(".figure .figcaption .title a @title").text,
updated_date_job: item.css(".bottom-right-icon .time time").text,
level: nil,
years_of_experience: nil,
salary: item.css(".figure .figcaption .caption .salary").text.gsub("$ ",""),
expiration_date: nil,
job_description: nil,
company_id: nil,
title: item.at_css(".figure .figcaption .title a @title").text,
updated_date_job: item.at_css(".bottom-right-icon .time time").text,
salary: salary,
min_salary: min_salary,
max_salary: max_salary
}
# Define cities array
cities = []
item.css(".figure .figcaption .caption .location ul li").each do |city|
city = check_exist_or_create_city(city.text.strip)
cities << city
end
if item.css(".figure .image a @href").text != "javascript:void(0);"
# Company attributes
html_company_detail = Nokogiri::HTML.parse(open(URI.encode(item.css(".figure .image a @href").text)))
unless html_company_detail.at_css(".jobsby-company").nil?
company_attributes = {
title: html_company_detail.css(".jobsby-company .company-introduction .company-info .info .content .name").text,
address: html_company_detail.css(".jobsby-company .company-introduction .company-info .info .content p")[1].text,
logo: html_company_detail.css(".jobsby-company .company-introduction .company-info .info .img @src").text,
description: html_company_detail.css(".jobsby-company .company-introduction .company-info .info .content ul").inner_html.strip
}
# Check exist or create company
job_attributes[:company_id] = check_exist_or_create_company(company_attributes)
end
end
# Define industry ids array
industries = []
html_job_detail = Nokogiri::HTML.parse(open(URI.encode(item.css(".figure .figcaption .title .job_link @href").text)))
unless html_job_detail.at_css(".search-result-list-detail").nil?
html_job_detail = Nokogiri::HTML.parse(URI.open(URI.encode(item.css(".figure .figcaption .title .job_link @href").text)))
if html_job_detail.at_css(".search-result-list-detail .container .no-gutters").present?
html_job_detail.css(".search-result-list-detail .tabs #tab-1 .job-detail-content .has-background ul li").each do |ele|
type = ele.css("strong").text
type = ele.at_css("strong").text
case type
when "Hết hạn nộp"
job_attributes[:expiration_date] = ele.css("p").text.strip
job_attributes[:expiration_date] = ele.at_css("p").text.squish
when "Cấp bậc"
job_attributes[:level] = ele.css("p").text.strip
job_attributes[:level] = ele.at_css("p").text.squish
when "Kinh nghiệm"
job_attributes[:years_of_experience] = ele.css("p").text.strip
job_attributes[:years_of_experience] = ele.at_css("p").text.squish
end
end
html_job_detail.css(".search-result-list-detail .tabs #tab-1 .job-detail-content .detail-row").each do |ele|
if ele.at_css("h3").present?
type = ele.at_css("h3").text
case type
when "Phúc lợi "
job_attributes[:benefit] = ele.at_css("ul").inner_html.squish
when "Mô tả Công việc"
job_attributes[:job_description] = ele.inner_html.squish.gsub("<h3 class=\"detail-title\">Mô tả Công việc</h3>","")
when "Yêu Cầu Công Việc"
job_attributes[:job_requirements] = ele.inner_html.squish.gsub("<h3 class=\"detail-title\">Yêu Cầu Công Việc</h3>","")
when "Thông tin khác"
job_attributes[:other_information] = ele.inner_html.squish.gsub("<h3 class=\"detail-title\">Thông tin khác</h3>","")
end
end
end
if item.at_css(".figure .image a @href").text != "javascript:void(0);"
# Company attributes
html_company_detail = Nokogiri::HTML.parse(open(URI.encode(item.css(".figure .image a @href").text)))
if html_company_detail.at_css(".jobsby-company").present?
company_attributes = {
title: html_company_detail.at_css(".jobsby-company .company-introduction .company-info .info .content .name").text,
address: html_company_detail.css(".jobsby-company .company-introduction .company-info .info .content p")[1].text,
logo: html_company_detail.at_css(".jobsby-company .company-introduction .company-info .info .img @src").text,
description: html_company_detail.at_css(".jobsby-company .company-introduction .company-info .info .content ul").inner_html.squish
}
# Check exist or create company
job_attributes[:company_id] = check_exist_or_create_company(company_attributes)
end
end
# Create job
job = check_exist_or_create_job(job_attributes)
# Define cities array
cities = []
item.css(".figure .figcaption .caption .location ul li").each do |city|
city = check_exist_or_create_city(city.text.squish)
cities << city
end
# Create city_job
if cities.length > 0
cities.each do |city|
job.cities << city
end
end
# Create industry_job
html_job_detail.css(".search-result-list-detail .tabs #tab-1 .job-detail-content .detail-box .industry p a").each do |ele|
industry = check_exist_or_create_industry(ele.text.gsub(",","").strip)
industry = check_exist_or_create_industry(ele.text.gsub(",","").squish)
industries << industry
end
# Get description for job attributes
description = ""
html_job_detail.css(".search-result-list-detail .tabs #tab-1 .job-detail-content .detail-row").each do |ele|
description << ele.inner_html
if industries.length > 0
industries.each do |industry|
job.industries << industry
end
end
# Set description for job attributes
job_attributes[:job_description] = description.strip
else
skip_url_logger.info "another template #{item.css(".figure .figcaption .title .job_link @href").text}"
skip_url_logger.info "another template #{item.at_css(".figure .figcaption .title .job_link @href").text}"
end
# Create job
job = check_exist_or_create_job(job_attributes)
# Create city_job
if cities.count > 0
cities.each do |city|
job.cities << city
end
end
# Create industry_job
if industries.count > 0
industries.each do |industry|
job.industries << industry
end
end
rescue
exception_logger.info "Error url: #{item.css(".figure .figcaption .title .job_link @href").text}"
rescue Exception => e
exception_logger.info e
skip_url_logger.info "another template #{item.at_css(".figure .figcaption .title .job_link @href").text}"
next
end
end
end
task city: :environment do
# Fetch and parse HTML document
html_cities = Nokogiri::HTML.parse(open("https://careerbuilder.vn/tim-viec-lam.html"))
unless html_cities.at_css(".find-jobsby-categories .main-jobs-by-location").nil?
# Define cities array
cities = []
# Get city in country
html_cities.css(".find-jobsby-categories .main-jobs-by-location .jobs-in-country .list-jobs-by-country li a").each do |title|
city = {
title: title.text.gsub("Việc làm tại ","").strip,
foreign: false
}
cities << city
end
# Get city foreign
html_cities.css(".find-jobsby-categories .main-jobs-by-location .overseas-jobs .list-overseas-jobs li a").each do |title|
city = {
title: title.text.strip,
foreign: true
}
cities << city
end
if cities.count > 0
City.import cities
end
# Get city in country
cities_in_country = html_cities.css(".find-jobsby-categories .main-jobs-by-location .jobs-in-country .list-jobs-by-country li a").map do |title|
{
title: title.text.gsub("Việc làm tại ","").squish,
foreign: false
}
end
# Get city foreign
cities_foreign = html_cities.css(".find-jobsby-categories .main-jobs-by-location .overseas-jobs .list-overseas-jobs li a").map do |title|
{
title: title.text.squish,
foreign: true
}
end
cities = cities_in_country + cities_foreign
if cities.length > 0
City.import cities
end
end
task industry: :environment do
# Fetch and parse HTML document
html_industries = Nokogiri::HTML.parse(open("https://careerbuilder.vn/tim-viec-lam.html"))
unless html_industries.at_css(".find-jobsby-categories .list-of-working-positions").nil?
# Define industries array
industries = []
# Get industry
html_industries.css(".find-jobsby-categories .list-of-working-positions .list-jobs li a").each do |title|
industry = {
title: title.text.strip
}
industries << industry
end
if industries.count > 0
Industry.import industries
end
# Get industry
industries = html_industries.css(".find-jobsby-categories .list-of-working-positions .list-jobs li a").map do |title|
{
title: title.text.squish
}
end
if industries.length > 0
Industry.import industries
end
end
def check_exist_or_create_company(company_attributes)
find_company = Company.find_or_create_by(company_attributes)
return find_company.id
......@@ -155,7 +167,7 @@ namespace :crawler do
def check_exist_or_create_industry(industry_title)
industries = Industry.where("title LIKE ?", industry_title)
if industries.count == 0
if industries.length == 0
industry = Industry.create(title: industry_title)
else
industry = industries[0]
......@@ -165,7 +177,7 @@ namespace :crawler do
def check_exist_or_create_city(city_title)
cities = City.where("title LIKE ?", city_title)
if cities.count == 0
if cities.length == 0
city = City.create(title: city_title)
else
city = cities[0]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment