Commit 37105061 by nnnghia98

refactoring code

parent 6e007fe3
......@@ -7,6 +7,7 @@ class CrawlData
def crawl_web
page = Nokogiri::HTML.parse(open(Settings.crawl.base_url, ssl_verify_mode: nil))
total_job = page.css("div.ais-stats h1.col-sm-10 span").text.gsub(",", "").to_f
return if total_job == 0
total_page = (total_job / Settings.crawl.jobs_per_page).floor
crawl_job_title_logger = ActiveSupport::Logger.new("log/crawl_data.log")
crawl_job_title_logger.info "Crawl at #{Time.current}"
......@@ -18,31 +19,31 @@ class CrawlData
job_page = Nokogiri::HTML.parse(open(URI.encode(job_url)))
# Job code
job_code = job_url.split("/").last.split(".")[-2]
job = JobHtml.new(job_page).parse_job
# Company code
company_code = job_url.split("/").last.split("-").last.split(".")[-2].strip
next if job_page.css(".LeftJobCB").nil? || job[:workplace].blank?
next if job_page.css(".LeftJobCB").nil?
# Job code
job_code = job_url.split("/").last.split(".")[-2] || ""
job = JobHtml.new(job_page).parse_job
# Company code
company_code = job_page.css(".viewmorejob a @href").present? ?
job_page.css(".viewmorejob a @href").text.split("/").last.split("-")[-2].strip : ""
crawl_job_title_logger.info "#{job[:title]}"
next if job[:workplace].nil?
job[:workplace].each do |city_name|
city_id = city_id(city_name)
company_id = company_id(company_code, job[:company_name], job[:company_address], job[:company_description])
job_id = job_id(job_code, job[:title], job[:salary],
city_id = get_city(city_name).id
company_id = get_company(company_code, job[:company_name], job[:company_address], job[:company_description]).id
job_id = get_job(job_code, job[:title], job[:salary],
job[:description], job[:requirement],
job[:level], job[:post_date],
job[:expiration_date], company_id)
job[:expiration_date], company_id).id
CityJob.find_or_create_by!(job_id: job_id, city_id: city_id)
job[:industries].each do |job_industry|
industry_id = industry_id(job_industry.strip)
job_industry = job_industry.strip
industry_id = get_industry(job_industry).id
IndustryJob.find_or_create_by!(industry_id: industry_id, job_id: job_id)
end
end
......@@ -50,28 +51,25 @@ class CrawlData
end
end
# Looks up the company by its crawled code (initializing a new record when
# none exists) and refreshes its descriptive fields from the crawled page.
#
# @param code [String] company code extracted from the job page
# @param name [String] company name
# @param address [String] company address
# @param description [String] company description
# @return [Company] the record after the update attempt
def get_company(code, name, address, description)
  company = Company.find_or_initialize_by(code: code)
  # NOTE(review): the boolean returned by `update` is ignored, so a record
  # that fails validation is still returned (with a nil id when it is new).
  company.update(name: name, address: address, description: description)
  company
end

# Backward-compatible wrapper for callers that only need the company id.
#
# @return [Integer, nil] id of the record returned by #get_company
def company_id(code, name, address, description)
  get_company(code, name, address, description).id
end
# Finds the industry with the given name, creating it when missing.
#
# @param name [String] industry name (callers pass it already stripped)
# @return [Industry] the matching or newly created record
# @raise whatever `find_or_create_by!` raises when the record cannot be saved
def get_industry(name)
  Industry.find_or_create_by!(name: name)
end

# Backward-compatible wrapper for callers that only need the industry id.
#
# @return [Integer] id of the record returned by #get_industry
def industry_id(name)
  get_industry(name).id
end
# Finds the city with the given name in region "Việt Nam", creating it when
# missing. Surrounding whitespace is removed from the name before the lookup.
#
# @param name [String] raw city name as split out of the crawled text
# @return [City] the matching or newly created record
def get_city(name)
  # NOTE(review): non-bang `find_or_create_by` does not raise on a failed
  # save, so the returned record may have a nil id — confirm this is intended.
  City.find_or_create_by(name: name.strip, region: "Việt Nam")
end

# Backward-compatible wrapper for callers that only need the city id.
#
# @return [Integer, nil] id of the record returned by #get_city
def city_id(name)
  get_city(name).id
end
def job_id(code = nil, title, salary, description, requirement, level, post_date, expiration_date, company_id)
if expiration_date.nil?
job = Job.find_or_initialize_by(title: job_title, company_id: company_id)
else
job = Job.find_or_initialize_by(code: code)
end
def get_job(code = nil, title, salary, description, requirement, level, post_date, expiration_date, company_id)
attrs = expiration_date.nil? ? {title: job_title, company_id: company_id} : {code: code}
job = Job.find_or_initialize_by attrs
job.update(code: code,
title: title,
......@@ -82,6 +80,6 @@ class CrawlData
expiration_date: expiration_date,
level: level,
company_id: company_id)
job.id
job
end
end
......@@ -6,61 +6,67 @@ class JobHtml
# Collects every crawled field of the job page into a single hash.
#
# The info and detail sections are parsed once each and their results reused,
# instead of re-scanning the document for every key.
#
# @return [Hash{Symbol => Object}] keys: :title, :salary, :level, :post_date,
#   :description, :requirement, :expiration_date, :workplace, :industries,
#   :company_name, :company_address, :company_description. A key whose label
#   was not found on the page maps to nil.
def parse_job
  info = get_job_info
  detail = get_job_detail

  { title: get_title,
    salary: info[:salary],
    level: info[:level],
    post_date: get_post_date,
    description: detail[:description],
    requirement: detail[:requirement],
    expiration_date: info[:expiration_date],
    workplace: info[:workplace],
    industries: info[:industries],
    company_name: get_company_name,
    company_address: get_company_address,
    company_description: get_company_description }
end
private
# Returns the text of the ".top-job-info h1" element(s) with surrounding
# whitespace removed. The document is queried once and no state is stored.
#
# @return [String]
def get_title
  @html_data.css(".top-job-info h1").text.strip
end
# Returns the raw text of the ".datepost span" element(s), unmodified.
# The document is queried once and no state is stored.
#
# @return [String]
def get_post_date
  @html_data.css(".datepost span").text
end
# Extracts the labelled facts from the ".DetailJobNew li p" entries.
#
# Each entry is matched by the label it contains; the value is the text after
# the first ":" with CR/LF characters removed. Entries with none of the known
# labels are ignored, and a label that never appears leaves its key absent.
#
# @return [Hash{Symbol => Object}] any of
#   :workplace  [Array<String>] comma-separated parts, not stripped
#   :salary, :level, :expiration_date [String] stripped
#   :industries [Array<String>] comma-separated parts, not stripped
def get_job_info
  @html_data.css(".DetailJobNew li p").each_with_object({}) do |node, job_info|
    info = node.text
    # A Regexp is required here: the String "/[\r\n]+/" would be searched for
    # literally and never match, leaving the line breaks in place.
    value = info.gsub(/[\r\n]+/, "").partition(":").last

    # First matching label wins, in this order.
    if info.include?("Nơi làm việc")
      job_info[:workplace] = value.split(",")
    elsif info.include?("Lương")
      job_info[:salary] = value.strip
    elsif info.include?("Cấp bậc")
      job_info[:level] = value.strip
    elsif info.include?("Hết hạn nộp")
      job_info[:expiration_date] = value.strip
    elsif info.include?("Ngành nghề")
      job_info[:industries] = value.split(",")
    end
  end
end
# Extracts the description and requirement sections from the "div.MarBot20"
# blocks. For a block containing a known heading, the stored value is all the
# text that follows that heading; a block with neither heading is ignored.
#
# @return [Hash{Symbol => String}] any of :description, :requirement
def get_job_detail
  @html_data.css("div.MarBot20").each_with_object({}) do |node, job_detail|
    detail = node.text
    # The description heading is checked first, so a block containing both
    # headings is treated as the description.
    if detail.include?("Mô tả Công việc")
      job_detail[:description] = detail.partition("Mô tả Công việc").last
    elsif detail.include?("Yêu Cầu Công Việc")
      job_detail[:requirement] = detail.partition("Yêu Cầu Công Việc").last
    end
  end
end
def get_company_name
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment