Commit 2e086f87 by Trịnh Hoàng Phúc

Fix review 14/05/2020

parent 15b33b8c
Pipeline #619 failed with stages
in 0 seconds
......@@ -14,14 +14,15 @@ namespace :crawler do
html_jobs = Nokogiri::HTML.parse(URI.open("https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-trang-#{page}-vi.html"))
# Loop item
html_jobs.css(".jobs-side-list .job-item").each do |item|
html_job_detail = Nokogiri::HTML.parse(URI.open(URI.encode(item.css(".figure .figcaption .title .job_link @href").text)))
url = item.css(".figure .figcaption .title .job_link @href").text
html_job_detail = Nokogiri::HTML.parse(URI.open(URI.encode(url)))
if html_job_detail.at_css(".search-result-list-detail .tabs div#tab-1").nil?
logger.warn "Another template #{item.css(".figure .figcaption .title .job_link @href").text}"
logger.warn "Another template #{url}"
next
end
# Set salary, min-salary, max-salary
if item.at_css(".figure .figcaption .caption .salary").text.include? "USD"
logger.warn "Another template #{item.css(".figure .figcaption .title .job_link @href").text}"
logger.warn "Another template #{url}"
next
end
salary = item.at_css(".figure .figcaption .caption .salary").text.gsub("$ ","")
......@@ -63,11 +64,12 @@ namespace :crawler do
# Company attributes
html_company_detail = Nokogiri::HTML.parse(open(URI.encode(item.css(".figure .image a @href").text)))
next if html_company_detail.at_css(".jobsby-company").nil?
company_css = ".jobsby-company .company-introduction .company-info .info "
company_attributes = {
title: html_company_detail.at_css(".jobsby-company .company-introduction .company-info .info .content .name").text,
address: html_company_detail.css(".jobsby-company .company-introduction .company-info .info .content p")[1].text,
logo: html_company_detail.at_css(".jobsby-company .company-introduction .company-info .info .img @src").text,
description: html_company_detail.at_css(".jobsby-company .company-introduction .company-info .info .content ul").inner_html.squish
title: html_company_detail.at_css(company_css + ".content .name").text,
address: html_company_detail.css(company_css + ".content p")[1].text,
logo: html_company_detail.at_css(company_css + ".img @src").text,
description: html_company_detail.at_css(company_css + ".content ul").inner_html.squish
}
# Define cities array
cities = item.css(".figure .figcaption .caption .location ul li").map do |city|
......@@ -80,7 +82,7 @@ namespace :crawler do
result = CrawlerService.imports(job_attributes, company_attributes, cities, industries)
logger.info "Crawl success url : #{item.css(".figure .figcaption .title .job_link @href").text}"
logger.info "Crawl success url : #{url}"
rescue Exception => e
logger.error e
next
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment