Commit 2e086f87 by Trịnh Hoàng Phúc

Fix review 14/05/2020

parent 15b33b8c
Pipeline #619 failed with stages
in 0 seconds
...@@ -14,14 +14,15 @@ namespace :crawler do ...@@ -14,14 +14,15 @@ namespace :crawler do
html_jobs = Nokogiri::HTML.parse(URI.open("https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-trang-#{page}-vi.html")) html_jobs = Nokogiri::HTML.parse(URI.open("https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-trang-#{page}-vi.html"))
# Loop item # Loop item
html_jobs.css(".jobs-side-list .job-item").each do |item| html_jobs.css(".jobs-side-list .job-item").each do |item|
html_job_detail = Nokogiri::HTML.parse(URI.open(URI.encode(item.css(".figure .figcaption .title .job_link @href").text))) url = item.css(".figure .figcaption .title .job_link @href").text
html_job_detail = Nokogiri::HTML.parse(URI.open(URI.encode(url)))
if html_job_detail.at_css(".search-result-list-detail .tabs div#tab-1").nil? if html_job_detail.at_css(".search-result-list-detail .tabs div#tab-1").nil?
logger.warn "Another template #{item.css(".figure .figcaption .title .job_link @href").text}" logger.warn "Another template #{url}"
next next
end end
# Set salary, min-salary, max-salary # Set salary, min-salary, max-salary
if item.at_css(".figure .figcaption .caption .salary").text.include? "USD" if item.at_css(".figure .figcaption .caption .salary").text.include? "USD"
logger.warn "Another template #{item.css(".figure .figcaption .title .job_link @href").text}" logger.warn "Another template #{url}"
next next
end end
salary = item.at_css(".figure .figcaption .caption .salary").text.gsub("$ ","") salary = item.at_css(".figure .figcaption .caption .salary").text.gsub("$ ","")
...@@ -63,11 +64,12 @@ namespace :crawler do ...@@ -63,11 +64,12 @@ namespace :crawler do
# Company attributes # Company attributes
html_company_detail = Nokogiri::HTML.parse(open(URI.encode(item.css(".figure .image a @href").text))) html_company_detail = Nokogiri::HTML.parse(open(URI.encode(item.css(".figure .image a @href").text)))
next if html_company_detail.at_css(".jobsby-company").nil? next if html_company_detail.at_css(".jobsby-company").nil?
company_css = ".jobsby-company .company-introduction .company-info .info "
company_attributes = { company_attributes = {
title: html_company_detail.at_css(".jobsby-company .company-introduction .company-info .info .content .name").text, title: html_company_detail.at_css(company_css + ".content .name").text,
address: html_company_detail.css(".jobsby-company .company-introduction .company-info .info .content p")[1].text, address: html_company_detail.css(company_css + ".content p")[1].text,
logo: html_company_detail.at_css(".jobsby-company .company-introduction .company-info .info .img @src").text, logo: html_company_detail.at_css(company_css + ".img @src").text,
description: html_company_detail.at_css(".jobsby-company .company-introduction .company-info .info .content ul").inner_html.squish description: html_company_detail.at_css(company_css + ".content ul").inner_html.squish
} }
# Define cities array # Define cities array
cities = item.css(".figure .figcaption .caption .location ul li").map do |city| cities = item.css(".figure .figcaption .caption .location ul li").map do |city|
...@@ -80,7 +82,7 @@ namespace :crawler do ...@@ -80,7 +82,7 @@ namespace :crawler do
result = CrawlerService.imports(job_attributes, company_attributes, cities, industries) result = CrawlerService.imports(job_attributes, company_attributes, cities, industries)
logger.info "Crawl success url : #{item.css(".figure .figcaption .title .job_link @href").text}" logger.info "Crawl success url : #{url}"
rescue Exception => e rescue Exception => e
logger.error e logger.error e
next next
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment