Fix code crawler

parent a3d60343
Pipeline #714 canceled with stages
in 0 seconds
...@@ -32,8 +32,8 @@ ...@@ -32,8 +32,8 @@
link.gsub!('\u2019',"'") link.gsub!('\u2019',"'")
end end
next if link == 'javascript:void(0);' next if link == 'javascript:void(0);'
elsif link != 'https://careerbuilder.vn/vi/nha-tuyen-dung/hr-vietnam\xE2\x80\x99s-ess-client.35A4EFBA.html' if link != 'https://careerbuilder.vn/vi/nha-tuyen-dung/hr-vietnam\xE2\x80\x99s-ess-client.35A4EFBA.html'
company_page = Nokogiri::HTML(URI.open(URI.parse(CGI.escape(link)))) company_page = Nokogiri::HTML(URI.open(URI.parse(URI.escape(link))))
if !(company_page.search('p.name').text).nil? if !(company_page.search('p.name').text).nil?
begin begin
name_company = company_page.search('p.name').text name_company = company_page.search('p.name').text
...@@ -61,7 +61,7 @@ ...@@ -61,7 +61,7 @@
if link.include?('\u2013') if link.include?('\u2013')
link.gsub!('\u2013','–') link.gsub!('\u2013','–')
end end
page_job = Nokogiri::HTML(URI.open(URI.parse(CGI.escape(link)))) page_job = Nokogiri::HTML(URI.open(URI.parse(URI.escape(link))))
get_row = page_job.search('div.bg-blue div.row') get_row = page_job.search('div.bg-blue div.row')
if get_row != "" if get_row != ""
get_name_company = page_job.search('div.job-desc a.job-company-name').text.strip get_name_company = page_job.search('div.job-desc a.job-company-name').text.strip
...@@ -101,17 +101,21 @@ ...@@ -101,17 +101,21 @@
company_id: company_table.id) company_id: company_table.id)
end end
end end
if !company_table.nil?
job_table = Job.find_by(title: title_job) job_table = Job.find_by(title: title_job)
if !job_table.nil? if !job_table.nil?
location_rel = get_row.css('div.map p a').children.map{ |location| location.text.strip } location_rel = get_row.css('div.map p a').children.map{ |location| location.text.strip }
location_rel.each do |loc| location_rel.each do |loc|
city_table = City.find_by(name: "#{loc}") city_table = City.find_by(name: "#{loc}")
if CityJob.find_by(job_id: job_table.id, city_id: city_table.id) == nil
puts "Created City: #{job_table.id} - #{city_table.id}.#{loc}" puts "Created City: #{job_table.id} - #{city_table.id}.#{loc}"
city_jobs = CityJob.create!(job_id: job_table.id, city_id: city_table.id) city_jobs = CityJob.create!(job_id: job_table.id, city_id: city_table.id)
end end
end
industry_rel = get_row.css('li a').children.map{ |industry| industry.text.strip } industry_rel = get_row.css('li a').children.map{ |industry| industry.text.strip }
industry_rel.each do |ind| industry_rel.each do |ind|
industry_table = Industry.find_by(name: "#{ind}") industry_table = Industry.find_by(name: "#{ind}")
if IndustryJob.find_by(job_id: job_table.id, industry_id: industry_table.id) == nil
puts "Created Industry: #{job_table.id} - #{industry_table.id}.#{ind}" puts "Created Industry: #{job_table.id} - #{industry_table.id}.#{ind}"
industry_jobs = IndustryJob.create!(job_id: job_table.id, industry_id: industry_table.id) industry_jobs = IndustryJob.create!(job_id: job_table.id, industry_id: industry_table.id)
end end
...@@ -121,6 +125,8 @@ ...@@ -121,6 +125,8 @@
end end
end end
end end
end
end
def get_file_csv def get_file_csv
Net::FTP.open('192.168.1.156', 'training', 'training') do |ftp| Net::FTP.open('192.168.1.156', 'training', 'training') do |ftp|
......
...@@ -7,8 +7,8 @@ class Crontab ...@@ -7,8 +7,8 @@ class Crontab
link.gsub!('\u2019',"'") link.gsub!('\u2019',"'")
end end
next if link == 'javascript:void(0);' next if link == 'javascript:void(0);'
elsif link != 'https://careerbuilder.vn/vi/nha-tuyen-dung/hr-vietnam\xE2\x80\x99s-ess-client.35A4EFBA.html' if link != 'https://careerbuilder.vn/vi/nha-tuyen-dung/hr-vietnam\xE2\x80\x99s-ess-client.35A4EFBA.html'
company_page = Nokogiri::HTML(URI.open(URI.parse(CGI.escape(link)))) company_page = Nokogiri::HTML(URI.open(URI.parse(URI.escape(link))))
if !(company_page.search('p.name').text).nil? if !(company_page.search('p.name').text).nil?
begin begin
name_company = company_page.search('p.name').text name_company = company_page.search('p.name').text
...@@ -34,7 +34,7 @@ class Crontab ...@@ -34,7 +34,7 @@ class Crontab
if link.include?('\u2013') if link.include?('\u2013')
link.gsub!('\u2013','–') link.gsub!('\u2013','–')
end end
page_job = Nokogiri::HTML(URI.open(URI.parse(CGI.escape(link)))) page_job = Nokogiri::HTML(URI.open(URI.parse(URI.escape(link))))
get_row = page_job.search('div.bg-blue div.row') get_row = page_job.search('div.bg-blue div.row')
if get_row != "" if get_row != ""
get_name_company = page_job.search('div.job-desc a.job-company-name').text.strip get_name_company = page_job.search('div.job-desc a.job-company-name').text.strip
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment