Commit 6f382fd2 by thanhnd

cleaning folder and edit crawler.rake

parent eeedca0f
Pipeline #459 canceled with stages
in 0 seconds
......@@ -32,39 +32,40 @@ namespace :crawler do
job = Nokogiri::HTML(open(uri))
title = job.css('html body.jobseeker_site.jobdetail-standard div#uni_wrapper div#uni_container div.MyJobDetail div.MyJobLeft div.LeftJobCB div.top-job div.top-job-info h1')
company_name = job.css('html body.jobseeker_site.jobdetail-standard div#uni_wrapper div#uni_container div.MyJobDetail div.MyJobLeft div.LeftJobCB div.top-job div.top-job-info div.tit_company')
updated_date = job.css('html body.jobseeker_site.jobdetail-standard div#uni_wrapper div#uni_container div.MyJobDetail div.MyJobLeft div.LeftJobCB div.datepost span')
location = job.css('html body.jobseeker_site.jobdetail-standard div#uni_wrapper div#uni_container div.MyJobDetail div.MyJobLeft div.LeftJobCB div#showScroll.box2Detail ul.DetailJobNew li[1].bgLine1 p[1].fl_left b a[2]')
experience = job.css('html body.jobseeker_site.jobdetail-standard div#uni_wrapper div#uni_container div.MyJobDetail div.MyJobLeft div.LeftJobCB div#showScroll.box2Detail ul.DetailJobNew li[2].bgLine2 p[1].fl_left > text()')
industry = job.css('html body.jobseeker_site.jobdetail-standard div#uni_wrapper div#uni_container div.MyJobDetail div.MyJobLeft div.LeftJobCB div#showScroll.box2Detail ul.DetailJobNew li[3].bgLine1 p[1].fl_left b')
level = job.css('html body.jobseeker_site.jobdetail-standard div#uni_wrapper div#uni_container div.MyJobDetail div.MyJobLeft div.LeftJobCB div#showScroll.box2Detail ul.DetailJobNew li.bgLine1 p.fl_right label')
salary = job.css('html body.jobseeker_site.jobdetail-standard div#uni_wrapper div#uni_container div.MyJobDetail div.MyJobLeft div.LeftJobCB div#showScroll.box2Detail ul.DetailJobNew li.bgLine2 p.fl_right label')
deadline = job.css('html body.jobseeker_site.jobdetail-standard div#uni_wrapper div#uni_container div.MyJobDetail div.MyJobLeft div.LeftJobCB div#showScroll.box2Detail ul.DetailJobNew li[3].bgLine1 p[2].fl_right > text()')
description = job.css('html body.jobseeker_site.jobdetail-standard div#uni_wrapper div#uni_container div.MyJobDetail div.MyJobLeft div.LeftJobCB div.MarBot20')
address = job.css('html body.jobseeker_site.jobdetail-standard div#uni_wrapper div#uni_container div.MyJobDetail div.MyJobLeft div.LeftJobCB div.box1Detail p.TitleDetailNew label label')
company_intro = job.css('html body.jobseeker_site.jobdetail-standard div#uni_wrapper div#uni_container div.MyJobDetail div.MyJobLeft div.LeftJobCB div.desc_company.content_fck span#emp_more')
title = job.css('div.top-job div.top-job-info h1')
company_name = job.css('div.top-job div.top-job-info div.tit_company')
updated_date = job.css('div.datepost span')
location = job.css('div#showScroll.box2Detail ul.DetailJobNew li[1].bgLine1 p[1].fl_left b a[2]')
experience = job.css('div#showScroll.box2Detail ul.DetailJobNew li[2].bgLine2 p[1].fl_left > text()')
industry = job.css('div#showScroll.box2Detail ul.DetailJobNew li[3].bgLine1 p[1].fl_left b')
level = job.css('div#showScroll.box2Detail ul.DetailJobNew li.bgLine1 p.fl_right label')
salary = job.css('div#showScroll.box2Detail ul.DetailJobNew li.bgLine2 p.fl_right label')
deadline = job.css('div#showScroll.box2Detail ul.DetailJobNew li[3].bgLine1 p[2].fl_right > text()')
description = job.css('div.MarBot20')
address = job.css('div.box1Detail p.TitleDetailNew label label')
company_intro = job.css('div.desc_company.content_fck span#emp_more')
#skip if field blank
next if industry.text.blank?
#insert data to City table:
City.create(area_id: area.id, city_name: "#{location.text.gsub!(",", "")}", city_description: "")
# puts location.text
city_name = location.text.gsub(",", "")
City.find_or_create_by(area_id: area.id, city_name: city_name, city_description: "")
#insert data to Industry table
Industry.find_or_create_by(industry_name: "#{industry.text}", industry_description: "")
Industry.find_or_create_by(industry_name: industry.text, industry_description: "")
#insert data to Companies table
Company.find_or_create_by(company_name: "#{company_name.text}", company_description: "#{company_intro.text}", address: "#{address.text}" )
#insert data to Jobs table
city = City.find_or_create_by(area_id: area.id, city_name: "#{location.text}")
industryid = Industry.find_or_create_by(industry_name: "#{industry.text}")
companyid = Company.find_or_create_by(company_name: "#{company_name.text}")
city = City.find_by(area_id: area.id, city_name: city_name)
industryid = Industry.find_by(industry_name: industry.text)
companyid = Company.find_by(company_name: company_name.text)
Job.find_or_create_by(area_id: area.id, city_id: city.id, industry_id: industryid.id, company_id: companyid.id, job_name: "#{title.text}", salary: "#{salary.text}", deadline: "#{deadline.text}", level: "#{level.text}", experience: "#{experience.text.strip}", last_updated: "#{updated_date.text.strip}", description: "description.text")
# Look up or insert the Jobs row for this posting. The description must be
# interpolated from the scraped node; previously the literal string
# "description.text" was stored for every job.
Job.find_or_create_by(area_id: area.id, city_id: city.id , industry_id: industryid.id, company_id: companyid.id, job_name: "#{title.text}", salary: "#{salary.text}", deadline: "#{deadline.text}", level: "#{level.text}", experience: "#{experience.text.strip}", last_updated: "#{updated_date.text.strip}", description: "#{description.text}")
list_url = nextpage[0]["href"]
end
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment