Commit 05f0132b by thanhnd

fix comment's Toi-san

parent 6250ed61
Pipeline #462 canceled with stages
in 0 seconds
about
action_mailbox:ingress:exim
action_mailbox:ingress:postfix
action_mailbox:ingress:qmail
action_mailbox:install
action_text:install
active_storage:install
app:template
app:update
assets:clean[keep]
assets:clobber
assets:environment
assets:precompile
cache_digests:dependencies
cache_digests:nested_dependencies
db:create
db:drop
db:environment:set
db:fixtures:load
db:migrate
db:migrate:status
db:prepare
db:rollback
db:schema:cache:clear
db:schema:cache:dump
db:schema:dump
db:schema:load
db:seed
db:seed:replant
db:setup
db:structure:dump
db:structure:load
db:version
log:clear
middleware
restart
secret
stats
test
test:db
test:system
time:zones[country_or_offset]
tmp:clear
tmp:create
webpacker
webpacker:binstubs
webpacker:check_binstubs
webpacker:check_node
webpacker:check_yarn
webpacker:clean[keep]
webpacker:clobber
webpacker:compile
webpacker:info
webpacker:install
webpacker:install:angular
webpacker:install:coffee
webpacker:install:elm
webpacker:install:erb
webpacker:install:react
webpacker:install:stimulus
webpacker:install:svelte
webpacker:install:typescript
webpacker:install:vue
webpacker:verify_install
webpacker:yarn_install
yarn:install
zeitwerk:check
# SQLite. Versions 3.8.0 and up are supported.
# gem install sqlite3
#
# Ensure the SQLite 3 gem is defined in your Gemfile
# gem 'sqlite3'
#
#default: &default
# adapter: sqlite3
# pool: <%= ENV.fetch("RAILS_MAX_THREADS") { 5 } %>
# timeout: 5000
#development:
# <<: *default
# database: db/development.sqlite3
# Warning: The database defined as "test" will be erased and
# re-generated from your development database when you run "rake".
# Do not set this db to the same as development or production.
#test:
# <<: *default
# database: db/test.sqlite3
#production:
# <<: *default
# database: db/production.sqlite3
default: &default default: &default
adapter: mysql2 adapter: mysql2
encoding: unicode encoding: unicode
......
# Creates a single Area record with hard-coded placeholder values.
#
# The original line read `Area,create(...)` (comma instead of dot), which is
# not valid Ruby; the call is now dispatched to `Area.create`.
#
# NOTE(review): other Area calls in this file use `area_name:` /
# `area_description:` as attribute names — confirm that `name:` /
# `description:` are the intended keys here.
#
# Returns whatever `Area.create` returns.
def insert_area
  Area.create(name: 'aadd', description: 'adfad')
end
\ No newline at end of file
...@@ -3,7 +3,6 @@ require 'uri' ...@@ -3,7 +3,6 @@ require 'uri'
namespace :crawler do namespace :crawler do
task crawl: :environment do task crawl: :environment do
insert_data_area
crawl_data crawl_data
end end
...@@ -16,41 +15,40 @@ namespace :crawler do ...@@ -16,41 +15,40 @@ namespace :crawler do
#get link for next page #get link for next page
np = Nokogiri::HTML(open(list_url)) np = Nokogiri::HTML(open(list_url))
nextpage = np.css('div.paginationTwoStatus a.right') #nextpage = np.css('div.paginationTwoStatus a.right')
nextpage = np.css('.paginationTwoStatus .right')
puts nextpage[0]["href"] puts nextpage[0]["href"]
#get all links in one page #get all links in one page
links = page.css('span.jobtitle h3.job a') #links = page.css('span.jobtitle h3.job a')
links = page.css('.jobtitle .job a')
area = Area.find(1) area = Area.find(1)
links.each do |link| links.each do |link|
url = "#{link['href']}"
url.force_encoding('ASCII-8BIT')
uri = URI::encode(url)
url = link['href']
uri = URI::encode(url)
job = Nokogiri::HTML(open(uri)) job = Nokogiri::HTML(open(uri))
title = job.css('div.top-job div.top-job-info h1') title = job.css('.top-job-info h1')
company_name = job.css('div.top-job div.top-job-info div.tit_company') company_name = job.css('.top-job-info div.tit_company')
updated_date = job.css('div.datepost span') updated_date = job.css('.datepost span')
location = job.css('div#showScroll.box2Detail ul.DetailJobNew li[1].bgLine1 p[1].fl_left b a[2]') location = job.css('#showScroll .DetailJobNew li[1].bgLine1 p[1].fl_left b a[2]')
experience = job.css('div#showScroll.box2Detail ul.DetailJobNew li[2].bgLine2 p[1].fl_left > text()') experience = job.css('#showScroll .DetailJobNew li[2].bgLine2 p[1].fl_left > text()')
industry = job.css('div#showScroll.box2Detail ul.DetailJobNew li[3].bgLine1 p[1].fl_left b') industry = job.css('#showScroll .DetailJobNew li[3].bgLine1 p[1].fl_left b')
level = job.css('div#showScroll.box2Detail ul.DetailJobNew li.bgLine1 p.fl_right label') level = job.css('#showScroll .DetailJobNew .bgLine1 .fl_right label')
salary = job.css('div#showScroll.box2Detail ul.DetailJobNew li.bgLine2 p.fl_right label') salary = job.css('#showScroll .DetailJobNew .bgLine2 .fl_right label')
deadline = job.css('div#showScroll.box2Detail ul.DetailJobNew li[3].bgLine1 p[2].fl_right > text()') deadline = job.css('#showScroll .DetailJobNew li[3].bgLine1 p[2].fl_right > text()')
description = job.css('div.MarBot20') description = job.css('.MarBot20')
address = job.css('div.box1Detail p.TitleDetailNew label label') address = job.css('.box1Detail .TitleDetailNew label label')
company_intro = job.css('div.desc_company.content_fck span#emp_more') company_intro = job.css('.desc_company.content_fck span#emp_more')
#skip if field blank #skip if field blank
next if industry.text.blank? next if industry.text.blank?
#insert data to City table: #insert data to City table:
# puts location.text
city_name = location.text.gsub(",", "") city_name = location.text.gsub(",", "")
City.find_or_create_by(area_id: area.id, city_name: city_name, city_description: "") City.find_or_create_by(area_id: area.id, city_name: city_name, city_description: "")
...@@ -58,23 +56,18 @@ namespace :crawler do ...@@ -58,23 +56,18 @@ namespace :crawler do
Industry.find_or_create_by(industry_name: industry.text, industry_description: "") Industry.find_or_create_by(industry_name: industry.text, industry_description: "")
#insert data to Companies table #insert data to Companies table
Company.find_or_create_by(company_name: "#{company_name.text}", company_description: "#{company_intro.text}", address: "#{address.text}" ) Company.find_or_create_by(company_name: company_name.text, company_description: company_intro.text, address: address.text)
#insert data to Jobs table #insert data to Jobs table
city = City.find_by(area_id: area.id, city_name: city_name) city = City.find_by(area_id: area.id, city_name: city_name)
industryid = Industry.find_by(industry_name: industry.text) industryid = Industry.find_by(industry_name: industry.text)
companyid = Company.find_by(company_name: company_name.text) companyid = Company.find_by(company_name: company_name.text)
Job.find_or_create_by(area_id: area.id, city_id: city.id , industry_id: industryid.id, company_id: companyid.id, job_name: "#{title.text}", salary: "#{salary.text}", deadline: "#{deadline.text}", level: "#{level.text}", experience: "#{experience.text.strip}", last_updated: "#{updated_date.text.strip}", description: "description.text") Job.find_or_create_by(area_id: area.id, city_id: city.id , industry_id: industryid.id, company_id: companyid.id, job_name: title.text, salary: salary.text, deadline: deadline.text, level: level.text, experience: experience.text.strip, last_updated: updated_date.text.strip, description: description.text)
list_url = nextpage[0]["href"] list_url = nextpage[0]["href"]
end end
end end
end end
# Calls Area.find_or_create_by once per seed area, in order:
# "Viet Nam" (description "VN"), then "Nuoc Ngoai" (description "NN").
#
# Returns the result of the last find_or_create_by call.
def insert_data_area
  seed_areas = [
    ["Viet Nam", "VN"],
    ["Nuoc Ngoai", "NN"]
  ]

  seeded = seed_areas.map do |label, summary|
    Area.find_or_create_by(area_name: label, area_description: summary)
  end

  seeded.last
end
end end
# Rake namespace holding data-seeding tasks.
namespace :insert do
  # insert:areas_table — runs after the :environment prerequisite and calls
  # Area.find_or_create_by for each seed area, in order:
  # "Viet Nam" (description "VN"), then "Nuoc Ngoai" (description "NN").
  task areas_table: :environment do
    seed_areas = [
      ["Viet Nam", "VN"],
      ["Nuoc Ngoai", "NN"]
    ]

    seed_areas.each do |label, summary|
      Area.find_or_create_by(area_name: label, area_description: summary)
    end
  end
end
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment