Commit b8abb807 by Mai Hoang Thai Ha

add logger

parent 8328deb3
class CreateJobs < ActiveRecord::Migration[6.1]
def change
create_table :jobs do |t|
t.string :title
t.string :title, null: false
t.string :job_type
t.string :salary
t.string :experience
......
......@@ -105,7 +105,7 @@ ActiveRecord::Schema.define(version: 2021_07_20_055614) do
end
create_table "jobs", charset: "utf8mb4", collation: "utf8mb4_0900_ai_ci", force: :cascade do |t|
t.string "title"
t.string "title", null: false
t.string "job_type"
t.string "salary"
t.string "experience"
......
......@@ -7,6 +7,8 @@ namespace :crawler do
unless %w[ALL TEST].include?(ENV['TYPE'])
abort 'Do you want to crawl all pages (ALL) or some pages (TEST)? Please ONLY pass ONE argument.'
end
logger = Logger.new("#{Rails.root}/log/job_crawler.log")
logger.info "Start crawler job at: #{Time.current}"
total_pages = 5 # default = TEST
if ENV['TYPE'] == 'ALL'
first_page = Nokogiri::HTML(HTTParty.get('https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-vi.html').body)
......@@ -16,10 +18,12 @@ namespace :crawler do
end
(1..total_pages).each do |page|
parsed_page = Nokogiri::HTML(HTTParty.get("https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-trang-#{page}-vi.html").body)
logger.info("Page: #{page}")
jobs_item = parsed_page.css('div.job-item .job_link')
jobs_item.each do |item|
retries ||= 0
url ||= item.attribute('href').text
logger.info("job link: #{url}")
job_page = Nokogiri::HTML(HTTParty.get(url).body)
# Job
job_title = job_page.css('div.job-desc h1.title').text
......@@ -88,6 +92,7 @@ namespace :crawler do
job_object.cities << city_objects
rescue URI::InvalidURIError => e
puts "[Error] #{e.message}"
logger.error "URI must be ascii only : #{url}"
encode_url = CGI.escape(url.remove('https://careerbuilder.vn/vi/tim-viec-lam/'))
url = "https://careerbuilder.vn/vi/tim-viec-lam/#{encode_url}"
retry if (retries += 1) < 2
......@@ -96,6 +101,7 @@ namespace :crawler do
puts e.backtrace.inspect
end
end
logger.info "Finished at: #{Time.current}"
end
desc 'crawler industry form CareerBuilder'
......@@ -110,7 +116,7 @@ namespace :crawler do
desc 'crawler city form CareerBuilder'
task cities: :environment do
parsed_page ||= Nokogiri::HTML(HTTParty.get('https://careerbuilder.vn/tim-viec-lam.html').body)
parsed_page = Nokogiri::HTML(HTTParty.get('https://careerbuilder.vn/tim-viec-lam.html').body)
list_location = parsed_page.css('div.main-jobs-by-location ul li')
list_location.each do |city|
city_name = city.text
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment