Commit 19c487c5 by Ngô Trung Hưng

bug crawler city

parent 22843861
// Place all the styles related to the homepage controller here.
// Place all the styles related to the Home controller here.
// They will automatically be included in application.css.
// You can use Sass (SCSS) here: http://sass-lang.com/
# Base controller for the application; every controller that inherits from it
# also gets the CrawlerHelper methods as instance methods.
class ApplicationController < ActionController::Base
# Mixes the crawler methods (crawl_*/craw_data_companies) into all controllers.
include CrawlerHelper
end
class HomepageController < ApplicationController
# Controller behind the home/index route.
class HomeController < ApplicationController
  # Delegates the whole response to craw_data_companies, which is expected to
  # crawl the company links and render them as plain text.
  def index
    craw_data_companies
  end
end
# Scraping helpers for careerbuilder.vn.
#
# The public methods fetch pages with Mechanize; three of them also call
# `render plain:`, so the including class must provide `render` (e.g. a Rails
# controller).
module CrawlerHelper
  # Job-listing page whose search form holds the industry/location <select>s.
  ALL_JOBS_URL = "https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-vi.html"

  # Renders, as plain text, the label of every <option> under "#industry".
  def crawl_industries_data
    data_list_industries = crawl_option_labels("#industry option")
    render plain: data_list_industries
  end

  # Renders, as plain text, the label of every <option> under "#location".
  def crawl_cities_data
    data_list_cities = crawl_option_labels("#location option")
    render plain: data_list_cities
  end

  # Collects the href of every ".figcaption .caption a" link on the first
  # +num_page_will_crawl+ listing pages.
  #
  # Each href attribute is read individually instead of concatenating all of
  # them, splitting on "html" and re-joining through a comma-separated string,
  # which mangled URLs containing "," and looped one index past the end.
  #
  # @param num_page_will_crawl [Integer] number of listing pages to visit
  #   (defaults to 2, the value that used to be hard-coded)
  # @return [Array<String>] hrefs in page order, duplicates included; the same
  #   array is also stored in @website
  def crawl_link_for_companies_data(num_page_will_crawl = 2)
    agent = Mechanize.new
    @website = (1..num_page_will_crawl).flat_map do |i|
      page = agent.get("https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-trang-#{i}-vi.html")
      page.search(".figcaption .caption a/@href").map(&:text)
    end
  end

  # Crawls the listing links, de-duplicates them, requests every link once and
  # renders the de-duplicated list as plain text.
  def craw_data_companies
    link_crawl = crawl_link_for_companies_data
    # uniq! returns nil when nothing was removed, so it is never chained.
    link_crawl.uniq!
    agent = Mechanize.new
    # The fetched pages are not used yet; the requests are kept so that an
    # unreachable link still raises exactly as before.
    link_crawl.each { |url| agent.get(url) }
    render plain: link_crawl
  end

  private

  # Fetches ALL_JOBS_URL and returns the text of every node matching
  # +selector+, with newlines removed and trailing whitespace stripped.
  # Reading the node text (rather than regex-stripping serialized HTML) keeps
  # entities such as "&amp;" decoded.
  def crawl_option_labels(selector)
    page = Mechanize.new.get(ALL_JOBS_URL)
    page.search(selector).map { |option| option.text.delete("\n").rstrip }
  end
end
\ No newline at end of file
<h1>Homepage#index</h1>
<p>Find me in app/views/homepage/index.html.erb</p>
......@@ -16,6 +16,7 @@ default: &default
username: root
password: '1'
socket: /var/run/mysqld/mysqld.sock
development:
<<: *default
......
# Application route table.
Rails.application.routes.draw do
# NOTE(review): both the 'homepage/index' and the 'home/index' routes are listed
# here; the same change renames HomepageController to HomeController, so the
# first route presumably no longer has a controller behind it — confirm.
get 'homepage/index'
get 'home/index'
# For details on the DSL available within this file, see http://guides.rubyonrails.org/routing.html
end
class CreateCompanies < ActiveRecord::Migration[5.2]
def change
create_table :companies do |t|
create_table :companies, :options => 'COLLATE=utf8_general_ci' do |t|
t.string :name
t.string :address
t.string :short_description
......
class CreateCities < ActiveRecord::Migration[5.2]
def change
create_table :cities do |t|
create_table :cities, :options => 'COLLATE=utf8_general_ci' do |t|
t.string :name
t.boolean :area
t.timestamps
......
class CreateIndustries < ActiveRecord::Migration[5.2]
def change
create_table :industries do |t|
create_table :industries, :options => 'COLLATE=utf8_general_ci' do |t|
t.string :name
t.timestamps
end
......
class CreateJobs < ActiveRecord::Migration[5.2]
def change
create_table :jobs do |t|
create_table :jobs, :options => 'COLLATE=utf8_general_ci' do |t|
t.string :name
t.integer :company_id
t.string :level
......
class CreateHistories < ActiveRecord::Migration[5.2]
def change
create_table :histories do |t|
create_table :histories, :options => 'COLLATE=utf8_general_ci' do |t|
t.integer :user_id
t.integer :job_id
t.timestamps
......
class CreateFavorites < ActiveRecord::Migration[5.2]
def change
create_table :favorites do |t|
create_table :favorites, :options => 'COLLATE=utf8_general_ci' do |t|
t.integer :user_id
t.integer :job_id
t.timestamps
......
class CreateUsers < ActiveRecord::Migration[5.2]
def change
create_table :users do |t|
create_table :users, :options => 'COLLATE=utf8_general_ci' do |t|
t.string :email
t.string :name
t.string :password_digest
......
class CreateCityJobs < ActiveRecord::Migration[5.2]
def change
create_table :city_jobs do |t|
create_table :city_jobs, :options => 'COLLATE=utf8_general_ci' do |t|
# t.belongs_to :city, class_name: "city", foreign_key: "city_id"
# t.belongs_to :job, class_name: "job", foreign_key: "job_id"
t.references :job
......
class CreateIndustryJobs < ActiveRecord::Migration[5.2]
def change
create_table :industry_jobs do |t|
create_table :industry_jobs, :options => 'COLLATE=utf8_general_ci' do |t|
t.references :industry
t.references :job
t.timestamps
......
class CreateAppliedJobs < ActiveRecord::Migration[5.2]
def change
create_table :applied_jobs do |t|
create_table :applied_jobs, :options => 'COLLATE=utf8_general_ci' do |t|
t.references :user
t.references :job
t.string :name
......
# Changes companies.short_description from :string to :text so longer
# descriptions fit in the column.
#
# `change_column` cannot be reversed automatically, so a `change` method made
# rollback raise ActiveRecord::IrreversibleMigration; explicit `up`/`down`
# keep the migration reversible.
class ChangeDatatypeToTableCompanies < ActiveRecord::Migration[5.2]
  # Widen the column to :text.
  def up
    change_column :companies, :short_description, :text
  end

  # Restore the original :string type.
  # NOTE(review): values longer than the string column limit may be truncated
  # or rejected by the database on rollback — confirm before rolling back data
  # you care about.
  def down
    change_column :companies, :short_description, :string
  end
end
......@@ -10,9 +10,9 @@
#
# It's strongly recommended that you check this file into your version control system.
ActiveRecord::Schema.define(version: 2020_07_15_025243) do
ActiveRecord::Schema.define(version: 2020_07_15_090747) do
create_table "applied_jobs", options: "ENGINE=InnoDB DEFAULT CHARSET=latin1", force: :cascade do |t|
create_table "applied_jobs", options: "ENGINE=InnoDB DEFAULT CHARSET=utf8", force: :cascade do |t|
t.bigint "user_id"
t.bigint "job_id"
t.string "name"
......@@ -24,14 +24,14 @@ ActiveRecord::Schema.define(version: 2020_07_15_025243) do
t.index ["user_id"], name: "index_applied_jobs_on_user_id"
end
create_table "cities", options: "ENGINE=InnoDB DEFAULT CHARSET=latin1", force: :cascade do |t|
create_table "cities", options: "ENGINE=InnoDB DEFAULT CHARSET=utf8", force: :cascade do |t|
t.string "name"
t.boolean "area"
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
end
create_table "city_jobs", options: "ENGINE=InnoDB DEFAULT CHARSET=latin1", force: :cascade do |t|
create_table "city_jobs", options: "ENGINE=InnoDB DEFAULT CHARSET=utf8", force: :cascade do |t|
t.bigint "job_id"
t.bigint "city_id"
t.datetime "created_at", null: false
......@@ -40,35 +40,35 @@ ActiveRecord::Schema.define(version: 2020_07_15_025243) do
t.index ["job_id"], name: "index_city_jobs_on_job_id"
end
create_table "companies", options: "ENGINE=InnoDB DEFAULT CHARSET=latin1", force: :cascade do |t|
create_table "companies", options: "ENGINE=InnoDB DEFAULT CHARSET=utf8", force: :cascade do |t|
t.string "name"
t.string "address"
t.string "short_description"
t.text "short_description"
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
end
create_table "favorites", options: "ENGINE=InnoDB DEFAULT CHARSET=latin1", force: :cascade do |t|
create_table "favorites", options: "ENGINE=InnoDB DEFAULT CHARSET=utf8", force: :cascade do |t|
t.integer "user_id"
t.integer "job_id"
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
end
create_table "histories", options: "ENGINE=InnoDB DEFAULT CHARSET=latin1", force: :cascade do |t|
create_table "histories", options: "ENGINE=InnoDB DEFAULT CHARSET=utf8", force: :cascade do |t|
t.integer "user_id"
t.integer "job_id"
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
end
create_table "industries", options: "ENGINE=InnoDB DEFAULT CHARSET=latin1", force: :cascade do |t|
create_table "industries", options: "ENGINE=InnoDB DEFAULT CHARSET=utf8", force: :cascade do |t|
t.string "name"
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
end
create_table "industry_jobs", options: "ENGINE=InnoDB DEFAULT CHARSET=latin1", force: :cascade do |t|
create_table "industry_jobs", options: "ENGINE=InnoDB DEFAULT CHARSET=utf8", force: :cascade do |t|
t.bigint "industry_id"
t.bigint "job_id"
t.datetime "created_at", null: false
......@@ -77,7 +77,7 @@ ActiveRecord::Schema.define(version: 2020_07_15_025243) do
t.index ["job_id"], name: "index_industry_jobs_on_job_id"
end
create_table "jobs", options: "ENGINE=InnoDB DEFAULT CHARSET=latin1", force: :cascade do |t|
create_table "jobs", options: "ENGINE=InnoDB DEFAULT CHARSET=utf8", force: :cascade do |t|
t.string "name"
t.integer "company_id"
t.string "level"
......@@ -90,7 +90,7 @@ ActiveRecord::Schema.define(version: 2020_07_15_025243) do
t.datetime "updated_at", null: false
end
create_table "users", options: "ENGINE=InnoDB DEFAULT CHARSET=latin1", force: :cascade do |t|
create_table "users", options: "ENGINE=InnoDB DEFAULT CHARSET=utf8", force: :cascade do |t|
t.string "email"
t.string "name"
t.string "password_digest"
......
# Rake tasks that seed lookup tables from the careerbuilder.vn search form.
namespace :db do
  # Job-listing page whose search form holds the location/industry <select>s.
  listing_url = "https://careerbuilder.vn/viec-lam/tat-ca-viec-lam-vi.html"

  # Options at positions 0..69 of the location list are stored with
  # area: true, all later ones with area: false.
  # NOTE(review): presumably this separates two groups of locations on the
  # site (e.g. domestic vs. foreign) — confirm against the live page.
  last_area_index = 69

  # Fetches the listing page and returns the text of every node matching
  # +selector+, with newlines removed and trailing whitespace stripped.
  # Reading node text instead of regex-stripping serialized HTML keeps entities
  # such as "&amp;" decoded. A lambda is used so no method is defined on
  # Object from within the Rakefile.
  fetch_option_labels = lambda do |selector|
    page = Mechanize.new.get(listing_url)
    page.search(selector).map { |option| option.text.delete("\n").rstrip }
  end

  # Placeholder task; intentionally empty.
  task populate: :environment do
  end

  # Creates one City per <option> of the "#location" select.
  # Not idempotent: every run inserts the full list again.
  task make_cities: :environment do
    fetch_option_labels.call("#location option").each_with_index do |name, index|
      City.create!(name: name, area: index <= last_area_index)
    end
  end

  # Creates one Industry per <option> of the "#industry" select.
  # Not idempotent: every run inserts the full list again.
  task make_industry: :environment do
    fetch_option_labels.call("#industry option").each do |name|
      Industry.create!(name: name)
    end
  end
end
require 'test_helper'
# Integration tests for HomeController; no assertions are implemented yet.
class HomeControllerTest < ActionDispatch::IntegrationTest
# Placeholder example test, left commented out:
# test "the truth" do
# assert true
# end
end
require 'test_helper'
# Integration test for the homepage index route.
class HomepageControllerTest < ActionDispatch::IntegrationTest
# Requests homepage_index_url and expects a successful response.
test "should get index" do
get homepage_index_url
assert_response :success
end
end
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment