Slide 33
Slide 33 text
# -*- coding: utf-8 -*-
from scrapy.spiders import CrawlSpider, Rule
from scrapy.linkextractors import LinkExtractor
class MySpider(CrawlSpider):
    """Crawl spider for the Agilize blog.

    Crawling starts at the blog home page. Links are followed according to
    ``rules``, and each page reached that way is handed to ``parse_item``.
    """

    name = "myspider"
    allowed_domains = ["blog.agilize.com.br"]
    # The trailing comma is required: without it the parentheses are only
    # grouping and ``start_urls`` would be a plain string instead of a tuple.
    start_urls = (
        "http://www.blog.agilize.com.br/",
    )

    rules = (
        # Rule 1: the link extractor is created with no arguments, so no
        # custom link filter is configured; responses for the extracted
        # links are dispatched to ``parse_item``.
        Rule(
            LinkExtractor(),
            callback="parse_item",
        ),
    )

    def parse_item(self, response):
        """Handle a page reached through ``rules``.

        Placeholder: nothing is extracted from ``response`` yet, so the
        method returns ``None``.
        """
# -*- coding: utf-8 -*-
import scrapy
class MySpider(scrapy.Spider):
    """Basic spider for the Agilize blog.

    Only the URLs listed in ``start_urls`` are requested; their responses
    are handed to ``parse``.
    """

    name = "myspider"
    allowed_domains = ["blog.agilize.com.br"]
    # The trailing comma is required: without it the parentheses are only
    # grouping and ``start_urls`` would be a plain string instead of a tuple.
    start_urls = (
        "http://www.blog.agilize.com.br/",
    )

    def parse(self, response):
        """Handle the response of a start URL.

        Placeholder: nothing is extracted from ``response`` yet, so the
        method returns ``None``.
        """