pip install -i https://pypi.tuna.tsinghua.edu.cn/simple pywin32
pip install -i https://pypi.tuna.tsinghua.edu.cn/simple twisted
pip install -i https://pypi.tuna.tsinghua.edu.cn/simple scrapy
cmd命令
Py20200531\day04>scrapy startproject myjobspider
Py20200531\day04\myjobspider>scrapy genspider jobspider www.51job.com
jobspider.py
import scrapy
class JobspiderSpider(scrapy.Spider):
    """Spider that downloads the 51job home page and prints its HTML.

    The spider is registered under the name ``jobspiders``, which is the
    name to pass to ``scrapy crawl``.
    """

    name = 'jobspiders'
    # allowed_domains = ['www.51job.com']
    start_urls = ['http://www.51job.com/']

    def parse(self, response):
        """Print the serialized ``<html>`` element of a downloaded page.

        Args:
            response: The response object Scrapy passes in for each URL
                listed in ``start_urls``.
        """
        # .get() yields None when the XPath matches nothing, whereas
        # indexing extract()[0] would raise IndexError on such a response.
        html_text = response.xpath('/html').get()
        if html_text is None:
            self.logger.warning('No <html> element found in %s', response.url)
            return
        print(html_text)
startspider.py
from scrapy.cmdline import execute
if __name__ == '__main__':
    # Same as typing `scrapy crawl jobspiders` on the command line; the guard
    # keeps a plain import of this module from starting a crawl.
    execute(['scrapy', 'crawl', 'jobspiders'])
settings.py