You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
This repo is archived. You can view files and clone it, but cannot push or open issues/pull-requests.

33 lines
1006 B

4 years ago
# encoding: utf-8
# Spider (crawler) configuration file.
##########################################
# Filtering rules:
#   1. If any "exclude" word appears, the item is not shown.
#   2. The item must contain all "required" words, otherwise it is not shown.
#   3. With the two rules above satisfied, the item is shown if any
#      "include" word appears.
#   4. If no "include" word appears, the item is not shown.
##########################################

# Exclude words: an item containing any one of these is filtered out.
# Highest priority.
exclude = ['停止招生']

# Required words: these must appear, otherwise the item is not shown.
# Medium priority.
focus_include = ['2020']

# Include words: an item containing any one of these is shown.
# Lowest priority.
include = ['计算机', '软件', '电子信息', '人工智能', '网络', '大数据']

# How many pages to search.
page_end = 50

# Interval between crawler requests (presumably seconds -- confirm against
# spider.py). Never set this too low, or expect your IP to get banned.
interval = 3

# Cache file location.
# NOTE(review): the leading slash is kept as-is; presumably the consumer
# appends this to a base directory -- confirm, since used on its own it
# would resolve to the filesystem root.
cache_file = '/cache.dat'

# Whether to enable debug reporting.
debug_info = False

# This module only holds settings; remind the user to run the spider instead.
if __name__ == "__main__":
    print("execute spider.py!!!\n NOT config.py")