You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
This repo is archived. You can view files and clone it, but cannot push or open issues/pull-requests.

33 lines
1006 B

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

#encoding:utf-8
# Spider (crawler) configuration file.
##########################################
# The filter rules are designed as follows:
#   1. If any "exclude" word appears, the item is not shown.
#   2. Every "focus_include" (required) word must appear, otherwise the
#      item is not shown.
#   3. When rules 1 and 2 are satisfied, the item is shown if any
#      "include" word appears.
#   4. If no "include" word appears, the item is not shown.
##########################################

# Exclude words: an item containing any one of these is filtered out.
# Highest priority.
exclude = ['停止招生']

# Required words: these must appear, otherwise the item is not shown.
# Medium priority.
focus_include = ['2020']

# Include words: an item containing any one of these is shown.
# Lowest priority.
include = ['计算机', '软件', '电子信息', '人工智能', '网络', '大数据']

# How many pages to search.
page_end = 50

# Time interval between crawler requests. Never set this too low, or expect
# your IP to get banned.
# NOTE(review): the unit is presumably seconds -- confirm against spider.py.
interval = 3

# Location of the cache file.
# NOTE(review): the leading slash looks like it is meant to be appended to a
# base directory by the consumer rather than used as a filesystem-root path
# -- verify against spider.py before changing it.
cache_file = '/cache.dat'

# Whether to enable the debug report.
debug_info = False

# This module only holds settings; running it directly just reminds the user
# to launch spider.py instead.
if __name__ == "__main__":
    print("execute spider.py!!!\n NOT config.py")