├── __init__.py ├── CNAME ├── gsil ├── tests │ ├── __init__.py │ ├── test_process.py │ ├── test_notification.py │ └── test_github.py ├── log.py ├── notification.py ├── __init__.py ├── process.py ├── config.py └── engine.py ├── _config.yml ├── .gitignore ├── requirements.txt ├── config.gsil.cfg.example ├── rules.gsil.yaml.example ├── tests.py ├── tests └── test.gsil ├── gsil.py ├── README-zh.md ├── README.md └── LICENSE /__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /CNAME: -------------------------------------------------------------------------------- 1 | gsil.feei.cn -------------------------------------------------------------------------------- /gsil/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-midnight -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | logs/ 2 | .idea/ 3 | .cache/ 4 | *.pyc 5 | config.gsil 6 | rules.gsil -------------------------------------------------------------------------------- /gsil/tests/test_process.py: -------------------------------------------------------------------------------- 1 | from gsil.process import clone 2 | 3 | 4 | def test_clone(): 5 | clone('https://github.com/FeeiCN/dict', 'ttt') 6 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | beautifulsoup4==4.6.0 2 | PyGithub==1.35 3 | IPy==0.83 4 | tld==0.7.9 5 | html5lib==0.999999999 6 | requests>=2.20.0 7 | colorlog==3.1.0 8 | 
Jinja2==2.11.3 9 | pyyaml -------------------------------------------------------------------------------- /config.gsil.cfg.example: -------------------------------------------------------------------------------- 1 | [mail] 2 | host : smtp.exmail.qq.com 3 | port : 25 4 | mails : your_mail 5 | from : GSIL 6 | password : your_password 7 | to : feei@feei.cn 8 | cc : feei@feei.cn 9 | 10 | [github] 11 | clone : false 12 | tokens : your_github_token 13 | -------------------------------------------------------------------------------- /rules.gsil.yaml.example: -------------------------------------------------------------------------------- 1 | { 2 | "test": { 3 | "mogujie.com": { 4 | "\"mogujie.org\"": { 5 | "mode": "normal-match", 6 | "ext": "php,java,python,go,js,properties" 7 | } 8 | } 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /tests.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from gsil.notification import Notification 3 | 4 | 5 | class Tests(unittest.TestCase): 6 | def test_notification(self): 7 | subject = 'test from GSIL' 8 | to = cc = 'feei@feei.cn' 9 | html = '

Test Content from GSIL

' 10 | self.assertTrue(Notification(subject, to, cc).notification(html)) 11 | 12 | 13 | if __name__ == '__main__': 14 | unittest.main() 15 | -------------------------------------------------------------------------------- /tests/test.gsil: -------------------------------------------------------------------------------- 1 | # 如果从扫描器看过来的,对比这个文件的最后修改时间和收到泄露通知的时间 2 | # 如果大于10分钟,可以直接使用GSIL替换掉原有的泄露监控 3 | http://gsil.home.mogujie.org/test_by_feei_for_gsil.py 4 | http://gsil.tencent.oa.com/test_by_feei_for_gsil.py 5 | http://gsil.www.mogujie.org/test_by_feei_for_gsil.java 6 | http://gsil.iqiyi.host/test_by_feei_for_gsil.java 7 | http://gsil.dev.qunar.com/test_by_feei_for_gsil.java 8 | http://gsil.taobao.net/test_by_feei_for_gsil.java 9 | http://gsil.alipay.net/test_by_feei_for_gsil.java 10 | http://gsil.intra.xiaojukeji.com/test_by_feei_for_gsil.java 11 | http://gsil.tencent.com/test_by_feei_for_gsil.java 12 | -------------------------------------------------------------------------------- /gsil/tests/test_notification.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """ 4 | tests.text_notification 5 | ~~~~~~~~~~~~~~~~~~~~~~~ 6 | 7 | Implements test notification 8 | 9 | :author: Feei 10 | :homepage: https://github.com/FeeiCN/gsil 11 | :license: GPL, see LICENSE for more details. 12 | :copyright: Copyright (c) 2018 Feei. 
All rights reserved 13 | """ 14 | from gsil.notification import Notification 15 | from gsil.process import send_running_data_report 16 | 17 | 18 | def test_send(): 19 | assert True is Notification('Test', 'feei@feei.cn').notification('This is a test mail') 20 | 21 | 22 | def test_send_running_data(): 23 | assert send_running_data_report() 24 | -------------------------------------------------------------------------------- /gsil.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | """ 5 | GSIL 6 | ~~~~ 7 | 8 | Implements GSIL entry 9 | 10 | Usage: 11 | python gsil.py test 12 | 13 | :author: Feei 14 | :homepage: https://github.com/wufeifei/cobra 15 | :license: MIT, see LICENSE for more details. 16 | :copyright: Copyright (c) 2018 Feei. All rights reserved 17 | """ 18 | import sys 19 | import traceback 20 | from gsil import gsil 21 | from gsil.notification import Notification 22 | 23 | if __name__ == '__main__': 24 | try: 25 | if len(sys.argv) < 2: 26 | print('python gsil.py ') 27 | exit(0) 28 | sys.exit(gsil()) 29 | except Exception as e: 30 | # 发送异常报告 31 | content = '{a}\r\n{e}'.format(a=' '.join(sys.argv), e=traceback.format_exc()) 32 | Notification('GSIL Exception').notification(content) 33 | -------------------------------------------------------------------------------- /gsil/tests/test_github.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """ 4 | tests.test_github 5 | ~~~~~~~~~~~~~~~~~ 6 | 7 | Implements test GitHub API 8 | 9 | :author: Feei 10 | :homepage: https://github.com/FeeiCN/gsil 11 | :license: GPL, see LICENSE for more details. 12 | :copyright: Copyright (c) 2018 Feei. 
All rights reserved 13 | """ 14 | import base64 15 | import pytest 16 | from github import Github, BadCredentialsException 17 | 18 | TOKEN = base64.b64decode('YzA4YTVhOTA1ZGExYjg5YTc1ZmI4NmE3MmM3ZjUyNzg2NmRmZmRlNA==').decode() 19 | 20 | 21 | def test_init_success(): 22 | g = Github(login_or_token=TOKEN) 23 | try: 24 | limit, limit2 = g.rate_limiting 25 | assert limit > 4900 26 | except BadCredentialsException as e: 27 | assert False 28 | 29 | g = Github(login_or_token=TOKEN[0:18]) 30 | with pytest.raises(BadCredentialsException) as e: 31 | g = g.rate_limiting 32 | -------------------------------------------------------------------------------- /gsil/log.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """ 4 | log 5 | ~~~ 6 | 7 | Implements color logger 8 | 9 | :author: Feei 10 | :homepage: https://github.com/wufeifei/cobra 11 | :license: MIT, see LICENSE for more details. 12 | :copyright: Copyright (c) 2018 Feei. All rights reserved 13 | """ 14 | import os 15 | import colorlog 16 | import logging 17 | from logging import handlers 18 | 19 | log_path = 'logs' 20 | if os.path.isdir(log_path) is not True: 21 | os.mkdir(log_path, 0o755) 22 | logfile = os.path.join(log_path, 'gsil.log') 23 | 24 | handler = colorlog.StreamHandler() 25 | formatter = colorlog.ColoredFormatter( 26 | '%(log_color)s%(asctime)s [%(name)s] [%(levelname)s] %(message)s%(reset)s', 27 | datefmt=None, 28 | reset=True, 29 | log_colors={ 30 | 'DEBUG': 'cyan', 31 | 'INFO': 'green', 32 | 'WARNING': 'yellow', 33 | 'ERROR': 'red', 34 | 'CRITICAL': 'red,bg_white', 35 | }, 36 | secondary_log_colors={}, 37 | style='%' 38 | ) 39 | handler.setFormatter(formatter) 40 | 41 | file_handler = handlers.RotatingFileHandler(logfile, maxBytes=(1048576 * 5), backupCount=7) 42 | file_handler.setFormatter(formatter) 43 | 44 | logger = colorlog.getLogger('GSIL') 45 | logger.addHandler(handler) 46 | logger.addHandler(file_handler) 47 | 
logger.setLevel(logging.INFO) 48 | -------------------------------------------------------------------------------- /gsil/notification.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """ 4 | notification 5 | ~~~~~~~~~~~~ 6 | 7 | Implements notification(mail) 8 | 9 | :author: Feei 10 | :homepage: https://github.com/FeeiCN/gsil 11 | :license: GPL, see LICENSE for more details. 12 | :copyright: Copyright (c) 2018 Feei. All rights reserved 13 | """ 14 | import random 15 | import smtplib 16 | import traceback 17 | from smtplib import SMTPException 18 | from email.mime.text import MIMEText 19 | from email.mime.multipart import MIMEMultipart 20 | from .config import get 21 | from .log import logger 22 | 23 | 24 | class Notification(object): 25 | def __init__(self, subject, to=None, cc=None): 26 | """ 27 | Initialize notification class 28 | :param subject: 29 | :param to: 30 | """ 31 | self.subject = subject 32 | if to is None: 33 | self.to = get('mail', 'to') 34 | else: 35 | self.to = to 36 | if cc is None: 37 | self.cc = get('mail', 'cc') 38 | else: 39 | self.cc = cc 40 | 41 | def notification(self, html): 42 | """ 43 | Send notification use by mail 44 | :param html: 45 | :return: 46 | """ 47 | # 随机挑选一个邮箱来发送,避免由于发送量过大导致被封 48 | mails = get('mail', 'mails').split(',') 49 | mail = random.choice(mails) 50 | mail_from = get('mail', 'from') 51 | msg = MIMEMultipart() 52 | msg['Subject'] = self.subject 53 | msg['From'] = f'{mail} <{mail_from}>' 54 | # 支持多用户接收邮件 55 | msg['To'] = self.to 56 | msg['Cc'] = self.cc 57 | 58 | text = MIMEText(html, 'html', 'utf-8') 59 | msg.attach(text) 60 | host = get('mail', 'host').strip() 61 | port = get('mail', 'port').strip() 62 | 63 | try: 64 | if port == '465': 65 | port = int(port) 66 | s = smtplib.SMTP_SSL(host, port) 67 | else: 68 | s = smtplib.SMTP(host, port) 69 | s.ehlo() 70 | s.starttls() 71 | s.ehlo() 72 | s.login(mail, get('mail', 'password')) 73 | 
s.sendmail(mail, self.to.split(',')+self.cc.split(','), msg.as_string()) 74 | s.quit() 75 | return True 76 | except SMTPException: 77 | logger.critical('Send mail failed') 78 | traceback.print_exc() 79 | return False 80 | -------------------------------------------------------------------------------- /README-zh.md: -------------------------------------------------------------------------------- 1 | # GSIL(GitHub敏感信息泄露) 2 | 3 | [English documents](https://github.com/FeeiCN/GSIL/blob/master/README.md) 4 | 5 | > 近实时监控GitHub敏感信息泄露,并发送告警通知。 6 | 7 | ## 安装 8 | 9 | > 仅在Python3下验证过 10 | 11 | ```bash 12 | $ git clone https://github.com/FeeiCN/GSIL.git 13 | $ cd GSIL/ 14 | $ pip install -r requirements.txt 15 | ``` 16 | 17 | ## 配置 18 | 19 | ### GSIL/config.gsil(复制config.gsil.cfg.example并重命名config.gsil.cfg): 告警邮箱和Github配置 20 | 21 | ``` 22 | [mail] 23 | host : smtp.exmail.qq.com 24 | # SMTP端口(非SSL端口,但会使用TLS加密) 25 | port : 25 26 | # 多个发件人使用逗号(,)分隔 27 | mails : gsil@feei.cn 28 | from : GSIL 29 | password : your_password 30 | # 多个收件人使用逗号(,)分隔 31 | to : feei@feei.cn 32 | 33 | [github] 34 | # 扫描到的漏洞仓库是否立刻Clone到本地(~/.gsil/codes/) 35 | # 此选项用作监控其它厂商,避免因为仓库所有者发现后被删除 36 | clone: false 37 | 38 | # GitHub Token用来调用相关API,多个Token使用逗号(,)分隔 39 | # https://github.com/settings/tokens 40 | # GitHub已调整最新的速度限制,请求数量大幅降低,单账户多Token将共享限速,详见https://docs.github.com/en/rest/reference/search#rate-limit 以及 https://docs.github.com/en/rest/reference/rate-limit 41 | # 建议你根据关键词数量配置多个GitHub账户的Token以避免超速 42 | tokens : your_token 43 | ``` 44 | 45 | ### GSIL/rules.gsil(复制rules.gsil.yaml.example并重命名rules.gsil.yaml): 扫描规则 46 | 47 | > 规则一般选用内网独立的特征,比如蘑菇街的外网是mogujie.com,蘑菇街的内网是mogujie.org,则可以将mogujie.org作为一条规则。 48 | 49 | > 其它还有类似代码头部特征、外部邮箱特征等 50 | 51 | | 字段 | 意义 | 选填 | 默认 | 描述 | 52 | | --- | --- | --- | --- | --- | 53 | | keyword | 关键词 | 必填 | - | 多个关键词可以用空格,比如‘账号 密码’;某些关键字出现的结果非常多,所以需要精确搜索时可以用双引号括起来,比如‘”ele.me“’;| 54 | | ext | 指定文件后缀 | 可选 | 全部后缀 | 多个后缀可以使用英文半角逗号(,)分隔,比如`java,php,python` | 55 | | mode | 匹配模式 | 可选 | 
正常匹配(normal-match) | 正常匹配(normal-match):匹配包含keyword的行,并记录该行附近行 / 仅匹配(only-match):仅匹配包含keyword行 / 全部匹配(full-match)(不推荐使用):搜出来的整个问题都算作结果 | 56 | 57 | ``` 58 | { 59 | # 一级分类,一般使用公司名,用作开启扫描的第一个参数(python gsil.py test) 60 | "test": { 61 | # 二级分类,一般使用产品线 62 | "mogujie": { 63 | # 公司内部域名 64 | "\"mogujie.org\"": { 65 |                # mode/ext默认可不填 66 |                "mode": "normal-match", 67 | "ext": "php,java,python,go,js,properties" 68 | }, 69 | # 公司代码特征 70 | "copyright meili inc": {}, 71 | # 内部主机域名 72 | "yewu1.db.mogujie.host": {}, 73 | # 外部邮箱 74 | "mail.mogujie.com": {} 75 | }, 76 | "meilishuo": { 77 | "meilishuo.org": {}, 78 | "meilishuo.io": {} 79 | } 80 | } 81 | } 82 | ``` 83 | 84 | ## 用法 85 | 86 | ```bash 87 | # 启动测试 88 | $ python gsil.py test 89 | 90 | # 测试token有效性 91 | $ python gsil.py --verify-tokens 92 | ``` 93 | 94 | ```bash 95 | $ crontab -e 96 | 97 | # 漏洞报告,此项任务发现漏洞后会立刻发送漏洞报告 98 | # 每个小时运行一次,GitHub API接口调用频率限制可以根据token数量、规则数量来调整crontab频率实现,若觉得麻烦可简单配置多个token来实现。 99 | # crontab执行时间决定了报告的发送时效性,间隔越短报告越快但频率限制越容易触发 100 | # 建议配置5个token+20条规则,每15分钟运行一次(可以配置更短,根据各自需求确定) 101 | */15 * * * * /usr/bin/python /var/app/GSIL/gsil.py test > /tmp/gsil 102 | 103 | # 统计报告,发送一天的扫描进展,包括运行次数、成功次数、失败次数、发现漏洞数、各域名状况、异常等等 104 | # 每天晚上11点发送统计报告 105 | 0 23 * * * /usr/bin/python /var/app/GSIL/gsil.py --report 106 | ``` 107 | 108 | *扫描报告过一次的将不会重复报告,缓存记录在~/.gsil/目录* 109 | 110 | ## 引用 111 | 112 | - [GSIL详细介绍](https://feei.cn/gsil/) 113 | -------------------------------------------------------------------------------- /gsil/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """ 4 | GSIL 5 | ~~~~ 6 | 7 | Implements Github Sensitive Information Leak 8 | 9 | :author: Feei 10 | :homepage: https://github.com/FeeiCN/gsil 11 | :license: GPL, see LICENSE for more details. 12 | :copyright: Copyright (c) 2018 Feei. 
All rights reserved 13 | """ 14 | import sys 15 | import time 16 | import random 17 | import traceback 18 | import multiprocessing 19 | from .engine import Engine 20 | from .log import logger 21 | from .config import Config, get_rules, tokens, daily_run_data 22 | from .process import send_running_data_report 23 | 24 | running_data = [] 25 | 26 | 27 | # search single rule 28 | def search(idx, rule): 29 | """ 30 | class instance can't pickle in apply_async 31 | :param idx: 32 | :param rule: 33 | :return: 34 | """ 35 | token = random.choice(tokens) 36 | try: 37 | return Engine(token=token).search(rule) 38 | except Exception as e: 39 | traceback.print_exc() 40 | return False, None, traceback.format_exc() 41 | 42 | 43 | # store search result 44 | def store_result(result): 45 | """ 46 | store running result 47 | :param result: 48 | :return: 49 | """ 50 | r_ret, r_rule, r_msg = result 51 | if r_ret: 52 | r_datetime = time.strftime("%Y-%m-%d %H:%M:%S") 53 | # 不需要的类型过滤掉 54 | if r_rule.corp.lower() in ['vulbox']: 55 | return 56 | with open(Config().run_data, 'a') as f: 57 | rule = f'[{r_rule.types}][{r_rule.corp}][{r_rule.keyword}]' 58 | f.write(f'{r_datetime} {r_ret} {rule} {r_msg}\r\n') 59 | # store list 60 | running_data.append([r_datetime, r_ret, rule, r_msg]) 61 | 62 | 63 | # start 64 | def start(rule_types): 65 | rules = get_rules(rule_types) 66 | if len(rules) == 0: 67 | logger.critical('get rules failed, rule types not found!') 68 | exit(0) 69 | logger.info(f'rules length: {len(rules)}') 70 | pool = multiprocessing.Pool() 71 | for idx, rule_object in enumerate(rules): 72 | logger.info(f'>>>>>>>>>>>>> {rule_object.corp} > {rule_object.keyword} >>>>>>') 73 | pool.apply_async(search, args=(idx, rule_object), callback=store_result) 74 | pool.close() 75 | pool.join() 76 | 77 | 78 | # generate report file 79 | def generate_report(data): 80 | for rd in data: 81 | datetime, ret, rule, msg = rd 82 | html = f'
  • {datetime} {ret} {rule} {msg}
  • ' 83 | run_data = daily_run_data() 84 | run_data['list'].append(html) 85 | if ret: 86 | run_data['found_count'] += msg 87 | run_data['job_success'] += 1 88 | else: 89 | run_data['job_failed'] += 1 90 | daily_run_data(run_data) 91 | 92 | 93 | def gsil(): 94 | if sys.argv[1] == '--report': 95 | # send daily running data report 96 | send_running_data_report() 97 | elif sys.argv[1] == '--verify-tokens': 98 | # verify tokens 99 | for i, token in enumerate(tokens): 100 | ret, msg = Engine(token=token).verify() 101 | logger.info(f'{i} {ret} token: {token} {msg}') 102 | else: 103 | logger.info('start monitor github information leakage: {types}'.format(types=sys.argv[1])) 104 | # start 105 | start(sys.argv[1]) 106 | # start generate report file 107 | generate_report(running_data) 108 | 109 | 110 | if __name__ == '__main__': 111 | gsil() 112 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # GSIL(GitHub Sensitive Information Leakage) 2 | 3 | [中文文档](https://github.com/FeeiCN/GSIL/blob/master/README-zh.md) 4 | 5 | > Monitor Github sensitive information leaks in near real time and send alert notifications. 
6 | 7 | ## Installation 8 | 9 | > Python 3 (Python 2 is not tested) 10 | 11 | ```bash 12 | $ git clone https://github.com/FeeiCN/GSIL.git 13 | $ cd GSIL/ 14 | $ pip install -r requirements.txt 15 | ``` 16 | 17 | ## Configuration 18 | 19 | ### GSIL/config.gsil.cfg (copy config.gsil.cfg.example to config.gsil.cfg): alert mailbox and GitHub configuration 20 | 21 | ```conf 22 | [mail] 23 | host : smtp.exmail.qq.com 24 | # SMTP port (not the SSL port, but STARTTLS encryption is used) 25 | port : 25 26 | # Multiple senders are separated by comma (,) 27 | mails : gsil@domain.com 28 | from : GSIL 29 | password : your_password 30 | # Multiple recipients are separated by comma (,) 31 | to : feei@feei.cn 32 | 33 | [github] 34 | # Whether repositories with findings are cloned to local disk immediately 35 | # Cloned into the ~/.gsil/codes/ directory 36 | clone: false 37 | 38 | # GitHub token; multiple tokens are separated by comma (,) 39 | # https://github.com/settings/tokens 40 | tokens : your_token 41 | ``` 42 | 43 | ### GSIL/rules.gsil.yaml (copy rules.gsil.yaml.example to rules.gsil.yaml): scanning rules 44 | 45 | > Generally, the best rule is a string that is unique to the intranet (Example: mogujie's extranet is `mogujie.com`, intranet is `mogujie.org`. 
In that case, `mogujie.org` can be used as a rule) 46 | 47 | > Other useful rules include distinctive code-header text, external mailbox domains, and so on 48 | 49 | | field | meaning | optional | default | description | 50 | | --- | --- | --- | --- | --- | 51 | | keyword | keyword | required | - | Separate multiple keywords with spaces (Example: `'username password'`). For an exact-match search, wrap the keyword in double quotes (Example: `"ele.me"`) | 52 | | ext | file suffix | optional | all suffixes | Multiple suffixes are separated by comma (Example: `java,php,python`) | 53 | | mode | matching mode | optional | normal-match | `normal-match` (matches the lines that contain the keyword and records the nearby lines) / `only-match` (matches only the lines that contain the keyword) / `full-match` (not recommended; the entire file is treated as the result) | 54 | 55 | ``` 56 | { 57 | # Top-level category, usually the company name; used as the first argument to start a scan (Example: `python gsil.py test`) 58 | "test": { 59 | # Second-level category, usually the product line 60 | "mogujie": { 61 | # Internal domain name of the company 62 | "\"mogujie.org\"": { 63 |                # mode/ext are optional and may be omitted 64 |                "mode": "normal-match", 65 | "ext": "php,java,python,go,js,properties" 66 | }, 67 | # Company-specific code signature 68 | "copyright meili inc": {}, 69 | # Internal host domain name 70 | "yewu1.db.mogujie.host": {}, 71 | # External mailbox 72 | "mail.mogujie.com": {} 73 | }, 74 | "meilishuo": { 75 | "meilishuo.org": {}, 76 | "meilishuo.io": {} 77 | } 78 | } 79 | } 80 | ``` 81 | 82 | ## Usage 83 | 84 | ```bash 85 | $ python gsil.py test 86 | 87 | # Verify tokens validity 88 | $ python gsil.py --verify-tokens 89 | ``` 90 | 91 | ```bash 92 | $ crontab -e 93 | 94 | # Run every hour 95 | 0 * * * * /usr/bin/python /var/app/GSIL/gsil.py test > /tmp/gsil 96 | # Send a statistical report at 11 p. m. 
every night 97 | 0 23 * * * /usr/bin/python /var/app/GSIL/gsil.py --report 98 | ``` 99 | * Once the scan report will not repeat the report, the cache records in ~/.gsil/ directory * 100 | 101 | ## Reference 102 | - [GSIL详细介绍](https://feei.cn/gsil/) 103 | -------------------------------------------------------------------------------- /gsil/process.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """ 4 | process 5 | ~~~~~~~ 6 | 7 | Implements process content 8 | 9 | :author: Feei 10 | :homepage: https://github.com/FeeiCN/gsil 11 | :license: GPL, see LICENSE for more details. 12 | :copyright: Copyright (c) 2018 Feei. All rights reserved 13 | """ 14 | import os 15 | import time 16 | import shutil 17 | import subprocess 18 | from jinja2 import utils 19 | from .notification import Notification 20 | from .config import Config, get, daily_run_data, code_path 21 | from .log import logger 22 | 23 | 24 | class Process(object): 25 | def __init__(self, content, rule_object): 26 | """ 27 | Process content 28 | :param content: 29 | :param rule_object: 30 | """ 31 | self.content = content 32 | self.rule_object = rule_object 33 | 34 | def process(self, maybe_mistake=False): 35 | logger.info(f'Process count: {len(self.content)}') 36 | ret_mail = self._send_mail(maybe_mistake) 37 | if ret_mail: 38 | for i, v in self.content.items(): 39 | Config().add_hash(v['hash']) 40 | logger.debug('{hash} add success!'.format(hash=v['hash'])) 41 | logger.debug('send mail success!') 42 | return ret_mail 43 | 44 | def _send_mail(self, maybe_mistake=False): 45 | """ 46 | Send mail 47 | :return: boolean 48 | """ 49 | if len(self.content) == 0: 50 | logger.info('none content for send mail') 51 | return True 52 | if maybe_mistake: 53 | title = '〔GSIL〕MB_MT ' 54 | else: 55 | title = '〔GSIL〕' 56 | subject = f'{title}[{self.rule_object.types}] [{self.rule_object.corp}] {len(self.content)}' 57 | to = get('mail', 'to') 58 | cc = 
get('mail', 'cc') 59 | html = '

    Rule: {rule_regex} Count: {count} Datetime: {datetime}

    '.format( 60 | rule_regex=self.rule_object.keyword, datetime=time.strftime("%Y-%m-%d %H:%M:%S"), count=len(self.content)) 61 | for i, v in self.content.items(): 62 | html += '

    ({i}){hash} {repository}/{path}

    '.format(i=i, url=v['url'], 63 | hash=v['hash'][:6], 64 | repository=v['repository'], 65 | path=v['path']) 66 | if len(v['match_codes']) > 0: 67 | code = '' 68 | for c in v['match_codes']: 69 | code += '{c}
    '.format(c=utils.escape(c)) 70 | html += '{code}
    '.format(code=code) 71 | self._save_file(v['hash'], v['code']) 72 | html += '' 73 | return Notification(subject, to, cc).notification(html) 74 | 75 | @staticmethod 76 | def _save_file(sha, data): 77 | """ 78 | Save content to file 79 | :param sha: 80 | :param data: 81 | :return: 82 | """ 83 | with open(os.path.join(Config().data_path, sha), 'w+', encoding='utf-8') as f: 84 | f.writelines(data) 85 | return True 86 | 87 | 88 | def send_running_data_report(): 89 | data = daily_run_data() 90 | subject = '〔GSIL〕RUN DATA <{date}>'.format(date=time.strftime("%m-%d")) 91 | to = get('mail', 'to') 92 | content = '

    FOUND COUNT: {c} / JOB SUCCESS: {s} / JOB FAILED: {f}

    '.format( 93 | c=data['found_count'], 94 | s=data['job_success'], 95 | f=data['job_failed'] 96 | ) 97 | for l in data['list']: 98 | content += l 99 | ret = Notification(subject, to).notification(content) 100 | logger.info(f'Ret: {ret}') 101 | return ret 102 | 103 | 104 | def clone(git_url, dist_dir): 105 | # 下载会非常占用磁盘 106 | if get('github', 'clone').strip().lower() == 'false': 107 | return 108 | path = os.path.join(code_path, dist_dir) 109 | if os.path.isdir(path): 110 | shutil.rmtree(path) 111 | param = ['/usr/bin/git', 'clone', git_url, path] 112 | print(' '.join(param)) 113 | assert subprocess.Popen(param, stdout=subprocess.PIPE, stderr=subprocess.PIPE) 114 | -------------------------------------------------------------------------------- /gsil/config.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """ 4 | config 5 | ~~~~~~ 6 | 7 | Implements configuration 8 | 9 | :author: Feei 10 | :homepage: https://github.com/FeeiCN/gsil 11 | :license: GPL, see LICENSE for more details. 12 | :copyright: Copyright (c) 2018 Feei. 
All rights reserved 13 | """ 14 | import os 15 | import time 16 | import json 17 | import yaml 18 | import traceback 19 | import configparser 20 | from .log import logger 21 | 22 | home_path = os.path.join(os.path.expandvars(os.path.expanduser("~")), ".gsil") 23 | code_path = os.path.join(home_path, 'codes') 24 | project_directory = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir)) 25 | config_path = os.path.join(project_directory, 'config.gsil.cfg') 26 | rules_path = os.path.join(project_directory, 'rules.gsil.yaml') 27 | 28 | 29 | def get(level1=None, level2=None): 30 | """ 31 | Get config value 32 | :param level1: 33 | :param level2: 34 | :return: string 35 | """ 36 | if level1 is None and level2 is None: 37 | return 38 | config = configparser.ConfigParser() 39 | 40 | config.read(config_path) 41 | value = None 42 | try: 43 | value = config.get(level1, level2) 44 | value = value.strip() 45 | except Exception as e: 46 | print(level1, level2) 47 | traceback.print_exc() 48 | print(f"GSIL/config.gsil.yaml file configure failed.\nError: {e}") 49 | return value 50 | 51 | 52 | # GitHub tokens 53 | try: 54 | tokens = get('github', 'tokens') 55 | if ',' in tokens: 56 | tokens = tokens.split(',') 57 | else: 58 | tokens = [tokens] 59 | except Exception as e: 60 | logger.critical(f'github -> tokens sections error {traceback.format_exc()}') 61 | exit(0) 62 | 63 | exclude_repository_rules = [ 64 | # 65 | # 添加此规则要确保一定不会出现误报 66 | # 由于repository_path全部转为小写了,所以规则也全部小写 67 | # 68 | # GitHub博客 69 | r'(github.io)|(github.com)$', 70 | # Android客户端项目 71 | r'(app/src/main)', 72 | # 爬虫 73 | r'(crawler)|(spider)|(scrapy)|(爬虫)', 74 | # 文档 75 | # doc可能存在误报 76 | r'((开发文档)|(api))', 77 | # 软件作者 78 | r'(jquery)|(contact)|(readme)|(authors)', 79 | # 软件配置 80 | r'(surge)|(adblock)|(hosts)|(\.pac)|(ads)|(blacklist)|(package\.json)|(podspec\.json)|(tracking_servers)', 81 | # 无用东西 82 | 
r'(linux_command_set)|(domains)|(sdk)|(linux)|(\.html)|(\.apk)|(domain-list)|(easylist)|(urls)|(easylist)|(http_analytic)|(filtersregistry)|(PhyWall\.java)', 83 | ] 84 | 85 | exclude_codes_rules = [ 86 | # 超链接 87 | r'(href)', 88 | # 框架 89 | r'(iframe\ src)', 90 | # 邮件schema 91 | r'(mailto:)', 92 | # Markdown 93 | r'(\]\()', 94 | r'(npm\.taobao\.org)', 95 | r'(HOST-SUFFIX)|(DOMAIN-SUFFIX)', 96 | ] 97 | 98 | public_mail_services = [ 99 | 'msg.com', 100 | '126.com', 101 | '139.com', 102 | '163.com', 103 | 'qq.com', 104 | 'vip.qq.com', 105 | 'gmail.com', 106 | 'sina.com.cn', 107 | 'sina.com', 108 | 'aliyun.com', 109 | 'sohu.com', 110 | 'yeah.net', 111 | 'msn.com', 112 | 'mail.com', 113 | 'outlook.com', 114 | 'live.com', 115 | 'foxmail.com', 116 | 'mai.com', 117 | 'example.com', 118 | 'example.org', 119 | 'yourdomain.com', 120 | 'domain.com', 121 | 'company.com', 122 | 'otherdomain.com', 123 | 'mydomain.com', 124 | 'host.com', 125 | 'yourhost.com', 126 | 'domain.tld', 127 | 'foo.bar', 128 | 'bar.com', 129 | 'dom.ain', 130 | 'localhost.com', 131 | 'xxxxx.com', 132 | 'xxxx.com', 133 | 'xxx.com', 134 | 'xx.com', 135 | 'email.com' 136 | ] 137 | 138 | # Rules Structure Design 139 | # 140 | # 'rule keywords': { 141 | # 'mode': '' // RuleMode: normal-match(default)/only-match/full-match/mail 142 | # 'extension': '' // search extension: (default)/txt/md/java/python/etc... 
class Rule(object):
    """
    One search rule loaded from the rules file

    :param types: rule type (top-level key of the rules file)
    :param corp: name of the group the rule belongs to (second-level key)
    :param keyword: keyword to search for
    :param mode: match mode, 'normal-match' when not configured
    :param extension: comma separated file extensions, None for no restriction
    """

    def __init__(self, types=None, corp=None, keyword=None, mode='normal-match', extension=None):
        self.types = types
        self.corp = corp
        self.keyword = keyword
        self.mode = mode
        self.extension = extension


def get_rules(rule_types):
    """
    Build the Rule objects of the requested rule types

    :param rule_types: one rule type, or several separated by commas
                       (whitespace around each name is ignored)
    :return: list of Rule, in the order of the rules file
    """
    # 'a'.split(',') already yields ['a'], so one code path covers both the
    # single and the comma separated form.
    wanted_types = {name.strip() for name in rule_types.split(',')}
    rules_objects = []
    for types, rule_list in rules_dict.items():
        # Only pick the requested rule types
        if types not in wanted_types:
            continue
        for corp_name, corp_rules in (rule_list or {}).items():
            for rule_keyword, rule_attr in (corp_rules or {}).items():
                # A keyword written without attributes is loaded as None
                rule_attr = rule_attr or {}
                if 'mode' in rule_attr:
                    mode = rule_attr['mode'].strip().lower()
                else:
                    # Default match mode
                    mode = 'normal-match'
                if 'ext' in rule_attr:
                    extension = rule_attr['ext'].strip()
                else:
                    # No extension restriction by default
                    extension = None
                rules_objects.append(
                    Rule(types.upper(), corp_name.strip(), rule_keyword.strip(), mode, extension))
    return rules_objects


def get_rule_types():
    """
    Get all rule types of the rules file

    :return: list of upper-cased rule types
    """
    return [rule_type.upper() for rule_type in rules_dict]


def get_rule_corps():
    """
    Get the second-level keys (corp names) of every rule type

    :return: list of corp names, exactly as written in the rules file
    """
    return [corp for corps in rules_dict.values() for corp in corps]


class Config(object):
    """
    Paths of the runtime files kept below home_path

    Creating an instance makes sure the home directory, the hash file and
    the data directory exist.
    """

    def __init__(self):
        self.project_directory = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir))
        # exist_ok avoids the check-then-create race when several Config()
        # instances are created at the same time.
        os.makedirs(home_path, exist_ok=True)
        self.hash_path = os.path.join(home_path, 'hash')
        if not os.path.isfile(self.hash_path):
            with open(self.hash_path, 'a'):
                pass
        self.data_path = os.path.join(home_path, 'data')
        os.makedirs(self.data_path, exist_ok=True)
        self.run_data = os.path.join(home_path, 'run')
        self.run_data_daily = os.path.join(home_path, 'run-{date}'.format(date=time.strftime('%y-%m-%d')))

    def hash_list(self):
        """
        Get all hash list

        :return: list with one entry per line of the hash file
        """
        with open(self.hash_path) as f:
            return f.read().splitlines()

    def add_hash(self, sha):
        """
        Append hash to file

        :param sha: hash to remember
        :return: True
        """
        # The separator is written BEFORE the hash; existing hash files rely
        # on this layout, so it is kept as is.
        with open(self.hash_path, 'a') as f:
            f.write(f'\r\n{sha}')
        return True

    @staticmethod
    def copy(source, destination):
        """
        Copy file, unless the destination already exists

        :param source: path of the file to copy
        :param destination: path to create
        :return: None
        """
        if os.path.isfile(destination):
            return
        logger.info('Not set configuration, setting....')
        with open(source) as f:
            content = f.read()
        with open(destination, 'w+') as f:
            f.write(content)
        logger.info(f'Config file set success({source})')


class Conf(object):
    """
    Merge the 'ports' option of two INI files

    :param base_config_file: path of the base INI file
    """

    def __init__(self, base_config_file):
        self.base_config_file = base_config_file

    def get(self, extend_config_file):
        """
        Read both files and add the ports of the extend file to the base sections

        Only sections of the base file are returned. For a section present in
        both files the ports of the extend file that are not listed yet are
        appended to the base ports. Sections without a 'ports' option are left
        untouched instead of raising KeyError.

        :param extend_config_file: path of the INI file with the extra ports
        :return: mapping of section name -> options of the base file
        """
        # NOTE: _sections is a private attribute of ConfigParser; it is kept
        # because callers receive exactly this raw mapping.
        base_parser = configparser.ConfigParser()
        base_parser.read(self.base_config_file)
        base_dict = base_parser._sections
        extend_parser = configparser.ConfigParser()
        extend_parser.read(extend_config_file)
        target_dict = extend_parser._sections

        for section, base_options in base_dict.items():
            target_options = target_dict.get(section)
            if target_options is None or 'ports' not in target_options:
                continue
            merged_ports = base_options['ports'].split(',') if 'ports' in base_options else []
            for port in target_options['ports'].split(','):
                if port not in merged_ports:
                    merged_ports.append(port)
            base_options['ports'] = ','.join(merged_ports)
        return base_dict


# Run data used when nothing was stored for the current day yet
c_default = {
    'job_success': 0,
    'job_failed': 0,
    'found_count': 0,
    'list': []
}


def daily_run_data(data=None):
    """
    Read or write the run data of the current day

    :param data: None to read, otherwise the JSON serializable data to store
    :return: the stored data (a fresh copy of c_default when the daily file
             is missing or empty) when reading, None when writing
    """
    run_data_path = Config().run_data_daily
    if data is not None:
        with open(run_data_path, 'w') as f:
            f.write(json.dumps(data))
        return None

    content = ''
    if os.path.isfile(run_data_path):
        with open(run_data_path) as f:
            content = f.readline()
    # Check for emptiness BEFORE decoding: json.loads('') raises
    if content.strip():
        stored = json.loads(content)
        if stored != '':
            return stored
    # Hand out a copy so callers appending to 'list' never modify c_default
    return {**c_default, 'list': list(c_default['list'])}
class Engine(object):
    """
    GitHub code search engine

    Searches GitHub for the keyword of a rule, extracts the matching lines of
    every hit and hands the results of each page to Process.
    """

    def __init__(self, token):
        """
        GitHub engine

        :param token: GitHub token used for all API requests
        """
        self.token = token
        self.g = Github(login_or_token=token, per_page=per_page)
        self.rule_object = None
        self.code = ''
        # e.g. jquery/jquery
        self.full_name = ''
        self.sha = ''
        self.url = ''
        # e.g. src/attributes/classes.js
        self.path = ''

        self.result = None
        # Results that matched an exclude rule; kept apart so they can be
        # reviewed manually instead of being dropped as false positives
        self.exclude_result = None
        self.hash_list = None
        self.processed_count = None
        self.next_count = None

    def _masked_token(self):
        """
        Get a printable form of the token that does not disclose the secret

        :return: '****' followed by the last four characters of the token
        """
        token = str(self.token or '')
        if len(token) <= 4:
            return '****'
        return f'****{token[-4:]}'

    def _github_error(self, api, error):
        """
        Log a GithubException and build the message returned to the caller

        :param api: name of the API call that failed
        :param error: the GithubException
        :return: the logged message
        """
        # Only a masked token is written: log files must not contain credentials
        msg = f'GitHub [{api}] exception(code: {error.status} msg: {error.data} token: {self._masked_token()})'
        logger.critical(msg)
        return msg

    def process_pages(self, pages_content, page, total):
        """
        Process the search hits of one page

        Fills self.result / self.exclude_result, keyed by the overall index
        of the hit.

        :param pages_content: hits of the page
        :param page: zero based page number
        :param total: total number of hits, only used for log output
        :return: False when the rule should be abandoned, otherwise True
        """
        for index, content in enumerate(pages_content):
            current_i = page * per_page + index
            base_info = f'[{self.rule_object.keyword}] [{current_i}/{total}]'

            # Nothing new was found so far and more than three hits were
            # already processed earlier: skip the rest of this rule
            if self.next_count == 0 and self.processed_count > 3:
                logger.info(
                    f'{base_info} Has encountered {self.processed_count} has been processed, skip the current rules!')
                return False

            # html_url
            self.url = content.html_url

            # sha
            try:
                self.sha = content.sha
            except Exception as e:
                logger.warning(f'sha exception {e}')
                self.sha = ''
                self.url = ''

            if self.sha in self.hash_list:
                # pass already processed
                logger.info(f'{base_info} Processed, skip! ({self.processed_count})')
                self.processed_count += 1
                continue

            # path
            self.path = content.path

            # full name
            self.full_name = content.repository.full_name.strip()
            if self._exclude_repository():
                # pass exclude repository
                logger.info(f'{base_info} Excluded because of the path, skip!')
                continue

            # code
            try:
                self.code = content.decoded_content.decode('utf-8')
            except Exception as e:
                logger.warning(f'Get Content Exception: {e} retrying...')
                continue

            match_codes = self.codes()
            if len(match_codes) == 0:
                logger.info(f'{base_info} Did not match the code, skip!')
                continue
            result = {
                'url': self.url,
                'match_codes': match_codes,
                'hash': self.sha,
                'code': self.code,
                'repository': self.full_name,
                'path': self.path,
            }
            if self._exclude_codes(match_codes):
                logger.info(f'{base_info} Code may be useless, do not skip, add to list to be reviewed!')
                self.exclude_result[current_i] = result
            else:
                self.result[current_i] = result

            # Download the repository (clone is documented upstream as
            # running in a separate process)
            git_url = content.repository.html_url
            clone(git_url, self.sha)
            logger.info(f'{base_info} Processing is complete, the next one!')
            self.next_count += 1

        return True

    def verify(self):
        """
        Check the token by requesting the rate limit

        :return: (passed, message)
        """
        try:
            ret = self.g.rate_limiting
            return True, f'TOKEN-PASSED: {ret}'
        except GithubException:
            return False, 'TOKEN-FAILED: FAILED'

    def search(self, rule_object):
        """
        Search content by rule on GitHub

        :param rule_object: the Rule to search for
        :return: (ret, rule, msg); msg is the number of results found over all
                 processed pages on success, the error otherwise
        """
        self.rule_object = rule_object

        # Number of hits that were already processed in an earlier run
        self.processed_count = 0
        # Number of hits processed successfully in this run
        self.next_count = 0

        # max 5000 requests/H
        try:
            rate_limiting = self.g.rate_limiting
            rate_limiting_reset_time = self.g.rate_limiting_resettime
            logger.info('----------------------------')

            # RATE_LIMIT_REQUEST: rules * 1
            # https://developer.github.com/v3/search/#search-code
            ext_query = ''
            if self.rule_object.extension is not None:
                for ext in self.rule_object.extension.split(','):
                    ext_query += f'extension:{ext.strip().lower()} '
            keyword = f'{self.rule_object.keyword} {ext_query}'
            logger.info(f'Search keyword: {keyword}')
            resource = self.g.search_code(keyword, sort="indexed", order="desc")
        except GithubException as e:
            return False, self.rule_object, self._github_error('search_code', e)

        logger.info(
            f'[{self.rule_object.keyword}] Speed Limit Results (Remaining Times / Total Times): {rate_limiting} Speed limit reset time: {rate_limiting_reset_time}')
        logger.info(
            '[{k}] The expected number of acquisitions: {page}(Pages) * {per}(Per Page) = {total}(Total)'.format(
                k=self.rule_object.keyword, page=default_pages, per=per_page, total=default_pages * per_page))

        # RATE_LIMIT_REQUEST: rules * 1
        try:
            total = resource.totalCount
            logger.info(f'[{self.rule_object.keyword}] The actual number: {total}')
        except socket.timeout as e:
            return False, self.rule_object, e
        except GithubException as e:
            return False, self.rule_object, self._github_error('search_code', e)

        self.hash_list = Config().hash_list()
        pages = 1 if total < per_page else default_pages
        # self.result only holds the current page, so the overall number of
        # results has to be counted separately
        found_total = 0
        for page in range(pages):
            self.result = {}
            self.exclude_result = {}
            try:
                # RATE_LIMIT_REQUEST: pages * rules * 1
                pages_content = resource.get_page(page)
            except socket.timeout:
                logger.info(f'[{self.rule_object.keyword}] [get_page] Time out, skip to get the next page!')
                continue
            except GithubException as e:
                return False, self.rule_object, self._github_error('get_page', e)

            logger.info(f'[{self.rule_object.keyword}] Get page {page} data for {len(pages_content)}')
            if not self.process_pages(pages_content, page, total):
                # Only already processed hits were met: skip the whole rule
                break
            # One report per page
            Process(self.result, self.rule_object).process()
            found_total += len(self.result)
            # TODO possible false positives are not sent for now
            # Process(self.exclude_result, self.rule_object).process(True)

        logger.info(
            f'[{self.rule_object.keyword}] The current rules are processed, the process of normal exit!')
        return True, self.rule_object, found_total

    def codes(self):
        """
        Extract the lines of self.code that are relevant for the current rule

        - mail: one entry per non-public mail address (see _mail)
        - only-match: the lines containing a keyword
        - normal-match: the lines containing a keyword plus up to three
          non-blank lines before and after each of them
        - any other mode: the first 20 lines

        Every line is returned at most once, even if several keywords match it
        or the context of neighbouring matches overlaps.

        :return: list of lines
        """
        # Strip image tags from the code
        self.code = self.code.replace('<img', '')
        codes = self.code.splitlines()
        codes_len = len(codes)
        mode = self.rule_object.mode
        if mode == 'mail':
            return self._mail()

        keywords = self._keywords()
        if mode == 'only-match':
            return [code for code in codes if any(kw in code for kw in keywords)]

        if mode == 'normal-match':
            match_codes = []
            # Indexes already emitted, shared by all matches of the file
            emitted = set()
            for idx, code in enumerate(codes):
                if not any(kw in code for kw in keywords):
                    continue
                for i_idx in range(max(idx - 3, 0), min(idx + 4, codes_len)):
                    if i_idx in emitted:
                        continue
                    # Blank context lines carry no information; the matched
                    # line itself is always kept
                    if i_idx != idx and codes[i_idx].strip() == '':
                        continue
                    if i_idx < idx:
                        position = 'P'
                    elif i_idx == idx:
                        position = 'C'
                    else:
                        position = 'N'
                    logger.debug(f'{position}:{i_idx}/{codes_len}: {codes[i_idx]}')
                    emitted.add(i_idx)
                    match_codes.append(codes[i_idx])
            return match_codes

        # Unknown mode: the first 20 lines
        return codes[0:20]

    def _keywords(self):
        """
        Split the keyword of the current rule into the strings to look for

        A keyword containing double quotes is one phrase (quotes removed),
        otherwise every whitespace separated word is a keyword of its own.

        :return: list of keywords
        """
        keyword = self.rule_object.keyword
        if '"' in keyword:
            return [keyword.replace('"', '')]
        if ' ' in keyword:
            # split() without argument drops the empty strings produced by
            # repeated spaces; an empty keyword would match every line
            return keyword.split()
        return [keyword]

    def _mail(self):
        """
        Collect the non-public mail addresses of self.code

        For every address the page title of the mail host is requested,
        unless the host is a private IP address.

        :return: list of '<mail> <url> <title>' strings
        """
        logger.info(f'[{self.rule_object.keyword}] mail rule')
        match_codes = []
        mails = []
        # Find all mail addresses
        # TODO mail account / password may be encrypted, in which case no
        # address is found here
        mail_multi = re.findall(regex_mail, self.code)
        for mm in mail_multi:
            mail = mm.strip().lower()
            if mail in mails:
                logger.info('[SKIPPED] Mail already processed!')
                continue
            host = re.findall(regex_host, mail)
            host = host[0].strip()
            if host in public_mail_services:
                logger.info('[SKIPPED] Public mail services!')
                continue
            mails.append(mail)

            # get mail host's title
            is_inner_ip = False
            if re.match(regex_ip, host) is None:
                try:
                    top_domain = get_tld(host, fix_protocol=True)
                except Exception as e:
                    logger.warning(f'get top domain exception {e}')
                    top_domain = host
                if top_domain == host:
                    domain = f'http://www.{host}'
                else:
                    domain = f'http://{host}'
            else:
                if IP(host).iptype() == 'PRIVATE':
                    is_inner_ip = True
                domain = f'http://{host}'
            title = '<Unknown>'
            if is_inner_ip is False:
                try:
                    response = requests.get(domain, timeout=4).content
                except Exception as e:
                    title = f'<{e}>'
                else:
                    try:
                        soup = BeautifulSoup(response, "html5lib")
                        # title.string is None when <title> is empty or
                        # contains nested tags
                        if soup.title is not None and soup.title.string:
                            title = soup.title.string.strip()[0:150]
                    except Exception:
                        title = 'Exception'
                        logger.warning(traceback.format_exc())
            else:
                title = '<Inner IP>'

            match_codes.append(f"{mail} {domain} {title}")
            logger.info(f' - {mail} {domain} {title}')
        return match_codes

    def _exclude_repository(self):
        """
        Exclude some repository(e.g. github.io blog)

        :return: True when '<repository>/<path>' matches an exclude rule
        """
        # Full lower-cased path of the file inside its repository
        full_path = f'{self.full_name.lower()}/{self.path.lower()}'
        return any(re.search(err, full_path) is not None for err in exclude_repository_rules)

    @staticmethod
    def _exclude_codes(codes):
        """
        Check the matched lines against the exclude rules

        :param codes: list of matched lines
        :return: True when the lines, joined by newlines, match an exclude rule
        """
        # Joined once instead of once per rule
        joined_codes = '\n'.join(codes)
        return any(re.search(ecr, joined_codes) is not None for ecr in exclude_codes_rules)
7 | 8 | Preamble 9 | 10 | The GNU General Public License is a free, copyleft license for 11 | software and other kinds of works. 12 | 13 | The licenses for most software and other practical works are designed 14 | to take away your freedom to share and change the works. By contrast, 15 | the GNU General Public License is intended to guarantee your freedom to 16 | share and change all versions of a program--to make sure it remains free 17 | software for all its users. We, the Free Software Foundation, use the 18 | GNU General Public License for most of our software; it applies also to 19 | any other work released this way by its authors. You can apply it to 20 | your programs, too. 21 | 22 | When we speak of free software, we are referring to freedom, not 23 | price. Our General Public Licenses are designed to make sure that you 24 | have the freedom to distribute copies of free software (and charge for 25 | them if you wish), that you receive source code or can get it if you 26 | want it, that you can change the software or use pieces of it in new 27 | free programs, and that you know you can do these things. 28 | 29 | To protect your rights, we need to prevent others from denying you 30 | these rights or asking you to surrender the rights. Therefore, you have 31 | certain responsibilities if you distribute copies of the software, or if 32 | you modify it: responsibilities to respect the freedom of others. 33 | 34 | For example, if you distribute copies of such a program, whether 35 | gratis or for a fee, you must pass on to the recipients the same 36 | freedoms that you received. You must make sure that they, too, receive 37 | or can get the source code. And you must show them these terms so they 38 | know their rights. 39 | 40 | Developers that use the GNU GPL protect your rights with two steps: 41 | (1) assert copyright on the software, and (2) offer you this License 42 | giving you legal permission to copy, distribute and/or modify it. 
43 | 44 | For the developers' and authors' protection, the GPL clearly explains 45 | that there is no warranty for this free software. For both users' and 46 | authors' sake, the GPL requires that modified versions be marked as 47 | changed, so that their problems will not be attributed erroneously to 48 | authors of previous versions. 49 | 50 | Some devices are designed to deny users access to install or run 51 | modified versions of the software inside them, although the manufacturer 52 | can do so. This is fundamentally incompatible with the aim of 53 | protecting users' freedom to change the software. The systematic 54 | pattern of such abuse occurs in the area of products for individuals to 55 | use, which is precisely where it is most unacceptable. Therefore, we 56 | have designed this version of the GPL to prohibit the practice for those 57 | products. If such problems arise substantially in other domains, we 58 | stand ready to extend this provision to those domains in future versions 59 | of the GPL, as needed to protect the freedom of users. 60 | 61 | Finally, every program is threatened constantly by software patents. 62 | States should not allow patents to restrict development and use of 63 | software on general-purpose computers, but in those that do, we wish to 64 | avoid the special danger that patents applied to a free program could 65 | make it effectively proprietary. To prevent this, the GPL assures that 66 | patents cannot be used to render the program non-free. 67 | 68 | The precise terms and conditions for copying, distribution and 69 | modification follow. 70 | 71 | TERMS AND CONDITIONS 72 | 73 | 0. Definitions. 74 | 75 | "This License" refers to version 3 of the GNU General Public License. 76 | 77 | "Copyright" also means copyright-like laws that apply to other kinds of 78 | works, such as semiconductor masks. 79 | 80 | "The Program" refers to any copyrightable work licensed under this 81 | License. Each licensee is addressed as "you". 
"Licensees" and 82 | "recipients" may be individuals or organizations. 83 | 84 | To "modify" a work means to copy from or adapt all or part of the work 85 | in a fashion requiring copyright permission, other than the making of an 86 | exact copy. The resulting work is called a "modified version" of the 87 | earlier work or a work "based on" the earlier work. 88 | 89 | A "covered work" means either the unmodified Program or a work based 90 | on the Program. 91 | 92 | To "propagate" a work means to do anything with it that, without 93 | permission, would make you directly or secondarily liable for 94 | infringement under applicable copyright law, except executing it on a 95 | computer or modifying a private copy. Propagation includes copying, 96 | distribution (with or without modification), making available to the 97 | public, and in some countries other activities as well. 98 | 99 | To "convey" a work means any kind of propagation that enables other 100 | parties to make or receive copies. Mere interaction with a user through 101 | a computer network, with no transfer of a copy, is not conveying. 102 | 103 | An interactive user interface displays "Appropriate Legal Notices" 104 | to the extent that it includes a convenient and prominently visible 105 | feature that (1) displays an appropriate copyright notice, and (2) 106 | tells the user that there is no warranty for the work (except to the 107 | extent that warranties are provided), that licensees may convey the 108 | work under this License, and how to view a copy of this License. If 109 | the interface presents a list of user commands or options, such as a 110 | menu, a prominent item in the list meets this criterion. 111 | 112 | 1. Source Code. 113 | 114 | The "source code" for a work means the preferred form of the work 115 | for making modifications to it. "Object code" means any non-source 116 | form of a work. 
117 | 118 | A "Standard Interface" means an interface that either is an official 119 | standard defined by a recognized standards body, or, in the case of 120 | interfaces specified for a particular programming language, one that 121 | is widely used among developers working in that language. 122 | 123 | The "System Libraries" of an executable work include anything, other 124 | than the work as a whole, that (a) is included in the normal form of 125 | packaging a Major Component, but which is not part of that Major 126 | Component, and (b) serves only to enable use of the work with that 127 | Major Component, or to implement a Standard Interface for which an 128 | implementation is available to the public in source code form. A 129 | "Major Component", in this context, means a major essential component 130 | (kernel, window system, and so on) of the specific operating system 131 | (if any) on which the executable work runs, or a compiler used to 132 | produce the work, or an object code interpreter used to run it. 133 | 134 | The "Corresponding Source" for a work in object code form means all 135 | the source code needed to generate, install, and (for an executable 136 | work) run the object code and to modify the work, including scripts to 137 | control those activities. However, it does not include the work's 138 | System Libraries, or general-purpose tools or generally available free 139 | programs which are used unmodified in performing those activities but 140 | which are not part of the work. For example, Corresponding Source 141 | includes interface definition files associated with source files for 142 | the work, and the source code for shared libraries and dynamically 143 | linked subprograms that the work is specifically designed to require, 144 | such as by intimate data communication or control flow between those 145 | subprograms and other parts of the work. 
146 | 147 | The Corresponding Source need not include anything that users 148 | can regenerate automatically from other parts of the Corresponding 149 | Source. 150 | 151 | The Corresponding Source for a work in source code form is that 152 | same work. 153 | 154 | 2. Basic Permissions. 155 | 156 | All rights granted under this License are granted for the term of 157 | copyright on the Program, and are irrevocable provided the stated 158 | conditions are met. This License explicitly affirms your unlimited 159 | permission to run the unmodified Program. The output from running a 160 | covered work is covered by this License only if the output, given its 161 | content, constitutes a covered work. This License acknowledges your 162 | rights of fair use or other equivalent, as provided by copyright law. 163 | 164 | You may make, run and propagate covered works that you do not 165 | convey, without conditions so long as your license otherwise remains 166 | in force. You may convey covered works to others for the sole purpose 167 | of having them make modifications exclusively for you, or provide you 168 | with facilities for running those works, provided that you comply with 169 | the terms of this License in conveying all material for which you do 170 | not control copyright. Those thus making or running the covered works 171 | for you must do so exclusively on your behalf, under your direction 172 | and control, on terms that prohibit them from making any copies of 173 | your copyrighted material outside their relationship with you. 174 | 175 | Conveying under any other circumstances is permitted solely under 176 | the conditions stated below. Sublicensing is not allowed; section 10 177 | makes it unnecessary. 178 | 179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 
180 | 181 | No covered work shall be deemed part of an effective technological 182 | measure under any applicable law fulfilling obligations under article 183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 184 | similar laws prohibiting or restricting circumvention of such 185 | measures. 186 | 187 | When you convey a covered work, you waive any legal power to forbid 188 | circumvention of technological measures to the extent such circumvention 189 | is effected by exercising rights under this License with respect to 190 | the covered work, and you disclaim any intention to limit operation or 191 | modification of the work as a means of enforcing, against the work's 192 | users, your or third parties' legal rights to forbid circumvention of 193 | technological measures. 194 | 195 | 4. Conveying Verbatim Copies. 196 | 197 | You may convey verbatim copies of the Program's source code as you 198 | receive it, in any medium, provided that you conspicuously and 199 | appropriately publish on each copy an appropriate copyright notice; 200 | keep intact all notices stating that this License and any 201 | non-permissive terms added in accord with section 7 apply to the code; 202 | keep intact all notices of the absence of any warranty; and give all 203 | recipients a copy of this License along with the Program. 204 | 205 | You may charge any price or no price for each copy that you convey, 206 | and you may offer support or warranty protection for a fee. 207 | 208 | 5. Conveying Modified Source Versions. 209 | 210 | You may convey a work based on the Program, or the modifications to 211 | produce it from the Program, in the form of source code under the 212 | terms of section 4, provided that you also meet all of these conditions: 213 | 214 | a) The work must carry prominent notices stating that you modified 215 | it, and giving a relevant date. 
216 | 217 | b) The work must carry prominent notices stating that it is 218 | released under this License and any conditions added under section 219 | 7. This requirement modifies the requirement in section 4 to 220 | "keep intact all notices". 221 | 222 | c) You must license the entire work, as a whole, under this 223 | License to anyone who comes into possession of a copy. This 224 | License will therefore apply, along with any applicable section 7 225 | additional terms, to the whole of the work, and all its parts, 226 | regardless of how they are packaged. This License gives no 227 | permission to license the work in any other way, but it does not 228 | invalidate such permission if you have separately received it. 229 | 230 | d) If the work has interactive user interfaces, each must display 231 | Appropriate Legal Notices; however, if the Program has interactive 232 | interfaces that do not display Appropriate Legal Notices, your 233 | work need not make them do so. 234 | 235 | A compilation of a covered work with other separate and independent 236 | works, which are not by their nature extensions of the covered work, 237 | and which are not combined with it such as to form a larger program, 238 | in or on a volume of a storage or distribution medium, is called an 239 | "aggregate" if the compilation and its resulting copyright are not 240 | used to limit the access or legal rights of the compilation's users 241 | beyond what the individual works permit. Inclusion of a covered work 242 | in an aggregate does not cause this License to apply to the other 243 | parts of the aggregate. 244 | 245 | 6. Conveying Non-Source Forms. 
246 | 247 | You may convey a covered work in object code form under the terms 248 | of sections 4 and 5, provided that you also convey the 249 | machine-readable Corresponding Source under the terms of this License, 250 | in one of these ways: 251 | 252 | a) Convey the object code in, or embodied in, a physical product 253 | (including a physical distribution medium), accompanied by the 254 | Corresponding Source fixed on a durable physical medium 255 | customarily used for software interchange. 256 | 257 | b) Convey the object code in, or embodied in, a physical product 258 | (including a physical distribution medium), accompanied by a 259 | written offer, valid for at least three years and valid for as 260 | long as you offer spare parts or customer support for that product 261 | model, to give anyone who possesses the object code either (1) a 262 | copy of the Corresponding Source for all the software in the 263 | product that is covered by this License, on a durable physical 264 | medium customarily used for software interchange, for a price no 265 | more than your reasonable cost of physically performing this 266 | conveying of source, or (2) access to copy the 267 | Corresponding Source from a network server at no charge. 268 | 269 | c) Convey individual copies of the object code with a copy of the 270 | written offer to provide the Corresponding Source. This 271 | alternative is allowed only occasionally and noncommercially, and 272 | only if you received the object code with such an offer, in accord 273 | with subsection 6b. 274 | 275 | d) Convey the object code by offering access from a designated 276 | place (gratis or for a charge), and offer equivalent access to the 277 | Corresponding Source in the same way through the same place at no 278 | further charge. You need not require recipients to copy the 279 | Corresponding Source along with the object code. 
If the place to 280 | copy the object code is a network server, the Corresponding Source 281 | may be on a different server (operated by you or a third party) 282 | that supports equivalent copying facilities, provided you maintain 283 | clear directions next to the object code saying where to find the 284 | Corresponding Source. Regardless of what server hosts the 285 | Corresponding Source, you remain obligated to ensure that it is 286 | available for as long as needed to satisfy these requirements. 287 | 288 | e) Convey the object code using peer-to-peer transmission, provided 289 | you inform other peers where the object code and Corresponding 290 | Source of the work are being offered to the general public at no 291 | charge under subsection 6d. 292 | 293 | A separable portion of the object code, whose source code is excluded 294 | from the Corresponding Source as a System Library, need not be 295 | included in conveying the object code work. 296 | 297 | A "User Product" is either (1) a "consumer product", which means any 298 | tangible personal property which is normally used for personal, family, 299 | or household purposes, or (2) anything designed or sold for incorporation 300 | into a dwelling. In determining whether a product is a consumer product, 301 | doubtful cases shall be resolved in favor of coverage. For a particular 302 | product received by a particular user, "normally used" refers to a 303 | typical or common use of that class of product, regardless of the status 304 | of the particular user or of the way in which the particular user 305 | actually uses, or expects or is expected to use, the product. A product 306 | is a consumer product regardless of whether the product has substantial 307 | commercial, industrial or non-consumer uses, unless such uses represent 308 | the only significant mode of use of the product. 
309 | 310 | "Installation Information" for a User Product means any methods, 311 | procedures, authorization keys, or other information required to install 312 | and execute modified versions of a covered work in that User Product from 313 | a modified version of its Corresponding Source. The information must 314 | suffice to ensure that the continued functioning of the modified object 315 | code is in no case prevented or interfered with solely because 316 | modification has been made. 317 | 318 | If you convey an object code work under this section in, or with, or 319 | specifically for use in, a User Product, and the conveying occurs as 320 | part of a transaction in which the right of possession and use of the 321 | User Product is transferred to the recipient in perpetuity or for a 322 | fixed term (regardless of how the transaction is characterized), the 323 | Corresponding Source conveyed under this section must be accompanied 324 | by the Installation Information. But this requirement does not apply 325 | if neither you nor any third party retains the ability to install 326 | modified object code on the User Product (for example, the work has 327 | been installed in ROM). 328 | 329 | The requirement to provide Installation Information does not include a 330 | requirement to continue to provide support service, warranty, or updates 331 | for a work that has been modified or installed by the recipient, or for 332 | the User Product in which it has been modified or installed. Access to a 333 | network may be denied when the modification itself materially and 334 | adversely affects the operation of the network or violates the rules and 335 | protocols for communication across the network. 
336 | 337 | Corresponding Source conveyed, and Installation Information provided, 338 | in accord with this section must be in a format that is publicly 339 | documented (and with an implementation available to the public in 340 | source code form), and must require no special password or key for 341 | unpacking, reading or copying. 342 | 343 | 7. Additional Terms. 344 | 345 | "Additional permissions" are terms that supplement the terms of this 346 | License by making exceptions from one or more of its conditions. 347 | Additional permissions that are applicable to the entire Program shall 348 | be treated as though they were included in this License, to the extent 349 | that they are valid under applicable law. If additional permissions 350 | apply only to part of the Program, that part may be used separately 351 | under those permissions, but the entire Program remains governed by 352 | this License without regard to the additional permissions. 353 | 354 | When you convey a copy of a covered work, you may at your option 355 | remove any additional permissions from that copy, or from any part of 356 | it. (Additional permissions may be written to require their own 357 | removal in certain cases when you modify the work.) You may place 358 | additional permissions on material, added by you to a covered work, 359 | for which you have or can give appropriate copyright permission. 
360 | 361 | Notwithstanding any other provision of this License, for material you 362 | add to a covered work, you may (if authorized by the copyright holders of 363 | that material) supplement the terms of this License with terms: 364 | 365 | a) Disclaiming warranty or limiting liability differently from the 366 | terms of sections 15 and 16 of this License; or 367 | 368 | b) Requiring preservation of specified reasonable legal notices or 369 | author attributions in that material or in the Appropriate Legal 370 | Notices displayed by works containing it; or 371 | 372 | c) Prohibiting misrepresentation of the origin of that material, or 373 | requiring that modified versions of such material be marked in 374 | reasonable ways as different from the original version; or 375 | 376 | d) Limiting the use for publicity purposes of names of licensors or 377 | authors of the material; or 378 | 379 | e) Declining to grant rights under trademark law for use of some 380 | trade names, trademarks, or service marks; or 381 | 382 | f) Requiring indemnification of licensors and authors of that 383 | material by anyone who conveys the material (or modified versions of 384 | it) with contractual assumptions of liability to the recipient, for 385 | any liability that these contractual assumptions directly impose on 386 | those licensors and authors. 387 | 388 | All other non-permissive additional terms are considered "further 389 | restrictions" within the meaning of section 10. If the Program as you 390 | received it, or any part of it, contains a notice stating that it is 391 | governed by this License along with a term that is a further 392 | restriction, you may remove that term. 
If a license document contains 393 | a further restriction but permits relicensing or conveying under this 394 | License, you may add to a covered work material governed by the terms 395 | of that license document, provided that the further restriction does 396 | not survive such relicensing or conveying. 397 | 398 | If you add terms to a covered work in accord with this section, you 399 | must place, in the relevant source files, a statement of the 400 | additional terms that apply to those files, or a notice indicating 401 | where to find the applicable terms. 402 | 403 | Additional terms, permissive or non-permissive, may be stated in the 404 | form of a separately written license, or stated as exceptions; 405 | the above requirements apply either way. 406 | 407 | 8. Termination. 408 | 409 | You may not propagate or modify a covered work except as expressly 410 | provided under this License. Any attempt otherwise to propagate or 411 | modify it is void, and will automatically terminate your rights under 412 | this License (including any patent licenses granted under the third 413 | paragraph of section 11). 414 | 415 | However, if you cease all violation of this License, then your 416 | license from a particular copyright holder is reinstated (a) 417 | provisionally, unless and until the copyright holder explicitly and 418 | finally terminates your license, and (b) permanently, if the copyright 419 | holder fails to notify you of the violation by some reasonable means 420 | prior to 60 days after the cessation. 421 | 422 | Moreover, your license from a particular copyright holder is 423 | reinstated permanently if the copyright holder notifies you of the 424 | violation by some reasonable means, this is the first time you have 425 | received notice of violation of this License (for any work) from that 426 | copyright holder, and you cure the violation prior to 30 days after 427 | your receipt of the notice. 
428 | 429 | Termination of your rights under this section does not terminate the 430 | licenses of parties who have received copies or rights from you under 431 | this License. If your rights have been terminated and not permanently 432 | reinstated, you do not qualify to receive new licenses for the same 433 | material under section 10. 434 | 435 | 9. Acceptance Not Required for Having Copies. 436 | 437 | You are not required to accept this License in order to receive or 438 | run a copy of the Program. Ancillary propagation of a covered work 439 | occurring solely as a consequence of using peer-to-peer transmission 440 | to receive a copy likewise does not require acceptance. However, 441 | nothing other than this License grants you permission to propagate or 442 | modify any covered work. These actions infringe copyright if you do 443 | not accept this License. Therefore, by modifying or propagating a 444 | covered work, you indicate your acceptance of this License to do so. 445 | 446 | 10. Automatic Licensing of Downstream Recipients. 447 | 448 | Each time you convey a covered work, the recipient automatically 449 | receives a license from the original licensors, to run, modify and 450 | propagate that work, subject to this License. You are not responsible 451 | for enforcing compliance by third parties with this License. 452 | 453 | An "entity transaction" is a transaction transferring control of an 454 | organization, or substantially all assets of one, or subdividing an 455 | organization, or merging organizations. 
If propagation of a covered 456 | work results from an entity transaction, each party to that 457 | transaction who receives a copy of the work also receives whatever 458 | licenses to the work the party's predecessor in interest had or could 459 | give under the previous paragraph, plus a right to possession of the 460 | Corresponding Source of the work from the predecessor in interest, if 461 | the predecessor has it or can get it with reasonable efforts. 462 | 463 | You may not impose any further restrictions on the exercise of the 464 | rights granted or affirmed under this License. For example, you may 465 | not impose a license fee, royalty, or other charge for exercise of 466 | rights granted under this License, and you may not initiate litigation 467 | (including a cross-claim or counterclaim in a lawsuit) alleging that 468 | any patent claim is infringed by making, using, selling, offering for 469 | sale, or importing the Program or any portion of it. 470 | 471 | 11. Patents. 472 | 473 | A "contributor" is a copyright holder who authorizes use under this 474 | License of the Program or a work on which the Program is based. The 475 | work thus licensed is called the contributor's "contributor version". 476 | 477 | A contributor's "essential patent claims" are all patent claims 478 | owned or controlled by the contributor, whether already acquired or 479 | hereafter acquired, that would be infringed by some manner, permitted 480 | by this License, of making, using, or selling its contributor version, 481 | but do not include claims that would be infringed only as a 482 | consequence of further modification of the contributor version. For 483 | purposes of this definition, "control" includes the right to grant 484 | patent sublicenses in a manner consistent with the requirements of 485 | this License. 
486 | 487 | Each contributor grants you a non-exclusive, worldwide, royalty-free 488 | patent license under the contributor's essential patent claims, to 489 | make, use, sell, offer for sale, import and otherwise run, modify and 490 | propagate the contents of its contributor version. 491 | 492 | In the following three paragraphs, a "patent license" is any express 493 | agreement or commitment, however denominated, not to enforce a patent 494 | (such as an express permission to practice a patent or covenant not to 495 | sue for patent infringement). To "grant" such a patent license to a 496 | party means to make such an agreement or commitment not to enforce a 497 | patent against the party. 498 | 499 | If you convey a covered work, knowingly relying on a patent license, 500 | and the Corresponding Source of the work is not available for anyone 501 | to copy, free of charge and under the terms of this License, through a 502 | publicly available network server or other readily accessible means, 503 | then you must either (1) cause the Corresponding Source to be so 504 | available, or (2) arrange to deprive yourself of the benefit of the 505 | patent license for this particular work, or (3) arrange, in a manner 506 | consistent with the requirements of this License, to extend the patent 507 | license to downstream recipients. "Knowingly relying" means you have 508 | actual knowledge that, but for the patent license, your conveying the 509 | covered work in a country, or your recipient's use of the covered work 510 | in a country, would infringe one or more identifiable patents in that 511 | country that you have reason to believe are valid. 
512 | 513 | If, pursuant to or in connection with a single transaction or 514 | arrangement, you convey, or propagate by procuring conveyance of, a 515 | covered work, and grant a patent license to some of the parties 516 | receiving the covered work authorizing them to use, propagate, modify 517 | or convey a specific copy of the covered work, then the patent license 518 | you grant is automatically extended to all recipients of the covered 519 | work and works based on it. 520 | 521 | A patent license is "discriminatory" if it does not include within 522 | the scope of its coverage, prohibits the exercise of, or is 523 | conditioned on the non-exercise of one or more of the rights that are 524 | specifically granted under this License. You may not convey a covered 525 | work if you are a party to an arrangement with a third party that is 526 | in the business of distributing software, under which you make payment 527 | to the third party based on the extent of your activity of conveying 528 | the work, and under which the third party grants, to any of the 529 | parties who would receive the covered work from you, a discriminatory 530 | patent license (a) in connection with copies of the covered work 531 | conveyed by you (or copies made from those copies), or (b) primarily 532 | for and in connection with specific products or compilations that 533 | contain the covered work, unless you entered into that arrangement, 534 | or that patent license was granted, prior to 28 March 2007. 535 | 536 | Nothing in this License shall be construed as excluding or limiting 537 | any implied license or other defenses to infringement that may 538 | otherwise be available to you under applicable patent law. 539 | 540 | 12. No Surrender of Others' Freedom. 541 | 542 | If conditions are imposed on you (whether by court order, agreement or 543 | otherwise) that contradict the conditions of this License, they do not 544 | excuse you from the conditions of this License. 
If you cannot convey a 545 | covered work so as to satisfy simultaneously your obligations under this 546 | License and any other pertinent obligations, then as a consequence you may 547 | not convey it at all. For example, if you agree to terms that obligate you 548 | to collect a royalty for further conveying from those to whom you convey 549 | the Program, the only way you could satisfy both those terms and this 550 | License would be to refrain entirely from conveying the Program. 551 | 552 | 13. Use with the GNU Affero General Public License. 553 | 554 | Notwithstanding any other provision of this License, you have 555 | permission to link or combine any covered work with a work licensed 556 | under version 3 of the GNU Affero General Public License into a single 557 | combined work, and to convey the resulting work. The terms of this 558 | License will continue to apply to the part which is the covered work, 559 | but the special requirements of the GNU Affero General Public License, 560 | section 13, concerning interaction through a network will apply to the 561 | combination as such. 562 | 563 | 14. Revised Versions of this License. 564 | 565 | The Free Software Foundation may publish revised and/or new versions of 566 | the GNU General Public License from time to time. Such new versions will 567 | be similar in spirit to the present version, but may differ in detail to 568 | address new problems or concerns. 569 | 570 | Each version is given a distinguishing version number. If the 571 | Program specifies that a certain numbered version of the GNU General 572 | Public License "or any later version" applies to it, you have the 573 | option of following the terms and conditions either of that numbered 574 | version or of any later version published by the Free Software 575 | Foundation. If the Program does not specify a version number of the 576 | GNU General Public License, you may choose any version ever published 577 | by the Free Software Foundation. 
578 | 579 | If the Program specifies that a proxy can decide which future 580 | versions of the GNU General Public License can be used, that proxy's 581 | public statement of acceptance of a version permanently authorizes you 582 | to choose that version for the Program. 583 | 584 | Later license versions may give you additional or different 585 | permissions. However, no additional obligations are imposed on any 586 | author or copyright holder as a result of your choosing to follow a 587 | later version. 588 | 589 | 15. Disclaimer of Warranty. 590 | 591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY 594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, 595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM 597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF 598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 599 | 600 | 16. Limitation of Liability. 601 | 602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS 604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 610 | SUCH DAMAGES. 611 | 612 | 17. Interpretation of Sections 15 and 16. 
613 | 614 | If the disclaimer of warranty and limitation of liability provided 615 | above cannot be given local legal effect according to their terms, 616 | reviewing courts shall apply local law that most closely approximates 617 | an absolute waiver of all civil liability in connection with the 618 | Program, unless a warranty or assumption of liability accompanies a 619 | copy of the Program in return for a fee. 620 | 621 | END OF TERMS AND CONDITIONS 622 | 623 | How to Apply These Terms to Your New Programs 624 | 625 | If you develop a new program, and you want it to be of the greatest 626 | possible use to the public, the best way to achieve this is to make it 627 | free software which everyone can redistribute and change under these terms. 628 | 629 | To do so, attach the following notices to the program. It is safest 630 | to attach them to the start of each source file to most effectively 631 | state the exclusion of warranty; and each file should have at least 632 | the "copyright" line and a pointer to where the full notice is found. 633 | 634 | {one line to give the program's name and a brief idea of what it does.} 635 | Copyright (C) {year} {name of author} 636 | 637 | This program is free software: you can redistribute it and/or modify 638 | it under the terms of the GNU General Public License as published by 639 | the Free Software Foundation, either version 3 of the License, or 640 | (at your option) any later version. 641 | 642 | This program is distributed in the hope that it will be useful, 643 | but WITHOUT ANY WARRANTY; without even the implied warranty of 644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 645 | GNU General Public License for more details. 646 | 647 | You should have received a copy of the GNU General Public License 648 | along with this program. If not, see <http://www.gnu.org/licenses/>. 649 | 650 | Also add information on how to contact you by electronic and paper mail. 
651 | 652 | If the program does terminal interaction, make it output a short 653 | notice like this when it starts in an interactive mode: 654 | 655 | {project} Copyright (C) {year} {fullname} 656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 657 | This is free software, and you are welcome to redistribute it 658 | under certain conditions; type `show c' for details. 659 | 660 | The hypothetical commands `show w' and `show c' should show the appropriate 661 | parts of the General Public License. Of course, your program's commands 662 | might be different; for a GUI interface, you would use an "about box". 663 | 664 | You should also get your employer (if you work as a programmer) or school, 665 | if any, to sign a "copyright disclaimer" for the program, if necessary. 666 | For more information on this, and how to apply and follow the GNU GPL, see 667 | <http://www.gnu.org/licenses/>. 668 | 669 | The GNU General Public License does not permit incorporating your program 670 | into proprietary programs. If your program is a subroutine library, you 671 | may consider it more useful to permit linking proprietary applications with 672 | the library. If this is what you want to do, use the GNU Lesser General 673 | Public License instead of this License. But first, please read 674 | <http://www.gnu.org/philosophy/why-not-lgpl.html>. --------------------------------------------------------------------------------