├── .gitattributes ├── .gitignore ├── .travis.yml ├── Dockerfile ├── Dockerfile-debug ├── LICENSE ├── README.md ├── demo.py ├── pics └── sample_report.png ├── setup.py └── stagesep2 ├── __init__.py ├── analyser ├── __init__.py ├── base.py ├── match_template.py ├── ocr.py └── trend.py ├── config.py ├── executor.py ├── loader.py ├── logger.py ├── painter.py ├── reporter.py └── utils.py /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .nox/ 42 | .coverage 43 | .coverage.* 44 | .cache 45 | nosetests.xml 46 | coverage.xml 47 | *.cover 48 | .hypothesis/ 49 | .pytest_cache/ 50 | 51 | # Translations 52 | *.mo 53 | *.pot 54 | 55 | # Django stuff: 56 | *.log 57 | local_settings.py 58 | db.sqlite3 59 | 60 | # Flask stuff: 61 | instance/ 62 | .webassets-cache 63 | 64 | # Scrapy stuff: 65 | .scrapy 66 | 67 | # Sphinx documentation 68 | docs/_build/ 69 | 70 | # PyBuilder 71 | target/ 72 | 73 | # Jupyter Notebook 74 | .ipynb_checkpoints 75 | 76 | # IPython 77 | profile_default/ 78 | ipython_config.py 79 | 80 | # pyenv 81 | .python-version 82 | 83 | # celery beat schedule file 84 | celerybeat-schedule 85 | 86 | # SageMath parsed files 87 | *.sage.py 88 | 89 | # Environments 90 | .env 91 | .venv 92 | env/ 93 | venv/ 94 | ENV/ 95 | env.bak/ 96 | venv.bak/ 97 | 98 | # Spyder project settings 99 | .spyderproject 100 | .spyproject 101 | 102 | # Rope project settings 103 | .ropeproject 104 | 105 | # mkdocs documentation 106 | /site 107 | 108 | # mypy 109 | .mypy_cache/ 110 | .dmypy.json 111 | dmypy.json 112 | 113 | # Pyre type checker 114 | .pyre/ 115 | 116 | # custom 117 | .idea/ 118 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | os: linux 2 | dist: xenial 3 | language: python 4 | sudo: true 5 | python: 6 | - "3.6" 7 | - "3.7" 8 | 9 | install: 10 | - sudo apt-get -qq update 11 | - sudo apt-get -y install tesseract-ocr tesseract-ocr-chi-sim libtesseract-dev libleptonica-dev pkg-config 12 | - sudo apt-get -y install libglib2.0 libsm6 libxrender1 libxext-dev 13 | - pip install Cython 14 | - pip install . 
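  # the tesseract/leptonica dev headers and pkg-config above are needed so that
  # tesserocr can be compiled during `pip install .`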
15 | 16 | script: 17 | - git clone https://github.com/williamfzc/stagesep2-sample.git 18 | - cd stagesep2-sample 19 | - python sample.py 20 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3-slim 2 | 3 | USER root 4 | 5 | RUN apt-get update \ 6 | && apt-get -y install gcc build-essential \ 7 | && apt-get -y install tesseract-ocr tesseract-ocr-chi-sim libtesseract-dev libleptonica-dev pkg-config \ 8 | && apt-get -y install libglib2.0 libsm6 libxrender1 libxext-dev 9 | 10 | # install stagesep2 11 | WORKDIR /usr/src/app 12 | COPY . . 13 | RUN pip install . \ 14 | && apt-get purge -y --auto-remove gcc build-essential \ 15 | && apt-get clean \ 16 | && rm -rf /var/lib/apt/lists/* 17 | 18 | WORKDIR /root/stagesep2 19 | CMD ["bash"] 20 | -------------------------------------------------------------------------------- /Dockerfile-debug: -------------------------------------------------------------------------------- 1 | FROM python:3-slim 2 | 3 | USER root 4 | 5 | WORKDIR /usr/src/app 6 | COPY . . 7 | 8 | RUN apt-get update \ 9 | && apt-get -y install gcc build-essential tesseract-ocr tesseract-ocr-chi-sim libtesseract-dev libleptonica-dev pkg-config \ 10 | && apt-get -y install libglib2.0 libsm6 libxrender1 libxext-dev \ 11 | && pip install . \ 12 | && apt-get purge -y --auto-remove gcc build-essential \ 13 | && apt-get clean \ 14 | && rm -rf /var/lib/apt/lists/* 15 | 16 | WORKDIR /root/stagesep2 17 | CMD ["bash"] 18 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 William Feng 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | > 该项目处于低活跃状态,建议使用 https://github.com/williamfzc/stagesepx 2 | 3 | # stagesep2 4 | 5 | ![PyPI - Python Version](https://img.shields.io/pypi/pyversions/stagesep2.svg?style=flat-square) 6 | [![PyPI version](https://badge.fury.io/py/stagesep2.svg)](https://badge.fury.io/py/stagesep2) 7 | [![Build Status](https://travis-ci.org/williamfzc/stagesep2.svg?branch=master)](https://travis-ci.org/williamfzc/stagesep2) 8 | [![Maintainability](https://api.codeclimate.com/v1/badges/ad4c729fcf98d98497b8/maintainability)](https://codeclimate.com/github/williamfzc/stagesep2/maintainability) 9 | 10 | --- 11 | 12 | Analyse, and convert video into useful data. 13 | 14 | # 原理 15 | 16 | stagesep2 的两个核心功能: 17 | 18 | - ocr(tesseract) 19 | - 图像识别(opencv) 20 | 21 | 视频是由一系列连续的图片(帧)组成的,通过逐帧分析,我们能够从视频中提取出流程相关信息。理论上,每一帧都存在能够区分它与其他帧的标识,可能是文字或图像。 22 | 23 | 例如,我们需要测试 从桌面启动chrome,打开amazon 的速度: 24 | 25 | - 在操作前,我们在主页。主页上会有特定的文字与图像(例如chrome icon) 26 | - 在操作时,页面会有特定的变化(例如chrome icon变暗,或出现点击位置反馈) 27 | - 在操作后(chrome启动后),页面发生切换,页面上的文字与图像都会发生改变(例如amazon logo出现) 28 | 29 | 那么,我们可以通过两个方面来进行检测。首先是通过标志性图片: 30 | 31 | ![](pics/sample_report.png) 32 | 33 | 可以看到,在 0.2s 时chrome图标被点击,在 0.96s 时amazon图标正式出现。再看看文字的: 34 | 35 | ```json 36 | { 37 | "result_id": "c2e5116c-462b-11e9-9ed2-005056c00008", 38 | "video_name": "./videos/demo1.mp4", 39 | "frame_id": 24, 40 | "current_time": 0.96, 41 | "ocr": ["PO", "/", "1dX00d", "¥", ":", "00Xv00Yv00", "—", "ESRRGRERSize0.0215", ":", "27dSlsB", "(", "ayhttps", "/", "/", "www", ".", "amazon", ".", "comQO", "°", "oa", "\"", "四", "Ney", ",", "DepartmentsListsDealsVideoMusic", "©", "DelivertoChinaWeshipinternationallyWe", "'", "reshowingyouitemsthatshiptoChina", ".", "Toseeiemsthatshiptoadifferentcountrychangeyourgetiveryaddres5AdditionallanguagesettingsareavailableSomecontentmaybeauto", "-", "translatedCHANGETHEADDRESSrs", "并", "~", "Shopwith100%", "ConfidenceonAmazonLJ", "—", ")", "SigninforthebestexperienceCoO00", "@"], 42 | "match_template": { 43 | "amazon": { 44 | "min": -0.4684264361858368, 45 | "max": 0.6224471926689148 46 | }, 47 | "chrome": { 48 | "min": -0.4022962152957916, 49 | "max": 0.7294253706932068 50 | }, 51 | "chrome_clicked": { 52 | "min": -0.6132965087890625, 53 | "max": 0.7038567066192627 54 | } 55 | }, 56 | "trend": { 57 | "previous": 0.8785946933890821, 58 | "first": 0.8719320065296263, 59 | "last": 0.5842399940047383 60 | } 61 | } 62 | ``` 63 | 64 | 节选 0.96s 的检测结果,可以发现在该时刻amazon网页上的相关字样开始出现,与图片检测的结果是一致的。 65 | 66 | 通过对这些阶段进行分析,得到每个阶段及帧对应的时间戳,我们就能够准确地知晓视频每个阶段发生的事情。你的视频FPS越高,数据会越精确。 67 | 68 | # 目的 69 | 70 | > 值得注意的是,在性能测试范畴内,fps的稳定性是非常重要的。而目前软件录制手段实际上并不稳定,容易造成结果不准确的问题。如果用于生产环境,建议使用硬件方式(例如外置摄像头)。 71 | 72 | 全平台的 性能测试/有效性验证 方案 73 | 74 | - 渲染内容是否符合期望 75 | - 渲染性能 76 | 77 | ## 为什么介入图像识别 78 | 79 | ### UI 80 | 81 | 现阶段的UI测试大多属于纯代码层面的行为,而对于控件是否真的渲染成为我们希望的样子我们并不知晓。 82 | 83 | ### 性能 84 | 85 | 在常规速度类性能测试中通常通过提前埋点进行测试,一般会有两个问题: 86 | 87 | - 具有侵入性(需要改动源码) 88 | - 对于界面相关的场景不适用(并不知道界面是否已经被真正渲染出来) 89 | 90 | ## 图像识别在测试中的应用 91 | 92 | 一般来说,通过图像识别来进行测试分为三个步骤: 93 | 94 | - 图像/视频 采集 95 | - 这个部分通常由高速摄像机或稳定帧率的外置相机进行拍摄,得到固定帧率的视频 96 | - 软件录制是不靠谱的,很容易出现帧率不稳定的情况。而如果时间与帧数不能精确对应的话数据会失真 97 | 98 | - 视频处理 99 | - 提取视频中的信息,输出成为我们需要的形式 100 | - 也是整个流程最关键的部分 101 | 102 | - 数据分析 103 | - 将视频处理的结果进行分析,得到结论或生成报告 104 | 105 | 该项目将承载视频处理的部分,将录制好的视频解析成开发者需要的格式。 106 | 107 | # 使用 108 | 109 | 目前支持两种使用方式: 110 | 111 | - docker(推荐) 112 | - python包 113 | 114 | 从 
[官方示例](https://github.com/williamfzc/stagesep2-sample) 开始吧。 115 | 116 | # 相关内容 117 | 118 | ## 依赖 119 | 120 | - [opencv](https://opencv-python-tutroals.readthedocs.io/en/latest/py_tutorials/py_tutorials.html): 图像与视频处理 121 | - [tesseract-ocr](https://github.com/tesseract-ocr/tesseract/wiki/Downloads) & [tesserocr](https://github.com/sirfz/tesserocr): 文本检测 122 | - [skimage](https://github.com/scikit-image/scikit-image): 主要用于图片相似度比较 123 | - [jieba](https://github.com/fxsjy/jieba):ocr结果的进一步处理 124 | - [pyecharts](https://github.com/pyecharts/pyecharts):结果图表渲染 125 | 126 | ## 旧版本 127 | 128 | - [stagesep](https://github.com/williamfzc/stagesep) 129 | - [利用图像识别与 OCR 进行速度类测试](https://testerhome.com/topics/16063) 130 | 131 | # Bug与建议 132 | 133 | 欢迎通过 issue 告知,或直接发起 PR 加入 :) 134 | 135 | # 协议 136 | 137 | [MIT](LICENSE) 138 | -------------------------------------------------------------------------------- /demo.py: -------------------------------------------------------------------------------- 1 | # :) 2 | from stagesep2 import VideoManager, AnalysisRunner, NormalConfig, OCRConfig, MatchTemplateConfig 3 | import cv2 4 | 5 | 6 | # 添加待测视频 7 | # 虽然能够支持多视频,但还是建议每次只分析一个 8 | video = VideoManager.add('./temp.avi') 9 | 10 | # 旋转视频 11 | # 因为拍摄设备的差异,拍摄的视频可能方向是不同预期的,容易导致分析器分析不出有效结果 12 | # 设定该参数能够旋转视频,使分析器可以正常生效 13 | # 逆时针旋转 90 * rotate 度 14 | video.rotate = 3 15 | 16 | # 添加match template的样本图 17 | video.template_manager.add('./template.png') 18 | 19 | # 分析器列表 20 | # 默认情况下会全选,使用 OCR/模型匹配/首尾帧相似度 进行处理 21 | NormalConfig.ANALYSER_LIST = ['ocr', 'match_template', 'trend'] 22 | 23 | # 修改OCR的语种 24 | # 默认情况下是英文(tesseract自带了英文) 25 | # 如果使用中文,则需要自行安装tesseract的中文支持包,详见tesseract wiki: 26 | # https://github.com/tesseract-ocr/tesseract/wiki 27 | OCRConfig.lang = 'eng' 28 | # 设置方法与tesseract保持一致,其他语言请参考官方文档 29 | # 这是简体中文的例子 30 | # OCRConfig.lang = 'chi_sim' 31 | 32 | # 修改match template的算法 33 | # 此处直接使用了opencv提供的matchTemplate 34 | # 可参考 https://docs.opencv.org/master/d4/dc6/tutorial_py_template_matching.html 35 | MatchTemplateConfig.cv_method = cv2.TM_SQDIFF_NORMED 36 | 37 | # 启动分析 38 | result = AnalysisRunner.run() 39 | 40 | # 分析的结果(dict) 41 | dict_data = result.data 42 | 43 | # 或者输出到文件内(json) 44 | result.export('./result.json') 45 | 46 | # 绘制结果报告 47 | result.draw('./result_report.html') 48 | -------------------------------------------------------------------------------- /pics/sample_report.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/williamfzc/stagesep2/20c00187c86e8b807bbb3373f58918a575c2ccc9/pics/sample_report.png -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | 4 | setup( 5 | name='stagesep2', 6 | version='0.2.6', 7 | description='Analyse, and convert video into useful data.', 8 | author='williamfzc', 9 | author_email='fengzc@vip.qq.com', 10 | url='https://github.com/williamfzc/stagesep2', 11 | packages=find_packages(), 12 | license='MIT', 13 | classifiers=[ 14 | 'License :: OSI Approved :: MIT License', 15 | 'Programming Language :: Python', 16 | 'Programming Language :: Python :: 3', 17 | 'Programming Language :: Python :: 3.6', 18 | 'Programming Language :: Python :: 3.7', 19 | ], 20 | python_requires=">=3.6", 21 | install_requires=[ 22 | 'opencv-python', 23 | 'structlog', 24 | 'numpy', 25 | 'jieba', 26 | 'scikit-image', 27 | 'pyecharts==0.5.11', 28 | 'pyecharts_snapshot', 
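        # painter.py relies on the pre-1.0 pyecharts API (Line, Page), hence the ==0.5.11 pin above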
29 | 'findit', 30 | 'tesserocr', 31 | 'Pillow', 32 | ] 33 | ) 34 | -------------------------------------------------------------------------------- /stagesep2/__init__.py: -------------------------------------------------------------------------------- 1 | from stagesep2.loader import VideoManager 2 | from stagesep2.executor import AnalysisRunner 3 | from stagesep2.config import NormalConfig, OCRConfig, MatchTemplateConfig 4 | -------------------------------------------------------------------------------- /stagesep2/analyser/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | analyser 3 | 4 | opencv frame -> result 5 | """ 6 | from stagesep2.analyser.ocr import OCRAnalyser 7 | from stagesep2.analyser.match_template import MatchTemplateAnalyser 8 | from stagesep2.analyser.trend import TrendAnalyser 9 | 10 | 11 | ANALYSER_DICT = { 12 | 'ocr': OCRAnalyser, 13 | 'match_template': MatchTemplateAnalyser, 14 | 'trend': TrendAnalyser, 15 | } 16 | -------------------------------------------------------------------------------- /stagesep2/analyser/base.py: -------------------------------------------------------------------------------- 1 | class BaseAnalyser(object): 2 | name = '' 3 | 4 | @classmethod 5 | def run(cls, *args, **kwargs): 6 | """ 7 | normally, 8 | 9 | 1. frame (opencv image object) 10 | 2. ssv (provide necessary info, eg: template pictures) 11 | 12 | :param args: 13 | :param kwargs: 14 | :return: 15 | """ 16 | return '' 17 | 18 | @classmethod 19 | def clean(cls, *args, **kwargs): 20 | """ 21 | clean Analyser config after usage 22 | 23 | :param args: 24 | :param kwargs: 25 | :return: 26 | """ 27 | -------------------------------------------------------------------------------- /stagesep2/analyser/match_template.py: -------------------------------------------------------------------------------- 1 | from stagesep2.analyser.base import BaseAnalyser 2 | from stagesep2.config import MatchTemplateConfig 3 | from findit import FindIt 4 | 5 | fi = FindIt( 6 | engine=['template'], 7 | cv_method_name=MatchTemplateConfig.cv_method, 8 | ) 9 | 10 | 11 | class MatchTemplateAnalyser(BaseAnalyser): 12 | """ match-template analyser """ 13 | name = 'match_template' 14 | 15 | @classmethod 16 | def run(cls, frame, ssv): 17 | template_pic_dict = ssv.template_manager.get_dict() 18 | for each_pic_name, each_pic in template_pic_dict.items(): 19 | fi.load_template(each_pic_name, pic_object=each_pic.cv_object) 20 | 21 | match_result = fi.find( 22 | 'temp', 23 | target_pic_object=frame, 24 | engine_template_scale=(1, 2, 5) 25 | )['data'] 26 | 27 | result_dict = dict() 28 | for each_pic_name, each_pic_result in match_result.items(): 29 | result_dict[each_pic_name] = { 30 | # no min any more 31 | 'min': -1, 32 | 'max': each_pic_result['TemplateEngine']['target_sim'], 33 | } 34 | fi.clear() 35 | return result_dict 36 | -------------------------------------------------------------------------------- /stagesep2/analyser/ocr.py: -------------------------------------------------------------------------------- 1 | import jieba 2 | from PIL import Image 3 | from tesserocr import PyTessBaseAPI 4 | 5 | from stagesep2.analyser.base import BaseAnalyser 6 | from stagesep2.config import OCRConfig 7 | 8 | 9 | def content_filter(old_content): 10 | """ remove unused content and rebuild a word list """ 11 | new_content = old_content.replace(' ', '').replace('\n', '').replace('\r', '') 12 | return list(jieba.cut(new_content)) 13 | 14 | 15 | class OCRAnalyser(BaseAnalyser): 16 | 
""" ocr analyser """ 17 | 18 | name = 'ocr' 19 | TAG = 'OCRAnalyser' 20 | tesserocr_api = PyTessBaseAPI(lang=OCRConfig.lang) 21 | 22 | @classmethod 23 | def run(cls, frame, *args, **kwargs): 24 | """ 25 | run ocr analyser 26 | 27 | :param frame: ndarray 28 | :return: 29 | """ 30 | frame = Image.fromarray(frame) 31 | cls.tesserocr_api.SetImage(frame) 32 | result = cls.tesserocr_api.GetUTF8Text() 33 | 34 | # content filter 35 | result = content_filter(result) 36 | 37 | return result 38 | -------------------------------------------------------------------------------- /stagesep2/analyser/trend.py: -------------------------------------------------------------------------------- 1 | from skimage.measure import compare_ssim 2 | 3 | from stagesep2.analyser.base import BaseAnalyser 4 | 5 | 6 | class TrendAnalyser(BaseAnalyser): 7 | """ trend analyser """ 8 | name = 'trend' 9 | previous_frame = None 10 | 11 | @classmethod 12 | def run(cls, frame, ssv): 13 | """ 14 | return a dict, eg: {'previous': 1.0, 'first': 0.9, 'last': 0.2} 15 | 16 | :param frame: 17 | :param ssv: 18 | :return: 19 | """ 20 | # init previous frame 21 | if cls.previous_frame is None: 22 | cls.previous_frame = frame 23 | 24 | # calculate sim 25 | previous_sim = compare_ssim(cls.previous_frame, frame) 26 | first_sim = compare_ssim(ssv.first_frame, frame) 27 | last_sim = compare_ssim(ssv.last_frame, frame) 28 | 29 | # update previous frame 30 | cls.previous_frame = frame 31 | 32 | result_dict = { 33 | 'previous': previous_sim, 34 | 'first': first_sim, 35 | 'last': last_sim, 36 | } 37 | return result_dict 38 | 39 | @classmethod 40 | def clean(cls, *args, **kwargs): 41 | cls.previous_frame = None 42 | -------------------------------------------------------------------------------- /stagesep2/config.py: -------------------------------------------------------------------------------- 1 | """ 2 | 分析配置存放在此处 3 | 支持读取外部json配置 4 | """ 5 | import os 6 | import cv2 7 | 8 | 9 | class OCRConfig(object): 10 | lang = 'eng+chi_sim' 11 | 12 | 13 | class MatchTemplateConfig(object): 14 | cv_method = 'cv2.TM_CCOEFF_NORMED' 15 | 16 | 17 | class NormalConfig(object): 18 | # project root path 19 | PROJECT_PATH = os.path.dirname(os.path.dirname(__file__)) 20 | 21 | # default analyser list (will run) 22 | ANALYSER_LIST = ['ocr', 'match_template', 'trend'] 23 | 24 | # default encoding 25 | CHARSET = 'utf-8-sig' 26 | -------------------------------------------------------------------------------- /stagesep2/executor.py: -------------------------------------------------------------------------------- 1 | """ 2 | 分析器 3 | 4 | - OCR 5 | - 特征识别 6 | """ 7 | import cv2 8 | 9 | from stagesep2.loader import VideoManager, frame_prepare, SSVideo 10 | from stagesep2.config import NormalConfig 11 | from stagesep2.logger import logger 12 | from stagesep2.reporter import ResultReporter, ResultRow 13 | from stagesep2.utils import * 14 | 15 | 16 | class AnalysisRunner(object): 17 | """ 18 | 主要逻辑 19 | 20 | - 从VideoManager中导入视频对象 21 | - 从config中读取需要使用的Analyser 22 | - 遍历视频列表 23 | - 切割视频,遍历帧 24 | - 用不同的Analyser分析帧 25 | - 记录结果 26 | - 将结果传递给reporter进行处理 27 | """ 28 | TAG = 'AnalyserRunner' 29 | 30 | @classmethod 31 | def run(cls) -> (ResultReporter, list): 32 | analyser_list = check_analyser(NormalConfig.ANALYSER_LIST) 33 | video_dict = VideoManager.video_dict 34 | logger.info(cls.TAG, analyser=analyser_list, video=video_dict) 35 | 36 | result_reporter_list = list() 37 | for each_video_name, each_ssv in video_dict.items(): 38 | result_reporter = ResultReporter(each_video_name) 39 
| cls.analyse_video(each_ssv, analyser_list, result_reporter) 40 | result_reporter_list.append(result_reporter) 41 | 42 | # export result 43 | # 如果同时分析多个视频,最终结果是一个装着Reporter的list 44 | # 如果只分析一个视频,最终结果是一个Reporter 45 | if len(result_reporter_list) <= 1: 46 | return result_reporter_list[0] 47 | return result_reporter_list 48 | 49 | @classmethod 50 | def analyse_video(cls, ssv_video: SSVideo, analyser_list: list, result_reporter: ResultReporter): 51 | """ analyse ssv video """ 52 | with video_capture(ssv_video) as each_video: 53 | ret, frame = each_video.read() 54 | while ret: 55 | if not ret: 56 | # end of video 57 | break 58 | 59 | # prepare frame 60 | frame = frame_prepare(frame) 61 | # rotate 62 | frame = rotate_pic(frame, ssv_video.rotate) 63 | 64 | # current status 65 | cur_frame_count = int(each_video.get(cv2.CAP_PROP_POS_FRAMES)) 66 | cur_second = each_video.get(cv2.CAP_PROP_POS_MSEC) / 1000 67 | 68 | # new row of result 69 | new_row = ResultRow( 70 | result_reporter.result_id, 71 | ssv_video.video_path, 72 | cur_frame_count, 73 | cur_second, 74 | ) 75 | 76 | for each_analyser in analyser_list: 77 | result = each_analyser.run(frame, ssv_video) 78 | new_row.add_analyser_result(each_analyser.name, result) 79 | 80 | logger.info(cls.TAG, msg='analysing', **new_row.__dict__) 81 | result_reporter.add_row(new_row) 82 | 83 | # read new frame 84 | ret, frame = each_video.read() 85 | 86 | # clean analyser 87 | for each in analyser_list: 88 | each.clean() 89 | -------------------------------------------------------------------------------- /stagesep2/loader.py: -------------------------------------------------------------------------------- 1 | """ 2 | 负责资源导入、模块检查 3 | """ 4 | import os 5 | import cv2 6 | 7 | from stagesep2.logger import logger 8 | from stagesep2.utils import * 9 | 10 | 11 | def path_to_name(file_path: str) -> str: 12 | """ full path -> file name """ 13 | return os.path.splitext(os.path.basename(file_path))[0] 14 | 15 | 16 | def is_path_existed(file_path: str): 17 | """ check if file is existed """ 18 | return os.path.isfile(file_path) 19 | 20 | 21 | def frame_prepare(frame): 22 | gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) 23 | blur_gray_frame = cv2.medianBlur(gray_frame, 3) 24 | return blur_gray_frame 25 | 26 | 27 | class TemplatePicture(object): 28 | def __init__(self, pic_path: str): 29 | if not is_path_existed(pic_path): 30 | raise FileNotFoundError('file not existed: {}'.format(pic_path)) 31 | 32 | self.pic_name: str = path_to_name(pic_path) 33 | self.pic_path: str = pic_path 34 | self.cv_object = frame_prepare(cv2.imread(self.pic_path)) 35 | 36 | 37 | class TemplateManager(object): 38 | TAG = 'TemplateManager' 39 | 40 | def __init__(self, video_name: str): 41 | self.video_name = video_name 42 | 43 | # match template 需要模板图片 44 | # 该视频需要的模板图片会被放置在此处 45 | self._match_template_pic_dict = dict() 46 | # eg: 47 | # { pic_name: TemplatePicture(pic_path), } 48 | 49 | def add(self, pic_path: str): 50 | new_pic = TemplatePicture(pic_path) 51 | new_pic_name = new_pic.pic_name 52 | self._match_template_pic_dict[new_pic_name] = new_pic 53 | logger.info(self.TAG, msg='LOAD PICTURE', path=pic_path, name=new_pic_name, video=self.video_name) 54 | 55 | def remove(self, pic_name: str): 56 | if pic_name in self._match_template_pic_dict: 57 | del self._match_template_pic_dict[pic_name] 58 | return True 59 | logger.warn(self.TAG, msg='no pic named {}'.format(pic_name)) 60 | return False 61 | 62 | def get_dict(self): 63 | return self._match_template_pic_dict 64 | 65 | 66 | class 
SSVideo(object): 67 | """ video object """ 68 | 69 | def __init__(self, video_path: str): 70 | if not is_path_existed(video_path): 71 | raise FileNotFoundError('file not existed: {}'.format(video_path)) 72 | 73 | self.video_name = path_to_name(video_path) 74 | self.video_path = video_path 75 | 76 | # add template example: 77 | # ssv = SSVideo('some_path/123.mp4') 78 | # ssv.template_manager.add('some_path/123.png') 79 | self.template_manager = TemplateManager(self.video_name) 80 | 81 | # degree = rotate * 90, 逆时针 82 | self._rotate = 0 83 | 84 | # video info 85 | # total frame count 86 | self.total_frame = None 87 | 88 | # first and last frame 89 | self.first_frame = None 90 | self.last_frame = None 91 | 92 | # load video base info 93 | self.load_video_info() 94 | 95 | @property 96 | def rotate(self): 97 | return self._rotate 98 | 99 | @rotate.setter 100 | def rotate(self, value: int): 101 | if not isinstance(value, int): 102 | raise TypeError('rotate should be int') 103 | self._rotate = value 104 | self.load_video_info() 105 | 106 | def load_video_info(self): 107 | # TODO need more info? 108 | # get info from video 109 | with video_capture(self) as video_src: 110 | total_frame = video_src.get(cv2.CAP_PROP_FRAME_COUNT) 111 | 112 | _, first_frame = video_src.read() 113 | video_src.set(1, total_frame - 1) 114 | _, last_frame = video_src.read() 115 | 116 | # prepare, and rotate 117 | first_frame, last_frame = [ 118 | frame_prepare(rotate_pic(each, self._rotate)) 119 | for each in (first_frame, last_frame)] 120 | 121 | # init 122 | self.first_frame = first_frame 123 | self.last_frame = last_frame 124 | self.total_frame = total_frame 125 | 126 | 127 | class VideoManager(object): 128 | """ 129 | Analyser需要的信息都应该在此处被导入 130 | 例如 作为分析主体的 视频 131 | 例如 match template需要的 模板图片 132 | """ 133 | TAG = 'VideoManager' 134 | 135 | # 待测视频会被添加到这里 136 | # 在分析开始时,会遍历此字典 137 | video_dict = dict() 138 | 139 | # eg: 140 | # { video_name: SSVideo(video_path), } 141 | 142 | def __init__(self): 143 | raise NotImplementedError('should not init') 144 | 145 | @classmethod 146 | def add(cls, video_path: str): 147 | new_video = SSVideo(video_path) 148 | new_video_name = new_video.video_name 149 | cls.video_dict[new_video_name] = new_video 150 | logger.info(cls.TAG, msg='LOAD VIDEO', path=video_path, name=new_video_name) 151 | return new_video 152 | 153 | @classmethod 154 | def remove(cls, video_name: str): 155 | if video_name in cls.video_dict: 156 | del cls.video_dict[video_name] 157 | return True 158 | logger.warn(cls.TAG, msg='no video named {}'.format(video_name)) 159 | return False 160 | -------------------------------------------------------------------------------- /stagesep2/logger.py: -------------------------------------------------------------------------------- 1 | from structlog import getLogger 2 | 3 | 4 | logger = getLogger(__name__) 5 | -------------------------------------------------------------------------------- /stagesep2/painter.py: -------------------------------------------------------------------------------- 1 | import json 2 | from pyecharts import Line, Page 3 | 4 | from stagesep2.logger import logger 5 | from stagesep2.config import NormalConfig 6 | 7 | 8 | # 如果后续图表需要更加复杂,这个部分最好用类似pandas的库进行比较规范的数据格式处理。只是简单展示就不用了 9 | class ReportPainter(object): 10 | TAG = 'Painter' 11 | 12 | @classmethod 13 | def draw_with_file(cls, file_path: str, dst_path: str): 14 | """ draw with json file path, and export to dst path """ 15 | with open(file_path, encoding=NormalConfig.CHARSET) as f: 16 | 
cls.draw(json.load(f), dst_path) 17 | 18 | @classmethod 19 | def draw_with_json(cls, json_str: str, dst_path: str): 20 | """ draw with raw json content, and export to dst path """ 21 | content = json.loads(json_str) 22 | cls.draw(content, dst_path) 23 | 24 | @classmethod 25 | def draw(cls, content: list, dst_path: str): 26 | # base page 27 | page = Page() 28 | 29 | # time line,直接用float作为x轴会出现 echarts 兼容问题 30 | time_list = [str(each['current_time']) for each in content] 31 | 32 | # 目前只支持 match_template 与 trend 的图表绘制 33 | draw_type_dict = { 34 | 'match_template': cls.build_match_template_line, 35 | 'trend': cls.build_trend_line, 36 | } 37 | for each_type, each_func in draw_type_dict.items(): 38 | # 以第一项为例检验是否包含该类分析结果 39 | if each_type not in content[0]: 40 | continue 41 | 42 | data_list = [each_data[each_type] for each_data in content] 43 | data_line = draw_type_dict[each_type](time_list, data_list) 44 | page.add(data_line) 45 | 46 | page.render(dst_path) 47 | logger.info(cls.TAG, msg='report built finished: "{}"'.format(dst_path)) 48 | 49 | @classmethod 50 | def build_trend_line(cls, time_list: list, trend_list: list): 51 | trend_line = Line('trend') 52 | for each_attr in ['previous', 'first', 'last']: 53 | each_list = [i[each_attr] for i in trend_list] 54 | trend_line.add( 55 | each_attr, 56 | time_list, 57 | each_list, 58 | yaxis_min='dataMin', 59 | is_more_utils=True, 60 | ) 61 | return trend_line 62 | 63 | @classmethod 64 | def build_match_template_line(cls, time_list: list, match_template_list: list): 65 | """ 66 | 67 | :param time_list: 68 | :param match_template_list: 69 | 70 | looks like: 71 | 72 | [ 73 | { 74 | "pic1": { 75 | "min": -0.4684264361858368, 76 | "max": 0.6224471926689148 77 | }, 78 | "pic2": { 79 | "min": -0.4022962152957916, 80 | "max": 0.7294253706932068 81 | }, 82 | "pic3": { 83 | "min": -0.6132965087890625, 84 | "max": 0.7038567066192627 85 | } 86 | } 87 | 88 | ... 
89 | ] 90 | 91 | :return: 92 | """ 93 | 94 | # build data structure for drawing 95 | data_to_draw = { 96 | name: list() 97 | for name in match_template_list[0].keys() 98 | } 99 | 100 | for each_name in data_to_draw.keys(): 101 | # draw only max values 102 | data_to_draw[each_name] = [i[each_name]['max'] for i in match_template_list] 103 | 104 | match_template_line = Line('match_template') 105 | for each_name, each_data in data_to_draw.items(): 106 | match_template_line.add( 107 | each_name, 108 | x_axis=time_list, 109 | y_axis=each_data, 110 | is_more_utils=True, 111 | ) 112 | 113 | return match_template_line 114 | -------------------------------------------------------------------------------- /stagesep2/reporter.py: -------------------------------------------------------------------------------- 1 | """ 2 | 处理分析结果 3 | 4 | 应至少支持: 5 | - 文件(json) 6 | - 对象 7 | - 数据库交互 8 | """ 9 | import uuid 10 | import json 11 | import os 12 | 13 | from stagesep2.logger import logger 14 | from stagesep2.painter import ReportPainter 15 | 16 | 17 | class ResultRow(object): 18 | def __init__( 19 | self, 20 | # must 21 | result_id, 22 | video_name, 23 | frame_id, 24 | current_time, 25 | ): 26 | # task id 27 | self.result_id = result_id 28 | # video name (id), a task may contains more than one video 29 | self.video_name = video_name 30 | # current frame number 31 | # 当前的结果对应的帧编号 32 | self.frame_id = frame_id 33 | # current time 34 | # 当前的结果对应的帧,在视频中对应的时间 35 | self.current_time = current_time 36 | 37 | def add_analyser_result(self, name, result): 38 | """ 39 | add result of analyser 40 | 41 | :param name: analyser name 42 | :param result: result of analyser 43 | :return: 44 | """ 45 | # analyse result 46 | # ocr: list, eg: ['some_word', 'other_word'] 47 | # match_template: dict, eg: {'pic1': 0.85, 'pic2': 0.90} 48 | 49 | self.__dict__[name] = result 50 | 51 | def __str__(self): 52 | return json.dumps(self.__dict__) 53 | 54 | def to_dict(self): 55 | return self.__dict__ 56 | 57 | __repr__ = __str__ 58 | 59 | 60 | class ResultReporter(object): 61 | TAG = 'ResultReporter' 62 | 63 | def __init__(self, video_name): 64 | self.video_name = video_name 65 | self.result_id = str(uuid.uuid1()) 66 | self._row_list = list() 67 | 68 | def add_row(self, new_row): 69 | self._row_list.append(new_row) 70 | 71 | def export(self, file_path): 72 | """ export result to json file. Path can be file, dir. 
""" 73 | 74 | # check file path 75 | if os.path.isfile(file_path): 76 | logger.warn(self.TAG, msg='File "{}" already existed'.format(file_path)) 77 | file_path = os.path.join(os.path.dirname(file_path), self.result_id + '.json') 78 | elif os.path.isdir(file_path): 79 | logger.warn(self.TAG, msg='Path "{}" is a directory'.format(file_path)) 80 | file_path = os.path.join(file_path, self.result_id + '.json') 81 | 82 | # write file 83 | with open(file_path, 'w+') as json_file: 84 | json_file.write(str(self.data)) 85 | logger.info(self.TAG, msg='Result saved in "{}"'.format(file_path)) 86 | 87 | @property 88 | def data(self): 89 | """ return data, consisted by pyobject """ 90 | return self._row_list 91 | 92 | def draw(self, dst): 93 | """ draw analysis report to file named dst """ 94 | ReportPainter.draw_with_json(str(self.data), dst) 95 | -------------------------------------------------------------------------------- /stagesep2/utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | public functions 3 | """ 4 | import numpy as np 5 | import cv2 6 | import contextlib 7 | 8 | from stagesep2.analyser import ANALYSER_DICT 9 | 10 | 11 | def check_analyser(analyser_list: list) -> list: 12 | """ check if analyser existed, and return list of runnable analyser """ 13 | new_analyser_list = list() 14 | for each in analyser_list: 15 | if each not in ANALYSER_DICT: 16 | raise NotImplementedError('analyser {} not found'.format(each)) 17 | new_analyser_list.append(ANALYSER_DICT[each]) 18 | return new_analyser_list 19 | 20 | 21 | def rotate_pic(old_pic: np.ndarray, rotate_time: int) -> np.ndarray: 22 | """ 帧逆时针旋转 90*rotate_time 度 """ 23 | new_pic = np.rot90(old_pic, rotate_time) 24 | return new_pic 25 | 26 | 27 | @contextlib.contextmanager 28 | def video_capture(ssv): 29 | """ 打开视频的上下文控制 """ 30 | video_cap = cv2.VideoCapture(ssv.video_path) 31 | yield video_cap 32 | video_cap.release() 33 | 34 | 35 | __all__ = [ 36 | 'check_analyser', 37 | 'rotate_pic', 38 | 'video_capture', 39 | ] 40 | --------------------------------------------------------------------------------