├── test.mp3 ├── config.json ├── requirements.txt ├── Dockerfile ├── example.py ├── .gitignore ├── tools_language.py ├── acrcloud_logger.py ├── acrcloud_scan_files_libary.py ├── README.md ├── tools_str_sim.py ├── acrcloud_scan_files_python.py └── acrcloud_filter_libary.py /test.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/acrcloud/acrcloud_scan_files_python/HEAD/test.mp3 -------------------------------------------------------------------------------- /config.json: -------------------------------------------------------------------------------- 1 | { 2 | "host": "xxx", 3 | "access_key": "xxx", 4 | "access_secret": "xxx" 5 | } 6 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | python-Levenshtein 2 | fuzzywuzzy 3 | backports.csv 4 | requests 5 | openpyxl 6 | python-dateutil 7 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:2.7.16-slim-stretch 2 | 3 | COPY . /acr_scan_tool 4 | WORKDIR /acr_scan_tool 5 | RUN chmod +x /acr_scan_tool/acrcloud_scan_files_python.py 6 | 7 | ENV PATH=${PATH}:/acr_scan_tool 8 | 9 | RUN apt-get update \ 10 | && apt-get install -y --no-install-recommends git \ 11 | && apt-get purge -y --auto-remove \ 12 | && rm -rf /var/lib/apt/lists/* 13 | 14 | RUN pip install git+https://github.com/acrcloud/acrcloud_sdk_python 15 | RUN pip install fuzzywuzzy requests openpyxl python-dateutil backports.csv 16 | 17 | 18 | ENTRYPOINT ["acrcloud_scan_files_python.py"] 19 | -------------------------------------------------------------------------------- /example.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | #-*- coding:utf-8 -*- 3 | 4 | import os 5 | import sys 6 | import json 7 | from acrcloud_scan_files_libary import ACRCloud_Scan_Files 8 | 9 | if __name__ == "__main__": 10 | 11 | #ACRCloud Scan File Example 12 | is_debug = 1 #display the log info, or is_debug=0 13 | start_time = 0 #scan file start time(seconds) 14 | stop_time = 0 #scan file end time(seconds), or you can set it to the duration of file 15 | step = 10 #the length of each identified fragment (seconds) 16 | rec_length = step 17 | 18 | #your acrcloud project host, access_key, access_secret 19 | config = { 20 | "host": "your project host", 21 | "access_key": "your project access_key", 22 | "access_secret": "your project access_secret" 23 | } 24 | #export dir 25 | export_dir = "./" 26 | 27 | filepath = sys.argv[1] 28 | 29 | acr_sfile = ACRCloud_Scan_Files(config, is_debug) 30 | 31 | stop_time = acr_sfile.get_duration_by_file(filepath) 32 | 33 | """ 34 | #get a list of recognition results 35 | result_list = acr_sfile.recognize_file(filepath, start_time, stop_time, step, rec_length) 36 | #export to csv 37 | export_filename_csv = filepath + ".csv" 38 | acr_sfile.export_to_csv(result_list, export_filename_csv, export_dir) 39 | #export to xlsx 40 | export_filename_xlsx = filepath + ".xlsx" 41 | acr_sfile.export_to_xlsx(result_list, export_filename_xlsx, export_dir) 42 | """ 43 | 44 | #iterator to get the result of each fragment 45 | result_list2 = [] 46 | with open(filepath+"_raw_result.lst", "w") as wfile: 47 | for item in acr_sfile.for_recognize_file(filepath, start_time, stop_time, step, rec_length): 48 | 
result_list2.append(item) 49 | filename = item["file"] 50 | timestamp = item["timestamp"] 51 | res = acr_sfile.parse_data(item["result"]) 52 | title = res[2] 53 | print(filename, timestamp, title) 54 | wfile.write("{0}\n".format(json.dumps(item))) 55 | 56 | #get results with played-duration 57 | filter_results = acr_sfile.apply_filter(result_list2) 58 | #export the results to xlsx 59 | export_filtername_xlsx = filepath + "_with_duration.xlsx" 60 | acr_sfile.export_to_xlsx(filter_results, export_filtername_xlsx, export_dir) 61 | 62 | 63 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by .ignore support plugin (hsz.mobi) 2 | ### Python template 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | env/ 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .coverage 43 | .coverage.* 44 | .cache 45 | nosetests.xml 46 | coverage.xml 47 | *,cover 48 | .hypothesis/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | 58 | # Flask instance folder 59 | instance/ 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # IPython Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # dotenv 80 | .env 81 | 82 | # virtualenv 83 | venv/ 84 | ENV/ 85 | 86 | # Spyder project settings 87 | .spyderproject 88 | 89 | # Rope project settings 90 | .ropeproject 91 | ### JetBrains template 92 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm 93 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 94 | 95 | # User-specific stuff: 96 | .idea/workspace.xml 97 | .idea/tasks.xml 98 | .idea/dictionaries 99 | .idea/vcs.xml 100 | .idea/jsLibraryMappings.xml 101 | 102 | # Sensitive or high-churn files: 103 | .idea/dataSources.ids 104 | .idea/dataSources.xml 105 | .idea/dataSources.local.xml 106 | .idea/sqlDataSources.xml 107 | .idea/dynamic.xml 108 | .idea/uiDesigner.xml 109 | 110 | # Gradle: 111 | .idea/gradle.xml 112 | .idea/libraries 113 | 114 | # Mongo Explorer plugin: 115 | .idea/mongoSettings.xml 116 | 117 | ## File-based project format: 118 | *.iws 119 | 120 | ## Plugin-specific files: 121 | 122 | # IntelliJ 123 | /out/ 124 | 125 | # mpeltonen/sbt-idea plugin 126 | .idea_modules/ 127 | 128 | # JIRA plugin 129 | atlassian-ide-plugin.xml 130 | 131 | # Crashlytics plugin (for Android Studio and IntelliJ) 132 | com_crashlytics_export_strings.xml 133 | crashlytics.properties 134 | crashlytics-build.properties 135 | fabric.properties 136 | 137 | -------------------------------------------------------------------------------- /tools_language.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | #-*- coding:utf-8 -*- 3 | 4 | class tools_language: 5 | def __init__(self): 6 | pass 7 | 8 | def is_chinese(self, uchar): 9 | if uchar >= u'\u4e00' and uchar<=u'\u9fa5': 10 | return True 11 | else: 12 | return False 13 | 14 | def is_CJK(self, uchar): 15 | """Check whether a unicode char is CJK (Chinese/Japanese/Korean) or in a related range""" 16 | if uchar >= u'\u3000' and uchar <= u'\u303f': 17 | return True 18 | elif uchar >= u'\u3040' and uchar <= u'\u309f': 19 | return True 20 | elif uchar >= u'\u30a0' and uchar <= u'\u30ff': 21 | return True 22 | elif uchar >= u'\uff00' and uchar <= u'\uffef': #full/half-width forms (the original upper bound u'\u30ff' made this branch unreachable) 23 | return True 24 | elif uchar >= u'\u4e00' and uchar <= u'\u9faf': 25 | return True 26 | elif uchar >= u'\u3400' and uchar <= u'\u4dbf': 27 | return True 28 | elif uchar >= u'\u0400' and uchar <= u'\u052f': #Cyrillic (Russian) 29 | return True 30 | elif uchar >= u'\uac00' and uchar <= u'\ud7ff': #Hangul (Korean) 31 | return True 32 | elif uchar >= u'\u4e00' and uchar <= u'\u9fa5': #Chinese 33 | return True 34 | elif uchar >= u'\uff61' and uchar <= u'\uff9f': #half-width katakana (Japanese) 35 | return True 36 | else: 37 | return False 38 | 39 | def is_number(self, uchar): 40 | if uchar >= u'\u0030' and uchar<=u'\u0039': #ASCII digits (the original upper bound u'\uffef' matched far more than digits) 41 | return True 42 | else: 43 | return False 44 | 45 | def is_alphabet(self, uchar): 46 | if (uchar >= u'\u0041' and uchar<=u'\u005a') or (uchar >= u'\u0061' and uchar<=u'\u007a'): 47 | return True 48 | else: 49 | return False 50 | 51 | def is_other(self, uchar): 52 | if not (self.is_chinese(uchar) or self.is_number(uchar) or self.is_alphabet(uchar)): 53 | return True 54 | else: 55 | return False 56 | 57 | def B2Q(self, uchar): 58 | inside_code=ord(uchar) 59 | if inside_code<0x0020 or inside_code>0x7e: 60 | return uchar 61 | if inside_code==0x0020: 62 | inside_code=0x3000 63 | else: 64 | inside_code+=0xfee0 65 | return unichr(inside_code) 66 | 67 | def Q2B(self, uchar): 68 | inside_code=ord(uchar) 69 | if inside_code==0x3000: 70 | inside_code=0x0020 71 | else: 72 | inside_code-=0xfee0 73 | if inside_code<0x0020 or inside_code>0x7e: 74 | return uchar 75 | return unichr(inside_code) 76 | 77 | def stringQ2B(self, ustring): 78 | return "".join([self.Q2B(uchar) for uchar in ustring]) 79 | 80 | def uniform(self, ustring): 81 | return self.stringQ2B(ustring).lower() 82 | 83 | def string2List(self, ustring): 84 | retList=[] 85 | utmp=[] 86 | for uchar in ustring: 87 | if self.is_other(uchar): 88 | if len(utmp)==0: 89 | continue 90 | else: 91 | retList.append("".join(utmp)) 92 | utmp=[] 93 | else: 94 | utmp.append(uchar) 95 | if len(utmp)!=0: 96 | retList.append("".join(utmp)) 97 | return retList 98 | 99 | def has_chinese(self, ustring): 100 | ustring_lower = ustring.lower() 101 | for uchar in ustring_lower: 102 | if self.is_chinese(uchar): 103 | return True 104 | return False 105 | 106 | def has_CJK(self, ustring): 107 | ustring_lower = ustring.lower() 108 | for uchar in ustring_lower: 109 | if self.is_CJK(uchar): 110 | return True 111 | return False 112 | 113 | 114 | -------------------------------------------------------------------------------- /acrcloud_logger.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | #-*- coding:utf-8 -*- 3 | 4 | import os 5 | import sys 6 | import time 7 | import logging 8 | import traceback 9 | from logging.handlers import TimedRotatingFileHandler 10 | ''' 11 | traceback records log 12 | try: 13 | pass 14 | except Exception, e: 15 | logger.error('Failed to open file', exc_info=True) 16 | '''
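# Note: the format strings handed to ColoredFormatter below may embed the tokens
# $COLOR, $RESET and $BOLD, or any color name from the COLORS table such as $RED
# or $GREEN; ColoredFormatter.format() swaps each token for its ANSI escape sequence.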
17 | 18 | import logging 19 | 20 | BLACK, RED, GREEN, YELLOW, BLUE, MAGENTA, CYAN, WHITE = range(8) 21 | 22 | COLORS = { 23 | 'WARNING' : YELLOW, 24 | 'INFO' : GREEN, 25 | 'DEBUG' : BLUE, 26 | 'CRITICAL' : YELLOW, 27 | 'ERROR' : RED, 28 | 'RED' : RED, 29 | 'GREEN' : GREEN, 30 | 'YELLOW' : YELLOW, 31 | 'BLUE' : BLUE, 32 | 'MAGENTA' : MAGENTA, 33 | 'CYAN' : CYAN, 34 | 'WHITE' : WHITE, 35 | } 36 | 37 | RESET_SEQ = "\033[0m" 38 | COLOR_SEQ = "\033[1;%dm" 39 | BOLD_SEQ = "\033[1m" 40 | 41 | class ColoredFormatter(logging.Formatter): 42 | 43 | def __init__(self, *args, **kwargs): 44 | # can't do super(...) here because Formatter is an old school class 45 | logging.Formatter.__init__(self, *args, **kwargs) 46 | 47 | def format(self, record): 48 | levelname = record.levelname 49 | color = COLOR_SEQ % (30 + COLORS[levelname]) 50 | message = logging.Formatter.format(self, record) 51 | message = message.replace("$RESET", RESET_SEQ)\ 52 | .replace("$BOLD", BOLD_SEQ)\ 53 | .replace("$COLOR", color) 54 | for k,v in COLORS.items(): 55 | message = message.replace("$" + k, COLOR_SEQ % (v+30))\ 56 | .replace("$BG" + k, COLOR_SEQ % (v+40))\ 57 | .replace("$BG-" + k, COLOR_SEQ % (v+40)) 58 | return message + RESET_SEQ 59 | 60 | 61 | class AcrcloudLogger: 62 | 63 | def __init__(self, logname, loglevel = logging.INFO): 64 | self.logger = logging.getLogger(logname) 65 | self.logger.setLevel(loglevel) 66 | self.default_fmt = '%(asctime)s - %(name)s - %(levelname)8s - %(message)s' 67 | self.default_colorfmt = "$MAGENTA%(asctime)s$RESET - $COLOR%(name)-12s$RESET - $COLOR%(levelname)-6s$RESET - %(message)s" 68 | self.default_dir = './radioLog' 69 | 70 | def addFilehandler(self, logfile, logdir = None, fmt = '', loglevel = logging.INFO, when='D', interval=10, backupCount=1): 71 | try: 72 | filename = logfile 73 | if logdir is None: 74 | logdir = self.default_dir 75 | if not os.path.exists(logdir): 76 | os.makedirs(logdir) 77 | logfilepath = os.path.join(logdir, filename) 78 | #fhandler = logging.FileHandler(logfilepath) 79 | fhandler = TimedRotatingFileHandler(logfilepath, when, interval, backupCount) 80 | fhandler.setLevel(loglevel) 81 | formatter = logging.Formatter(fmt if fmt else self.default_fmt) 82 | fhandler.setFormatter(formatter) 83 | self.logger.addHandler(fhandler) 84 | return True 85 | except Exception as e: 86 | traceback.print_exc(file=sys.stdout) 87 | return False 88 | 89 | def addStreamHandler(self, fmt='', loglevel = logging.INFO): 90 | try: 91 | color_fmt = fmt if fmt else self.default_colorfmt 92 | shandler = logging.StreamHandler() 93 | shandler.setLevel(loglevel) 94 | color_formatter = ColoredFormatter(color_fmt) 95 | #f = logging.Formatter(self.default_fmt) 96 | shandler.setFormatter(color_formatter) 97 | self.logger.addHandler(shandler) 98 | return True 99 | except Exception as e: 100 | traceback.print_exc(file=sys.stdout) 101 | return False 102 | 103 | if __name__ == '__main__': 104 | 105 | dlog = AcrcloudLogger('test', logging.INFO) 106 | dlog.addFilehandler('test.log') 107 | dlog.addStreamHandler() 108 | #dlog.logger.warn("hel") 109 | """ 110 | for i in range(300): 111 | dlog.logger.warn('what!!!!!!!!!!!') 112 | #dlog.logger.info('hahhahah') 113 | #dlog.logger.error('it is monster!!') 114 | time.sleep(1) 115 | """ 116 | -------------------------------------------------------------------------------- /acrcloud_scan_files_libary.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | #-*- coding:utf-8 -*- 3 | 4 | import os 5 | import 
sys 6 | import time 7 | import json 8 | import codecs 9 | import logging 10 | import openpyxl 11 | from backports import csv 12 | from openpyxl import Workbook 13 | from acrcloud_filter_libary import FilterWorker 14 | from acrcloud_logger import AcrcloudLogger 15 | from acrcloud.recognizer import ACRCloudRecognizer 16 | 17 | if sys.version_info.major == 2: 18 | reload(sys) 19 | sys.setdefaultencoding("utf8") 20 | 21 | class ACRCloud_Scan_Files: 22 | 23 | def __init__(self, config, debug=1): 24 | self.openpyxl_version = ".".join(str(openpyxl.__version__).split(".")[:2]) 25 | self.config = config 26 | self.debug = debug 27 | self.init_log() 28 | self.re_handler = ACRCloudRecognizer(self.config) 29 | 30 | def init_log(self): 31 | log_level = logging.ERROR 32 | if self.debug == 1: 33 | log_level = logging.DEBUG 34 | 35 | shandler = logging.StreamHandler() 36 | #shandler.setLevel(log_level) 37 | self.log = logging.getLogger("ACRCloud_ScanFile") 38 | self.log.setLevel(log_level) 39 | self.log.addHandler(shandler) 40 | 41 | def as_text(self, value): 42 | if value is None: 43 | return "" 44 | return str(value) 45 | 46 | def get_duration_by_file(self, filepath): 47 | return int(ACRCloudRecognizer.get_duration_ms_by_file(filepath)/1000) 48 | 49 | def export_to_xlsx(self, result_list, export_filename="ACRCloud_ScanFile_Results.xlsx", export_dir="./"): 50 | try: 51 | results = [] 52 | for item in result_list: 53 | filename = item["file"] 54 | timestamp = item["timestamp"] 55 | jsoninfo = item["result"] 56 | if "status" in jsoninfo and jsoninfo["status"]["code"] == 0: 57 | row = self.parse_data(jsoninfo) 58 | row = [filename, timestamp] + list(row) 59 | results.append(row) 60 | results = sorted(results, key=lambda x:x[1]) 61 | 62 | wb = Workbook() 63 | sheet_music = wb.active 64 | sheet_music.title = "ACRCloud_Scan_File" 65 | 66 | header_row = ['filename', 'timestamp', 'custom_files_title', 'custom_acrid', 'title', 'artists', 'album', 67 | 'acrid', 'played_duration', 'label', 'isrc', 'upc', 'deezer', 'spotify', 'itunes', 'youtube'] 68 | 69 | sheet_music.append(header_row) 70 | for row in results: 71 | sheet_music.append(row) 72 | 73 | for column_cells in sheet_music.columns: 74 | length = max(len(self.as_text(cell.value)) for cell in column_cells) 75 | if length > 80: 76 | length = 80 #cap the column width (the original "length == 80" was a no-op comparison) 77 | if self.openpyxl_version >= "2.6": 78 | sheet_music.column_dimensions[column_cells[0].column_letter].width = length 79 | else: 80 | sheet_music.column_dimensions[column_cells[0].column].width = length 81 | 82 | export_filepath = os.path.join(export_dir, export_filename) 83 | wb.save(export_filepath) 84 | if self.debug: 85 | self.log.info("export_to_xlsx.Save Data to xlsx: {0}".format(export_filename)) 86 | except Exception as e: 87 | self.log.error("Error@export_to_xlsx", exc_info=True) 88 | 89 | def export_to_csv(self, result_list, export_filename="ACRCloud_ScanFile_Results.csv", export_dir="./"): 90 | try: 91 | results = [] 92 | for item in result_list: 93 | filename = item["file"] 94 | timestamp = item["timestamp"] 95 | jsoninfo = item["result"] 96 | if "status" in jsoninfo and jsoninfo["status"]["code"] == 0: 97 | row = self.parse_data(jsoninfo) 98 | row = [filename, timestamp] + list(row) 99 | results.append(row) 100 | 101 | results = sorted(results, key=lambda x:x[1]) 102 | 103 | export_filepath = os.path.join(export_dir, export_filename) 104 | 105 | with codecs.open(export_filepath, 'w', 'utf-8-sig') as f: 106 | head_row = ['filename', 'timestamp', 'custom_files_title', 'custom_acrid', 'title', 'artists',
'album', 107 | 'acrid', 'played_duration', 'label', 'isrc', 'upc', 'deezer', 'spotify', 'itunes', 'youtube'] 108 | dw = csv.writer(f) 109 | dw.writerow(head_row) 110 | dw.writerows(results) 111 | if self.debug: 112 | self.log.info("export_to_csv.Save Data to csv: {0}".format(export_filename)) 113 | except Exception as e: 114 | self.log.error("Error@export_to_csv", exc_info=True) 115 | 116 | def parse_data(self, jsoninfo): 117 | try: 118 | title, played_duration, isrc, upc, acrid, label, album = [""]*7 119 | artists, deezer, spotify, itunes, youtube, custom_files_title, audio_id, custom_acrid = [""]*8 120 | 121 | metadata = jsoninfo.get('metadata', {}) 122 | played_duration = metadata.get("played_duration", "") 123 | if "music" in metadata and len(metadata["music"]) > 0: 124 | item = metadata["music"][0] 125 | title = item.get("title", "") 126 | offset = item.get("play_offset_ms", "") 127 | isrc = item.get("external_ids", {"isrc":""}).get("isrc","") 128 | upc = item.get("external_ids", {"upc":""}).get("upc","") 129 | acrid = item.get("acrid","") 130 | label = item.get("label", "") 131 | album = item.get("album", {"name":""}).get("name", "") 132 | artists = ",".join([ ar["name"] for ar in item.get('artists', [{"name":""}]) if ar.get("name") ]) 133 | deezer = item.get("external_metadata", {"deezer":{"track":{"id":""}}}).get("deezer", {"track":{"id":""}}).get("track", {"id":""}).get("id", "") 134 | spotify = item.get("external_metadata", {"spotify":{"track":{"id":""}}}).get("spotify", {"track":{"id":""}}).get("track", {"id":""}).get("id", "") 135 | itunes = item.get("external_metadata", {"itunes":{"track":{"id":""}}}).get("itunes", {"track":{"id":""}}).get("track", {"id":""}).get("id", "") 136 | youtube = item.get("external_metadata", {"youtube":{"vid":""}}).get("youtube", {"vid":""}).get("vid", "") 137 | 138 | if "custom_files" in metadata and len(metadata["custom_files"]) > 0: 139 | custom_item = metadata["custom_files"][0] 140 | custom_files_title = custom_item.get("title", "") 141 | audio_id = custom_item.get("audio_id", "") 142 | custom_acrid = custom_item.get("acrid", "") 143 | except Exception as e: 144 | self.log.error("Error@parse_data") 145 | 146 | res = (custom_files_title, custom_acrid, title, artists, album, acrid, 147 | played_duration, label, isrc, upc, deezer, spotify, itunes, youtube,) 148 | 149 | return res 150 | 151 | def apply_filter(self, results): 152 | fworker = FilterWorker() 153 | result_new = fworker.apply_filter(results) 154 | return result_new 155 | 156 | def do_recognize(self, filepath, start_time, rec_length): 157 | current_time = time.strftime('%H:%M:%S', time.gmtime(start_time)) 158 | res_data = self.re_handler.recognize_by_file(filepath, start_time, rec_length) 159 | return filepath, current_time, res_data 160 | 161 | def for_recognize_file(self, filepath, start_time, stop_time, step, rec_length): 162 | try: 163 | for i in range(start_time, stop_time, step): 164 | filep, current_time, res_data = self.do_recognize(filepath, i, rec_length) 165 | if res_data: 166 | jsoninfo = json.loads(res_data) 167 | if "metadata" in jsoninfo and "timestamp_utc" in jsoninfo["metadata"]: 168 | jsoninfo["metadata"]["timestamp_utc"] = current_time 169 | else: 170 | jsoninfo = {} 171 | yield {"timestamp":current_time, "rec_length":rec_length, "result":jsoninfo, "file":filep} 172 | except Exception as e: 173 | self.log.error("Error@for_recognize_file", exc_info=True) 174 | 175 | def recognize_file(self, filepath, start_time, stop_time, step, rec_length): 176 | try: 177 | result_list 
= [] 178 | for i in range(start_time, stop_time, step): 179 | filep, current_time, res_data = self.do_recognize(filepath, i, rec_length) 180 | if res_data: 181 | jsoninfo = json.loads(res_data) 182 | try: 183 | if "metadata" in jsoninfo and "timestamp_utc" in jsoninfo["metadata"]: 184 | jsoninfo["metadata"]["timestamp_utc"] = current_time 185 | 186 | code = jsoninfo.get("status", {}).get("code") #use .get so a malformed response cannot raise before code is bound (it is read in the except block below) 187 | msg = jsoninfo.get("status", {}).get("msg", "") 188 | if code not in [0, 1001]: 189 | raise Exception("recognize_file.(timestamp: {0}, {1}, {2})".format(current_time, code, msg)) 190 | except Exception as e: 191 | if self.debug: 192 | self.log.error(e) 193 | else: 194 | print (e) 195 | if code in [3001, 3003, 3013]: 196 | break 197 | else: 198 | continue 199 | 200 | result_list.append({"timestamp":current_time, "rec_length":rec_length, "result":jsoninfo, "file":filep}) 201 | if self.debug: 202 | parse_info = self.parse_data(jsoninfo) 203 | self.log.info('recognize_file.(timestamp: {0}, title: {1})'.format(current_time, parse_info[2])) 204 | except Exception as e: 205 | self.log.error("Error@recognize_file", exc_info=True) 206 | return result_list 207 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # [Audio Recognition](https://www.acrcloud.com/music-recognition) -- File Scan Tool (Python Script) 2 | 3 | 4 | 5 | ## Overview 6 | [ACRCloud](https://www.acrcloud.com/) provides [Automatic Content Recognition](https://www.acrcloud.com/docs/introduction/automatic-content-recognition/) services for [Audio Fingerprinting](https://www.acrcloud.com/docs/introduction/audio-fingerprinting/) based applications such as **[Audio Recognition](https://www.acrcloud.com/music-recognition)** (supports music, video, ads for both online and offline), **[Broadcast Monitoring](https://www.acrcloud.com/broadcast-monitoring)**, **[Second Screen](https://www.acrcloud.com/second-screen-synchronization)**, **[Copyright Protection](https://www.acrcloud.com/copyright-protection-de-duplication)**, etc.
7 | 8 | This tool scans audio/video files and detects the audio content you want to recognize, such as music or ads. 9 | 10 | Supported formats: 11 | 12 | >>Audio: mp3, wav, m4a, flac, aac, amr, ape, ogg ...
13 | >>Video: mp4, mkv, wmv, flv, ts, avi ... 14 | 15 | ## Requirements 16 | 17 | **Notice: This tool only supports Python 2.** 18 | 19 | - Python 2.x 20 | - fuzzywuzzy 21 | - openpyxl 22 | - backports.csv 23 | - requests 24 | - Follow one of the tutorials to create a project and get your host, access_key and access_secret. 25 | 26 | 27 | ## Run as a Docker Container 28 | - Install Docker 29 | - If you are using Windows: Download [Docker Desktop for Windows](https://download.docker.com/win/stable/Docker%20for%20Windows%20Installer.exe) and install it. 30 | - If you are using macOS: Download [Docker Desktop for Mac](https://download.docker.com/mac/stable/Docker.dmg) and install it. 31 | - If you are using Linux: Open the terminal and run `bash <(curl -s https://get.docker.com/)` 32 | - Change the config file (config.json). 33 | - Run the following commands 34 | ``` 35 | git clone https://github.com/acrcloud/acrcloud_scan_files_python.git 36 | 37 | cd acrcloud_scan_files_python 38 | 39 | sudo docker build -t acrcloud/python_scan_tool . 40 | # Call it without arguments to display the full help 41 | sudo docker run --rm acrcloud/python_scan_tool 42 | 43 | # Basic usage 44 | sudo docker run --rm -v $(pwd):/tmp -v /Users/acrcloud/:/music/ acrcloud/python_scan_tool -f /music/test.mp4 -o /tmp 45 | 46 | You need to change /Users/acrcloud/ to the directory where your audio/video file is. 47 | The report file will be in the acrcloud_scan_files_python directory. 48 | ``` 49 | ## Installation 50 | 51 | On Windows, you must first install [Python](https://www.python.org/downloads/windows/) and [pip](https://pip.pypa.io/en/stable/installing/). 52 | 53 | Open your terminal and change to the script directory of acrcloud_scan_files_python-master. Then run the command: 54 | 55 | ``` 56 | pip install -r requirements.txt 57 | ``` 58 | ## Install ACRCloud Python SDK 59 | 60 | 61 | You can run the following command to install it. 62 | 63 | ``` 64 | python -m pip install git+https://github.com/acrcloud/acrcloud_sdk_python 65 | ``` 66 | 67 | Or you can download the [ACRCloud Python SDK](https://github.com/acrcloud/acrcloud_sdk_python) and install it with the following command. 68 | 69 | 70 | 71 | 72 | ``` 73 | sudo python setup.py install 74 | ``` 75 | 76 | ## For Windows 77 | 78 | ### Install Library 79 | Windows Runtime Library 80 | 81 | X86: [download and install Library(windows/vcredist_x86.exe)](https://www.microsoft.com/en-us/download/details.aspx?id=5555) 82 | 83 | x64: [download and install Library(windows/vcredist_x64.exe)](https://www.microsoft.com/en-us/download/details.aspx?id=14632) 84 | 85 | 86 | ## Usage for Scan File Tool: 87 | 88 | _ ____ ____ ____ _ _ 89 | / \ / ___| _ \ / ___| | ___ _ _ __| | 90 | / _ \| | | |_) | | | |/ _ \| | | |/ _` | 91 | / ___ \ |___| _ <| |___| | (_) | |_| | (_| | 92 | /_/ \_\____|_| \_\\____|_|\___/ \____|\____| 93 | 94 | Before you use this script, you must have your ACRCloud host, access_key and access_secret.
95 | If you don't have them yet, you can sign up at https://console.acrcloud.com/signup 96 | 97 | Change the content of config.json, filling in your host, access_key and access_secret: 98 | ``` 99 | { 100 | "host": "xxxxx", 101 | "access_key": "xxxxx", 102 | "access_secret": "xxxxx" 103 | } 104 | ``` 105 | 106 | ``` 107 | python acrcloud_scan_files_python.py -d folder_path 108 | python acrcloud_scan_files_python.py -f file_path 109 | python acrcloud_scan_files_python.py -h get_usage_help 110 | ``` 111 | 112 | ### Scan Folder Example: 113 | ``` 114 | python acrcloud_scan_files_python.py -d ~/music 115 | ``` 116 | ### Scan File Example: 117 | ``` 118 | python acrcloud_scan_files_python.py -f ~/testfiles/test.mp3 119 | ``` 120 | 121 | ### Add more params 122 | 123 | "-s" ---- scan step (the scan interval, in seconds). 124 | 125 | "-l" ---- recognizing length (how many seconds of audio to use for recognition; for example, -s 20 -l 10 reads 20 seconds of audio each step and uses the first 10 seconds to recognize). 126 | 127 | "-r" ---- scan range (for example, -r 5-20 recognizes the file starting from the 5th second and finishing at the 20th second). 128 | 129 | "-c" ---- set the config file path. 130 | 131 | "-w" ---- results with played duration (1-yes, 0-no); you must set the offset config for your access key, please contact support@acrcloud.com 132 | 133 | "-o" ---- set the directory to save the results. 134 | 135 | "-t" ---- set the type of the result file (csv[default] or xlsx). 136 | ``` 137 | If you want to change the scan interval or set the recognition range, add these params 138 | Example: 139 | python acrcloud_scan_files_python.py -f ~/testfiles/test.mp3 -s 30 -r 0-20 140 | python acrcloud_scan_files_python.py -d ~/music -s 30 -w 1 141 | ``` 142 | 143 | By default, the tool scans the folder this script is in. 144 | 145 | The results are saved in the folder this script is in. 146 | 147 | 148 | ## Usage for the Scan File Library 149 | 150 | An introduction to all of the library's APIs.
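The command-line options above map directly onto the library parameters documented below. A minimal sketch of the fragment timing, assuming `-s 20 -l 10 -r 5-60` (the exact option parsing lives in acrcloud_scan_files_python.py; `recognize_file` below iterates `range(start_time, stop_time, step)` and recognizes `rec_length` seconds per fragment):

```
# -r 5-60 -> start_time=5, stop_time=60; -s 20 -> step=20; -l 10 -> rec_length=10
start_time, stop_time, step, rec_length = 5, 60, 20, 10
for offset in range(start_time, stop_time, step):
    # each fragment starts at `offset` seconds and uses rec_length seconds of audio
    print("fragment: %d-%ds" % (offset, offset + rec_length))
# -> fragment: 5-15s / fragment: 25-35s / fragment: 45-55s
```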
151 | 152 | ### acrcloud_scan_files_libary.py 153 | 154 | ``` 155 | class ACRCloud_Scan_Files: 156 | def get_duration_by_file(self, filepath): 157 | #@param filepath : query file path 158 | #@return : total duration of the file (seconds) 159 | 160 | def export_to_xlsx(self, result_list, export_filename, export_dir): 161 | #@param result_list : the list of recognition results 162 | #@param export_filename : export to this file 163 | #@param export_dir : export to this directory 164 | 165 | def export_to_csv(self, result_list, export_filename, export_dir): 166 | #@param result_list : the list of recognition results 167 | #@param export_filename : export to this file 168 | #@param export_dir : export to this directory 169 | 170 | def parse_data(self, result): 171 | #@param result : one recognition result 172 | #@return : a tuple, as follows (matching the library code) 173 | # (custom_files_title, custom_acrid, title, artists, album, acrid, 174 | # played_duration, label, isrc, upc, deezer, spotify, itunes, youtube) 175 | 176 | def apply_filter(self, results): 177 | #@param results : the list of recognition results 178 | #@return : a list of results with played_duration 179 | 180 | def for_recognize_file(self, filepath, start_time, stop_time, step, rec_length): 181 | #@param filepath : query file path 182 | #@param start_time : the start offset to recognize (seconds) 183 | #@param stop_time : the end offset to recognize (seconds) 184 | #@param step : the interval between fragment start offsets (seconds) 185 | #@param rec_length : the duration of each fragment to recognize (seconds) 186 | #@return : an iterator yielding each recognition result 187 | 188 | def recognize_file(self, filepath, start_time, stop_time, step, rec_length): 189 | #@param filepath : query file path 190 | #@param start_time : the start offset to recognize (seconds) 191 | #@param stop_time : the end offset to recognize (seconds) 192 | #@param step : the interval between fragment start offsets (seconds) 193 | #@param rec_length : the duration of each fragment to recognize (seconds) 194 | #@return : the list of recognition results 195 | ``` 196 | 197 | ### Example 198 | 199 | Run: python example.py test.mp3 200 | 201 | ``` 202 | #!/usr/bin/env python 203 | #-*- coding:utf-8 -*- 204 | 205 | import os 206 | import sys 207 | from acrcloud_scan_files_libary import ACRCloud_Scan_Files 208 | 209 | if __name__ == "__main__": 210 | 211 | #ACRCloud Scan File Example 212 | is_debug = 1 #display the log info, or is_debug=0 213 | start_time = 0 #scan file start time (seconds) 214 | stop_time = 0 #scan file end time (seconds); you can also set it to the duration of the file 215 | step = 10 #the length of each identified fragment (seconds) 216 | rec_length = step 217 | 218 | #your acrcloud project host, access_key, access_secret 219 | config = { 220 | "host": "XXX", 221 | "access_key": "XXX", 222 | "access_secret": "XXX" 223 | } 224 | 225 | filepath = sys.argv[1] 226 | 227 | acr_sfile = ACRCloud_Scan_Files(config, is_debug) 228 | stop_time = acr_sfile.get_duration_by_file(filepath) 229 | #get a list of recognition results 230 | result_list = acr_sfile.recognize_file(filepath, start_time, stop_time, step, rec_length) 231 | 232 | #export the result 233 | export_dir = "./" 234 | #export to csv 235 | export_filename_csv = "test.csv" 236 | acr_sfile.export_to_csv(result_list, export_filename_csv, export_dir) 237 | #export to xlsx 238 | export_filename_xlsx = "test.xlsx" 239 | acr_sfile.export_to_xlsx(result_list, export_filename_xlsx, export_dir) 240 | 241 | #iterator to get the result of each fragment 242 | result_list2 = [] 243 | for item in acr_sfile.for_recognize_file(filepath, start_time, stop_time, step, rec_length): 244 |
result_list2.append(item) 245 | filename = item["file"] 246 | timestamp = item["timestamp"] 247 | res = acr_sfile.parse_data(item["result"]) 248 | title = res[2] 249 | print(filename, timestamp, title) 250 | 251 | #get results with played-duration 252 | filter_results = acr_sfile.apply_filter(result_list2) 253 | #export the results to xlsx 254 | export_filtername_xlsx = "test_with_duration.xlsx" 255 | acr_sfile.export_to_xlsx(filter_results, export_filtername_xlsx, export_dir) 256 | ``` 257 | -------------------------------------------------------------------------------- /tools_str_sim.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | #-*- coding:utf-8 -*- 3 | """ 4 | author: hong 5 | Copyright (c) 2011 Adam Cohen 6 | ...... 7 | 8 | """ 9 | import re 10 | import sys 11 | import string 12 | from fuzzywuzzy import fuzz 13 | 14 | reload(sys) 15 | sys.setdefaultencoding("utf8") 16 | 17 | RE_SPECIAL_STRING = """[ \[\][]\(\)()\n\t\r,\.\:"'‘“<>《》!!??&]""" 18 | RE_SUB_STRING = "(\(.*\))|(\[.*\])|((.*))" 19 | THREADHOLD = 75 20 | 21 | #https://stackoverflow.com/questions/286921/efficiently-replace-all-accented-characters-in-a-string 22 | latin_map={ 23 | u"Á":"A", 24 | u"Ă":"A", 25 | u"Ắ":"A", 26 | u"Ặ":"A", 27 | u"Ằ":"A", 28 | u"Ẳ":"A", 29 | u"Ẵ":"A", 30 | u"Ǎ":"A", 31 | u"Â":"A", 32 | u"Ấ":"A", 33 | u"Ậ":"A", 34 | u"Ầ":"A", 35 | u"Ẩ":"A", 36 | u"Ẫ":"A", 37 | u"Ä":"A", 38 | u"Ǟ":"A", 39 | u"Ȧ":"A", 40 | u"Ǡ":"A", 41 | u"Ạ":"A", 42 | u"Ȁ":"A", 43 | u"À":"A", 44 | u"Ả":"A", 45 | u"Ȃ":"A", 46 | u"Ā":"A", 47 | u"Ą":"A", 48 | u"Å":"A", 49 | u"Ǻ":"A", 50 | u"Ḁ":"A", 51 | u"Ⱥ":"A", 52 | u"Ã":"A", 53 | u"Ꜳ":"AA", 54 | u"Æ":"AE", 55 | u"Ǽ":"AE", 56 | u"Ǣ":"AE", 57 | u"Ꜵ":"AO", 58 | u"Ꜷ":"AU", 59 | u"Ꜹ":"AV", 60 | u"Ꜻ":"AV", 61 | u"Ꜽ":"AY", 62 | u"Ḃ":"B", 63 | u"Ḅ":"B", 64 | u"Ɓ":"B", 65 | u"Ḇ":"B", 66 | u"Ƀ":"B", 67 | u"Ƃ":"B", 68 | u"Ć":"C", 69 | u"Č":"C", 70 | u"Ç":"C", 71 | u"Ḉ":"C", 72 | u"Ĉ":"C", 73 | u"Ċ":"C", 74 | u"Ƈ":"C", 75 | u"Ȼ":"C", 76 | u"Ď":"D", 77 | u"Ḑ":"D", 78 | u"Ḓ":"D", 79 | u"Ḋ":"D", 80 | u"Ḍ":"D", 81 | u"Ɗ":"D", 82 | u"Ḏ":"D", 83 | u"Dz":"D", 84 | u"Dž":"D", 85 | u"Đ":"D", 86 | u"Ƌ":"D", 87 | u"DZ":"DZ", 88 | u"DŽ":"DZ", 89 | u"É":"E", 90 | u"Ĕ":"E", 91 | u"Ě":"E", 92 | u"Ȩ":"E", 93 | u"Ḝ":"E", 94 | u"Ê":"E", 95 | u"Ế":"E", 96 | u"Ệ":"E", 97 | u"Ề":"E", 98 | u"Ể":"E", 99 | u"Ễ":"E", 100 | u"Ḙ":"E", 101 | u"Ë":"E", 102 | u"Ė":"E", 103 | u"Ẹ":"E", 104 | u"Ȅ":"E", 105 | u"È":"E", 106 | u"Ẻ":"E", 107 | u"Ȇ":"E", 108 | u"Ē":"E", 109 | u"Ḗ":"E", 110 | u"Ḕ":"E", 111 | u"Ę":"E", 112 | u"Ɇ":"E", 113 | u"Ẽ":"E", 114 | u"Ḛ":"E", 115 | u"Ꝫ":"ET", 116 | u"Ḟ":"F", 117 | u"Ƒ":"F", 118 | u"Ǵ":"G", 119 | u"Ğ":"G", 120 | u"Ǧ":"G", 121 | u"Ģ":"G", 122 | u"Ĝ":"G", 123 | u"Ġ":"G", 124 | u"Ɠ":"G", 125 | u"Ḡ":"G", 126 | u"Ǥ":"G", 127 | u"Ḫ":"H", 128 | u"Ȟ":"H", 129 | u"Ḩ":"H", 130 | u"Ĥ":"H", 131 | u"Ⱨ":"H", 132 | u"Ḧ":"H", 133 | u"Ḣ":"H", 134 | u"Ḥ":"H", 135 | u"Ħ":"H", 136 | u"Í":"I", 137 | u"Ĭ":"I", 138 | u"Ǐ":"I", 139 | u"Î":"I", 140 | u"Ï":"I", 141 | u"Ḯ":"I", 142 | u"İ":"I", 143 | u"Ị":"I", 144 | u"Ȉ":"I", 145 | u"Ì":"I", 146 | u"Ỉ":"I", 147 | u"Ȋ":"I", 148 | u"Ī":"I", 149 | u"Į":"I", 150 | u"Ɨ":"I", 151 | u"Ĩ":"I", 152 | u"Ḭ":"I", 153 | u"Ꝺ":"D", 154 | u"Ꝼ":"F", 155 | u"Ᵹ":"G", 156 | u"Ꞃ":"R", 157 | u"Ꞅ":"S", 158 | u"Ꞇ":"T", 159 | u"Ꝭ":"IS", 160 | u"Ĵ":"J", 161 | u"Ɉ":"J", 162 | u"Ḱ":"K", 163 | u"Ǩ":"K", 164 | u"Ķ":"K", 165 | u"Ⱪ":"K", 166 | u"Ꝃ":"K", 167 | u"Ḳ":"K", 168 | u"Ƙ":"K", 169 | u"Ḵ":"K", 170 | u"Ꝁ":"K", 171 | u"Ꝅ":"K", 172 |
u"Ĺ":"L", 173 | u"Ƚ":"L", 174 | u"Ľ":"L", 175 | u"Ļ":"L", 176 | u"Ḽ":"L", 177 | u"Ḷ":"L", 178 | u"Ḹ":"L", 179 | u"Ⱡ":"L", 180 | u"Ꝉ":"L", 181 | u"Ḻ":"L", 182 | u"Ŀ":"L", 183 | u"Ɫ":"L", 184 | u"Lj":"L", 185 | u"Ł":"L", 186 | u"LJ":"LJ", 187 | u"Ḿ":"M", 188 | u"Ṁ":"M", 189 | u"Ṃ":"M", 190 | u"Ɱ":"M", 191 | u"Ń":"N", 192 | u"Ň":"N", 193 | u"Ņ":"N", 194 | u"Ṋ":"N", 195 | u"Ṅ":"N", 196 | u"Ṇ":"N", 197 | u"Ǹ":"N", 198 | u"Ɲ":"N", 199 | u"Ṉ":"N", 200 | u"Ƞ":"N", 201 | u"Nj":"N", 202 | u"Ñ":"N", 203 | u"NJ":"NJ", 204 | u"Ó":"O", 205 | u"Ŏ":"O", 206 | u"Ǒ":"O", 207 | u"Ô":"O", 208 | u"Ố":"O", 209 | u"Ộ":"O", 210 | u"Ồ":"O", 211 | u"Ổ":"O", 212 | u"Ỗ":"O", 213 | u"Ö":"O", 214 | u"Ȫ":"O", 215 | u"Ȯ":"O", 216 | u"Ȱ":"O", 217 | u"Ọ":"O", 218 | u"Ő":"O", 219 | u"Ȍ":"O", 220 | u"Ò":"O", 221 | u"Ỏ":"O", 222 | u"Ơ":"O", 223 | u"Ớ":"O", 224 | u"Ợ":"O", 225 | u"Ờ":"O", 226 | u"Ở":"O", 227 | u"Ỡ":"O", 228 | u"Ȏ":"O", 229 | u"Ꝋ":"O", 230 | u"Ꝍ":"O", 231 | u"Ō":"O", 232 | u"Ṓ":"O", 233 | u"Ṑ":"O", 234 | u"Ɵ":"O", 235 | u"Ǫ":"O", 236 | u"Ǭ":"O", 237 | u"Ø":"O", 238 | u"Ǿ":"O", 239 | u"Õ":"O", 240 | u"Ṍ":"O", 241 | u"Ṏ":"O", 242 | u"Ȭ":"O", 243 | u"Ƣ":"OI", 244 | u"Ꝏ":"OO", 245 | u"Ɛ":"E", 246 | u"Ɔ":"O", 247 | u"Ȣ":"OU", 248 | u"Ṕ":"P", 249 | u"Ṗ":"P", 250 | u"Ꝓ":"P", 251 | u"Ƥ":"P", 252 | u"Ꝕ":"P", 253 | u"Ᵽ":"P", 254 | u"Ꝑ":"P", 255 | u"Ꝙ":"Q", 256 | u"Ꝗ":"Q", 257 | u"Ŕ":"R", 258 | u"Ř":"R", 259 | u"Ŗ":"R", 260 | u"Ṙ":"R", 261 | u"Ṛ":"R", 262 | u"Ṝ":"R", 263 | u"Ȑ":"R", 264 | u"Ȓ":"R", 265 | u"Ṟ":"R", 266 | u"Ɍ":"R", 267 | u"Ɽ":"R", 268 | u"Ꜿ":"C", 269 | u"Ǝ":"E", 270 | u"Ś":"S", 271 | u"Ṥ":"S", 272 | u"Š":"S", 273 | u"Ṧ":"S", 274 | u"Ş":"S", 275 | u"Ŝ":"S", 276 | u"Ș":"S", 277 | u"Ṡ":"S", 278 | u"Ṣ":"S", 279 | u"Ṩ":"S", 280 | u"Ť":"T", 281 | u"Ţ":"T", 282 | u"Ṱ":"T", 283 | u"Ț":"T", 284 | u"Ⱦ":"T", 285 | u"Ṫ":"T", 286 | u"Ṭ":"T", 287 | u"Ƭ":"T", 288 | u"Ṯ":"T", 289 | u"Ʈ":"T", 290 | u"Ŧ":"T", 291 | u"Ɐ":"A", 292 | u"Ꞁ":"L", 293 | u"Ɯ":"M", 294 | u"Ʌ":"V", 295 | u"Ꜩ":"TZ", 296 | u"Ú":"U", 297 | u"Ŭ":"U", 298 | u"Ǔ":"U", 299 | u"Û":"U", 300 | u"Ṷ":"U", 301 | u"Ü":"U", 302 | u"Ǘ":"U", 303 | u"Ǚ":"U", 304 | u"Ǜ":"U", 305 | u"Ǖ":"U", 306 | u"Ṳ":"U", 307 | u"Ụ":"U", 308 | u"Ű":"U", 309 | u"Ȕ":"U", 310 | u"Ù":"U", 311 | u"Ủ":"U", 312 | u"Ư":"U", 313 | u"Ứ":"U", 314 | u"Ự":"U", 315 | u"Ừ":"U", 316 | u"Ử":"U", 317 | u"Ữ":"U", 318 | u"Ȗ":"U", 319 | u"Ū":"U", 320 | u"Ṻ":"U", 321 | u"Ų":"U", 322 | u"Ů":"U", 323 | u"Ũ":"U", 324 | u"Ṹ":"U", 325 | u"Ṵ":"U", 326 | u"Ꝟ":"V", 327 | u"Ṿ":"V", 328 | u"Ʋ":"V", 329 | u"Ṽ":"V", 330 | u"Ꝡ":"VY", 331 | u"Ẃ":"W", 332 | u"Ŵ":"W", 333 | u"Ẅ":"W", 334 | u"Ẇ":"W", 335 | u"Ẉ":"W", 336 | u"Ẁ":"W", 337 | u"Ⱳ":"W", 338 | u"Ẍ":"X", 339 | u"Ẋ":"X", 340 | u"Ý":"Y", 341 | u"Ŷ":"Y", 342 | u"Ÿ":"Y", 343 | u"Ẏ":"Y", 344 | u"Ỵ":"Y", 345 | u"Ỳ":"Y", 346 | u"Ƴ":"Y", 347 | u"Ỷ":"Y", 348 | u"Ỿ":"Y", 349 | u"Ȳ":"Y", 350 | u"Ɏ":"Y", 351 | u"Ỹ":"Y", 352 | u"Ź":"Z", 353 | u"Ž":"Z", 354 | u"Ẑ":"Z", 355 | u"Ⱬ":"Z", 356 | u"Ż":"Z", 357 | u"Ẓ":"Z", 358 | u"Ȥ":"Z", 359 | u"Ẕ":"Z", 360 | u"Ƶ":"Z", 361 | u"IJ":"IJ", 362 | u"Œ":"OE", 363 | u"ᴀ":"A", 364 | u"ᴁ":"AE", 365 | u"ʙ":"B", 366 | u"ᴃ":"B", 367 | u"ᴄ":"C", 368 | u"ᴅ":"D", 369 | u"ᴇ":"E", 370 | u"ꜰ":"F", 371 | u"ɢ":"G", 372 | u"ʛ":"G", 373 | u"ʜ":"H", 374 | u"ɪ":"I", 375 | u"ʁ":"R", 376 | u"ᴊ":"J", 377 | u"ᴋ":"K", 378 | u"ʟ":"L", 379 | u"ᴌ":"L", 380 | u"ᴍ":"M", 381 | u"ɴ":"N", 382 | u"ᴏ":"O", 383 | u"ɶ":"OE", 384 | u"ᴐ":"O", 385 | u"ᴕ":"OU", 386 | u"ᴘ":"P", 387 | u"ʀ":"R", 388 | u"ᴎ":"N", 389 | u"ᴙ":"R", 390 | u"ꜱ":"S", 391 | u"ᴛ":"T", 392 | u"ⱻ":"E", 393 | 
u"ᴚ":"R", 394 | u"ᴜ":"U", 395 | u"ᴠ":"V", 396 | u"ᴡ":"W", 397 | u"ʏ":"Y", 398 | u"ᴢ":"Z", 399 | u"á":"a", 400 | #"á":"a", 401 | u"ă":"a", 402 | u"ắ":"a", 403 | u"ặ":"a", 404 | u"ằ":"a", 405 | u"ẳ":"a", 406 | u"ẵ":"a", 407 | u"ǎ":"a", 408 | u"â":"a", 409 | u"ấ":"a", 410 | u"ậ":"a", 411 | u"ầ":"a", 412 | u"ẩ":"a", 413 | u"ẫ":"a", 414 | u"ä":"a", 415 | u"ǟ":"a", 416 | u"ȧ":"a", 417 | u"ǡ":"a", 418 | u"ạ":"a", 419 | u"ȁ":"a", 420 | u"à":"a", 421 | u"ả":"a", 422 | u"ȃ":"a", 423 | u"ā":"a", 424 | u"ą":"a", 425 | u"ᶏ":"a", 426 | u"ẚ":"a", 427 | u"å":"a", 428 | u"ǻ":"a", 429 | u"ḁ":"a", 430 | u"ⱥ":"a", 431 | u"ã":"a", 432 | u"ꜳ":"aa", 433 | u"æ":"ae", 434 | u"ǽ":"ae", 435 | u"ǣ":"ae", 436 | u"ꜵ":"ao", 437 | u"ꜷ":"au", 438 | u"ꜹ":"av", 439 | u"ꜻ":"av", 440 | u"ꜽ":"ay", 441 | u"ḃ":"b", 442 | u"ḅ":"b", 443 | u"ɓ":"b", 444 | u"ḇ":"b", 445 | u"ᵬ":"b", 446 | u"ᶀ":"b", 447 | u"ƀ":"b", 448 | u"ƃ":"b", 449 | u"ɵ":"o", 450 | u"ć":"c", 451 | u"č":"c", 452 | u"ç":"c", 453 | u"ḉ":"c", 454 | u"ĉ":"c", 455 | u"ɕ":"c", 456 | u"ċ":"c", 457 | u"ƈ":"c", 458 | u"ȼ":"c", 459 | u"ď":"d", 460 | u"ḑ":"d", 461 | u"ḓ":"d", 462 | u"ȡ":"d", 463 | u"ḋ":"d", 464 | u"ḍ":"d", 465 | u"ɗ":"d", 466 | u"ᶑ":"d", 467 | u"ḏ":"d", 468 | u"ᵭ":"d", 469 | u"ᶁ":"d", 470 | u"đ":"d", 471 | u"ɖ":"d", 472 | u"ƌ":"d", 473 | u"ı":"i", 474 | u"ȷ":"j", 475 | u"ɟ":"j", 476 | u"ʄ":"j", 477 | u"dz":"dz", 478 | u"dž":"dz", 479 | u"é":"e", 480 | u"ĕ":"e", 481 | u"ě":"e", 482 | u"ȩ":"e", 483 | u"ḝ":"e", 484 | u"ê":"e", 485 | u"ế":"e", 486 | u"ệ":"e", 487 | u"ề":"e", 488 | u"ể":"e", 489 | u"ễ":"e", 490 | u"ḙ":"e", 491 | u"ë":"e", 492 | u"ė":"e", 493 | u"ẹ":"e", 494 | u"ȅ":"e", 495 | u"è":"e", 496 | u"ẻ":"e", 497 | u"ȇ":"e", 498 | u"ē":"e", 499 | u"ḗ":"e", 500 | u"ḕ":"e", 501 | u"ⱸ":"e", 502 | u"ę":"e", 503 | u"ᶒ":"e", 504 | u"ɇ":"e", 505 | u"ẽ":"e", 506 | u"ḛ":"e", 507 | u"ꝫ":"et", 508 | u"ḟ":"f", 509 | u"ƒ":"f", 510 | u"ᵮ":"f", 511 | u"ᶂ":"f", 512 | u"ǵ":"g", 513 | u"ğ":"g", 514 | u"ǧ":"g", 515 | u"ģ":"g", 516 | u"ĝ":"g", 517 | u"ġ":"g", 518 | u"ɠ":"g", 519 | u"ḡ":"g", 520 | u"ᶃ":"g", 521 | u"ǥ":"g", 522 | u"ḫ":"h", 523 | u"ȟ":"h", 524 | u"ḩ":"h", 525 | u"ĥ":"h", 526 | u"ⱨ":"h", 527 | u"ḧ":"h", 528 | u"ḣ":"h", 529 | u"ḥ":"h", 530 | u"ɦ":"h", 531 | u"ẖ":"h", 532 | u"ħ":"h", 533 | u"ƕ":"hv", 534 | u"í":"i", 535 | u"ĭ":"i", 536 | u"ǐ":"i", 537 | u"î":"i", 538 | u"ï":"i", 539 | u"ḯ":"i", 540 | u"ị":"i", 541 | u"ȉ":"i", 542 | u"ì":"i", 543 | u"ỉ":"i", 544 | u"ȋ":"i", 545 | u"ī":"i", 546 | u"į":"i", 547 | u"ᶖ":"i", 548 | u"ɨ":"i", 549 | u"ĩ":"i", 550 | u"ḭ":"i", 551 | u"ꝺ":"d", 552 | u"ꝼ":"f", 553 | u"ᵹ":"g", 554 | u"ꞃ":"r", 555 | u"ꞅ":"s", 556 | u"ꞇ":"t", 557 | u"ꝭ":"is", 558 | u"ǰ":"j", 559 | u"ĵ":"j", 560 | u"ʝ":"j", 561 | u"ɉ":"j", 562 | u"ḱ":"k", 563 | u"ǩ":"k", 564 | u"ķ":"k", 565 | u"ⱪ":"k", 566 | u"ꝃ":"k", 567 | u"ḳ":"k", 568 | u"ƙ":"k", 569 | u"ḵ":"k", 570 | u"ᶄ":"k", 571 | u"ꝁ":"k", 572 | u"ꝅ":"k", 573 | u"ĺ":"l", 574 | u"ƚ":"l", 575 | u"ɬ":"l", 576 | u"ľ":"l", 577 | u"ļ":"l", 578 | u"ḽ":"l", 579 | u"ȴ":"l", 580 | u"ḷ":"l", 581 | u"ḹ":"l", 582 | u"ⱡ":"l", 583 | u"ꝉ":"l", 584 | u"ḻ":"l", 585 | u"ŀ":"l", 586 | u"ɫ":"l", 587 | u"ᶅ":"l", 588 | u"ɭ":"l", 589 | u"ł":"l", 590 | u"lj":"lj", 591 | u"ſ":"s", 592 | u"ẜ":"s", 593 | u"ẛ":"s", 594 | u"ẝ":"s", 595 | u"ḿ":"m", 596 | u"ṁ":"m", 597 | u"ṃ":"m", 598 | u"ɱ":"m", 599 | u"ᵯ":"m", 600 | u"ᶆ":"m", 601 | u"ń":"n", 602 | u"ň":"n", 603 | u"ņ":"n", 604 | u"ṋ":"n", 605 | u"ȵ":"n", 606 | u"ṅ":"n", 607 | u"ṇ":"n", 608 | u"ǹ":"n", 609 | u"ɲ":"n", 610 | u"ṉ":"n", 611 | u"ƞ":"n", 612 | u"ᵰ":"n", 613 | u"ᶇ":"n", 614 | 
u"ɳ":"n", 615 | u"ñ":"n", 616 | u"nj":"nj", 617 | u"ó":"o", 618 | u"ŏ":"o", 619 | u"ǒ":"o", 620 | u"ô":"o", 621 | u"ố":"o", 622 | u"ộ":"o", 623 | u"ồ":"o", 624 | u"ổ":"o", 625 | u"ỗ":"o", 626 | u"ö":"o", 627 | u"ȫ":"o", 628 | u"ȯ":"o", 629 | u"ȱ":"o", 630 | u"ọ":"o", 631 | u"ő":"o", 632 | u"ȍ":"o", 633 | u"ò":"o", 634 | u"ỏ":"o", 635 | u"ơ":"o", 636 | u"ớ":"o", 637 | u"ợ":"o", 638 | u"ờ":"o", 639 | u"ở":"o", 640 | u"ỡ":"o", 641 | u"ȏ":"o", 642 | u"ꝋ":"o", 643 | u"ꝍ":"o", 644 | u"ⱺ":"o", 645 | u"ō":"o", 646 | u"ṓ":"o", 647 | u"ṑ":"o", 648 | u"ǫ":"o", 649 | u"ǭ":"o", 650 | u"ø":"o", 651 | u"ǿ":"o", 652 | u"õ":"o", 653 | u"ṍ":"o", 654 | u"ṏ":"o", 655 | u"ȭ":"o", 656 | u"ƣ":"oi", 657 | u"ꝏ":"oo", 658 | u"ɛ":"e", 659 | u"ᶓ":"e", 660 | u"ɔ":"o", 661 | u"ᶗ":"o", 662 | u"ȣ":"ou", 663 | u"ṕ":"p", 664 | u"ṗ":"p", 665 | u"ꝓ":"p", 666 | u"ƥ":"p", 667 | u"ᵱ":"p", 668 | u"ᶈ":"p", 669 | u"ꝕ":"p", 670 | u"ᵽ":"p", 671 | u"ꝑ":"p", 672 | u"ꝙ":"q", 673 | u"ʠ":"q", 674 | u"ɋ":"q", 675 | u"ꝗ":"q", 676 | u"ŕ":"r", 677 | u"ř":"r", 678 | u"ŗ":"r", 679 | u"ṙ":"r", 680 | u"ṛ":"r", 681 | u"ṝ":"r", 682 | u"ȑ":"r", 683 | u"ɾ":"r", 684 | u"ᵳ":"r", 685 | u"ȓ":"r", 686 | u"ṟ":"r", 687 | u"ɼ":"r", 688 | u"ᵲ":"r", 689 | u"ᶉ":"r", 690 | u"ɍ":"r", 691 | u"ɽ":"r", 692 | u"ↄ":"c", 693 | u"ꜿ":"c", 694 | u"ɘ":"e", 695 | u"ɿ":"r", 696 | u"ś":"s", 697 | u"ṥ":"s", 698 | u"š":"s", 699 | u"ṧ":"s", 700 | u"ş":"s", 701 | u"ŝ":"s", 702 | u"ș":"s", 703 | u"ṡ":"s", 704 | u"ṣ":"s", 705 | u"ṩ":"s", 706 | u"ʂ":"s", 707 | u"ᵴ":"s", 708 | u"ᶊ":"s", 709 | u"ȿ":"s", 710 | u"ɡ":"g", 711 | u"ᴑ":"o", 712 | u"ᴓ":"o", 713 | u"ᴝ":"u", 714 | u"ť":"t", 715 | u"ţ":"t", 716 | u"ṱ":"t", 717 | u"ț":"t", 718 | u"ȶ":"t", 719 | u"ẗ":"t", 720 | u"ⱦ":"t", 721 | u"ṫ":"t", 722 | u"ṭ":"t", 723 | u"ƭ":"t", 724 | u"ṯ":"t", 725 | u"ᵵ":"t", 726 | u"ƫ":"t", 727 | u"ʈ":"t", 728 | u"ŧ":"t", 729 | u"ᵺ":"th", 730 | u"ɐ":"a", 731 | u"ᴂ":"ae", 732 | u"ǝ":"e", 733 | u"ᵷ":"g", 734 | u"ɥ":"h", 735 | u"ʮ":"h", 736 | u"ʯ":"h", 737 | u"ᴉ":"i", 738 | u"ʞ":"k", 739 | u"ꞁ":"l", 740 | u"ɯ":"m", 741 | u"ɰ":"m", 742 | u"ᴔ":"oe", 743 | u"ɹ":"r", 744 | u"ɻ":"r", 745 | u"ɺ":"r", 746 | u"ⱹ":"r", 747 | u"ʇ":"t", 748 | u"ʌ":"v", 749 | u"ʍ":"w", 750 | u"ʎ":"y", 751 | u"ꜩ":"tz", 752 | u"ú":"u", 753 | u"ŭ":"u", 754 | u"ǔ":"u", 755 | u"û":"u", 756 | u"ṷ":"u", 757 | u"ü":"u", 758 | u"ǘ":"u", 759 | u"ǚ":"u", 760 | u"ǜ":"u", 761 | u"ǖ":"u", 762 | u"ṳ":"u", 763 | u"ụ":"u", 764 | u"ű":"u", 765 | u"ȕ":"u", 766 | u"ù":"u", 767 | u"ủ":"u", 768 | u"ư":"u", 769 | u"ứ":"u", 770 | u"ự":"u", 771 | u"ừ":"u", 772 | u"ử":"u", 773 | u"ữ":"u", 774 | u"ȗ":"u", 775 | u"ū":"u", 776 | u"ṻ":"u", 777 | u"ų":"u", 778 | u"ᶙ":"u", 779 | u"ů":"u", 780 | u"ũ":"u", 781 | u"ṹ":"u", 782 | u"ṵ":"u", 783 | u"ᵫ":"ue", 784 | u"ꝸ":"um", 785 | u"ⱴ":"v", 786 | u"ꝟ":"v", 787 | u"ṿ":"v", 788 | u"ʋ":"v", 789 | u"ᶌ":"v", 790 | u"ⱱ":"v", 791 | u"ṽ":"v", 792 | u"ꝡ":"vy", 793 | u"ẃ":"w", 794 | u"ŵ":"w", 795 | u"ẅ":"w", 796 | u"ẇ":"w", 797 | u"ẉ":"w", 798 | u"ẁ":"w", 799 | u"ⱳ":"w", 800 | u"ẘ":"w", 801 | u"ẍ":"x", 802 | u"ẋ":"x", 803 | u"ᶍ":"x", 804 | u"ý":"y", 805 | u"ŷ":"y", 806 | u"ÿ":"y", 807 | u"ẏ":"y", 808 | u"ỵ":"y", 809 | u"ỳ":"y", 810 | u"ƴ":"y", 811 | u"ỷ":"y", 812 | u"ỿ":"y", 813 | u"ȳ":"y", 814 | u"ẙ":"y", 815 | u"ɏ":"y", 816 | u"ỹ":"y", 817 | u"ź":"z", 818 | u"ž":"z", 819 | u"ẑ":"z", 820 | u"ʑ":"z", 821 | u"ⱬ":"z", 822 | u"ż":"z", 823 | u"ẓ":"z", 824 | u"ȥ":"z", 825 | u"ẕ":"z", 826 | u"ᵶ":"z", 827 | u"ᶎ":"z", 828 | u"ʐ":"z", 829 | u"ƶ":"z", 830 | u"ɀ":"z", 831 | u"ff":"ff", 832 | u"ffi":"ffi", 833 | u"ffl":"ffl", 834 | u"fi":"fi", 
835 | u"fl":"fl", 836 | u"ij":"ij", 837 | u"œ":"oe", 838 | u"st":"st", 839 | u"ₐ":"a", 840 | u"ₑ":"e", 841 | u"ᵢ":"i", 842 | u"ⱼ":"j", 843 | u"ₒ":"o", 844 | u"ᵣ":"r", 845 | u"ᵤ":"u", 846 | u"ᵥ":"v", 847 | u"ₓ":"x", 848 | } 849 | 850 | 851 | 852 | def latinize(old_str): 853 | old_str = old_str.lower() 854 | new_a = "" 855 | for a in old_str: 856 | new_a += str(latin_map.get(a, a)) 857 | return new_a 858 | 859 | def str_filter_sub(old_str): 860 | old_str_sub = re.sub(RE_SUB_STRING, "", old_str) 861 | new_str = re.sub(RE_SPECIAL_STRING, '', old_str_sub) 862 | return new_str 863 | 864 | def str_filter(old_str): 865 | return re.sub(RE_SPECIAL_STRING, '', old_str).strip() 866 | 867 | def remove_punct(input_str): 868 | if not input_str: 869 | return input_str 870 | del_estr = string.punctuation 871 | replace = " "*len(del_estr) 872 | tran_tab = string.maketrans(del_estr, replace) 873 | input_str = input_str.translate(tran_tab) 874 | return " ".join(input_str.split()) 875 | 876 | def str_sub(old_str): 877 | old_str = old_str.lower() 878 | new_str = re.sub(RE_SUB_STRING, "", old_str).strip() 879 | if new_str.find(" - ") != -1: 880 | new_str = new_str[:new_str.find(" - ")] 881 | new_str = latinize(new_str) 882 | new_str = remove_punct(new_str.strip()) 883 | return new_str 884 | 885 | def str_sim(str1_old, str2_old): 886 | ''' 887 | warning: do not str1=str(str1) 888 | ''' 889 | str1 = str(str1_old) 890 | str2 = str(str2_old) 891 | 892 | format_str1 = str_filter(str1.lower().strip()) 893 | format_str2 = str_filter(str2.lower().strip()) 894 | if format_str1 == format_str2 or format_str1.find(format_str2) != -1 or format_str2.find(format_str1) != -1: 895 | return True, "" 896 | 897 | format_str1 = str_filter_sub(str1.lower().strip()) 898 | format_str2 = str_filter_sub(str2.lower().strip()) 899 | ratio = fuzz.ratio(format_str1, format_str2) 900 | return ratio >= THREADHOLD or format_str1 == format_str2 or format_str1.find(format_str2) != -1 or format_str2.find(format_str1) != -1 , str(ratio) 901 | 902 | -------------------------------------------------------------------------------- /acrcloud_scan_files_python.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | 4 | import os 5 | import sys 6 | import time 7 | import json 8 | import codecs 9 | import optparse 10 | import logging 11 | import openpyxl 12 | from backports import csv 13 | from openpyxl import Workbook 14 | from acrcloud_logger import AcrcloudLogger 15 | from acrcloud_filter_libary import FilterWorker 16 | from acrcloud.recognizer import ACRCloudRecognizer 17 | 18 | if sys.version_info.major == 2: 19 | reload(sys) 20 | sys.setdefaultencoding("utf8") 21 | 22 | 23 | class ACRCloud_Scan_Files: 24 | 25 | def __init__(self, config_file): 26 | self.config = { 27 | 'host': '', 28 | 'access_key': '', 29 | 'access_secret': '', 30 | 'debug': False, 31 | 'timeout': 10 # seconds 32 | } 33 | self.openpyxl_version = ".".join(str(openpyxl.__version__).split(".")[:2]) 34 | self.config_file = config_file 35 | self.init_log() 36 | self.init_config() 37 | 38 | def init_log(self): 39 | self.dlog = AcrcloudLogger('ACRCloud_ScanF', logging.INFO) 40 | if not self.dlog.addFilehandler(logfile="log_scan_files.log", logdir="./", loglevel=logging.WARN): 41 | sys.exit(1) 42 | if not self.dlog.addStreamHandler(): 43 | sys.exit(1) 44 | 45 | def init_config(self): 46 | try: 47 | json_config = None 48 | with codecs.open(self.config_file, 'r') as f: 49 | json_config = 
json.loads(f.read()) 50 | for k in ["host", "access_key", "access_secret"]: 51 | if k in json_config and json_config[k].strip(): 52 | self.config[k] = str(json_config[k].strip()) 53 | else: 54 | self.dlog.logger.error("init_config.not found {0} from config.json, please check".format(k)) 55 | sys.exit(1) 56 | 57 | self.re_handler = ACRCloudRecognizer(self.config) 58 | if self.re_handler: 59 | self.dlog.logger.warning("init_config success!") 60 | except Exception as e: 61 | self.dlog.logger.error("init_config.error", exc_info=True) 62 | 63 | def read_file(self, infile, jFirst=True): 64 | with open(infile, "rb") as rfile: 65 | for line in rfile: 66 | if jFirst: 67 | jFirst = False 68 | continue 69 | yield line.strip() 70 | 71 | def write_error(self, file_path, error_time, error_detail): 72 | with open('error_scan.txt', 'a', ) as f: 73 | msg = file_path + '||' + str(error_time) + '||' + str(error_detail) + '\n' 74 | f.write(msg) 75 | 76 | def empty_error_scan(self): 77 | if os.path.exists('error_scan.txt'): 78 | os.remove('error_scan.txt') 79 | 80 | def export_to_csv(self, result_list, export_filename="ACRCloud_ScanFile_Results.csv", export_dir="./"): 81 | try: 82 | results = [] 83 | for item in result_list: 84 | filename = item["file"] 85 | timestamp = item["timestamp"] 86 | jsoninfo = item["result"] 87 | if "status" in jsoninfo and jsoninfo["status"]["code"] == 0: 88 | row = self.parse_data(jsoninfo) 89 | row = [filename, timestamp] + list(row) 90 | results.append(row) 91 | 92 | export_filepath = os.path.join(export_dir, export_filename) 93 | 94 | with codecs.open(export_filepath, 'w', 'utf-8-sig') as f: 95 | head_row = ['filename', 'timestamp', 'title', 'artists', 'album', 'acrid', 'played_duration', 'label', 96 | 'isrc', 'upc', 'deezer', 'spotify', 'itunes', 'youtube', 'custom_files_title', 'audio_id'] 97 | dw = csv.writer(f) 98 | dw.writerow(head_row) 99 | dw.writerows(results) 100 | self.dlog.logger.info("export_to_csv.Save Data to csv: {0}".format(export_filepath)) 101 | except Exception as e: 102 | self.dlog.logger.error("Error export_to_csv", exc_info=True) 103 | 104 | def export_to_json(self, result_list, export_filename="ACRCloud_ScanFile_Results.json", export_dir="./"): 105 | try: 106 | results = [] 107 | json_results = [] 108 | export_filepath = os.path.join(export_dir, export_filename) 109 | 110 | head_row = ['filename', 'timestamp', 'title', 'artists', 'album', 'acrid', 'played_duration', 'label', 111 | 'isrc', 'upc', 'deezer', 'spotify', 'itunes', 'youtube', 'custom_files_title', 'audio_id'] 112 | 113 | for item in result_list: 114 | filename = item["file"] 115 | timestamp = item["timestamp"] 116 | jsoninfo = item["result"] 117 | if "status" in jsoninfo and jsoninfo["status"]["code"] == 0: 118 | row = self.parse_data(jsoninfo) 119 | row = [filename, timestamp] + list(row) 120 | results.append(row) 121 | 122 | for i in results: 123 | new_results = {} #a fresh dict per row; reusing one dict (as before) made every JSON row identical 124 | for k in range(len(head_row)): 125 | new_results[head_row[k]] = i[k] 126 | json_results.append(new_results) 127 | 128 | 129 | with codecs.open(export_filepath, 'w', 'utf-8-sig') as f: 130 | f.write(json.dumps(json_results)) 131 | except Exception as e: 132 | self.dlog.logger.error("Error export_to_json", exc_info=True) 133 | 134 | def export_to_xlsx(self, result_list, export_filename="ACRCloud_ScanFile_Results.xlsx", export_dir="./"): 135 | try: 136 | wb = Workbook() 137 | sheet_channels = wb.active 138 | sheet_channels.title = "Results" 139 | head_row = ['filename', 'timestamp', 'title', 'artists', 'album', 'acrid',
'played_duration', 'label', 140 | 'isrc', 'upc', 'deezer', 'spotify', 'itunes', 'youtube', 'custom_files_title', 'audio_id'] 141 | sheet_channels.append(head_row) 142 | 143 | for item in result_list: 144 | filename = item["file"] 145 | timestamp = item["timestamp"] 146 | jsoninfo = item["result"] 147 | if "status" in jsoninfo and jsoninfo["status"]["code"] == 0: 148 | row = self.parse_data(jsoninfo) 149 | row = [filename, timestamp] + list(row) 150 | sheet_channels.append(row) 151 | 152 | export_filepath = os.path.join(export_dir, export_filename) 153 | 154 | for column_cells in sheet_channels.columns: 155 | length = max(len(str(cell.value) if cell.value else "") for cell in column_cells) 156 | if length > 100: 157 | length = 100 #cap the column width (the original "length == 100" was a no-op comparison) 158 | if self.openpyxl_version >= "2.6": 159 | sheet_channels.column_dimensions[column_cells[0].column_letter].width = length 160 | else: 161 | sheet_channels.column_dimensions[column_cells[0].column].width = length 162 | wb.save(export_filepath) 163 | 164 | self.dlog.logger.info("export_to_xlsx.Save Data to xlsx: {0}".format(export_filepath)) 165 | except Exception as e: 166 | self.dlog.logger.error("Error export_to_xlsx", exc_info=True) 167 | 168 | def parse_data(self, jsoninfo): 169 | try: 170 | title, played_duration, isrc, upc, acrid, label, album = [""] * 7 171 | artists, deezer, spotify, itunes, youtube, custom_files_title, audio_id = [""] * 7 172 | 173 | metadata = jsoninfo.get('metadata', {}) 174 | played_duration = metadata.get("played_duration", "") 175 | if "music" in metadata and len(metadata["music"]) > 0: 176 | item = metadata["music"][0] 177 | title = item.get("title", "") 178 | offset = item.get("play_offset_ms", "") 179 | if "external_ids" in item: 180 | if "isrc" in item["external_ids"]: 181 | isrc_obj = item["external_ids"]["isrc"] 182 | isrc = isrc_obj[0] if type(isrc_obj) == list else isrc_obj 183 | if "upc" in item["external_ids"]: 184 | upc_obj = item["external_ids"]["upc"] 185 | upc = upc_obj[0] if type(upc_obj) == list else upc_obj 186 | acrid = item.get("acrid", "") 187 | label = item.get("label", "") 188 | album = item.get("album", {"name": ""}).get("name", "") 189 | artists = ",".join([ar["name"] for ar in item.get('artists', [{"name": ""}]) if ar.get("name")]) 190 | if "external_metadata" in item: 191 | e_metadata = item["external_metadata"] 192 | if "deezer" in e_metadata: 193 | deezer_obj = e_metadata["deezer"] 194 | deezer = deezer_obj[0]["track"]["id"] if type(deezer_obj) == list else deezer_obj["track"]["id"] 195 | if "spotify" in e_metadata: 196 | spotify_obj = e_metadata["spotify"] 197 | spotify = spotify_obj[0]["track"]["id"] if type(spotify_obj) == list else spotify_obj["track"]["id"] 198 | if "itunes" in e_metadata: #itunes appears in head_row but was never filled; parse it like deezer/spotify 199 | itunes_obj = e_metadata["itunes"] 200 | itunes = itunes_obj[0]["track"]["id"] if type(itunes_obj) == list else itunes_obj["track"]["id"] 201 | if "youtube" in e_metadata: 202 | youtube_obj = e_metadata["youtube"] 203 | youtube = youtube_obj[0]["vid"] if type(youtube_obj) == list else youtube_obj["vid"] 204 | if "custom_files" in metadata and len(metadata["custom_files"]) > 0: 205 | custom_item = metadata["custom_files"][0] 206 | custom_files_title = custom_item.get("title", "") 207 | audio_id = custom_item.get("audio_id", "") 208 | except Exception as e: 209 | self.dlog.logger.error("parse_data.error.data:{0}".format(metadata), exc_info=True) 210 | 211 | res = (title, artists, album, acrid, played_duration, label, isrc, upc, deezer, spotify, itunes, youtube, custom_files_title, audio_id) 212 | return res 213 | 214 | def apply_filter(self, results): 215 | fworker = FilterWorker() 216 | result_new = fworker.apply_filter(results) 217 | return result_new 218 | 219
219 | def do_recognize(self, filepath, start_time, rec_length):
220 | current_time = time.strftime('%H:%M:%S', time.gmtime(start_time))  # computed before the try so the except path can return it
221 | try:
222 | res_data = self.re_handler.recognize_by_file(filepath, start_time, rec_length)
223 | return filepath, current_time, res_data
224 | except Exception as e:
225 | self.dlog.logger.error("do_recognize.error.({0}, {1}, {2})".format(filepath, start_time, rec_length),
226 | exc_info=True)
227 | return filepath, current_time, None
228 |
229 | def recognize_file(self, filepath, start_time, stop_time, step, rec_length, with_duration=0):
230 | self.dlog.logger.warning("scan_file.start_to_run: {0}".format(filepath))
231 |
232 | result = []
233 | for i in range(start_time, stop_time, step):
234 | filep, current_time, res_data = self.do_recognize(filepath, i, rec_length)
235 | try:
236 | print(res_data)
237 | jsoninfo = json.loads(res_data)
238 | code = jsoninfo['status']['code']
239 | msg = jsoninfo['status']['msg']
240 | if "status" in jsoninfo and jsoninfo["status"]["code"] == 0:
241 | result.append(
242 | {"timestamp": current_time, "rec_length": rec_length, "result": jsoninfo, "file": filep})
243 | res = self.parse_data(jsoninfo)
244 | # self.dlog.logger.info('recognize_file.(time:{0}, title: {1})'.format(current_time, res[0]))
245 | self.dlog.logger.info(
246 | 'recognize_file.(time:{0}, title: {1}, custom title: {2})'.format(current_time, res[0],
247 | res[-2]))
248 | if code == 2005:
249 | self.dlog.logger.warning('recognize_file.(time:{0}, code:{1}, Done!)'.format(current_time, code))
250 | break
251 | elif code == 1001:
252 | result.append(
253 | {"timestamp": current_time, "rec_length": rec_length, "result": jsoninfo, "file": filep})
254 | self.dlog.logger.info("recognize_file.(time:{0}, code:{1}, No_Result)".format(current_time, code))
255 | elif code == 3001:
256 | self.dlog.logger.error(
257 | 'recognize_file.(time:{0}, code:{1}, Missing/Invalid Access Key)'.format(current_time, code))
258 | break
259 | elif code == 3003:
260 | self.dlog.logger.error(
261 | 'recognize_file.(time:{0}, code:{1}, Limit exceeded)'.format(current_time, code))
262 | elif code == 3000:
263 | self.dlog.logger.error('recognize_file.(time:{0}, {1}, {2})'.format(current_time, code, msg))
264 | self.write_error(filepath, i, 'NETWORK ERROR')
265 | # the for loop already advances i by step; the old manual 'i += step' here was a no-op
266 | except Exception as e:
267 | self.dlog.logger.error('recognize_file.error', exc_info=True)
268 | self.write_error(filepath, i, 'JSON ERROR')
269 | return result
270 |
271 | def scan_file_main(self, option, start_time, stop_time):
272 | try:
273 | filepath = option.file_path
274 | step = option.step
275 | rec_length = option.rec_length
276 | with_duration = option.with_duration
277 | out_dir = option.out_dir
278 | if out_dir and not os.path.exists(out_dir):
279 | try:
280 | os.makedirs(out_dir)
281 | except Exception as e:
282 | self.dlog.logger.error("scan_file_main.create_out_dir_error:{0}, please check it!".format(out_dir),
283 | exc_info=True)
284 | return
285 |
286 | file_type = option.file_type
287 | if start_time == 0 and stop_time == 0:
288 | file_total_seconds = int(ACRCloudRecognizer.get_duration_ms_by_file(filepath) / 1000)
289 | results = self.recognize_file(filepath, start_time, file_total_seconds, step, rec_length, with_duration)
290 | else:
291 | results = self.recognize_file(filepath, start_time, stop_time, step, rec_length, with_duration)
292 |
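# Export step (below): results are written to out_dir as result-<input basename>.<ext>,
# and when -w/--with_duration is 1 a second, filtered set with the _with_duration
# suffix is produced from apply_filter().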
293 | filename_csv = 'result-' + os.path.basename(filepath.strip()) + '.csv'
294 | filename_xlsx = 'result-' + os.path.basename(filepath.strip()) + '.xlsx'
295 | filename_json = 'result-' + os.path.basename(filepath.strip()) + '.json'
296 |
297 | if results:
298 | if file_type == "csv":
299 | self.export_to_csv(results, filename_csv, out_dir)
300 | elif file_type == "json":
301 | self.export_to_json(results, filename_json, out_dir)
302 | else:
303 | self.export_to_xlsx(results, filename_xlsx, out_dir)
304 |
305 | if with_duration == 1:
306 | new_results = []
307 | if results:
308 | new_results = self.apply_filter(results)
309 |
310 | filename_with_duration_csv = 'result-' + os.path.basename(filepath.strip()) + '_with_duration.csv'
311 | filename_with_duration_xlsx = 'result-' + os.path.basename(filepath.strip()) + '_with_duration.xlsx'
312 | filename_with_duration_json = 'result-' + os.path.basename(filepath.strip()) + '_with_duration.json'
313 |
314 | if file_type == "csv":
315 | self.export_to_csv(new_results, filename_with_duration_csv, out_dir)
316 | elif file_type == "json":
317 |
318 | self.export_to_json(new_results, filename_with_duration_json, out_dir)
319 | else:
320 | self.export_to_xlsx(new_results, filename_with_duration_xlsx, out_dir)
321 | except Exception as e:
322 | self.dlog.logger.error("scan_file_main.error", exc_info=True)
323 | return
324 |
325 | def scan_folder_main(self, option, start_time, stop_time):
326 | try:
327 | path = option.folder_path
328 | file_list = os.listdir(path)
329 | for i in file_list:
330 | option.file_path = os.path.join(path, i)
331 | self.scan_file_main(option, start_time, stop_time)
332 | except Exception as e:
333 | self.dlog.logger.error("scan_folder_main.error", exc_info=True)
334 |
335 |
336 | if __name__ == '__main__':
337 | usage = r'''
338 | _ ____ ____ ____ _ _
339 | / \ / ___| _ \ / ___| | ___ _ _ __| |
340 | / _ \| | | |_) | | | |/ _ \| | | |/ _` |
341 | / ___ \ |___| _ <| |___| | (_) | |_| | (_| |
342 | /_/ \_\____|_| \_\\____|_|\___/ \____|\____|
343 |
344 | Usage:
345 | python acrcloud_scan_files_python.py -d folder_path
346 | python acrcloud_scan_files_python.py -f file_path
347 | Example:
348 | python acrcloud_scan_files_python.py -d ~/music
349 | python acrcloud_scan_files_python.py -f ~/testfiles/test.mp3
350 | If you want to change the scan interval or set a recognition range, you can add extra params
351 | Example:
352 | python acrcloud_scan_files_python.py -f ~/testfiles/test.mp3 -s 30 -r 0-20 -l 10
353 | python acrcloud_scan_files_python.py -d ~/music -s 30
354 | '''
355 |
356 | parser = optparse.OptionParser()
357 | parser.add_option('-f', '--file', dest='file_path', type='string', help='Scan file you want to recognize')
358 | parser.add_option('-c', '--config', dest='config', type='string', default="config.json", help='config file')
359 | parser.add_option('-d', '--folder', dest='folder_path', type='string', help='Scan folder you want to recognize')
360 | parser.add_option('-s', '--step', dest='step', type='int', default=10, help='step')
361 | parser.add_option('-l', '--rec_length', dest='rec_length', type='int', default=10, help='rec_length')
362 | parser.add_option('-e', '--error_file', dest='error_file', type='string', help='error scan file')
363 | parser.add_option('-r', '--range', dest='range', type='string', default='0-0', help='scan range: start-stop (seconds), e.g. 0-20')
364 | parser.add_option('-w', '--with_duration', dest="with_duration", type='int', default=0, help='with_duration')
365 | parser.add_option('-o', '--out_dir', dest="out_dir", type='string', default="./", help='out_dir')
366 | parser.add_option('-t', '--file_type', dest="file_type", type='string', default="csv", help='file_type')
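# The -r/--range value is split on '-' below into start and stop seconds; the
# default 0-0 makes scan_file_main derive the stop time from the file duration.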
367 | 368 | (options, args) = parser.parse_args() 369 | start = int(options.range.split('-')[0]) 370 | stop = int(options.range.split('-')[1]) 371 | 372 | asf = ACRCloud_Scan_Files(options.config) 373 | if options.file_path: 374 | asf.empty_error_scan() 375 | asf.scan_file_main(options, start, stop) 376 | elif options.folder_path: 377 | asf.empty_error_scan() 378 | asf.scan_folder_main(options, start, stop) 379 | else: 380 | print(usage) 381 | -------------------------------------------------------------------------------- /acrcloud_filter_libary.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | #-*- coding:utf-8 -*- 3 | 4 | import os 5 | import sys 6 | import json 7 | import copy 8 | import math 9 | import datetime 10 | import traceback 11 | import tools_str_sim 12 | import acrcloud_logger 13 | from dateutil.relativedelta import * 14 | 15 | if sys.version_info.major == 2: 16 | reload(sys) 17 | sys.setdefaultencoding("utf8") 18 | 19 | NORESULT = "noResult" 20 | 21 | class ResultFilter: 22 | 23 | def __init__(self, dlog): 24 | self._dlog = dlog 25 | self._real_music = {} 26 | self._real_music_list_num = 3 27 | self._real_custom = {} 28 | self._real_custom_list_num = 3 29 | self._real_custom_valid_interval = 5*60 30 | self._delay_music = {} 31 | self._delay_music_last_result = {} 32 | self._delay_music_interval_threshold = 2*60 33 | self._delay_custom = {} 34 | self._delay_custom_played_duration_min = 2 35 | self._delay_list_max_num = 35 36 | self._delay_list_threshold = 120 37 | 38 | def get_mutil_result_title(self, data, itype='music', isize = 1): 39 | ret_list = [] 40 | index = 0 41 | json_res = data["result"] 42 | if json_res == NORESULT: 43 | return [NORESULT] 44 | try: 45 | if json_res['status']['code'] == 0: 46 | if itype == 'music': 47 | if 'metadata' in json_res and 'music' in json_res['metadata']: 48 | for item in json_res['metadata']['music']: 49 | ret_list.append(item['title']) 50 | index += 1 51 | if index >= isize: 52 | break 53 | elif 'metainfos' in json_res: 54 | for item in json_res['metainfos']: 55 | ret_list.append(item['title']) 56 | index += 1 57 | if index >= isize: 58 | break 59 | elif itype == 'custom': 60 | if 'metadata' in json_res and 'custom_files' in json_res['metadata']: 61 | for item in json_res['metadata']['custom_files']: 62 | ret_list.append(item['title']) 63 | index += 1 64 | if index >= isize: 65 | break 66 | except Exception as e: 67 | self._dlog.logger.error("Error@get_mutil_result_title", exc_info=True) 68 | self._dlog.logger.error("Error_Data: {0}".format(data)) 69 | return ret_list if ret_list else [NORESULT] 70 | 71 | def get_mutil_result_acrid(self, data, itype='music', isize = 1): 72 | ret_list = [] 73 | index = 0 74 | json_res = data["result"] 75 | if json_res == NORESULT: 76 | return [NORESULT] 77 | try: 78 | if json_res['status']['code'] == 0: 79 | if itype == 'music': 80 | if 'metadata' in json_res and 'music' in json_res['metadata']: 81 | for item in json_res['metadata']['music']: 82 | ret_list.append(item['acrid']) 83 | index += 1 84 | if index >= isize: 85 | break 86 | elif 'metainfos' in json_res: 87 | for item in json_res['metainfos']: 88 | ret_list.append(item['acrid']) 89 | index += 1 90 | if index >= isize: 91 | break 92 | elif itype == 'custom': 93 | if 'metadata' in json_res and 'custom_files' in json_res['metadata']: 94 | for item in json_res['metadata']['custom_files']: 95 | ret_list.append(item['acrid']) 96 | index += 1 97 | if index >= isize: 98 | break 99 | except 
Exception as e:
100 | self._dlog.logger.error("Error@get_mutil_result_acrid", exc_info=True)
101 | self._dlog.logger.error("Error_Data: {0}".format(json.dumps(data)))
102 | return ret_list if ret_list else [NORESULT]
103 |
104 | def swap_position(self, ret_title, ret_data, itype):
105 | json_res = ret_data["result"]
106 | meta_type = None
107 | music_list = []
108 | if itype == 'music':
109 | if 'metadata' in json_res:
110 | music_list = json_res['metadata']['music']
111 | elif 'metainfos' in json_res:
112 | music_list = json_res['metainfos']
113 | elif itype == 'custom':
114 | music_list = json_res['metadata']['custom_files']
115 |
116 | if music_list:
117 | ret_index = 0
118 | for index, item in enumerate(music_list):
119 | if itype == "music":
120 | if item['title'] == ret_title:
121 | ret_index = index
122 | break
123 | else:
124 | if item['acrid'] == ret_title:
125 | ret_index = index
126 | break
127 | if ret_index > 0:
128 | music_list[0], music_list[ret_index] = music_list[ret_index], music_list[0]
129 |
130 | def custom_result_append(self, ret_data, title, from_data, count, tmp_deal_title_map):
131 | ret_title_set = set()
132 | for item in ret_data['result']['metadata']['custom_files']:
133 | ret_title_set.add(item['acrid'])
134 |
135 | for item in from_data['result']['metadata']['custom_files']:
136 | acrid = item['acrid']
137 | if acrid == title and acrid not in ret_title_set:
138 | item['count'] = count
139 | ret_data['result']['metadata']['custom_files'].append(item)
140 | ret_title_set.add(acrid)
141 |
142 | for item in from_data['result']['metadata']['custom_files']:
143 | acrid = item['acrid']
144 | if acrid not in ret_title_set:
145 | if acrid in tmp_deal_title_map:
146 | item['count'] = tmp_deal_title_map[acrid]['count']
147 | ret_data['result']['metadata']['custom_files'].append(item)
148 |
149 | def get_play_offset(self, data, itype='music'):
150 | try:
151 | play_offset_ms = 0
152 | result = data['result']
153 | if result['status']['code'] == 1001:
154 | return 0
155 | if itype == 'music':
156 | play_offset_ms = result['metadata']['music'][0]['play_offset_ms']
157 | elif itype == 'custom':
158 | play_offset_ms = result['metadata']['custom_files'][0]['play_offset_ms']
159 | except Exception as e:
160 | self._dlog.logger.error("Error@Get_Play_Offset, error_data: {0}, {1}".format(itype, data), exc_info=True)
161 | return play_offset_ms/1000.0
162 |
163 | def get_db_play_offset(self, data, offset_type="begin", itype='music'):
164 | """
165 | itype : music or custom
166 | offset_type : begin or end offset
167 | """
168 | try:
169 | if offset_type not in ['begin', 'end']:
170 | self._dlog.logger.error("Error@Get_DB_Play_Offset.offset_type({0}) error".format(offset_type))
171 | return (None, self.get_play_offset(data, itype)) #if offset_type error, return play_offset_ms
172 |
173 | db_offset_key = "db_{0}_time_offset_ms".format(offset_type)
174 | sample_offset_key = "sample_{0}_time_offset_ms".format(offset_type)
175 |
176 | db_play_offset_ms = 0 #ms
177 | sample_play_offset_ms = 0
178 | result = data['result']
179 | if result['status']['code'] == 1001:
180 | return (0, 0)  # keep the (sample, db) tuple shape that callers unpack
181 | if itype == 'music':
182 | db_play_offset_ms = result['metadata']['music'][0][db_offset_key]
183 | sample_play_offset_ms = result['metadata']['music'][0][sample_offset_key]
184 | elif itype == 'custom':
185 | db_play_offset_ms = result['metadata']['custom_files'][0][db_offset_key]
186 | sample_play_offset_ms = result['metadata']['custom_files'][0][sample_offset_key]
187 |
188 | return
(int(sample_play_offset_ms)/1000.0, int(db_play_offset_ms)/1000.0) 189 | except Exception as e: 190 | self._dlog.logger.error("Error@please contact support@acrcloud.com to add offset config for your access_key") 191 | return (None, None) 192 | 193 | def get_duration(self, end_timestamp, start_timestamp): 194 | end = datetime.datetime.strptime(end_timestamp, '%H:%M:%S') 195 | start = datetime.datetime.strptime(start_timestamp, '%H:%M:%S') 196 | return (end - start).total_seconds() 197 | 198 | def get_duration_accurate(self, end_data, start_data, itype='music'): 199 | monitor_len = end_data.get('rec_length', 10) 200 | end_play_offset = self.get_play_offset(end_data, itype) 201 | start_play_offset = self.get_play_offset(start_data, itype) 202 | pre_seconds = max(20, monitor_len*2) 203 | if int(start_play_offset) < pre_seconds: 204 | start_play_offset = 0 205 | else: 206 | start_play_offset = start_play_offset - (monitor_len/2) 207 | return int(round(end_play_offset - start_play_offset)) 208 | 209 | def get_duration_accurate_use_db_offset(self, end_data, begin_data, isize, itype='music'): 210 | begin_timestamp = datetime.datetime.strptime(begin_data['timestamp'], "%H:%M:%S") 211 | 212 | monitor_len = end_data.get('rec_length', 10) 213 | 214 | end_sample_offset, end_db_offset = self.get_db_play_offset(end_data, 'end', itype) 215 | begin_sample_offset, begin_db_offset = self.get_db_play_offset(begin_data, 'begin', itype) 216 | for i in [ end_sample_offset, end_db_offset, begin_sample_offset, begin_db_offset]: 217 | if i is None: 218 | return 0, 0, 0, begin_data["timestamp"] 219 | 220 | accurate_begin_timestamp = (begin_timestamp + relativedelta(seconds=int(float(begin_sample_offset)))).strftime("%H:%M:%S") 221 | 222 | db_len = int(round(end_db_offset - begin_db_offset)) 223 | sample_len = int(round(end_sample_offset - begin_sample_offset + (isize-1)*monitor_len)) 224 | 225 | mix_len = 0 226 | if int(begin_sample_offset) == 0 and int(begin_db_offset) == 0: 227 | mix_len = (isize-1)*monitor_len + end_sample_offset 228 | elif int(begin_sample_offset) == 0: 229 | if begin_db_offset <= monitor_len: 230 | mix_len = (isize-1)*monitor_len + end_sample_offset 231 | else: 232 | mix_len = (isize-1)*monitor_len + end_sample_offset - begin_sample_offset 233 | elif int(begin_db_offset) == 0: 234 | mix_len = (isize-1)*monitor_len + end_sample_offset - begin_sample_offset 235 | else: 236 | mix_len = (isize-1)*monitor_len + end_sample_offset - begin_sample_offset 237 | mix_len = int(round(mix_len)) 238 | 239 | return sample_len, db_len, mix_len, accurate_begin_timestamp 240 | 241 | def judge_zero_item_contain_current_result(self, ret_sim_title, zero_data, itype="music"): 242 | """ 243 | itype: music => title is track name 244 | itype: custom => title is acrid 245 | """ 246 | try: 247 | is_contain = False 248 | if itype == "music": 249 | zero_title_list = self.get_mutil_result_title(zero_data, 'music', 5) 250 | elif itype == "custom": 251 | zero_title_list = self.get_mutil_result_acrid(zero_data, 'custom', 5) 252 | else: 253 | return is_contain 254 | 255 | for ztitle in zero_title_list: 256 | if ztitle == NORESULT: 257 | break 258 | sim_zero_title = self.tryStrSub(ztitle)[0] if itype == "music" else ztitle 259 | if sim_zero_title == ret_sim_title: 260 | is_contain = True 261 | self.swap_position(ztitle, zero_data, itype) 262 | break 263 | except Exception as e: 264 | self._dlog.logger.error("Error@judge_zero_item_contain_current_result", exc_info=True) 265 | return is_contain 266 | 267 | def 
judge_latter_item_contain_current_result(self, ret_sim_title, latter_data, itype="music"): 268 | """ 269 | itype: music => title is track name 270 | itype: custom => title is acrid 271 | """ 272 | try: 273 | is_contain = False 274 | latter_data_swaped = None 275 | if itype == "music": 276 | latter_title_list = self.get_mutil_result_title(latter_data, 'music', 5) 277 | elif itype == "custom": 278 | latter_title_list = self.get_mutil_result_acrid(latter_data, 'custom', 5) 279 | else: 280 | return is_contain, latter_data_swaped 281 | 282 | for ltitle in latter_title_list: 283 | if ltitle == NORESULT: 284 | break 285 | sim_latter_title = self.tryStrSub(ltitle)[0] if itype == "music" else ltitle 286 | if sim_latter_title == ret_sim_title: 287 | is_contain = True 288 | latter_data_swaped = copy.deepcopy(latter_data) 289 | self.swap_position(ltitle, latter_data_swaped, itype) 290 | break 291 | except Exception as e: 292 | self._dlog.logger.error("Error@judge_latter_item_contain_current_result", exc_info=True) 293 | return is_contain, latter_data_swaped 294 | 295 | def real_check_title_custom(self, stream_id, title, timestamp_obj): 296 | now_timestamp = timestamp_obj #datetime.datetime.utcnow() 297 | if stream_id not in self._real_custom: 298 | self._real_custom[stream_id] = [[('','')], ''] 299 | 300 | if len(self._real_custom[stream_id][0]) > self._real_custom_list_num: 301 | self._real_custom[stream_id][0] = self._real_custom[stream_id][0][-self._real_custom_list_num:] 302 | his_list_num = self._real_custom_list_num 303 | else: 304 | his_list_num = len(self._real_custom[stream_id][0]) 305 | 306 | for i in range(his_list_num-1, -1, -1): 307 | if self._real_custom[stream_id][0][i][0] == title: 308 | his_timestamp = self._real_custom[stream_id][0][i][1] 309 | his_time_obj = datetime.datetime.strptime(his_timestamp, '%H:%M:%S') 310 | if (now_timestamp - his_time_obj).total_seconds() <= self._real_custom_valid_interval: 311 | return True 312 | if title == NORESULT: 313 | break 314 | 315 | return False 316 | 317 | def checkResultSim(self, idx, curr_title, his_title, stream_id): 318 | if not curr_title or not his_title: 319 | return False 320 | sim, detail = tools_str_sim.str_sim(curr_title, his_title) 321 | if not sim and curr_title != NORESULT and his_title != NORESULT: 322 | pass 323 | return sim 324 | 325 | def checkSame(self, curr_title, stream_id): 326 | self._real_music[stream_id] = self._real_music.get(stream_id, [[''], '']) 327 | if len(self._real_music[stream_id][0]) > self._real_music_list_num: 328 | self._real_music[stream_id][0] = self._real_music[stream_id][0][-self._real_music_list_num:] 329 | his_max = self._real_music_list_num 330 | else: 331 | his_max = len(self._real_music[stream_id][0]) 332 | for i in range(his_max-1, -1, -1): 333 | if self.checkResultSim(i, curr_title, self._real_music[stream_id][0][i], stream_id): 334 | return True 335 | if curr_title == NORESULT: 336 | break 337 | return False 338 | 339 | def updateResultTitle(self, data, new_title): 340 | if new_title == NORESULT: 341 | return 342 | try: 343 | json_res = data["result"] 344 | metainfos = json_res.get("metainfos") 345 | metadata = json_res.get("metadata") 346 | if metainfos: 347 | metainfos[0]['title'] = new_title 348 | else: 349 | if metadata.get('music'): 350 | metadata['music'][0]['title'] = new_title 351 | else: 352 | metadata['custom_files'][0]['title'] = new_title 353 | except Exception as e: 354 | self._dlog.logger.error("Error@updateResultTitle", exc_info=True) 355 | 356 | def tryStrSub(self, try_str): 357 
| sub_str = tools_str_sim.str_sub(try_str) 358 | if len(sub_str) > 0 and len(try_str) > len(sub_str): 359 | return sub_str, True 360 | return try_str, False 361 | 362 | def tryUpdateResultTitle(self, data, itype): 363 | if itype == 'custom': 364 | title = self.get_mutil_result_title(data, 'custom', 1)[0] 365 | return title 366 | title = self.get_mutil_result_title(data, 'music', 1)[0] 367 | stream_id = data.get("stream_id") 368 | new_title, try_status = self.tryStrSub(title) 369 | if try_status: 370 | self.updateResultTitle(data, new_title) 371 | return new_title 372 | return title 373 | 374 | def deal_real_history(self, data): 375 | is_new = False 376 | result = None 377 | curr_title = self.get_mutil_result_title(data, 'music', 1)[0] 378 | stream_id = data.get("stream_id") 379 | if not stream_id: 380 | return result, is_new 381 | if curr_title == NORESULT: 382 | if not self.checkSame(curr_title, stream_id): 383 | self._real_music[stream_id][0].append(curr_title) 384 | self._real_music[stream_id][1] = data 385 | result = data 386 | is_new = True 387 | else: 388 | result = None 389 | is_new = False 390 | else: 391 | if self.checkSame(curr_title, stream_id): 392 | result = self._real_music[stream_id][1] 393 | is_new = False 394 | else: 395 | self._real_music[stream_id][0].append(curr_title) 396 | self._real_music[stream_id][1] = data 397 | result = data 398 | is_new = True 399 | 400 | return result, is_new 401 | 402 | def deal_delay_history(self, data): 403 | stream_id = data.get("stream_id") 404 | timestamp = data.get("timestamp") 405 | raw_title = self.get_mutil_result_title(data, 'music', 1)[0] 406 | sim_title = self.tryStrSub(raw_title) 407 | if stream_id not in self._delay_music: 408 | self._delay_music[stream_id] = [(raw_title, sim_title[0], timestamp, data)] 409 | else: 410 | self._delay_music[stream_id].append((raw_title, sim_title[0], timestamp, data)) 411 | 412 | if len(self._delay_music[stream_id]) > self._delay_list_max_num : 413 | return self.runDelayX_for_music_delay2(stream_id) 414 | else: 415 | return None 416 | 417 | def compute_played_duration(self, history_data, start_index, end_index, judge_zero_or_latter=True, itype="music"): 418 | retdata = history_data[start_index][-1] 419 | 420 | if itype == "music": 421 | ret_title = self.get_mutil_result_title(retdata, 'music', 1)[0] 422 | ret_sim_title = history_data[start_index][1] 423 | elif itype == "custom": 424 | ret_title = self.get_mutil_result_acrid(retdata, 'custom', 1)[0] 425 | ret_sim_title = ret_title 426 | 427 | if judge_zero_or_latter and start_index == 1: 428 | if self.judge_zero_item_contain_current_result(ret_sim_title, history_data[0][-1], itype): 429 | start_index = 0 430 | 431 | is_contain = False 432 | latter_data_swaped = None 433 | if judge_zero_or_latter and (end_index + 1 <= len(history_data) - 1): 434 | is_contain, latter_data_swaped = self.judge_latter_item_contain_current_result(ret_sim_title, history_data[end_index+1][-1], itype) 435 | 436 | if itype == "music": 437 | start_timestamp = history_data[start_index][2] 438 | end_timestamp = history_data[end_index][2] 439 | start_data = history_data[start_index][3] 440 | end_data = history_data[end_index][3] 441 | else: 442 | start_timestamp = history_data[start_index][1] 443 | end_timestamp = history_data[end_index][1] 444 | start_data = history_data[start_index][2] 445 | end_data = history_data[end_index][2] 446 | 447 | duration = self.get_duration(end_timestamp, start_timestamp) 448 | duration_accurate = self.get_duration_accurate(end_data, start_data, 
itype)
449 | isize = end_index - start_index + 1
450 | if is_contain:
451 | end_data = latter_data_swaped
452 | isize += 1
453 |
454 | sample_duration, db_duration, mix_duration, accurate_timestamp_utc = self.get_duration_accurate_use_db_offset(end_data, start_data, isize, itype)
455 |
456 | ret_dict = {
457 | "duration" : duration,
458 | "duration_accurate" : duration_accurate,
459 | "sample_duration" : sample_duration,
460 | "db_duration" : db_duration,
461 | "mix_duration" : mix_duration,
462 | "accurate_timestamp_utc" : accurate_timestamp_utc,
463 | }
464 | return ret_dict
465 |
466 | def get_data_duration_ms(self, data):
467 | try:
468 | duration_ms = -1
469 | json_res = data["result"]
470 | if json_res['status']['code'] == 0:
471 | if 'metadata' in json_res and 'music' in json_res['metadata']:
472 | if len(json_res['metadata']['music']) > 0:
473 | duration_ms = json_res["metadata"]["music"][0]["duration_ms"]
474 | except Exception as e:
475 | self._dlog.logger.error("Error@get_data_duration_ms", exc_info=True)
476 | return (duration_ms/1000.0) if duration_ms != -1 else duration_ms
477 |
478 | def get_time_diff(self, start_timestamp, end_timestamp, tformat="%Y-%m-%d %H:%M:%S"):
479 | try:
480 | diff_sec = 0
481 | start_obj = datetime.datetime.strptime(start_timestamp, tformat)
482 | end_obj = datetime.datetime.strptime(end_timestamp, tformat)
483 | diff_sec = int((end_obj - start_obj).total_seconds())
484 | except Exception as e:
485 | self._dlog.logger.error("Error@get_time_diff", exc_info=True)
486 | return diff_sec
487 |
488 | def remove_next_result_from_now_result_list_for_music_delay2(self, history_data, ret_data, max_index):
489 | #Just for music delay2 filter
490 | try:
491 | if ret_data and len(history_data) >= max_index+2:
492 | raw_title, sim_title, timestamp, next_data = history_data[max_index + 1]
493 | if next_data:
494 | next_title_list = self.get_mutil_result_title(next_data, 'music', 1)
495 | next_title_set = set(next_title_list)
496 | new_ret_music = []
497 | for index, item in enumerate(ret_data["result"]["metadata"]["music"]):
498 | if index == 0 or (item["title"] not in next_title_set):
499 | new_ret_music.append(item)
500 | ret_data["result"]["metadata"]["music"] = new_ret_music
501 | except Exception as e:
502 | self._dlog.logger.error("Error@remove_next_result_from_now_result_list_for_music_delay2", exc_info=True)
503 |
504 | def result_append_for_music_delay2(self, ret_data, title, from_data):
505 | try:
506 | ret_title_set = set()
507 | for item in ret_data['result']['metadata']['music']:
508 | sim_title = self.tryStrSub(item['title'])[0]
509 | ret_title_set.add(sim_title)
510 |
511 | for item in from_data['result']['metadata']['music']:
512 | from_title = item['title']
513 | sim_from_title = self.tryStrSub(from_title)[0]
514 | if sim_from_title == title and sim_from_title not in ret_title_set:
515 | ret_data['result']['metadata']['music'].append(item)
516 | ret_title_set.add(sim_from_title)
517 | except Exception as e:
518 | self._dlog.logger.error("Error@result_append_for_music_delay2", exc_info=True)
519 |
520 | def get_custom_duration_by_title(self, title, ret_data):
521 | try:
522 | duration = 0
523 | db_end_offset = 0
524 | for index, item in enumerate(ret_data["result"]["metadata"]["custom_files"]):
525 | # for custom results the "title" passed in is actually an acrid
526 | if title == item["acrid"]:
527 | duration_ms = int(item["duration_ms"])
528 | db_end_offset_ms = int(item["db_end_time_offset_ms"])
529 | if duration_ms >= 0:
530 | duration = int(duration_ms/1000)
531 | if db_end_offset_ms:
532 | db_end_offset = int(db_end_offset_ms/1000)
533 | except Exception as e:
534 | self._dlog.logger.error("Error@get_custom_duration_by_title, error_data:{0}".format(ret_data), exc_info=True)
535 | return duration, db_end_offset
536 |
537 | def get_music_duration_by_title(self, title, ret_data):
538 | try:
539 | duration = 0
540 | db_end_offset = 0
541 | if "metadata" in ret_data["result"] and "music" in ret_data["result"]["metadata"]:
542 | for index, item in enumerate(ret_data["result"]["metadata"]["music"]):
543 | if title == item["title"]:
544 | duration_ms = int(item["duration_ms"])
545 | db_end_offset_ms = int(item["db_end_time_offset_ms"])
546 | if duration_ms >= 0:
547 | duration = int(duration_ms/1000)
548 | if db_end_offset_ms:
549 | db_end_offset = int(db_end_offset_ms/1000)
550 | except Exception as e:
551 | self._dlog.logger.error("Error@get_music_duration_by_title, error_data:{0}".format(ret_data), exc_info=True)
552 | return duration, db_end_offset
553 |
554 | def delay_dynamic_judge_size(self, deal_title_map, history_data, itype):
555 | try:
556 | judge_size = 5
557 | if itype == "custom":
558 | title = sorted(deal_title_map.items(), key=lambda x:x[1]["score"], reverse=True)[0][0]
559 | else:
560 | title = list(deal_title_map.keys())[0]  # list(...) keeps this working on Python 3 as well
561 |
562 | index = deal_title_map[title]["index_list"][-1]
563 | if itype == "custom":
564 | ret_data = history_data[index][2]
565 | else:
566 | ret_data = history_data[index][3]
567 |
568 | monitor_len = ret_data.get("monitor_seconds", 10)
569 |
570 | if itype == "custom":
571 | duration, db_end_offset = self.get_custom_duration_by_title(title, ret_data)
572 | else:
573 | duration, db_end_offset = self.get_music_duration_by_title(title, ret_data)
574 |
575 | if db_end_offset > 0 and db_end_offset < duration:
576 | judge_size = abs(int(math.ceil(db_end_offset*1.0/monitor_len))) + 1
577 | if judge_size > 10:
578 | judge_size = 10
579 | if judge_size <= 3:
580 | judge_size = 3
581 | if itype == "custom":
582 | judge_size = 1
583 | except Exception as e:
584 | self._dlog.logger.error("Error@delay_dynamic_judge_size", exc_info=True)
585 | return judge_size+1
586 |
587 | def fill_ret_data_by_acrid_count(self, sorted_title_list, history_data):
588 | try:
589 | ret_data = None
590 | init_ret_data = True
591 | for sitem in sorted_title_list:
592 | sitem_title, sitem_map = sitem
593 | sitem_title = self.tryStrSub(sitem_title)[0]
594 | sitem_count = sitem_map["count"]
595 | acrid_count_map = {}
596 | for tindex in sitem_map["index_list"]:
597 | tdata = history_data[tindex][3]
598 | if init_ret_data:
599 | ret_data = copy.deepcopy(tdata)
600 | ret_data["result"]["metadata"]["music"] = []
601 | init_ret_data = False
602 | if "metadata" in tdata["result"] and "music" in tdata["result"]["metadata"]:
603 | for item in tdata['result']['metadata']['music']:
604 | sim_title = self.tryStrSub(item['title'])[0]
605 | if sim_title == sitem_title:
606 | acrid = item['acrid']
607 | if acrid not in acrid_count_map:
608 | acrid_count_map[acrid] = {"count":0, "info":item}
609 | acrid_count_map[acrid]["count"] += 1
610 | if ret_data is None:
611 | break
612 |
613 | acrid_count_map_sorted = sorted(acrid_count_map.items(), key=lambda x:x[1]["count"], reverse=True)
614 | for s_index, s_item in enumerate(acrid_count_map_sorted):
615 | ret_data["result"]["metadata"]["music"].append(s_item[1]["info"])
616 | if s_index >= 2:
617 | break
618 | if ret_data is not None and len(ret_data['result']['metadata']['music']) > 6:
619 |
ret_data['result']['metadata']['music'] = ret_data['result']['metadata']['music'][:6] 620 | except Exception as e: 621 | self._dlog.logger.error("Error@fill_ret_data_by_acrid_count", exc_info=True) 622 | return ret_data 623 | 624 | def get_music_data_offset(self, data): 625 | try: 626 | ret = { 627 | "monitor_len":0, 628 | "duration_ms":0, 629 | "s_begin_ms":0, 630 | "s_end_ms":0, 631 | "d_begin_ms":0, 632 | "d_end_ms":0 633 | } 634 | result = data.get("result") 635 | monitor_len = data.get("monitor_seconds", 10) 636 | ret["monitor_len"] = monitor_len 637 | if result and "metadata" in result and "music" in result["metadata"]: 638 | fitem = result["metadata"]["music"][0] 639 | ret["duration_ms"] = int(fitem["duration_ms"]) 640 | ret["s_begin_ms"] = int(fitem["sample_begin_time_offset_ms"]) 641 | ret["s_end_ms"] = int(fitem["sample_end_time_offset_ms"]) 642 | ret["d_begin_ms"] = int(fitem["db_begin_time_offset_ms"]) 643 | ret["d_end_ms"] = int(fitem["db_end_time_offset_ms"]) 644 | return ret 645 | except Exception as e: 646 | self._dlog.logger.error("Error@get_music_data_offset, error_data:{0}".format(data), exc_info=True) 647 | return None 648 | 649 | def check_if_is_break(self, index1, index2, data1, data2): 650 | try: 651 | is_break = False 652 | ret1 = self.get_music_data_offset(data1) 653 | ret2 = self.get_music_data_offset(data2) 654 | if ret1 and ret2: 655 | diff_db = ret2["d_end_ms"] - ret1["d_begin_ms"] 656 | if diff_db <= 0: 657 | return is_break 658 | timestamp1 = datetime.datetime.strptime(data1["timestamp"], "%H:%M:%S") 659 | timestamp2 = datetime.datetime.strptime(data2["timestamp"], "%H:%M:%S") 660 | monitor_len = ret1["monitor_len"] 661 | A1 = timestamp1 + relativedelta(seconds=int(ret1["s_begin_ms"]/1000)) 662 | A2 = timestamp2 + relativedelta(seconds=int(ret2["s_end_ms"]/1000)) 663 | B1 = int((A2 - A1).total_seconds()) 664 | B2 = (index2 - index1 - 1)*monitor_len + int(diff_db/1000) 665 | B3 = int(diff_db/1000) 666 | if abs(B3 - B1) <= 15: 667 | is_break = False 668 | elif abs(B2 - B1) <= 10: 669 | is_break = True 670 | except Exception as e: 671 | self._dlog.logger.error("Error@check_if_is_break", exc_info=True) 672 | return is_break 673 | 674 | def check_if_continuous(self, index1, index2, data1, data2): 675 | try: 676 | is_cont = True 677 | ret1 = self.get_music_data_offset(data1) 678 | ret2 = self.get_music_data_offset(data2) 679 | timestamp1 = datetime.datetime.strptime(data1["timestamp"], "%H:%M:%S") 680 | timestamp2 = datetime.datetime.strptime(data2["timestamp"], "%H:%M:%S") 681 | diff_sec = (timestamp2 - timestamp1).total_seconds() 682 | monitor_len = ret1["monitor_len"] 683 | if ret1 and ret2: 684 | for tmp_ret in [ret1, ret2]: 685 | if (tmp_ret["s_end_ms"] - tmp_ret["s_begin_ms"]) != (tmp_ret["d_end_ms"] - tmp_ret["d_begin_ms"]): 686 | return is_cont 687 | dur1 = ret1["d_end_ms"] - ret1["d_begin_ms"] 688 | dur2 = ret2["d_end_ms"] - ret2["d_begin_ms"] 689 | dur1 = dur1 if dur1 > 0 else 0 690 | dur2 = dur2 if dur2 > 0 else 0 691 | ret1_s_end = ret1["s_end_ms"] 692 | ret2_s_begin = ret2["s_begin_ms"] 693 | if index1+1 == index2 and abs(monitor_len*1000 - ret1_s_end) < 2500 and abs(ret2_s_begin) < 2500 and diff_sec < monitor_len*2: 694 | pass 695 | else: 696 | ifirst, iend = max(ret1["d_begin_ms"], ret2["d_begin_ms"]), min(ret1["d_end_ms"], ret2["d_end_ms"]) 697 | inter_dur = iend - ifirst 698 | if inter_dur > 0: 699 | min_dur = min(dur1, dur2) if min(dur1, dur2) > 0 else max(dur1, dur2) 700 | if min_dur > 0: 701 | inter_rate = (inter_dur*1.0/min_dur) 702 | if 
inter_dur >=2 and inter_rate >=0.8:
703 | is_cont = False
704 | except Exception as e:
705 | self._dlog.logger.error("Error@check_if_continuous", exc_info=True)
706 | return is_cont
707 |
708 | def runDelayX_for_music_delay2(self, stream_id):
709 | history_data = self._delay_music[stream_id]
710 | judge_zero_or_latter = True
711 |
712 | if len(history_data) >= self._delay_list_threshold:
713 | history_data = history_data[-(self._delay_list_threshold-1):]
714 |
715 | history_data_len = len(history_data)
716 | for ii in range((history_data_len-1), 0, -1):
717 | if history_data[-ii][0] == NORESULT:  # item[0] is the raw title string here (the custom variant stores a list)
718 | continue
719 | else:
720 | history_data = history_data[-(ii+1):]
721 | break
722 |
723 | first_not_noresult_index = -1
724 | for index, item in enumerate(history_data):
725 | if index == 0:
726 | continue
727 | if item[0] == NORESULT:
728 | first_not_noresult_index = index
729 | else:
730 | break
731 | if first_not_noresult_index != -1:
732 | history_data = history_data[first_not_noresult_index:]
733 | self._delay_music[stream_id] = history_data
734 | return None
735 |
736 | ########## Get Break Index ##########
737 | deal_title_map = {} #key:title, value:{'count':0, 'index_list':[]}
738 | break_index = 0
739 |
740 |
741 | for index, item in enumerate(history_data[1:]):
742 | index += 1
743 | raw_title, sim_title, timestamp, data = item
744 | if index!=1:
745 | flag_first = True
746 | flag_second = True
747 | if sim_title in deal_title_map:
748 | flag_first = False
749 | if flag_first:
750 | tmp_all_len = len(history_data)
751 | tmp_count = 0
752 | tmp_first_break_index = -1
753 | #tmp_judge_size = 2
754 | tmp_judge_size = self.delay_dynamic_judge_size(deal_title_map, history_data, "music")
755 | find_interval = False
756 | find_pre_last_index = index-1
757 | find_next_sim_index = -1
758 | for i in range(index, tmp_all_len):
759 | next_raw_title, next_sim_title, next_timestamp, next_data = history_data[i]
760 | tmp_list_flag = False
761 | if next_sim_title in deal_title_map:
762 | tmp_list_flag = True
763 | tmp_count = 0
764 | tmp_first_break_index = -1
765 | if find_interval == True:
766 | find_interval = False
767 | find_next_sim_index = i
768 | if find_next_sim_index - find_pre_last_index - 1 >= 8:
769 | is_break = self.check_if_is_break(find_pre_last_index, find_next_sim_index, history_data[find_pre_last_index][3], history_data[find_next_sim_index][3])
770 | if is_break:
771 | break_index = find_pre_last_index + 1
772 | break
773 | else:
774 | if find_interval == False:
775 | find_interval = True
776 | find_pre_last_index = i - 1
777 |
778 | if tmp_list_flag:
779 | continue
780 | else:
781 | tmp_count += 1
782 | if tmp_first_break_index == -1:
783 | tmp_first_break_index = i
784 | if tmp_count < tmp_judge_size:
785 | continue
786 | flag_second = True
787 | break_index = tmp_first_break_index if tmp_first_break_index != -1 else i
788 | break
789 |
790 | if flag_first and flag_second and deal_title_map:
791 | if break_index >0:
792 | for iii in range(index, break_index):
793 | tmp_raw_title, tmp_sim_title, tmp_timestamp, tmp_data = history_data[iii]
794 | if tmp_sim_title == NORESULT:
795 | continue
796 | if tmp_sim_title in deal_title_map:
797 | deal_title_map[tmp_sim_title]['count'] += 1
798 | deal_title_map[tmp_sim_title]['index_list'].append(iii)
799 | #**********************************************************
800 | sorted_dtitle = sorted(deal_title_map.items(), key = lambda x:x[1]['count'], reverse = True)
801 | sorted_fitem_title, sorted_fitem_map = sorted_dtitle[0]
802 | sfm_count = sorted_fitem_map["count"]
803 | cfirst_index, csecond_index = sorted(sorted_fitem_map["index_list"])[:2] if sfm_count >=2 else [0, 0]
804 | if sfm_count in [2, 3]: #or ((3 < sfm_count <= 10) and sfm_count < (break_index - index)):
805 | is_cont = self.check_if_continuous(cfirst_index, csecond_index, history_data[cfirst_index][3], history_data[csecond_index][3])
806 | if not is_cont:
807 | judge_zero_or_latter = False
808 | break_index = cfirst_index + 1
809 | deal_title_map = {sorted_fitem_title:{'count':1, 'index_list':[cfirst_index]}}
810 | #**********************************************************
811 | # break out of the scan loop
812 | break
813 |
814 | if sim_title == NORESULT:
815 | continue
816 | if sim_title not in deal_title_map:
817 | deal_title_map[sim_title] ={'count':0, 'index_list':[]}
818 | deal_title_map[sim_title]['count'] += 1
819 | deal_title_map[sim_title]['index_list'].append(index)
820 |
821 |
822 | ret_data = None
823 | duration_dict = {}
824 | duration = 0
825 | if break_index > 0 and deal_title_map:
826 | sorted_title_list = sorted(deal_title_map.items(), key = lambda x:x[1]['count'], reverse = True)
827 | ret_data = self.fill_ret_data_by_acrid_count(sorted_title_list, history_data)
828 | if ret_data and len(ret_data["result"]["metadata"]["music"]) == 0:
829 | ret_data = None
830 |
831 | index_range = set()
832 | for title in deal_title_map:
833 | index_range |= set(deal_title_map[title]['index_list'])
834 | min_index = min(index_range)
835 | max_index = max(index_range)
836 | duration_dict = self.compute_played_duration(history_data, min_index, max_index, judge_zero_or_latter, "music")
837 |
838 | self.remove_next_result_from_now_result_list_for_music_delay2(history_data, ret_data, max_index)
839 |
840 | if ret_data:
841 | duration = duration_dict["duration"]
842 | duration_accurate = duration_dict["duration_accurate"]
843 | sample_duration = duration_dict["sample_duration"]
844 | db_duration = duration_dict["db_duration"]
845 | mix_duration = duration_dict["mix_duration"]
846 | accurate_timestamp_utc = duration_dict["accurate_timestamp_utc"]
847 | ret_data['result']['metadata']['played_duration'] = abs(mix_duration)
848 | ret_data['result']['metadata']['timestamp_utc'] = accurate_timestamp_utc
849 | ret_data['timestamp'] = accurate_timestamp_utc
850 | if ret_data['result']['metadata']['played_duration'] <= 1:
851 | ret_data = None
852 |
853 | ########### cut history_data #############
854 | if break_index>=0:
855 | cut_index = break_index
856 | for i, item in enumerate(history_data[break_index:]):
857 | if item[0] == NORESULT:  # raw title string for the music filter
858 | cut_index = break_index + i + 1
859 | else:
860 | break
861 | cut_index = cut_index - 1 if cut_index >= 1 else cut_index
862 | history_data = history_data[cut_index:]
863 |
864 | reverse_index = -1
865 | for i, item in enumerate(history_data[::-1]):
866 | if item[0] == NORESULT:  # raw title string for the music filter
867 | reverse_index = i
868 | continue
869 | else:
870 | break
871 |
872 | if reverse_index != -1:
873 | new_cut_index = -1
874 | reverse_index = len(history_data) - reverse_index - 1
875 | if reverse_index in [0, 1]:
876 | history_data = []
877 | else:
878 | pass
879 |
880 | if judge_zero_or_latter == False and len(history_data) > 0:
881 | if history_data[0][0] != NORESULT:
882 | tmp_t, sim_tmp_t, tmp_timestamp, tmp_data = history_data[0]
883 | if tmp_data and "status" in tmp_data["result"]:
884 | tmp_data["result"]["status"]["code"] = 1001
885 | history_data[0] = (NORESULT, NORESULT, tmp_timestamp, tmp_data)
886 | self._delay_music[stream_id] = history_data
887 |
888 | return ret_data
889 |
890 |
891 | def deal_real_custom(self, data):
892 | is_new = False
893 | result = None
894 | curr_title = self.get_mutil_result_acrid(data, 'custom')[0]
895 |
896 | stream_id = data.get("stream_id")
897 | timestamp = data.get("timestamp")
898 | timestamp_obj = datetime.datetime.strptime(timestamp, "%H:%M:%S")
899 | if not stream_id:
900 | return result, is_new
901 | if curr_title == NORESULT:
902 | if not self.real_check_title_custom(stream_id, curr_title, timestamp_obj):
903 | self._real_custom[stream_id][0].append((curr_title, timestamp))
904 | self._real_custom[stream_id][1] = data
905 | result = data
906 | is_new = True
907 | else:
908 | result = None
909 | is_new = False
910 | else:
911 | if self.real_check_title_custom(stream_id, curr_title, timestamp_obj):
912 | result = self._real_custom[stream_id][1]
913 | is_new = False
914 | else:
915 | self._real_custom[stream_id][0].append((curr_title, timestamp))
916 | self._real_custom[stream_id][1] = data
917 | result = data
918 | is_new = True
919 | return result, is_new
920 |
921 | def deal_delay_custom(self, data):
922 | try:
923 | ret_result = None
924 | stream_id = data.get("stream_id")
925 | timestamp = data.get("timestamp")
926 | title_list = self.get_mutil_result_acrid(data, 'custom', 5)
927 | if stream_id not in self._delay_custom:
928 | self._delay_custom[stream_id] = [(title_list, timestamp, data)]
929 | else:
930 | self._delay_custom[stream_id].append((title_list, timestamp, data))
931 |
932 | if len(self._delay_custom[stream_id]) >= self._delay_list_max_num:
933 | ret_result = self.runDelayX_custom(stream_id)
934 | except Exception as e:
935 | self._dlog.logger.error("Error@deal_delay_custom", exc_info=True)
936 | return ret_result
937 |
938 | def remove_next_result_from_now_result_list(self, history_data, ret_data, max_index):
939 | #Just for custom delay filter
940 | try:
941 | if ret_data and len(history_data) >= max_index+2:
942 | acrid_list, timestamp, next_data = history_data[max_index + 1]
943 | if next_data:
944 | #update max size acrid_list to 20
945 | next_acrid_list = self.get_mutil_result_acrid(next_data, 'custom', 20)
946 | next_acrid_set = set(next_acrid_list)
947 | new_ret_custom_files = []
948 | for index, item in enumerate(ret_data["result"]["metadata"]["custom_files"]):
949 | if index == 0 or (item["acrid"] not in next_acrid_set):
950 | new_ret_custom_files.append(item)
951 | ret_data["result"]["metadata"]["custom_files"] = new_ret_custom_files
952 | except Exception as e:
953 | self._dlog.logger.error("Error@remove_next_result_from_now_result_list", exc_info=True)
954 |
955 | def get_custom_duration_by_acrid(self, title, ret_data): # renamed (was get_custom_duration_by_title) so it no longer shadows the two-value variant defined above
956 | try:
957 | duration = 0
958 | for index, item in enumerate(ret_data["result"]["metadata"]["custom_files"]):
959 | if title == item["acrid"]:
960 | duration_ms = int(item["duration_ms"])
961 | if duration_ms >= 0:
962 | duration = int(duration_ms/1000)
963 | except Exception as e:
964 | self._dlog.logger.error("Error@get_custom_duration_by_acrid, error_data:{0}".format(ret_data), exc_info=True)
965 | return duration
966 |
967 | def custom_delay_dynamic_judge_size(self, deal_title_map, history_data):
968 | try:
969 | judge_size = 6
970 | title = list(deal_title_map.keys())[0]
971 | index = deal_title_map[title]["index_list"][-1]
972 | ret_data = history_data[index][2]
973 | duration = self.get_custom_duration_by_acrid(title, ret_data)
974 | tmp_size = int(duration/10)
975 | if tmp_size <=6:
976 | judge_size = tmp_size if tmp_size > 1 else 2
977 |
elif tmp_size >= 18: 978 | judge_size = 18 979 | except Exception as e: 980 | self._dlog.logger.error("Error@custom_delay_dynamic_judge_size", exc_info=True) 981 | 982 | return judge_size if judge_size >= 2 else 2 983 | 984 | def runDelayX_custom(self, stream_id): 985 | history_data = self._delay_custom[stream_id] 986 | 987 | if len(history_data) >= self._delay_list_threshold: 988 | history_data = history_data[-(self._delay_list_threshold-1):] 989 | 990 | history_data_len = len(history_data) 991 | for ii in range((history_data_len-1), 0, -1): 992 | if history_data[-ii][0][0] == NORESULT: 993 | continue 994 | else: 995 | history_data = history_data[-(ii+1):] 996 | break 997 | 998 | first_not_noresult_index = -1 999 | for index, item in enumerate(history_data): 1000 | if index == 0: 1001 | continue 1002 | if len(item[0])>0 and item[0][0] == NORESULT: 1003 | first_not_noresult_index = index 1004 | else: 1005 | break 1006 | if first_not_noresult_index != -1: 1007 | history_data = history_data[first_not_noresult_index:] 1008 | self._delay_custom[stream_id] = history_data 1009 | return None 1010 | 1011 | deal_title_map = {} #key:title, value:{'count':0, 'index_list':[]} 1012 | tmp_deal_title_map = {} 1013 | break_index = 0 1014 | 1015 | for index, item in enumerate(history_data[1:]): 1016 | index += 1 1017 | title_list, timestamp, data = item 1018 | if index!=1: 1019 | flag_first = True 1020 | flag_second = True 1021 | for title in title_list[:3]: 1022 | if title in deal_title_map: 1023 | flag_first = False 1024 | if flag_first: 1025 | judge_size = self.custom_delay_dynamic_judge_size(deal_title_map, history_data) 1026 | for i in range(1,judge_size): 1027 | if index + i < len(history_data): 1028 | next_title_list, next_timestamp, next_data = history_data[index + i] 1029 | for title in next_title_list[:3]: 1030 | if title in deal_title_map: 1031 | flag_second = False 1032 | else: 1033 | flag_second = False 1034 | if flag_first and flag_second and deal_title_map: 1035 | break_index = index 1036 | break 1037 | 1038 | for i, title in enumerate(title_list): 1039 | if title == NORESULT: 1040 | continue 1041 | if i == 0: 1042 | if title not in deal_title_map: 1043 | deal_title_map[title] ={'count':0, 'index_list':[]} 1044 | deal_title_map[title]['count'] += 1 1045 | deal_title_map[title]['index_list'].append(index) 1046 | if title not in tmp_deal_title_map: 1047 | tmp_deal_title_map[title] = {'count':0, 'index_list':[]} 1048 | tmp_deal_title_map[title]['count'] += 1 1049 | tmp_deal_title_map[title]['index_list'].append(index) 1050 | 1051 | ########### New Deal Custom Result Add Count ########### 1052 | ret_data = None 1053 | duration_dict = {} 1054 | duration = 0 1055 | if break_index > 0 and deal_title_map: 1056 | tmp_count_map = {} 1057 | sorted_title_list = sorted(deal_title_map.items(), key = lambda x:x[1]['count'], reverse = True) 1058 | for sitem in sorted_title_list: 1059 | sitem_title, sitem_map = sitem 1060 | sitem_count = sitem_map["count"] 1061 | sitem_min_index = min(sitem_map["index_list"]) 1062 | if sitem_count not in tmp_count_map: 1063 | tmp_count_map[sitem_count] = [] 1064 | tmp_count_map[sitem_count].append((sitem_title, sitem_min_index)) 1065 | first_item_flag = True 1066 | for scount in sorted(tmp_count_map.keys(), reverse=True): 1067 | count_list = sorted(tmp_count_map[scount], key = lambda x:x[1]) 1068 | for ditem in count_list: 1069 | dtitle, dindex = ditem 1070 | from_data = history_data[dindex][2] 1071 | if first_item_flag: 1072 | first_item_flag = False 1073 | ret_data = 
copy.deepcopy(from_data) 1074 | ret_data["result"]["metadata"]["custom_files"] = [] 1075 | self.custom_result_append(ret_data, dtitle, from_data, scount, tmp_deal_title_map) 1076 | 1077 | index_range = set() 1078 | for title in deal_title_map: 1079 | index_range |= set(deal_title_map[title]['index_list']) 1080 | min_index = min(index_range) 1081 | max_index = max(index_range) 1082 | duration_dict = self.compute_played_duration(history_data, min_index, max_index, True, "custom") 1083 | 1084 | self.remove_next_result_from_now_result_list(history_data, ret_data, max_index) 1085 | 1086 | if ret_data: 1087 | duration = duration_dict["duration"] 1088 | duration_accurate = duration_dict["duration_accurate"] 1089 | sample_duration = duration_dict["sample_duration"] 1090 | db_duration = duration_dict["db_duration"] 1091 | mix_duration = duration_dict["mix_duration"] 1092 | accurate_timestamp_utc = duration_dict["accurate_timestamp_utc"] 1093 | ret_data['result']['metadata']['played_duration'] = abs(mix_duration) 1094 | ret_data['result']['metadata']['timestamp_utc'] = accurate_timestamp_utc 1095 | ret_data['timestamp'] = accurate_timestamp_utc 1096 | if ret_data['result']['metadata']['played_duration'] <= self._delay_custom_played_duration_min: 1097 | ret_data = None 1098 | 1099 | ########### cut history_data ############# 1100 | if break_index>=0: 1101 | cut_index = break_index 1102 | for i, item in enumerate(history_data[break_index:]): 1103 | if item[0][0] == NORESULT: 1104 | cut_index = break_index + i + 1 1105 | else: 1106 | break 1107 | cut_index = cut_index - 1 if cut_index >= 1 else cut_index 1108 | history_data = history_data[cut_index:] 1109 | 1110 | reverse_index = -1 1111 | for i, item in enumerate(history_data[::-1]): 1112 | if item[0][0] == NORESULT: 1113 | reverse_index = i 1114 | continue 1115 | else: 1116 | break 1117 | 1118 | if reverse_index != -1: 1119 | new_cut_index = -1 1120 | reverse_index = len(history_data) - reverse_index - 1 1121 | if reverse_index in [0, 1]: 1122 | history_data = [] 1123 | else: 1124 | pass 1125 | 1126 | self._delay_custom[stream_id] = history_data 1127 | return ret_data 1128 | 1129 | class FilterWorker: 1130 | def __init__(self): 1131 | self.tmp_no_result = {'status': {'msg': 'No result', 'code': 1001, 'version': '1.0'}, 'metadata': {}} 1132 | self._result_map = [] 1133 | self.init_logger() 1134 | self._result_filter = ResultFilter(self.dlog) 1135 | 1136 | def init_logger(self): 1137 | self.dlog = acrcloud_logger.AcrcloudLogger('Filter_Log') 1138 | self.dlog.addStreamHandler() 1139 | 1140 | def save_one_delay(self, old_data, isCustom=0): 1141 | data = None 1142 | if isCustom: 1143 | data = self._result_filter.deal_delay_custom(old_data) 1144 | else: 1145 | data = self._result_filter.deal_delay_history(old_data) 1146 | 1147 | if data is not None: 1148 | del data["stream_id"] 1149 | self._result_map.append(data) 1150 | return True 1151 | else: 1152 | return False 1153 | 1154 | def save_one(self, jsondata): 1155 | try: 1156 | timestamp = jsondata['timestamp'] 1157 | if jsondata['result']['status']['code'] != 0: 1158 | jsondata['result']['metadata'] = {'timestamp_utc':timestamp} 1159 | elif 'metadata' in jsondata['result']: 1160 | jsondata['result']['metadata']['timestamp_utc'] = timestamp 1161 | 1162 | tmp_no_result_json = {'status': {'msg': 'No result', 'code': 1001, 'version': '1.0'}, 'metadata': {'timestamp_utc': timestamp}} 1163 | 1164 | ret = False 1165 | custom_data = copy.deepcopy(jsondata) 1166 | if jsondata['result']['status']['code'] != 0: 1167 
| ret = self.save_one_delay(jsondata, 0) 1168 | ret = self.save_one_delay(custom_data, 1) 1169 | elif 'metadata' in jsondata['result'] and 'custom_files' in jsondata['result']['metadata']: 1170 | if 'music' in jsondata['result']['metadata']: 1171 | del custom_data['result']['metadata']['music'] 1172 | del jsondata['result']['metadata']['custom_files'] 1173 | ret = self.save_one_delay(jsondata, 0) 1174 | else: 1175 | jsondata['result'] = copy.deepcopy(tmp_no_result_json) 1176 | ret = self.save_one_delay(jsondata, 0) 1177 | ret = self.save_one_delay(custom_data, 1) 1178 | elif 'metadata' in jsondata['result'] and 'music' in jsondata['result']['metadata']: 1179 | custom_data['result'] = copy.deepcopy(tmp_no_result_json) 1180 | ret = self.save_one_delay(jsondata, 0) 1181 | except Exception as e: 1182 | self.dlog.logger.error("Error@save_one", exc_info=True) 1183 | return ret 1184 | 1185 | def do_filter(self, tmp_id, filepath, result, rec_length, timestamp): 1186 | try: 1187 | jsoninfo = { 1188 | "stream_id": tmp_id, 1189 | "file":filepath, 1190 | "rec_length": rec_length, 1191 | "result": result, 1192 | "timestamp": timestamp 1193 | } 1194 | self.save_one(jsoninfo) 1195 | except Exception as e: 1196 | self.dlog.logger.error("Error@do_filter", exc_info=True) 1197 | 1198 | def end_filter(self, tmp_id, rec_length, timestamp): 1199 | try: 1200 | tmp_no_result = copy.deepcopy(self.tmp_no_result) 1201 | for i in range(1, 60): 1202 | tmp_timestamp = datetime.datetime.strptime(timestamp, "%H:%M:%S") 1203 | new_timestamp = (tmp_timestamp + relativedelta(seconds=int(i*rec_length))).strftime("%H:%M:%S") 1204 | jsoninfo = { 1205 | "stream_id": tmp_id, 1206 | "rec_length": rec_length, 1207 | "result": tmp_no_result, 1208 | "timestamp": new_timestamp 1209 | } 1210 | self.save_one(jsoninfo) 1211 | except Exception as e: 1212 | self.dlog.logger.error("Error@end_filter", exc_info=True) 1213 | 1214 | def start_filter(self, tmp_id, rec_length, timestamp): 1215 | try: 1216 | tmp_no_result = copy.deepcopy(self.tmp_no_result) 1217 | for i in range(1, 0, -1): 1218 | new_timestamp = timestamp 1219 | jsoninfo = { 1220 | "stream_id": tmp_id, 1221 | "rec_length": rec_length, 1222 | "result": tmp_no_result, 1223 | "timestamp": new_timestamp 1224 | } 1225 | self.save_one(jsoninfo) 1226 | except Exception as e: 1227 | self.dlog.logger.error("Error@start_filter", exc_info=True) 1228 | 1229 | def apply_filter(self, result_list): 1230 | try: 1231 | appid = datetime.datetime.utcnow().strftime("%Y%m%d%H%M%S") 1232 | rec_length = 10 1233 | timestamp = None 1234 | for index, item in enumerate(result_list): 1235 | filename = item["file"] 1236 | timestamp = item["timestamp"] 1237 | rec_length = item["rec_length"] 1238 | if index == 0: 1239 | self.start_filter(appid, rec_length, timestamp) 1240 | result = item["result"] 1241 | if "status" in result and result["status"]["code"] in [0, 1001]: 1242 | self.do_filter(appid, filename, result, rec_length, timestamp) 1243 | if timestamp is not None: 1244 | self.end_filter(appid, rec_length, timestamp) 1245 | except Exception as e: 1246 | self.dlog.logger.error("Error@apply_filter", exc_info=True) 1247 | return self._result_map 1248 | 1249 | def test(self): 1250 | a = '{"timestamp": "01 00:17:40", "rec_length": 10, "result": {"status": {"msg": "Success", "code": 0, "version": "1.0"}, "cost_time": 1.2630000114441, "result_type": 0, "metadata": {"timestamp_utc": "2018-08-02 14:44:39", "music": [{"album": {"name": "Solino"}, "play_offset_ms": 85200, "sample_begin_time_offset_ms": 300, "title": 
"La Bambola", "result_from": 1, "release_date": "2002-10-28", "sample_end_time_offset_ms": 9460, "genres": [{"name": "Pop"}], "label": "Amiga", "db_end_time_offset_ms": 85120, "score": 82, "db_begin_time_offset_ms": 75960, "artists": [{"name": "Patty Pravo"}], "duration_ms": 182200, "external_ids": {"isrc": "ITB006870616", "upc": "743219711328"}, "acrid": "27fef80da4dabc33591a2c08a08edaf0", "external_metadata": {"spotify": {"album": {"name": "Solino", "id": "0I3MXd5FYGAj6X9GOJepMb"}, "track": {"name": "La Bambola", "id": "5YT3WdXo5gBwZ0TlJiB0TE"}, "artists": [{"name": "Patty Pravo", "id": "2Yi5fknmHBqqKjHF6cXQyh"}]}, "deezer": {"album": {"name": "Solino", "id": "112016"}, "track": {"name": "La Bambola", "id": "1017795"}, "artists": [{"name": "Patty Pravo", "id": "58615"}]}, "youtube": {"vid": "UHCgZY-HX6U"}}}]}}, "file": "radioairplay_19/501.2018.06.19.04.00.00.mp3"}' 1251 | data = json.loads(a) 1252 | raw_title = self._result_filter.get_mutil_result_title(data, 'music', 1)[0] 1253 | sim_title = self._result_filter.tryStrSub(raw_title) 1254 | print(raw_title, sim_title) 1255 | --------------------------------------------------------------------------------