├── .gitattributes ├── LICENSE ├── MANIFEST.in ├── README.md ├── claravy ├── __init__.py ├── avalias.py ├── avparse.py ├── avstats.py ├── avtagger.py ├── data │ ├── confidence_model.pkl │ ├── default_aliases.txt │ ├── default_avs.json │ ├── default_ignore.txt │ ├── default_substr.txt │ └── default_taxonomy.txt ├── ibcc │ ├── IBCC.py │ ├── annotator.py │ ├── labelmodel.py │ └── utils.py ├── parsers │ ├── parse_acronis.py │ ├── parse_adaware.py │ ├── parse_agnitum.py │ ├── parse_ahnlabv3.py │ ├── parse_alibaba.py │ ├── parse_alibabacloud.py │ ├── parse_alyac.py │ ├── parse_antivir.py │ ├── parse_antiyavl.py │ ├── parse_apex.py │ ├── parse_arcabit.py │ ├── parse_avast.py │ ├── parse_avastmobile.py │ ├── parse_avg.py │ ├── parse_avira.py │ ├── parse_avware.py │ ├── parse_babable.py │ ├── parse_baidu.py │ ├── parse_baiduinternational.py │ ├── parse_bitdefender.py │ ├── parse_bitdefenderfalx.py │ ├── parse_bitdefendertheta.py │ ├── parse_bkav.py │ ├── parse_bytehero.py │ ├── parse_catquickheal.py │ ├── parse_clamav.py │ ├── parse_cmc.py │ ├── parse_commtouch.py │ ├── parse_comodo.py │ ├── parse_crowdstrike.py │ ├── parse_cybereason.py │ ├── parse_cylance.py │ ├── parse_cynet.py │ ├── parse_cyren.py │ ├── parse_cyrencloud.py │ ├── parse_deepinstinct.py │ ├── parse_drweb.py │ ├── parse_egambit.py │ ├── parse_elastic.py │ ├── parse_emsisoft.py │ ├── parse_endgame.py │ ├── parse_esafe.py │ ├── parse_esetnod32.py │ ├── parse_etrustvet.py │ ├── parse_fireeye.py │ ├── parse_fortinet.py │ ├── parse_fprot.py │ ├── parse_fsecure.py │ ├── parse_gdata.py │ ├── parse_google.py │ ├── parse_gridinsoft.py │ ├── parse_ikarus.py │ ├── parse_invincea.py │ ├── parse_jiangmin.py │ ├── parse_k7antivirus.py │ ├── parse_k7gw.py │ ├── parse_kaspersky.py │ ├── parse_kingsoft.py │ ├── parse_lionic.py │ ├── parse_malwarebytes.py │ ├── parse_max.py │ ├── parse_maxsecure.py │ ├── parse_mcafee.py │ ├── parse_mcafeegwedition.py │ ├── parse_microsoft.py │ ├── parse_microworldescan.py │ ├── 
parse_nanoantivirus.py │ ├── parse_nod32.py │ ├── parse_norman.py │ ├── parse_nprotect.py │ ├── parse_paloalto.py │ ├── parse_panda.py │ ├── parse_pctools.py │ ├── parse_qihoo360.py │ ├── parse_rising.py │ ├── parse_sangfor.py │ ├── parse_sentinelone.py │ ├── parse_skyhigh.py │ ├── parse_sophos.py │ ├── parse_superantispyware.py │ ├── parse_symantec.py │ ├── parse_symantecmobileinsight.py │ ├── parse_tachyon.py │ ├── parse_tehtris.py │ ├── parse_tencent.py │ ├── parse_thehacker.py │ ├── parse_totaldefense.py │ ├── parse_trapmine.py │ ├── parse_trendmicro.py │ ├── parse_trendmicrohousecall.py │ ├── parse_trustlook.py │ ├── parse_varist.py │ ├── parse_vba32.py │ ├── parse_vipre.py │ ├── parse_virit.py │ ├── parse_virobot.py │ ├── parse_virusbuster.py │ ├── parse_webroot.py │ ├── parse_xcitium.py │ ├── parse_yandex.py │ ├── parse_zillya.py │ ├── parse_zonealarm.py │ └── parse_zoner.py └── taxonomy.py ├── examples ├── v2_scan.jsonl └── v3_scan.jsonl ├── pyproject.toml └── setup.cfg /.gitattributes: -------------------------------------------------------------------------------- 1 | VirusShare/ filter=lfs diff=lfs merge=lfs -text 2 | VirusShare/virusshare_claravy_tags.zip filter=lfs diff=lfs merge=lfs -text 3 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include claravy/parsers/* 2 | include claravy/data/* 3 | include claravy/ibcc/* -------------------------------------------------------------------------------- /claravy/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | DATA_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data/") 3 | -------------------------------------------------------------------------------- /claravy/data/confidence_model.pkl: -------------------------------------------------------------------------------- 
@jit(nopython=True)
def unique(X):
    """Reimplementation of np.unique() so that it works with numba JIT.

    Sorts ``X`` and walks the sorted values once, collapsing every run of
    equal values into a single entry.

    Returns a ``(unique, counts)`` pair of lists: the distinct values in
    ascending order and, at the same index, the number of times each value
    occurs. Both lists are empty when ``X`` is empty.
    """
    b = np.sort(X)
    # Seed from a one-element slice (not b[0]) so an empty input simply
    # produces empty lists instead of an index error.
    unique = list(b[:1])
    # One starting count per seeded value (zero or one entries).
    # NOTE(review): presumably written as a comprehension so numba can infer
    # the list's element type even when it is empty -- confirm.
    counts = [1 for _ in unique]
    for x in b[1:]:
        if x != unique[-1]:
            # Sorted input: a value different from the last one starts a new run.
            unique.append(x)
            counts.append(1)
        else:
            # Same value as the current run: bump its count.
            counts[-1] += 1
    return unique, counts
class Parse_Adaware:  # Uses Bitdefender engine
    """Assign taxonomy labels to the tokens of Ad-Aware detection names.

    ``parse_delim_fmt`` maps a delimiter format string (tokens replaced by
    ``TOK``) to the method that labels names of that shape. Every handler
    takes the list of tokens and returns a list with one taxonomy label
    (from ``claravy.taxonomy``) per token position.
    """

    def __init__(self):
        # Handlers are numbered in the order their formats are listed, so the
        # i-th format (1-based) dispatches to parse_delim_fmt<i>.
        formats = (
            "TOK.TOK.TOK",
            "TOK.TOK.TOK.TOK",
            "TOK:TOK.TOK.TOK",
            "TOK:TOK.TOK.TOK.TOK",
            "TOK:TOK.TOK.TOK.TOK.TOK",
            "TOK:TOK.TOK.TOK@TOK",
            "TOK.TOK.TOK.TOK.TOK",
            "TOK.TOK.TOK!.TOK",
            "TOK:TOK.TOK.TOK.TOK@TOK",
            "TOK.TOK.TOK@TOK",
            "TOK.TOK",
            "TOK.TOK-TOK.TOK",
            "TOK.TOK.TOK!TOK!TOK.TOK",
            "TOK.TOK.TOK!TOK.TOK",
            "TOK:TOK.TOK.TOK!TOK.TOK",
            "TOK.TOK-TOK-TOK.TOK",
            "TOK:TOK.TOK.TOK@TOK!TOK",
            "TOK:TOK.TOK.TOK@TOK@TOK",
        )
        self.parse_delim_fmt = {
            fmt: getattr(self, "parse_delim_fmt%d" % idx)
            for idx, fmt in enumerate(formats, start=1)
        }

    # TOK.TOK.TOK
    def parse_delim_fmt1(self, tokens):
        """Label three dot-separated tokens; "Packer" names and numeric
        middle tokens are special-cased."""
        if tokens[0] == "Packer":
            return [PRE, PACK, SUF]
        if tokens[1].isnumeric():
            return [FAM, SUF, SUF]
        return [PRE, FAM, SUF]

    # TOK.TOK.TOK.TOK
    def parse_delim_fmt2(self, tokens):
        """Label four dot-separated tokens, deciding whether the third token
        is a family name or a suffix."""
        if tokens[3].isnumeric() and tokens[2].startswith("b"):
            return [PRE, FAM, SUF, SUF]
        if tokens[0] == "Packer":
            return [PRE, PRE, PACK, SUF]
        third = tokens[2]
        if third == "Gen" or third.isnumeric() or len(third) == 1:
            return [PRE, FAM, SUF, SUF]
        if len(third) <= 2 and third != "VB":
            # Short third token: an all-caps second token is treated as a
            # prefix rather than a family.
            second_label = PRE if tokens[1].isupper() else FAM
            return [PRE, second_label, SUF, SUF]
        return [PRE, PRE, FAM, SUF]

    # TOK:TOK.TOK.TOK
    def parse_delim_fmt3(self, tokens):
        """Label a colon-prefixed name; after "Packer" the third token is a
        packer, otherwise a family."""
        if tokens[1] == "Packer":
            return [PRE, PRE, PACK, SUF]
        return [PRE, PRE, FAM, SUF]

    # TOK:TOK.TOK.TOK.TOK
    def parse_delim_fmt4(self, tokens):
        """Label a colon-prefixed name with four dot-separated tokens."""
        if tokens[1] == "Packer":
            return [PRE, PRE, PACK, SUF, SUF]
        fourth = tokens[3]
        if fourth == "Gen" or fourth.isnumeric() or fourth.isupper():
            return [PRE, PRE, FAM, SUF, SUF]
        return [PRE, PRE, PRE, FAM, SUF]

    # TOK:TOK.TOK.TOK.TOK.TOK
    def parse_delim_fmt5(self, tokens):
        """Label a colon-prefixed name with five dot-separated tokens; the
        family position depends on how suffix-like tokens 3 and 4 look."""
        fifth = tokens[4]
        fifth_is_suffix = (
            fifth == "Gen"
            or fifth.isnumeric()
            or (len(fifth) <= 2 and fifth != "VB")
        )
        if not fifth_is_suffix:
            return [PRE, PRE, PRE, PRE, FAM, SUF]
        fourth = tokens[3]
        if fourth.isnumeric() or len(fourth) <= 2:
            return [PRE, PRE, FAM, SUF, SUF, SUF]
        return [PRE, PRE, PRE, FAM, SUF, SUF]

    # TOK:TOK.TOK.TOK@TOK
    def parse_delim_fmt6(self, tokens):
        """Fixed labelling for this shape."""
        return [PRE, PRE, FAM, SUF, SUF]

    # TOK.TOK.TOK.TOK.TOK
    def parse_delim_fmt7(self, tokens):
        """Label five dot-separated tokens; the family is the last token
        before the run of suffix-like tokens."""
        fourth = tokens[3]
        if not (fourth.isnumeric() or fourth == "Gen" or len(fourth) == 1):
            return [PRE, PRE, PRE, FAM, SUF]
        third = tokens[2]
        if third.isnumeric() or third == "Gen" or len(third) == 1:
            return [PRE, FAM, SUF, SUF, SUF]
        return [PRE, PRE, FAM, SUF, SUF]

    # TOK.TOK.TOK!.TOK
    def parse_delim_fmt8(self, tokens):
        """Fixed labelling for this shape."""
        return [PRE, PRE, SUF, SUF]

    # TOK:TOK.TOK.TOK.TOK@TOK
    def parse_delim_fmt9(self, tokens):
        """Label a colon-prefixed name ending in ``@TOK``."""
        if tokens[1] == "Packer":
            return [PRE, PRE, PACK, SUF, SUF, SUF]
        fourth = tokens[3]
        if fourth.isupper():
            return [PRE, PRE, PRE, SUF, SUF, SUF]
        if fourth.isnumeric():
            return [PRE, PRE, FAM, SUF, SUF, SUF]
        return [PRE, PRE, PRE, FAM, SUF, SUF]

    # TOK.TOK.TOK@TOK
    def parse_delim_fmt10(self, tokens):
        """Fixed labelling for this shape."""
        return [FILE, FAM, SUF, SUF]

    # TOK.TOK
    def parse_delim_fmt11(self, tokens):
        """Label two dot-separated tokens based on the length and character
        classes of the second token."""
        second = tokens[1]
        if len(second) == 4 and second.isupper():
            return [PRE, SUF]
        if len(second) <= 2 or second.isnumeric() or second == "Gen":
            # Suffix-like second token: an all-caps first token is left
            # unknown instead of being called a family.
            first_label = UNK if tokens[0].isupper() else FAM
            return [first_label, SUF]
        if len(second) == 3 and second.isupper():
            return [PRE, UNK]
        if re.match(r"^[0-9A-Z]+$", second):
            return [PRE, SUF]
        return [PRE, FAM]

    # TOK.TOK-TOK.TOK
    def parse_delim_fmt12(self, tokens):
        """Only the trailing token is labelled; the rest stay unknown."""
        # TODO: Lots of unusual formats in here
        return [UNK, UNK, UNK, SUF]

    # TOK.TOK.TOK!TOK!TOK.TOK
    def parse_delim_fmt13(self, tokens):
        """Fixed labelling for this shape."""
        return [PRE, PRE, SUF, SUF, SUF, SUF]

    # TOK.TOK.TOK!TOK.TOK
    def parse_delim_fmt14(self, tokens):
        """Fixed labelling for this shape."""
        return [PRE, PRE, SUF, SUF, SUF]

    # TOK:TOK.TOK.TOK!TOK.TOK
    def parse_delim_fmt15(self, tokens):
        """Fixed labelling for this shape."""
        return [PRE, PRE, PRE, SUF, SUF, SUF]

    # TOK.TOK-TOK-TOK.TOK
    def parse_delim_fmt16(self, tokens):
        """Label a hyphenated middle section; ``CVE-<digits>-<digits>`` is
        labelled as a vulnerability identifier."""
        is_cve = (
            tokens[1] == "CVE"
            and tokens[2].isnumeric()
            and tokens[3].isnumeric()
        )
        if is_cve:
            return [PRE, VULN, VULN, VULN, SUF]
        return [PRE, PRE, PRE, PRE, SUF]

    # TOK:TOK.TOK.TOK@TOK!TOK
    def parse_delim_fmt17(self, tokens):
        """Fixed labelling for this shape."""
        return [PRE, PRE, FAM, SUF, SUF, SUF]

    # TOK:TOK.TOK.TOK@TOK@TOK
    def parse_delim_fmt18(self, tokens):
        """Fixed labelling for this shape."""
        return [PRE, PRE, FAM, SUF, SUF, SUF]
class Parse_Ahnlabv3:
    """Assign taxonomy labels to the tokens of AhnLab-V3 detection names.

    ``parse_delim_fmt`` maps a delimiter format string (tokens replaced by
    ``TOK``) to the method that labels names of that shape. Every handler
    takes the list of tokens and returns one taxonomy label per token.
    """

    def __init__(self):
        # The i-th format (1-based) is handled by parse_delim_fmt<i>.
        formats = (
            "TOK/TOK.TOK",
            "TOK/TOK.TOK.TOK",
            "TOK/TOK",
            "TOK-TOK/TOK.TOK",
            "TOK-TOK/TOK.TOK.TOK",
            "TOK-TOK/TOK",
            "TOK/TOK.TOK_TOK.TOK",
            "TOK/TOK.TOK.TOK.TOK",
            "TOK.TOK",
            "TOK-TOK.TOK",
            "TOK/TOK-TOK-TOK",
        )
        self.parse_delim_fmt = {
            fmt: getattr(self, "parse_delim_fmt%d" % idx)
            for idx, fmt in enumerate(formats, start=1)
        }

    # TOK/TOK.TOK
    def parse_delim_fmt1(self, tokens):
        """Decide whether the last token is a suffix or the family name."""
        last = tokens[2]
        # Short all-caps tokens are suffixes, except a few that are real
        # family names.
        short_upper = (
            last.isupper()
            and len(last) <= 3
            and last not in ["VB", "BHO", "WOW"]
        )
        if short_upper or last.isnumeric() or re.match(r"^Gen[0-9]*$", last):
            return [PRE, FAM, SUF]
        return [PRE, PRE, FAM]

    # TOK/TOK.TOK.TOK
    def parse_delim_fmt2(self, tokens):
        """Decide whether the third token is a suffix or the family name."""
        third = tokens[2]
        if third.islower() or len(third) == 1 or third.isnumeric():
            return [PRE, FAM, SUF, SUF]
        return [PRE, PRE, FAM, SUF]

    # TOK/TOK
    def parse_delim_fmt3(self, tokens):
        """Fixed labelling for this shape."""
        return [PRE, FAM]

    # TOK-TOK/TOK.TOK
    def parse_delim_fmt4(self, tokens):
        """Fixed labelling for this shape."""
        return [FILE, PRE, FAM, SUF]

    # TOK-TOK/TOK.TOK.TOK
    def parse_delim_fmt5(self, tokens):
        """Fixed labelling for this shape."""
        return [FILE, CAT, FAM, SUF, SUF]

    # TOK-TOK/TOK
    def parse_delim_fmt6(self, tokens):
        """Fixed labelling for this shape."""
        return [FILE, CAT, FAM]

    # TOK/TOK.TOK_TOK.TOK
    def parse_delim_fmt7(self, tokens):
        """Fixed labelling for this shape."""
        return [CAT, FILE, PRE, FAM, SUF]

    # TOK/TOK.TOK.TOK.TOK
    def parse_delim_fmt8(self, tokens):
        """Label five tokens; a numeric fourth token or a one-character
        third token changes where the family sits."""
        if tokens[3].isnumeric():
            return [FILE, FAM, CAT, SUF, SUF]
        if len(tokens[2]) == 1:
            return [PRE, FAM, SUF, SUF, SUF]
        return [PRE, PRE, FAM, SUF, SUF]

    # TOK.TOK
    def parse_delim_fmt9(self, tokens):
        """Both tokens are left unknown."""
        # TODO: Inconsistent format
        return [UNK, UNK]

    # TOK-TOK.TOK
    def parse_delim_fmt10(self, tokens):
        """Fixed labelling for this shape."""
        return [FAM, FAM, SUF]

    # TOK/TOK-TOK-TOK
    def parse_delim_fmt11(self, tokens):
        """Fixed labelling: the three hyphenated tokens are all VULN."""
        return [PRE, VULN, VULN, VULN]
class Parse_Alibabacloud:
    """Assign taxonomy labels to the tokens of AlibabaCloud detection names.

    ``parse_delim_fmt`` maps a delimiter format string (tokens replaced by
    ``TOK``) to the method that labels names of that shape. Each handler
    returns one taxonomy label per token.
    """

    def __init__(self):
        handlers = {}
        handlers["TOK:TOK/TOK.TOK"] = self.parse_delim_fmt1
        handlers["TOK[TOK]:TOK/TOK.TOK"] = self.parse_delim_fmt2
        handlers["TOK:TOK/TOK.TOK(TOK)"] = self.parse_delim_fmt3
        handlers["TOK"] = self.parse_delim_fmt4
        handlers["TOK:TOK/TOK.TOK!TOK"] = self.parse_delim_fmt5
        # Currently disabled formats (no handler in this class). The handler
        # numbers the old commented-out entries carried match Parse_Alibaba's
        # methods, not the ones defined here, so they must not be re-enabled
        # as-is:
        #   "TOK:TOK/TOK-TOK-TOK.TOK"
        #   "TOK:TOK/TOK_TOK.TOK"
        #   "TOK:TOK/TOK-TOK.TOK"
        #   "TOK.TOK.TOK.TOK"
        #   "TOK.TOK.TOK.TOK.TOK"
        #   "TOK:TOK/TOK_TOK_TOK.TOK"
        #   "TOK.TOK.TOK-TOK.TOK"
        self.parse_delim_fmt = handlers

    # TOK:TOK/TOK.TOK
    def parse_delim_fmt1(self, tokens):
        """Fixed labelling for this shape."""
        return [CAT, FILE, FAM, SUF]

    # TOK[TOK]:TOK/TOK.TOK
    def parse_delim_fmt2(self, tokens):
        """Fixed labelling; the bracketed token is a second CAT."""
        return [CAT, CAT, FILE, FAM, SUF]

    # TOK:TOK/TOK.TOK(TOK)
    def parse_delim_fmt3(self, tokens):
        """Fixed labelling; the position after the closing ")" is NULL."""
        return [CAT, FILE, FAM, SUF, SUF, NULL]

    # TOK
    def parse_delim_fmt4(self, tokens):
        """A lone token is labelled CAT."""
        return [CAT]

    # TOK:TOK/TOK.TOK!TOK
    def parse_delim_fmt5(self, tokens):
        """Fixed labelling for this shape."""
        return [CAT, FILE, FAM, SUF, SUF]
# TOK[:TOK-TOK-TOK]/TOK.TOK.TOK
def parse_delim_fmt10(self, tokens):
    """Fixed labelling for the seven tokens of this Antiy-AVL shape.

    The token list is not inspected. The result is CAT, three PRE labels
    for the hyphenated tokens inside the brackets, then FILE, FAM and SUF.
    """
    return [CAT] + [PRE] * 3 + [FILE, FAM, SUF]
class Parse_Apex:
    """Assign taxonomy labels to the tokens of Apex detection names.

    ``parse_delim_fmt`` maps a delimiter format string (tokens replaced by
    ``TOK``) to the method that labels names of that shape.
    """

    def __init__(self):
        self.parse_delim_fmt = dict([
            ("TOK", self.parse_delim_fmt1),
            ("TOK TOK TOK (TOK TOK TOK).", self.parse_delim_fmt2),
        ])

    # TOK
    def parse_delim_fmt1(self, tokens):
        """A lone token is labelled PRE."""
        return [PRE]  # Always 'Malicious'

    # TOK TOK TOK (TOK TOK TOK).
    def parse_delim_fmt2(self, tokens):
        """All six words are PRE; the position after the final ")." is NULL."""
        return [PRE] * 6 + [NULL]
# TOK:TOK.TOK.TOK.TOK
def parse_delim_fmt4(self, tokens):
    """Label the five tokens of a ``TOK:TOK.TOK.TOK.TOK`` Arcabit name.

    "Packer" and "JS" names are special-cased first. Otherwise the fourth
    token decides where the family sits: a suffix-like fourth token (short,
    numeric, all caps/digits, or "Gen") makes the third token the family,
    except for "VB", which is itself a family name.
    """
    if tokens[1] == "Packer":
        return [PRE, PRE, PACK, SUF, SUF]
    if tokens[2] == "JS":
        return [PRE, PRE, PRE, FAM, SUF]
    fourth = tokens[3]
    looks_like_suffix = (
        len(fourth) <= 2
        or fourth.isnumeric()
        or re.match(r"^[A-Z0-9]+$", fourth)
        or fourth == "Gen"
    )
    if looks_like_suffix:
        if fourth == "VB":
            return [PRE, PRE, PRE, FAM, SUF]
        return [PRE, PRE, FAM, SUF, SUF]
    if tokens[1] == "Generic" and tokens[2] == "Malware":
        # Generic detection: no family token at all.
        return [PRE, PRE, PRE, SUF, SUF]
    return [PRE, PRE, PRE, FAM, SUF]
class Parse_Avast:  # Same company as AvastMobile. AVG is a subsidiary of Avast.
    """Assign taxonomy labels to the tokens of Avast detection names.

    ``parse_delim_fmt`` maps a delimiter format string (tokens replaced by
    ``TOK``) to the method that labels names of that shape. Every handler
    takes the list of tokens and returns one taxonomy label (from
    ``claravy.taxonomy``) per token position. For formats that end in a
    closing bracket, the final position is the one after the bracket and is
    labelled NULL.
    """

    def __init__(self):
        self.parse_delim_fmt = {
            "TOK:TOK-TOK [TOK]": self.parse_delim_fmt1,
            "TOK:TOK-TOK": self.parse_delim_fmt2,
            "TOK:TOK": self.parse_delim_fmt3,
            "TOK:TOK [TOK]": self.parse_delim_fmt4,
            "TOK [TOK]": self.parse_delim_fmt5,
            "TOK": self.parse_delim_fmt6,
            "TOK:TOK-TOK {TOK}": self.parse_delim_fmt7,
            "TOK-TOK": self.parse_delim_fmt8,
            "TOK:TOK-TOK@TOK [TOK]": self.parse_delim_fmt9,
            "TOK/TOK-TOK": self.parse_delim_fmt10,
            "TOK:TOK-TOK-TOK-TOK [TOK]": self.parse_delim_fmt11,
        }

    # TOK:TOK-TOK [TOK]
    def parse_delim_fmt1(self, tokens):
        """Fixed labelling; the bracketed token is the category."""
        return [PRE, FAM, SUF, CAT, NULL]

    # TOK:TOK-TOK
    def parse_delim_fmt2(self, tokens):
        """Fixed labelling for this shape."""
        return [PRE, FAM, SUF]

    # TOK:TOK
    def parse_delim_fmt3(self, tokens):
        """Fixed labelling for this shape."""
        return [PRE, FAM]

    # TOK:TOK [TOK]
    def parse_delim_fmt4(self, tokens):
        """Fixed labelling; the bracketed token is the category."""
        return [PRE, FAM, CAT, NULL]

    # TOK [TOK]
    def parse_delim_fmt5(self, tokens):
        """Label a single name plus bracketed category.

        Names starting with "FileRep" are labelled PRE instead of FAM.
        """
        if tokens[0].startswith("FileRep"):
            return [PRE, CAT, NULL]
        return [FAM, CAT, NULL]

    # TOK
    def parse_delim_fmt6(self, tokens):
        """Label a lone token: PRE for "FileRep..." names, FAM otherwise."""
        if tokens[0].startswith("FileRep"):
            return [PRE]
        return [FAM]

    # TOK:TOK-TOK {TOK}
    def parse_delim_fmt7(self, tokens):
        """Fixed labelling; the braced token is left unknown."""
        return [CAT, PRE, PRE, UNK, NULL]

    # TOK-TOK
    def parse_delim_fmt8(self, tokens):
        """Label two hyphen-separated tokens.

        A suffix-like second token ("gen", numeric, or one character) makes
        the first token the family. Otherwise an all-caps first token is
        treated as a bad format and both stay unknown; anything else is a
        two-part family name.
        """
        second = tokens[1]
        if second == "gen" or second.isnumeric() or len(second) == 1:
            return [FAM, SUF]
        if tokens[0].isupper():  # Bad format
            return [UNK, UNK]
        return [FAM, FAM]

    # TOK:TOK-TOK@TOK [TOK]
    def parse_delim_fmt9(self, tokens):
        """Fixed labelling; the token after "@" is left unknown."""
        return [CAT, FAM, SUF, UNK, CAT, NULL]

    # TOK/TOK-TOK
    def parse_delim_fmt10(self, tokens):
        """Fixed labelling for this shape."""
        return [FAM, FAM, SUF]

    # TOK:TOK-TOK-TOK-TOK [TOK]
    def parse_delim_fmt11(self, tokens):
        """Fixed labelling; tokens 2-4 are a vulnerability identifier.

        The final position (after the closing bracket) is NULL, matching
        every other bracketed format in this class; it was previously
        labelled SUF.
        """
        return [PRE, VULN, VULN, VULN, SUF, CAT, NULL]
"TOK/TOK_TOK.TOK": self.parse_delim_fmt21, 29 | "TOK-TOK.TOK": self.parse_delim_fmt22, 30 | "TOK-TOK": self.parse_delim_fmt23, 31 | "TOK/TOK{TOK?}": self.parse_delim_fmt24, 32 | "TOK/TOK{TOK+TOK}": self.parse_delim_fmt25, 33 | "TOK/TOK.TOK.TOK_TOK": self.parse_delim_fmt26, 34 | "TOK_TOK": self.parse_delim_fmt27, 35 | "TOK/TOK.TOK{TOK}": self.parse_delim_fmt28, 36 | } 37 | 38 | # TOK:TOK-TOK [TOK] 39 | def parse_delim_fmt1(self, tokens): 40 | return [FILE, FAM, SUF, CAT, NULL] 41 | 42 | # TOK.TOK 43 | def parse_delim_fmt2(self, tokens): 44 | tax = [UNK, UNK] 45 | if tokens[1].isupper() or tokens[1].isnumeric() or tokens[1].islower(): 46 | tax = [FAM, SUF] 47 | else: 48 | tax = [PRE, FAM] 49 | return tax 50 | 51 | # TOK/TOK 52 | def parse_delim_fmt3(self, tokens): 53 | return [FILE, FAM] 54 | 55 | # TOK.TOK.TOK 56 | def parse_delim_fmt4(self, tokens): 57 | tax = [UNK, UNK, SUF] 58 | if tokens[1].isupper() and tokens[1] != "VB": 59 | tax = [PRE, SUF, SUF] 60 | elif tokens[1].startswith("Generic"): 61 | tax = [PRE, PRE, SUF] 62 | elif tokens[1].isnumeric(): 63 | tax = [FAM, SUF, SUF] 64 | else: 65 | tax = [UNK, FAM, SUF] # Unlear Skodna and Luhe tokens 66 | return tax 67 | 68 | # TOK/TOK.TOK 69 | def parse_delim_fmt5(self, tokens): 70 | return [FILE, FAM, SUF] 71 | 72 | # TOK:TOK-TOK 73 | def parse_delim_fmt6(self, tokens): 74 | return [PRE, FAM, SUF] 75 | 76 | # TOK_TOK.TOK 77 | def parse_delim_fmt7(self, tokens): 78 | return [FAM, SUF, SUF] 79 | 80 | # TOK 81 | def parse_delim_fmt8(self, tokens): 82 | return [FAM] 83 | 84 | # TOK [TOK] 85 | def parse_delim_fmt9(self, tokens): 86 | tax = [UNK, UNK, NULL] 87 | if tokens[0].startswith("FileRep"): 88 | tax = [PRE, PRE, NULL] 89 | else: 90 | tax = [FAM, SUF, SUF] 91 | return tax 92 | 93 | # TOK:TOK [TOK] 94 | def parse_delim_fmt10(self, tokens): 95 | return [FILE, FAM, CAT, NULL] 96 | 97 | # TOK:TOK 98 | def parse_delim_fmt11(self, tokens): 99 | return [FILE, FAM] 100 | 101 | # TOK.TOK_TOK.TOK 102 | def 
parse_delim_fmt12(self, tokens): 103 | return [UNK, FAM, SUF, SUF] # Unclear Skodna token 104 | 105 | # TOK.TOK.TOK.TOK 106 | def parse_delim_fmt13(self, tokens): 107 | tax = [UNK, UNK, UNK, SUF] 108 | if tokens[2].isnumeric() or tokens[2].isupper(): 109 | tax = [UNK, FAM, SUF, SUF] 110 | else: # Bad format w/ Skodna, Luhe tokens 111 | tax = [UNK, UNK, UNK, SUF] 112 | return tax 113 | 114 | # TOK: TOK TOK 115 | def parse_delim_fmt14(self, tokens): 116 | return [PRE, PRE, PRE] 117 | 118 | # TOK/TOK.TOK.TOK 119 | def parse_delim_fmt15(self, tokens): 120 | tax = [PRE, UNK, UNK, SUF] 121 | if tokens[2].isnumeric() or len(tokens[2]) <= 2 or tokens[2].isupper(): 122 | tax = [PRE, FAM, SUF, SUF] 123 | else: 124 | tax = [PRE, PRE, FAM, SUF] 125 | return tax 126 | 127 | # TOK-TOK/TOK.TOK 128 | def parse_delim_fmt16(self, tokens): 129 | return [PRE, CAT, FAM, SUF] 130 | 131 | # TOK/TOK{TOK} 132 | def parse_delim_fmt17(self, tokens): 133 | return [PRE, UNK, SUF, NULL] 134 | 135 | # TOK TOK 136 | def parse_delim_fmt18(self, tokens): 137 | tax = [UNK, UNK] 138 | if tokens[0] == "unknown": 139 | tax = [UNK, UNK] 140 | elif tokens[1].isnumeric() or tokens[1].isupper() or tokens[1].islower(): 141 | tax = [FAM, SUF] 142 | else: 143 | tax = [FAM, FAM] 144 | return tax 145 | 146 | # TOK-TOK/TOK 147 | def parse_delim_fmt19(self, tokens): 148 | tax = [UNK, UNK, UNK] 149 | if tokens[2].isnumeric(): 150 | tax = [FAM, SUF, SUF] 151 | else: 152 | tax = [PRE, CAT, FAM] 153 | return tax 154 | 155 | # TOK TOK TOK/TOK{TOK} 156 | def parse_delim_fmt20(self, tokens): 157 | return [PRE, PRE, FILE, UNK, SUF, NULL] 158 | 159 | # TOK/TOK_TOK.TOK 160 | def parse_delim_fmt21(self, tokens): 161 | return [CAT, PRE, SUF, SUF] 162 | 163 | # TOK-TOK.TOK 164 | def parse_delim_fmt22(self, tokens): 165 | tax = [UNK, UNK, SUF] 166 | if tokens[2].isnumeric(): 167 | tax = [FAM, FAM, SUF] 168 | elif tokens[0] == "Rootkit": 169 | tax = [CAT, FAM, SUF] 170 | else: # Bad format 171 | tax = [UNK, UNK, SUF] 172 | 
return tax 173 | 174 | # TOK-TOK 175 | def parse_delim_fmt23(self, tokens): 176 | tax = [UNK, UNK] 177 | if tokens[0].isupper(): # Bad format 178 | tax = [UNK, UNK] 179 | elif tokens[1].isnumeric(): 180 | tax = [FAM, SUF] 181 | elif tokens[0] == "Rootkit": 182 | tax = [CAT, FAM] 183 | elif tokens[1] == "Obfuscated": 184 | tax = [PRE, PRE] 185 | elif tokens[1] == "gen": 186 | tax = [FAM, SUF] 187 | elif len(tokens[1]) == 1: 188 | tax = [UNK, UNK] 189 | else: 190 | tax = [FAM, FAM] 191 | return tax 192 | 193 | # TOK/TOK{TOK?} 194 | def parse_delim_fmt24(self, tokens): 195 | return [FILE, UNK, SUF, NULL] 196 | 197 | # TOK/TOK{TOK+TOK} 198 | def parse_delim_fmt25(self, tokens): 199 | return [FILE, UNK, SUF, SUF, NULL] 200 | 201 | # TOK/TOK.TOK.TOK_TOK 202 | def parse_delim_fmt26(self, tokens): 203 | return [PRE, PRE, SUF, SUF, SUF] 204 | 205 | # TOK_TOK 206 | def parse_delim_fmt27(self, tokens): 207 | return [FAM, FAM] 208 | 209 | # TOK/TOK.TOK{TOK} 210 | def parse_delim_fmt28(self, tokens): 211 | return [FILE, UNK, SUF, SUF, NULL] 212 | -------------------------------------------------------------------------------- /claravy/parsers/parse_avira.py: -------------------------------------------------------------------------------- 1 | import re 2 | from claravy.taxonomy import * 3 | 4 | 5 | class Parse_Avira: # Renamed from Antivir. Format somewhat similar to TheHacker, but probably not related. Partnership with F-Secure. 
6 | 7 | def __init__(self): 8 | self.parse_delim_fmt = { 9 | "TOK/TOK.TOK": self.parse_delim_fmt1, 10 | "TOK/TOK.TOK.TOK": self.parse_delim_fmt2, 11 | "TOK/TOK.TOK.TOK.TOK": self.parse_delim_fmt3, 12 | "TOK/TOK": self.parse_delim_fmt4, 13 | "TOK.TOK": self.parse_delim_fmt5, 14 | } 15 | 16 | # TOK/TOK.TOK 17 | def parse_delim_fmt1(self, tokens): 18 | tax = [PRE, UNK, UNK] 19 | if tokens[2].isupper() or tokens[2].islower() or tokens[2].isnumeric() or re.match(r"^Gen[0-9]*$", tokens[2]): 20 | tax = [PRE, FAM, SUF] 21 | else: # Bad format 22 | tax = [PRE, UNK, UNK] 23 | return tax 24 | 25 | # TOK/TOK.TOK.TOK 26 | def parse_delim_fmt2(self, tokens): 27 | tax = [CAT, UNK, UNK, SUF] 28 | if tokens[2].islower() or tokens[2].isnumeric() or len(tokens[2]) <= 2: 29 | tax = [CAT, FAM, SUF, SUF] 30 | elif tokens[2].isupper() and len(tokens[2]) <= 4: 31 | tax = [CAT, FAM, SUF, SUF] 32 | else: 33 | tax = [CAT, CAT, FAM, SUF] 34 | return tax 35 | 36 | # TOK/TOK.TOK.TOK.TOK 37 | def parse_delim_fmt3(self, tokens): 38 | tax = [PRE, UNK, UNK, UNK, SUF] 39 | if len(tokens[2]) == 1: 40 | tax = [PRE, FAM, SUF, SUF, SUF] 41 | elif not tokens[3].isupper() and not tokens[3].islower() and not tokens[3].isnumeric(): 42 | if tokens[4] == "Gen": 43 | tax = [PRE, PRE, PRE, PRE, SUF] 44 | elif tokens[2].isnumeric() or tokens[2].islower() or len(tokens[2]) <= 2: 45 | tax = [PRE, FAM, SUF, SUF, SUF] 46 | else: # Bad format 47 | tax = [PRE, UNK, UNK, UNK, SUF] 48 | elif tokens[2].isnumeric() or tokens[2].islower(): 49 | tax = [PRE, FAM, SUF, SUF, SUF] 50 | elif len(tokens[2]) <= 2 and tokens[2] != "VB": 51 | tax = [PRE, FAM, SUF, SUF, SUF] 52 | elif tokens[2].isupper() and len(tokens[2]) == 3: # Bad format 53 | tax = [PRE, UNK, UNK, SUF, SUF] 54 | else: 55 | tax = [PRE, PRE, FAM, SUF, SUF] 56 | return tax 57 | 58 | # TOK/TOK 59 | def parse_delim_fmt4(self, tokens): 60 | tax = [UNK, UNK] 61 | if tokens[1].isnumeric(): 62 | tax = [PRE, SUF] 63 | elif tokens[1].islower() or tokens[1].isupper(): # Bad 
format 64 | tax = [UNK, UNK] 65 | else: 66 | tax = [PRE, FAM] 67 | return tax 68 | 69 | # TOK.TOK 70 | def parse_delim_fmt5(self, tokens): 71 | tax = [UNK, UNK] 72 | if tokens[0].islower() or tokens[1].islower(): # Bad format 73 | tax = [UNK, UNK] 74 | elif tokens[1].isnumeric() or len(tokens[1]) == 1 or tokens[1].isupper(): 75 | tax = [FAM, SUF] 76 | else: # Bad format 77 | tax = [UNK, UNK] 78 | return tax 79 | -------------------------------------------------------------------------------- /claravy/parsers/parse_avware.py: -------------------------------------------------------------------------------- 1 | import re 2 | from claravy.taxonomy import * 3 | 4 | 5 | class Parse_Avware: # Uses Vipre engine 6 | 7 | def __init__(self): 8 | self.parse_delim_fmt = { 9 | "TOK.TOK.TOK.TOK (TOK)": self.parse_delim_fmt1, 10 | "TOK.TOK.TOK!TOK": self.parse_delim_fmt2, 11 | "TOK.TOK.TOK (TOK)": self.parse_delim_fmt3, 12 | "TOK (TOK)": self.parse_delim_fmt4, 13 | "TOK-TOK.TOK.TOK.TOK (TOK)": self.parse_delim_fmt5, 14 | "TOK-TOK.TOK.TOK (TOK)": self.parse_delim_fmt6, 15 | "TOK.TOK.TOK.TOK!TOK": self.parse_delim_fmt7, 16 | "TOK.TOK.TOK!TOK.TOK": self.parse_delim_fmt8, 17 | "TOK.TOK": self.parse_delim_fmt9, 18 | "TOK": self.parse_delim_fmt10, 19 | "TOK.TOK.TOK.TOK.TOK (TOK)": self.parse_delim_fmt11, 20 | "TOK/TOK (TOK)": self.parse_delim_fmt12, 21 | "TOK TOK (TOK)": self.parse_delim_fmt13, 22 | "TOK.TOK.TOK": self.parse_delim_fmt14, 23 | "TOK.TOK.TOK.TOK!TOK (TOK)": self.parse_delim_fmt15, 24 | "TOK.TOK (TOK)": self.parse_delim_fmt16, 25 | "TOK.TOK.TOK.TOK (TOK-TOK)": self.parse_delim_fmt17, 26 | "TOK TOK. 
(TOK)": self.parse_delim_fmt18, 27 | "TOK.TOK.TOK!TOK (TOK)": self.parse_delim_fmt19, 28 | "TOK.TOK.TOK-TOK (TOK)": self.parse_delim_fmt20, 29 | "TOK-TOK.TOK.TOK": self.parse_delim_fmt21, 30 | "TOK.TOK.TOK-TOK-TOK (TOK)": self.parse_delim_fmt22, 31 | "TOK.TOK.TOK-TOK-TOK.TOK (TOK)": self.parse_delim_fmt23 32 | } 33 | 34 | # TOK.TOK.TOK.TOK (TOK) 35 | def parse_delim_fmt1(self, tokens): 36 | tax = [CAT, FILE, UNK, UNK, SUF, NULL] 37 | if tokens[2] == "Packer": 38 | tax = [CAT, FILE, PRE, PACK, SUF, NULL] 39 | else: 40 | tax = [CAT, FILE, FAM, SUF, SUF, NULL] 41 | return tax 42 | 43 | # TOK.TOK.TOK!TOK 44 | def parse_delim_fmt2(self, tokens): 45 | if tokens[2] == "Generic": 46 | tax = [PRE, PRE, PRE, SUF] 47 | else: 48 | tax = [PRE, PRE, FAM, SUF] 49 | return tax 50 | 51 | # TOK.TOK.TOK (TOK) 52 | def parse_delim_fmt3(self, tokens): 53 | tax = [PRE, UNK, UNK, SUF, NULL] 54 | if tokens[0] == "Packer": 55 | tax = [PRE, PACK, SUF, SUF, NULL] 56 | elif len(tokens[2]) <= 2 or tokens[2].lower() == "gen": 57 | tax = [PRE, FAM, SUF, SUF, NULL] 58 | elif tokens[2].isnumeric(): 59 | tax = [PRE, FAM, SUF, SUF, NULL] 60 | else: 61 | tax = [PRE, FILE, FAM, SUF, NULL] 62 | return tax 63 | 64 | # TOK (TOK) 65 | def parse_delim_fmt4(self, tokens): 66 | return [FAM, SUF, NULL] 67 | 68 | # TOK-TOK.TOK.TOK.TOK (TOK) 69 | def parse_delim_fmt5(self, tokens): 70 | return [CAT, CAT, FILE, FAM, SUF, SUF, NULL] 71 | 72 | # TOK-TOK.TOK.TOK (TOK) 73 | def parse_delim_fmt6(self, tokens): 74 | return [CAT, CAT, FILE, FAM, SUF, NULL] 75 | 76 | # TOK.TOK.TOK.TOK!TOK 77 | def parse_delim_fmt7(self, tokens): 78 | return [CAT, FILE, FAM, SUF, SUF] 79 | 80 | # TOK.TOK.TOK!TOK.TOK 81 | def parse_delim_fmt8(self, tokens): 82 | return [PRE, PRE, PRE, SUF, SUF] 83 | 84 | # TOK.TOK 85 | def parse_delim_fmt9(self, tokens): 86 | if tokens[1].isnumeric() or tokens[1].islower(): 87 | tax = [FAM, SUF] 88 | else: 89 | tax = [UNK, FAM] 90 | return tax 91 | 92 | # TOK 93 | def parse_delim_fmt10(self, tokens): 94 | 
return [FAM] 95 | 96 | # TOK.TOK.TOK.TOK.TOK (TOK) 97 | def parse_delim_fmt11(self, tokens): 98 | return [CAT, FILE, FAM, SUF, SUF, SUF, NULL] 99 | 100 | # TOK/TOK (TOK) 101 | def parse_delim_fmt12(self, tokens): 102 | return [FAM, FAM, SUF, NULL] 103 | 104 | # TOK TOK (TOK) 105 | def parse_delim_fmt13(self, tokens): 106 | if tokens[0] == "Corrupted": 107 | tax = [PRE, PRE, SUF, NULL] 108 | else: 109 | tax = [FAM, FAM, SUF, NULL] 110 | return tax 111 | 112 | # TOK.TOK.TOK 113 | def parse_delim_fmt14(self, tokens): 114 | tax = [PRE, UNK, UNK] 115 | if tokens[2] == "gen": 116 | tax = [PRE, PRE, SUF] 117 | elif tokens[2].isupper() or tokens[2].islower(): # Bad format 118 | tax = [PRE, UNK, UNK] 119 | else: 120 | tax = [PRE, PRE, FAM] 121 | return tax 122 | 123 | # TOK.TOK.TOK.TOK!TOK (TOK) 124 | def parse_delim_fmt15(self, tokens): 125 | return [CAT, FILE, FAM, SUF, SUF, SUF, NULL] 126 | 127 | # TOK.TOK (TOK) 128 | def parse_delim_fmt16(self, tokens): 129 | tax = [UNK, UNK, SUF, NULL] 130 | if tokens[1].islower() or len(tokens[1]) == 1 or tokens[1] == "Gen": 131 | tax = [FAM, SUF, SUF, NULL] 132 | else: 133 | tax = [PRE, FAM, SUF, SUF] 134 | return tax 135 | 136 | # TOK.TOK.TOK.TOK (TOK-TOK) 137 | def parse_delim_fmt17(self, tokens): 138 | return [PRE, PRE, PRE, SUF, SUF, SUF, NULL] 139 | 140 | # TOK TOK. 
(TOK) 141 | def parse_delim_fmt18(self, tokens): 142 | return [FAM, FAM, SUF, NULL] 143 | 144 | # TOK.TOK.TOK!TOK (TOK) 145 | def parse_delim_fmt19(self, tokens): 146 | tax = [PRE, UNK, UNK, SUF, SUF, SUF] 147 | if len(tokens[2]) == 1: 148 | tax = [PRE, FAM, SUF, SUF, SUF, SUF] 149 | else: 150 | tax = [PRE, PRE, FAM, SUF, SUF, SUF] 151 | return tax 152 | 153 | # TOK.TOK.TOK-TOK (TOK) 154 | def parse_delim_fmt20(self, tokens): 155 | tax = [] 156 | if re.match(r"^CVE[0-9]{4}", tokens[2]) and tokens[3].isnumeric(): 157 | tax = [PRE, PRE, VULN, VULN, SUF, NULL] 158 | elif tokens[2].islower() or len(tokens[2]) <= 2: 159 | tax = [PRE, FAM, SUF, SUF, SUF, NULL] 160 | else: 161 | tax = [CAT, FILE, FAM, SUF, SUF, NULL] 162 | return tax 163 | 164 | # TOK.TOK.TOK-TOK (TOK) 165 | def parse_delim_fmt21(self, tokens): 166 | tax = [UNK, CAT, UNK, UNK] 167 | if tokens[3].isupper() or tokens[3].isnumeric() or tokens[3].islower() or tokens[3] == "Gen": 168 | tax = [PRE, CAT, FAM, SUF] 169 | else: 170 | tax = [CAT, CAT, FILE, FAM] 171 | return tax 172 | 173 | # TOK.TOK.TOK-TOK-TOK (TOK) 174 | def parse_delim_fmt22(self, tokens): 175 | return [PRE, FILE, VULN, VULN, VULN, SUF, NULL] 176 | 177 | # TOK.TOK.TOK-TOK-TOK.TOK (TOK) 178 | def parse_delim_fmt23(self, tokens): 179 | return [PRE, FILE, VULN, VULN, VULN, SUF, SUF, NULL] 180 | 181 | -------------------------------------------------------------------------------- /claravy/parsers/parse_babable.py: -------------------------------------------------------------------------------- 1 | from claravy.taxonomy import * 2 | 3 | 4 | class Parse_Babable: 5 | 6 | def __init__(self): 7 | self.parse_delim_fmt = { 8 | "TOK.TOK": self.parse_delim_fmt1, 9 | } 10 | 11 | # TOK.TOK 12 | def parse_delim_fmt1(self, tokens): 13 | return [PRE, SUF] 14 | -------------------------------------------------------------------------------- /claravy/parsers/parse_baidu.py: -------------------------------------------------------------------------------- 1 | from 
claravy.taxonomy import * 2 | 3 | 4 | class Parse_Baidu: # Same company as Baiduinternational. Possibly related to Tencent, but unclear. May use Avira engine, but label formats seem unrelated. 5 | 6 | def __init__(self): 7 | self.parse_delim_fmt = { 8 | "TOK.TOK.TOK.TOK": self.parse_delim_fmt1, 9 | "TOK.TOK-TOK.TOK.TOK": self.parse_delim_fmt2, 10 | "TOK.TOK.TOK.TOK.TOK.TOK": self.parse_delim_fmt3, 11 | "TOK.TOK.TOK": self.parse_delim_fmt4, 12 | } 13 | 14 | # TOK.TOK.TOK.TOK 15 | def parse_delim_fmt1(self, tokens): 16 | tax = [FILE, UNK, UNK, SUF] 17 | if tokens[1] == "Packed": 18 | tax = [FILE, PRE, PACK, SUF] 19 | else: 20 | tax = [FILE, CAT, FAM, SUF] 21 | return tax 22 | 23 | # TOK.TOK-TOK.TOK.TOK 24 | def parse_delim_fmt2(self, tokens): 25 | return [FILE, PRE, PRE, FAM, SUF] 26 | 27 | # TOK.TOK.TOK.TOK.TOK.TOK 28 | def parse_delim_fmt3(self, tokens): 29 | return [PRE, PRE, FAM, SUF, SUF, SUF] 30 | 31 | # TOK.TOK.TOK 32 | def parse_delim_fmt4(self, tokens): 33 | return [PRE, PRE, PRE] 34 | -------------------------------------------------------------------------------- /claravy/parsers/parse_baiduinternational.py: -------------------------------------------------------------------------------- 1 | from claravy.taxonomy import * 2 | 3 | 4 | class Parse_Baiduinternational: # Same company as Baidu 5 | 6 | def __init__(self): 7 | self.parse_delim_fmt = { 8 | "TOK.TOK.TOK.TOK": self.parse_delim_fmt1, 9 | "TOK.TOK.TOK.$TOK": self.parse_delim_fmt2, 10 | } 11 | 12 | # TOK.TOK.TOK.TOK 13 | def parse_delim_fmt1(self, tokens): 14 | return [FILE, CAT, FAM, SUF] 15 | 16 | # TOK.TOK.TOK.$TOK 17 | def parse_delim_fmt2(self, tokens): 18 | return [FILE, CAT, FAM, SUF] 19 | -------------------------------------------------------------------------------- /claravy/parsers/parse_bitdefender.py: -------------------------------------------------------------------------------- 1 | import re 2 | from claravy.taxonomy import * 3 | 4 | 5 | class Parse_Bitdefender: 6 | 7 | def 
__init__(self): 8 | self.parse_delim_fmt = { 9 | "TOK.TOK.TOK": self.parse_delim_fmt1, 10 | "TOK.TOK.TOK.TOK": self.parse_delim_fmt2, 11 | "TOK:TOK.TOK.TOK": self.parse_delim_fmt3, 12 | "TOK:TOK.TOK.TOK.TOK": self.parse_delim_fmt4, 13 | "TOK:TOK.TOK.TOK.TOK.TOK": self.parse_delim_fmt5, 14 | "TOK:TOK.TOK.TOK@TOK": self.parse_delim_fmt6, 15 | "TOK:TOK.TOK.TOK.TOK@TOK": self.parse_delim_fmt7, 16 | "TOK.TOK.TOK.TOK.TOK": self.parse_delim_fmt8, 17 | } 18 | 19 | # TOK.TOK.TOK 20 | def parse_delim_fmt1(self, tokens): 21 | tax = [UNK, UNK, UNK] 22 | if tokens[0] == "Packer": 23 | tax = [PRE, PACK, SUF] 24 | elif any(filter(str.islower, tokens[2])): 25 | if tokens[1].startswith("Heur"): 26 | tax = [PRE, PRE, SUF] 27 | elif tokens[2] in ["Gen", "Dam", "based", "Generic"]: 28 | if tokens[2] in ["based", "Generic"] or tokens[1].isnumeric(): 29 | tax = [UNK, SUF, SUF] 30 | else: 31 | tax = [PRE, FAM, SUF] 32 | elif any(filter(str.isnumeric, tokens[2])): 33 | tax = [PRE, UNK, SUF] 34 | else: 35 | tax = [PRE, CAT, FAM] 36 | else: 37 | tax = [PRE, FAM, SUF] 38 | return tax 39 | 40 | # TOK.TOK.TOK.TOK 41 | def parse_delim_fmt2(self, tokens): 42 | tax = [NULL, NULL, NULL, NULL] 43 | if tokens[0] == "Packer": 44 | tax = [PRE, PRE, PACK, SUF] 45 | elif tokens[3].isnumeric(): 46 | if tokens[2] in ["Gen", "GenericKD"] or len(tokens[2]) <= 2: 47 | tax = [PRE, FAM, SUF, SUF] 48 | elif tokens[2].isupper() and len(tokens[2]) <= 3: 49 | tax = [PRE, FAM, SUF, SUF] 50 | else: 51 | tax = [PRE, PRE, FAM, SUF] 52 | else: 53 | if len(tokens[2]) <= 3 and tokens[2] != "VB": 54 | tax = [PRE, FAM, SUF, SUF] 55 | else: 56 | tax = [PRE, PRE, FAM, SUF] 57 | return tax 58 | 59 | # TOK:TOK.TOK.TOK 60 | def parse_delim_fmt3(self, tokens): 61 | tax = [PRE, UNK, UNK, SUF] 62 | if tokens[1] == "Packer" and tokens[2] != "Generic": 63 | tax = [PRE, PRE, PACK, SUF] 64 | else: 65 | tax = [PRE, PRE, FAM, SUF] 66 | return tax 67 | 68 | # TOK:TOK.TOK.TOK.TOK 69 | def parse_delim_fmt4(self, tokens): 70 | tax = [PRE, 
PRE, UNK, UNK, SUF] 71 | if tokens[1] == "Packer": 72 | tax = [PRE, PRE, PACK, SUF, SUF] 73 | elif tokens[2] == "JS": 74 | tax = [PRE, PRE, PRE, FAM, SUF] 75 | elif len(tokens[3]) <= 2 or tokens[3].isnumeric() or re.match(r"^[A-Z0-9]+$", tokens[3]) or tokens[3] == "Gen": 76 | if tokens[3] == "VB": 77 | tax = [PRE, PRE, PRE, FAM, SUF] 78 | else: 79 | tax = [PRE, PRE, FAM, SUF, SUF] 80 | elif tokens[1] == "Generic" and tokens[2] == "Malware": 81 | tax = [PRE, PRE, PRE, SUF, SUF] 82 | else: 83 | tax = [PRE, PRE, PRE, FAM, SUF] 84 | return tax 85 | 86 | # TOK:TOK.TOK.TOK.TOK.TOK 87 | def parse_delim_fmt5(self, tokens): 88 | tax = [PRE, PRE, NULL, NULL, NULL, SUF] 89 | if tokens[4].isnumeric() or len(tokens[4]) == 1: 90 | if tokens[3].isnumeric() or len(tokens[3]) == 1: 91 | tax = [PRE, PRE, FAM, SUF, SUF, SUF] 92 | else: 93 | tax = [PRE, PRE, PRE, FAM, SUF, SUF] 94 | elif len(tokens[4]) <= 3: 95 | if (tokens[4].isupper() and tokens[4] != "VB") or tokens[4] == "Gen": 96 | tax = [PRE, PRE, PRE, FAM, SUF, SUF] 97 | else: 98 | tax = [PRE, PRE, PRE, PRE, FAM, SUF] 99 | else: 100 | tax = [PRE, PRE, PRE, PRE, FAM, SUF] 101 | return tax 102 | 103 | # TOK:TOK.TOK.TOK@TOK 104 | def parse_delim_fmt6(self, tokens): 105 | tax = [PRE, PRE, UNK, SUF, SUF] 106 | if tokens[2].startswith("Heur"): 107 | tax = [PRE, PRE, PRE, SUF, SUF] 108 | else: 109 | tax = [PRE, PRE, FAM, SUF, SUF] 110 | return tax 111 | 112 | # TOK:TOK.TOK.TOK.TOK@TOK 113 | def parse_delim_fmt7(self, tokens): 114 | tax = [PRE, UNK, UNK, UNK, SUF, SUF] 115 | if tokens[1] == "Packer": 116 | tax = [PRE, PRE, PACK, SUF, SUF, SUF] 117 | elif tokens[3].isupper(): 118 | tax = [PRE, PRE, PRE, SUF, SUF, SUF] 119 | elif tokens[3].isnumeric(): 120 | tax = [PRE, PRE, FAM, SUF, SUF, SUF] 121 | else: 122 | tax = [PRE, PRE, PRE, FAM, SUF, SUF] 123 | return tax 124 | 125 | # TOK.TOK.TOK.TOK.TOK 126 | def parse_delim_fmt8(self, tokens): 127 | tax = [PRE, UNK, UNK, UNK, SUF] 128 | if re.match(r"^M[Ss][0-9]+$", tokens[2]) and 
tokens[3].isnumeric(): 129 | tax = [PRE, PRE, VULN, VULN, SUF] 130 | elif tokens[3].isnumeric() or tokens[3] == "Gen" or len(tokens[3]) == 1: 131 | if tokens[2].isnumeric() or tokens[2] == "Gen" or len(tokens[2]) == 1: 132 | tax = [PRE, FAM, SUF, SUF, SUF] 133 | else: 134 | tax = [PRE, PRE, FAM, SUF, SUF] 135 | else: 136 | tax = [PRE, PRE, PRE, FAM, SUF] 137 | return tax 138 | 139 | 140 | -------------------------------------------------------------------------------- /claravy/parsers/parse_bitdefenderfalx.py: -------------------------------------------------------------------------------- 1 | from claravy.taxonomy import * 2 | 3 | 4 | class Parse_Bitdefenderfalx: # Same company as Bitdefender 5 | 6 | def __init__(self): 7 | self.parse_delim_fmt = { 8 | "TOK.TOK.TOK.TOK": self.parse_delim_fmt1, 9 | } 10 | 11 | # TOK.TOK.TOK.TOK 12 | def parse_delim_fmt1(self, tokens): 13 | return [FILE, CAT, FAM, SUF] 14 | -------------------------------------------------------------------------------- /claravy/parsers/parse_bitdefendertheta.py: -------------------------------------------------------------------------------- 1 | from claravy.taxonomy import * 2 | 3 | 4 | class Parse_Bitdefendertheta: 5 | 6 | def __init__(self): 7 | self.parse_delim_fmt = { 8 | "TOK:TOK.TOK": self.parse_delim_fmt1, 9 | "TOK:TOK.TOK.TOK.TOK@TOK": self.parse_delim_fmt2, 10 | "TOK:TOK.TOK.TOK.TOK": self.parse_delim_fmt3, 11 | "TOK:TOK.TOK.TOK.TOK@TOK!TOK": self.parse_delim_fmt4, 12 | "TOK:TOK.TOK.TOK.TOK@TOK@TOK": self.parse_delim_fmt5, 13 | "TOK:TOK.TOK.TOK": self.parse_delim_fmt6, 14 | "TOK:TOK.TOK.TOK.@TOK@TOK": self.parse_delim_fmt7, 15 | "TOK:TOK.TOK.TOK.TOK!TOK": self.parse_delim_fmt8, 16 | } 17 | 18 | 19 | # TOK:TOK.TOK 20 | def parse_delim_fmt1(self, tokens): 21 | return [PRE, CAT, SUF] 22 | 23 | # TOK:TOK.TOK.TOK.TOK@TOK 24 | def parse_delim_fmt2(self, tokens): 25 | return [PRE, PRE, SUF, SUF, SUF, SUF] 26 | 27 | # TOK:TOK.TOK.TOK.TOK 28 | def parse_delim_fmt3(self, tokens): 29 | return [PRE, 
PRE, SUF, SUF, SUF] 30 | 31 | # TOK:TOK.TOK.TOK.TOK@TOK!TOK 32 | def parse_delim_fmt4(self, tokens): 33 | return [PRE, PRE, SUF, SUF, SUF, SUF, SUF] 34 | 35 | # TOK:TOK.TOK.TOK.TOK@TOK@TOK 36 | def parse_delim_fmt5(self, tokens): 37 | return [PRE, PRE, SUF, SUF, SUF, SUF, SUF] 38 | 39 | # TOK:TOK.TOK.TOK 40 | def parse_delim_fmt6(self, tokens): 41 | return [PRE, PRE, FAM, SUF] 42 | 43 | # TOK:TOK.TOK.TOK.@TOK@TOK 44 | def parse_delim_fmt7(self, tokens): 45 | return [PRE, PRE, SUF, SUF, SUF, SUF] 46 | 47 | # TOK:TOK.TOK.TOK.TOK!TOK 48 | def parse_delim_fmt8(self, tokens): 49 | return [PRE, PRE, SUF, SUF, SUF, SUF] 50 | -------------------------------------------------------------------------------- /claravy/parsers/parse_bkav.py: -------------------------------------------------------------------------------- 1 | from claravy.taxonomy import * 2 | 3 | 4 | class Parse_Bkav: 5 | 6 | # TODO: Need to fix famvt family! It's a prefix and there are other families in that label 7 | 8 | def __init__(self): 9 | self.parse_delim_fmt = { 10 | "TOK.TOK.TOK": self.parse_delim_fmt1, 11 | "TOK.TOK.": self.parse_delim_fmt2, 12 | "TOK.TOK.TOK.TOK": self.parse_delim_fmt3, 13 | "TOK.TOK": self.parse_delim_fmt4, 14 | "TOK.TOK.TOK.TOK.TOK": self.parse_delim_fmt5, 15 | } 16 | 17 | 18 | # TOK.TOK.TOK 19 | def parse_delim_fmt1(self, tokens): 20 | return [FILE, FAM, UNK] # Last token either SUF or CAT 21 | 22 | # TOK.TOK. 
23 | def parse_delim_fmt2(self, tokens): 24 | return [FILE, FAM, NULL] 25 | 26 | # TOK.TOK.TOK.TOK 27 | def parse_delim_fmt3(self, tokens): 28 | tax = [PRE, UNK, UNK, UNK] 29 | if tokens[1].lower().startswith("fam"): 30 | if tokens[3] == "PE": 31 | tax = [FILE, PRE, FAM, FILE] 32 | else: 33 | tax = [FILE, PRE, FAM, CAT] 34 | elif tokens[1].startswith("Clod"): 35 | tax = [PRE, SUF, UNK, UNK] 36 | else: 37 | tax = [PRE, FAM, UNK, UNK] 38 | return tax 39 | 40 | # TOK.TOK 41 | def parse_delim_fmt4(self, tokens): 42 | return [UNK, FAM] # First token either SUF or FILE 43 | 44 | # TOK.TOK.TOK.TOK.TOK 45 | def parse_delim_fmt5(self, tokens): 46 | tax = [PRE, UNK, UNK, UNK] 47 | if tokens[1].startswith("Fam"): 48 | if tokens[4] == "PE": 49 | tax = [FILE, PRE, FAM, SUF, FILE] 50 | else: 51 | tax = [FILE, PRE, FAM, SUF, CAT] 52 | else: 53 | tax = [FILE, CAT, FAM, SUF, SUF] 54 | return tax 55 | -------------------------------------------------------------------------------- /claravy/parsers/parse_bytehero.py: -------------------------------------------------------------------------------- 1 | from claravy.taxonomy import * 2 | 3 | 4 | class Parse_Bytehero: 5 | 6 | def __init__(self): 7 | self.parse_delim_fmt = { 8 | "TOK.TOK.TOK.TOK": self.parse_delim_fmt1, 9 | "TOK.TOK.TOK.TOK.TOK": self.parse_delim_fmt2, 10 | "TOK-TOK.TOK.TOK.TOK.TOK": self.parse_delim_fmt3, 11 | "TOK-TOK.TOK.TOK.TOK": self.parse_delim_fmt4, 12 | } 13 | 14 | # TOK.TOK.TOK.TOK 15 | def parse_delim_fmt1(self, tokens): 16 | tax = [CAT, UNK, FAM, SUF] 17 | if tokens[1] == "Exception": 18 | tax = [CAT, PRE, FAM, SUF] 19 | else: 20 | tax = [CAT, FILE, FAM, SUF] 21 | return tax 22 | 23 | # TOK.TOK.TOK.TOK.TOK" 24 | def parse_delim_fmt2(self, tokens): 25 | return [CAT, PRE, FAM, SUF, SUF] 26 | 27 | # TOK-TOK.TOK.TOK.TOK.TOK 28 | def parse_delim_fmt3(self, tokens): 29 | return [CAT, CAT, FILE, FAM, SUF, SUF] 30 | 31 | # TOK-TOK.TOK.TOK.TOK 32 | def parse_delim_fmt4(self, tokens): 33 | return [CAT, CAT, PRE, FAM, 
SUF] 34 | -------------------------------------------------------------------------------- /claravy/parsers/parse_catquickheal.py: -------------------------------------------------------------------------------- 1 | from claravy.taxonomy import * 2 | 3 | 4 | class Parse_Catquickheal: 5 | 6 | def __init__(self): 7 | self.parse_delim_fmt = { 8 | "TOK.TOK.TOK": self.parse_delim_fmt1, 9 | "TOK/TOK.TOK": self.parse_delim_fmt2, 10 | "TOK.TOK.TOK.TOK": self.parse_delim_fmt3, 11 | "TOK.TOK": self.parse_delim_fmt4, 12 | "(TOK) - TOK": self.parse_delim_fmt5, 13 | "TOK.TOK.TOK.TOK.TOK": self.parse_delim_fmt6, 14 | "TOK-TOK.TOK.TOK": self.parse_delim_fmt7, 15 | } 16 | 17 | # TOK.TOK.TOK 18 | def parse_delim_fmt1(self, tokens): 19 | return [PRE, FAM, SUF] 20 | 21 | # TOK/TOK.TOK 22 | def parse_delim_fmt2(self, tokens): 23 | return [FILE, FAM, SUF] 24 | 25 | 26 | 27 | # TOK.TOK.TOK.TOK 28 | def parse_delim_fmt3(self, tokens): 29 | tax = [PRE, UNK, UNK, SUF] 30 | if len(tokens[2]) <= 2 and tokens[2] != "VB": 31 | tax = [PRE, FAM, SUF, SUF] 32 | elif tokens[2].islower() or tokens[2] == "Gen": 33 | tax = [PRE, FAM, SUF, SUF] 34 | elif tokens[2] == "MUE": # Unsure of what this token is, but common 35 | tax = [PRE, FAM, SUF, SUF] 36 | elif tokens[2].isupper() and tokens[2] != "VB": 37 | tax = [PRE, UNK, UNK, SUF] # Bad format 38 | else: 39 | tax = [PRE, PRE, FAM, SUF] 40 | return tax 41 | 42 | # TOK.TOK 43 | def parse_delim_fmt4(self, tokens): 44 | tax = [UNK, UNK] 45 | if tokens[1].isnumeric(): 46 | if tokens[1].isupper(): 47 | tax = [UNK, SUF] 48 | else: 49 | tax = [FAM, SUF] 50 | elif len(tokens[1]) <= 2 and tokens[1] != "VB": 51 | tax = [UNK, SUF] 52 | elif tokens[1].isupper() and not any([c.isdigit() for c in tokens[1]]): 53 | tax = [PRE, UNK] 54 | elif tokens[1].islower(): 55 | tax = [UNK, SUF] 56 | else: 57 | tax = [PRE, FAM] 58 | return tax 59 | 60 | # (TOK) - TOK 61 | def parse_delim_fmt5(self, tokens): 62 | return [NULL, PRE, SUF] 63 | 64 | # TOK.TOK.TOK.TOK.TOK 65 | def 
parse_delim_fmt6(self, tokens): 66 | tax = [PRE, UNK, UNK, UNK, UNK] 67 | if tokens[2].isnumeric(): 68 | tax = [PRE, FAM, SUF, SUF, SUF] 69 | elif tokens[2] == "CVE" and tokens[3].isnumeric() and tokens[4].isnumeric(): 70 | tax = [PRE, FAM, VULN, VULN, VULN] 71 | else: 72 | tax = [PRE, PRE, FAM, SUF, SUF] 73 | return tax 74 | 75 | # TOK-TOK.TOK.TOK 76 | def parse_delim_fmt7(self, tokens): 77 | if tokens[2].isnumeric(): 78 | tax = [UNK, UNK, SUF, SUF] # Bad format 79 | else: 80 | tax = [CAT, CAT, FAM, SUF] 81 | return tax 82 | 83 | -------------------------------------------------------------------------------- /claravy/parsers/parse_clamav.py: -------------------------------------------------------------------------------- 1 | from claravy.taxonomy import * 2 | 3 | 4 | class Parse_Clamav: 5 | 6 | def __init__(self): 7 | self.parse_delim_fmt = { 8 | "TOK.TOK.TOK-TOK": self.parse_delim_fmt1, 9 | "TOK.TOK-TOK": self.parse_delim_fmt2, 10 | "TOK.TOK.TOK-TOK-TOK": self.parse_delim_fmt3, 11 | "TOK.TOK.TOK": self.parse_delim_fmt4, 12 | "TOK.TOK.TOK-TOK-TOK-TOK": self.parse_delim_fmt5, 13 | "TOK.TOK": self.parse_delim_fmt6, 14 | "TOK.TOK.TOK.TOK": self.parse_delim_fmt7, 15 | } 16 | 17 | # TOK.TOK.TOK-TOK 18 | def parse_delim_fmt1(self, tokens): 19 | if tokens[1] == "Packed": 20 | tax = [PRE, PRE, PACK, SUF] 21 | elif len(tokens[2]) <= 2 and tokens[2] != "VB": 22 | tax = [PRE, FAM, SUF, SUF] 23 | elif tokens[2].isnumeric(): 24 | tax = [PRE, FAM, SUF, SUF] 25 | else: 26 | tax = [PRE, CAT, FAM, SUF] 27 | return tax 28 | 29 | # TOK.TOK-TOK 30 | def parse_delim_fmt2(self, tokens): 31 | return [PRE, FAM, SUF] 32 | 33 | # TOK.TOK.TOK-TOK-TOK 34 | def parse_delim_fmt3(self, tokens): 35 | return [FILE, CAT, FAM, SUF, SUF] 36 | 37 | # TOK.TOK.TOK 38 | def parse_delim_fmt4(self, tokens): 39 | if tokens[1] == "Packed": 40 | tax = [PRE, PRE, PACK] 41 | elif tokens[2].isnumeric(): 42 | if tokens[1].isnumeric(): 43 | if tokens[0].isupper(): 44 | tax = [SUF, SUF, SUF] 45 | else: 46 | tax = 
[FAM, SUF, SUF] 47 | else: 48 | tax = [PRE, FAM, SUF] 49 | elif len(tokens[2]) <= 2: 50 | if tokens[1].isnumeric(): 51 | tax = [FAM, SUF, SUF] 52 | else: 53 | tax = [PRE, FAM, SUF] 54 | elif tokens[2].lower() == "gen": 55 | tax = [PRE, FAM, SUF] 56 | elif tokens[2].isupper(): # Bad format 57 | tax = [PRE, UNK, UNK] 58 | else: 59 | tax = [PRE, PRE, FAM] 60 | return tax 61 | 62 | # TOK.TOK.TOK-TOK-TOK-TOK 63 | def parse_delim_fmt5(self, tokens): 64 | return [FILE, CAT, FAM, SUF, SUF, SUF] 65 | 66 | # TOK.TOK 67 | def parse_delim_fmt6(self, tokens): 68 | tax = [UNK, UNK] 69 | if tokens[1].isnumeric() or tokens[1].islower(): 70 | if len(tokens[0]) <= 2 and tokens[0] != "VB": 71 | tax = [SUF, SUF] 72 | else: 73 | tax = [FAM, SUF] 74 | elif len(tokens[1]) <= 2 and tokens[1] != "VB": 75 | tax = [FAM, SUF] 76 | else: 77 | tax = [PRE, FAM] 78 | return tax 79 | 80 | # TOK.TOK.TOK.TOK 81 | def parse_delim_fmt7(self, tokens): 82 | if tokens[1] == "Packed": 83 | tax = [PRE, PRE, PACK, SUF] 84 | elif tokens[2].isnumeric(): 85 | tax = [PRE, FAM, SUF, SUF] 86 | elif tokens[3].isupper() or tokens[3].isnumeric() or tokens[3].islower() or tokens[3] == "Gen": 87 | tax = [PRE, PRE, FAM, SUF] 88 | elif len(tokens[3]) <= 2 and tokens[3] != "VB": 89 | tax = [PRE, PRE, FAM, SUF] 90 | else: 91 | tax = [PRE, PRE, PRE, FAM] 92 | return tax 93 | 94 | -------------------------------------------------------------------------------- /claravy/parsers/parse_cmc.py: -------------------------------------------------------------------------------- 1 | import re 2 | from claravy.taxonomy import * 3 | 4 | 5 | class Parse_Cmc: 6 | 7 | def __init__(self): 8 | self.parse_delim_fmt = { 9 | "TOK.TOK.TOK!TOK": self.parse_delim_fmt1, 10 | "TOK-TOK.TOK.TOK!TOK": self.parse_delim_fmt2, 11 | "TOK.TOK.TOK.TOK!TOK": self.parse_delim_fmt3, 12 | "TOK-TOK.TOK.TOK.TOK!TOK": self.parse_delim_fmt4, 13 | "TOK-TOK.TOK!TOK": self.parse_delim_fmt5, 14 | "TOK.TOK!TOK": self.parse_delim_fmt6, 15 | "TOK.TOK.TOK-TOK.TOK!TOK": 
self.parse_delim_fmt7, 16 | "TOK.TOK.TOK.TOK.TOK!TOK": self.parse_delim_fmt8 17 | } 18 | 19 | # TOK.TOK.TOK!TOK 20 | def parse_delim_fmt1(self, tokens): 21 | tax = [CAT, FILE, UNK, SUF] 22 | if re.match(r"^[a-f0-9]{10}$", tokens[2]): 23 | tax = [CAT, FILE, SUF, SUF] 24 | else: 25 | tax = [CAT, FILE, FAM, SUF] 26 | return tax 27 | 28 | # TOK-TOK.TOK.TOK!TOK 29 | def parse_delim_fmt2(self, tokens): 30 | return [CAT, CAT, FILE, FAM, SUF] 31 | 32 | # TOK.TOK.TOK.TOK!TOK" 33 | def parse_delim_fmt3(self, tokens): 34 | return [CAT, FILE, FAM, SUF, SUF] 35 | 36 | # TOK-TOK.TOK.TOK.TOK!TOK 37 | def parse_delim_fmt4(self, tokens): 38 | return [CAT, CAT, FILE, FAM, SUF, SUF] 39 | 40 | # TOK-TOK.TOK!TOK 41 | def parse_delim_fmt5(self, tokens): 42 | return [CAT, CAT, FILE, SUF] 43 | 44 | # TOK.TOK!TOK 45 | def parse_delim_fmt6(self, tokens): 46 | return [CAT, FILE, SUF] 47 | 48 | # TOK.TOK.TOK-TOK.TOK!TOK 49 | def parse_delim_fmt7(self, tokens): 50 | return [CAT, FILE, CAT, FAM, SUF, SUF] 51 | 52 | # TOK.TOK.TOK.TOK.TOK!TOK 53 | def parse_delim_fmt8(self, tokens): 54 | tax = [UNK, UNK, UNK, UNK, UNK, UNK] 55 | if tokens[3].isnumeric(): 56 | tax = [CAT, FILE, FAM, SUF, SUF, SUF] 57 | return tax 58 | -------------------------------------------------------------------------------- /claravy/parsers/parse_commtouch.py: -------------------------------------------------------------------------------- 1 | from claravy.taxonomy import * 2 | 3 | 4 | class Parse_Commtouch: # Renamed from Cyren, Acquired F-prot 5 | 6 | def __init__(self): 7 | self.parse_delim_fmt = { 8 | "TOK/TOK.TOK.TOK!TOK": self.parse_delim_fmt1, 9 | "TOK/TOK.TOK": self.parse_delim_fmt2, 10 | "TOK/TOK.TOK-TOK": self.parse_delim_fmt3, 11 | "TOK/TOK.TOK.TOK": self.parse_delim_fmt4, 12 | "TOK.TOK": self.parse_delim_fmt5, 13 | "TOK/TOK.TOK!TOK": self.parse_delim_fmt6, 14 | "TOK/TOK": self.parse_delim_fmt7, 15 | "TOK/TOK-TOK!TOK": self.parse_delim_fmt8, 16 | } 17 | 18 | # TOK/TOK.TOK.TOK!TOK 19 | def parse_delim_fmt1(self, 
class Parse_Comodo:
    """Token taxonomy rules for Comodo label formats.

    ``parse_delim_fmt`` maps a delimiter format string (each token shown as
    ``TOK``) to the method returning one taxonomy tag per token.
    """

    def __init__(self):
        # Order matters: the i-th format (1-based) is handled by the method
        # named ``parse_delim_fmt<i>``.
        formats = (
            "TOK.TOK.TOK.TOK",
            "TOK.TOK.TOK.TOK@TOK",
            "TOK",
            "TOK.TOK.TOK.TOK.TOK",
            "TOK@#TOK",
            "TOK.TOK.TOK.TOK.TOK@TOK",
            "TOK.TOK.TOK.~TOK",
            "TOK.TOK.TOK.TOK.~TOK",
            "TOK.TOK.TOK",
            "TOK.TOK.TOK.TOK.~TOK@TOK",
            "TOK.TOK.TOK.~TOK@TOK",
            "TOK.TOK",
            "TOK.TOK!",
        )
        self.parse_delim_fmt = {
            fmt: getattr(self, "parse_delim_fmt%d" % idx)
            for idx, fmt in enumerate(formats, start=1)
        }

    # TOK.TOK.TOK.TOK
    def parse_delim_fmt1(self, tokens):
        """Tag the third token PACK when the label starts with "Packed", else FAM."""
        third_tag = PACK if tokens[0] == "Packed" else FAM
        return [PRE, FILE, third_tag, SUF]

    # TOK.TOK.TOK.TOK@TOK
    def parse_delim_fmt2(self, tokens):
        """Same rule as the four-token format, with one extra trailing SUF."""
        third_tag = PACK if tokens[0] == "Packed" else FAM
        return [PRE, FILE, third_tag, SUF, SUF]

    # TOK
    def parse_delim_fmt3(self, tokens):
        """Fixed tagging; the token itself is not inspected."""
        return [PRE]

    # TOK.TOK.TOK.TOK.TOK
    def parse_delim_fmt4(self, tokens):
        """Fixed tagging; the tokens themselves are not inspected."""
        return [PRE, FILE, CAT, FAM, SUF]

    # TOK@#TOK
    def parse_delim_fmt5(self, tokens):
        """Fixed tagging; the tokens themselves are not inspected."""
        return [PRE, SUF]

    # TOK.TOK.TOK.TOK.TOK@TOK
    def parse_delim_fmt6(self, tokens):
        """Fixed tagging; the tokens themselves are not inspected."""
        return [PRE, FILE, CAT, FAM, SUF, SUF]

    # TOK.TOK.TOK.~TOK
    def parse_delim_fmt7(self, tokens):
        """Fixed tagging; the tokens themselves are not inspected."""
        return [CAT, FILE, FAM, SUF]

    # TOK.TOK.TOK.TOK.~TOK
    def parse_delim_fmt8(self, tokens):
        """Fixed tagging; the tokens themselves are not inspected."""
        return [PRE, FILE, CAT, FAM, SUF]

    # TOK.TOK.TOK
    def parse_delim_fmt9(self, tokens):
        """Locate the family token from the shape of the second and third tokens."""
        middle = tokens[1]
        if middle == "Pck":
            return [PRE, PRE, PACK]
        if middle.isnumeric():
            return [FAM, SUF, SUF]
        last = tokens[2]
        # A numeric, "Gen" or very short (but not "VB") last token is a suffix.
        if last.isnumeric() or last == "Gen" or (len(last) <= 2 and last != "VB"):
            return [PRE, FAM, SUF]
        return [PRE, PRE, FAM]

    # TOK.TOK.TOK.TOK.~TOK@TOK
    def parse_delim_fmt10(self, tokens):
        """Fixed tagging; the tokens themselves are not inspected."""
        return [PRE, FILE, CAT, FAM, SUF, SUF]

    # TOK.TOK.TOK.~TOK@TOK
    def parse_delim_fmt11(self, tokens):
        """Fixed tagging; the tokens themselves are not inspected."""
        return [CAT, FILE, FAM, SUF, SUF]

    # TOK.TOK
    def parse_delim_fmt12(self, tokens):
        """Decide whether the first or the second token is the family."""
        if tokens[0] == "Heur":
            return [PRE, PRE]
        second = tokens[1]
        if len(second) <= 2 or second.isnumeric() or second.isupper() or second.islower():
            return [FAM, SUF]
        return [PRE, FAM]

    # TOK.TOK!
    def parse_delim_fmt13(self, tokens):
        """Reuse the ``TOK.TOK`` rule and append NULL for the trailing token."""
        return self.parse_delim_fmt12(tokens) + [NULL]
class Parse_Cyren:  # Renamed from Commtouch to Cyren, Acquired F-prot
    """Token taxonomy rules for Cyren label formats.

    ``parse_delim_fmt`` maps a delimiter format string (each token shown as
    ``TOK``) to the method returning one taxonomy tag per token.
    """

    def __init__(self):
        # Order matters: the i-th format (1-based) is handled by the method
        # named ``parse_delim_fmt<i>``.
        formats = (
            "TOK/TOK.TOK",
            "TOK/TOK.TOK.TOK!TOK",
            "TOK/TOK.TOK!TOK",
            "TOK/TOK-TOK!TOK",
            "TOK/TOK.TOK-TOK",
            "TOK/TOK.TOK.TOK",
            "TOK/TOK",
            "TOK/TOK_TOK.TOK.TOK!TOK",
            "TOK.TOK",
        )
        self.parse_delim_fmt = {
            fmt: getattr(self, "parse_delim_fmt%d" % idx)
            for idx, fmt in enumerate(formats, start=1)
        }

    # TOK/TOK.TOK
    def parse_delim_fmt1(self, tokens):
        """Fixed tagging; the tokens themselves are not inspected."""
        return [FILE, FAM, SUF]

    # TOK/TOK.TOK.TOK!TOK
    def parse_delim_fmt2(self, tokens):
        """Fixed tagging; the tokens themselves are not inspected."""
        return [FILE, FAM, SUF, SUF, SUF]

    # TOK/TOK.TOK!TOK
    def parse_delim_fmt3(self, tokens):
        """Tag the second token PRE when the label ends in "Olympus", else FAM."""
        second_tag = PRE if tokens[3] == "Olympus" else FAM
        return [FILE, second_tag, SUF, SUF]

    # TOK/TOK-TOK!TOK
    def parse_delim_fmt4(self, tokens):
        """Locate the family token from the shape of the second and third tokens."""
        second = tokens[1]
        if len(second) == 1:
            return [PRE, SUF, SUF, SUF]
        third = tokens[2]
        if second == "Heuristic":
            if third.isupper() or third.isnumeric():
                return [PRE, PRE, SUF, SUF]
            return [PRE, PRE, FAM, SUF]
        if third == "based":
            return [PRE, FAM, SUF, SUF]
        if third.islower() or third.isupper():
            return [PRE, UNK, UNK, SUF]  # Bad format
        return [PRE, PRE, FAM, SUF]

    # TOK/TOK.TOK-TOK
    def parse_delim_fmt5(self, tokens):
        """Fixed tagging; the tokens themselves are not inspected."""
        return [FILE, FAM, SUF, SUF]

    # TOK/TOK.TOK.TOK
    def parse_delim_fmt6(self, tokens):
        """Fixed tagging; the tokens themselves are not inspected."""
        return [FILE, FAM, SUF, SUF]

    # TOK/TOK
    def parse_delim_fmt7(self, tokens):
        """Fixed tagging; the tokens themselves are not inspected."""
        return [FILE, FAM]

    # TOK/TOK_TOK.TOK.TOK!TOK
    def parse_delim_fmt8(self, tokens):
        """Fixed tagging; the second and third tokens are left as UNK."""
        return [FILE, UNK, UNK, SUF, SUF, SUF]  # Bad format

    # TOK.TOK
    def parse_delim_fmt9(self, tokens):
        """Decide whether the first or the second token is the family."""
        second = tokens[1]
        if second == "gen":
            return [PRE, SUF]
        if second.isnumeric() or len(second) <= 2:
            return [FAM, SUF]
        return [PRE, FAM]
class Parse_Drweb:
    """Token taxonomy rules for DrWeb label formats.

    ``parse_delim_fmt`` maps a delimiter format string (each token shown as
    ``TOK``) to the method returning one taxonomy tag per token.
    """

    def __init__(self):
        # Order matters: the i-th format (1-based) is handled by the method
        # named ``parse_delim_fmt<i>``.
        formats = (
            "TOK.TOK.TOK",
            "TOK.TOK.TOK.TOK",
            "TOK.TOK",
            "TOK TOK TOK.TOK.TOK",
        )
        self.parse_delim_fmt = {
            fmt: getattr(self, "parse_delim_fmt%d" % idx)
            for idx, fmt in enumerate(formats, start=1)
        }

    # TOK.TOK.TOK
    def parse_delim_fmt1(self, tokens):
        """Locate the family/packer token from the shape of the last token."""
        last = tokens[2]
        if last == "Packed":
            return [PRE, PACK, PRE]
        # Numeric or "based"/"origin" (any case) endings are suffixes.
        if last.isnumeric() or last.lower() in ("based", "origin"):
            return [PRE, FAM, SUF]
        return [PRE, PRE, FAM]

    # TOK.TOK.TOK.TOK
    def parse_delim_fmt2(self, tokens):
        """Family is the second token when the third is numeric or "based", else the third."""
        third = tokens[2]
        if third.isnumeric() or third.lower() == "based":
            return [PRE, FAM, SUF, SUF]
        return [PRE, PRE, FAM, SUF]

    # TOK.TOK
    def parse_delim_fmt3(self, tokens):
        """Tag the first token FAM only when the second token looks like a suffix."""
        first, second = tokens[0], tokens[1]
        if second.isnumeric():
            if first.isupper() or len(first) <= 2:
                return [UNK, SUF]
            return [FAM, SUF]
        if second.islower():
            return [FAM, SUF]
        # Usually [PRE, FAM] but rarely are families in tokens[0] and tokens[1]
        return [UNK, UNK]

    # TOK TOK TOK.TOK.TOK
    def parse_delim_fmt4(self, tokens):
        """Fixed tagging; the tokens themselves are not inspected."""
        return [PRE, PRE, PRE, FAM, SUF]
class Parse_Endgame:
    """Token taxonomy rules for the single Endgame label format."""

    def __init__(self):
        # Only one delimiter format is registered for this product.
        handlers = {}
        handlers["TOK (TOK TOK)"] = self.parse_delim_fmt1
        self.parse_delim_fmt = handlers

    # TOK (TOK TOK)
    def parse_delim_fmt1(self, tokens):
        """Tag all three words PRE and the trailing token NULL."""
        return [PRE] * 3 + [NULL]
class Parse_Esetnod32:  # Renamed from nod32
    """Token taxonomy rules for ESET-NOD32 label formats.

    ``parse_delim_fmt`` maps a delimiter format string (each token shown as
    ``TOK``) to the method returning one taxonomy tag per token.
    """

    def __init__(self):
        # Order matters: the i-th format (1-based) is handled by the method
        # named ``parse_delim_fmt<i>``.
        formats = (
            "TOK/TOK.TOK",
            "TOK TOK TOK TOK/TOK.TOK",
            "TOK/TOK.TOK.TOK",
            "TOK TOK TOK TOK/TOK.TOK.TOK",
            "TOK TOK TOK TOK/TOK.TOK TOK TOK",
            "TOK/TOK.TOK TOK TOK",
            "TOK TOK TOK TOK/TOK.TOK.TOK TOK TOK",
            "TOK/TOK",
            "TOK TOK TOK TOK/TOK",
            "TOK TOK TOK TOK TOK/TOK.TOK",
        )
        self.parse_delim_fmt = {
            fmt: getattr(self, "parse_delim_fmt%d" % idx)
            for idx, fmt in enumerate(formats, start=1)
        }

    # TOK/TOK.TOK
    def parse_delim_fmt1(self, tokens):
        """Fixed tagging; the tokens themselves are not inspected."""
        return [FILE, FAM, SUF]

    # TOK TOK TOK TOK/TOK.TOK
    def parse_delim_fmt2(self, tokens):
        """Fixed tagging; the tokens themselves are not inspected."""
        return [PRE, PRE, PRE, FILE, FAM, SUF]

    # TOK/TOK.TOK.TOK
    def parse_delim_fmt3(self, tokens):
        """Locate the family token from the shape of the third token."""
        third = tokens[2]
        # Numeric or very short (but not "VB") third token is a suffix.
        if third.isnumeric() or (len(third) <= 2 and third != "VB"):
            return [FILE, FAM, SUF, SUF]
        if third.isupper() and len(third) == 3:
            return [FILE, UNK, UNK, SUF]  # Bad format
        return [FILE, CAT, FAM, SUF]

    # TOK TOK TOK TOK/TOK.TOK.TOK
    def parse_delim_fmt4(self, tokens):
        """Three leading PRE words followed by the ``TOK/TOK.TOK.TOK`` rule."""
        remainder = self.parse_delim_fmt3(tokens[3:])
        return [PRE, PRE, PRE] + remainder

    # TOK TOK TOK TOK/TOK.TOK TOK TOK
    def parse_delim_fmt5(self, tokens):
        """Fixed tagging; the tokens themselves are not inspected."""
        return [PRE, PRE, PRE, FILE, FAM, SUF, SUF, SUF]

    # TOK/TOK.TOK TOK TOK
    def parse_delim_fmt6(self, tokens):
        """Fixed tagging; the tokens themselves are not inspected."""
        return [FILE, FAM, SUF, SUF, SUF]

    # TOK TOK TOK TOK/TOK.TOK.TOK TOK TOK
    def parse_delim_fmt7(self, tokens):
        """Family is the fifth token if the sixth is upper-case or the fifth is "FlyStudio"."""
        # The "FlyStudio" case is a bad format seen only for that name.
        if tokens[5].isupper() or tokens[4] == "FlyStudio":
            return [PRE, PRE, PRE, FILE, FAM, SUF, SUF, SUF, SUF]
        return [PRE, PRE, PRE, FILE, CAT, FAM, SUF, SUF, SUF]

    # TOK/TOK
    def parse_delim_fmt8(self, tokens):
        """Fixed tagging; the tokens themselves are not inspected."""
        return [FILE, FAM]

    # TOK TOK TOK TOK/TOK
    def parse_delim_fmt9(self, tokens):
        """Fixed tagging; the tokens themselves are not inspected."""
        return [PRE, PRE, PRE, FILE, FAM]

    # TOK TOK TOK TOK TOK/TOK.TOK
    def parse_delim_fmt10(self, tokens):
        """Fixed tagging; the tokens themselves are not inspected."""
        return [PRE, PRE, PRE, PRE, FILE, FAM, SUF]
self.parse_delim_fmt2, 10 | "TOK/TOK!TOK": self.parse_delim_fmt3, 11 | "TOK/TOK": self.parse_delim_fmt4, 12 | "TOK/TOK_TOK": self.parse_delim_fmt5, 13 | "TOK/TOK.TOK[TOK]": self.parse_delim_fmt6, 14 | } 15 | 16 | # TOK/TOK.TOK 17 | def parse_delim_fmt1(self, tokens): 18 | return [FILE, FAM, SUF] 19 | 20 | # TOK/TOK.TOK!TOK 21 | def parse_delim_fmt2(self, tokens): 22 | return [FILE, FAM, SUF, SUF] 23 | 24 | # TOK/TOK!TOK 25 | def parse_delim_fmt3(self, tokens): 26 | return [FILE, FAM, SUF] 27 | 28 | # TOK/TOK 29 | def parse_delim_fmt4(self, tokens): 30 | return [FILE, FAM] 31 | 32 | # TOK/TOK_TOK 33 | def parse_delim_fmt5(self, tokens): 34 | return [FILE, FAM, SUF] 35 | 36 | # TOK/TOK.TOK[TOK] 37 | def parse_delim_fmt6(self, tokens): 38 | # Seems to be all Zango pinball malware? Unsure of family 39 | return [FILE, UNK, UNK, SUF, NULL] 40 | -------------------------------------------------------------------------------- /claravy/parsers/parse_fireeye.py: -------------------------------------------------------------------------------- 1 | import re 2 | from claravy.taxonomy import * 3 | 4 | 5 | class Parse_Fireeye: # Uses Bitdefender engine 6 | 7 | def __init__(self): 8 | self.parse_delim_fmt = { 9 | "TOK.TOK.TOK": self.parse_delim_fmt1, 10 | "TOK.TOK.TOK.TOK": self.parse_delim_fmt2, 11 | "TOK:TOK.TOK.TOK": self.parse_delim_fmt3, 12 | "TOK:TOK.TOK.TOK.TOK": self.parse_delim_fmt4, 13 | "TOK.TOK.TOK.TOK.TOK": self.parse_delim_fmt5, 14 | } 15 | 16 | # TOK.TOK.TOK 17 | def parse_delim_fmt1(self, tokens): 18 | tax = [UNK, UNK, SUF] 19 | if tokens[0] == "Generic": 20 | tax = [PRE, SUF, SUF] 21 | elif tokens[1].isnumeric() or tokens[1].islower(): 22 | tax = [FAM, SUF, SUF] 23 | elif tokens[1].lower() == "mg": 24 | tax = [FAM, SUF, SUF] 25 | else: 26 | tax = [PRE, FAM, SUF] 27 | return tax 28 | 29 | # TOK.TOK.TOK.TOK 30 | def parse_delim_fmt2(self, tokens): 31 | tax = [UNK, UNK, UNK, UNK] 32 | if "Exploit" in tokens: # Very inconsistent format 33 | tax = [UNK, UNK, UNK, UNK] 
class Parse_Fortinet:
    """Token taxonomy rules for Fortinet label formats.

    ``parse_delim_fmt`` maps a delimiter format string (each token shown as
    ``TOK``) to the method returning one taxonomy tag per token.
    """

    def __init__(self):
        # Order matters: the i-th format (1-based) is handled by the method
        # named ``parse_delim_fmt<i>``.
        formats = (
            "TOK/TOK.TOK!TOK",
            "TOK/TOK",
            "TOK/TOK.TOK",
            "TOK/TOK.TOK!TOK.TOK",
            "TOK/TOK.TOK.TOK!TOK",
            "TOK/TOK!TOK",
            "TOK",
            "TOK/TOK.TOK.TOK",
            "TOK/TOK.TOK@TOK",
            "TOK/TOK_TOK",
        )
        self.parse_delim_fmt = {
            fmt: getattr(self, "parse_delim_fmt%d" % idx)
            for idx, fmt in enumerate(formats, start=1)
        }

    # TOK/TOK.TOK!TOK
    def parse_delim_fmt1(self, tokens):
        """Fixed tagging; the tokens themselves are not inspected."""
        return [FILE, FAM, SUF, SUF]

    # TOK/TOK
    def parse_delim_fmt2(self, tokens):
        """Fixed tagging; the tokens themselves are not inspected."""
        return [PRE, FAM]

    # TOK/TOK.TOK
    def parse_delim_fmt3(self, tokens):
        """Tag the last token VULN when it starts with ``CVE`` plus digits."""
        if re.match(r"CVE[0-9]+", tokens[2]):
            return [PRE, UNK, VULN]
        return [PRE, FAM, SUF]

    # TOK/TOK.TOK!TOK.TOK
    def parse_delim_fmt4(self, tokens):
        """Fixed tagging; the tokens themselves are not inspected."""
        return [FILE, FAM, SUF, SUF, SUF]

    # TOK/TOK.TOK.TOK!TOK
    def parse_delim_fmt5(self, tokens):
        """Recognise CVE and "Generic" labels; anything else stays UNK."""
        if re.match(r"CVE[0-9]+", tokens[2]):
            return [FILE, PRE, VULN, SUF, SUF]
        if tokens[1] == "Generic":
            return [FILE, PRE, SUF, SUF, SUF]
        return [FILE, UNK, UNK, SUF, SUF]

    # TOK/TOK!TOK
    def parse_delim_fmt6(self, tokens):
        """Tag the last token SUF when it is single-case or numeric, else UNK."""
        last = tokens[2]
        if last.islower() or last.isupper() or last.isnumeric():
            return [PRE, FAM, SUF]
        return [PRE, FAM, UNK]  # Weird format - Morphine, Monder?

    # TOK
    def parse_delim_fmt7(self, tokens):
        """Fixed tagging; the token itself is not inspected."""
        return [FAM]

    # TOK/TOK.TOK.TOK
    def parse_delim_fmt8(self, tokens):
        """Family is the second token when the third is upper-case or numeric."""
        third = tokens[2]
        if third.isupper() or third.isnumeric():
            return [PRE, FAM, SUF, SUF]
        return [PRE, PRE, FAM, SUF]

    # TOK/TOK.TOK@TOK
    def parse_delim_fmt9(self, tokens):
        """Fixed tagging; the tokens themselves are not inspected."""
        return [FILE, FAM, SUF, SUF]

    # TOK/TOK_TOK
    def parse_delim_fmt10(self, tokens):
        """Locate the family token from the shape of the second and third tokens."""
        last = tokens[2]
        if not (last.isnumeric() or last.islower() or last.isupper()):
            return [PRE, PRE, FAM]
        if tokens[1].isupper():
            return [PRE, UNK, SUF]  # Bad format
        return [PRE, FAM, SUF]
class Parse_Fsecure:  # Very similar to bitdefender engine. May use Commtouch/Cyren engine. Partnership with Avira.
    """Token taxonomy rules for F-Secure label formats.

    ``parse_delim_fmt`` maps a delimiter format string (each token shown as
    ``TOK``) to the method returning one taxonomy tag per token.
    """

    def __init__(self):
        self.parse_delim_fmt = {
            "TOK.TOK.TOK": self.parse_delim_fmt1,
            "TOK.TOK/TOK.TOK": self.parse_delim_fmt2,
            "TOK:TOK.TOK.TOK": self.parse_delim_fmt3,
            "TOK.TOK.TOK.TOK": self.parse_delim_fmt4,
            "TOK.TOK/TOK.TOK.TOK": self.parse_delim_fmt5,
            "TOK:TOK.TOK.TOK.TOK": self.parse_delim_fmt6,
            "TOK:TOK/TOK.TOK": self.parse_delim_fmt7,
            "TOK:TOK/TOK.TOK!TOK": self.parse_delim_fmt8,
            "TOK-TOK:TOK/TOK.TOK!TOK": self.parse_delim_fmt9,
            "TOK:TOK/TOK": self.parse_delim_fmt10,
            "TOK-TOK:TOK/TOK.TOK": self.parse_delim_fmt11,
            "TOK:TOK.TOK.TOK@TOK": self.parse_delim_fmt12,
            "TOK-TOK.TOK.TOK.TOK": self.parse_delim_fmt13,
            "TOK:TOK.TOK.TOK.TOK@TOK": self.parse_delim_fmt14,
        }

    # TOK.TOK.TOK
    def parse_delim_fmt1(self, tokens):
        """Tag a packer label, a numeric-second-token label, or the default shape."""
        if tokens[0] == "Packer":
            tax = [PRE, PACK, SUF]
        elif tokens[1].isnumeric():
            tax = [FAM, SUF, SUF]
        else:
            tax = [PRE, FAM, SUF]
        return tax

    # TOK.TOK/TOK.TOK
    def parse_delim_fmt2(self, tokens):
        """Fixed tagging; the tokens themselves are not inspected."""
        return [PRE, PRE, FAM, SUF]

    # TOK:TOK.TOK.TOK
    def parse_delim_fmt3(self, tokens):
        """Fixed tagging; the tokens themselves are not inspected."""
        return [PRE, PRE, FAM, SUF]

    # TOK.TOK.TOK.TOK
    def parse_delim_fmt4(self, tokens):
        """Locate the family/packer token; the first matching rule wins."""
        if tokens[0] == "Packer":
            tax = [PRE, PRE, PACK, SUF]
        elif tokens[2] == "Gen":
            tax = [PRE, FAM, SUF, SUF]
        elif tokens[1] == "Generic":
            tax = [PRE, PRE, SUF, SUF]
        elif tokens[2].isnumeric():
            tax = [PRE, FAM, SUF, SUF]
        elif tokens[0] == "Generic" and tokens[1] == "Malware":
            tax = [PRE, PRE, SUF, SUF]
        else:
            tax = [PRE, PRE, FAM, SUF]
        return tax

    # TOK.TOK/TOK.TOK.TOK
    def parse_delim_fmt5(self, tokens):
        """Locate the family token from the shape of the fourth token."""
        if tokens[3].isnumeric() or tokens[3].islower():
            tax = [PRE, PRE, FAM, SUF, SUF]
        elif len(tokens[3]) <= 2 and tokens[3] != "VB":
            tax = [PRE, PRE, FAM, SUF, SUF]
        elif tokens[3].isupper():
            tax = [PRE, PRE, PRE, UNK, SUF]  # Bad format
        else:
            tax = [PRE, PRE, PRE, FAM, SUF]
        return tax

    # TOK:TOK.TOK.TOK.TOK
    def parse_delim_fmt6(self, tokens):
        """Locate the family/packer token; the first matching rule wins.

        A label whose second token is "Packer" is always tagged with PACK:
        on the third token when the fourth is a single character, otherwise
        on the fourth token.
        """
        if tokens[1] == "Packer":
            if len(tokens[3]) == 1:
                tax = [PRE, PRE, PACK, SUF, SUF]
            else:
                tax = [PRE, PRE, PRE, PACK, SUF]
        # Must be ``elif``: a plain ``if`` here restarted the chain and
        # unconditionally overwrote the packer result above.
        elif tokens[2] == "Generic":
            tax = [PRE, PRE, PRE, SUF, SUF]
        elif tokens[2] == "Malware":
            tax = [PRE, PRE, PRE, SUF, SUF]
        elif tokens[3].isnumeric() or tokens[3] == "Gen":
            tax = [PRE, CAT, FAM, SUF, SUF]
        elif tokens[3].isupper() and tokens[3] != "VB":
            tax = [PRE, CAT, FAM, SUF, SUF]
        else:
            tax = [PRE, PRE, PRE, FAM, SUF]
        return tax

    # TOK:TOK/TOK.TOK
    def parse_delim_fmt7(self, tokens):
        """Tag the third token VULN when it is ``MS`` followed only by digits."""
        if re.match(r"^MS[0-9]+$", tokens[2]):
            tax = [CAT, FILE, VULN, SUF]
        else:
            tax = [CAT, FILE, FAM, SUF]
        return tax

    # TOK:TOK/TOK.TOK!TOK
    def parse_delim_fmt8(self, tokens):
        """Tag the third token VULN when it is ``CVE`` followed only by digits."""
        if re.match(r"^CVE[0-9]+$", tokens[2]):
            tax = [CAT, FILE, VULN, SUF, SUF]
        else:
            tax = [CAT, FILE, FAM, SUF, SUF]
        return tax

    # TOK-TOK:TOK/TOK.TOK!TOK
    def parse_delim_fmt9(self, tokens):
        """Fixed tagging; the tokens themselves are not inspected."""
        return [CAT, CAT, FILE, FAM, SUF, SUF]

    # TOK:TOK/TOK
    def parse_delim_fmt10(self, tokens):
        """Fixed tagging; the tokens themselves are not inspected."""
        return [CAT, FILE, FAM]

    # TOK-TOK:TOK/TOK.TOK
    def parse_delim_fmt11(self, tokens):
        """Fixed tagging; the tokens themselves are not inspected."""
        return [CAT, CAT, FILE, FAM, SUF]

    # TOK:TOK.TOK.TOK@TOK
    def parse_delim_fmt12(self, tokens):
        """Fixed tagging; tokens[2] never seems to be a family name."""
        return [PRE, PRE, PRE, SUF, SUF]

    # TOK-TOK.TOK.TOK.TOK
    def parse_delim_fmt13(self, tokens):
        """Treat the hyphenated pair as the family when the third token is numeric."""
        if tokens[2].isnumeric():
            tax = [FAM, FAM, SUF, SUF, SUF]
        else:
            tax = [CAT, CAT, FILE, FAM, SUF]
        return tax

    # TOK:TOK.TOK.TOK.TOK@TOK
    def parse_delim_fmt14(self, tokens):
        """Locate the family/packer token; the first matching rule wins."""
        if tokens[1] == "Packer":
            tax = [PRE, PRE, PACK, SUF, SUF, SUF]
        elif tokens[3].isupper():
            tax = [PRE, PRE, PRE, SUF, SUF, SUF]
        elif tokens[3].isnumeric():
            tax = [PRE, PRE, FAM, SUF, SUF, SUF]
        else:
            tax = [PRE, PRE, PRE, FAM, SUF, SUF]
        return tax
tax = [PRE, PRE, FAM, SUF] 40 | return tax 41 | 42 | # TOK.TOK.TOK 43 | def parse_delim_fmt2(self, tokens): 44 | tax = [UNK, UNK, SUF] 45 | if tokens[0] == "Packer": 46 | tax = [PRE, PACK, SUF] 47 | elif tokens[1].isnumeric(): 48 | tax = [FAM, SUF, SUF] 49 | else: 50 | tax = [PRE, FAM, SUF] 51 | return tax 52 | 53 | # TOK:TOK.TOK.TOK 54 | def parse_delim_fmt3(self, tokens): 55 | return [PRE, PRE, FAM, SUF] 56 | 57 | # TOK:TOK.TOK.TOK.TOK 58 | def parse_delim_fmt4(self, tokens): 59 | if tokens[1] == "Packer": 60 | if len(tokens[3]) == 1: 61 | tax = [PRE, PRE, PACK, SUF, SUF] 62 | else: 63 | tax = [PRE, PRE, PRE, PACK, SUF] 64 | if tokens[2] == "Generic": 65 | tax = [PRE, PRE, PRE, SUF, SUF] 66 | elif tokens[2] == "Malware": 67 | tax = [PRE, PRE, PRE, SUF, SUF] 68 | elif tokens[3].isnumeric() or tokens[3] == "Gen": 69 | tax = [PRE, CAT, FAM, SUF, SUF] 70 | elif tokens[3].isupper() and tokens[3] != "VB": 71 | tax = [PRE, CAT, FAM, SUF, SUF] 72 | else: 73 | tax = [PRE, PRE, PRE, FAM, SUF] 74 | return tax 75 | 76 | # TOK.TOK-TOK.TOK.TOK 77 | def parse_delim_fmt5(self, tokens): 78 | return [FILE, CAT, CAT, FAM, SUF] 79 | 80 | # TOK:TOK-TOK 81 | def parse_delim_fmt6(self, tokens): 82 | return [PRE, FAM, SUF, NULL] 83 | 84 | # TOK.TOK.TOK.TOK@TOK 85 | def parse_delim_fmt7(self, tokens): 86 | tax = [PRE, PRE, UNK, SUF, SUF] 87 | if tokens[2] == "Heur": 88 | tax = [PRE, PRE, PRE, SUF, SUF] 89 | else: 90 | tax = [PRE, PRE, FAM, SUF, SUF] 91 | return tax 92 | 93 | # TOK:TOK.TOK.TOK.TOK.TOK 94 | def parse_delim_fmt8(self, tokens): 95 | tax = [PRE, PRE, NULL, NULL, NULL, SUF] 96 | if tokens[4].isnumeric() or len(tokens[4]) == 1: 97 | if tokens[3].isnumeric() or len(tokens[3]) == 1: 98 | tax = [PRE, PRE, FAM, SUF, SUF, SUF] 99 | else: 100 | tax = [PRE, PRE, PRE, FAM, SUF, SUF] 101 | elif len(tokens[4]) <= 3: 102 | if (tokens[4].isupper() and tokens[4] != "VB") or tokens[4] == "Gen": 103 | tax = [PRE, PRE, PRE, FAM, SUF, SUF] 104 | else: 105 | tax = [PRE, PRE, PRE, PRE, FAM, SUF] 
106 | else: 107 | tax = [PRE, PRE, PRE, PRE, FAM, SUF] 108 | return tax 109 | 110 | # TOK:TOK.TOK.TOK@TOK 111 | def parse_delim_fmt9(self, tokens): 112 | return [PRE, PRE, FAM, SUF, SUF] 113 | 114 | # TOK:TOK.TOK.TOK.TOK@TOK 115 | def parse_delim_fmt10(self, tokens): 116 | tax = [PRE, UNK, UNK, UNK, SUF, SUF] 117 | if tokens[1] == "Packer": 118 | tax = [PRE, PRE, PACK, SUF, SUF, SUF] 119 | elif tokens[3].isupper(): 120 | tax = [PRE, PRE, PRE, SUF, SUF, SUF] 121 | elif tokens[3].isnumeric(): 122 | tax = [PRE, PRE, FAM, SUF, SUF, SUF] 123 | else: 124 | tax = [PRE, PRE, PRE, FAM, SUF, SUF] 125 | return tax 126 | 127 | # TOK:TOK-TOK 128 | def parse_delim_fmt11(self, tokens): 129 | return [PRE, FAM, SUF] 130 | 131 | # TOK.TOK.TOK.TOK.TOK 132 | def parse_delim_fmt12(self, tokens): 133 | tax = [PRE, UNK, UNK, UNK, SUF] 134 | if re.match(r"^M[Ss][0-9]+$", tokens[2]) and tokens[3].isnumeric(): 135 | tax = [PRE, PRE, VULN, VULN, SUF] 136 | elif tokens[3].isnumeric() or tokens[3] == "Gen" or len(tokens[3]) == 1: 137 | if tokens[2].isnumeric() or tokens[2] == "Gen" or len(tokens[2]) == 1: 138 | tax = [PRE, FAM, SUF, SUF, SUF] 139 | else: 140 | tax = [PRE, PRE, FAM, SUF, SUF] 141 | else: 142 | tax = [PRE, PRE, PRE, FAM, SUF] 143 | return tax 144 | 145 | # TOK.TOK-TOK.TOK.TOK@TOK 146 | def parse_delim_fmt13(self, tokens): 147 | return [FILE, CAT, CAT, FAM, SUF, SUF] 148 | 149 | # TOK.TOK.TOK!.TOK 150 | def parse_delim_fmt14(self, tokens): 151 | return [PRE, PRE, SUF, SUF] 152 | -------------------------------------------------------------------------------- /claravy/parsers/parse_google.py: -------------------------------------------------------------------------------- 1 | from claravy.taxonomy import * 2 | 3 | 4 | class Parse_Google: 5 | 6 | def __init__(self): 7 | self.parse_delim_fmt = { 8 | "TOK": self.parse_delim_fmt1, 9 | } 10 | 11 | # TOK 12 | def parse_delim_fmt1(self, tokens): 13 | return [PRE] 14 | 
class Parse_Ikarus:
    """Label the tokens of Ikarus detection names.

    ``parse_delim_fmt`` maps a delimiter format string (for example
    ``"TOK.TOK.TOK"``) to the method that returns the taxonomy labels for
    names of that shape. Every method takes the list of tokens and returns
    a list of taxonomy labels; within a method the first matching rule wins.
    """

    def __init__(self):
        self.parse_delim_fmt = {
            "TOK.TOK.TOK": self.parse_delim_fmt1,
            "TOK.TOK": self.parse_delim_fmt2,
            "TOK-TOK.TOK.TOK": self.parse_delim_fmt3,
            "TOK-TOK.TOK": self.parse_delim_fmt4,
            "TOK-TOK-TOK:TOK.TOK": self.parse_delim_fmt5,
            "TOK.TOK.TOK.TOK": self.parse_delim_fmt6,
            "TOK-TOK-TOK:TOK.TOK.TOK": self.parse_delim_fmt7,
            "TOK-TOK.TOK.TOK.TOK": self.parse_delim_fmt8,
            "TOK-TOK": self.parse_delim_fmt9,
            "TOK": self.parse_delim_fmt10,
        }

    # TOK.TOK.TOK
    def parse_delim_fmt1(self, tokens):
        # The packer check has to be part of the same if/elif chain as the
        # rules below. As a separate ``if`` statement its result was always
        # overwritten, because the following chain ends in ``else``.
        if tokens[0] == "Packer":
            if len(tokens[2]) == 1:
                tax = [PRE, PACK, SUF]
            else:
                tax = [PRE, PRE, PACK]
        elif tokens[2].isnumeric() or re.match(r"^Gen[0-9]+$", tokens[2]):
            tax = [PRE, FAM, SUF]
        elif tokens[2].isupper():
            tax = [PRE, UNK, UNK]  # Bad format
        else:
            tax = [PRE, PRE, FAM]
        return tax

    # TOK.TOK
    def parse_delim_fmt2(self, tokens):
        if tokens[1].isnumeric() or len(tokens[1]) == 1:
            tax = [FAM, SUF]
        elif re.match(r"^CVE[0-9]+$", tokens[1]):
            tax = [PRE, VULN]
        else:
            tax = [PRE, FAM]
        return tax  # Kind of messy format, but parsed ok

    # TOK-TOK.TOK.TOK
    def parse_delim_fmt3(self, tokens):
        if tokens[3].isupper():
            tax = [PRE, PRE, UNK, UNK]
        elif tokens[3].isnumeric():
            tax = [PRE, PRE, FAM, SUF]
        else:
            tax = [CAT, CAT, FILE, FAM]
        return tax

    # TOK-TOK.TOK
    def parse_delim_fmt4(self, tokens):
        if tokens[2].isupper():
            tax = [CAT, CAT, UNK]
        elif tokens[2].isnumeric() or re.match(r"^Gen[0-9]+$", tokens[2]):
            tax = [CAT, CAT, SUF]
        else:
            tax = [CAT, CAT, FAM]
        return tax

    # TOK-TOK-TOK:TOK.TOK
    def parse_delim_fmt5(self, tokens):
        return [PRE, PRE, PRE, PRE, FAM]  # Also kind of messy but parsed ok

    # TOK.TOK.TOK.TOK
    def parse_delim_fmt6(self, tokens):
        if tokens[1].isnumeric():
            tax = [FAM, SUF, SUF, SUF]
        elif tokens[3].isnumeric() or tokens[3].islower():
            if (tokens[2].isupper() or len(tokens[2]) <= 2) and tokens[2] != "VB":
                tax = [PRE, UNK, UNK, SUF]
            else:
                tax = [PRE, PRE, FAM, SUF]
        elif len(tokens[3]) <= 2 and tokens[2] != "VB":
            tax = [PRE, PRE, FAM, SUF]
        elif tokens[3].isupper():
            tax = [PRE, PRE, UNK, UNK]  # Bad format
        elif tokens[3] == "Based":
            tax = [PRE, PRE, FAM, SUF]
        else:
            tax = [PRE, PRE, PRE, FAM]
        return tax

    # TOK-TOK-TOK:TOK.TOK.TOK
    def parse_delim_fmt7(self, tokens):
        # The last three tokens are labelled by the TOK.TOK.TOK rules.
        return [PRE, PRE, PRE] + self.parse_delim_fmt1(tokens[3:])

    # TOK-TOK.TOK.TOK.TOK
    def parse_delim_fmt8(self, tokens):
        if tokens[3].isupper() and tokens[3] != "VB":
            tax = [PRE, PRE, UNK, UNK, SUF]
        elif tokens[3].isnumeric():
            tax = [PRE, PRE, FAM, SUF, SUF]
        else:
            tax = [CAT, CAT, FILE, FAM, SUF]
        return tax

    # TOK-TOK
    def parse_delim_fmt9(self, tokens):
        if tokens[1].isnumeric():
            tax = [FAM, SUF]
        elif tokens[0] in ["Tojan", "Trojan"]:
            tax = [CAT, CAT]
        else:
            tax = [FAM, FAM]
        return tax

    # TOK
    def parse_delim_fmt10(self, tokens):
        return [FAM]
| def parse_delim_fmt3(self, tokens): 24 | return [PRE, FAM, SUF] 25 | 26 | # TOK/TOK-TOK + TOK/TOK-TOK 27 | def parse_delim_fmt4(self, tokens): 28 | return [PRE, PRE, SUF, PRE, FAM, SUF] 29 | -------------------------------------------------------------------------------- /claravy/parsers/parse_jiangmin.py: -------------------------------------------------------------------------------- 1 | import re 2 | from claravy.taxonomy import * 3 | 4 | 5 | class Parse_Jiangmin: 6 | 7 | def __init__(self): 8 | self.parse_delim_fmt = { 9 | "TOK/TOK.TOK": self.parse_delim_fmt1, 10 | "TOK.TOK.TOK": self.parse_delim_fmt2, 11 | "TOK/TOK.TOK.TOK": self.parse_delim_fmt3, 12 | "TOK.TOK.TOK.TOK": self.parse_delim_fmt4, 13 | "TOK-TOK/TOK.TOK": self.parse_delim_fmt5, 14 | } 15 | 16 | # TOK/TOK.TOK 17 | def parse_delim_fmt1(self, tokens): 18 | return [PRE, FAM, SUF] 19 | 20 | # TOK.TOK.TOK 21 | def parse_delim_fmt2(self, tokens): 22 | tax = [UNK, UNK, UNK] 23 | if re.match(r"^MS[0-9]+$", tokens[1]): 24 | tax = [PRE, VULN, SUF] 25 | elif tokens[1].isnumeric(): 26 | tax = [FAM, SUF, SUF] 27 | elif tokens[2].islower() or tokens[2].isnumeric() or tokens[2].startswith("Gen"): 28 | tax = [PRE, FAM, SUF] 29 | elif tokens[2].isupper(): 30 | tax = [PRE, UNK, UNK] 31 | else: 32 | tax = [PRE, PRE, FAM] 33 | return tax 34 | 35 | # TOK/TOK.TOK.TOK 36 | def parse_delim_fmt3(self, tokens): 37 | tax = [PRE, UNK, UNK, SUF] 38 | if tokens[2].isnumeric() or tokens[2].islower() or tokens[2] == "Gen": 39 | tax = [PRE, FAM, SUF, SUF] 40 | elif len(tokens[2]) <= 2 and tokens[2] != "VB": 41 | tax = [PRE, UNK, UNK, SUF] # Bad format 42 | else: 43 | tax = [PRE, PRE, FAM, SUF] 44 | return tax 45 | 46 | # TOK.TOK.TOK.TOK 47 | def parse_delim_fmt4(self, tokens): 48 | tax = [PRE, UNK, UNK, SUF] 49 | if tokens[1].isnumeric(): 50 | tax = [FAM, SUF, SUF, SUF] 51 | elif tokens[2].isnumeric() or tokens[2].islower() or tokens[2] == "Gen": 52 | tax = [PRE, FAM, SUF, SUF] 53 | elif len(tokens[2]) <= 2 and tokens[2] != "VB": 
54 | tax = [PRE, UNK, UNK, SUF] # Bad format 55 | else: 56 | tax = [PRE, PRE, FAM, SUF] 57 | return tax 58 | 59 | # TOK-TOK/TOK.TOK 60 | def parse_delim_fmt5(self, tokens): 61 | tax = [CAT, CAT, UNK, UNK] 62 | if tokens[3].isnumeric() or tokens[3].islower() or tokens[3] == "Gen": 63 | tax = [CAT, CAT, FAM, SUF] 64 | elif tokens[2].isupper(): 65 | tax = [CAT, CAT, PRE, FAM] 66 | else: 67 | tax = [CAT, CAT, UNK, UNK] # Bad format 68 | return tax 69 | -------------------------------------------------------------------------------- /claravy/parsers/parse_k7antivirus.py: -------------------------------------------------------------------------------- 1 | from claravy.taxonomy import * 2 | 3 | 4 | class Parse_K7antivirus: # K7antivirus and K7GW both owned by K7 Computing 5 | 6 | def __init__(self): 7 | self.parse_delim_fmt = { 8 | "TOK ( TOK )": self.parse_delim_fmt1, 9 | "TOK": self.parse_delim_fmt2, 10 | "TOK-TOK ( TOK )": self.parse_delim_fmt3, 11 | "TOK-TOK": self.parse_delim_fmt4, 12 | "TOK.TOK.TOK.TOK": self.parse_delim_fmt5, 13 | "TOK.TOK.TOK": self.parse_delim_fmt6, 14 | "TOK-TOK.TOK.TOK.TOK": self.parse_delim_fmt7, 15 | "TOK-TOK.TOK.TOK": self.parse_delim_fmt8, 16 | } 17 | 18 | # TOK ( TOK ) 19 | def parse_delim_fmt1(self, tokens): 20 | return [CAT, SUF, NULL] 21 | 22 | # TOK 23 | def parse_delim_fmt2(self, tokens): 24 | return [CAT] 25 | 26 | # TOK-TOK ( TOK ) 27 | def parse_delim_fmt3(self, tokens): 28 | return [CAT, CAT, SUF, NULL] 29 | 30 | # TOK-TOK 31 | def parse_delim_fmt4(self, tokens): 32 | return [CAT, CAT] 33 | 34 | # TOK.TOK.TOK.TOK 35 | def parse_delim_fmt5(self, tokens): 36 | return [CAT, FILE, FAM, SUF] 37 | 38 | # TOK.TOK.TOK 39 | def parse_delim_fmt6(self, tokens): 40 | return [CAT, FILE, FAM] 41 | 42 | # TOK-TOK.TOK.TOK.TOK 43 | def parse_delim_fmt7(self, tokens): 44 | return [CAT, CAT, FILE, FAM, SUF] 45 | 46 | # TOK-TOK.TOK.TOK 47 | def parse_delim_fmt8(self, tokens): 48 | tax = [CAT, CAT, FILE, UNK] 49 | if tokens[3].isupper(): 50 | tax = 
class Parse_K7gw: # K7antivirus and K7GW both owned by K7 Computing
    """Label the tokens of K7GW detection names.

    ``parse_delim_fmt`` maps each supported delimiter format to the method
    that returns the taxonomy labels for names of that shape.
    """

    def __init__(self):
        handlers = {}
        handlers["TOK ( TOK )"] = self.parse_delim_fmt1
        handlers["TOK"] = self.parse_delim_fmt2
        handlers["TOK-TOK ( TOK )"] = self.parse_delim_fmt3
        handlers["TOK-TOK"] = self.parse_delim_fmt4
        self.parse_delim_fmt = handlers

    # TOK ( TOK )
    def parse_delim_fmt1(self, tokens):
        """Labels for ``TOK ( TOK )``; the list ends with a ``NULL`` label."""
        labels = [CAT, SUF, NULL]
        return labels

    # TOK
    def parse_delim_fmt2(self, tokens):
        """Labels for a single-token name."""
        labels = [CAT]
        return labels

    # TOK-TOK ( TOK )
    def parse_delim_fmt3(self, tokens):
        """Labels for ``TOK-TOK ( TOK )``; the list ends with a ``NULL`` label."""
        labels = [CAT, CAT, SUF, NULL]
        return labels

    # TOK-TOK
    def parse_delim_fmt4(self, tokens):
        """Labels for ``TOK-TOK``."""
        labels = [CAT, CAT]
        return labels
TOK-TOK.TOK.TOK.TOK 25 | def parse_delim_fmt2(self, tokens): 26 | return [CAT, CAT, FILE, FAM, SUF] 27 | 28 | # TOK:TOK.TOK.TOK 29 | def parse_delim_fmt3(self, tokens): 30 | return [PRE, CAT, FILE, FAM] 31 | 32 | # TOK-TOK-TOK:TOK.TOK.TOK.TOK 33 | def parse_delim_fmt4(self, tokens): 34 | return [PRE, PRE, PRE, CAT, FILE, FAM, SUF] 35 | 36 | # TOK-TOK-TOK:TOK:TOK.TOK.TOK.TOK 37 | def parse_delim_fmt5(self, tokens): 38 | return [PRE, PRE, PRE, PRE, CAT, FILE, FAM, SUF] 39 | 40 | # TOK:TOK.TOK.TOK.TOK 41 | def parse_delim_fmt6(self, tokens): 42 | return [PRE, CAT, FILE, FAM, SUF] 43 | 44 | # TOK-TOK-TOK:TOK:TOK.TOK.TOK 45 | def parse_delim_fmt7(self, tokens): 46 | return [PRE, PRE, PRE, PRE, CAT, FILE, FAM] 47 | 48 | # TOK:TOK-TOK.TOK.TOK.TOK 49 | def parse_delim_fmt8(self, tokens): 50 | return [PRE, CAT, CAT, FILE, FAM, SUF] 51 | 52 | # TOK.TOK.TOK.TOK.TOK 53 | def parse_delim_fmt9(self, tokens): 54 | if tokens[3].isnumeric(): 55 | if len(tokens[2]) <= 2 and tokens[2] != "VB": 56 | tax = [CAT, PRE, SUF, SUF, SUF] 57 | else: 58 | tax = [CAT, PRE, FAM, SUF, SUF] 59 | elif tokens[3].islower() and not any([c.isdigit() for c in tokens[3]]): 60 | tax = [CAT, FILE, FAM, SUF, SUF] 61 | elif len(tokens[3]) <= 2 and tokens[3] != "VB": 62 | if tokens[2].isupper(): 63 | tax = [CAT, FILE, UNK, SUF, SUF] # Bad format 64 | else: 65 | tax = [CAT, FILE, FAM, SUF, SUF] 66 | else: 67 | tax = [CAT, FILE, UNK, UNK, SUF] # Bad format 68 | return tax 69 | 70 | # TOK:TOK-TOK.TOK.TOK 71 | def parse_delim_fmt10(self, tokens): 72 | if tokens[4] == "Generic": 73 | tax = [PRE, CAT, CAT, FILE, PRE] 74 | else: 75 | tax = [PRE, CAT, CAT, UNK, UNK] # Bad format 76 | return tax 77 | 78 | -------------------------------------------------------------------------------- /claravy/parsers/parse_kingsoft.py: -------------------------------------------------------------------------------- 1 | from claravy.taxonomy import * 2 | 3 | 4 | class Parse_Kingsoft: 5 | 6 | def __init__(self): 7 | 
self.parse_delim_fmt = { 8 | "TOK.TOK.TOK.TOK.(TOK)": self.parse_delim_fmt1, 9 | "TOK.TOK.TOK.(TOK)": self.parse_delim_fmt2, 10 | "TOK.TOK.TOK.TOK": self.parse_delim_fmt3, 11 | "TOK.TOK.TOK_TOK.TOK.(TOK)": self.parse_delim_fmt4, 12 | "TOK_TOK": self.parse_delim_fmt5, 13 | "TOK.TOK.TOK.TOK.TOK": self.parse_delim_fmt6, 14 | } 15 | 16 | # TOK.TOK.TOK.TOK.(TOK) 17 | def parse_delim_fmt1(self, tokens): 18 | tax = [PRE, PRE, UNK, SUF, SUF, NULL] 19 | if tokens[2].isnumeric(): 20 | tax = [PRE, PRE, SUF, SUF, SUF, NULL] 21 | elif tokens[1].startswith("Heur"): 22 | tax = [PRE, PRE, SUF, SUF, SUF, NULL] 23 | else: 24 | tax = [PRE, PRE, FAM, SUF, SUF, NULL] 25 | return tax 26 | 27 | # TOK.TOK.TOK.(TOK) 28 | def parse_delim_fmt2(self, tokens): 29 | tax = [PRE, UNK, UNK, SUF, NULL] 30 | if len(tokens[2]) == 1 or tokens[2].islower(): 31 | tax = [PRE, FAM, SUF, SUF, NULL] 32 | else: 33 | tax = [PRE, PRE, FAM, SUF, NULL] 34 | return tax 35 | 36 | # TOK.TOK.TOK.TOK 37 | def parse_delim_fmt3(self, tokens): 38 | tax = [PRE, UNK, UNK, SUF] 39 | if len(tokens[2]) == 1 or tokens[2].islower(): 40 | tax = [PRE, FAM, SUF, SUF] 41 | else: 42 | tax = [PRE, PRE, FAM, SUF] 43 | return tax 44 | 45 | # TOK.TOK.TOK_TOK.TOK.(TOK) 46 | def parse_delim_fmt4(self, tokens): 47 | tax = [PRE, PRE, UNK, UNK, SUF, SUF, NULL] 48 | if tokens[2] == "Heur" and tokens[3] == "Generic": 49 | tax = [PRE, PRE, PRE, PRE, SUF, SUF, NULL] 50 | elif tokens[3].isnumeric() or len(tokens[3]) <= 2: 51 | tax = [PRE, PRE, FAM, SUF, SUF, SUF, NULL] 52 | elif tokens[3].islower(): 53 | tax = [PRE, PRE, SUF, FAM, SUF, SUF, NULL] 54 | elif tokens[3].isupper(): 55 | tax = [PRE, PRE, PRE, SUF, SUF, SUF, NULL] 56 | elif tokens[2].islower(): 57 | tax = [PRE, PRE, SUF, FAM, SUF, SUF, NULL] 58 | else: 59 | tax = [PRE, PRE, UNK, UNK, SUF, SUF, NULL] 60 | return tax 61 | 62 | # TOK_TOK 63 | def parse_delim_fmt5(self, tokens): 64 | return [PRE, PRE] 65 | 66 | # TOK.TOK.TOK.TOK.TOK 67 | def parse_delim_fmt6(self, tokens): 68 | return 
[PRE, CAT, FAM, SUF, SUF] 69 | -------------------------------------------------------------------------------- /claravy/parsers/parse_lionic.py: -------------------------------------------------------------------------------- 1 | from claravy.taxonomy import * 2 | 3 | 4 | class Parse_Lionic: 5 | 6 | def __init__(self): 7 | self.parse_delim_fmt = { 8 | "TOK.TOK.TOK.TOK!TOK": self.parse_delim_fmt1, 9 | "TOK.TOK.TOK.TOK": self.parse_delim_fmt2, 10 | "TOK.TOK.TOK!TOK": self.parse_delim_fmt3, 11 | "TOK.TOK.TOK": self.parse_delim_fmt4, 12 | "TOK.TOK.TOK.TOK.TOK!TOK": self.parse_delim_fmt5, 13 | } 14 | 15 | # TOK.TOK.TOK.TOK!TOK 16 | def parse_delim_fmt1(self, tokens): 17 | tax = [PRE, PRE, UNK, UNK, SUF] 18 | if tokens[3].islower() or tokens[3].isnumeric() or len(tokens[3]) == 1: 19 | tax = [PRE, PRE, FAM, SUF, SUF] 20 | else: 21 | tax = [PRE, PRE, PRE, FAM, SUF] 22 | return tax 23 | 24 | # TOK.TOK.TOK.TOK 25 | def parse_delim_fmt2(self, tokens): 26 | return [PRE, PRE, FAM, SUF] 27 | 28 | # TOK.TOK.TOK!TOK 29 | def parse_delim_fmt3(self, tokens): 30 | tax = [PRE, UNK, UNK, SUF] 31 | if tokens[2].islower() or tokens[2].isnumeric() or len(tokens[2]) == 1: 32 | tax = [PRE, FAM, SUF, SUF] 33 | elif len(tokens[2]) <= 3: 34 | tax = [PRE, UNK, UNK, SUF] # Bad format 35 | elif tokens[2].startswith("Gen"): 36 | tax = [PRE, UNK, UNK, SUF] # Bad format 37 | else: 38 | tax = [PRE, PRE, FAM, SUF] 39 | return tax 40 | 41 | # TOK.TOK.TOK 42 | def parse_delim_fmt4(self, tokens): 43 | if len(tokens[2]) == 4 and tokens[2][0].islower(): 44 | if tokens[1].startswith("Gen"): 45 | tax = [PRE, PRE, SUF] 46 | else: 47 | tax = [PRE, FAM, SUF] 48 | elif tokens[2].startswith("Gen"): 49 | tax = [PRE, PRE, SUF] 50 | else: 51 | tax = [PRE, PRE, FAM] 52 | return tax 53 | 54 | # TOK.TOK.TOK.TOK.TOK!TOK 55 | def parse_delim_fmt5(self, tokens): 56 | tax = [PRE, PRE, UNK, UNK, SUF, SUF] 57 | if tokens[4].isnumeric(): 58 | tax = [PRE, PRE, UNK, UNK, SUF, SUF] # Bad format 59 | elif tokens[3].isnumeric(): 
60 | tax = [PRE, PRE, FAM, SUF, SUF, SUF] 61 | else: 62 | tax = [PRE, PRE, PRE, FAM, SUF, SUF] 63 | return tax 64 | -------------------------------------------------------------------------------- /claravy/parsers/parse_malwarebytes.py: -------------------------------------------------------------------------------- 1 | from claravy.taxonomy import * 2 | 3 | 4 | class Parse_Malwarebytes: 5 | 6 | def __init__(self): 7 | self.parse_delim_fmt = { 8 | "TOK.TOK.TOK": self.parse_delim_fmt1, 9 | "TOK.TOK": self.parse_delim_fmt2, 10 | "TOK.TOK.TOK.TOK": self.parse_delim_fmt3, 11 | "TOK/TOK.TOK%": self.parse_delim_fmt4, 12 | "TOK.TOK/TOK": self.parse_delim_fmt5, 13 | "TOK.TOK.TOK.TOK.TOK": self.parse_delim_fmt6, 14 | } 15 | 16 | # TOK.TOK.TOK 17 | def parse_delim_fmt1(self, tokens): 18 | if tokens[1].lower() in ["malpack", "packer"]: 19 | tax = [PRE, PRE, UNK] 20 | elif tokens[2].isupper(): 21 | if len(tokens[2]) <= 2 and tokens[2] != "VB": 22 | tax = [PRE, FAM, SUF] 23 | else: 24 | tax = [PRE, UNK, UNK] # Bad format 25 | elif tokens[2].isnumeric(): 26 | if tokens[1].isupper(): 27 | tax = [PRE, SUF, SUF] 28 | else: 29 | tax = [PRE, FAM, SUF] 30 | elif "Gen" in tokens[2] or tokens[2].islower(): 31 | tax = [PRE, FAM, SUF] 32 | else: 33 | tax = [PRE, PRE, FAM] 34 | return tax 35 | 36 | # TOK.TOK 37 | def parse_delim_fmt2(self, tokens): 38 | return [CAT, FAM] 39 | 40 | # TOK.TOK.TOK.TOK 41 | def parse_delim_fmt3(self, tokens): 42 | if tokens[0].isupper(): 43 | tax = [PRE, PRE, FAM, SUF] 44 | elif tokens[3] == "DDS": 45 | tax = [FAM, CAT, UNK, SUF] # Really weird format 46 | elif tokens[0] == "Dont": 47 | tax = [PRE, PRE, PRE, PRE] # Another weird format - "Dont steal our software" 48 | elif tokens[2].isupper() and tokens[2] != "VB": 49 | tax = [PRE, FAM, SUF, SUF] 50 | elif tokens[2] == "Gen": 51 | tax = [PRE, FAM, SUF, SUF] 52 | else: 53 | tax = [PRE, UNK, UNK, SUF] # Bad format 54 | return tax 55 | 56 | # TOK/TOK.TOK% 57 | def parse_delim_fmt4(self, tokens): 58 | return [PRE, 
class Parse_Max:
    """Label the tokens of Max detection names.

    Only one delimiter format is supported; ``parse_delim_fmt`` maps it to
    its handler.
    """

    def __init__(self):
        self.parse_delim_fmt = {"TOK (TOK TOK=TOK)": self.parse_delim_fmt1}

    # TOK (TOK TOK=TOK)
    def parse_delim_fmt1(self, tokens):
        """Return the fixed label list for ``TOK (TOK TOK=TOK)``."""
        return [PRE] + [SUF] * 3 + [NULL]
or len(tokens[2]) <= 2: 30 | tax = [PRE, FAM, SUF] 31 | elif re.match(r"^.*Gen[0-9]*$", tokens[2]) or tokens[2] == "Dam": 32 | tax = [PRE, FAM, SUF] 33 | else: 34 | tax = [PRE, PRE, FAM] 35 | return tax 36 | 37 | # TOK.TOK.TOK.TOK.TOK 38 | def parse_delim_fmt3(self, tokens): 39 | if tokens[:3] == ["Not", "a", "virus"]: 40 | tax = [PRE, PRE, PRE, PRE, FAM] 41 | elif tokens[3].isnumeric(): 42 | tax = [CAT, FILE, FAM, SUF, SUF] 43 | elif len(tokens[3]) <= 2 and tokens[3] != "VB": 44 | tax = [CAT, PRE, FAM, SUF, SUF] 45 | else: 46 | tax = [CAT, PRE, FILE, FAM, SUF] 47 | return tax 48 | 49 | # TOK-TOK-TOK:TOK.TOK.TOK 50 | def parse_delim_fmt4(self, tokens): 51 | return [PRE, PRE, PRE, CAT, FAM, SUF] 52 | 53 | # TOK-TOK-TOK-TOK-TOK.TOK 54 | def parse_delim_fmt5(self, tokens): 55 | return [PRE, PRE, PRE, CAT, FAM, SUF] 56 | -------------------------------------------------------------------------------- /claravy/parsers/parse_microsoft.py: -------------------------------------------------------------------------------- 1 | import re 2 | from claravy.taxonomy import * 3 | 4 | 5 | class Parse_Microsoft: 6 | 7 | def __init__(self): 8 | self.parse_delim_fmt = { 9 | "TOK:TOK/TOK.TOK": self.parse_delim_fmt1, 10 | "TOK:TOK/TOK": self.parse_delim_fmt2, 11 | "TOK:TOK/TOK.TOK!TOK": self.parse_delim_fmt3, 12 | "TOK:TOK/TOK!TOK": self.parse_delim_fmt4, 13 | "TOK:TOK/TOK.TOK@TOK": self.parse_delim_fmt5, 14 | } 15 | 16 | # TOK:TOK/TOK.TOK 17 | def parse_delim_fmt1(self, tokens): 18 | tax = [CAT, FILE, UNK, SUF] 19 | if re.match(r"^MS[0-9]+$", tokens[2]): 20 | tax = [CAT, FILE, VULN, SUF] 21 | else: 22 | tax = [CAT, FILE, FAM, SUF] 23 | return tax 24 | 25 | # TOK:TOK/TOK 26 | def parse_delim_fmt2(self, tokens): 27 | tax = [CAT, FILE, UNK] 28 | if re.match(r"^MS[0-9]+$", tokens[2]): 29 | tax = [CAT, FILE, VULN] 30 | else: 31 | tax = [CAT, FILE, FAM] 32 | return tax 33 | 34 | # TOK:TOK/TOK.TOK!TOK 35 | def parse_delim_fmt3(self, tokens): 36 | tax = [CAT, FILE, UNK, SUF, SUF] 37 | if 
re.match(r"^MS[0-9]+$", tokens[2]): 38 | tax = [CAT, FILE, VULN, SUF, SUF] 39 | else: 40 | tax = [CAT, FILE, FAM, SUF, SUF] 41 | return tax 42 | 43 | # TOK:TOK/TOK!TOK 44 | def parse_delim_fmt4(self, tokens): 45 | tax = [CAT, FILE, UNK, SUF] 46 | if re.match(r"^MS[0-9]+$", tokens[2]): 47 | tax = [CAT, FILE, VULN, SUF] 48 | else: 49 | tax = [CAT, FILE, FAM, SUF] 50 | return tax 51 | 52 | # TOK:TOK/TOK.TOK@TOK 53 | def parse_delim_fmt5(self, tokens): 54 | tax = [CAT, FILE, UNK, SUF, SUF] 55 | if re.match(r"^MS[0-9]+$", tokens[2]): 56 | tax = [CAT, FILE, VULN, SUF, SUF] 57 | else: 58 | tax = [CAT, FILE, FAM, SUF, SUF] 59 | return tax 60 | -------------------------------------------------------------------------------- /claravy/parsers/parse_microworldescan.py: -------------------------------------------------------------------------------- 1 | import re 2 | from claravy.taxonomy import * 3 | 4 | 5 | class Parse_Microworldescan: # Runs on BitDefender engine 6 | 7 | def __init__(self): 8 | self.parse_delim_fmt = { 9 | "TOK.TOK.TOK": self.parse_delim_fmt1, 10 | "TOK.TOK.TOK.TOK": self.parse_delim_fmt2, 11 | "TOK:TOK.TOK.TOK": self.parse_delim_fmt3, 12 | "TOK:TOK.TOK.TOK.TOK": self.parse_delim_fmt4, 13 | "TOK:TOK.TOK.TOK.TOK.TOK": self.parse_delim_fmt5, 14 | "TOK:TOK.TOK.TOK@TOK": self.parse_delim_fmt6, 15 | "TOK.TOK.TOK.TOK.TOK": self.parse_delim_fmt7, 16 | "TOK:TOK.TOK.TOK.TOK@TOK": self.parse_delim_fmt8, 17 | } 18 | 19 | # TOK.TOK.TOK 20 | def parse_delim_fmt1(self, tokens): 21 | # Probably not perfect, but unsure if it can be improved. Might be missing some packers. 
22 | tax = [NULL, NULL, NULL] 23 | if tokens[2].isnumeric(): 24 | tax = [PRE, FAM, SUF] 25 | elif re.match(r"[0-9A-F]{8}", tokens[2]): 26 | tax = [PRE, FAM, SUF] 27 | elif tokens[1].isnumeric(): 28 | tax = [FAM, SUF, SUF] 29 | elif len(tokens[2]) <= 3 or (len(tokens[2]) == 4 and tokens[2].isupper()): 30 | tax = [PRE, FAM, SUF] 31 | else: 32 | tax = [PRE, PRE, FAM] 33 | return tax 34 | 35 | # TOK.TOK.TOK.TOK 36 | def parse_delim_fmt2(self, tokens): 37 | tax = [NULL, NULL, NULL, NULL] 38 | if tokens[0] == "Packer": 39 | tax = [PRE, PRE, PACK, SUF] 40 | elif tokens[3].isnumeric(): 41 | if tokens[2] in ["Gen", "GenericKD"] or len(tokens[2]) <= 2: 42 | tax = [PRE, FAM, SUF, SUF] 43 | elif tokens[2].isupper() and len(tokens[2]) <= 3: 44 | tax = [PRE, FAM, SUF, SUF] 45 | else: 46 | tax = [PRE, PRE, FAM, SUF] 47 | elif len(tokens[2]) <= 3 and tokens[2] != "VB": 48 | tax = [PRE, FAM, SUF, SUF] 49 | elif tokens[2].isnumeric(): 50 | tax = [PRE, FAM, SUF, SUF] 51 | else: 52 | tax = [PRE, PRE, FAM, SUF] 53 | return tax 54 | 55 | # TOK:TOK.TOK.TOK 56 | def parse_delim_fmt3(self, tokens): 57 | tax = [PRE, UNK, UNK, SUF] 58 | if tokens[1] == "Packer": 59 | tax = [PRE, PRE, PACK, SUF] 60 | else: 61 | tax = [PRE, PRE, FAM, SUF] 62 | return tax 63 | 64 | # TOK:TOK.TOK.TOK.TOK 65 | def parse_delim_fmt4(self, tokens): 66 | tax = [PRE, UNK, UNK, UNK, SUF] 67 | if tokens[1] == "Packer": 68 | tax = [PRE, PRE, PACK, SUF, SUF] 69 | elif tokens[3] == "Gen" or tokens[3].isnumeric() or tokens[3].isupper(): 70 | tax = [PRE, PRE, FAM, SUF, SUF] 71 | else: 72 | tax = [PRE, PRE, PRE, FAM, SUF] 73 | return tax 74 | 75 | # TOK:TOK.TOK.TOK.TOK.TOK 76 | def parse_delim_fmt5(self, tokens): 77 | tax = [PRE, PRE, PRE, UNK, UNK, SUF] 78 | if tokens[4] == "Gen" or tokens[4].isnumeric() or (len(tokens[4]) <= 2 and tokens[4] != "VB"): 79 | if tokens[3].isnumeric() or len(tokens[3]) <= 2: 80 | tax = [PRE, PRE, FAM, SUF, SUF, SUF] 81 | else: 82 | tax = [PRE, PRE, PRE, FAM, SUF, SUF] 83 | else: 84 | tax = [PRE, 
PRE, PRE, PRE, FAM, SUF] 85 | return tax 86 | 87 | # TOK:TOK.TOK.TOK@TOK 88 | def parse_delim_fmt6(self, tokens): 89 | if tokens[2] == "Heur": 90 | tax = [PRE, PRE, PRE, SUF, SUF] 91 | else: 92 | tax = [PRE, PRE, FAM, SUF, SUF] 93 | return tax 94 | 95 | # TOK.TOK.TOK.TOK.TOK 96 | def parse_delim_fmt7(self, tokens): 97 | if re.match(r"[0-9A-F]{8}", tokens[4]): 98 | if tokens[3].isnumeric(): 99 | tax = [PRE, PRE, FAM, SUF, SUF] 100 | elif tokens[3].isupper() and len(tokens[3]) <= 3: 101 | tax = [PRE, PRE, PRE, SUF, SUF] 102 | else: 103 | tax = [PRE, PRE, PRE, FAM, SUF] 104 | elif tokens[2] == "CVE": 105 | tax = [PRE, PRE, VULN, VULN, VULN] 106 | elif re.match(r"M[Ss][0-9]{2}", tokens[2]) and tokens[3].isnumeric(): 107 | tax = [PRE, PRE, VULN, VULN, SUF] 108 | elif "plugin" in [tokens[2].lower(), tokens[3].lower(), tokens[4].lower()]: 109 | tax = [PRE, FAM, SUF, SUF, SUF] 110 | elif tokens[3].isnumeric() or tokens[3] == "Gen": 111 | if tokens[2].isnumeric() or tokens[2] in ["Dropper", "Based"]: 112 | tax = [PRE, FAM, SUF, SUF, SUF] 113 | elif len(tokens[2]) <= 2 and tokens[2] != "VB": 114 | tax = [PRE, FAM, SUF, SUF, SUF] 115 | elif tokens[2] == "Gen": 116 | tax = [PRE, FAM, SUF, SUF, SUF] 117 | else: 118 | tax = [PRE, PRE, FAM, SUF, SUF] 119 | elif tokens[2].isnumeric(): 120 | tax = [PRE, FAM, SUF, SUF, SUF] 121 | elif tokens[3].islower(): 122 | tax = [PRE, PRE, FAM, SUF, SUF] 123 | elif len(tokens[3]) == 2: 124 | if re.match(r"V[0-9]", tokens[3]): 125 | tax = [PRE, PRE, FAM, SUF, SUF] 126 | elif tokens[1].lower() == "aol": 127 | tax = [PRE, PRE, PRE, SUF, SUF] 128 | elif tokens[3].lower() == "vb": 129 | tax = [PRE, PRE, PRE, PRE, SUF] 130 | elif tokens[2] == "VB": 131 | tax = [PRE, PRE, FAM, SUF, SUF] 132 | elif len(tokens[2]) <= 3: 133 | tax = [PRE, FAM, SUF, SUF, SUF] 134 | else: 135 | tax = [PRE, PRE, FAM, SUF, SUF] 136 | elif tokens[4].isnumeric(): 137 | if len(tokens[3]) <= 3: 138 | if len(tokens[2]) <= 3: 139 | tax = [PRE, FAM, SUF, SUF, SUF] 140 | else: 141 | 
class Parse_Nanoantivirus:
    """Token-labelling rules for Nanoantivirus detection names.

    ``parse_delim_fmt`` maps each supported delimiter format to the bound
    method that returns the list of taxonomy labels for that format.
    """

    def __init__(self):
        formats = (
            ("TOK.TOK.TOK.TOK", self.parse_delim_fmt1),
            ("TOK.TOK.TOK-TOK.TOK", self.parse_delim_fmt2),
            ("TOK.TOK.TOK", self.parse_delim_fmt3),
            ("TOK.TOK.TOK-TOK-TOK.TOK", self.parse_delim_fmt4),
        )
        self.parse_delim_fmt = dict(formats)

    # TOK.TOK.TOK.TOK
    def parse_delim_fmt1(self, tokens):
        """Return the fixed labels for a four-token name."""
        return [CAT, FILE, FAM, SUF]

    # TOK.TOK.TOK-TOK.TOK
    def parse_delim_fmt2(self, tokens):
        """Label a five-token name from its hyphenated third and fourth tokens.

        The checks run in order: an ``MS<digits>`` fourth token, an
        ``MS<digits>`` third token followed by a numeric fourth token, a
        generic marker ("gen"/"heuristic", case-insensitive) in the third
        token, then a generic marker, lower-case or numeric fourth token.
        Anything else is left as UNK.
        """
        third, fourth = tokens[2], tokens[3]
        generic_markers = ["gen", "heuristic"]
        if re.match(r"^MS[0-9]+$", fourth):
            return [CAT, FILE, PRE, VULN, SUF]
        if re.match(r"^MS[0-9]+$", third) and fourth.isnumeric():
            return [CAT, FILE, VULN, VULN, SUF]
        if third.lower() in generic_markers:
            return [CAT, FILE, SUF, UNK, SUF]
        if fourth.lower() in generic_markers or fourth.islower() or fourth.isnumeric():
            return [CAT, FILE, FAM, SUF, SUF]
        # TODO: Bad format but might be able to be parsed more
        return [CAT, FILE, UNK, UNK, SUF]

    # TOK.TOK.TOK
    def parse_delim_fmt3(self, tokens):
        """Label a three-token name from the form of its middle token.

        A middle token starting with ``CVE<digits>`` is VULN; an all-caps
        token other than "VB" that contains no digit is SUF; anything else
        is FAM.
        """
        middle = tokens[1]
        if re.match(r"CVE[0-9]+", middle):
            return [CAT, VULN, SUF]
        all_caps = middle.isupper() and middle != "VB"
        if all_caps and not any(ch.isdigit() for ch in middle):
            return [CAT, SUF, SUF]
        return [CAT, FAM, SUF]

    # TOK.TOK.TOK-TOK-TOK.TOK
    def parse_delim_fmt4(self, tokens):
        """Label a six-token name whose middle three tokens are hyphenated.

        "cve"/"can" (case-insensitive) followed by two numeric tokens gives
        three VULN labels; a leading "Gen" gives three PRE labels; otherwise
        the three middle tokens stay UNK.
        """
        head = tokens[2]
        if head.lower() in ["cve", "can"] and tokens[3].isnumeric() and tokens[4].isnumeric():
            return [CAT, FILE, VULN, VULN, VULN, SUF]
        # TODO: Bad format but might be able to be parsed more (UNK case)
        middle_label = PRE if head == "Gen" else UNK
        return [CAT, FILE, middle_label, middle_label, middle_label, SUF]
class Parse_Norman:
    """Token-labelling rules for Norman detection names.

    ``parse_delim_fmt`` maps each supported delimiter format to the bound
    method that returns the list of taxonomy labels for that format.
    """

    def __init__(self):
        self.parse_delim_fmt = {
            "TOK.TOK": self.parse_delim_fmt1,
            "TOK/TOK.TOK": self.parse_delim_fmt2,
            "TOK_TOK.TOK": self.parse_delim_fmt3,
            "TOK/TOK_TOK.TOK": self.parse_delim_fmt4,
            "TOK/TOK.TOK!TOK": self.parse_delim_fmt5,
            "TOK": self.parse_delim_fmt6,
            "TOK.TOK!TOK": self.parse_delim_fmt7,
        }

    # TOK.TOK
    def parse_delim_fmt1(self, tokens):
        """Return the fixed labels for a two-token name."""
        return [FAM, SUF]

    # TOK/TOK.TOK
    def parse_delim_fmt2(self, tokens):
        """Return the fixed labels for a three-token name."""
        return [FILE, FAM, SUF]

    # TOK_TOK.TOK
    def parse_delim_fmt3(self, tokens):
        """Label a three-token ``TOK_TOK.TOK`` name.

        A leading "Packed" marks the second token as PACK. A second token
        equal to "Generic" or matching ``Gen<digits>`` is a suffix after a
        PRE token. A one-character or lower-case second token is a suffix
        after a FAM token. Anything else is left as UNK.
        """
        if tokens[0] == "Packed":
            return [PRE, PACK, SUF]
        if tokens[1] == "Generic" or re.match(r"^Gen[0-9]*$", tokens[1]):
            return [PRE, SUF, SUF]
        if len(tokens[1]) == 1 or tokens[1].islower():
            return [FAM, SUF, SUF]
        return [UNK, UNK, SUF]  # Bad format

    # TOK/TOK_TOK.TOK
    def parse_delim_fmt4(self, tokens):
        """Label a four-token ``TOK/TOK_TOK.TOK`` name.

        This is the ``TOK_TOK.TOK`` format with a FILE token in front, so
        the remaining tokens are labelled by ``parse_delim_fmt3``. The
        previous copy of those rules ran the ``Gen<digits>`` regex on
        ``tokens[1]`` instead of ``tokens[2]``, so names such as
        ``W32/Suspicious_Gen2.A`` fell through to the bad-format branch.
        """
        return [FILE] + self.parse_delim_fmt3(tokens[1:])

    # TOK/TOK.TOK!TOK
    def parse_delim_fmt5(self, tokens):
        """Return the fixed labels for a four-token name."""
        return [FILE, FAM, SUF, SUF]

    # TOK
    def parse_delim_fmt6(self, tokens):
        """Label a single token: SUF if numeric or all-caps, else FAM."""
        if tokens[0].isnumeric() or tokens[0].isupper():
            return [SUF]
        return [FAM]

    # TOK.TOK!TOK
    def parse_delim_fmt7(self, tokens):
        """Return the fixed labels for a three-token name."""
        return [FAM, SUF, SUF]
class Parse_Panda:
    """Token-labelling rules for Panda detection names.

    ``parse_delim_fmt`` maps each supported delimiter format to the bound
    method that returns the list of taxonomy labels for that format.
    """

    def __init__(self):
        formats = (
            ("TOK/TOK.TOK", self.parse_delim_fmt1),
            ("TOK TOK", self.parse_delim_fmt2),
            ("TOK/TOK", self.parse_delim_fmt3),
            ("TOK/TOK.TOK.TOK", self.parse_delim_fmt4),
            ("TOK.TOK", self.parse_delim_fmt5),
            ("TOK", self.parse_delim_fmt6),
        )
        self.parse_delim_fmt = dict(formats)

    # TOK/TOK.TOK
    def parse_delim_fmt1(self, tokens):
        """Label a three-token name from the form of its middle token.

        A middle token of at most two characters (other than "VB") is SUF,
        an all-caps one is left UNK, and anything else is FAM.
        """
        middle = tokens[1]
        if len(middle) <= 2 and middle != "VB":
            return [PRE, SUF, SUF]
        if middle.isupper():
            return [PRE, UNK, SUF]  # Bad format
        return [PRE, FAM, SUF]

    # TOK TOK
    def parse_delim_fmt2(self, tokens):
        """Return the fixed labels for a two-token, space-separated name."""
        return [PRE] * 2

    # TOK/TOK
    def parse_delim_fmt3(self, tokens):
        """Return the fixed labels for a two-token, slash-separated name."""
        return [PRE, FAM]

    # TOK/TOK.TOK.TOK
    def parse_delim_fmt4(self, tokens):
        """Label a four-token name from the form of its third token.

        An all-caps, all-lower-case, numeric or at-most-two-character third
        token is a suffix (the second token is then FAM); otherwise the
        third token itself is FAM.
        """
        third = tokens[2]
        suffix_like = (
            third.isupper()
            or third.islower()
            or third.isnumeric()
            or len(third) <= 2
        )
        return [PRE, FAM, SUF, SUF] if suffix_like else [PRE, PRE, FAM, SUF]

    # TOK.TOK
    def parse_delim_fmt5(self, tokens):
        """Label a two-token, dot-separated name.

        When the second token is numeric, at most two characters long, or
        "gen" (case-insensitive), the first token is FAM unless it is
        all-caps (then UNK). When the second token is all-caps, the first
        token is FAM unless it is numeric (then SUF). Otherwise both tokens
        are UNK.
        """
        first, second = tokens[0], tokens[1]
        if second.isnumeric() or len(second) <= 2 or second.lower() == "gen":
            return [UNK, SUF] if first.isupper() else [FAM, SUF]
        if second.isupper():
            return [SUF, SUF] if first.isnumeric() else [FAM, SUF]
        return [UNK, UNK]  # Bad format

    # TOK
    def parse_delim_fmt6(self, tokens):
        """Return the fixed label for a single-token name."""
        return [FAM]
class Parse_Qihoo360:  # Previously used Bitdefender and Antivir/Avira engines
    """Token-labelling rules for Qihoo360 detection names.

    ``parse_delim_fmt`` maps each supported delimiter format to the bound
    method that returns the list of taxonomy labels for that format.
    """

    def __init__(self):
        self.parse_delim_fmt = {
            "TOK.TOK.TOK.TOK": self.parse_delim_fmt1,
            "TOK/TOK.TOK.TOK": self.parse_delim_fmt2,
            "TOK/TOK.TOK": self.parse_delim_fmt3,
            "TOK.TOK.TOK": self.parse_delim_fmt4,
            "TOK/TOK.TOK.TOK.TOK": self.parse_delim_fmt5,
            "TOK.TOK.TOK.TOK.TOK": self.parse_delim_fmt6,
            "TOK.TOK": self.parse_delim_fmt7,
        }

    # TOK.TOK.TOK.TOK
    def parse_delim_fmt1(self, tokens):
        """Label a four-token, dot-separated name.

        A numeric second token is treated as a QVM detection. Otherwise the
        third token decides: ``gen<digits>`` or numeric means no family, a
        token of at most two characters (other than "VB") makes the second
        token FAM, and anything else is itself FAM.
        """
        if tokens[1].isnumeric():
            tax = [SUF, SUF, PRE, SUF]  # QVM detections
        elif re.match(r"^[Gg]en[0-9]*$", tokens[2]):
            tax = [PRE, PRE, SUF, SUF]
        elif tokens[2].isnumeric():
            tax = [PRE, PRE, SUF, SUF]
        elif len(tokens[2]) <= 2 and tokens[2] != "VB":
            tax = [PRE, FAM, SUF, SUF]
        else:
            tax = [PRE, PRE, FAM, SUF]
        return tax

    # TOK/TOK.TOK.TOK
    def parse_delim_fmt2(self, tokens):
        """Label a four-token name from its third token.

        ``QVM<digits>`` means no family; a token of at most two characters
        (other than "VB") is SUF; anything else is FAM.
        """
        if re.match(r"^QVM[0-9]*$", tokens[2]):
            tax = [PRE, PRE, SUF, SUF]
        elif len(tokens[2]) <= 2 and tokens[2] != "VB":
            tax = [FILE, CAT, SUF, SUF]
        else:
            tax = [FILE, CAT, FAM, SUF]
        return tax

    # TOK/TOK.TOK
    def parse_delim_fmt3(self, tokens):
        """Return the fixed labels for a three-token name."""
        return [FILE, CAT, SUF]

    # TOK.TOK.TOK
    def parse_delim_fmt4(self, tokens):
        """Label a three-token, dot-separated name.

        Checked in order: a ``QVM<digits>`` second token, a lower-case
        "cve"/"exp" second token followed by a numeric third token (VULN),
        a "gen" third token, a one-character or all-caps third token (the
        second token is then FAM), and finally the third token as FAM.
        """
        if re.match(r"^QVM[0-9]*$", tokens[1]):
            tax = [PRE, PRE, SUF]
        elif tokens[1] in ["cve", "exp"] and tokens[2].isnumeric():
            tax = [PRE, PRE, VULN]
        elif tokens[2].lower() == "gen":
            tax = [PRE, PRE, SUF]
        elif len(tokens[2]) == 1:
            tax = [PRE, FAM, SUF]
        elif tokens[2].isupper():
            tax = [PRE, FAM, SUF]
        else:
            tax = [PRE, PRE, FAM]
        return tax

    # TOK/TOK.TOK.TOK.TOK
    def parse_delim_fmt5(self, tokens):
        """Label a five-token name; a ``QVM<digits>`` second token means no family."""
        if re.match(r"^QVM[0-9]*$", tokens[1]):
            tax = [PRE, PRE, SUF, PRE, SUF]
        else:
            tax = [PRE, PRE, PRE, FAM, SUF]
        return tax

    # TOK.TOK.TOK.TOK.TOK
    def parse_delim_fmt6(self, tokens):
        """Label a five-token, dot-separated name.

        "cve" (case-insensitive) followed by two numeric tokens gives three
        VULN labels. Otherwise a lower-case fourth token is SUF and any
        other fourth token is FAM.
        """
        # isnumeric must be *called*: the bare bound method is always truthy,
        # which let non-numeric fourth tokens through to the VULN branch.
        if tokens[2].lower() == "cve" and tokens[3].isnumeric() and tokens[4].isnumeric():
            tax = [PRE, PRE, VULN, VULN, VULN]
        elif tokens[3].islower():
            tax = [PRE, PRE, PRE, SUF, SUF]
        else:
            tax = [PRE, PRE, PRE, FAM, SUF]
        return tax

    # TOK.TOK
    def parse_delim_fmt7(self, tokens):
        """Return the fixed labels for a two-token name."""
        return [PRE, PRE]
class Parse_Sangfor:
    """Token-labelling rules for Sangfor detection names.

    Every format handled here has a fixed labelling; none of the methods
    inspects the token values.
    """

    def __init__(self):
        formats = (
            ("TOK", self.parse_delim_fmt1),
            ("TOK.TOK.TOK.TOK", self.parse_delim_fmt2),
            ("TOK.TOK-TOK.TOK.TOK", self.parse_delim_fmt3),
            ("TOK.TOK.TOK-TOK", self.parse_delim_fmt4),
            ("TOK.TOK.TOK-TOK-TOK", self.parse_delim_fmt5),
        )
        self.parse_delim_fmt = dict(formats)

    # TOK
    def parse_delim_fmt1(self, tokens):
        """Return the fixed label for a single-token name."""
        return [PRE]

    # TOK.TOK.TOK.TOK
    def parse_delim_fmt2(self, tokens):
        """Return the fixed labels for a four-token, dot-separated name."""
        return [CAT, FILE, FAM, SUF]

    # TOK.TOK-TOK.TOK.TOK
    def parse_delim_fmt3(self, tokens):
        """Return the fixed labels for a five-token name with no FAM label."""
        # Save token - don't think it's a family
        return [CAT] + [PRE] * 3 + [SUF]

    # TOK.TOK.TOK-TOK
    def parse_delim_fmt4(self, tokens):
        """Return the fixed labels for a four-token name ending in ``-TOK``."""
        return [FILE, CAT, FAM, SUF]

    # TOK.TOK.TOK-TOK-TOK
    def parse_delim_fmt5(self, tokens):
        """Return the fixed labels for a five-token name ending in ``-TOK-TOK``."""
        return [FILE, CAT, FAM] + [SUF] * 2
class Parse_Sophos:  # Acquired Invincea
    """Token-labelling rules for Sophos detection names.

    ``parse_delim_fmt`` maps each supported delimiter format to the bound
    method that returns the list of taxonomy labels for that format.
    Formats ending in ``(TOK)`` return one extra trailing NULL label,
    presumably for an empty token left after the closing parenthesis
    (TODO confirm against the tokenizer).
    """

    def __init__(self):
        formats = (
            ("TOK/TOK-TOK", self.parse_delim_fmt1),
            ("TOK (TOK)", self.parse_delim_fmt2),
            ("TOK", self.parse_delim_fmt3),
            ("TOK TOK TOK (TOK)", self.parse_delim_fmt4),
            ("TOK TOK (TOK)", self.parse_delim_fmt5),
            ("TOK TOK", self.parse_delim_fmt6),
            ("TOK/TOK-TOK + TOK/TOK-TOK", self.parse_delim_fmt7),
            ("TOK TOK TOK TOK (TOK)", self.parse_delim_fmt8),
            ("TOK TOK TOK", self.parse_delim_fmt9),
            ("TOK TOK-TOK TOK", self.parse_delim_fmt10),
            ("TOK/TOK", self.parse_delim_fmt11),
            ("TOK TOK-TOK TOK (TOK)", self.parse_delim_fmt12),
        )
        self.parse_delim_fmt = dict(formats)

    # TOK/TOK-TOK
    def parse_delim_fmt1(self, tokens):
        """Return the fixed labels for a three-token name."""
        return [PRE, FAM, SUF]

    # TOK (TOK)
    def parse_delim_fmt2(self, tokens):
        """Return the fixed labels for a name followed by a parenthesised token."""
        return [FAM, CAT, NULL]

    # TOK
    def parse_delim_fmt3(self, tokens):
        """Return the fixed label for a single-token name."""
        return [FAM]

    # TOK TOK TOK (TOK)
    def parse_delim_fmt4(self, tokens):
        """Return three UNK labels followed by CAT and NULL."""
        # Really bad format - not sure if can be improved
        return [UNK] * 3 + [CAT, NULL]

    # TOK TOK (TOK)
    def parse_delim_fmt5(self, tokens):
        """Return two UNK labels followed by CAT and NULL."""
        # Really bad format - not sure if can be improved
        return [UNK] * 2 + [CAT, NULL]

    # TOK TOK
    def parse_delim_fmt6(self, tokens):
        """Return two UNK labels."""
        # Really bad format - not sure if can be improved
        return [UNK] * 2

    # TOK/TOK-TOK + TOK/TOK-TOK
    def parse_delim_fmt7(self, tokens):
        """Return the fixed labels for two joined names; only the second gets FAM."""
        return [PRE, PRE, SUF] + [PRE, FAM, SUF]

    # TOK TOK TOK TOK (TOK)
    def parse_delim_fmt8(self, tokens):
        """Return four UNK labels followed by CAT and NULL."""
        # Really bad format - not sure if can be improved
        return [UNK] * 4 + [CAT, NULL]

    # TOK TOK TOK
    def parse_delim_fmt9(self, tokens):
        """Return three UNK labels."""
        # Really bad format - not sure if can be improved
        return [UNK] * 3

    # TOK TOK-TOK TOK
    def parse_delim_fmt10(self, tokens):
        """Return four UNK labels."""
        # Really bad format - not sure if can be improved
        return [UNK] * 4

    # TOK/TOK
    def parse_delim_fmt11(self, tokens):
        """Label a two-token name: a numeric second token is SUF, otherwise FAM."""
        second_label = SUF if tokens[1].isnumeric() else FAM
        return [PRE, second_label]

    # TOK TOK-TOK TOK (TOK)
    def parse_delim_fmt12(self, tokens):
        """Return four UNK labels followed by SUF and NULL."""
        # Really bad format - not sure if can be improved
        # NOTE(review): the other "(TOK)" formats label the parenthesised
        # token CAT; this one uses SUF - confirm that is intentional.
        return [UNK] * 4 + [SUF, NULL]
class Parse_Symantec:
    """Token-labelling rules for Symantec detection names.

    ``parse_delim_fmt`` maps each supported delimiter format to the bound
    method that returns the list of taxonomy labels for that format.
    """

    def __init__(self):
        formats = (
            ("TOK.TOK.TOK", self.parse_delim_fmt1),
            ("TOK.TOK", self.parse_delim_fmt2),
            ("TOK.TOK!TOK", self.parse_delim_fmt3),
            ("TOK", self.parse_delim_fmt4),
            ("TOK TOK", self.parse_delim_fmt5),
            ("TOK.TOK.TOK!TOK", self.parse_delim_fmt6),
            ("TOK.TOK.TOK.TOK", self.parse_delim_fmt7),
        )
        self.parse_delim_fmt = dict(formats)

    # TOK.TOK.TOK
    def parse_delim_fmt1(self, tokens):
        """Label the first three tokens of a dot-separated name.

        A numeric second token makes the first token FAM. Otherwise, when
        the third token is numeric, at most two characters long or
        all-caps, the second token is FAM unless it is all-caps. Everything
        else is left as UNK.
        """
        if tokens[1].isnumeric():
            return [FAM, SUF, SUF]
        third = tokens[2]
        if not (third.isnumeric() or len(third) <= 2 or third.isupper()):
            return [UNK, UNK, UNK]  # Bad format - may be able to parse more?
        if tokens[1].isupper():
            return [UNK, UNK, SUF]  # Bad format
        return [PRE, FAM, SUF]

    # TOK.TOK
    def parse_delim_fmt2(self, tokens):
        """Label a two-token, dot-separated name.

        A numeric, all-caps or all-lower-case second token is a suffix of a
        FAM first token, unless the first token is all-caps (then both are
        UNK). Any other second token is itself FAM.
        """
        second = tokens[1]
        if not (second.isnumeric() or second.isupper() or second.islower()):
            return [PRE, FAM]
        if tokens[0].isupper():
            return [UNK, UNK]  # Bad format
        return [FAM, SUF]

    # TOK.TOK!TOK
    def parse_delim_fmt3(self, tokens):
        """Return the fixed labels for a three-token name ending in ``!TOK``."""
        return [PRE, FAM, SUF]

    # TOK
    def parse_delim_fmt4(self, tokens):
        """Return the fixed label for a single-token name."""
        return [FAM]

    # TOK TOK
    def parse_delim_fmt5(self, tokens):
        """Return two UNK labels for a space-separated pair."""
        # Either [CAT, CAT] or [FAM, FAM]
        return [UNK] * 2  # Bad format

    # TOK.TOK.TOK!TOK
    def parse_delim_fmt6(self, tokens):
        """Label the first three tokens via ``parse_delim_fmt1`` and append SUF."""
        leading = self.parse_delim_fmt1(tokens)
        return leading + [SUF]

    # TOK.TOK.TOK.TOK
    def parse_delim_fmt7(self, tokens):
        """Label a four-token, dot-separated name.

        The checks run in order and the first match wins: numeric second
        token, leading "Suspicious", numeric third token, one-character
        third token, leading "Heur", all-caps third token. If none match,
        the third token is FAM.
        """
        if tokens[1].isnumeric():
            return [UNK, SUF, UNK, SUF]  # Bad format
        if tokens[0] == "Suspicious":
            return [PRE, PRE, SUF, SUF]
        if tokens[2].isnumeric():
            if tokens[1].islower():
                return [UNK, SUF, SUF, SUF]  # Bad format
            return [PRE, FAM, SUF, SUF]  # Some of PRE are family-like? Unsure
        if len(tokens[2]) == 1:
            return [PRE, FAM, SUF, UNK]  # Last token either CAT or SUF
        if tokens[0] == "Heur":
            return [PRE, PRE, SUF, SUF]
        if tokens[2].isupper():
            return [PRE, FAM, SUF, SUF]
        return [PRE, PRE, FAM, SUF]
class Parse_Tencent:
    """Token-labelling rules for Tencent detection names.

    ``parse_delim_fmt`` maps each supported delimiter format to the bound
    method that returns the list of taxonomy labels for that format.
    """

    def __init__(self):
        formats = (
            ("TOK.TOK.TOK.TOK", self.parse_delim_fmt1),
            ("TOK.TOK-TOK.TOK.TOK", self.parse_delim_fmt2),
            ("TOK:TOK.TOK.TOK_TOK.TOK.TOK", self.parse_delim_fmt3),
            ("TOK.TOK.TOK.TOK.TOK", self.parse_delim_fmt4),
            ("TOK.TOK.TOK-TOK.TOK", self.parse_delim_fmt5),
            ("TOK.TOK.TOK", self.parse_delim_fmt6),
            ("TOK.TOK. TOK.TOK", self.parse_delim_fmt7),
            ("TOK-TOK.TOK.TOK.TOK", self.parse_delim_fmt8),
        )
        self.parse_delim_fmt = dict(formats)

    # TOK.TOK.TOK.TOK
    def parse_delim_fmt1(self, tokens):
        """Return the fixed labels for a four-token, dot-separated name."""
        return [PRE, PRE, FAM, SUF]

    # TOK.TOK-TOK.TOK.TOK
    def parse_delim_fmt2(self, tokens):
        """Return the fixed labels for a five-token name."""
        return [PRE] * 3 + [FAM, SUF]

    # TOK:TOK.TOK.TOK_TOK.TOK.TOK
    def parse_delim_fmt3(self, tokens):
        """Return the fixed labels for a seven-token name with no FAM label."""
        return [PRE, PRE, PRE, SUF, PRE, SUF, SUF]

    # TOK.TOK.TOK.TOK.TOK
    def parse_delim_fmt4(self, tokens):
        """Label a five-token name from its fourth token.

        A fourth token equal to "Gen" or at most two characters long is a
        suffix, which makes the third token FAM; otherwise the fourth token
        is FAM.
        """
        fourth = tokens[3]
        if fourth == "Gen" or len(fourth) <= 2:
            return [PRE, PRE, FAM, SUF, SUF]
        return [PRE, PRE, PRE, FAM, SUF]

    # TOK.TOK.TOK-TOK.TOK
    def parse_delim_fmt5(self, tokens):
        """Label a five-token name from its hyphenated third and fourth tokens.

        The checks run in order and the first match wins: ``Ms<digits>``
        plus a numeric token (VULN), a fourth token "based", the pair
        "ps"/"mpc" (case-insensitive, both FAM), a third token of at most
        two characters, then a numeric fourth token. Anything else is UNK.
        """
        third, fourth = tokens[2], tokens[3]
        if re.match(r"^Ms[0-9]+", third) and fourth.isnumeric():
            return [FILE, CAT, VULN, VULN, SUF]
        if fourth == "based":
            return [FILE, CAT, FAM, SUF, SUF]
        if third.lower() == "ps" and fourth.lower() == "mpc":
            return [FILE, CAT, FAM, FAM, SUF]
        if len(third) <= 2:
            return [FILE, CAT, SUF, FAM, SUF]
        if fourth.isnumeric():
            return [FILE, CAT, FAM, SUF, SUF]
        return [FILE, CAT, UNK, UNK, SUF]  # Bad format

    # TOK.TOK.TOK
    def parse_delim_fmt6(self, tokens):
        """Label a three-token, dot-separated name.

        A one-character first token is SUF and the last token FAM. Else a
        one-character or numeric last token is SUF and the middle token
        FAM. Otherwise the last token is FAM.
        """
        if len(tokens[0]) == 1:
            return [SUF, PRE, FAM]
        last = tokens[2]
        if len(last) == 1 or last.isnumeric():
            return [PRE, FAM, SUF]
        return [PRE, PRE, FAM]

    # TOK.TOK. TOK.TOK
    def parse_delim_fmt7(self, tokens):
        """Return the fixed labels for the four-token format with an embedded space."""
        return [PRE, PRE, FAM, SUF]

    # TOK-TOK.TOK.TOK.TOK
    def parse_delim_fmt8(self, tokens):
        """Return the fixed labels for a five-token name starting ``TOK-TOK``."""
        return [CAT] * 2 + [FILE, FAM, SUF]
class Parse_Totaldefense:  # Like etrustvet
    """Taxonomy labelling rules for TotalDefense detection names.

    ``parse_delim_fmt`` maps a delimiter format string to the method that
    returns the taxonomy labels for a detection name of that format.
    Every handler here returns a fixed label list and ignores the token
    contents.
    """

    def __init__(self):
        handlers = (
            ("TOK/TOK.TOK", self.parse_delim_fmt1),
            ("TOK/TOK!TOK", self.parse_delim_fmt2),
            ("TOK/TOK.TOK!TOK", self.parse_delim_fmt3),
            ("TOK/TOK", self.parse_delim_fmt4),
            ("TOK/TOK_TOK", self.parse_delim_fmt5),
            ("TOK/TOK.TOK.TOK[TOK]", self.parse_delim_fmt6),
        )
        self.parse_delim_fmt = dict(handlers)

    def parse_delim_fmt1(self, tokens):
        """Labels for ``TOK/TOK.TOK``."""
        labels = [FILE, FAM, SUF]
        return labels

    def parse_delim_fmt2(self, tokens):
        """Labels for ``TOK/TOK!TOK``."""
        labels = [FILE, FAM, SUF]
        return labels

    def parse_delim_fmt3(self, tokens):
        """Labels for ``TOK/TOK.TOK!TOK``."""
        labels = [FILE, FAM, SUF, SUF]
        return labels

    def parse_delim_fmt4(self, tokens):
        """Labels for ``TOK/TOK``."""
        labels = [FILE, FAM]
        return labels

    def parse_delim_fmt5(self, tokens):
        """Labels for ``TOK/TOK_TOK``."""
        labels = [FILE, FAM, SUF]
        return labels

    def parse_delim_fmt6(self, tokens):
        """Labels for ``TOK/TOK.TOK.TOK[TOK]``.

        Seems to be all Zango pinball malware? The family position is
        unsure, so the two candidate tokens are left as UNK.
        """
        labels = [FILE, UNK, UNK, SUF, SUF, NULL]
        return labels
elif len(tokens[1]) <= 4: 47 | tax = [PRE, UNK, SUF] # Bad format 48 | else: 49 | tax = [PRE, FAM, SUF] 50 | return tax 51 | 52 | # TOK_TOK 53 | def parse_delim_fmt5(self, tokens): 54 | tax = [UNK, UNK] 55 | if tokens[1].isnumeric() or tokens[1].islower(): 56 | tax = [UNK, SUF] # Bad format 57 | elif len(tokens[1]) <= 3: 58 | tax = [PRE, UNK] # Bad format 59 | else: 60 | tax = [PRE, FAM] 61 | return tax 62 | 63 | # TOK.TOK.TOK.TOK 64 | def parse_delim_fmt6(self, tokens): 65 | return [CAT, FILE, FAM, SUF] 66 | -------------------------------------------------------------------------------- /claravy/parsers/parse_trendmicrohousecall.py: -------------------------------------------------------------------------------- 1 | import re 2 | from claravy.taxonomy import * 3 | 4 | 5 | class Parse_Trendmicrohousecall: # Same company as Trendmicro 6 | 7 | def __init__(self): 8 | self.parse_delim_fmt = { 9 | "TOK_TOK.TOK": self.parse_delim_fmt1, 10 | "TOK_TOK_TOK.TOK": self.parse_delim_fmt2, 11 | "TOK_TOK.TOK-TOK": self.parse_delim_fmt3, 12 | "TOK_TOK": self.parse_delim_fmt4, 13 | "TOK_TOK-TOK": self.parse_delim_fmt5, 14 | "TOK.TOK.TOK.TOK": self.parse_delim_fmt6, 15 | } 16 | 17 | # TOK_TOK.TOK 18 | def parse_delim_fmt1(self, tokens): 19 | tax = [UNK, UNK, SUF] 20 | if len(tokens[1]) <= 2 and tokens[1] != "VB": 21 | tax = [PRE, SUF, SUF] 22 | elif len(tokens[1]) == 3: 23 | tax = [UNK, UNK, SUF] # Bad format 24 | else: 25 | tax = [PRE, FAM, SUF] 26 | return tax 27 | 28 | # TOK_TOK_TOK.TOK 29 | def parse_delim_fmt2(self, tokens): 30 | tax = [PRE, UNK, SUF, SUF] 31 | if len(tokens[1]) <= 3: 32 | tax = [PRE, UNK, SUF, SUF] # Bad format 33 | else: 34 | tax = [PRE, FAM, SUF, SUF] 35 | return tax 36 | 37 | # TOK_TOK.TOK-TOK 38 | def parse_delim_fmt3(self, tokens): 39 | return [PRE, FAM, SUF, SUF] 40 | 41 | # TOK_TOK 42 | def parse_delim_fmt4(self, tokens): 43 | tax = [UNK, UNK] 44 | if tokens[1].isnumeric() or tokens[1].islower(): 45 | tax = [UNK, SUF] # Bad format 46 | elif 
class Parse_Varist:
    """Taxonomy labelling rules for Varist detection names.

    ``parse_delim_fmt`` maps a delimiter format string to the method that
    returns the taxonomy labels for a detection name of that format.
    Each handler receives ``tokens`` (presumably the detection name split
    on its delimiters -- confirm against the caller) and returns a list of
    taxonomy labels.
    """

    def __init__(self):
        self.parse_delim_fmt = {
            "TOK/TOK.TOK.TOK!TOK": self.parse_delim_fmt1,
            # The key previously read "TOK/TOK.TOK-TOK 142945"; the stray
            # number meant it could never equal the format documented on
            # parse_delim_fmt2, leaving that handler unreachable.
            "TOK/TOK.TOK-TOK": self.parse_delim_fmt2,
            "TOK/TOK-TOK!TOK": self.parse_delim_fmt3,
            "TOK/TOK_TOK.TOK.TOK!TOK": self.parse_delim_fmt4,
            "TOK/TOK.TOK": self.parse_delim_fmt5,
            "TOK.TOK-TOK": self.parse_delim_fmt6,
        }

    # TOK/TOK.TOK.TOK!TOK
    def parse_delim_fmt1(self, tokens):
        """Return the labels for ``TOK/TOK.TOK.TOK!TOK``."""
        return [FILE, FAM, SUF, SUF, SUF]

    # TOK/TOK.TOK-TOK
    def parse_delim_fmt2(self, tokens):
        """Return the labels for ``TOK/TOK.TOK-TOK``."""
        return [FILE, FAM, SUF, SUF]

    # TOK/TOK-TOK!TOK
    def parse_delim_fmt3(self, tokens):
        """Return the labels for ``TOK/TOK-TOK!TOK``.

        When the second token is a single character everything after the
        file type is a suffix; otherwise the family position is unknown.
        """
        if len(tokens[1]) == 1:
            return [FILE, SUF, SUF, SUF]
        return [FILE, UNK, UNK, SUF]

    # TOK/TOK_TOK.TOK.TOK!TOK
    def parse_delim_fmt4(self, tokens):
        """Return the labels for ``TOK/TOK_TOK.TOK.TOK!TOK``."""
        return [FILE, FILE, FAM, SUF, SUF, SUF]

    # TOK/TOK.TOK
    def parse_delim_fmt5(self, tokens):
        """Return the labels for ``TOK/TOK.TOK``."""
        return [FILE, FAM, SUF]

    # TOK.TOK-TOK
    def parse_delim_fmt6(self, tokens):
        """Return the labels for ``TOK.TOK-TOK``."""
        return [PRE, SUF, SUF]
if tokens[1].isnumeric(): 45 | if tokens[0].isupper() or len(tokens[0]) <= 3: 46 | tax = [UNK, SUF] 47 | else: 48 | tax = [FAM, SUF] 49 | elif tokens[1].islower(): 50 | tax = [FAM, SUF] 51 | else: 52 | tax = [PRE, FAM] 53 | return tax 54 | 55 | # TOK.TOK.TOK.TOK 56 | def parse_delim_fmt3(self, tokens): 57 | if tokens[3].isnumeric() or tokens[3].islower() or tokens[3].isupper() or len(tokens[3]) <= 2: 58 | if tokens[2].isupper() and tokens[2] != "VB": 59 | tax = [PRE, PRE, SUF, SUF] 60 | else: 61 | tax = [PRE, PRE, FAM, SUF] 62 | elif len(tokens[3]) <= 4 and tokens[3].endswith("en"): # Gen, Sen, cGen 63 | if tokens[2].isupper() and tokens[2] != "VB": 64 | tax = [PRE, PRE, SUF, SUF] 65 | else: 66 | tax = [PRE, PRE, FAM, SUF] 67 | elif tokens[3] == "Heur": 68 | tax = [PRE, PRE, FAM, SUF] 69 | else: 70 | tax = [PRE, PRE, PRE, FAM] 71 | return tax 72 | 73 | # TOK.TOK-TOK.TOK.TOK 74 | def parse_delim_fmt4(self, tokens): 75 | if tokens[4].isnumeric() or tokens[4].isupper() or tokens[4].islower(): 76 | if tokens[3].isupper(): 77 | tax = [PRE, PRE, PRE, UNK, SUF] # Bad format 78 | else: 79 | tax = [PRE, CAT, CAT, FAM, SUF] 80 | elif len(tokens[4]) <= 3 or tokens[3] != "Win32": 81 | tax = [PRE, PRE, PRE, UNK, UNK] # Bad format 82 | else: 83 | tax = [PRE, CAT, CAT, FILE, FAM] # Only Win32 left 84 | return tax 85 | 86 | # TOK-TOK.TOK.TOK.TOK 87 | def parse_delim_fmt5(self, tokens): 88 | tax = [CAT, CAT, UNK, UNK, SUF] 89 | if tokens[3] == "gen": 90 | tax = [CAT, CAT, FAM, SUF, SUF] 91 | else: 92 | tax = [CAT, CAT, FILE, FAM, SUF] 93 | return tax 94 | 95 | # TOK-TOK.TOK.TOK 96 | def parse_delim_fmt6(self, tokens): 97 | tax = [CAT, CAT, UNK, UNK] 98 | if tokens[3].isnumeric() or tokens[3].islower() or tokens[3] == "gen": 99 | if tokens[2].isupper() and len(tokens[2]) <= 3 and tokens[2] != "VB": 100 | tax = [CAT, CAT, UNK, UNK] # Bad format 101 | else: 102 | tax = [CAT, CAT, FAM, SUF] 103 | elif len(tokens[3]) <= 3: 104 | tax = [CAT, CAT, UNK, UNK] # Bad format 105 | else: 106 | 
class Parse_Vipre:  # Avware uses Vipre engine
    """Taxonomy labelling rules for Vipre detection names.

    ``parse_delim_fmt`` maps a delimiter format string to the method that
    returns the taxonomy labels for a detection name of that format.
    Each handler receives ``tokens`` (presumably the detection name split
    on its delimiters -- confirm against the caller) and returns a list of
    taxonomy labels. Formats that end in ``(TOK)`` end their label list
    with NULL.
    """

    def __init__(self):
        self.parse_delim_fmt = {
            "TOK.TOK.TOK.TOK (TOK)": self.parse_delim_fmt1,
            "TOK.TOK.TOK!TOK": self.parse_delim_fmt2,
            "TOK.TOK.TOK (TOK)": self.parse_delim_fmt3,
            "TOK (TOK)": self.parse_delim_fmt4,
            "TOK-TOK.TOK.TOK.TOK (TOK)": self.parse_delim_fmt5,
            "TOK.TOK.TOK.TOK!TOK": self.parse_delim_fmt6,
            "TOK-TOK.TOK.TOK (TOK)": self.parse_delim_fmt7,
            "TOK.TOK.TOK!TOK.TOK": self.parse_delim_fmt8,
            "TOK/TOK (TOK)": self.parse_delim_fmt9,
            "TOK.TOK.TOK.TOK!TOK (TOK)": self.parse_delim_fmt10,
            "TOK.TOK.TOK.TOK.TOK (TOK)": self.parse_delim_fmt11,
            "TOK TOK (TOK)": self.parse_delim_fmt12,
            "TOK.TOK": self.parse_delim_fmt13,
            "TOK.TOK.TOK.TOK (TOK-TOK)": self.parse_delim_fmt14,
            "TOK.TOK.TOK": self.parse_delim_fmt15,
            "TOK.TOK.TOK!TOK (TOK)": self.parse_delim_fmt16,
            "TOK": self.parse_delim_fmt17,
            "TOK TOK. (TOK)": self.parse_delim_fmt18,
            "TOK.TOK.TOK-TOK (TOK)": self.parse_delim_fmt19,
            "TOK.TOK (TOK)": self.parse_delim_fmt20,
        }

    # TOK.TOK.TOK.TOK (TOK)
    def parse_delim_fmt1(self, tokens):
        """Return labels; a "Packer" third token marks the fourth as PACK."""
        if tokens[2] == "Packer":
            tax = [CAT, FILE, PRE, PACK, SUF, NULL]
        else:
            tax = [CAT, FILE, FAM, SUF, SUF, NULL]
        return tax

    # TOK.TOK.TOK!TOK
    def parse_delim_fmt2(self, tokens):
        """Return labels; a "Generic" third token is not a family."""
        if tokens[2] == "Generic":
            tax = [PRE, PRE, PRE, SUF]
        else:
            tax = [PRE, PRE, FAM, SUF]
        return tax

    # TOK.TOK.TOK (TOK)
    def parse_delim_fmt3(self, tokens):
        """Return labels, deciding whether the third token is a suffix or the family."""
        if tokens[0] == "Packer":
            tax = [PRE, PACK, SUF, SUF, NULL]
        elif len(tokens[2]) <= 2 or tokens[2].lower() == "gen":
            tax = [PRE, FAM, SUF, SUF, NULL]
        elif tokens[2].isnumeric():
            tax = [PRE, FAM, SUF, SUF, NULL]
        else:
            tax = [PRE, FILE, FAM, SUF, NULL]
        return tax

    # TOK (TOK)
    def parse_delim_fmt4(self, tokens):
        """Return the fixed labels for this format."""
        return [FAM, SUF, NULL]

    # TOK-TOK.TOK.TOK.TOK (TOK)
    def parse_delim_fmt5(self, tokens):
        """Return the fixed labels for this format."""
        return [CAT, CAT, FILE, FAM, SUF, SUF, NULL]

    # TOK.TOK.TOK.TOK!TOK
    def parse_delim_fmt6(self, tokens):
        """Return labels; a "Generic" third token is not a family."""
        if tokens[2] == "Generic":
            tax = [CAT, FILE, PRE, SUF, SUF]
        else:
            tax = [CAT, FILE, FAM, SUF, SUF]
        return tax

    # TOK-TOK.TOK.TOK (TOK)
    def parse_delim_fmt7(self, tokens):
        """Return labels based on the casing/length of the fourth token."""
        if tokens[3].islower():
            tax = [CAT, CAT, FAM, SUF, SUF, NULL]
        elif len(tokens[3]) <= 2 or tokens[3].isupper():
            tax = [CAT, CAT, UNK, SUF, SUF, NULL]  # Bad format
        else:
            tax = [CAT, CAT, FILE, FAM, SUF, NULL]
        return tax

    # TOK.TOK.TOK!TOK.TOK
    def parse_delim_fmt8(self, tokens):
        """Return labels; a "Generic" third token is not a family."""
        if tokens[2] == "Generic":
            tax = [PRE, PRE, PRE, SUF, SUF]
        else:
            tax = [PRE, PRE, FAM, SUF, SUF]
        return tax

    # TOK/TOK (TOK)
    def parse_delim_fmt9(self, tokens):
        """Return the fixed labels for this format."""
        return [FAM, FAM, SUF, NULL]

    # TOK.TOK.TOK.TOK!TOK (TOK)
    def parse_delim_fmt10(self, tokens):
        """Return the labels of parse_delim_fmt6 followed by [SUF, NULL]."""
        return self.parse_delim_fmt6(tokens) + [SUF, NULL]

    # TOK.TOK.TOK.TOK.TOK (TOK)
    def parse_delim_fmt11(self, tokens):
        """Return the fixed labels for this format."""
        return [CAT, FILE, FAM, SUF, SUF, SUF, NULL]

    # TOK TOK (TOK)
    def parse_delim_fmt12(self, tokens):
        """Return labels; names starting with "Corrupted" carry no family."""
        if tokens[0] == "Corrupted":
            tax = [PRE, PRE, SUF, NULL]
        else:
            tax = [FAM, FAM, SUF, NULL]
        return tax

    # TOK.TOK
    def parse_delim_fmt13(self, tokens):
        """Return labels; a numeric or lowercase second token is a suffix."""
        if tokens[1].isnumeric() or tokens[1].islower():
            tax = [FAM, SUF]
        else:
            tax = [UNK, FAM]  # Some [FAM, FAM] and some [CAT, FAM]
        return tax

    # TOK.TOK.TOK.TOK (TOK-TOK)
    def parse_delim_fmt14(self, tokens):
        """Return the fixed labels for this format."""
        return [PRE, PRE, PRE, SUF, SUF, SUF, NULL]

    # TOK.TOK.TOK
    def parse_delim_fmt15(self, tokens):
        """Return labels based on the third token."""
        if tokens[2] == "gen":
            tax = [PRE, PRE, SUF]
        elif tokens[2].isupper() or tokens[2].islower():  # Bad format
            tax = [PRE, UNK, UNK]
        else:
            tax = [PRE, PRE, FAM]
        return tax

    # TOK.TOK.TOK!TOK (TOK)
    def parse_delim_fmt16(self, tokens):
        """Return labels; a one-character third token makes the second the family.

        The final label is NULL, matching every other "(TOK)" format in
        this class (it was previously SUF).
        """
        if len(tokens[2]) == 1:
            tax = [PRE, FAM, SUF, SUF, SUF, NULL]
        else:
            tax = [PRE, PRE, FAM, SUF, SUF, NULL]
        return tax

    # TOK
    def parse_delim_fmt17(self, tokens):
        """Return the fixed labels for this format."""
        return [FAM]

    # TOK TOK. (TOK)
    def parse_delim_fmt18(self, tokens):
        """Return the fixed labels for this format."""
        return [FAM, FAM, SUF, NULL]

    # TOK.TOK.TOK-TOK (TOK)
    def parse_delim_fmt19(self, tokens):
        """Return labels, recognising CVE identifiers split as CVEyyyy-nnnn."""
        if re.match(r"^CVE[0-9]{4}", tokens[2]) and tokens[3].isnumeric():
            tax = [PRE, PRE, VULN, VULN, SUF, NULL]
        elif tokens[2].islower() or len(tokens[2]) <= 2:
            tax = [PRE, FAM, SUF, SUF, SUF, NULL]
        else:
            tax = [CAT, FILE, FAM, SUF, SUF, NULL]
        return tax

    # TOK.TOK (TOK)
    def parse_delim_fmt20(self, tokens):
        """Return labels; a lowercase, one-character or "Gen" second token is a suffix.

        Both branches end in [SUF, NULL]; the second branch previously
        ended in [SUF, SUF], unlike the other branch of this method.
        """
        if tokens[1].islower() or len(tokens[1]) == 1 or tokens[1] == "Gen":
            tax = [FAM, SUF, SUF, NULL]
        else:
            tax = [PRE, FAM, SUF, NULL]
        return tax
class Parse_Virobot:  # Seems to use Bitdefender's engine and own engine
    """Taxonomy labelling rules for ViRobot detection names.

    ``parse_delim_fmt`` maps a delimiter format string to the method that
    returns the taxonomy labels for a detection name of that format.
    Each handler receives ``tokens`` (presumably the detection name split
    on its delimiters -- confirm against the caller) and returns a list of
    taxonomy labels.
    """

    def __init__(self):
        handlers = (
            ("TOK.TOK.TOK", self.parse_delim_fmt1),
            ("TOK.TOK.TOK.TOK.TOK", self.parse_delim_fmt2),
            ("TOK.TOK.TOK.TOK", self.parse_delim_fmt3),
            ("TOK.TOK.TOK.TOK.TOK.TOK", self.parse_delim_fmt4),
            ("TOK.TOK_TOK_TOK", self.parse_delim_fmt5),
            ("TOK.TOK.TOK.TOK.TOK[TOK]", self.parse_delim_fmt6),
            ("TOK.TOK.TOK[TOK]", self.parse_delim_fmt7),
            ("TOK.TOK.TOK.TOK[TOK]", self.parse_delim_fmt8),
            ("TOK.TOK.TOK.TOK.TOK.TOK[TOK]", self.parse_delim_fmt9),
            ("TOK.TOK.TOK.TOK-TOK.TOK", self.parse_delim_fmt10),
            ("TOK.TOK.TOK.TOK-TOK.TOK.TOK", self.parse_delim_fmt11),
        )
        self.parse_delim_fmt = dict(handlers)

    def parse_delim_fmt1(self, tokens):
        """Labels for ``TOK.TOK.TOK``."""
        if tokens[0] == "Packed":
            return [PRE, PRE, PACK]
        last = tokens[2]
        if last.isnumeric() or len(last) <= 2 or last == "Gen":
            return [PRE, FAM, SUF]
        return [PRE, PRE, FAM]

    def parse_delim_fmt2(self, tokens):
        """Labels for ``TOK.TOK.TOK.TOK.TOK``."""
        if len(tokens[2]) <= 2 and tokens[2] != "VB":
            if tokens[3].isnumeric() or tokens[3] == "Gen":
                return [CAT, FAM, SUF, SUF, SUF]
            return [CAT, FILE, SUF, FAM, SUF]
        if len(tokens[1]) <= 2 and tokens[1] != "VB":
            return [PRE, SUF, FAM, SUF, SUF]
        if tokens[2].isnumeric():
            return [PRE, FAM, SUF, SUF, SUF]
        if tokens[1] == "Win32":
            return [PRE, FILE, FAM, SUF, SUF]
        return [PRE, UNK, UNK, SUF, SUF]  # Bad format

    def parse_delim_fmt3(self, tokens):
        """Labels for ``TOK.TOK.TOK.TOK``."""
        third = tokens[2]
        if third.isnumeric() or third == "Gen" or (len(third) <= 2 and third != "VB"):
            return [PRE, FAM, SUF, SUF]
        if len(tokens[1]) <= 2:
            return [PRE, SUF, FAM, SUF]
        return [PRE, PRE, FAM, SUF]

    def parse_delim_fmt4(self, tokens):
        """Labels for ``TOK.TOK.TOK.TOK.TOK.TOK``."""
        short_third = len(tokens[2]) <= 2 and tokens[2] != "VB"
        if short_third:
            return [CAT, FILE, SUF, FAM, SUF, SUF]
        return [CAT, FILE, UNK, UNK, SUF, SUF]  # Bad format

    def parse_delim_fmt5(self, tokens):
        """Labels for ``TOK.TOK_TOK_TOK``."""
        return [PRE, PRE, UNK, SUF]  # All Geno iframe

    def parse_delim_fmt6(self, tokens):
        """Labels for ``TOK.TOK.TOK.TOK.TOK[TOK]``.

        The bracketed token is relabelled PACK unless it is all lowercase.
        """
        def generic_like(tok):
            # Numeric, "Gen", or one-character tokens are treated as suffixes.
            return tok.isnumeric() or tok == "Gen" or len(tok) == 1

        if not generic_like(tokens[3]):
            labels = [PRE, PRE, PRE, FAM, SUF, SUF, NULL]
        elif generic_like(tokens[2]):
            labels = [PRE, FAM, SUF, SUF, SUF, SUF, NULL]
        else:
            labels = [PRE, PRE, FAM, SUF, SUF, SUF, NULL]
        if not tokens[5].islower():
            labels[5] = PACK
        return labels

    def parse_delim_fmt7(self, tokens):
        """Labels for ``TOK.TOK.TOK[TOK]``."""
        third = tokens[2]
        if not any(ch.islower() for ch in third):
            return [PRE, FAM, SUF, SUF, NULL]
        if third not in ("Gen", "Dam", "based", "Generic"):
            return [PRE, PRE, FAM, SUF, NULL]
        if third in ("based", "Generic") or tokens[1].isnumeric():
            return [UNK, SUF, SUF, SUF, NULL]
        return [PRE, FAM, SUF, SUF, NULL]

    def parse_delim_fmt8(self, tokens):
        """Labels for ``TOK.TOK.TOK.TOK[TOK]``: fmt3 labels plus the bracketed token."""
        labels = self.parse_delim_fmt3(tokens) + [SUF, NULL]
        if not tokens[4].islower():
            labels[4] = PACK
        return labels

    def parse_delim_fmt9(self, tokens):
        """Labels for ``TOK.TOK.TOK.TOK.TOK.TOK[TOK]``: fmt4 labels plus the bracketed token."""
        labels = self.parse_delim_fmt4(tokens) + [SUF, NULL]
        if not tokens[6].islower():
            labels[6] = PACK
        return labels

    def parse_delim_fmt10(self, tokens):
        """Labels for ``TOK.TOK.TOK.TOK-TOK.TOK``."""
        return [CAT, FILE, SUF, PRE, FAM, SUF]

    def parse_delim_fmt11(self, tokens):
        """Labels for ``TOK.TOK.TOK.TOK-TOK.TOK.TOK``."""
        return [CAT, FILE, SUF, PRE, FAM, SUF, SUF]
class Parse_Webroot:
    """Taxonomy labelling rules for Webroot detection names.

    ``parse_delim_fmt`` maps a delimiter format string to the method that
    returns the taxonomy labels for a detection name of that format.
    """

    def __init__(self):
        handlers = (
            ("TOK.TOK.TOK", self.parse_delim_fmt1),
            ("TOK.TOK", self.parse_delim_fmt2),
            ("TOK.TOK.TOK.TOK", self.parse_delim_fmt3),
        )
        self.parse_delim_fmt = dict(handlers)

    def parse_delim_fmt1(self, tokens):
        """Labels for ``TOK.TOK.TOK``, decided by the shape of the last token."""
        last = tokens[2]
        short_suffix = len(last) <= 2 and last != "VB"
        if last.lower() == "gen" or last.islower() or short_suffix:
            return [PRE, FAM, SUF]
        if last.isnumeric() or last.isupper():
            return [PRE, UNK, SUF]  # Bad format
        return [PRE, UNK, UNK]  # Very bad format - can't tell PRE from FAM

    def parse_delim_fmt2(self, tokens):
        """Labels for ``TOK.TOK``."""
        return [PRE, FAM]

    def parse_delim_fmt3(self, tokens):
        """Labels for ``TOK.TOK.TOK.TOK``, decided by the shape of the last token."""
        last = tokens[3]
        suffix_like = (
            last == "Gen"
            or last.islower()
            or last.isnumeric()
            or (len(last) <= 2 and last != "VB")
        )
        if suffix_like:
            return [PRE, PRE, FAM, SUF]
        return [PRE, PRE, UNK, UNK]  # Bad format
class Parse_Yandex:  # Acquired Agnitum, May rely on Sophos' signatures in some detections
    """Taxonomy labelling rules for Yandex detection names.

    ``parse_delim_fmt`` maps a delimiter format string to the method that
    returns the taxonomy labels for a detection name of that format.
    """

    def __init__(self):
        handlers = (
            ("TOK.TOK!TOK", self.parse_delim_fmt1),
            ("TOK.TOK!", self.parse_delim_fmt2),
            ("TOK.TOK.TOK", self.parse_delim_fmt3),
            ("TOK.TOK.TOK.TOK", self.parse_delim_fmt4),
            ("TOK.TOK.TOK!TOK", self.parse_delim_fmt5),
            ("TOK.TOK!TOK+TOK", self.parse_delim_fmt6),
            ("TOK.TOK!TOK/TOK", self.parse_delim_fmt7),
        )
        self.parse_delim_fmt = dict(handlers)

    def parse_delim_fmt1(self, tokens):
        """Labels for ``TOK.TOK!TOK``."""
        return [CAT, FAM, SUF]

    def parse_delim_fmt2(self, tokens):
        """Labels for ``TOK.TOK!``."""
        return [CAT, FAM, NULL]

    def parse_delim_fmt3(self, tokens):
        """Labels for ``TOK.TOK.TOK``."""
        if tokens[0] == "Packer":
            return [PRE, PACK, SUF]
        if tokens[1].isnumeric():
            return [FAM, SUF, SUF]
        return [PRE, FAM, SUF]

    def parse_delim_fmt4(self, tokens):
        """Labels for ``TOK.TOK.TOK.TOK``, decided mostly by the third token."""
        third = tokens[2]
        if tokens[3].isnumeric() and third.startswith("b"):
            return [PRE, FAM, SUF, SUF]
        if tokens[0] == "Packer":
            return [PRE, PRE, PACK, SUF]
        if third == "Gen" or third.isnumeric() or len(third) == 1:
            return [PRE, FAM, SUF, SUF]
        if third != "VB":
            if len(third) <= 2:
                # Short third token: the second is a family unless all-caps.
                return [PRE, PRE, SUF, SUF] if tokens[1].isupper() else [PRE, FAM, SUF, SUF]
            if third.isupper():
                return [PRE, UNK, UNK, SUF]  # Bad format
        return [PRE, PRE, FAM, SUF]

    def parse_delim_fmt5(self, tokens):
        """Labels for ``TOK.TOK.TOK!TOK``."""
        if tokens[2] == "Gen":
            return [CAT, FAM, SUF, SUF]
        return [CAT, PRE, FAM, SUF]

    def parse_delim_fmt6(self, tokens):
        """Labels for ``TOK.TOK!TOK+TOK``."""
        return [CAT, FAM, SUF, SUF]

    def parse_delim_fmt7(self, tokens):
        """Labels for ``TOK.TOK!TOK/TOK``."""
        return [CAT, FAM, SUF, SUF]
class Parse_Zonealarm:  # Partnership with Kaspersky
    """Taxonomy labelling rules for ZoneAlarm detection names.

    ``parse_delim_fmt`` maps a delimiter format string to the method that
    returns the taxonomy labels for a detection name of that format.
    Each handler receives ``tokens`` (presumably the detection name split
    on its delimiters -- confirm against the caller) and returns a list of
    taxonomy labels.
    """

    def __init__(self):
        self.parse_delim_fmt = {
            "TOK:TOK.TOK.TOK": self.parse_delim_fmt1,
            "TOK-TOK.TOK.TOK.TOK": self.parse_delim_fmt2,
            "TOK.TOK.TOK.TOK": self.parse_delim_fmt3,
            "TOK-TOK-TOK:TOK.TOK.TOK.TOK": self.parse_delim_fmt4,
            "TOK-TOK-TOK:TOK:TOK.TOK.TOK.TOK": self.parse_delim_fmt5,
            "TOK:TOK.TOK.TOK.TOK": self.parse_delim_fmt6,
            "TOK-TOK-TOK:TOK:TOK.TOK.TOK": self.parse_delim_fmt7,
            "TOK:TOK-TOK.TOK.TOK.TOK": self.parse_delim_fmt8,
            "TOK:TOK-TOK.TOK.TOK": self.parse_delim_fmt9,
            "TOK.TOK.TOK.TOK.TOK": self.parse_delim_fmt10,
        }

    # TOK:TOK.TOK.TOK
    def parse_delim_fmt1(self, tokens):
        """Return the fixed labels for this format."""
        return [PRE, CAT, FILE, FAM]

    # TOK-TOK.TOK.TOK.TOK
    def parse_delim_fmt2(self, tokens):
        """Return the fixed labels for this format."""
        return [CAT, CAT, FILE, FAM, SUF]

    # TOK.TOK.TOK.TOK
    def parse_delim_fmt3(self, tokens):
        """Return the fixed labels for this format."""
        return [CAT, FILE, FAM, SUF]

    # TOK-TOK-TOK:TOK.TOK.TOK.TOK
    def parse_delim_fmt4(self, tokens):
        """Return the fixed labels for this format."""
        return [PRE, PRE, PRE, CAT, FILE, FAM, SUF]

    # TOK-TOK-TOK:TOK:TOK.TOK.TOK.TOK
    def parse_delim_fmt5(self, tokens):
        """Return the fixed labels for this format."""
        return [PRE, PRE, PRE, PRE, CAT, FILE, FAM, SUF]

    # TOK:TOK.TOK.TOK.TOK
    def parse_delim_fmt6(self, tokens):
        """Return the fixed labels for this format."""
        return [PRE, CAT, FILE, FAM, SUF]

    # TOK-TOK-TOK:TOK:TOK.TOK.TOK
    def parse_delim_fmt7(self, tokens):
        """Return the fixed labels for this format."""
        return [PRE, PRE, PRE, PRE, CAT, FILE, FAM]

    # TOK:TOK-TOK.TOK.TOK.TOK
    def parse_delim_fmt8(self, tokens):
        """Return the fixed labels for this format."""
        return [PRE, CAT, CAT, FILE, FAM, SUF]

    # TOK:TOK-TOK.TOK.TOK
    # (The dispatch table in __init__ binds this format to fmt9; the
    # format comments on fmt9 and fmt10 were previously swapped.)
    def parse_delim_fmt9(self, tokens):
        """Return labels; only a trailing "Generic" token is recognised."""
        if tokens[4] == "Generic":
            tax = [PRE, CAT, CAT, FILE, PRE]
        else:
            tax = [PRE, CAT, CAT, UNK, UNK]  # Bad format
        return tax

    # TOK.TOK.TOK.TOK.TOK
    # (The dispatch table in __init__ binds this format to fmt10.)
    def parse_delim_fmt10(self, tokens):
        """Return labels, decided by the shape of the third and fourth tokens."""
        if tokens[3].isnumeric():
            if len(tokens[2]) <= 2 and tokens[2] != "VB":
                tax = [CAT, PRE, SUF, SUF, SUF]
            else:
                tax = [CAT, PRE, FAM, SUF, SUF]
        elif tokens[3].islower() and not any(c.isdigit() for c in tokens[3]):
            tax = [CAT, FILE, FAM, SUF, SUF]
        elif len(tokens[3]) <= 2 and tokens[3] != "VB":
            if tokens[2].isupper():
                tax = [CAT, FILE, UNK, SUF, SUF]  # Bad format
            else:
                tax = [CAT, FILE, FAM, SUF, SUF]
        else:
            tax = [CAT, FILE, UNK, UNK, SUF]  # Bad format
        return tax
FAM = "FAM" 2 | GRP = "GRP" 3 | FILE = "FILE" 4 | CAT = "BEH" 5 | PACK = "PACK" 6 | VULN = "VULN" 7 | PRE = "PRE" 8 | SUF = "SUF" 9 | HEUR = "HEUR" 10 | UNK = "UNK" 11 | NULL = "NULL" 12 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=71.0.0", "setuptools-scm"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name="claravy" 7 | version="2.0.0" 8 | authors=[{name = "RJ Joyce", email="joyce8@umbc.edu"}] 9 | description="ClarAVy: Clarifying noise in antivirus scan data" 10 | readme="README.md" 11 | requires-python = ">=3.10" 12 | classifiers = [ 13 | "Programming Language :: Python :: 3", 14 | "Operating System :: OS Independent" 15 | ] 16 | 17 | dependencies = [ 18 | "numpy>=1.26.4", 19 | "scipy==1.11.0", 20 | "ultradict", 21 | "pylcs", 22 | "editdistance", 23 | "atomics", 24 | "xgboost==2.1.1", 25 | "joblib", 26 | "numba>=0.61.0", 27 | "numba-scipy>=0.4.0", 28 | "scikit-learn==1.5.2" 29 | ] 30 | [project.scripts] 31 | claravy = "claravy.avtagger:main_cli" 32 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = claravy 3 | version = 2.0.0 4 | 5 | [options] 6 | packages = find: 7 | include_package_data = True 8 | python_requires = >=3.10 9 | install_requires = 10 | numpy>=1.26.0 11 | scipy==1.11.0 12 | ultradict 13 | pylcs 14 | editdistance 15 | atomics 16 | xgboost==2.1.1 17 | joblib 18 | numba>=0.61.0 19 | numba-scipy>=0.4.0 20 | scikit-learn==1.5.2 21 | 22 | [options.entry_points] 23 | console_scripts = 24 | claravy = claravy.avtagger:main_cli 25 | --------------------------------------------------------------------------------