├── CVE_annotated_dataset.xlsx
├── LICENSE
├── LR_ATT&CK_model.pkl
├── LR_ATT&CK_model_V2.pkl
├── README.md
├── SMET.py
├── SMET_use_example.py
├── __pycache__
├── SMET.cpython-38.pyc
├── SMET.cpython-39.pyc
├── funs.cpython-39.pyc
├── nlp_general.cpython-39.pyc
├── parse_class.cpython-38.pyc
└── parse_class.cpython-39.pyc
├── funs.py
├── id2ATT&CK.json
├── id2ATT&CK_V2.json
├── id2mitre.json
├── nlp_general.py
├── parse_class.py
├── requirements-frozen.txt
├── requirements.txt
└── tram_data_predictions.json
/CVE_annotated_dataset.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/basel-a/SMET/21a223f710eb6accdb4dcbf435e8b3af9a739261/CVE_annotated_dataset.xlsx
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2024 basel-a
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/LR_ATT&CK_model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/basel-a/SMET/21a223f710eb6accdb4dcbf435e8b3af9a739261/LR_ATT&CK_model.pkl
--------------------------------------------------------------------------------
/LR_ATT&CK_model_V2.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/basel-a/SMET/21a223f710eb6accdb4dcbf435e8b3af9a739261/LR_ATT&CK_model_V2.pkl
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # SMET (mapping CTI reports and CVE to ATT&CK)
2 | SMET : Semantic Mapping of CVE to ATT&CK and its Application to Cybersecurity
3 | https://link.springer.com/chapter/10.1007/978-3-031-37586-6_15
4 |
5 | SMET is a tool that maps text (such as CTI reports or CVE descriptions) to ATT&CK techniques.
6 | Check SMET_use_example.py to learn how to integrate SMET in your code
7 |
8 | ATT&CK BERT for semantically meaningful cybersecurity text embedding: https://huggingface.co/basel/ATTACK-BERT
9 |
10 | ## Installation
11 | Developed using Python 3.8.18; other versions should work too.
12 | ```bash
13 | pip install -r requirements.txt
14 | python -m spacy download en_core_web_lg
15 | ```
--------------------------------------------------------------------------------
/SMET.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Fri Apr 14 16:24:28 2023
4 |
5 | @author: basel
6 | """
7 |
8 |
9 |
10 | from sentence_transformers import SentenceTransformer
11 | import pickle
12 | from scipy.special import softmax
13 | from parse_class import Parser
14 | from nlp_general import NLP
15 | # import pandas as pd
16 | import funs
# Module-level resources shared by every function below; they are created
# once, when this module is imported.
nlp = NLP()
nlp.load_model('dep')
nlp.load_model('sentencizer')

# NOTE(security): unpickling can execute arbitrary code. Only load a model
# file that comes from a trusted source.
# The context manager closes the file handle as soon as the model is loaded.
with open("LR_ATT&CK_model_V2.pkl", 'rb') as _model_file:
    LR_model = pickle.load(_model_file)
emb_model = SentenceTransformer("basel/ATTACK-BERT")
id2mitre = funs.read_json_as_dict('id2mitre.json')
id2label = funs.read_json_as_dict('id2ATT&CK_V2.json')
# JSON object keys are strings; convert them to int so the table can be
# indexed with integer positions (see predict_techniques).
id2label = {int(i):id2label[i] for i in id2label}
26 |
def get_verbs_tag(srl):
    """Find, for each verb frame, the other frames that label its verb token.

    Args:
        srl: Mapping with a ``'verbs'`` list. Each frame is a mapping with
            ``'id'``, ``'verb'`` and ``'tags'`` (one tag string per token,
            where ``"B-V"`` marks the frame's own verb token).

    Returns:
        Dict mapping a frame id to a list of tuples
        ``(other_verb, role, distance, other_verb_index)``, one per other
        frame that tags this frame's verb token with something other than
        ``"B-V"`` or ``"O"``. ``role`` is that tag without its leading
        ``B-``/``I-`` prefix and ``distance`` is the absolute token distance
        between the two verbs. Each list is sorted by ascending distance.
        Frames with no such related frame do not appear in the result.
    """
    verbs_tag = {}
    frames = srl['verbs']

    for v in frames:
        try:
            v_ind = v['tags'].index("B-V")
        except ValueError:
            # This frame has no verb token, so there is nothing to look up.
            continue

        for other_v in frames:
            role_tag = other_v['tags'][v_ind]
            if role_tag in ('B-V', 'O'):
                continue

            try:
                other_v_ind = other_v['tags'].index("B-V")
            except ValueError:
                # The other frame has no verb token; it cannot be a parent.
                continue

            entry = (
                other_v["verb"],
                "-".join(role_tag.split("-")[1:]),
                abs(v_ind - other_v_ind),
                other_v_ind,
            )
            verbs_tag.setdefault(v['id'], []).append(entry)

    # Closest related verb first.
    for entries in verbs_tag.values():
        entries.sort(key=lambda x: x[2])
    return verbs_tag
54 |
def add_arg0_from_parent(srl, srl_dict):
    """Fill in a missing ARG0 on a verb from its closest related verb.

    ``srl_dict`` is modified in place. A verb entry receives a copy of
    another entry's ``'ARG0'`` when all of the following hold: it has a
    ``'V'`` role whose ``'text'`` is a key of the table returned by
    ``get_verbs_tag(srl)``, it has no ``'ARG0'`` yet, and its own key is
    in that table. The donor is the first item listed for that key, and
    it is used only if it is itself a key of ``srl_dict`` with an ``'ARG0'``.

    Args:
        srl: Raw SRL output, passed unchanged to ``get_verbs_tag``.
        srl_dict: Mapping of verb key -> mapping of role name -> role value.

    Returns:
        None.
    """
    related = get_verbs_tag(srl)
    for verb_key, roles in srl_dict.items():
        if 'V' not in roles:
            continue
        if roles['V']['text'] not in related:
            continue
        if 'ARG0' in roles or verb_key not in related:
            continue

        donor = related[verb_key][0][0]
        if donor in srl_dict and 'ARG0' in srl_dict[donor]:
            roles['ARG0'] = srl_dict[donor]['ARG0'].copy()
63 |
64 |
def get_AVs(text, CVE=False):
    """Extract candidate attack-vector phrases from *text*.

    The text is split into sentences, each sentence is run through the
    semantic-role-labelling parser, and verb/object phrases are pulled out
    of the parsed sentences.

    Args:
        text: Input text (a CVE description or any other text).
        CVE: When true, only phrases matching one of three filters are kept
            (ARG0 filter, ARG1 filter, verb filter, see below) and the full
            input text is added as an extra candidate. When false, no
            filter is applied.

    Returns:
        A set of phrases. Presumably these are strings, since callers
        ``.strip()`` and embed them (verify against
        ``nlp.extract_VO_from_sents_lambda``).
    """
    sents = nlp.seperate_sentences(text)
    cve_srl = {}
    for sent in sents:
        try:
            srl = Parser.extract_srl(sent)
            Parser.add_v_id_srl(srl)
            srl_dict = Parser.srl_to_dict(srl)
            add_arg0_from_parent(srl, srl_dict)
            cve_srl[sent] = srl_dict
        except Exception:
            # Best effort: a sentence that cannot be parsed is skipped.
            # ``Exception`` (not a bare except) so Ctrl-C / SystemExit
            # still propagate.
            print('error')

    if CVE:
        # NOTE(review): 'vulnerability' is matched case-sensitively here but
        # case-insensitively in the ARG1 filter below; confirm intent.
        arg_constrain = {'ARG0' : lambda x : 'attacker' in x.lower() or 'adversary' in x.lower() or 'user' in x.lower() or 'vulnerability' in x }
        vo0 = nlp.extract_VO_from_sents_lambda(cve_srl, arg_constrain)

        # NOTE(review): 'user' is matched case-sensitively here but
        # case-insensitively in the ARG0 filter above; confirm intent.
        arg_constrain = {'ARG1' : lambda x : 'attacker' in x.lower() or 'adversary' in x.lower() or 'user' in x or 'vulnerability' in x.lower() }
        vo1 = nlp.extract_VO_from_sents_lambda(cve_srl, arg_constrain)

        arg_constrain = {'V' : lambda x : 'allow' in x.lower() or 'lead' in x.lower() or 'result' in x.lower()}
        vo2 = nlp.extract_VO_from_sents_lambda(cve_srl, arg_constrain) #or 'caus' in x.lower()

        # Union of the first element of every item found by any filter.
        cve_vos = {
            item[0]
            for found in (vo0, vo1, vo2)
            for items in found.values()
            for item in items
        }
        # The whole description is always a candidate as well.
        cve_vos.add(text)
    else:
        arg_constrain = {}
        vo = nlp.extract_VO_from_sents_lambda(cve_srl, arg_constrain)
        cve_vos = {item[0] for items in vo.values() for item in items}
    return cve_vos
99 |
100 |
def predict_techniques(emb, clf, id2label):
    """Rank techniques for one embedding.

    Args:
        emb: Embedding of a single text, passed to ``clf`` as a batch of one.
        clf: Classifier exposing ``decision_function``.
        id2label: Mapping of class position (int) -> technique id; the id is
            translated through the module-level ``id2mitre`` table.

    Returns:
        List of ``(id2mitre entry, score)`` pairs sorted by descending
        score, where the scores are the softmax of the decision values.
    """
    decision = clf.decision_function([emb])
    scores = softmax(decision)[0]
    names = [id2mitre[id2label[pos]] for pos in range(len(decision[0]))]
    ranked = list(zip(names, scores))
    ranked.sort(key=lambda pair: pair[1], reverse=True)
    return ranked
105 |
106 |
def predict_per_vo(cve_vos, rank, id2mitre):
    """Rank every phrase and merge the rankings into one list.

    Args:
        cve_vos: Iterable of phrases (strings). Blank phrases are skipped.
        rank: Callable taking one phrase and returning an iterable of
            sequences whose first two items are ``(name, score)``.
        id2mitre: Unused; kept so existing callers keep working.

    Returns:
        List of ``(name, score)`` tuples sorted by descending score, holding
        only the best-scoring entry for each name. Entries whose name is
        falsy (e.g. an empty string) are dropped.
    """
    scored = []
    for vo in cve_vos:
        if vo.strip() == '':
            continue
        scored.extend((j[0], j[1]) for j in rank(vo))

    # Stable sort: ties keep the order in which they were produced.
    scored.sort(key=lambda x: x[1], reverse=True)

    # Keep the first (highest-scoring) occurrence of every name. A set makes
    # the membership test O(1) instead of scanning a list each time.
    seen = set()
    best = []
    for item in scored:
        if item[0] not in seen:
            seen.add(item[0])
            best.append(item)

    return [item for item in best if item[0]]
129 |
130 | #map text to ATT&CK
def map_text(text, CVE=False):
    """Map a piece of text to ranked ATT&CK techniques.

    Candidate phrases are extracted with ``get_AVs``, each one is embedded
    and ranked with ``predict_techniques``, and the rankings are merged by
    ``predict_per_vo``.

    Args:
        text: Text to map.
        CVE: Forwarded to ``get_AVs``.

    Returns:
        Whatever ``predict_per_vo`` returns for the extracted phrases.
    """
    def rank(phrase):
        return predict_techniques(emb_model.encode(phrase), LR_model, id2label)

    attack_vectors = get_AVs(text, CVE=CVE)
    return predict_per_vo(attack_vectors, rank, id2mitre)
135 |
136 | #map attack vector to ATT&CK
def map_attack_vector(AV):
    """Map a single attack-vector phrase to ranked ATT&CK techniques.

    Args:
        AV: The phrase to embed and classify.

    Returns:
        The ranking produced by ``predict_techniques`` for the phrase.
    """
    embedding = emb_model.encode(AV)
    return predict_techniques(embedding, LR_model, id2label)
139 |
140 |
141 |
--------------------------------------------------------------------------------
/SMET_use_example.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Sat Apr 15 04:03:07 2023
4 |
5 | @author: basel
6 | """
7 |
8 | from SMET import map_text,map_attack_vector
9 |
# Mapping tips:
# - When the input is short (e.g., one sentence or a single attack action), use map_attack_vector().
# - For inputs that consist of a few lines, such as a CVE entry or a paragraph from a CTI report, use map_text().
# - When the input is long, like a full CTI report, segment the text into multiple paragraphs or sentences and process each one separately.


# Map attack vectors to ATT&CK
AV1 = 'take screenshot'
mapping1 = map_attack_vector(AV1)

AV2 = 'delete logs'
mapping2 = map_attack_vector(AV2)

AV3 = 'exfiltrate data to C2 server'
mapping3 = map_attack_vector(AV3)



# Map a CVE description to ATT&CK (the empty string is a placeholder; put the CVE text here)
cve = ""
mapping = map_text(cve,CVE = True)


# Map any text to ATT&CK (the empty string is a placeholder; put your text here)
cve = ""
mapping = map_text(cve,CVE = False)


# Get an embedding using ATT&CK BERT (the empty string is a placeholder)
from sentence_transformers import SentenceTransformer

text = ""

emb_model = SentenceTransformer("basel/ATTACK-BERT")
embedding = emb_model.encode(text)


###### Compare two sentences: embed both, then print their cosine similarity
from sentence_transformers import SentenceTransformer

model = SentenceTransformer('basel/ATTACK-BERT')

sentences = ["the account has weak password", "attacker gain an initial access to the machine"]

embeddings = model.encode(sentences)

from sklearn.metrics.pairwise import cosine_similarity
print(cosine_similarity([embeddings[0]], [embeddings[1]]))
58 |
59 |
60 |
61 |
62 |
63 |
--------------------------------------------------------------------------------
/__pycache__/SMET.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/basel-a/SMET/21a223f710eb6accdb4dcbf435e8b3af9a739261/__pycache__/SMET.cpython-38.pyc
--------------------------------------------------------------------------------
/__pycache__/SMET.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/basel-a/SMET/21a223f710eb6accdb4dcbf435e8b3af9a739261/__pycache__/SMET.cpython-39.pyc
--------------------------------------------------------------------------------
/__pycache__/funs.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/basel-a/SMET/21a223f710eb6accdb4dcbf435e8b3af9a739261/__pycache__/funs.cpython-39.pyc
--------------------------------------------------------------------------------
/__pycache__/nlp_general.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/basel-a/SMET/21a223f710eb6accdb4dcbf435e8b3af9a739261/__pycache__/nlp_general.cpython-39.pyc
--------------------------------------------------------------------------------
/__pycache__/parse_class.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/basel-a/SMET/21a223f710eb6accdb4dcbf435e8b3af9a739261/__pycache__/parse_class.cpython-38.pyc
--------------------------------------------------------------------------------
/__pycache__/parse_class.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/basel-a/SMET/21a223f710eb6accdb4dcbf435e8b3af9a739261/__pycache__/parse_class.cpython-39.pyc
--------------------------------------------------------------------------------
/funs.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Tue Oct 29 12:59:22 2019
4 |
5 | @author: babdeen
6 | """
7 | import json
8 | from os import listdir
9 | from os.path import isfile, join,isdir
10 | #import gensim
11 | #model = gensim.models.KeyedVectors.load_word2vec_format('C:/basel/GoogleNews-vectors-negative300.bin', binary=True)
12 |
def save_list_to_text(l, dist):
    """Write each string of *l* to the text file *dist*, one per line.

    Newlines inside an item are replaced by a space so every item stays on
    a single line. The file is written as UTF-8; characters that cannot be
    encoded are dropped.

    Args:
        l: Iterable of strings.
        dist: Destination path; must end in ``.txt``.

    Returns:
        ``'error'`` (nothing is written) when *dist* does not end in
        ``.txt``, otherwise ``'done'``.
    """
    if dist[-4:] != '.txt':
        return 'error'
    # ``with`` closes the file even when a write raises.
    with open(dist, 'w', encoding='utf-8', errors='ignore') as file:
        for line in l:
            file.write(line.replace('\n', ' '))
            file.write('\n')
    return 'done'
23 |
def save_list_to_text_w_sep(l, dist, sep):
    """Write each string of *l* to *dist*, following every item with *sep*.

    Each item is written on its own line (embedded newlines become spaces)
    and is followed by a line containing *sep*. The file is written as
    UTF-8; characters that cannot be encoded are dropped.

    Args:
        l: Iterable of strings.
        dist: Destination path; must end in ``.txt``.
        sep: Separator string written on its own line after every item.

    Returns:
        ``'error'`` (nothing is written) when *dist* does not end in
        ``.txt``, otherwise ``'done'`` (matching the sibling save helpers).
    """
    if dist[-4:] != '.txt':
        return 'error'
    # ``with`` closes the file even when a write raises.
    with open(dist, 'w', encoding='utf-8', errors='ignore') as file:
        for line in l:
            file.write(line.replace('\n', ' '))
            file.write('\n')
            file.write(sep)
            file.write('\n')
    return 'done'
35 |
def save_list_to_text_2(l, dist):
    """Write ``str(item)`` for each item of *l* to *dist*, one per line.

    Unlike ``save_list_to_text``, items may be of any type and embedded
    newlines are written unchanged. The file is written as UTF-8;
    characters that cannot be encoded are dropped.

    Args:
        l: Iterable of objects.
        dist: Destination path; must end in ``.txt``.

    Returns:
        ``'error'`` (nothing is written) when *dist* does not end in
        ``.txt``, otherwise ``'done'``.
    """
    if dist[-4:] != '.txt':
        return 'error'
    # ``with`` closes the file even when a write raises.
    with open(dist, 'w', encoding='utf-8', errors='ignore') as file:
        for line in l:
            file.write(str(line))
            file.write('\n')
    return 'done'
45 |
def save_list_to_text_w_sep_2(l, dist, sep):
    """Write ``str(item)`` for each item of *l*, following each with *sep*.

    Each item is written on its own line and is followed by a line
    containing *sep*. The file is written as UTF-8 (characters that cannot
    be encoded are dropped), like the other save helpers in this module,
    instead of the platform default encoding.

    Args:
        l: Iterable of objects.
        dist: Destination path; must end in ``.txt``.
        sep: Separator string written on its own line after every item.

    Returns:
        ``'error'`` (nothing is written) when *dist* does not end in
        ``.txt``, otherwise ``'done'``.
    """
    if dist[-4:] != '.txt':
        return 'error'
    # ``with`` closes the file even when a write raises.
    with open(dist, 'w', encoding='utf-8', errors='ignore') as file:
        for line in l:
            file.write(str(line))
            file.write('\n')
            file.write(sep)
            file.write('\n')
    return 'done'
57 |
def read_list_from_text(dist, maxi = -1):
    """Read the lines of the text file *dist* into a list.

    Lines keep their trailing newline. The file is read as UTF-8 and
    undecodable bytes are ignored.

    Args:
        dist: Path of the file to read.
        maxi: Maximum number of lines to return. ``-1`` (the default, or
            any other negative value) means no limit.

    Returns:
        List of at most *maxi* lines. (The previous implementation returned
        ``maxi + 1`` lines because it checked the limit only after
        appending.)
    """
    # ``with`` closes the file even when reading raises.
    with open(dist, 'r', encoding='utf-8', errors='ignore') as file:
        if maxi < 0:
            return list(file)
        # zip stops as soon as the range is exhausted, so no extra line is
        # consumed from the file.
        return [line for _, line in zip(range(maxi), file)]
77 |
78 |
79 | #def most_sim(word , top = 6):
80 | # return model.most_similar(word, topn=top)
81 |
def save_dict_as_json(dist, d, note = ''):
    """Dump *d* as JSON to *dist*, optionally with a side note file.

    Args:
        dist: Path of the JSON file to write.
        d: Object to serialise with ``json.dump``.
        note: When not the empty string, it is written to a second file
            whose path is *dist* with its last five characters replaced
            by ``.txt``.

    Returns:
        ``'done'``.
    """
    with open(dist, 'w') as json_out:
        json.dump(d, json_out)

    if note == '':
        return 'done'

    note_path = dist[:-5] + '.txt'
    with open(note_path, 'w') as note_out:
        note_out.write(note)
    return 'done'
89 |
def save_list_as_json(dist, l):
    """Store *l* in the JSON file *dist* as an object keyed by position.

    Args:
        dist: Path of the JSON file to write.
        l: Iterable of JSON-serialisable items.

    Returns:
        ``'done'``.
    """
    indexed = dict(enumerate(l))
    with open(dist, 'w') as json_out:
        json.dump(indexed, json_out)
    return 'done'
95 |
def read_json_as_dict(src):
    """Parse the JSON file *src* (opened with the default encoding).

    Args:
        src: Path of the JSON file.

    Returns:
        The decoded JSON content.
    """
    with open(src) as handle:
        return json.load(handle)
100 |
def read_json_as_dict_utf(src):
    """Parse the JSON file *src*, reading it as UTF-8.

    Args:
        src: Path of the JSON file.

    Returns:
        The decoded JSON content.
    """
    with open(src, encoding = 'utf-8') as handle:
        return json.load(handle)
105 |
106 |
def read_words_from_text(dist, delimeter = ','):
    """Read the whole file *dist* and split its content on *delimeter*.

    Args:
        dist: Path of the text file (opened with the default encoding).
        delimeter: Separator passed to ``str.split``; defaults to ``','``.

    Returns:
        List of the pieces between delimiters. Whitespace and newlines are
        not stripped.
    """
    # ``with`` closes the file even when reading raises.
    with open(dist, 'r') as file:
        content = file.read()
    return content.split(delimeter)
def get_files_in_folder(path):
    """Return the names of the regular files directly inside *path*.

    Args:
        path: Directory to list.

    Returns:
        List of entry names (not full paths), in ``listdir`` order.
    """
    names = []
    for entry in listdir(path):
        if isfile(join(path, entry)):
            names.append(entry)
    return names
116 |
117 |
def get_folders_in_folder(path):
    """Return the names of the directories directly inside *path*.

    Args:
        path: Directory to list.

    Returns:
        List of entry names (not full paths), in ``listdir`` order.
    """
    names = []
    for entry in listdir(path):
        if isdir(join(path, entry)):
            names.append(entry)
    return names
120 |
def get_all_files(path):
    """List every file under *path*, including files in sub-folders.

    Folders are visited first-in, first-out starting from *path*; the
    sub-folders of each visited folder are queued behind the folders that
    are already waiting.

    Args:
        path: Root folder to scan.

    Returns:
        List of file paths, each built as ``folder + '/' + name``.
    """
    pending = [path]
    found = []
    while pending:
        current = pending.pop(0)
        for sub in get_folders_in_folder(current):
            pending.append(current + '/' + sub)
        for name in get_files_in_folder(current):
            found.append(current + '/' + name)
    return found
131 |
--------------------------------------------------------------------------------
/id2ATT&CK.json:
--------------------------------------------------------------------------------
1 | {"0": "T1137", "1": "T1176", "2": "T1110", "3": "T1134", "4": "T1190", "5": "T1495", "6": "T1104", "7": "T1568", "8": "T1098", "9": "T1205", "10": "T1614", "11": "T1030", "12": "T1135", "13": "T1608", "14": "T1595", "15": "T1531", "16": "T1596", "17": "T1220", "18": "T1014", "19": "T1561", "20": "T1485", "21": "T1090", "22": "T1490", "23": "T1219", "24": "T1007", "25": "T1580", "26": "T1033", "27": "T1082", "28": "T1102", "29": "T1087", "30": "T1010", "31": "T1550", "32": "T1136", "33": "T1484", "34": "T1497", "35": "T1570", "36": "T1601", "37": "T1207", "38": "T1556", "39": "T1200", "40": "T1602", "41": "T1591", "42": "T1204", "43": "T1482", "44": "T1083", "45": "T1599", "46": "T1571", "47": "T1597", "48": "T1071", "49": "T1069", "50": "T1025", "51": "T1020", "52": "T1072", "53": "T1535", "54": "T1538", "55": "T1140", "56": "T1127", "57": "T1199", "58": "T1587", "59": "T1037", "60": "T1080", "61": "T1189", "62": "T1593", "63": "T1197", "64": "T1496", "65": "T1021", "66": "T1553", "67": "T1027", "68": "T1113", "69": "T1125", "70": "T1119", "71": "T1040", "72": "T1528", "73": "T1530", "74": "T1534", "75": "T1592", "76": "T1562", "77": "T1543", "78": "T1006", "79": "T1529", "80": "T1598", "81": "T1124", "82": "T1053", "83": "T1567", "84": "T1569", "85": "T1056", "86": "T1547", "87": "T1584", "88": "T1078", "89": "T1590", "90": "T1202", "91": "T1055", "92": "T1047", "93": "T1609", "94": "T1560", "95": "T1554", "96": "T1185", "97": "T1036", "98": "T1526", "99": "T1039", "100": "T1041", "101": "T1498", "102": "T1212", "103": "T1572", "104": "T1537", "105": "T1612", "106": "T1048", "107": "T1221", "108": "T1201", "109": "T1217", "110": "T1132", "111": "T1074", "112": "T1005", "113": "T1564", "114": "T1187", "115": "T1016", "116": "T1211", "117": "T1613", "118": "T1049", "119": "T1052", "120": "T1008", "121": "T1611", "122": "T1586", "123": "T1499", "124": "T1213", "125": "T1525", "126": "T1557", "127": "T1057", "128": "T1542", "129": "T1216", "130": "T1222", "131": 
"T1114", "132": "T1546", "133": "T1559", "134": "T1120", "135": "T1018", "136": "T1011", "137": "T1486", "138": "T1563", "139": "T1552", "140": "T1518", "141": "T1129", "142": "T1115", "143": "T1001", "144": "T1068", "145": "T1046", "146": "T1583", "147": "T1105", "148": "T1091", "149": "T1012", "150": "T1585", "151": "T1111", "152": "T1610", "153": "T1588", "154": "T1548", "155": "T1573", "156": "T1003", "157": "T1489", "158": "T1574", "159": "T1589", "160": "T1133", "161": "T1555", "162": "T1029", "163": "T1505", "164": "T1218", "165": "T1070", "166": "T1578", "167": "T1210", "168": "T1558", "169": "T1106", "170": "T1566", "171": "T1491", "172": "T1195", "173": "T1095", "174": "T1092", "175": "T1112", "176": "T1539", "177": "T1600", "178": "T1565", "179": "T1594", "180": "T1059", "181": "T1123", "182": "T1203", "183": "T1480", "184": "T1606"}
--------------------------------------------------------------------------------
/id2ATT&CK_V2.json:
--------------------------------------------------------------------------------
1 | {"0": "T1547", "1": "T1201", "2": "T1003", "3": "T1578", "4": "T1499", "5": "T1571", "6": "T1129", "7": "T1197", "8": "T1007", "9": "T1584", "10": "T1498", "11": "T1005", "12": "T1102", "13": "T1531", "14": "T1132", "15": "T1030", "16": "T1110", "17": "T1491", "18": "T1069", "19": "T1566", "20": "T1221", "21": "T1599", "22": "T1039", "23": "T1530", "24": "T1137", "25": "T1600", "26": "T1083", "27": "T1550", "28": "T1591", "29": "T1202", "30": "T1553", "31": "T1018", "32": "T1543", "33": "T1115", "34": "T1204", "35": "T1111", "36": "T1125", "37": "T1027", "38": "T1528", "39": "T1537", "40": "T1580", "41": "T1014", "42": "T1025", "43": "T1614", "44": "T1001", "45": "T1091", "46": "T1176", "47": "T1057", "48": "T1199", "49": "T1497", "50": "T1564", "51": "T1120", "52": "T1219", "53": "T1053", "54": "T1588", "55": "T1213", "56": "T1505", "57": "T1546", "58": "T1119", "59": "T1068", "60": "T1496", "61": "T1485", "62": "T1047", "63": "T1611", "64": "T1040", "65": "T1074", "66": "T1114", "67": "T1106", "68": "T1597", "69": "T1220", "70": "T1557", "71": "T1082", "72": "T1195", "73": "T1480", "74": "T1572", "75": "T1592", "76": "T1124", "77": "T1594", "78": "T1140", "79": "T1036", "80": "T1601", "81": "T1078", "82": "T1112", "83": "T1049", "84": "T1548", "85": "T1200", "86": "T1029", "87": "T1589", "88": "T1020", "89": "T1587", "90": "T1048", "91": "T1583", "92": "T1136", "93": "T1037", "94": "T1012", "95": "T1568", "96": "T1559", "97": "T1010", "98": "T1210", "99": "T1016", "100": "T1185", "101": "T1518", "102": "T1072", "103": "T1056", "104": "T1490", "105": "T1133", "106": "T1529", "107": "T1562", "108": "T1567", "109": "T1203", "110": "T1105", "111": "T1558", "112": "T1055", "113": "T1070", "114": "T1052", "115": "T1538", "116": "T1489", "117": "T1552", "118": "T1484", "119": "T1218", "120": "T1596", "121": "T1021", "122": "T1585", "123": "T1606", "124": "T1095", "125": "T1613", "126": "T1565", "127": "T1526", "128": "T1563", "129": "T1560", "130": "T1059", "131": 
"T1482", "132": "T1190", "133": "T1080", "134": "T1608", "135": "T1011", "136": "T1008", "137": "T1205", "138": "T1087", "139": "T1602", "140": "T1006", "141": "T1135", "142": "T1554", "143": "T1090", "144": "T1212", "145": "T1046", "146": "T1187", "147": "T1098", "148": "T1598", "149": "T1211", "150": "T1071", "151": "T1041", "152": "T1123", "153": "T1586", "154": "T1104", "155": "T1495", "156": "T1033", "157": "T1222", "158": "T1573", "159": "T1570", "160": "T1207", "161": "T1216", "162": "T1189", "163": "T1609", "164": "T1574", "165": "T1134", "166": "T1590", "167": "T1555", "168": "T1113", "169": "T1486", "170": "T1534", "171": "T1539", "172": "T1612", "173": "T1569", "174": "T1561", "175": "T1556", "176": "T1593", "177": "T1542", "178": "T1595", "179": "T1535", "180": "T1092", "181": "T1525", "182": "T1127", "183": "T1610", "184": "T1217"}
--------------------------------------------------------------------------------
/id2mitre.json:
--------------------------------------------------------------------------------
1 | {"T1055.011": "Process Injection: Extra Window Memory Injection", "T1053.005": "Scheduled Task/Job: Scheduled Task", "T1205.002": "Traffic Signaling: Socket Filters", "T1560.001": "Archive Collected Data: Archive via Utility", "T1021.005": "Remote Services: VNC", "T1047": "Windows Management Instrumentation", "T1113": "Screen Capture", "T1037": "Boot or Logon Initialization Scripts", "T1557": "Adversary-in-the-Middle", "T1033": "System Owner/User Discovery", "T1583": "Acquire Infrastructure", "T1218.011": "System Binary Proxy Execution: Rundll32", "T1613": "Container and Resource Discovery", "T1583.007": "Acquire Infrastructure: Serverless", "T1132.001": "Data Encoding: Standard Encoding", "T1027.009": "Obfuscated Files or Information: Embedded Payloads", "T1556.003": "Modify Authentication Process: Pluggable Authentication Modules", "T1578.004": "Modify Cloud Compute Infrastructure: Revert Cloud Instance", "T1592": "Gather Victim Host Information", "T1596.003": "Search Open Technical Databases: Digital Certificates", "T1056.001": "Input Capture: Keylogging", "T1222.002": "File and Directory Permissions Modification: Linux and Mac File and Directory Permissions Modification", "T1110.001": "Brute Force: Password Guessing", "T1216.001": "System Script Proxy Execution: PubPrn", "T1597.002": "Search Closed Sources: Purchase Technical Data", "T1003": "OS Credential Dumping", "T1129": "Shared Modules", "T1602": "Data from Configuration Repository", "T1561.002": "Disk Wipe: Disk Structure Wipe", "T1498.001": "Network Denial of Service: Direct Network Flood", "T1574.007": "Hijack Execution Flow: Path Interception by PATH Environment Variable", "T1213.002": "Data from Information Repositories: Sharepoint", "T1006": "Direct Volume Access", "T1564.008": "Hide Artifacts: Email Hiding Rules", "T1491.002": "Defacement: External Defacement", "T1590.005": "Gather Victim Network Information: IP Addresses", "T1499.001": "Endpoint Denial of Service: OS Exhaustion Flood", "T1014": 
"Rootkit", "T1546.013": "Event Triggered Execution: PowerShell Profile", "T1059.007": "Command and Scripting Interpreter: JavaScript", "T1590.002": "Gather Victim Network Information: DNS", "T1123": "Audio Capture", "T1543": "Create or Modify System Process", "T1133": "External Remote Services", "T1546.006": "Event Triggered Execution: LC_LOAD_DYLIB Addition", "T1539": "Steal Web Session Cookie", "T1053.007": "Scheduled Task/Job: Container Orchestration Job", "T1568.002": "Dynamic Resolution: Domain Generation Algorithms", "T1036.007": "Masquerading: Double File Extension", "T1548.002": "Abuse Elevation Control Mechanism: Bypass User Account Control", "T1016.001": "System Network Configuration Discovery: Internet Connection Discovery", "T1548.003": "Abuse Elevation Control Mechanism: Sudo and Sudo Caching", "T1560.003": "Archive Collected Data: Archive via Custom Method", "T1578": "Modify Cloud Compute Infrastructure", "T1069": "Permission Groups Discovery", "T1114": "Email Collection", "T1003.002": "OS Credential Dumping: Security Account Manager", "T1596.002": "Search Open Technical Databases: WHOIS", "T1542.001": "Pre-OS Boot: System Firmware", "T1594": "Search Victim-Owned Websites", "T1069.003": "Permission Groups Discovery: Cloud Groups", "T1574.011": "Hijack Execution Flow: Services Registry Permissions Weakness", "T1596.001": "Search Open Technical Databases: DNS/Passive DNS", "T1499.003": "Endpoint Denial of Service: Application Exhaustion Flood", "T1195.001": "Supply Chain Compromise: Compromise Software Dependencies and Development Tools", "T1588.004": "Obtain Capabilities: Digital Certificates", "T1583.002": "Acquire Infrastructure: DNS Server", "T1561": "Disk Wipe", "T1071.004": "Application Layer Protocol: DNS", "T1552.005": "Unsecured Credentials: Cloud Instance Metadata API", "T1555.002": "Credentials from Password Stores: Securityd Memory", "T1615": "Group Policy Discovery", "T1542.003": "Pre-OS Boot: Bootkit", "T1025": "Data from Removable Media", 
"T1218.013": "System Binary Proxy Execution: Mavinject", "T1074.001": "Data Staged: Local Data Staging", "T1036.005": "Masquerading: Match Legitimate Name or Location", "T1587.003": "Develop Capabilities: Digital Certificates", "T1565.001": "Data Manipulation: Stored Data Manipulation", "T1110.002": "Brute Force: Password Cracking", "T1114.001": "Email Collection: Local Email Collection", "T1555.001": "Credentials from Password Stores: Keychain", "T1547": "Boot or Logon Autostart Execution", "T1003.004": "OS Credential Dumping: LSA Secrets", "T1600": "Weaken Encryption", "T1606.002": "Forge Web Credentials: SAML Tokens", "T1489": "Service Stop", "T1587.001": "Develop Capabilities: Malware", "T1087.002": "Account Discovery: Domain Account", "T1547.014": "Boot or Logon Autostart Execution: Active Setup", "T1564": "Hide Artifacts", "T1559.002": "Inter-Process Communication: Dynamic Data Exchange", "T1204.002": "User Execution: Malicious File", "T1591.003": "Gather Victim Org Information: Identify Business Tempo", "T1592.001": "Gather Victim Host Information: Hardware", "T1080": "Taint Shared Content", "T1484.002": "Domain Policy Modification: Domain Trust Modification", "T1573.001": "Encrypted Channel: Symmetric Cryptography", "T1087.001": "Account Discovery: Local Account", "T1586.001": "Compromise Accounts: Social Media Accounts", "T1562.009": "Impair Defenses: Safe Mode Boot", "T1542.005": "Pre-OS Boot: TFTP Boot", "T1543.003": "Create or Modify System Process: Windows Service", "T1568.001": "Dynamic Resolution: Fast Flux DNS", "T1497.001": "Virtualization/Sandbox Evasion: System Checks", "T1053.003": "Scheduled Task/Job: Cron", "T1069.002": "Permission Groups Discovery: Domain Groups", "T1588.006": "Obtain Capabilities: Vulnerabilities", "T1566.002": "Phishing: Spearphishing Link", "T1070.002": "Indicator Removal: Clear Linux or Mac System Logs", "T1499.004": "Endpoint Denial of Service: Application or System Exploitation", "T1137": "Office Application Startup", 
"T1218.004": "System Binary Proxy Execution: InstallUtil", "T1598.003": "Phishing for Information: Spearphishing Link", "T1021.004": "Remote Services: SSH", "T1098.003": "Account Manipulation: Additional Cloud Roles", "T1547.012": "Boot or Logon Autostart Execution: Print Processors", "T1566.001": "Phishing: Spearphishing Attachment", "T1027.008": "Obfuscated Files or Information: Stripped Payloads", "T1559.001": "Inter-Process Communication: Component Object Model", "T1574.001": "Hijack Execution Flow: DLL Search Order Hijacking", "T1119": "Automated Collection", "T1115": "Clipboard Data", "T1003.007": "OS Credential Dumping: Proc Filesystem", "T1583.005": "Acquire Infrastructure: Botnet", "T1555.005": "Credentials from Password Stores: Password Managers", "T1553.001": "Subvert Trust Controls: Gatekeeper Bypass", "T1608.004": "Stage Capabilities: Drive-by Target", "T1007": "System Service Discovery", "T1040": "Network Sniffing", "T1553.002": "Subvert Trust Controls: Code Signing", "T1530": "Data from Cloud Storage", "T1565.003": "Data Manipulation: Runtime Data Manipulation", "T1552.002": "Unsecured Credentials: Credentials in Registry", "T1135": "Network Share Discovery", "T1120": "Peripheral Device Discovery", "T1590.004": "Gather Victim Network Information: Network Topology", "T1587.002": "Develop Capabilities: Code Signing Certificates", "T1222.001": "File and Directory Permissions Modification: Windows File and Directory Permissions Modification", "T1137.006": "Office Application Startup: Add-ins", "T1505.002": "Server Software Component: Transport Agent", "T1082": "System Information Discovery", "T1071": "Application Layer Protocol", "T1074.002": "Data Staged: Remote Data Staging", "T1053": "Scheduled Task/Job", "T1218.007": "System Binary Proxy Execution: Msiexec", "T1590.003": "Gather Victim Network Information: Network Trust Dependencies", "T1498.002": "Network Denial of Service: Reflection Amplification", "T1556.002": "Modify Authentication Process: 
Password Filter DLL", "T1505.005": "Server Software Component: Terminal Services DLL", "T1059.002": "Command and Scripting Interpreter: AppleScript", "T1176": "Browser Extensions", "T1499.002": "Endpoint Denial of Service: Service Exhaustion Flood", "T1195.003": "Supply Chain Compromise: Compromise Hardware Supply Chain", "T1106": "Native API", "T1070.007": "Indicator Removal: Clear Network Connection History and Configurations", "T1558.004": "Steal or Forge Kerberos Tickets: AS-REP Roasting", "T1584.003": "Compromise Infrastructure: Virtual Private Server", "T1600.001": "Weaken Encryption: Reduce Key Space", "T1070.003": "Indicator Removal: Clear Command History", "T1202": "Indirect Command Execution", "T1091": "Replication Through Removable Media", "T1005": "Data from Local System", "T1140": "Deobfuscate/Decode Files or Information", "T1137.005": "Office Application Startup: Outlook Rules", "T1562": "Impair Defenses", "T1586.003": "Compromise Accounts: Cloud Accounts", "T1586.002": "Compromise Accounts: Email Accounts", "T1608.001": "Stage Capabilities: Upload Malware", "T1195": "Supply Chain Compromise", "T1190": "Exploit Public-Facing Application", "T1558": "Steal or Forge Kerberos Tickets", "T1555": "Credentials from Password Stores", "T1567": "Exfiltration Over Web Service", "T1219": "Remote Access Software", "T1583.001": "Acquire Infrastructure: Domains", "T1560.002": "Archive Collected Data: Archive via Library", "T1055.003": "Process Injection: Thread Execution Hijacking", "T1036": "Masquerading", "T1546.011": "Event Triggered Execution: Application Shimming", "T1552": "Unsecured Credentials", "T1547.010": "Boot or Logon Autostart Execution: Port Monitors", "T1070.008": "Indicator Removal: Clear Mailbox Data", "T1037.002": "Boot or Logon Initialization Scripts: Login Hook", "T1055": "Process Injection", "T1205": "Traffic Signaling", "T1218": "System Binary Proxy Execution", "T1070.006": "Indicator Removal: Timestomp", "T1620": "Reflective Code Loading", 
"T1611": "Escape to Host", "T1547.009": "Boot or Logon Autostart Execution: Shortcut Modification", "T1010": "Application Window Discovery", "T1087.003": "Account Discovery: Email Account", "T1497.003": "Virtualization/Sandbox Evasion: Time Based Evasion", "T1218.003": "System Binary Proxy Execution: CMSTP", "T1563.001": "Remote Service Session Hijacking: SSH Hijacking", "T1562.002": "Impair Defenses: Disable Windows Event Logging", "T1029": "Scheduled Transfer", "T1021.002": "Remote Services: SMB/Windows Admin Shares", "T1525": "Implant Internal Image", "T1572": "Protocol Tunneling", "T1218.002": "System Binary Proxy Execution: Control Panel", "T1599.001": "Network Boundary Bridging: Network Address Translation Traversal", "T1608.002": "Stage Capabilities: Upload Tool", "T1547.005": "Boot or Logon Autostart Execution: Security Support Provider", "T1550": "Use Alternate Authentication Material", "T1597.001": "Search Closed Sources: Threat Intel Vendors", "T1011": "Exfiltration Over Other Network Medium", "T1602.002": "Data from Configuration Repository: Network Device Configuration Dump", "T1589": "Gather Victim Identity Information", "T1562.004": "Impair Defenses: Disable or Modify System Firewall", "T1560": "Archive Collected Data", "T1553.003": "Subvert Trust Controls: SIP and Trust Provider Hijacking", "T1185": "Browser Session Hijacking", "T1021": "Remote Services", "T1071.003": "Application Layer Protocol: Mail Protocols", "T1556.007": "Modify Authentication Process: Hybrid Identity", "T1595.002": "Active Scanning: Vulnerability Scanning", "T1596": "Search Open Technical Databases", "T1207": "Rogue Domain Controller", "T1553.006": "Subvert Trust Controls: Code Signing Policy Modification", "T1610": "Deploy Container", "T1112": "Modify Registry", "T1543.004": "Create or Modify System Process: Launch Daemon", "T1580": "Cloud Infrastructure Discovery", "T1555.003": "Credentials from Password Stores: Credentials from Web Browsers", "T1574.008": "Hijack Execution 
Flow: Path Interception by Search Order Hijacking", "T1491": "Defacement", "T1535": "Unused/Unsupported Cloud Regions", "T1557.003": "Adversary-in-the-Middle: DHCP Spoofing", "T1563": "Remote Service Session Hijacking", "T1027.001": "Obfuscated Files or Information: Binary Padding", "T1505.003": "Server Software Component: Web Shell", "T1484.001": "Domain Policy Modification: Group Policy Modification", "T1217": "Browser Bookmark Discovery", "T1552.004": "Unsecured Credentials: Private Keys", "T1583.004": "Acquire Infrastructure: Server", "T1021.006": "Remote Services: Windows Remote Management", "T1011.001": "Exfiltration Over Other Network Medium: Exfiltration Over Bluetooth", "T1078.001": "Valid Accounts: Default Accounts", "T1547.003": "Boot or Logon Autostart Execution: Time Providers", "T1546.005": "Event Triggered Execution: Trap", "T1574.006": "Hijack Execution Flow: Dynamic Linker Hijacking", "T1136.001": "Create Account: Local Account", "T1092": "Communication Through Removable Media", "T1070.001": "Indicator Removal: Clear Windows Event Logs", "T1585.002": "Establish Accounts: Email Accounts", "T1557.001": "Adversary-in-the-Middle: LLMNR/NBT-NS Poisoning and SMB Relay", "T1222": "File and Directory Permissions Modification", "T1003.001": "OS Credential Dumping: LSASS Memory", "T1595": "Active Scanning", "T1548": "Abuse Elevation Control Mechanism", "T1134.002": "Access Token Manipulation: Create Process with Token", "T1548.001": "Abuse Elevation Control Mechanism: Setuid and Setgid", "T1547.004": "Boot or Logon Autostart Execution: Winlogon Helper DLL", "T1021.003": "Remote Services: Distributed Component Object Model", "T1110.003": "Brute Force: Password Spraying", "T1090.002": "Proxy: External Proxy", "T1056.003": "Input Capture: Web Portal Capture", "T1589.002": "Gather Victim Identity Information: Email Addresses", "T1003.005": "OS Credential Dumping: Cached Domain Credentials", "T1098.004": "Account Manipulation: SSH Authorized Keys", "T1590.006": 
"Gather Victim Network Information: Network Security Appliances", "T1546.012": "Event Triggered Execution: Image File Execution Options Injection", "T1218.008": "System Binary Proxy Execution: Odbcconf", "T1593.002": "Search Open Websites/Domains: Search Engines", "T1591.002": "Gather Victim Org Information: Business Relationships", "T1125": "Video Capture", "T1055.013": "Process Injection: Process Doppelg\u00e4nging", "T1016": "System Network Configuration Discovery", "T1578.003": "Modify Cloud Compute Infrastructure: Delete Cloud Instance", "T1593.003": "Search Open Websites/Domains: Code Repositories", "T1574.005": "Hijack Execution Flow: Executable Installer File Permissions Weakness", "T1546.008": "Event Triggered Execution: Accessibility Features", "T1087": "Account Discovery", "T1090": "Proxy", "T1059": "Command and Scripting Interpreter", "T1562.006": "Impair Defenses: Indicator Blocking", "T1136.002": "Create Account: Domain Account", "T1589.003": "Gather Victim Identity Information: Employee Names", "T1482": "Domain Trust Discovery", "T1558.001": "Steal or Forge Kerberos Tickets: Golden Ticket", "T1020": "Automated Exfiltration", "T1592.004": "Gather Victim Host Information: Client Configurations", "T1562.007": "Impair Defenses: Disable or Modify Cloud Firewall", "T1036.002": "Masquerading: Right-to-Left Override", "T1588.001": "Obtain Capabilities: Malware", "T1542.002": "Pre-OS Boot: Component Firmware", "T1070": "Indicator Removal", "T1048.001": "Exfiltration Over Alternative Protocol: Exfiltration Over Symmetric Encrypted Non-C2 Protocol", "T1137.001": "Office Application Startup: Office Template Macros", "T1583.003": "Acquire Infrastructure: Virtual Private Server", "T1213.001": "Data from Information Repositories: Confluence", "T1550.003": "Use Alternate Authentication Material: Pass the Ticket", "T1609": "Container Administration Command", "T1083": "File and Directory Discovery", "T1568": "Dynamic Resolution", "T1036.004": "Masquerading: Masquerade 
Task or Service", "T1055.004": "Process Injection: Asynchronous Procedure Call", "T1020.001": "Automated Exfiltration: Traffic Duplication", "T1647": "Plist File Modification", "T1546.009": "Event Triggered Execution: AppCert DLLs", "T1114.003": "Email Collection: Email Forwarding Rule", "T1074": "Data Staged", "T1649": "Steal or Forge Authentication Certificates", "T1098.005": "Account Manipulation: Device Registration", "T1049": "System Network Connections Discovery", "T1584": "Compromise Infrastructure", "T1553.005": "Subvert Trust Controls: Mark-of-the-Web Bypass", "T1600.002": "Weaken Encryption: Disable Crypto Hardware", "T1542": "Pre-OS Boot", "T1612": "Build Image on Host", "T1055.002": "Process Injection: Portable Executable Injection", "T1218.012": "System Binary Proxy Execution: Verclsid", "T1586": "Compromise Accounts", "T1569.001": "System Services: Launchctl", "T1584.005": "Compromise Infrastructure: Botnet", "T1059.008": "Command and Scripting Interpreter: Network Device CLI", "T1552.003": "Unsecured Credentials: Bash History", "T1562.010": "Impair Defenses: Downgrade Attack", "T1559.003": "Inter-Process Communication: XPC Services", "T1497": "Virtualization/Sandbox Evasion", "T1102": "Web Service", "T1552.001": "Unsecured Credentials: Credentials In Files", "T1568.003": "Dynamic Resolution: DNS Calculation", "T1218.005": "System Binary Proxy Execution: Mshta", "T1547.015": "Boot or Logon Autostart Execution: Login Items", "T1608": "Stage Capabilities", "T1608.005": "Stage Capabilities: Link Target", "T1104": "Multi-Stage Channels", "T1480": "Execution Guardrails", "T1619": "Cloud Storage Object Discovery", "T1606.001": "Forge Web Credentials: Web Cookies", "T1134.001": "Access Token Manipulation: Token Impersonation/Theft", "T1567.001": "Exfiltration Over Web Service: Exfiltration to Code Repository", "T1205.001": "Traffic Signaling: Port Knocking", "T1583.006": "Acquire Infrastructure: Web Services", "T1528": "Steal Application Access Token", 
"T1598.002": "Phishing for Information: Spearphishing Attachment", "T1098.001": "Account Manipulation: Additional Cloud Credentials", "T1204": "User Execution", "T1491.001": "Defacement: Internal Defacement", "T1564.002": "Hide Artifacts: Hidden Users", "T1134.003": "Access Token Manipulation: Make and Impersonate Token", "T1552.006": "Unsecured Credentials: Group Policy Preferences", "T1048.002": "Exfiltration Over Alternative Protocol: Exfiltration Over Asymmetric Encrypted Non-C2 Protocol", "T1087.004": "Account Discovery: Cloud Account", "T1057": "Process Discovery", "T1562.003": "Impair Defenses: Impair Command History Logging", "T1546.003": "Event Triggered Execution: Windows Management Instrumentation Event Subscription", "T1596.004": "Search Open Technical Databases: CDNs", "T1497.002": "Virtualization/Sandbox Evasion: User Activity Based Checks", "T1585.003": "Establish Accounts: Cloud Accounts", "T1072": "Software Deployment Tools", "T1041": "Exfiltration Over C2 Channel", "T1134.004": "Access Token Manipulation: Parent PID Spoofing", "T1591": "Gather Victim Org Information", "T1606": "Forge Web Credentials", "T1621": "Multi-Factor Authentication Request Generation", "T1554": "Compromise Client Software Binary", "T1059.001": "Command and Scripting Interpreter: PowerShell", "T1546.001": "Event Triggered Execution: Change Default File Association", "T1055.014": "Process Injection: VDSO Hijacking", "T1071.002": "Application Layer Protocol: File Transfer Protocols", "T1212": "Exploitation for Credential Access", "T1546.014": "Event Triggered Execution: Emond", "T1102.003": "Web Service: One-Way Communication", "T1590": "Gather Victim Network Information", "T1210": "Exploitation of Remote Services", "T1534": "Internal Spearphishing", "T1574.010": "Hijack Execution Flow: Services File Permissions Weakness", "T1547.001": "Boot or Logon Autostart Execution: Registry Run Keys / Startup Folder", "T1199": "Trusted Relationship", "T1136.003": "Create Account: Cloud 
Account", "T1069.001": "Permission Groups Discovery: Local Groups", "T1593": "Search Open Websites/Domains", "T1098": "Account Manipulation", "T1048": "Exfiltration Over Alternative Protocol", "T1547.006": "Boot or Logon Autostart Execution: Kernel Modules and Extensions", "T1056.002": "Input Capture: GUI Input Capture", "T1588.002": "Obtain Capabilities: Tool", "T1052.001": "Exfiltration Over Physical Medium: Exfiltration over USB", "T1574.013": "Hijack Execution Flow: KernelCallbackTable", "T1597": "Search Closed Sources", "T1053.006": "Scheduled Task/Job: Systemd Timers", "T1566": "Phishing", "T1542.004": "Pre-OS Boot: ROMMONkit", "T1218.001": "System Binary Proxy Execution: Compiled HTML File", "T1070.005": "Indicator Removal: Network Share Connection Removal", "T1090.003": "Proxy: Multi-hop Proxy", "T1110": "Brute Force", "T1059.004": "Command and Scripting Interpreter: Unix Shell", "T1137.003": "Office Application Startup: Outlook Forms", "T1562.001": "Impair Defenses: Disable or Modify Tools", "T1565": "Data Manipulation", "T1559": "Inter-Process Communication", "T1001": "Data Obfuscation", "T1039": "Data from Network Shared Drive", "T1584.006": "Compromise Infrastructure: Web Services", "T1601": "Modify System Image", "T1574": "Hijack Execution Flow", "T1027.005": "Obfuscated Files or Information: Indicator Removal from Tools", "T1204.003": "User Execution: Malicious Image", "T1078": "Valid Accounts", "T1571": "Non-Standard Port", "T1585.001": "Establish Accounts: Social Media Accounts", "T1055.012": "Process Injection: Process Hollowing", "T1068": "Exploitation for Privilege Escalation", "T1564.009": "Hide Artifacts: Resource Forking", "T1531": "Account Access Removal", "T1110.004": "Brute Force: Credential Stuffing", "T1027": "Obfuscated Files or Information", "T1556.006": "Modify Authentication Process: Multi-Factor Authentication", "T1114.002": "Email Collection: Remote Email Collection", "T1505.004": "Server Software Component: IIS Components", 
"T1036.001": "Masquerading: Invalid Code Signature", "T1564.006": "Hide Artifacts: Run Virtual Instance", "T1201": "Password Policy Discovery", "T1546": "Event Triggered Execution", "T1546.004": "Event Triggered Execution: Unix Shell Configuration Modification", "T1187": "Forced Authentication", "T1134.005": "Access Token Manipulation: SID-History Injection", "T1599": "Network Boundary Bridging", "T1486": "Data Encrypted for Impact", "T1553": "Subvert Trust Controls", "T1548.004": "Abuse Elevation Control Mechanism: Elevated Execution with Prompt", "T1592.003": "Gather Victim Host Information: Firmware", "T1573": "Encrypted Channel", "T1547.002": "Boot or Logon Autostart Execution: Authentication Package", "T1218.010": "System Binary Proxy Execution: Regsvr32", "T1592.002": "Gather Victim Host Information: Software", "T1056": "Input Capture", "T1587.004": "Develop Capabilities: Exploits", "T1593.001": "Search Open Websites/Domains: Social Media", "T1546.015": "Event Triggered Execution: Component Object Model Hijacking", "T1589.001": "Gather Victim Identity Information: Credentials", "T1195.002": "Supply Chain Compromise: Compromise Software Supply Chain", "T1036.003": "Masquerading: Rename System Utilities", "T1102.002": "Web Service: Bidirectional Communication", "T1203": "Exploitation for Client Execution", "T1595.003": "Active Scanning: Wordlist Scanning", "T1137.004": "Office Application Startup: Outlook Home Page", "T1573.002": "Encrypted Channel: Asymmetric Cryptography", "T1567.002": "Exfiltration Over Web Service: Exfiltration to Cloud Storage", "T1570": "Lateral Tool Transfer", "T1574.009": "Hijack Execution Flow: Path Interception by Unquoted Path", "T1608.003": "Stage Capabilities: Install Digital Certificate", "T1037.005": "Boot or Logon Initialization Scripts: Startup Items", "T1614.001": "System Location Discovery: System Language Discovery", "T1095": "Non-Application Layer Protocol", "T1027.003": "Obfuscated Files or Information: Steganography", 
"T1584.002": "Compromise Infrastructure: DNS Server", "T1001.003": "Data Obfuscation: Protocol Impersonation", "T1012": "Query Registry", "T1030": "Data Transfer Size Limits", "T1550.004": "Use Alternate Authentication Material: Web Session Cookie", "T1078.002": "Valid Accounts: Domain Accounts", "T1218.009": "System Binary Proxy Execution: Regsvcs/Regasm", "T1553.004": "Subvert Trust Controls: Install Root Certificate", "T1037.003": "Boot or Logon Initialization Scripts: Network Logon Script", "T1499": "Endpoint Denial of Service", "T1027.004": "Obfuscated Files or Information: Compile After Delivery", "T1614": "System Location Discovery", "T1564.007": "Hide Artifacts: VBA Stomping", "T1197": "BITS Jobs", "T1127.001": "Trusted Developer Utilities Proxy Execution: MSBuild", "T1090.004": "Proxy: Domain Fronting", "T1557.002": "Adversary-in-the-Middle: ARP Cache Poisoning", "T1562.008": "Impair Defenses: Disable Cloud Logs", "T1518.001": "Software Discovery: Security Software Discovery", "T1564.003": "Hide Artifacts: Hidden Window", "T1059.006": "Command and Scripting Interpreter: Python", "T1591.004": "Gather Victim Org Information: Identify Roles", "T1132": "Data Encoding", "T1546.010": "Event Triggered Execution: AppInit DLLs", "T1598": "Phishing for Information", "T1496": "Resource Hijacking", "T1585": "Establish Accounts", "T1588": "Obtain Capabilities", "T1546.002": "Event Triggered Execution: Screensaver", "T1578.002": "Modify Cloud Compute Infrastructure: Create Cloud Instance", "T1213.003": "Data from Information Repositories: Code Repositories", "T1565.002": "Data Manipulation: Transmitted Data Manipulation", "T1003.008": "OS Credential Dumping: /etc/passwd and /etc/shadow", "T1543.001": "Create or Modify System Process: Launch Agent", "T1569": "System Services", "T1059.003": "Command and Scripting Interpreter: Windows Command Shell", "T1055.009": "Process Injection: Proc Memory", "T1601.001": "Modify System Image: Patch System Image", "T1558.002": "Steal 
or Forge Kerberos Tickets: Silver Ticket", "T1213": "Data from Information Repositories", "T1070.009": "Indicator Removal: Clear Persistence", "T1555.004": "Credentials from Password Stores: Windows Credential Manager", "T1200": "Hardware Additions", "T1505": "Server Software Component", "T1485": "Data Destruction", "T1132.002": "Data Encoding: Non-Standard Encoding", "T1556.001": "Modify Authentication Process: Domain Controller Authentication", "T1537": "Transfer Data to Cloud Account", "T1027.006": "Obfuscated Files or Information: HTML Smuggling", "T1556.005": "Modify Authentication Process: Reversible Encryption", "T1070.004": "Indicator Removal: File Deletion", "T1189": "Drive-by Compromise", "T1498": "Network Denial of Service", "T1546.016": "Event Triggered Execution: Installer Packages", "T1595.001": "Active Scanning: Scanning IP Blocks", "T1221": "Template Injection", "T1037.004": "Boot or Logon Initialization Scripts: RC Scripts", "T1134": "Access Token Manipulation", "T1111": "Multi-Factor Authentication Interception", "T1027.002": "Obfuscated Files or Information: Software Packing", "T1584.007": "Compromise Infrastructure: Serverless", "T1071.001": "Application Layer Protocol: Web Protocols", "T1059.005": "Command and Scripting Interpreter: Visual Basic", "T1564.005": "Hide Artifacts: Hidden File System", "T1543.002": "Create or Modify System Process: Systemd Service", "T1563.002": "Remote Service Session Hijacking: RDP Hijacking", "T1136": "Create Account", "T1547.013": "Boot or Logon Autostart Execution: XDG Autostart Entries", "T1584.004": "Compromise Infrastructure: Server", "T1526": "Cloud Service Discovery", "T1018": "Remote System Discovery", "T1046": "Network Service Discovery", "T1590.001": "Gather Victim Network Information: Domain Properties", "T1518": "Software Discovery", "T1538": "Cloud Service Dashboard", "T1055.005": "Process Injection: Thread Local Storage", "T1622": "Debugger Evasion", "T1036.006": "Masquerading: Space after 
Filename", "T1547.007": "Boot or Logon Autostart Execution: Re-opened Applications", "T1608.006": "Stage Capabilities: SEO Poisoning", "T1550.002": "Use Alternate Authentication Material: Pass the Hash", "T1052": "Exfiltration Over Physical Medium", "T1574.002": "Hijack Execution Flow: DLL Side-Loading", "T1105": "Ingress Tool Transfer", "T1098.002": "Account Manipulation: Additional Email Delegate Permissions", "T1588.003": "Obtain Capabilities: Code Signing Certificates", "T1648": "Serverless Execution", "T1055.008": "Process Injection: Ptrace System Calls", "T1027.007": "Obfuscated Files or Information: Dynamic API Resolution", "T1021.001": "Remote Services: Remote Desktop Protocol", "T1037.001": "Boot or Logon Initialization Scripts: Logon Script (Windows)", "T1055.015": "Process Injection: ListPlanting", "T1484": "Domain Policy Modification", "T1220": "XSL Script Processing", "T1596.005": "Search Open Technical Databases: Scan Databases", "T1564.001": "Hide Artifacts: Hidden Files and Directories", "T1578.001": "Modify Cloud Compute Infrastructure: Create Snapshot", "T1591.001": "Gather Victim Org Information: Determine Physical Locations", "T1137.002": "Office Application Startup: Office Test", "T1587": "Develop Capabilities", "T1003.003": "OS Credential Dumping: NTDS", "T1602.001": "Data from Configuration Repository: SNMP (MIB Dump)", "T1001.002": "Data Obfuscation: Steganography", "T1204.001": "User Execution: Malicious Link", "T1550.001": "Use Alternate Authentication Material: Application Access Token", "T1547.008": "Boot or Logon Autostart Execution: LSASS Driver", "T1569.002": "System Services: Service Execution", "T1078.004": "Valid Accounts: Cloud Accounts", "T1480.001": "Execution Guardrails: Environmental Keying", "T1008": "Fallback Channels", "T1564.004": "Hide Artifacts: NTFS File Attributes", "T1558.003": "Steal or Forge Kerberos Tickets: Kerberoasting", "T1003.006": "OS Credential Dumping: DCSync", "T1124": "System Time Discovery", "T1053.002": 
"Scheduled Task/Job: At", "T1055.001": "Process Injection: Dynamic-link Library Injection", "T1588.005": "Obtain Capabilities: Exploits", "T1556": "Modify Authentication Process", "T1056.004": "Input Capture: Credential API Hooking", "T1495": "Firmware Corruption", "T1490": "Inhibit System Recovery", "T1546.007": "Event Triggered Execution: Netsh Helper DLL", "T1566.003": "Phishing: Spearphishing via Service", "T1090.001": "Proxy: Internal Proxy", "T1216": "System Script Proxy Execution", "T1102.001": "Web Service: Dead Drop Resolver", "T1001.001": "Data Obfuscation: Junk Data", "T1598.001": "Phishing for Information: Spearphishing Service", "T1552.007": "Unsecured Credentials: Container API", "T1584.001": "Compromise Infrastructure: Domains", "T1505.001": "Server Software Component: SQL Stored Procedures", "T1556.004": "Modify Authentication Process: Network Device Authentication", "T1561.001": "Disk Wipe: Disk Content Wipe", "T1048.003": "Exfiltration Over Alternative Protocol: Exfiltration Over Unencrypted Non-C2 Protocol", "T1574.004": "Hijack Execution Flow: Dylib Hijacking", "T1601.002": "Modify System Image: Downgrade System Image", "T1078.003": "Valid Accounts: Local Accounts", "T1211": "Exploitation for Defense Evasion", "T1127": "Trusted Developer Utilities Proxy Execution", "T1529": "System Shutdown/Reboot", "T1218.014": "System Binary Proxy Execution: MMC", "T1564.010": "Hide Artifacts: Process Argument Spoofing", "T1574.012": "Hijack Execution Flow: COR_PROFILER"}
--------------------------------------------------------------------------------
/nlp_general.py:
--------------------------------------------------------------------------------
1 | """
2 | Created on Wed Mar 18 12:15:21 2020
3 |
4 | @author: babdeen
5 | """
6 | import spacy
7 |
8 | import numpy as np
9 | from spacy.lang.en import English
10 | parser = spacy.load("en_core_web_lg",disable=['ner'])
11 | from nltk.corpus import stopwords
12 | stop_words = set(stopwords.words('english'))
13 |
14 | stop_words = stop_words.union(['may','should','can','could'])
15 |
16 | sw_NN = set(list(stop_words)+['part','kind','different','number','all','many','kinds','parts','whole','certain','various','other','such','both','multiple','some','several'])
17 | import string
18 | from nltk.corpus import wordnet as wn
19 | import re
20 | from collections import Counter,OrderedDict
21 | import pickle
22 | from nltk.stem.wordnet import WordNetLemmatizer
23 | lemmatizer = WordNetLemmatizer()
24 | from sklearn.feature_extraction.text import TfidfVectorizer
25 | from pattern.en import lexeme, pluralize
26 | from numpy.linalg import norm
27 | from numpy import dot
28 |
29 |
class NLP:
    """Utilities for post-processing SRL output, loading spaCy pipelines and
    extracting verb/argument ("VO") phrases from SRL frames.

    Functions that take no ``self`` are declared as static methods, so they can
    be called either on the class (``NLP.srl_to_dict(...)``) or on an instance.

    Terminology used below: a *frame* is the per-verb dict produced by
    :meth:`srl_to_dict`, mapping a role label (``'V'``, ``'ARG0'``, ...) to a
    dict with at least a ``'text'`` entry.
    """

    # ------------------------------------------------------------------ #
    # SRL conversion
    # ------------------------------------------------------------------ #
    @staticmethod
    def srl_to_dict(srl):
        """Convert one SRL prediction into ``{verb_id: {role: {...}}}``.

        ``srl`` must contain ``'words'`` and ``'verbs'``; every verb entry must
        carry ``'id'`` (see :meth:`add_v_id_srl`) and BIO ``'tags'``.

        * ``B-<role>`` starts a role; a repeated ``B-`` of the same role is
          appended to the existing text separated by ``'/ '``.
        * Any other non-``O`` tag continues the role with a single space, and
          is ignored when the role was never started.
        * For the ``V`` role the token index of the ``B-V`` tag is stored
          under ``'index'``.
        """
        frames = {}
        for verb in srl['verbs']:
            roles = {}
            frames[verb['id']] = roles
            for ind, tag in enumerate(verb['tags']):
                if tag == 'O':
                    continue
                # Strip the "B-"/"I-" prefix to obtain the role label.
                role = tag[tag.find('-') + 1:]
                if tag[0] == 'B':
                    if role not in roles:
                        roles[role] = {'text': srl['words'][ind]}
                    else:
                        roles[role]['text'] += ('/ ' + srl['words'][ind])
                    # NOTE(review): the nesting of this check was reconstructed
                    # from a copy of the file that lost its indentation; it is
                    # assumed to apply to every B- tag.
                    if role == 'V':
                        roles[role]['index'] = ind
                elif role in roles:
                    roles[role]['text'] += (' ' + srl['words'][ind])
        return frames

    @staticmethod
    def add_v_id_srl_from_dict(srls):
        """Apply :meth:`add_v_id_srl` in place to every value of ``srls``."""
        for key in srls:
            NLP.add_v_id_srl(srls[key])

    @staticmethod
    def srl_to_dict_from_dict(srls):
        """Return ``{key: srl_to_dict(value)}`` for every entry of ``srls``."""
        return {key: NLP.srl_to_dict(srls[key]) for key in srls}

    @staticmethod
    def add_v_id_srl(srl):
        """Add a unique ``'id'`` to every entry of ``srl['verbs']`` in place.

        The first occurrence of a verb gets the verb itself as id; later
        occurrences get ``'<verb>_<n>'`` with ``n`` counting from 2.
        """
        occurrences = {}
        for entry in srl["verbs"]:
            verb = entry["verb"]
            count = occurrences.get(verb, 0) + 1
            occurrences[verb] = count
            entry['id'] = verb if count == 1 else verb + '_' + str(count)

    @staticmethod
    def get_lemma(word, is_verb):
        """Lemmatize the lower-cased ``word`` as a verb or as a noun."""
        return lemmatizer.lemmatize(word.lower(), wn.VERB if is_verb else wn.NOUN)

    # ------------------------------------------------------------------ #
    # spaCy pipelines
    # ------------------------------------------------------------------ #
    def load_model(self, model_name):
        """Load the pipeline selected by ``model_name`` onto this instance.

        Recognised names and the attribute they set: ``'ner'`` ->
        ``ner_spacy``, ``'pos'`` -> ``pos_tagger``, ``'parse'`` -> ``parser``,
        ``'dep'`` -> ``dep_tagger``, ``'sentencizer'`` -> ``nlp``.
        Any other name is silently ignored.
        """
        if model_name == 'ner':
            self.ner_spacy = spacy.load('en_core_web_lg', disable=['parser', 'tagger'])
        elif model_name == 'pos':
            self.pos_tagger = spacy.load("en_core_web_lg", disable=['ner', 'parser'])
        elif model_name == 'parse':
            self.parser = spacy.load("en_core_web_lg", disable=['ner'])
        elif model_name == 'dep':
            self.dep_tagger = spacy.load("en_core_web_lg", disable=['ner'])
        elif model_name == 'sentencizer':
            self.nlp = English()
            self.nlp.add_pipe('sentencizer')

    def seperate_sentences(self, text):
        """Split ``text`` into stripped sentence strings using ``self.nlp``.

        ``load_model('sentencizer')`` must have been called first.
        """
        doc = self.nlp(text)
        return [sent.text.strip() for sent in doc.sents]

    # ------------------------------------------------------------------ #
    # Vector maths
    # ------------------------------------------------------------------ #
    @staticmethod
    def cos_sim(x, y):
        """Return ``|cos(x, y)|``, or ``0`` when either vector has zero norm."""
        a = np.array(x)
        b = np.array(y)
        norm_a = norm(a)
        norm_b = norm(b)
        if norm_a == 0 or norm_b == 0:
            return 0
        return abs(dot(a, b) / (norm_a * norm_b))

    # ------------------------------------------------------------------ #
    # Private helpers shared by the extract_* / filter_* methods
    # ------------------------------------------------------------------ #
    @staticmethod
    def _passes_predicates(frame, arg_constrain, pass_text=True):
        """Check ``frame`` against predicate-style constraints.

        ``arg_constrain`` maps a role label to a callable; the special key
        ``'forbid'`` maps to a container of role labels that must NOT be
        present in the frame.  A role that is constrained but missing from
        the frame fails the check.  The callable receives the role text when
        ``pass_text`` is true, otherwise the whole role dict.
        """
        for arg in arg_constrain:
            constraint = arg_constrain[arg]
            if arg == 'forbid':
                if any(role in constraint for role in frame):
                    return False
            elif arg not in frame:
                return False
            else:
                value = frame[arg]['text'] if pass_text else frame[arg]
                # Deliberately "== False" rather than "not ...": a predicate
                # returning None has always been treated as a pass.
                if constraint(value) == False:  # noqa: E712
                    return False
        return True

    @staticmethod
    def _matches_allowed_values(frame, arg_constrain):
        """Check ``frame`` against membership-style constraints.

        ``arg_constrain`` maps a role label to a container of allowed
        lower-case texts, or to the string ``'any'`` to only require presence.
        """
        for arg in arg_constrain:
            if arg not in frame:
                return False
            allowed = arg_constrain[arg]
            if frame[arg]['text'].lower() not in allowed and allowed != 'any':
                return False
        return True

    @staticmethod
    def _join_args(frame, join_args, return_args):
        """Collect the texts of the roles in ``return_args`` (frame order)."""
        texts = [frame[role]['text'] for role in frame if role in return_args]
        return ' '.join(texts) if join_args else texts

    @staticmethod
    def _vo_per_sentence(sents_srl, arg_constrain, join_args, return_args):
        """Build ``{sent: [(VO, sent), ...]}`` for frames passing the predicates."""
        out = {}
        for sent in sents_srl:
            out[sent] = []
            for verb_id in sents_srl[sent]:
                frame = sents_srl[sent][verb_id]
                if 'V' in frame and NLP._passes_predicates(frame, arg_constrain):
                    out[sent].append((NLP._join_args(frame, join_args, return_args), sent))
        return out

    # ------------------------------------------------------------------ #
    # VO extraction
    # ------------------------------------------------------------------ #
    def extract_VO_from_docs_lambda(self, doc_srl, arg_constrain = {}, join_args = True, return_args = ['V','ARG1','ARG2','ARG3','ARGM-LOC','ARGM-EXT']):
        """Extract VO phrases from ``{doc: {sent: {verb_id: frame}}}``.

        Returns ``{doc: [(VO, sent), ...]}``.  Only frames that contain a
        ``'V'`` role and satisfy ``arg_constrain`` contribute.  ``VO`` is the
        space-joined text of the roles in ``return_args`` (or the list of
        texts when ``join_args`` is false).

        NOTE(review): unlike the sentence-level variants, the predicates here
        receive the whole role dict (``{'text': ...}``), not the text string.
        That is the existing behaviour and is kept; confirm it is intended.
        """
        out = {}
        for doc in doc_srl:
            out[doc] = []
            for sent in doc_srl[doc]:
                for verb_id in doc_srl[doc][sent]:
                    frame = doc_srl[doc][sent][verb_id]
                    if 'V' not in frame:
                        continue
                    if NLP._passes_predicates(frame, arg_constrain, pass_text=False):
                        out[doc].append((NLP._join_args(frame, join_args, return_args), sent))
        return out

    def filter_srl_docs_lambda(self, doc_srl, arg_constrain = {}, return_args = None):
        """Filter ``{doc: {sent: {verb_id: frame}}}`` by predicate constraints.

        Returns the same nesting, keeping only frames that satisfy
        ``arg_constrain`` (predicates receive the role text).  When
        ``return_args`` is not ``None`` each kept frame is reduced to the
        listed roles.  Documents and sentences are always present in the
        result, possibly empty.
        """
        out = {}
        for doc in doc_srl:
            out[doc] = {}
            for sent in doc_srl[doc]:
                kept = {}
                out[doc][sent] = kept
                for verb_id in doc_srl[doc][sent]:
                    frame = doc_srl[doc][sent][verb_id]
                    if NLP._passes_predicates(frame, arg_constrain):
                        kept[verb_id] = {
                            role: frame[role]
                            for role in frame
                            if return_args is None or role in return_args
                        }
        return out

    def extract_VO_from_sents_lambda(self, sents_srl, arg_constrain = {}, join_args = True, return_args = ['V','ARG1','ARG2','ARG3','ARGM-LOC','ARGM-EXT']):
        """Extract VO phrases from ``{sent: {verb_id: frame}}``.

        Returns ``{sent: [(VO, sent), ...]}``; predicates in ``arg_constrain``
        receive the role text.
        """
        return NLP._vo_per_sentence(sents_srl, arg_constrain, join_args, return_args)

    def extract_VO_from_sents(self, sents_srl, arg_constrain = {}, join_args = True, return_args = ['V','ARG1','ARG2','ARG3','ARGM-LOC','ARGM-EXT']):
        """Same contract as :meth:`extract_VO_from_sents_lambda`."""
        return NLP._vo_per_sentence(sents_srl, arg_constrain, join_args, return_args)

    @staticmethod
    def extract_VO_from_srl(srl, sent, arg_constrain = {}, join_args = True, return_args = ['V','ARG1','ARG2','ARG3','ARGM-LOC','ARGM-EXT']):
        """Extract the VO phrase of a single frame ``srl``.

        ``arg_constrain`` is membership-style here: each role maps to a
        container of allowed lower-case texts, or to ``'any'``.  Returns ``''``
        when the frame has no ``'V'`` role or fails a constraint.  ``sent`` is
        accepted for signature compatibility and is not used.
        """
        if 'V' in srl and NLP._matches_allowed_values(srl, arg_constrain):
            return NLP._join_args(srl, join_args, return_args)
        return ''
244 |
--------------------------------------------------------------------------------
/parse_class.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Tue Jul 21 23:21:08 2020
4 |
5 | @author: babdeen
6 | """
7 |
8 |
9 | # -*- coding: utf-8 -*-
10 | """
11 | Created on Sun Jul 5 23:01:55 2020
12 |
13 | @author: babdeen
14 | """
15 |
16 | from allennlp.predictors.predictor import Predictor
17 | from nlp_general import NLP
18 | # import allennlp_models.syntax.srl
19 | # SRL = Predictor.from_path("https://storage.googleapis.com/allennlp-public-models/bert-base-srl-2020.11.19.tar.gz")
20 | SRL = Predictor.from_path("https://storage.googleapis.com/allennlp-public-models/structured-prediction-srl-bert.2020.12.15.tar.gz")
21 | # DT = Predictor.from_path("https://storage.googleapis.com/allennlp-public-models/biaffine-dependency-parser-ptb-2020.04.06.tar.gz")
22 | # CT = Predictor.from_path("https://storage.googleapis.com/allennlp-public-models/elmo-constituency-parser-2020.02.10.tar.gz")
23 |
24 |
25 | class Parser:
26 | def extract_srl(text):
27 | srl = SRL.predict(text)
28 | Parser.add_v_id_srl(srl)
29 | # srl_dict = Parser.srl_to_dict(srl)
30 | return srl
31 |
32 | # def extract_dt(text):
33 | # return DT.predict(text)
34 | # def extract_ct(text):
35 | # return CT.predict(text)
36 | def srl_to_dict(srl):
37 | SRLDict = {}
38 | for verb in srl['verbs']:
39 | verb_str = verb['id']
40 | SRLDict[verb_str] = {}
41 | for ind,tag in enumerate(verb['tags']):
42 | if tag != 'O':
43 | if tag[0] == 'B':
44 | newTag = tag[tag.find('-')+1:]
45 | if newTag not in SRLDict[verb_str]:
46 | SRLDict[verb_str][newTag] = {'text': srl['words'][ind] }
47 | else:
48 | SRLDict[verb_str][newTag]['text'] += ('/ ' + srl['words'][ind])
49 | # if newTag == 'V':
50 | SRLDict[verb_str][newTag]['index'] = ind
51 | else :
52 | newTag = tag[tag.find('-')+1:]
53 | if newTag not in SRLDict[verb_str]:
54 | continue
55 | SRLDict[verb_str][newTag]['text'] += (' ' + srl['words'][ind])
56 |
57 | return SRLDict
58 |
59 |
60 | def srl_to_dict_from_dict(srls):
61 | return {srl:Parser.srl_to_dict(srls[srl]) for srl in srls}
62 |
63 | def srl_to_dict_from_list(srls):
64 | return [Parser.srl_to_dict(srls[srl]) for srl in srls]
65 |
66 | def add_v_id_srl(srl):
67 | verbs = set()
68 | counter = {}
69 |
70 | for v in srl["verbs"]:
71 | if v["verb"] not in verbs:
72 | verbs.add(v["verb"])
73 | counter[v["verb"]] = 1
74 | v['id'] = v["verb"]
75 | else:
76 | counter[v["verb"]] += 1
77 | v['id'] = v["verb"] + '_' + str(counter[v["verb"]])
78 |
79 | def add_v_id_srl_from_dict(srls):
80 | for srl in srls:
81 | Parser.add_v_id_srl(srls[srl])
82 |
83 |
84 | def get_words_verb(sent,is_SRL = True,args = ['ARG0','ARG1','ARG2','ARG3'], ):
85 | words_verbs = {}
86 | if is_SRL:
87 | srl = sent
88 | else:
89 | srl = Parser.extract_srl(sent)
90 | srl = Parser.srl_to_dict(srl)
91 |
92 | for v in srl:
93 | if 'V' not in srl[v]:
94 | continue
95 | args_join = ' '.join([srl[v][arg]['text'] if arg in srl[v] else '' for arg in args])
96 | for w in args_join.split():
97 | if w in words_verbs:
98 | words_verbs[w].append(srl[v]['V']['text'])
99 | else:
100 | words_verbs[w] = [srl[v]['V']['text']]
101 | return words_verbs
102 |
103 | def get_words_sub(sent, is_SRL = True, args = ['ARG1','ARG2','ARG3']):
104 | words_subs = {}
105 |
106 | if is_SRL:
107 | srl = sent
108 | else:
109 | srl = Parser.extract_srl(sent)
110 | srl = Parser.srl_to_dict(srl)
111 |
112 | for v in srl:
113 | if 'ARG0' in srl[v]:
114 | args_join = ' '.join([srl[v][arg]['text'] if arg in srl[v] else '' for arg in args])
115 |
116 | for w in args_join.split():
117 | if w in words_subs:
118 | words_subs[w].append(srl[v]['ARG0']['text'])
119 | else:
120 | words_subs[w] = [srl[v]['ARG0']['text']]
121 | return words_subs
122 |
def extract_VO_from_docs(doc_srl, arg_constrain={}, join_args=True, return_args=['V', 'ARG1', 'ARG2', 'ARG3', 'ARGM-LOC', 'ARGM-EXT'], exclude_dep=['amod']):
    """Collect verb/argument phrases from the SRL frames of several documents.

    Args:
        doc_srl: Mapping document -> sentence -> frame id -> role ->
            ``{'text': ...}``.
        arg_constrain: Mapping role -> allowed lower-cased texts, or the
            string ``'any'`` to only require that the role is present. A
            frame is kept only if it satisfies every constraint.
        join_args: If true, the selected role texts are joined with spaces;
            otherwise they are returned as a list.
        return_args: Role labels to include in the phrase. Texts are emitted
            in the frame's own role order.
        exclude_dep: Dependency labels passed to ``NLP.extract_dep``; frames
            whose verb text is in its result are skipped (presumably the
            words carrying those labels -- confirm against ``NLP``). An
            empty value disables the filter.

    Returns:
        list of ``(phrase, sentence)`` tuples, one per accepted frame.

    Note:
        The mutable defaults are never mutated here; they are kept as-is to
        preserve the existing signature.
    """
    out = []
    for doc in doc_srl:
        for sent in doc_srl[doc]:
            # Always bind `exclude`: previously an empty `exclude_dep` left
            # it undefined and the membership test below raised NameError.
            exclude = NLP.extract_dep(sent, exclude_dep) if exclude_dep else []

            frames = doc_srl[doc][sent]
            for v in frames:
                frame = frames[v]
                if 'V' not in frame or frame['V']['text'] in exclude:
                    continue

                for arg in arg_constrain:
                    if arg not in frame:
                        break
                    allowed = arg_constrain[arg]
                    if frame[arg]['text'].lower() not in allowed and allowed != 'any':
                        break
                else:
                    # Reached only when no constraint rejected the frame.
                    args = [frame[arg]['text'] for arg in frame if arg in return_args]
                    VO = ' '.join(args) if join_args else args
                    out.append((VO, sent))
    return out
146 |
def extract_VO_from_sents(sents_srl, arg_constrain={}, join_args=True, return_args=['V', 'ARG1', 'ARG2', 'ARG3', 'ARGM-LOC', 'ARGM-EXT'], exclude_dep=['amod']):
    """Collect verb/argument phrases from the SRL frames of several sentences.

    Args:
        sents_srl: Mapping sentence -> frame id -> role -> ``{'text': ...}``.
        arg_constrain: Mapping role -> allowed lower-cased texts, or the
            string ``'any'`` to only require that the role is present.
        join_args: Join the selected role texts with spaces when true,
            otherwise return them as a list.
        return_args: Role labels to include, emitted in the frame's own
            role order.
        exclude_dep: Dependency labels passed to ``NLP.extract_dep``; frames
            whose verb text is in its result are skipped. ``[]`` disables
            the filter.

    Returns:
        list of ``(phrase, sentence)`` tuples, one per accepted frame.
    """
    results = []
    skipped_verbs = []
    for sentence in sents_srl:
        if exclude_dep != []:
            skipped_verbs = NLP.extract_dep(sentence, exclude_dep)

        frames = sents_srl[sentence]
        for frame_id in frames:
            frame = frames[frame_id]
            if 'V' not in frame or frame['V']['text'] in skipped_verbs:
                continue

            for role in arg_constrain:
                if role not in frame:
                    break
                allowed = arg_constrain[role]
                if frame[role]['text'].lower() not in allowed and allowed != 'any':
                    break
            else:
                # No constraint rejected this frame.
                picked = [frame[role]['text'] for role in frame if role in return_args]
                phrase = ' '.join(picked) if join_args else picked
                results.append((phrase, sentence))
    return results
171 |
def extract_VO_from_srl(srl, sent, arg_constrain={}, join_args=True, return_args=['V', 'ARG1', 'ARG2', 'ARG3', 'ARGM-LOC', 'ARGM-EXT'], exclude_dep=['amod']):
    """Build the verb/argument phrase for a single SRL frame.

    Args:
        srl: One frame, mapping role -> ``{'text': ...}``.
        sent: The sentence the frame belongs to; only used for the
            dependency-based exclusion lookup.
        arg_constrain: Mapping role -> allowed lower-cased texts, or the
            string ``'any'`` to only require that the role is present.
        join_args: Join the selected role texts with spaces when true,
            otherwise return them as a list.
        return_args: Role labels to include, emitted in the frame's own
            role order.
        exclude_dep: Dependency labels passed to ``NLP.extract_dep``; the
            frame is rejected if its verb text is in the result. An empty
            value disables the filter.

    Returns:
        The phrase (``str`` or ``list`` depending on ``join_args``) when the
        frame is accepted, otherwise the empty string ``''``.

    Note:
        The mutable defaults are never mutated here; they are kept as-is to
        preserve the existing signature.
    """
    # Always bind `exclude`: previously an empty `exclude_dep` left it
    # undefined and the membership test below raised NameError.
    exclude = NLP.extract_dep(sent, exclude_dep) if exclude_dep else []

    VO = ''
    if 'V' not in srl or srl['V']['text'] in exclude:
        return VO

    for arg in arg_constrain:
        if arg not in srl:
            return VO
        allowed = arg_constrain[arg]
        if srl[arg]['text'].lower() not in allowed and allowed != 'any':
            return VO

    args = [srl[arg]['text'] for arg in srl if arg in return_args]
    VO = ' '.join(args) if join_args else args
    return VO
193 |
194 |
195 |
--------------------------------------------------------------------------------
/requirements-frozen.txt:
--------------------------------------------------------------------------------
1 | aiohttp==3.9.1
2 | aiosignal==1.3.1
3 | allennlp==2.10.1
4 | allennlp-models==2.10.1
5 | async-timeout==4.0.3
6 | attrs==23.1.0
7 | autocommand==2.2.2
8 | backports.csv==1.0.7
9 | base58==2.1.1
10 | beautifulsoup4==4.12.2
11 | benepar==0.2.0
12 | blis==0.7.11
13 | boto3==1.34.4
14 | botocore==1.34.4
15 | cached-path==1.1.6
16 | cachetools==5.3.2
17 | catalogue==2.0.10
18 | certifi==2023.11.17
19 | cffi==1.16.0
20 | charset-normalizer==3.1.0
21 | cheroot==10.0.0
22 | CherryPy==18.9.0
23 | click==8.1.7
24 | colorama==0.4.6
25 | commonmark==0.9.1
26 | confection==0.0.4
27 | conllu==4.4.2
28 | cryptography==41.0.7
29 | cymem==2.0.8
30 | datasets==2.10.1
31 | dill==0.3.6
32 | docker-pycreds==0.4.0
33 | en-core-web-lg @ https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.3.0/en_core_web_lg-3.3.0-py3-none-any.whl#sha256=6ce19d37dfe5280400f80a5954d41afca10cbc742b97bfcf4b0e452b6eb24273
34 | en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.3.0/en_core_web_sm-3.3.0-py3-none-any.whl#sha256=84d7d8059bfbf53c09b39139782f76cd6ac7064851e7799dcc685c06ebf5fd4f
35 | exceptiongroup==1.2.0
36 | fairscale==0.4.6
37 | feedparser==6.0.11
38 | filelock==3.7.1
39 | frozenlist==1.4.1
40 | fsspec==2023.12.2
41 | ftfy==6.1.3
42 | future==0.18.3
43 | gitdb==4.0.11
44 | GitPython==3.1.40
45 | google-api-core==2.15.0
46 | google-auth==2.25.2
47 | google-cloud-core==2.4.1
48 | google-cloud-storage==2.14.0
49 | google-crc32c==1.5.0
50 | google-resumable-media==2.7.0
51 | googleapis-common-protos==1.62.0
52 | h5py==3.10.0
53 | huggingface-hub==0.10.1
54 | idna==3.6
55 | importlib-resources==6.1.1
56 | inflect==6.0.1
57 | iniconfig==2.0.0
58 | jaraco.collections==5.0.0
59 | jaraco.context==4.3.0
60 | jaraco.functools==4.0.0
61 | jaraco.text==3.12.0
62 | Jinja2==3.1.2
63 | jmespath==1.0.1
64 | joblib==1.3.2
65 | langcodes==3.3.0
66 | lmdb==1.3.0
67 | lxml==4.9.4
68 | MarkupSafe==2.1.3
69 | more-itertools==10.1.0
70 | multidict==6.0.4
71 | multiprocess==0.70.14
72 | murmurhash==1.0.10
73 | mysqlclient==2.2.1
74 | neuralcoref==4.0
75 | nltk==3.8.1
76 | numpy==1.24.4
77 | packaging==23.2
78 | pandas==2.0.3
79 | pathtools==0.1.2
80 | pathy==0.10.1
81 | Pattern==3.6
82 | pdfminer.six==20221105
83 | pdfplumber==0.7.4
84 | Pillow==9.2.0
85 | pluggy==1.3.0
86 | portend==3.2.0
87 | preshed==3.0.9
88 | promise==2.3
89 | protobuf==3.20.3
90 | psutil==5.9.7
91 | py-rouge==1.1
92 | pyarrow==14.0.2
93 | pyasn1==0.5.1
94 | pyasn1-modules==0.3.0
95 | pycparser==2.21
96 | pydantic==1.8.2
97 | pydantic_core==2.0.2
98 | Pygments==2.17.2
99 | pytest==7.4.3
100 | python-dateutil==2.8.2
101 | python-docx==1.1.0
102 | pytz==2023.3.post1
103 | pywin32==306
104 | PyYAML==6.0.1
105 | regex==2023.10.3
106 | requests==2.30.0
107 | responses==0.18.0
108 | rich==12.6.0
109 | rsa==4.9
110 | s3transfer==0.9.0
111 | sacremoses==0.1.1
112 | scikit-learn==1.3.0
113 | scipy==1.10.1
114 | sentence-transformers==2.2.2
115 | sentencepiece==0.1.99
116 | sentry-sdk==1.39.1
117 | setproctitle==1.3.3
118 | sgmllib3k==1.0.0
119 | shortuuid==1.0.11
120 | six==1.16.0
121 | smart-open==5.2.1
122 | smmap==5.0.1
123 | soupsieve==2.5
124 | spacy==3.3.3
125 | spacy-legacy==3.0.12
126 | spacy-loggers==1.0.1
127 | srsly==2.4.8
128 | tempora==5.5.0
129 | tensorboardX==2.6.2.2
130 | termcolor==1.1.0
131 | thinc==8.0.17
132 | threadpoolctl==3.2.0
133 | tokenizers==0.12.1
134 | tomli==2.0.1
135 | torch==1.12.1
136 | torch-struct==0.5
137 | torchvision==0.13.1
138 | tqdm==4.65.0
139 | traitlets==5.14.0
140 | transformers==4.20.1
141 | typer==0.4.2
142 | typing_extensions==4.5.0
143 | tzdata==2023.3
144 | urllib3==1.26.18
145 | Wand==0.6.10
146 | wandb==0.12.21
147 | wasabi==0.10.1
148 | wcwidth==0.2.12
149 | word2number==1.1
150 | wrapt==1.12.1
151 | xxhash==3.4.1
152 | yarl==1.9.4
153 | zc.lockfile==3.0.post1
154 | zipp==3.17.0
155 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | allennlp==2.10.1
2 | allennlp-models==2.10.1
3 | nltk==3.8.1
4 | numpy==1.24.4
5 | Pattern==3.6
6 | scikit_learn==1.3.0
7 | scipy==1.10.1
8 | sentence_transformers==2.2.2
9 | spacy==3.3.3
10 |
--------------------------------------------------------------------------------