├── synthesizers ├── input.txt ├── defs.json ├── script_context_config.py ├── README.md ├── script_ir_end_dump.txt ├── script_config.py ├── CoreNLP_server.py ├── script_msg_helpers.py ├── common-defs.json ├── script_msg_defs_5_rrc.py ├── script_build_string_keyword_distance.py ├── script_msg_defs_4.py ├── defs-saved.json ├── script_msg_defs_5.py ├── sympy_expression_builder.py └── script_db_handler.py ├── keyword_extraction ├── 5g-rrc.pdf ├── combined.json ├── assets │ ├── manual_recategorization.txt │ ├── abbreviations.txt │ └── definitions.txt ├── cause_extraction.py ├── README.md ├── merge_keywords_np.py ├── ie_from_pdf.py ├── noun_phrase_cleanup.py ├── constituency_parser.py ├── cellular_text_converter.py ├── gather_keyword_pdf.py └── categorize_keywords.py ├── neutrex ├── supar │ ├── cmds │ │ ├── __init__.py │ │ ├── cmd.py │ │ ├── vi_con.py │ │ ├── crf_con.py │ │ ├── biaffine_sdp.py │ │ ├── vi_sdp.py │ │ ├── biaffine_dep.py │ │ ├── crf_dep.py │ │ ├── crf2o_dep.py │ │ └── vi_dep.py │ ├── utils │ │ ├── common.py │ │ ├── __init__.py │ │ ├── tokenizer.py │ │ ├── scripting.py │ │ ├── embedding.py │ │ ├── parallel.py │ │ ├── logging.py │ │ ├── config.py │ │ ├── vocab.py │ │ └── metric.py │ ├── modules │ │ ├── __init__.py │ │ ├── scalar_mix.py │ │ ├── mlp.py │ │ ├── dropout.py │ │ └── affine.py │ ├── models │ │ └── __init__.py │ ├── parsers │ │ └── __init__.py │ ├── structs │ │ ├── __init__.py │ │ ├── dist.py │ │ └── linearchain.py │ └── __init__.py ├── tests │ ├── test_fn.py │ ├── test_parse.py │ └── test_transform.py ├── README.md └── tree_to_xml │ ├── tree_to_xml.py │ └── tree_cleanup.py └── README.md /synthesizers/input.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /synthesizers/defs.json: -------------------------------------------------------------------------------- 1 | { 2 | } 
-------------------------------------------------------------------------------- /keyword_extraction/5g-rrc.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SyNSec-den/hermes-spec-to-fsm/HEAD/keyword_extraction/5g-rrc.pdf -------------------------------------------------------------------------------- /keyword_extraction/combined.json: -------------------------------------------------------------------------------- 1 | { 2 | "id2agent": {}, 3 | "id2verb": {}, 4 | "id2adj": {}, 5 | "id2state": {}, 6 | "id2message": {}, 7 | "id2procedure": {}, 8 | "id2event": {}, 9 | "id2timer": {}, 10 | "id2counter": {}, 11 | "id2var": {}, 12 | "id2mode": {}, 13 | "id2service": {}, 14 | "id2field_val": {}, 15 | "id2msg_field": {}, 16 | "id2cause": {}, 17 | "id2misc": {}, 18 | "id2other": {}, 19 | "id2num": {} 20 | } -------------------------------------------------------------------------------- /keyword_extraction/assets/manual_recategorization.txt: -------------------------------------------------------------------------------- 1 | # categories = [ message procedure messagefield state mode status service counter timer algorithm variable ] 2 | # lines with - indicate which category to search from for replacement, lines indicate which keyword to move and : indicates to which category 3 | # if no : is indicated, it will be moved to the last mentioned category used. 
4 | - definitions 5 | - abbreviation 6 | guti : variable 7 | - misc 8 | authentication_check : procedure 9 | imsi_attach : procedure 10 | imsi_detach : procedure 11 | plmn_search : procedure 12 | eps_update_status : status 13 | current_plmn : variable 14 | emm_cause_value : variable 15 | native_guti : variable 16 | plmn_identity : variable 17 | security_context_flag : variable 18 | selected_plmn : variable 19 | integrity_check : event 20 | -------------------------------------------------------------------------------- /neutrex/supar/cmds/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is the public release of the code of our paper titled 3 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 4 | Language Specifications" (USENIX Security '24) 5 | Author: Abdullah Al Ishtiaq and Sarkar Snigdha Sarathi Das 6 | Contact: abdullah.ishtiaq@psu.edu 7 | 8 | Licensed under the Apache License, Version 2.0 (the "License"); 9 | you may not use this file except in compliance with the License. 10 | You may obtain a copy of the License at 11 | 12 | https://www.apache.org/licenses/LICENSE-2.0 13 | 14 | Unless required by applicable law or agreed to in writing, software 15 | distributed under the License is distributed on an "AS IS" BASIS, 16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | See the License for the specific language governing permissions and 18 | limitations under the License. 
19 | """ 20 | -------------------------------------------------------------------------------- /synthesizers/script_context_config.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is the public release of the code of our paper titled 3 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 4 | Language Specifications" (USENIX Security '24) 5 | Author: Abdullah Al Ishtiaq 6 | Contact: abdullah.ishtiaq@psu.edu 7 | 8 | Licensed under the Apache License, Version 2.0 (the "License"); 9 | you may not use this file except in compliance with the License. 10 | You may obtain a copy of the License at 11 | 12 | https://www.apache.org/licenses/LICENSE-2.0 13 | 14 | Unless required by applicable law or agreed to in writing, software 15 | distributed under the License is distributed on an "AS IS" BASIS, 16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | See the License for the specific language governing permissions and 18 | limitations under the License. 19 | """ 20 | 21 | global_context = {} 22 | header_context = [] 23 | -------------------------------------------------------------------------------- /synthesizers/README.md: -------------------------------------------------------------------------------- 1 | # IRSynthesizer and FSMSynthesizer 2 | 3 | ## Requirements 4 | 5 | - python=3.7 6 | - stanza==1.4.2 7 | - nltk==3.8.1 8 | - tokenizers==0.13.3 9 | - torch==1.13.1 10 | - sympy==1.10.1 11 | - python-levenshtein==0.20.9 12 | 13 | 14 | ## Config 15 | 16 | - Update `./script_config.py` to select appropriate configuration. 17 | 18 | 19 | ## Input 20 | 21 | - Put input Hermes annotated document into `./input.txt` 22 | - Put extracted keywords into `./defs-saved.json` 23 | 24 | 25 | ## CoreNLP Server 26 | 27 | - Run `./CoreNLP_server.py` to start CoreNLP server and keep it running. 
28 | 29 | 30 | ## Keyword Preprocess 31 | 32 | - Run `./run-keyword-db-builder.py` to create database for keywords. 33 | 34 | 35 | ## Synthesizers 36 | 37 | - Run `./run-synthesizers.py` to run IRSynthesizer and FSMSynthesizer. 38 | 39 | 40 | ## Output 41 | 42 | - `./transitions.txt` outputs the transitions. 43 | - `./ir-out.xml` outputs the FSM in IR format. 44 | - `./smv-out.smv` FSM transpiled to nuXmv. 45 | 46 | 47 | 48 | -------------------------------------------------------------------------------- /synthesizers/script_ir_end_dump.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | UE 6 | MME 7 | FALSE 8 | 9 | 10 | 11 | MME 12 | UE 13 | FALSE 14 | 15 | 16 | 17 | 18 | 19 | chan_UM 20 | TRUE 21 | 22 | 23 | 24 | chan_MU 25 | TRUE 26 | 27 | 28 | 29 | 30 | 31 | 32 |
DEFINE
33 | range := 8; 34 |
35 | 36 |
37 | 38 | -------------------------------------------------------------------------------- /neutrex/supar/utils/common.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | This is the public release of the code of our paper titled 4 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 5 | Language Specifications" (USENIX Security '24) 6 | Author: Abdullah Al Ishtiaq and Sarkar Snigdha Sarathi Das 7 | Contact: abdullah.ishtiaq@psu.edu 8 | 9 | Licensed under the Apache License, Version 2.0 (the "License"); 10 | you may not use this file except in compliance with the License. 11 | You may obtain a copy of the License at 12 | 13 | https://www.apache.org/licenses/LICENSE-2.0 14 | 15 | Unless required by applicable law or agreed to in writing, software 16 | distributed under the License is distributed on an "AS IS" BASIS, 17 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | See the License for the specific language governing permissions and 19 | limitations under the License. 20 | """ 21 | 22 | PAD = '' 23 | UNK = '' 24 | BOS = '' 25 | EOS = '' 26 | 27 | MIN = -1e32 28 | -------------------------------------------------------------------------------- /synthesizers/script_config.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is the public release of the code of our paper titled 3 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 4 | Language Specifications" (USENIX Security '24) 5 | Author: Abdullah Al Ishtiaq 6 | Contact: abdullah.ishtiaq@psu.edu 7 | 8 | Licensed under the Apache License, Version 2.0 (the "License"); 9 | you may not use this file except in compliance with the License. 
10 | You may obtain a copy of the License at 11 | 12 | https://www.apache.org/licenses/LICENSE-2.0 13 | 14 | Unless required by applicable law or agreed to in writing, software 15 | distributed under the License is distributed on an "AS IS" BASIS, 16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | See the License for the specific language governing permissions and 18 | limitations under the License. 19 | """ 20 | 21 | GEN = "4g" #4g, 5g, 5g-rrc 22 | 23 | common_definitions = "common-defs.json" 24 | 25 | saved_nas_definitions = "defs-saved.json" 26 | nas_definitions = "defs.json" 27 | keyword_db_table = "SubstringKeywordDistance" 28 | 29 | 30 | -------------------------------------------------------------------------------- /neutrex/supar/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | This is the public release of the code of our paper titled 4 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 5 | Language Specifications" (USENIX Security '24) 6 | Author: Abdullah Al Ishtiaq and Sarkar Snigdha Sarathi Das 7 | Contact: abdullah.ishtiaq@psu.edu 8 | 9 | Licensed under the Apache License, Version 2.0 (the "License"); 10 | you may not use this file except in compliance with the License. 11 | You may obtain a copy of the License at 12 | 13 | https://www.apache.org/licenses/LICENSE-2.0 14 | 15 | Unless required by applicable law or agreed to in writing, software 16 | distributed under the License is distributed on an "AS IS" BASIS, 17 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | See the License for the specific language governing permissions and 19 | limitations under the License. 20 | """ 21 | 22 | from . 
import field, fn, metric, transform 23 | from .config import Config 24 | from .data import Dataset 25 | from .embedding import Embedding 26 | from .field import ChartField, Field, RawField, SubwordField 27 | from .transform import CoNLL, Transform, Tree 28 | from .vocab import Vocab 29 | 30 | __all__ = ['ChartField', 'CoNLL', 'Config', 'Dataset', 'Embedding', 'Field', 31 | 'RawField', 'SubwordField', 'Transform', 'Tree', 'Vocab', 'field', 'fn', 'metric', 'transform'] 32 | -------------------------------------------------------------------------------- /synthesizers/CoreNLP_server.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is the public release of the code of our paper titled 3 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 4 | Language Specifications" (USENIX Security '24) 5 | Author: Abdullah Al Ishtiaq 6 | Contact: abdullah.ishtiaq@psu.edu 7 | 8 | Licensed under the Apache License, Version 2.0 (the "License"); 9 | you may not use this file except in compliance with the License. 10 | You may obtain a copy of the License at 11 | 12 | https://www.apache.org/licenses/LICENSE-2.0 13 | 14 | Unless required by applicable law or agreed to in writing, software 15 | distributed under the License is distributed on an "AS IS" BASIS, 16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | See the License for the specific language governing permissions and 18 | limitations under the License. 
19 | """ 20 | 21 | import time 22 | 23 | import stanza 24 | from stanza.server import CoreNLPClient 25 | 26 | stanza.install_corenlp() 27 | 28 | 29 | def Main(): 30 | corenlp_client = CoreNLPClient( 31 | annotators=['tokenize', 'ssplit', 'pos', 'lemma', 'ner', 'parse', 'depparse', 'coref'], 32 | properties={'annotators': 'coref', 'coref.algorithm': 'neural'}, timeout=30000, 33 | memory='4G', endpoint='http://localhost:9001') 34 | 35 | while True: 36 | corenlp_client.ensure_alive() 37 | time.sleep(300) 38 | 39 | if __name__ == '__main__': 40 | Main() 41 | -------------------------------------------------------------------------------- /neutrex/tests/test_fn.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | This is the public release of the code of our paper titled 4 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 5 | Language Specifications" (USENIX Security '24) 6 | Author: Abdullah Al Ishtiaq and Sarkar Snigdha Sarathi Das 7 | Contact: abdullah.ishtiaq@psu.edu 8 | 9 | Licensed under the Apache License, Version 2.0 (the "License"); 10 | you may not use this file except in compliance with the License. 11 | You may obtain a copy of the License at 12 | 13 | https://www.apache.org/licenses/LICENSE-2.0 14 | 15 | Unless required by applicable law or agreed to in writing, software 16 | distributed under the License is distributed on an "AS IS" BASIS, 17 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | See the License for the specific language governing permissions and 19 | limitations under the License. 
20 | """ 21 | 22 | from supar.structs.fn import tarjan 23 | 24 | 25 | def test_tarjan(): 26 | sequences = [[4, 1, 2, 0, 4, 4, 8, 6, 8], 27 | [2, 5, 0, 3, 1, 5, 8, 6, 8], 28 | [2, 5, 0, 4, 1, 5, 8, 6, 8], 29 | [2, 5, 0, 4, 1, 9, 6, 5, 7]] 30 | answers = [None, [[2, 5, 1]], [[2, 5, 1]], [[2, 5, 1], [9, 7, 6]]] 31 | for sequence, answer in zip(sequences, answers): 32 | if answer is None: 33 | assert next(tarjan(sequence), None) == answer 34 | else: 35 | assert list(tarjan(sequence)) == answer 36 | -------------------------------------------------------------------------------- /neutrex/supar/utils/tokenizer.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | This is the public release of the code of our paper titled 4 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 5 | Language Specifications" (USENIX Security '24) 6 | Author: Abdullah Al Ishtiaq and Sarkar Snigdha Sarathi Das 7 | Contact: abdullah.ishtiaq@psu.edu 8 | 9 | Licensed under the Apache License, Version 2.0 (the "License"); 10 | you may not use this file except in compliance with the License. 11 | You may obtain a copy of the License at 12 | 13 | https://www.apache.org/licenses/LICENSE-2.0 14 | 15 | Unless required by applicable law or agreed to in writing, software 16 | distributed under the License is distributed on an "AS IS" BASIS, 17 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | See the License for the specific language governing permissions and 19 | limitations under the License. 
20 | """ 21 | 22 | 23 | class Tokenizer: 24 | 25 | def __init__(self, lang='en'): 26 | import stanza 27 | try: 28 | self.pipeline = stanza.Pipeline(lang=lang, processors='tokenize', verbose=False, tokenize_no_ssplit=True) 29 | except Exception: 30 | stanza.download(lang=lang, resources_url='stanford') 31 | self.pipeline = stanza.Pipeline(lang=lang, processors='tokenize', verbose=False, tokenize_no_ssplit=True) 32 | 33 | def __call__(self, text): 34 | return [i.text for i in self.pipeline(text).sentences[0].tokens] 35 | -------------------------------------------------------------------------------- /neutrex/supar/modules/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | This is the public release of the code of our paper titled 4 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 5 | Language Specifications" (USENIX Security '24) 6 | Author: Abdullah Al Ishtiaq and Sarkar Snigdha Sarathi Das 7 | Contact: abdullah.ishtiaq@psu.edu 8 | 9 | Licensed under the Apache License, Version 2.0 (the "License"); 10 | you may not use this file except in compliance with the License. 11 | You may obtain a copy of the License at 12 | 13 | https://www.apache.org/licenses/LICENSE-2.0 14 | 15 | Unless required by applicable law or agreed to in writing, software 16 | distributed under the License is distributed on an "AS IS" BASIS, 17 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | See the License for the specific language governing permissions and 19 | limitations under the License. 
20 | """ 21 | 22 | from .affine import Biaffine, Triaffine 23 | from .dropout import IndependentDropout, SharedDropout 24 | from .lstm import CharLSTM, VariationalLSTM 25 | from .mlp import MLP 26 | from .pretrained import ELMoEmbedding, TransformerEmbedding 27 | from .scalar_mix import ScalarMix 28 | from .transformer import RelativePositionTransformerEncoder, TransformerEncoder 29 | 30 | __all__ = ['MLP', 'TransformerEmbedding', 'Biaffine', 'CharLSTM', 'ELMoEmbedding', 'IndependentDropout', 31 | 'RelativePositionTransformerEncoder', 'ScalarMix', 'SharedDropout', 'TransformerEncoder', 'Triaffine', 32 | 'VariationalLSTM'] 33 | -------------------------------------------------------------------------------- /neutrex/supar/models/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | This is the public release of the code of our paper titled 4 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 5 | Language Specifications" (USENIX Security '24) 6 | Author: Abdullah Al Ishtiaq and Sarkar Snigdha Sarathi Das 7 | Contact: abdullah.ishtiaq@psu.edu 8 | 9 | Licensed under the Apache License, Version 2.0 (the "License"); 10 | you may not use this file except in compliance with the License. 11 | You may obtain a copy of the License at 12 | 13 | https://www.apache.org/licenses/LICENSE-2.0 14 | 15 | Unless required by applicable law or agreed to in writing, software 16 | distributed under the License is distributed on an "AS IS" BASIS, 17 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | See the License for the specific language governing permissions and 19 | limitations under the License. 
20 | """ 21 | 22 | from .const import CRFConstituencyModel, VIConstituencyModel 23 | from .dep import (BiaffineDependencyModel, CRF2oDependencyModel, 24 | CRFDependencyModel, VIDependencyModel) 25 | from .model import Model 26 | from .sdp import BiaffineSemanticDependencyModel, VISemanticDependencyModel 27 | 28 | __all__ = ['Model', 29 | 'BiaffineDependencyModel', 30 | 'CRFDependencyModel', 31 | 'CRF2oDependencyModel', 32 | 'VIDependencyModel', 33 | 'CRFConstituencyModel', 34 | 'VIConstituencyModel', 35 | 'BiaffineSemanticDependencyModel', 36 | 'VISemanticDependencyModel'] 37 | -------------------------------------------------------------------------------- /neutrex/supar/parsers/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | This is the public release of the code of our paper titled 4 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 5 | Language Specifications" (USENIX Security '24) 6 | Author: Abdullah Al Ishtiaq and Sarkar Snigdha Sarathi Das 7 | Contact: abdullah.ishtiaq@psu.edu 8 | 9 | Licensed under the Apache License, Version 2.0 (the "License"); 10 | you may not use this file except in compliance with the License. 11 | You may obtain a copy of the License at 12 | 13 | https://www.apache.org/licenses/LICENSE-2.0 14 | 15 | Unless required by applicable law or agreed to in writing, software 16 | distributed under the License is distributed on an "AS IS" BASIS, 17 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | See the License for the specific language governing permissions and 19 | limitations under the License. 
20 | """ 21 | 22 | from .const import CRFConstituencyParser, VIConstituencyParser 23 | from .dep import (BiaffineDependencyParser, CRF2oDependencyParser, 24 | CRFDependencyParser, VIDependencyParser) 25 | from .parser import Parser 26 | from .sdp import BiaffineSemanticDependencyParser, VISemanticDependencyParser 27 | 28 | __all__ = ['BiaffineDependencyParser', 29 | 'CRFDependencyParser', 30 | 'CRF2oDependencyParser', 31 | 'VIDependencyParser', 32 | 'CRFConstituencyParser', 33 | 'VIConstituencyParser', 34 | 'BiaffineSemanticDependencyParser', 35 | 'VISemanticDependencyParser', 36 | 'Parser'] 37 | -------------------------------------------------------------------------------- /neutrex/supar/structs/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | This is the public release of the code of our paper titled 4 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 5 | Language Specifications" (USENIX Security '24) 6 | Author: Abdullah Al Ishtiaq and Sarkar Snigdha Sarathi Das 7 | Contact: abdullah.ishtiaq@psu.edu 8 | 9 | Licensed under the Apache License, Version 2.0 (the "License"); 10 | you may not use this file except in compliance with the License. 11 | You may obtain a copy of the License at 12 | 13 | https://www.apache.org/licenses/LICENSE-2.0 14 | 15 | Unless required by applicable law or agreed to in writing, software 16 | distributed under the License is distributed on an "AS IS" BASIS, 17 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | See the License for the specific language governing permissions and 19 | limitations under the License. 
20 | """ 21 | 22 | from .dist import StructuredDistribution 23 | from .linearchain import LinearChainCRF 24 | from .tree import (BiLexicalizedConstituencyCRF, ConstituencyCRF, 25 | Dependency2oCRF, DependencyCRF, MatrixTree) 26 | from .vi import (ConstituencyLBP, ConstituencyMFVI, DependencyLBP, 27 | DependencyMFVI, SemanticDependencyLBP, SemanticDependencyMFVI) 28 | 29 | __all__ = ['StructuredDistribution', 30 | 'MatrixTree', 31 | 'DependencyCRF', 32 | 'Dependency2oCRF', 33 | 'ConstituencyCRF', 34 | 'BiLexicalizedConstituencyCRF', 35 | 'LinearChainCRF', 36 | 'DependencyMFVI', 37 | 'DependencyLBP', 38 | 'ConstituencyMFVI', 39 | 'ConstituencyLBP', 40 | 'SemanticDependencyMFVI', 41 | 'SemanticDependencyLBP', ] 42 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Hermes 2 | 3 | This is the official repository of the paper titled "[Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural Language Specifications](https://www.usenix.org/conference/usenixsecurity24/presentation/al-ishtiaq)" (USENIX Security '24). 4 | 5 | 6 | ## System 7 | - OS: Ubuntu 22.04.3 LTS 8 | - GPU: NVIDIA RTX A6000 9 | - CUDA Version: 12.2 10 | - NVIDIA Driver version: 535.86.05 11 | 12 | 13 | ## Components 14 | 15 | ### Annotated data 16 | 17 | `data` contains the annotated data for 4G NAS, 5G NAS and 5G RRC specifications. 18 | 19 | 20 | ### NEUTREX 21 | 22 | `neutrex` contains the implementation of NEUTREX. It also provides instructions to run it. 23 | 24 | 25 | ### Keyword Extractor 26 | 27 | `keyword_extraction` contains the implementation of Keyword Extractor from Hermes. 28 | It also contains the instructions on how to use the tool. 29 | 30 | 31 | ### Synthesizers 32 | 33 | `synthesizers` contains the implementation of IRSynthesizer and FSMSynthesizer. 
34 | It also provides instructions to use the tool. 35 | 36 | 37 | ## Citation 38 | 39 | ```bibtex 40 | @inproceedings {ishtiaq2023hermes, 41 | author = {Abdullah Al Ishtiaq and Sarkar Snigdha Sarathi Das and Syed Md Mukit Rashid and Ali Ranjbar and Kai Tu and Tianwei Wu and Zhezheng Song and Weixuan Wang and Mujtahid Akon and Rui Zhang and Syed Rafiul Hussain}, 42 | title = {Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural Language Specifications}, 43 | booktitle = {33rd USENIX Security Symposium (USENIX Security 24)}, 44 | year = {2024}, 45 | isbn = {978-1-939133-44-1}, 46 | address = {Philadelphia, PA}, 47 | pages = {4445--4462}, 48 | url = {https://www.usenix.org/conference/usenixsecurity24/presentation/al-ishtiaq}, 49 | publisher = {USENIX Association}, 50 | month = aug 51 | } 52 | ``` 53 | 54 | 55 | 56 | -------------------------------------------------------------------------------- /keyword_extraction/cause_extraction.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is the public release of the code of our paper titled 3 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 4 | Language Specifications" (USENIX Security '24) 5 | Author: Abdullah Al Ishtiaq and Syed Md Mukit Rashid 6 | Contact: abdullah.ishtiaq@psu.edu 7 | 8 | Licensed under the Apache License, Version 2.0 (the "License"); 9 | you may not use this file except in compliance with the License. 10 | You may obtain a copy of the License at 11 | 12 | https://www.apache.org/licenses/LICENSE-2.0 13 | 14 | Unless required by applicable law or agreed to in writing, software 15 | distributed under the License is distributed on an "AS IS" BASIS, 16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
17 | See the License for the specific language governing permissions and 18 | limitations under the License. 19 | """ 20 | 21 | import pprint 22 | 23 | pp = pprint.PrettyPrinter(indent=4) 24 | 25 | 26 | def create_cause_set(): 27 | causes = [] 28 | with open("assets/cause.txt", "r") as fr: 29 | lines = fr.readlines() 30 | for line in lines: 31 | if line.startswith("Cause #"): 32 | causes.append(line.split("\n")[0]) 33 | 34 | keyword_set = dict() 35 | for line in causes: 36 | tokens = line.split(" ") 37 | 38 | phrase_1 = str(tokens[0]) + " " + str(tokens[1]) 39 | phrase_2 = str(tokens[1]) 40 | phrase_3 = line.split(" ", 2)[2].replace("-", "").strip() 41 | phrase_4 = line.split(" ", 1)[1] 42 | 43 | key_str = phrase_1.replace(" ", "_").replace("#", "").lower() 44 | if key_str in keyword_set.keys(): 45 | keyword_set[key_str] = list(set(keyword_set[key_str] + [phrase_1, phrase_2, phrase_3, phrase_4])) 46 | else: 47 | keyword_set[key_str] = [phrase_1, phrase_2, phrase_3, phrase_4] 48 | 49 | return keyword_set 50 | 51 | -------------------------------------------------------------------------------- /neutrex/supar/utils/scripting.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is the public release of the code of our paper titled 3 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 4 | Language Specifications" (USENIX Security '24) 5 | Author: Abdullah Al Ishtiaq and Sarkar Snigdha Sarathi Das 6 | Contact: abdullah.ishtiaq@psu.edu 7 | 8 | Licensed under the Apache License, Version 2.0 (the "License"); 9 | you may not use this file except in compliance with the License. 
10 | You may obtain a copy of the License at 11 | 12 | https://www.apache.org/licenses/LICENSE-2.0 13 | 14 | Unless required by applicable law or agreed to in writing, software 15 | distributed under the License is distributed on an "AS IS" BASIS, 16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | See the License for the specific language governing permissions and 18 | limitations under the License. 19 | """ 20 | 21 | import nltk 22 | 23 | from utils.metric import SpanMetric 24 | from nltk import tree 25 | from utils.transform import Tree 26 | 27 | 28 | pred_path = './current_predictions.txt' 29 | gt_path = './current_predictions.txt' 30 | 31 | preds = [] 32 | gts = [] 33 | 34 | 35 | with open(pred_path, mode='r', encoding='utf8', newline='\n\n\n') as f: 36 | lines = f.readlines() 37 | for l in lines: 38 | preds.append(nltk.Tree.fromstring(l)) 39 | 40 | with open(gt_path, mode='r', encoding='utf8', newline='\n\n\n') as f: 41 | lines = f.readlines() 42 | for l in lines: 43 | gts.append(nltk.Tree.fromstring(l)) 44 | 45 | metric = SpanMetric() 46 | delete={'TOP', 'S1', '-NONE-', ',', ':', '``', "''", '.', '?', '!', '', '', '', '', '', '', '', '', '', '', '', ''} 47 | equal={'ADVP': 'PRT'} 48 | 49 | result = metric([Tree.factorize(tree, delete, equal) for tree in preds], 50 | [Tree.factorize(tree, delete, equal) for tree in gts]) 51 | 52 | print(result) -------------------------------------------------------------------------------- /neutrex/supar/utils/embedding.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | This is the public release of the code of our paper titled 4 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 5 | Language Specifications" (USENIX Security '24) 6 | Author: Abdullah Al Ishtiaq and Sarkar Snigdha Sarathi Das 7 | Contact: abdullah.ishtiaq@psu.edu 8 | 9 | Licensed under 
the Apache License, Version 2.0 (the "License"); 10 | you may not use this file except in compliance with the License. 11 | You may obtain a copy of the License at 12 | 13 | https://www.apache.org/licenses/LICENSE-2.0 14 | 15 | Unless required by applicable law or agreed to in writing, software 16 | distributed under the License is distributed on an "AS IS" BASIS, 17 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | See the License for the specific language governing permissions and 19 | limitations under the License. 20 | """ 21 | 22 | import torch 23 | 24 | 25 | class Embedding(object): 26 | 27 | def __init__(self, tokens, vectors, unk=None): 28 | self.tokens = tokens 29 | self.vectors = torch.tensor(vectors) 30 | self.pretrained = {w: v for w, v in zip(tokens, vectors)} 31 | self.unk = unk 32 | 33 | def __len__(self): 34 | return len(self.tokens) 35 | 36 | def __contains__(self, token): 37 | return token in self.pretrained 38 | 39 | @property 40 | def dim(self): 41 | return self.vectors.size(1) 42 | 43 | @property 44 | def unk_index(self): 45 | if self.unk is not None: 46 | return self.tokens.index(self.unk) 47 | else: 48 | raise AttributeError 49 | 50 | @classmethod 51 | def load(cls, path, unk=None): 52 | with open(path, 'r') as f: 53 | lines = [line for line in f] 54 | splits = [line.split() for line in lines] 55 | tokens, vectors = zip(*[(s[0], list(map(float, s[1:]))) 56 | for s in splits]) 57 | 58 | return cls(tokens, vectors, unk=unk) 59 | -------------------------------------------------------------------------------- /neutrex/supar/utils/parallel.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | This is the public release of the code of our paper titled 4 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 5 | Language Specifications" (USENIX Security '24) 6 | Author: Abdullah Al 
# -*- coding: utf-8 -*-

import os
from random import Random

import torch
import torch.distributed as dist
import torch.nn as nn


class DistributedDataParallel(nn.parallel.DistributedDataParallel):
    # DDP wrapper that transparently exposes the wrapped module's attributes,
    # so callers can use model methods without unwrapping `.module`.

    def __init__(self, module, **kwargs):
        super().__init__(module, **kwargs)

    def __getattr__(self, name):
        # nn.Module keeps parameters/submodules outside __dict__, hence the
        # super().__getattr__ call to fetch the wrapped module; prefer its
        # attributes, falling back to the DDP object's own.
        wrapped = super().__getattr__('module')
        if hasattr(wrapped, name):
            return getattr(wrapped, name)
        return super().__getattr__(name)


def init_device(device, local_rank=-1, backend='nccl', host=None, port=None):
    """Select visible GPUs and, when more than one device is visible,
    initialize distributed training for this process.

    Args:
        device: value assigned to CUDA_VISIBLE_DEVICES (e.g. '0' or '0,1').
        local_rank: this process's rank as passed by the launcher; -1 for
            single-process runs.
        backend: torch.distributed backend name (default 'nccl').
        host, port: master address/port; fall back to the MASTER_ADDR /
            MASTER_PORT environment variables, then to defaults.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = device
    if torch.cuda.device_count() > 1:
        host = host or os.environ.get('MASTER_ADDR', 'localhost')
        # Random(0) is seeded, so every participating process derives the
        # same default port without coordination.
        port = port or os.environ.get('MASTER_PORT', str(Random(0).randint(10000, 20000)))
        os.environ['MASTER_ADDR'] = host
        os.environ['MASTER_PORT'] = port
        dist.init_process_group(backend)
        torch.cuda.set_device(local_rank)


def is_master():
    # True when not running distributed at all, or when this is rank 0.
    return not dist.is_available() or not dist.is_initialized() or dist.get_rank() == 0
Download the following files to the `neutrex` folder: 6 | - model_4g_nas: https://drive.google.com/file/d/11-4ujqtQAwDf8p_7j_leG_hECiAaxw2t/view?usp=sharing 7 | - model_5g_nas: https://drive.google.com/file/d/1xHqhadH3mgjK9v_0eV7MWSRQAx6ZycJE/view?usp=sharing 8 | 9 | Download and unzip the following file containing `CellulaRoBERTa` to the `neutrex` folder: 10 | - saved_model.zip: https://drive.google.com/file/d/1R3A5zfM9z6aQzILrfh7aPkSzlQebu5iX/view?usp=sharing 11 | 12 | Download the following file to the `neutrex/data` folder: 13 | - glove.6B.100d.txt: https://drive.google.com/file/d/1qot1XbmuN6R7bwDmT7CwSZBV1Sh1X1VD/view?usp=sharing 14 | 15 | 16 | ### Requirements 17 | 18 | - python=3.7 19 | - dill==0.3.6 20 | - nltk==3.8.1 21 | - stanza==1.5.0 22 | - tokenizers==0.13.3 23 | - torch==1.13.1 24 | - transformers==4.30.1 25 | 26 | 27 | ## Preprocess 28 | 29 | Preprocess the text document with `neutrex/xml_to_tree/conversion.py`. 30 | It takes inputs from an `input.txt` file and 31 | will generate an `out_full.pid` file with preprocessed trees to be given as input to NEUTREX. 
32 | 33 | 34 | ## Commands 35 | 36 | train: 37 | ```sh 38 | python3 -u -m supar.cmds.crf_con train -b -d 0 -c crf-con-roberta-en -p model_4g_nas \ 39 | --train data/4g-nas.pid \ 40 | --dev data/5g-nas.pid \ 41 | --test data/5g-nas.pid \ 42 | --encoder=bert \ 43 | --bert=saved_model/ \ 44 | --lr=5e-5 \ 45 | --lr-rate=20 \ 46 | --epochs=200 \ 47 | --update-steps=4 48 | ``` 49 | 50 | predict: 51 | ```sh 52 | python3 -u -m supar.cmds.crf_con predict -d 0 -c crf-con-roberta-en -p model_4g_nas \ 53 | --data data/5g-nas.pid \ 54 | --pred pred_out.pid \ 55 | --encoder=bert \ 56 | --bert=saved_model/ 57 | ``` 58 | 59 | evaluate: 60 | ```sh 61 | python3 -u -m supar.cmds.crf_con evaluate -d 0 -c crf-con-roberta-en -p model_4g_nas \ 62 | --data data/5g-nas.pid \ 63 | --encoder=bert \ 64 | --bert=saved_model/ 65 | ``` 66 | 67 | 68 | ## Tree to XML 69 | 70 | The output trees from NEUTREX can be translated to XML formats with `neutrex/tree_to_xml/tree_to_xml.py`. 71 | It takes inputs from a `input.pid` file and will generate outputs to `output.txt`. 72 | 73 | 74 | ## Acknowledgement 75 | 76 | We acknowledge [SuPar](https://github.com/yzhangcs/parser) as the baseline implementation of NEUTREX. 77 | -------------------------------------------------------------------------------- /keyword_extraction/README.md: -------------------------------------------------------------------------------- 1 | # Keyword Extraction 2 | 3 | The folder shows keyword extraction for 5G RRC Release 17. It can be adapted for other specification documents. 4 | 5 | 6 | ## Required packages 7 | 8 | ```bash 9 | pip3 install stanza transformers nltk 10 | pip3 install PyEnchant 11 | pip3 install PyPDF2 12 | pip3 install tabula-py 13 | 14 | python3 -m nltk.downloader all-nltk 15 | ``` 16 | 17 | 18 | ## How to generate files in `assets` folder: 19 | 20 | ```bash 21 | # substitute '. ' and '; ' with '.\n' and ';\n' 22 | cat assets/5g-rrc.txt | sed 's/\. 
/\.\n/g' | sed 's/; /;\n/g' > assets/5g-rrc_small_lines.txt 23 | python3 constituency_parser.py -f assets/5g-rrc_small_lines.txt --label NP > assets/5g-rrc_small_lines.np.txt 24 | cat assets/5g-rrc_small_lines.np.txt | awk '{print tolower($0)}' | sort | uniq -c | sort -nr > assets/5g-rrc_small_lines.np.count.0.txt 25 | cat assets/5g-rrc_small_lines.np.txt | awk '{print tolower($0)}' | sed 's/^the \|^a \|^an //' | sed '/[],:;\(\){}[]/d' | grep -Evw '(and|or|but)' | sort | uniq -c | sort -nr > assets/5g-rrc_small_lines.np.count.1.txt 26 | cat assets/5g-rrc_small_lines.np.txt | awk '{print tolower($0)}' | sed 's/^the \|^a \|^an //' | sed '/[],:;\(\){}[]/d' | grep -Evw '(and|or|but)' > temp 27 | cat temp | grep 's$' | sed 's/.$//' | sort -u | grep -xFf temp | sed -e 's/$/s/' > temp.remove 28 | cat temp | grep -vxFf temp.remove > out.1 29 | cat temp | grep -xFf temp.remove | sed 's/.$//' > out.2 30 | cat out.1 out.2 | sort | uniq -c | sort -nr > assets/5g-rrc_small_lines.np.count.2.txt 31 | rm temp temp.remove out.1 out.2 32 | ``` 33 | 34 | 35 | ## Update the following files manually 36 | 37 | - `assets/abbreviations.txt` 38 | - `assets/definitions.txt` 39 | - `assets/cause.txt` 40 | - `assets/manual_recategorization.txt` 41 | - `gather_keyword_pdf.py: gather_messages_and_procedures, gather_state, gather_vars` 42 | - `ie_from_pdf.py: get_IE_toc` 43 | 44 | 45 | ## Run the following commands 46 | 47 | ```bash 48 | python3 noun_phrase_cleanup.py 49 | python3 merge_keywords_np.py 50 | python3 create_combined_dictionary.py 51 | python3 post_refinement_combined_keywords.py 52 | ``` 53 | 54 | Output: `combined.json` 55 | 56 | 57 | ## Note 58 | 59 | The output of automated keyword extraction and categorization may still contain some uncategorized keywords. 60 | In Hermes, we manually check and categorize them. 
61 | 62 | 63 | -------------------------------------------------------------------------------- /neutrex/tests/test_parse.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | This is the public release of the code of our paper titled 4 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 5 | Language Specifications" (USENIX Security '24) 6 | Author: Abdullah Al Ishtiaq and Sarkar Snigdha Sarathi Das 7 | Contact: abdullah.ishtiaq@psu.edu 8 | 9 | Licensed under the Apache License, Version 2.0 (the "License"); 10 | you may not use this file except in compliance with the License. 11 | You may obtain a copy of the License at 12 | 13 | https://www.apache.org/licenses/LICENSE-2.0 14 | 15 | Unless required by applicable law or agreed to in writing, software 16 | distributed under the License is distributed on an "AS IS" BASIS, 17 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | See the License for the specific language governing permissions and 19 | limitations under the License. 
# -*- coding: utf-8 -*-

import os

import supar
from supar import Parser


def test_parse():
    """Smoke-test every released non-transformer parser on raw and
    pre-tokenized sentences, removing each cached model afterwards."""
    raw = {'en': ['She enjoys playing tennis.', 'Too young too simple!'],
           'zh': '她喜欢打网球.',
           'de': 'Sie spielt gerne Tennis.',
           'fr': 'Elle aime jouer au tennis.',
           'ru': 'Она любит играть в теннис.',
           'he': 'היא נהנית לשחק טניס.'}
    tokenized = {'en': [['She', 'enjoys', 'playing', 'tennis', '.'], ['Too', 'young', 'too', 'simple', '!']],
                 'zh': ['她', '喜欢', '打', '网球', '.'],
                 'de': ['Sie', 'spielt', 'gerne', 'Tennis', '.'],
                 'fr': ['Elle', 'aime', 'jouer', 'au', 'tennis', '.'],
                 'ru': ['Она', 'любит', 'играть', 'в', 'теннис', '.'],
                 'he': ['היא', 'נהנית', 'לשחק', 'טניס', '.']}
    for name, model in supar.NAME.items():
        # Transformer-based checkpoints are too heavy for this smoke test.
        if any(tag in name for tag in ('xlmr', 'roberta', 'electra')):
            continue
        parser = Parser.load(name, reload=True)
        if name.endswith(('en', 'zh')):
            # Monolingual model: exercise both raw and pre-tokenized input.
            lang = name[-2:]
            parser.predict(raw[lang], prob=True, lang=lang)
            parser.predict(tokenized[lang], prob=True, lang=None)
        else:
            # Multilingual model: raw text per language, then every
            # tokenized input at once with no language hint.
            for lang in raw:
                parser.predict(raw[lang], prob=True, lang=lang)
            parser.predict(list(tokenized.values()), prob=True, lang=None)
        os.remove(os.path.join(os.path.expanduser('~/.cache/supar'), model))
# -*- coding: utf-8 -*-

import torch
from supar.utils import Config
from supar.utils.logging import init_logger, logger
from supar.utils.parallel import init_device

from pathlib import Path


def parse(parser):
    """Finish command-line parsing and dispatch to train/evaluate/predict.

    Args:
        parser: an argparse.ArgumentParser pre-populated by the per-command
            cmds module; a 'Parser' class and a 'mode' are expected among the
            parsed options (injected by the subcommand definitions).
    """
    parser.add_argument('--path', '-p', help='path to model file')
    parser.add_argument('--conf', '-c', default='', help='path to config file')
    parser.add_argument('--device', '-d', default='3', help='ID of GPU to use')
    parser.add_argument('--seed', '-s', default=1, type=int, help='seed for generating random numbers')
    parser.add_argument('--threads', '-t', default=16, type=int, help='max num of threads')
    parser.add_argument("--local_rank", type=int, default=-1, help='node rank for distributed training')
    # Parse twice: the first pass collects options unknown to this parser
    # (model-specific flags), the second folds them into the same namespace.
    args, unknown = parser.parse_known_args()
    args, unknown = parser.parse_known_args(unknown, args)
    args = Config.load(**vars(args), unknown=unknown)
    Parser = args.pop('Parser')

    torch.set_num_threads(args.threads)
    torch.manual_seed(args.seed)
    init_device(args.device, args.local_rank)
    # Append to an existing log when resuming from a checkpoint.
    init_logger(logger, f"{args.path}.{args.mode}.log", 'a' if args.get('checkpoint') else 'w')
    logger.info('\n' + str(args))

    if args.mode == 'train':
        parser = Parser.load(**args) if args.checkpoint else Parser.build(**args)
        # Touch the model path up front so later saves have a valid target.
        Path(args.path).touch()
        parser.train(**args)
    elif args.mode == 'evaluate':
        parser = Parser.load(**args)
        parser.evaluate(**args)
    elif args.mode == 'predict':
        parser = Parser.load(**args)
        parser.predict(**args)
from script_config import GEN

# Message definitions are generation-specific; import the table matching the
# configured generation (star import supplies the *_msg_list / *_wait_for /
# msg_response names used below).
if GEN == "5g":
    from script_msg_defs_5 import *
elif GEN == "4g":
    from script_msg_defs_4 import *
elif GEN == "5g-rrc":
    from script_msg_defs_5_rrc import *


def get_msg_direction(message_name: str) -> str:
    """Return the direction tag for *message_name*, or 'unk_msg' if unknown."""
    if message_name in um_msg_list:
        return "ue_to_mme"
    if message_name in mu_msg_list:
        return "mme_to_ue"
    if message_name in both_dir_msg_list:
        return "both_dir"
    return "unk_msg"


def get_msg_sublayer(message_name: str) -> str:
    """Return the protocol sublayer of *message_name*, or 'unk_msg' if unknown."""
    if message_name in emm_sublayer_msg_list:
        return "emm_sublayer"
    if message_name in esm_sublayer_msg_list:
        return "esm_sublayer"
    if message_name in special_msg_list:
        return "special"
    return "unk_msg"


def get_msg_response(message_name: str) -> str:
    """Return the expected response message, or 'unk_resp' if unmapped."""
    # dict.get replaces the original `in` test + index (double lookup).
    return msg_response.get(message_name, "unk_resp")


def get_mme_wait_for(msg: str):
    """Return the message the MME waits for after *msg*, or '' if unmapped."""
    return mme_wait_for_message.get(msg, "")


def get_check_mme_wait_for(msg: str):
    """Return the wait-for check entry for *msg*, or '' if unmapped."""
    return check_mme_wait_for.get(msg, "")


def check_valid_msg(msg: str):
    """True iff *msg* appears in any known message list."""
    # any() over the lists replaces the original if/return True/False chain.
    return any(msg in msg_list for msg_list in (
        um_msg_list, mu_msg_list, both_dir_msg_list,
        emm_sublayer_msg_list, esm_sublayer_msg_list, special_msg_list))
"or": "_OR_", 14 | "and": "_AND_" 15 | }, 16 | 17 | "conj_label": { 18 | "conj:and" : "_AND_", 19 | "conj:or" : "_OR_" 20 | }, 21 | 22 | "preposition" : { 23 | "from" : "_FROM_", 24 | "to" : "_TO_", 25 | "before" : "_BEFORE_", 26 | "after" : "_AFTER_", 27 | "for" : "_FOR_", 28 | "by" : "_BY_", 29 | "during": "_DURING_", 30 | "due to": "_DUE_TO_", 31 | "in": "_IN_", 32 | "of": "_OF_", 33 | "with": "_WITH_", 34 | "into": "_INTO_" 35 | }, 36 | 37 | "preposition_label" : { 38 | "nmod:from" : "_FROM_", 39 | "nmod:to" : "_TO_", 40 | "nmod:before" : "_BEFORE_", 41 | "nmod:after" : "_AFTER_", 42 | "nmod:for" : "_FOR_", 43 | "nmod:by" : "_BY_", 44 | "nmod:during": "_DURING_", 45 | "nmod:due_to": "_DUE_TO_", 46 | "nmod:in": "_IN_", 47 | "nmod:of": "_OF_", 48 | "nmod:with": "_WITH_", 49 | "nmod:except": "_EXCEPT_", 50 | "nmod:instead_of": "_INSTEAD_OF_", 51 | "obl:from" : "_FROM_", 52 | "obl:to" : "_TO_", 53 | "obl:before" : "_BEFORE_", 54 | "obl:after" : "_AFTER_", 55 | "obl:for" : "_FOR_", 56 | "obl:by" : "_BY_", 57 | "obl:during": "_DURING_", 58 | "obl:due_to": "_DUE_TO_", 59 | "obl:in": "_IN_", 60 | "obl:within": "_IN_", 61 | "obl:of": "_OF_", 62 | "obl:with": "_WITH_", 63 | "obl:except": "_EXCEPT_", 64 | "obl:except_for": "_EXCEPT_", 65 | "obl:into": "_INTO_" 66 | }, 67 | 68 | "mark": { 69 | "until": "_UNTIL_", 70 | "without": "_WITHOUT_", 71 | "before": "_BEFORE_", 72 | "unless": "_UNLESS_" 73 | }, 74 | 75 | 76 | "case": { 77 | "until": "_UNTIL_", 78 | "without": "_WITHOUT_", 79 | "before": "_BEFORE_", 80 | "except": "_EXCEPT_", 81 | "via": "_VIA_", 82 | "in": "_IN_" 83 | }, 84 | 85 | "special": { 86 | "specified": "_REFERENCE_", 87 | "section": "_SECTION_", 88 | "subsection": "_SUBSECTION_", 89 | "subclause": "_SUBCLAUSE_", 90 | "annex": "_ANNEX_", 91 | "ts": "_TS_" 92 | }, 93 | 94 | "number": { 95 | "zero": 0, 96 | "one": 1, 97 | "two": 2, 98 | "three": 3, 99 | "four": 4, 100 | "five": 5, 101 | "six": 6, 102 | "seven": 7, 103 | "eight": 8, 104 | "nine": 9, 105 | 
"ten": 10 106 | }, 107 | 108 | "ignore_list": [ 109 | "already", 110 | "subclause", 111 | "unchanged" 112 | ] 113 | 114 | } -------------------------------------------------------------------------------- /neutrex/supar/modules/scalar_mix.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | This is the public release of the code of our paper titled 4 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 5 | Language Specifications" (USENIX Security '24) 6 | Author: Abdullah Al Ishtiaq and Sarkar Snigdha Sarathi Das 7 | Contact: abdullah.ishtiaq@psu.edu 8 | 9 | Licensed under the Apache License, Version 2.0 (the "License"); 10 | you may not use this file except in compliance with the License. 11 | You may obtain a copy of the License at 12 | 13 | https://www.apache.org/licenses/LICENSE-2.0 14 | 15 | Unless required by applicable law or agreed to in writing, software 16 | distributed under the License is distributed on an "AS IS" BASIS, 17 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | See the License for the specific language governing permissions and 19 | limitations under the License. 20 | """ 21 | 22 | import torch 23 | import torch.nn as nn 24 | 25 | 26 | class ScalarMix(nn.Module): 27 | r""" 28 | Computes a parameterized scalar mixture of :math:`N` tensors, :math:`mixture = \gamma * \sum_{k}(s_k * tensor_k)` 29 | where :math:`s = \mathrm{softmax}(w)`, with :math:`w` and :math:`\gamma` scalar parameters. 30 | 31 | Args: 32 | n_layers (int): 33 | The number of layers to be mixed, i.e., :math:`N`. 34 | dropout (float): 35 | The dropout ratio of the layer weights. 36 | If dropout > 0, then for each scalar weight, adjusts its softmax weight mass to 0 37 | with the dropout probability (i.e., setting the unnormalized weight to -inf). 
38 | This effectively redistributes the dropped probability mass to all other weights. 39 | Default: 0. 40 | """ 41 | 42 | def __init__(self, n_layers, dropout=0): 43 | super().__init__() 44 | 45 | self.n_layers = n_layers 46 | 47 | self.weights = nn.Parameter(torch.zeros(n_layers)) 48 | self.gamma = nn.Parameter(torch.tensor([1.0])) 49 | self.dropout = nn.Dropout(dropout) 50 | 51 | def __repr__(self): 52 | s = f"n_layers={self.n_layers}" 53 | if self.dropout.p > 0: 54 | s += f", dropout={self.dropout.p}" 55 | 56 | return f"{self.__class__.__name__}({s})" 57 | 58 | def forward(self, tensors): 59 | r""" 60 | Args: 61 | tensors (list[~torch.Tensor]): 62 | :math:`N` tensors to be mixed. 63 | 64 | Returns: 65 | The mixture of :math:`N` tensors. 66 | """ 67 | 68 | normed_weights = self.dropout(self.weights.softmax(-1)) 69 | weighted_sum = sum(w * h for w, h in zip(normed_weights, tensors)) 70 | 71 | return self.gamma * weighted_sum 72 | -------------------------------------------------------------------------------- /neutrex/supar/modules/mlp.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | This is the public release of the code of our paper titled 4 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 5 | Language Specifications" (USENIX Security '24) 6 | Author: Abdullah Al Ishtiaq and Sarkar Snigdha Sarathi Das 7 | Contact: abdullah.ishtiaq@psu.edu 8 | 9 | Licensed under the Apache License, Version 2.0 (the "License"); 10 | you may not use this file except in compliance with the License. 11 | You may obtain a copy of the License at 12 | 13 | https://www.apache.org/licenses/LICENSE-2.0 14 | 15 | Unless required by applicable law or agreed to in writing, software 16 | distributed under the License is distributed on an "AS IS" BASIS, 17 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# -*- coding: utf-8 -*-

import torch.nn as nn
from supar.modules.dropout import SharedDropout


class MLP(nn.Module):
    r"""
    Applies a linear transformation together with a non-linear activation to the incoming tensor:
    :math:`y = \mathrm{Activation}(x A^T + b)`

    Args:
        n_in (~torch.Tensor):
            The size of each input feature.
        n_out (~torch.Tensor):
            The size of each output feature.
        dropout (float):
            If non-zero, introduces a :class:`SharedDropout` layer on the output with this dropout ratio. Default: 0.
        activation (bool):
            Whether to use activations. Default: True.
    """

    def __init__(self, n_in, n_out, dropout=0, activation=True):
        super().__init__()

        self.n_in = n_in
        self.n_out = n_out
        self.linear = nn.Linear(n_in, n_out)
        # Identity stands in for the activation when it is disabled.
        self.activation = nn.LeakyReLU(negative_slope=0.1) if activation else nn.Identity()
        self.dropout = SharedDropout(p=dropout)

        self.reset_parameters()

    def __repr__(self):
        details = [f"n_in={self.n_in}", f"n_out={self.n_out}"]
        if self.dropout.p > 0:
            details.append(f"dropout={self.dropout.p}")

        return f"{self.__class__.__name__}({', '.join(details)})"

    def reset_parameters(self):
        # Orthogonal weight initialization with a zero bias.
        nn.init.orthogonal_(self.linear.weight)
        nn.init.zeros_(self.linear.bias)

    def forward(self, x):
        r"""
        Args:
            x (~torch.Tensor):
                The size of each input feature is `n_in`.

        Returns:
            A tensor with the size of each output feature `n_out`.
        """

        return self.dropout(self.activation(self.linear(x)))
# -*- coding: utf-8 -*-

import logging
import os

from supar.utils.parallel import is_master
from tqdm import tqdm


def get_logger(name):
    # Thin wrapper kept as a stable import point for module loggers.
    return logging.getLogger(name)


class TqdmHandler(logging.StreamHandler):
    # Routes log records through tqdm.write so log lines do not mangle any
    # active progress bars.

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def emit(self, record):
        try:
            msg = self.format(record)
            tqdm.write(msg)
            self.flush()
        except (KeyboardInterrupt, SystemExit):
            # Never swallow interpreter-exit signals.
            raise
        except Exception:
            self.handleError(record)


def init_logger(logger,
                path=None,
                mode='w',
                level=None,
                handlers=None,
                verbose=True):
    """Configure root logging with a tqdm-aware handler and an optional
    file log.

    Args:
        logger: the logger whose effective level is set for this process.
        path: optional log-file path; parent directories are created.
        mode: file open mode ('w' fresh run, 'a' append when resuming).
        level: root logging level; defaults to WARNING.
        handlers: custom handlers; defaults to a TqdmHandler plus, when
            *path* is given, a FileHandler.
        verbose: if False this process logs at WARNING even when master.
    """
    level = level or logging.WARNING
    if not handlers:
        handlers = [TqdmHandler()]
        if path:
            os.makedirs(os.path.dirname(path) or './', exist_ok=True)
            handlers.append(logging.FileHandler(path, mode))
    logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s',
                        datefmt='%Y-%m-%d %H:%M:%S',
                        level=level,
                        handlers=handlers)
    # Only the master process (rank 0) logs at INFO; workers stay quiet.
    logger.setLevel(logging.INFO if is_master() and verbose else logging.WARNING)


def progress_bar(iterator,
                 ncols=None,
                 bar_format='{l_bar}{bar:18}| {n_fmt}/{total_fmt} {elapsed}<{remaining}, {rate_fmt}{postfix}',
                 leave=False,
                 **kwargs):
    # The bar is disabled entirely unless this is the master process and the
    # module logger is at INFO (i.e. verbose mode).
    return tqdm(iterator,
                ncols=ncols,
                bar_format=bar_format,
                ascii=True,
                disable=(not (logger.level == logging.INFO and is_master())),
                leave=leave,
                **kwargs)


logger = get_logger('supar')
from nltk.stem.porter import *
import pickle
import pprint
import enchant

pp = pprint.PrettyPrinter(width=200)


def get_merged_dict_myalgo(get_updated):
    """Merge noun-phrase keyword entries that differ only by the Porter stem
    of their final word (e.g. singular/plural variants), keeping the shorter
    spelling as the canonical key.

    Args:
        get_updated: if True, recompute the merge from keyword_dict_np.pkl
            and cache the result in keyword_dict_np_merged.pkl; if False,
            load the cached merged dictionary.

    Returns:
        The merged keyword dictionary.  (The original computed it but never
        returned it, which made the False branch a no-op.)
    """
    if get_updated:
        p_stemmer = PorterStemmer()

        # `with` guarantees the pickle file is closed even on error.
        with open("keyword_dict_np.pkl", "rb") as a_file:
            keyword_dict = dict(pickle.load(a_file))

        def merge_key(name):
            # Merge key: every word verbatim except the last, which is
            # stemmed — so only final-word variants are unified.
            words = str(name).split("_")
            return "".join(w + " " for w in words[:-1]) + p_stemmer.stem(words[-1])

        # Hoisted out of the O(n^2) comparison: stem each key exactly once
        # instead of once per pair.
        keys_to_compare = {k: merge_key(k) for k in keyword_dict}

        remove_list = []
        for k in keyword_dict:
            for other_k in keyword_dict:
                if keys_to_compare[k] != keys_to_compare[other_k]:
                    continue
                # Keep the shorter spelling; fold the longer one's values in.
                if len(k) > len(other_k):
                    remove_list.append(k)
                    keyword_dict[other_k] = keyword_dict[other_k] + keyword_dict[k]
                elif len(k) < len(other_k):
                    remove_list.append(other_k)
                    keyword_dict[k] = keyword_dict[k] + keyword_dict[other_k]

        for key in set(remove_list):
            del keyword_dict[key]

        # The symmetric double loop can append duplicates; dedupe each list.
        for k, v in keyword_dict.items():
            keyword_dict[k] = list(set(keyword_dict[k]))

        with open("keyword_dict_np_merged.pkl", "wb") as a_file:
            pickle.dump(keyword_dict, a_file)

    else:
        with open("keyword_dict_np_merged.pkl", "rb") as a_file:
            keyword_dict = dict(pickle.load(a_file))

    return keyword_dict


get_merged_dict_myalgo(True)
import pickle
import pprint

import PyPDF2
import numpy as np
import pandas
import enchant
from nltk.stem.porter import *

from tabula import read_pdf

# Change this file to accommodate new specs
INPUT_FILENAME = '5g-rrc.pdf'

pp = pprint.PrettyPrinter(width=150)
dictionary = enchant.Dict("en_US")


def get_IE_keywords_dict(get_updated):
    """Return the IE (information element) keyword dictionary.

    Args:
        get_updated: if True, rebuild the dictionary from the spec PDF's
            table of contents and cache it; if False, load the cached pickle.
    """
    if get_updated:
        keyword_dict_new = get_IE_toc()
        # `with` guarantees the cache file is closed even on error.
        with open("ie_from_pdf.pkl", "wb") as a_file:
            pickle.dump(keyword_dict_new, a_file)
    else:
        with open("ie_from_pdf.pkl", "rb") as a_file:
            keyword_dict_new = dict(pickle.load(a_file))

    return keyword_dict_new


def get_IE_toc():
    """Scan the spec's table-of-contents pages (5-24) for IE entries listed
    under section 6.3 and build a {normalized_name: [spellings]} dict.

    Returns:
        dict mapping lowercased, underscore-normalized IE names to the list
        of original spellings, with single common-English words removed.
    """
    ie_dict = dict()

    # Keep the PDF open for the whole scan; closed automatically afterwards.
    with open(INPUT_FILENAME, 'rb') as pdfFileObj:
        pdfReader = PyPDF2.PdfReader(pdfFileObj)

        last_section = ""
        for i in range(4, 24):
            pageObj = pdfReader.pages[i]
            lines = pageObj.extract_text().split("\n")
            for line in lines:
                # ToC rows contain dotted leaders; skip anything else.
                if "..." not in line:
                    continue

                line_splits = line.split()
                if len(line_splits) < 2:
                    continue

                section = line_splits[0]

                if len(section) > 0 and section[0].isnumeric():
                    last_section = section

                elif last_section.startswith("6.3") and section == "–":
                    # IE rows are continuation lines under a 6.3.x heading.
                    ie_text = line_splits[1].replace(".", "")
                    key = ie_text.lower().replace("-", "_")
                    # Fix: the original's two-way if/else reset the list to
                    # [ie_text] whenever the spelling was already recorded,
                    # discarding previously accumulated spellings.
                    if key not in ie_dict:
                        ie_dict[key] = [ie_text]
                    elif ie_text not in ie_dict[key]:
                        ie_dict[key].append(ie_text)

    # Drop single-token entries that are plain dictionary words; they are too
    # generic to be IE names.
    remove_list = []
    for k in ie_dict.keys():
        if len(ie_dict[k][0].split(" ")) == 1 and dictionary.check(ie_dict[k][0].split(" ")[0]):
            remove_list.append(k)

    for k in remove_list:
        del ie_dict[k]

    return ie_dict
19 | """ 20 | 21 | import nltk.tree 22 | from nltk import Tree 23 | import os 24 | 25 | from tree_cleanup import clean_tree 26 | 27 | INPUT_FILENAME = "input.pid" 28 | OUTPUT_FILENAME = "output.txt" 29 | 30 | 31 | def reverse_tag(tag): 32 | return tag[0] + "/" + tag[1:] 33 | 34 | 35 | def clean_text(text: str) -> str: 36 | text = text.replace("[ ", "(").replace(" ]", ")") # parenthesis 37 | text = text.replace("# ", "#") # cause 38 | text = text.replace("`` ", "\"").replace(" ''", "\"") # quotes 39 | text = text.replace(" ,", ",").replace(" .", ".").replace(" ;", ";").replace(" :", ":") # punctuations 40 | text = text.replace("& gt;", ">") 41 | text = text.replace(" (s)", "(s)") # special cases 42 | 43 | while " " in text: 44 | text = text.replace(" ", " ") 45 | text = text.strip() 46 | 47 | return text 48 | 49 | def xml_generator(tree: Tree): 50 | output_string = "" 51 | if type(tree) == nltk.tree.Tree and tree.height() > 2: 52 | for subtree in tree: 53 | if subtree.label() in ["", "", "", "", ""]: 54 | output_string += subtree.label() + " " + xml_generator(subtree) + reverse_tag( 55 | subtree.label()) + " " 56 | else: 57 | output_string += xml_generator(subtree) 58 | elif tree.height() == 2: 59 | for word in tree.leaves(): 60 | output_string += word + " " 61 | 62 | return output_string 63 | 64 | 65 | def convert_xml(input_filename, output_filename): 66 | 67 | input_file = open(input_filename, "r") 68 | lines = input_file.readlines() 69 | input_file.close() 70 | 71 | tree_strings = lines 72 | xml_lines = [] 73 | for nltk_tree in tree_strings: 74 | nltk_tree = clean_tree(nltk_tree) 75 | converted_text = xml_generator(Tree.fromstring(nltk_tree)) 76 | converted_text = clean_text(converted_text) 77 | 78 | xml_lines.append(converted_text) 79 | 80 | with open(output_filename, "w") as outfile: 81 | for line in xml_lines: 82 | outfile.write(line + "\n") 83 | outfile.close() 84 | 85 | 86 | if __name__ == '__main__': 87 | convert_xml(INPUT_FILENAME, OUTPUT_FILENAME) 88 | 89 
| -------------------------------------------------------------------------------- /neutrex/supar/utils/config.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | This is the public release of the code of our paper titled 4 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 5 | Language Specifications" (USENIX Security '24) 6 | Author: Abdullah Al Ishtiaq and Sarkar Snigdha Sarathi Das 7 | Contact: abdullah.ishtiaq@psu.edu 8 | 9 | Licensed under the Apache License, Version 2.0 (the "License"); 10 | you may not use this file except in compliance with the License. 11 | You may obtain a copy of the License at 12 | 13 | https://www.apache.org/licenses/LICENSE-2.0 14 | 15 | Unless required by applicable law or agreed to in writing, software 16 | distributed under the License is distributed on an "AS IS" BASIS, 17 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | See the License for the specific language governing permissions and 19 | limitations under the License. 
20 | """ 21 | 22 | import argparse 23 | import os 24 | from ast import literal_eval 25 | from configparser import ConfigParser 26 | 27 | import supar 28 | from supar.utils.fn import download 29 | 30 | 31 | class Config(object): 32 | 33 | def __init__(self, **kwargs): 34 | super(Config, self).__init__() 35 | 36 | self.update(kwargs) 37 | 38 | def __repr__(self): 39 | s = line = "-" * 20 + "-+-" + "-" * 30 + "\n" 40 | s += f"{'Param':20} | {'Value':^30}\n" + line 41 | for name, value in vars(self).items(): 42 | s += f"{name:20} | {str(value):^30}\n" 43 | s += line 44 | 45 | return s 46 | 47 | def __getitem__(self, key): 48 | return getattr(self, key) 49 | 50 | def __contains__(self, key): 51 | return hasattr(self, key) 52 | 53 | def __getstate__(self): 54 | return vars(self) 55 | 56 | def __setstate__(self, state): 57 | self.__dict__.update(state) 58 | 59 | def keys(self): 60 | return vars(self).keys() 61 | 62 | def items(self): 63 | return vars(self).items() 64 | 65 | def update(self, kwargs): 66 | for key in ('self', 'cls', '__class__'): 67 | kwargs.pop(key, None) 68 | kwargs.update(kwargs.pop('kwargs', dict())) 69 | for name, value in kwargs.items(): 70 | setattr(self, name, value) 71 | return self 72 | 73 | def get(self, key, default=None): 74 | return getattr(self, key) if hasattr(self, key) else default 75 | 76 | def pop(self, key, val=None): 77 | return self.__dict__.pop(key, val) 78 | 79 | @classmethod 80 | def load(cls, conf='', unknown=None, **kwargs): 81 | config = ConfigParser() 82 | config.read(conf if not conf or os.path.exists(conf) else download(supar.CONFIG['github'].get(conf, conf))) 83 | config = dict((name, literal_eval(value)) 84 | for section in config.sections() 85 | for name, value in config.items(section)) 86 | if unknown is not None: 87 | parser = argparse.ArgumentParser() 88 | for name, value in config.items(): 89 | parser.add_argument('--'+name.replace('_', '-'), type=type(value), default=value) 90 | 
config.update(vars(parser.parse_args(unknown))) 91 | config.update(kwargs) 92 | return cls(**config) 93 | -------------------------------------------------------------------------------- /synthesizers/script_msg_defs_5_rrc.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is the public release of the code of our paper titled 3 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 4 | Language Specifications" (USENIX Security '24) 5 | Author: Abdullah Al Ishtiaq 6 | Contact: abdullah.ishtiaq@psu.edu 7 | 8 | Licensed under the Apache License, Version 2.0 (the "License"); 9 | you may not use this file except in compliance with the License. 10 | You may obtain a copy of the License at 11 | 12 | https://www.apache.org/licenses/LICENSE-2.0 13 | 14 | Unless required by applicable law or agreed to in writing, software 15 | distributed under the License is distributed on an "AS IS" BASIS, 16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | See the License for the specific language governing permissions and 18 | limitations under the License. 
19 | """ 20 | 21 | um_msg_list = { 22 | "countercheckresponse", "dedicatedsibrequest", "uldedicatedmessagesegment", "ulinformationtransferirat", 23 | "ulinformationtransfermrdc", "failureinformation", "locationmeasurementindication", "mcgfailureinformation", 24 | "mbsinterestindication", "measurementreport", "measurementreportapplayer", "rrcresumerequest1", "rrcsetuprequest", 25 | "rrcresumerequest", "ueinformationresponse", "rrcreestablishmentrequest", "rrcreestablishmentcomplete", 26 | "rrcsetupcomplete", "rrcresumecomplete", "rrcreconfigurationcomplete", "rrcsysteminforequest", 27 | "ulinformationtransfer", "scgfailureinformationeutra", "scgfailureinformation", "securitymodefailure", 28 | "securitymodecomplete", "sidelinkueinformationnr", "ueassistanceinformation", "uecapabilityinformation", 29 | "rrcreconfigurationcompletesidelink_message" 30 | } 31 | 32 | mu_msg_list = { 33 | "rrcsetup", "countercheck", "dldedicatedmessagesegment", "dlinformationtransfermrdc", "dlinformationtransfer", 34 | "loggedmeasurementconfiguration", "mbsbroadcastconfiguration", "mib", "paging", "mobilityfromnrcommand", 35 | "ueinformationrequest", "systeminformation", "rrcreestablishment", "rrcreconfiguration", "rrcreject", "rrcresume", 36 | "rrcrelease", "securitymodecommand", "uecapabilityenquiry", "rrcreconfigurationsidelink_message" 37 | 38 | 39 | } 40 | 41 | both_dir_msg_list = { 42 | "iabotherinformation", "rrc_message", "nas_message", "warning_message" 43 | } 44 | 45 | 46 | special_msg_list = { 47 | "iabotherinformation", "rrc_message", "nas_message", "warning_message" 48 | "initial_nas_message", "user_data", "uplink_signalling", "uplink_data", "downlink_signalling", 49 | "downlink_data", "five_gmm_message", "five_gsm_message" 50 | } 51 | 52 | 53 | msg_response = { 54 | "countercheck": "countercheckresponse", 55 | "securitymodecommand": "securitymodecomplete", 56 | "uecapabilityenquiry": "uecapabilityinformation", 57 | "rrcreconfiguration": "rrcreconfigurationcomplete", 58 | 
"rrcreconfigurationsidelink_message": "rrcreconfigurationcompletesidelink_message", 59 | "rrcreestablishmentrequest": "rrcreestablishment", 60 | "rrcreestablishment": "rrcreestablishmentcomplete", 61 | "rrcsetuprequest": "rrcsetup", 62 | "rrcsetup": "rrcsetupcomplete", 63 | "rrcresumerequest": "rrcresume", 64 | "rrcresumerequest1": "rrcresume", 65 | "rrcresume": "rrcresume", 66 | 67 | } 68 | 69 | mme_wait_for_message = { 70 | } 71 | 72 | check_mme_wait_for = { 73 | 74 | } 75 | 76 | 77 | emm_sublayer_msg_list = { 78 | } 79 | 80 | esm_sublayer_msg_list = { 81 | } -------------------------------------------------------------------------------- /neutrex/supar/utils/vocab.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | This is the public release of the code of our paper titled 4 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 5 | Language Specifications" (USENIX Security '24) 6 | Author: Abdullah Al Ishtiaq and Sarkar Snigdha Sarathi Das 7 | Contact: abdullah.ishtiaq@psu.edu 8 | 9 | Licensed under the Apache License, Version 2.0 (the "License"); 10 | you may not use this file except in compliance with the License. 11 | You may obtain a copy of the License at 12 | 13 | https://www.apache.org/licenses/LICENSE-2.0 14 | 15 | Unless required by applicable law or agreed to in writing, software 16 | distributed under the License is distributed on an "AS IS" BASIS, 17 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | See the License for the specific language governing permissions and 19 | limitations under the License. 20 | """ 21 | 22 | from collections import defaultdict 23 | from collections.abc import Iterable 24 | 25 | 26 | class Vocab(object): 27 | r""" 28 | Defines a vocabulary object that will be used to numericalize a field. 
29 | 30 | Args: 31 | counter (~collections.Counter): 32 | :class:`~collections.Counter` object holding the frequencies of each value found in the data. 33 | min_freq (int): 34 | The minimum frequency needed to include a token in the vocabulary. Default: 1. 35 | specials (list[str]): 36 | The list of special tokens (e.g., pad, unk, bos and eos) that will be prepended to the vocabulary. Default: []. 37 | unk_index (int): 38 | The index of unk token. Default: 0. 39 | 40 | Attributes: 41 | itos: 42 | A list of token strings indexed by their numerical identifiers. 43 | stoi: 44 | A :class:`~collections.defaultdict` object mapping token strings to numerical identifiers. 45 | """ 46 | 47 | def __init__(self, counter, min_freq=1, specials=[], unk_index=0): 48 | self.itos = list(specials) 49 | self.stoi = defaultdict(lambda: unk_index) 50 | self.stoi.update({token: i for i, token in enumerate(self.itos)}) 51 | self.extend([token for token, freq in counter.items() 52 | if freq >= min_freq]) 53 | self.unk_index = unk_index 54 | self.n_init = len(self) 55 | 56 | def __len__(self): 57 | return len(self.itos) 58 | 59 | def __getitem__(self, key): 60 | if isinstance(key, str): 61 | return self.stoi[key] 62 | elif not isinstance(key, Iterable): 63 | return self.itos[key] 64 | elif isinstance(key[0], str): 65 | return [self.stoi[i] for i in key] 66 | else: 67 | return [self.itos[i] for i in key] 68 | 69 | def __contains__(self, token): 70 | return token in self.stoi 71 | 72 | def __getstate__(self): 73 | # avoid picking defaultdict 74 | attrs = dict(self.__dict__) 75 | # cast to regular dict 76 | attrs['stoi'] = dict(self.stoi) 77 | return attrs 78 | 79 | def __setstate__(self, state): 80 | stoi = defaultdict(lambda: self.unk_index) 81 | stoi.update(state['stoi']) 82 | state['stoi'] = stoi 83 | self.__dict__.update(state) 84 | 85 | def items(self): 86 | return self.stoi.items() 87 | 88 | def extend(self, tokens): 89 | self.itos.extend(sorted(set(tokens).difference(self.stoi))) 90 | 
self.stoi.update({token: i for i, token in enumerate(self.itos)}) 91 | -------------------------------------------------------------------------------- /neutrex/tests/test_transform.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | This is the public release of the code of our paper titled 4 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 5 | Language Specifications" (USENIX Security '24) 6 | Author: Abdullah Al Ishtiaq and Sarkar Snigdha Sarathi Das 7 | Contact: abdullah.ishtiaq@psu.edu 8 | 9 | Licensed under the Apache License, Version 2.0 (the "License"); 10 | you may not use this file except in compliance with the License. 11 | You may obtain a copy of the License at 12 | 13 | https://www.apache.org/licenses/LICENSE-2.0 14 | 15 | Unless required by applicable law or agreed to in writing, software 16 | distributed under the License is distributed on an "AS IS" BASIS, 17 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | See the License for the specific language governing permissions and 19 | limitations under the License. 
20 | """ 21 | 22 | import itertools 23 | 24 | import nltk 25 | from supar.utils import CoNLL, Tree 26 | 27 | 28 | class TestCoNLL: 29 | 30 | def istree_naive(self, sequence, proj=False, multiroot=True): 31 | if proj and not CoNLL.isprojective(sequence): 32 | return False 33 | roots = [i for i, head in enumerate(sequence, 1) if head == 0] 34 | if len(roots) == 0: 35 | return False 36 | if len(roots) > 1 and not multiroot: 37 | return False 38 | sequence = [-1] + sequence 39 | 40 | def track(sequence, visited, i): 41 | if visited[i]: 42 | return False 43 | visited[i] = True 44 | for j, head in enumerate(sequence[1:], 1): 45 | if head == i: 46 | track(sequence, visited, j) 47 | return True 48 | visited = [False]*len(sequence) 49 | for root in roots: 50 | if not track(sequence, visited, root): 51 | return False 52 | if any([not i for i in visited[1:]]): 53 | return False 54 | return True 55 | 56 | def test_isprojective(self): 57 | assert CoNLL.isprojective([2, 4, 2, 0, 5]) 58 | assert CoNLL.isprojective([3, -1, 0, -1, 3]) 59 | assert not CoNLL.isprojective([2, 4, 0, 3, 4]) 60 | assert not CoNLL.isprojective([4, -1, 0, -1, 4]) 61 | assert not CoNLL.isprojective([2, -1, -1, 1, 0]) 62 | assert not CoNLL.isprojective([0, 5, -1, -1, 4]) 63 | 64 | def test_istree(self): 65 | permutations = [list(sequence[:5]) for sequence in itertools.permutations(range(6))] 66 | for sequence in permutations: 67 | assert CoNLL.istree(sequence, False, False) == self.istree_naive(sequence, False, False), f"{sequence}" 68 | assert CoNLL.istree(sequence, False, True) == self.istree_naive(sequence, False, True), f"{sequence}" 69 | assert CoNLL.istree(sequence, True, False) == self.istree_naive(sequence, True, False), f"{sequence}" 70 | assert CoNLL.istree(sequence, True, True) == self.istree_naive(sequence, True, True), f"{sequence}" 71 | 72 | 73 | class TestTree: 74 | 75 | def test_tree(self): 76 | tree = nltk.Tree.fromstring(""" 77 | (TOP 78 | (S 79 | (NP (DT This) (NN time)) 80 | (, ,) 81 | 
(NP (DT the) (NNS firms)) 82 | (VP (VBD were) (ADJP (JJ ready))) 83 | (. .))) 84 | """) 85 | assert tree == Tree.build(tree, Tree.factorize(Tree.binarize(tree)[0])) 86 | -------------------------------------------------------------------------------- /neutrex/supar/cmds/vi_con.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | This is the public release of the code of our paper titled 4 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 5 | Language Specifications" (USENIX Security '24) 6 | Author: Abdullah Al Ishtiaq and Sarkar Snigdha Sarathi Das 7 | Contact: abdullah.ishtiaq@psu.edu 8 | 9 | Licensed under the Apache License, Version 2.0 (the "License"); 10 | you may not use this file except in compliance with the License. 11 | You may obtain a copy of the License at 12 | 13 | https://www.apache.org/licenses/LICENSE-2.0 14 | 15 | Unless required by applicable law or agreed to in writing, software 16 | distributed under the License is distributed on an "AS IS" BASIS, 17 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | See the License for the specific language governing permissions and 19 | limitations under the License. 
20 | """ 21 | 22 | import argparse 23 | 24 | from supar import VIConstituencyParser 25 | from supar.cmds.cmd import parse 26 | 27 | 28 | def main(): 29 | parser = argparse.ArgumentParser(description='Create Constituency Parser using Variational Inference.') 30 | parser.set_defaults(Parser=VIConstituencyParser) 31 | subparsers = parser.add_subparsers(title='Commands', dest='mode') 32 | # train 33 | subparser = subparsers.add_parser('train', help='Train a parser.') 34 | subparser.add_argument('--feat', '-f', choices=['tag', 'char', 'elmo', 'bert'], nargs='+', help='features to use') 35 | subparser.add_argument('--build', '-b', action='store_true', help='whether to build the model first') 36 | subparser.add_argument('--checkpoint', action='store_true', help='whether to load a checkpoint to restore training') 37 | subparser.add_argument('--encoder', choices=['lstm', 'bert'], default='lstm', help='encoder to use') 38 | subparser.add_argument('--max-len', type=int, help='max length of the sentences') 39 | subparser.add_argument('--buckets', default=32, type=int, help='max num of buckets to use') 40 | subparser.add_argument('--train', default='data/ptb/train.pid', help='path to train file') 41 | subparser.add_argument('--dev', default='data/ptb/dev.pid', help='path to dev file') 42 | subparser.add_argument('--test', default='data/ptb/test.pid', help='path to test file') 43 | subparser.add_argument('--embed', default='data/glove.6B.100d.txt', help='path to pretrained embeddings') 44 | subparser.add_argument('--unk', default='unk', help='unk token in pretrained embeddings') 45 | subparser.add_argument('--n-embed', default=100, type=int, help='dimension of embeddings') 46 | subparser.add_argument('--bert', default='bert-base-cased', help='which BERT model to use') 47 | subparser.add_argument('--inference', default='mfvi', choices=['mfvi', 'lbp'], help='approximate inference methods') 48 | # evaluate 49 | subparser = subparsers.add_parser('evaluate', help='Evaluate the 
specified parser and dataset.') 50 | subparser.add_argument('--buckets', default=8, type=int, help='max num of buckets to use') 51 | subparser.add_argument('--data', default='data/ptb/test.pid', help='path to dataset') 52 | # predict 53 | subparser = subparsers.add_parser('predict', help='Use a trained parser to make predictions.') 54 | subparser.add_argument('--buckets', default=8, type=int, help='max num of buckets to use') 55 | subparser.add_argument('--data', default='data/ptb/test.pid', help='path to dataset') 56 | subparser.add_argument('--pred', default='pred.pid', help='path to predicted result') 57 | subparser.add_argument('--prob', action='store_true', help='whether to output probs') 58 | parse(parser) 59 | 60 | 61 | if __name__ == "__main__": 62 | main() 63 | -------------------------------------------------------------------------------- /neutrex/supar/cmds/crf_con.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | This is the public release of the code of our paper titled 4 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 5 | Language Specifications" (USENIX Security '24) 6 | Author: Abdullah Al Ishtiaq and Sarkar Snigdha Sarathi Das 7 | Contact: abdullah.ishtiaq@psu.edu 8 | 9 | Licensed under the Apache License, Version 2.0 (the "License"); 10 | you may not use this file except in compliance with the License. 11 | You may obtain a copy of the License at 12 | 13 | https://www.apache.org/licenses/LICENSE-2.0 14 | 15 | Unless required by applicable law or agreed to in writing, software 16 | distributed under the License is distributed on an "AS IS" BASIS, 17 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | See the License for the specific language governing permissions and 19 | limitations under the License. 
20 | """ 21 | 22 | import argparse 23 | import sys 24 | from supar import CRFConstituencyParser 25 | from supar.cmds.cmd import parse 26 | 27 | sys.setrecursionlimit(8000) 28 | 29 | 30 | 31 | 32 | def main(): 33 | parser = argparse.ArgumentParser(description='Create CRF Constituency Parser.') 34 | parser.set_defaults(Parser=CRFConstituencyParser) 35 | parser.add_argument('--mbr', action='store_true', help='whether to use MBR decoding') 36 | subparsers = parser.add_subparsers(title='Commands', dest='mode') 37 | # train 38 | subparser = subparsers.add_parser('train', help='Train a parser.') 39 | subparser.add_argument('--feat', '-f', choices=['tag', 'char', 'elmo', 'bert'], nargs='+', help='features to use') 40 | subparser.add_argument('--build', '-b', action='store_true', help='whether to build the model first') 41 | subparser.add_argument('--checkpoint', action='store_true', help='whether to load a checkpoint to restore training') 42 | subparser.add_argument('--encoder', choices=['lstm', 'bert'], default='lstm', help='encoder to use') 43 | subparser.add_argument('--max-len', type=int, help='max length of the sentences') 44 | subparser.add_argument('--buckets', default=32, type=int, help='max num of buckets to use') 45 | subparser.add_argument('--train', default='data/ptb/train.pid', help='path to train file') 46 | subparser.add_argument('--dev', default='data/ptb/dev.pid', help='path to dev file') 47 | subparser.add_argument('--test', default='data/ptb/test.pid', help='path to test file') 48 | subparser.add_argument('--embed', default='data/glove.6B.100d.txt', help='path to pretrained embeddings') 49 | subparser.add_argument('--unk', default='unk', help='unk token in pretrained embeddings') 50 | subparser.add_argument('--n-embed', default=100, type=int, help='dimension of embeddings') 51 | subparser.add_argument('--bert', default='bert-base-cased', help='which BERT model to use') 52 | # evaluate 53 | subparser = subparsers.add_parser('evaluate', help='Evaluate the 
specified parser and dataset.') 54 | subparser.add_argument('--buckets', default=8, type=int, help='max num of buckets to use') 55 | subparser.add_argument('--data', default='data/ptb/test.pid', help='path to dataset') 56 | # predict 57 | subparser = subparsers.add_parser('predict', help='Use a trained parser to make predictions.') 58 | subparser.add_argument('--buckets', default=8, type=int, help='max num of buckets to use') 59 | subparser.add_argument('--data', default='data/ptb/test.pid', help='path to dataset') 60 | subparser.add_argument('--pred', default='pred.pid', help='path to predicted result') 61 | subparser.add_argument('--prob', action='store_true', help='whether to output probs') 62 | parse(parser) 63 | 64 | 65 | if __name__ == "__main__": 66 | main() 67 | -------------------------------------------------------------------------------- /synthesizers/script_build_string_keyword_distance.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is the public release of the code of our paper titled 3 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 4 | Language Specifications" (USENIX Security '24) 5 | Author: Abdullah Al Ishtiaq 6 | Contact: abdullah.ishtiaq@psu.edu 7 | 8 | Licensed under the Apache License, Version 2.0 (the "License"); 9 | you may not use this file except in compliance with the License. 10 | You may obtain a copy of the License at 11 | 12 | https://www.apache.org/licenses/LICENSE-2.0 13 | 14 | Unless required by applicable law or agreed to in writing, software 15 | distributed under the License is distributed on an "AS IS" BASIS, 16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | See the License for the specific language governing permissions and 18 | limitations under the License. 
19 | """ 20 | 21 | import copy 22 | from Levenshtein import distance as levenshtein_distance 23 | 24 | LOCAL_DISTANCE_THR = 3 25 | LOCAL_SHORT_THR = 5 26 | 27 | 28 | def build_string_distance(db_conn, db_cursor, text: str, keywords_dict: dict, skip_substr=False, 29 | skip_matched_string=False, update_existing=False, searched_strings=None, 30 | thread_num=0) -> None: 31 | if searched_strings is None: 32 | searched_strings = {} 33 | 34 | if len(keywords_dict) == 0: 35 | return 36 | 37 | from script_db_handler import insert_substring_keyword_distance_batch, db_commit, substring_in_db, \ 38 | matched_string_in_db 39 | 40 | max_key_len = max([len(item) for item in keywords_dict]) 41 | 42 | text = copy.deepcopy(text.lower()) 43 | text_len = len(text) 44 | 45 | for start_idx in range(text_len): 46 | insert_list = [] 47 | 48 | if text[start_idx] == "<" or text[start_idx] == ">": 49 | continue 50 | for end_idx in range(start_idx, text_len): 51 | if text[end_idx - 1] == "<" or text[end_idx - 1] == ">": 52 | break 53 | elif start_idx == end_idx: 54 | continue 55 | elif end_idx - start_idx > max_key_len + LOCAL_DISTANCE_THR: 56 | break 57 | 58 | substr = text[start_idx: end_idx] 59 | if substr.strip() == "": 60 | continue 61 | elif substr in searched_strings: 62 | continue 63 | elif skip_substr and substring_in_db(db_cursor, substr): 64 | searched_strings[substr] = 1 65 | continue 66 | 67 | searched_strings[substr] = 1 68 | 69 | for lookup_text in keywords_dict: 70 | lookup_len = len(lookup_text) 71 | if lookup_len != len(substr): 72 | continue 73 | elif skip_matched_string and matched_string_in_db(db_cursor, lookup_text): 74 | continue 75 | 76 | keyword = keywords_dict[lookup_text] 77 | 78 | if lookup_len < LOCAL_SHORT_THR: 79 | lookup_text = " " + lookup_text + " " 80 | substr = " " + substr + " " 81 | 82 | dist = levenshtein_distance(substr, lookup_text) 83 | 84 | lookup_text = lookup_text.strip() 85 | if lookup_len < LOCAL_SHORT_THR: 86 | substr = substr[1:-1] 87 | 88 | if 
dist > LOCAL_DISTANCE_THR or dist >= lookup_len: 89 | continue 90 | 91 | insert_list.append((substr, lookup_text, keyword, dist)) 92 | 93 | insert_substring_keyword_distance_batch(db_conn, db_cursor, insert_list, update_existing, thread_num=thread_num) 94 | db_commit(db_conn) 95 | -------------------------------------------------------------------------------- /neutrex/supar/cmds/biaffine_sdp.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | This is the public release of the code of our paper titled 4 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 5 | Language Specifications" (USENIX Security '24) 6 | Author: Abdullah Al Ishtiaq and Sarkar Snigdha Sarathi Das 7 | Contact: abdullah.ishtiaq@psu.edu 8 | 9 | Licensed under the Apache License, Version 2.0 (the "License"); 10 | you may not use this file except in compliance with the License. 11 | You may obtain a copy of the License at 12 | 13 | https://www.apache.org/licenses/LICENSE-2.0 14 | 15 | Unless required by applicable law or agreed to in writing, software 16 | distributed under the License is distributed on an "AS IS" BASIS, 17 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | See the License for the specific language governing permissions and 19 | limitations under the License. 
20 | """ 21 | 22 | import argparse 23 | 24 | from supar import BiaffineSemanticDependencyParser 25 | from supar.cmds.cmd import parse 26 | 27 | 28 | def main(): 29 | parser = argparse.ArgumentParser(description='Create Biaffine Semantic Dependency Parser.') 30 | parser.set_defaults(Parser=BiaffineSemanticDependencyParser) 31 | subparsers = parser.add_subparsers(title='Commands', dest='mode') 32 | # train 33 | subparser = subparsers.add_parser('train', help='Train a parser.') 34 | subparser.add_argument('--feat', '-f', choices=['tag', 'char', 'lemma', 'elmo', 'bert'], nargs='+', help='features to use') 35 | subparser.add_argument('--build', '-b', action='store_true', help='whether to build the model first') 36 | subparser.add_argument('--checkpoint', action='store_true', help='whether to load a checkpoint to restore training') 37 | subparser.add_argument('--encoder', choices=['lstm', 'bert'], default='lstm', help='encoder to use') 38 | subparser.add_argument('--max-len', type=int, help='max length of the sentences') 39 | subparser.add_argument('--buckets', default=32, type=int, help='max num of buckets to use') 40 | subparser.add_argument('--train', default='data/sdp/DM/train.conllu', help='path to train file') 41 | subparser.add_argument('--dev', default='data/sdp/DM/dev.conllu', help='path to dev file') 42 | subparser.add_argument('--test', default='data/sdp/DM/test.conllu', help='path to test file') 43 | subparser.add_argument('--embed', default='data/glove.6B.100d.txt', help='path to pretrained embeddings') 44 | subparser.add_argument('--unk', default='unk', help='unk token in pretrained embeddings') 45 | subparser.add_argument('--n-embed', default=100, type=int, help='dimension of embeddings') 46 | subparser.add_argument('--n-embed-proj', default=125, type=int, help='dimension of projected embeddings') 47 | subparser.add_argument('--bert', default='bert-base-cased', help='which BERT model to use') 48 | # evaluate 49 | subparser = 
subparsers.add_parser('evaluate', help='Evaluate the specified parser and dataset.') 50 | subparser.add_argument('--buckets', default=8, type=int, help='max num of buckets to use') 51 | subparser.add_argument('--data', default='data/sdp/DM/test.conllu', help='path to dataset') 52 | # predict 53 | subparser = subparsers.add_parser('predict', help='Use a trained parser to make predictions.') 54 | subparser.add_argument('--buckets', default=8, type=int, help='max num of buckets to use') 55 | subparser.add_argument('--data', default='data/sdp/DM/test.conllu', help='path to dataset') 56 | subparser.add_argument('--pred', default='pred.conllu', help='path to predicted result') 57 | subparser.add_argument('--prob', action='store_true', help='whether to output probs') 58 | parse(parser) 59 | 60 | 61 | if __name__ == "__main__": 62 | main() 63 | -------------------------------------------------------------------------------- /neutrex/supar/cmds/vi_sdp.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | This is the public release of the code of our paper titled 4 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 5 | Language Specifications" (USENIX Security '24) 6 | Author: Abdullah Al Ishtiaq and Sarkar Snigdha Sarathi Das 7 | Contact: abdullah.ishtiaq@psu.edu 8 | 9 | Licensed under the Apache License, Version 2.0 (the "License"); 10 | you may not use this file except in compliance with the License. 11 | You may obtain a copy of the License at 12 | 13 | https://www.apache.org/licenses/LICENSE-2.0 14 | 15 | Unless required by applicable law or agreed to in writing, software 16 | distributed under the License is distributed on an "AS IS" BASIS, 17 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | See the License for the specific language governing permissions and 19 | limitations under the License. 
20 | """ 21 | 22 | import argparse 23 | 24 | from supar import VISemanticDependencyParser 25 | from supar.cmds.cmd import parse 26 | 27 | 28 | def main(): 29 | parser = argparse.ArgumentParser(description='Create Semantic Dependency Parser using Variational Inference.') 30 | parser.set_defaults(Parser=VISemanticDependencyParser) 31 | subparsers = parser.add_subparsers(title='Commands', dest='mode') 32 | # train 33 | subparser = subparsers.add_parser('train', help='Train a parser.') 34 | subparser.add_argument('--feat', '-f', choices=['tag', 'char', 'lemma', 'elmo', 'bert'], nargs='+', help='features to use') 35 | subparser.add_argument('--build', '-b', action='store_true', help='whether to build the model first') 36 | subparser.add_argument('--checkpoint', action='store_true', help='whether to load a checkpoint to restore training') 37 | subparser.add_argument('--encoder', choices=['lstm', 'bert'], default='lstm', help='encoder to use') 38 | subparser.add_argument('--max-len', type=int, help='max length of the sentences') 39 | subparser.add_argument('--buckets', default=32, type=int, help='max num of buckets to use') 40 | subparser.add_argument('--train', default='data/sdp/DM/train.conllu', help='path to train file') 41 | subparser.add_argument('--dev', default='data/sdp/DM/dev.conllu', help='path to dev file') 42 | subparser.add_argument('--test', default='data/sdp/DM/test.conllu', help='path to test file') 43 | subparser.add_argument('--embed', default='data/glove.6B.100d.txt', help='path to pretrained embeddings') 44 | subparser.add_argument('--unk', default='unk', help='unk token in pretrained embeddings') 45 | subparser.add_argument('--n-embed', default=100, type=int, help='dimension of embeddings') 46 | subparser.add_argument('--n-embed-proj', default=125, type=int, help='dimension of projected embeddings') 47 | subparser.add_argument('--bert', default='bert-base-cased', help='which BERT model to use') 48 | subparser.add_argument('--inference', 
default='mfvi', choices=['mfvi', 'lbp'], help='approximate inference methods') 49 | # evaluate 50 | subparser = subparsers.add_parser('evaluate', help='Evaluate the specified parser and dataset.') 51 | subparser.add_argument('--buckets', default=8, type=int, help='max num of buckets to use') 52 | subparser.add_argument('--data', default='data/sdp/DM/test.conllu', help='path to dataset') 53 | # predict 54 | subparser = subparsers.add_parser('predict', help='Use a trained parser to make predictions.') 55 | subparser.add_argument('--buckets', default=8, type=int, help='max num of buckets to use') 56 | subparser.add_argument('--data', default='data/sdp/DM/test.conllu', help='path to dataset') 57 | subparser.add_argument('--pred', default='pred.conllu', help='path to predicted result') 58 | subparser.add_argument('--prob', action='store_true', help='whether to output probs') 59 | parse(parser) 60 | 61 | 62 | if __name__ == "__main__": 63 | main() 64 | -------------------------------------------------------------------------------- /neutrex/supar/cmds/biaffine_dep.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | This is the public release of the code of our paper titled 4 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 5 | Language Specifications" (USENIX Security '24) 6 | Author: Abdullah Al Ishtiaq and Sarkar Snigdha Sarathi Das 7 | Contact: abdullah.ishtiaq@psu.edu 8 | 9 | Licensed under the Apache License, Version 2.0 (the "License"); 10 | you may not use this file except in compliance with the License. 
def main():
    """CLI entry point: build/evaluate/predict with the biaffine dependency parser."""
    parser = argparse.ArgumentParser(description='Create Biaffine Dependency Parser.')
    # Global decoding options shared by every sub-command.
    parser.add_argument('--tree', action='store_true', help='whether to ensure well-formedness')
    parser.add_argument('--proj', action='store_true', help='whether to projectivize the data')
    parser.add_argument('--partial', action='store_true', help='whether partial annotation is included')
    parser.set_defaults(Parser=BiaffineDependencyParser)
    commands = parser.add_subparsers(title='Commands', dest='mode')

    # -- train ------------------------------------------------------------
    cmd = commands.add_parser('train', help='Train a parser.')
    cmd.add_argument('--feat', '-f', choices=['tag', 'char', 'elmo', 'bert'], nargs='+', help='features to use')
    cmd.add_argument('--build', '-b', action='store_true', help='whether to build the model first')
    cmd.add_argument('--checkpoint', action='store_true', help='whether to load a checkpoint to restore training')
    cmd.add_argument('--encoder', choices=['lstm', 'bert'], default='lstm', help='encoder to use')
    cmd.add_argument('--punct', action='store_true', help='whether to include punctuation')
    cmd.add_argument('--max-len', type=int, help='max length of the sentences')
    cmd.add_argument('--buckets', default=32, type=int, help='max num of buckets to use')
    cmd.add_argument('--train', default='data/ptb/train.conllx', help='path to train file')
    cmd.add_argument('--dev', default='data/ptb/dev.conllx', help='path to dev file')
    cmd.add_argument('--test', default='data/ptb/test.conllx', help='path to test file')
    cmd.add_argument('--embed', default='data/glove.6B.100d.txt', help='path to pretrained embeddings')
    cmd.add_argument('--unk', default='unk', help='unk token in pretrained embeddings')
    cmd.add_argument('--n-embed', default=100, type=int, help='dimension of embeddings')
    cmd.add_argument('--bert', default='bert-base-cased', help='which BERT model to use')

    # -- evaluate ---------------------------------------------------------
    cmd = commands.add_parser('evaluate', help='Evaluate the specified parser and dataset.')
    cmd.add_argument('--punct', action='store_true', help='whether to include punctuation')
    cmd.add_argument('--buckets', default=8, type=int, help='max num of buckets to use')
    cmd.add_argument('--data', default='data/ptb/test.conllx', help='path to dataset')

    # -- predict ----------------------------------------------------------
    cmd = commands.add_parser('predict', help='Use a trained parser to make predictions.')
    cmd.add_argument('--buckets', default=8, type=int, help='max num of buckets to use')
    cmd.add_argument('--data', default='data/ptb/test.conllx', help='path to dataset')
    cmd.add_argument('--pred', default='pred.conllx', help='path to predicted result')
    cmd.add_argument('--prob', action='store_true', help='whether to output probs')

    # Dispatch to the shared supar command runner.
    parse(parser)
def main():
    """CLI entry point: build/evaluate/predict with the first-order CRF dependency parser."""
    parser = argparse.ArgumentParser(description='Create first-order CRF Dependency Parser.')
    parser.set_defaults(Parser=CRFDependencyParser)
    # Global decoding options shared by every sub-command.
    parser.add_argument('--mbr', action='store_true', help='whether to use MBR decoding')
    parser.add_argument('--tree', action='store_true', help='whether to ensure well-formedness')
    parser.add_argument('--proj', action='store_true', help='whether to projectivize the data')
    parser.add_argument('--partial', action='store_true', help='whether partial annotation is included')
    commands = parser.add_subparsers(title='Commands', dest='mode')

    # -- train ------------------------------------------------------------
    cmd = commands.add_parser('train', help='Train a parser.')
    cmd.add_argument('--feat', '-f', choices=['tag', 'char', 'elmo', 'bert'], nargs='+', help='features to use')
    cmd.add_argument('--build', '-b', action='store_true', help='whether to build the model first')
    cmd.add_argument('--checkpoint', action='store_true', help='whether to load a checkpoint to restore training')
    cmd.add_argument('--encoder', choices=['lstm', 'bert'], default='lstm', help='encoder to use')
    cmd.add_argument('--punct', action='store_true', help='whether to include punctuation')
    cmd.add_argument('--max-len', type=int, help='max length of the sentences')
    cmd.add_argument('--buckets', default=32, type=int, help='max num of buckets to use')
    cmd.add_argument('--train', default='data/ptb/train.conllx', help='path to train file')
    cmd.add_argument('--dev', default='data/ptb/dev.conllx', help='path to dev file')
    cmd.add_argument('--test', default='data/ptb/test.conllx', help='path to test file')
    cmd.add_argument('--embed', default='data/glove.6B.100d.txt', help='path to pretrained embeddings')
    cmd.add_argument('--unk', default='unk', help='unk token in pretrained embeddings')
    cmd.add_argument('--n-embed', default=100, type=int, help='dimension of embeddings')
    cmd.add_argument('--bert', default='bert-base-cased', help='which BERT model to use')

    # -- evaluate ---------------------------------------------------------
    cmd = commands.add_parser('evaluate', help='Evaluate the specified parser and dataset.')
    cmd.add_argument('--punct', action='store_true', help='whether to include punctuation')
    cmd.add_argument('--buckets', default=8, type=int, help='max num of buckets to use')
    cmd.add_argument('--data', default='data/ptb/test.conllx', help='path to dataset')

    # -- predict ----------------------------------------------------------
    cmd = commands.add_parser('predict', help='Use a trained parser to make predictions.')
    cmd.add_argument('--buckets', default=8, type=int, help='max num of buckets to use')
    cmd.add_argument('--data', default='data/ptb/test.conllx', help='path to dataset')
    cmd.add_argument('--pred', default='pred.conllx', help='path to predicted result')
    cmd.add_argument('--prob', action='store_true', help='whether to output probs')

    # Dispatch to the shared supar command runner.
    parse(parser)
| if __name__ == "__main__": 67 | main() 68 | -------------------------------------------------------------------------------- /neutrex/supar/cmds/crf2o_dep.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | This is the public release of the code of our paper titled 4 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 5 | Language Specifications" (USENIX Security '24) 6 | Author: Abdullah Al Ishtiaq and Sarkar Snigdha Sarathi Das 7 | Contact: abdullah.ishtiaq@psu.edu 8 | 9 | Licensed under the Apache License, Version 2.0 (the "License"); 10 | you may not use this file except in compliance with the License. 11 | You may obtain a copy of the License at 12 | 13 | https://www.apache.org/licenses/LICENSE-2.0 14 | 15 | Unless required by applicable law or agreed to in writing, software 16 | distributed under the License is distributed on an "AS IS" BASIS, 17 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | See the License for the specific language governing permissions and 19 | limitations under the License. 
20 | """ 21 | 22 | import argparse 23 | 24 | from supar import CRF2oDependencyParser 25 | from supar.cmds.cmd import parse 26 | 27 | 28 | def main(): 29 | parser = argparse.ArgumentParser(description='Create second-order CRF Dependency Parser.') 30 | parser.set_defaults(Parser=CRF2oDependencyParser) 31 | parser.add_argument('--mbr', action='store_true', help='whether to use MBR decoding') 32 | parser.add_argument('--tree', action='store_true', help='whether to ensure well-formedness') 33 | parser.add_argument('--proj', action='store_true', help='whether to projectivize the data') 34 | parser.add_argument('--partial', action='store_true', help='whether partial annotation is included') 35 | subparsers = parser.add_subparsers(title='Commands', dest='mode') 36 | # train 37 | subparser = subparsers.add_parser('train', help='Train a parser.') 38 | subparser.add_argument('--feat', '-f', choices=['tag', 'char', 'elmo', 'bert'], nargs='+', help='features to use') 39 | subparser.add_argument('--build', '-b', action='store_true', help='whether to build the model first') 40 | subparser.add_argument('--checkpoint', action='store_true', help='whether to load a checkpoint to restore training') 41 | subparser.add_argument('--encoder', choices=['lstm', 'bert'], default='lstm', help='encoder to use') 42 | subparser.add_argument('--punct', action='store_true', help='whether to include punctuation') 43 | subparser.add_argument('--max-len', type=int, help='max length of the sentences') 44 | subparser.add_argument('--buckets', default=32, type=int, help='max num of buckets to use') 45 | subparser.add_argument('--train', default='data/ptb/train.conllx', help='path to train file') 46 | subparser.add_argument('--dev', default='data/ptb/dev.conllx', help='path to dev file') 47 | subparser.add_argument('--test', default='data/ptb/test.conllx', help='path to test file') 48 | subparser.add_argument('--embed', default='data/glove.6B.100d.txt', help='path to pretrained embeddings') 49 | 
subparser.add_argument('--unk', default='unk', help='unk token in pretrained embeddings') 50 | subparser.add_argument('--n-embed', default=100, type=int, help='dimension of embeddings') 51 | subparser.add_argument('--bert', default='bert-base-cased', help='which BERT model to use') 52 | # evaluate 53 | subparser = subparsers.add_parser('evaluate', help='Evaluate the specified parser and dataset.') 54 | subparser.add_argument('--punct', action='store_true', help='whether to include punctuation') 55 | subparser.add_argument('--buckets', default=8, type=int, help='max num of buckets to use') 56 | subparser.add_argument('--data', default='data/ptb/test.conllx', help='path to dataset') 57 | # predict 58 | subparser = subparsers.add_parser('predict', help='Use a trained parser to make predictions.') 59 | subparser.add_argument('--buckets', default=8, type=int, help='max num of buckets to use') 60 | subparser.add_argument('--data', default='data/ptb/test.conllx', help='path to dataset') 61 | subparser.add_argument('--pred', default='pred.conllx', help='path to predicted result') 62 | subparser.add_argument('--prob', action='store_true', help='whether to output probs') 63 | parse(parser) 64 | 65 | 66 | if __name__ == "__main__": 67 | main() 68 | -------------------------------------------------------------------------------- /neutrex/supar/cmds/vi_dep.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | This is the public release of the code of our paper titled 4 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 5 | Language Specifications" (USENIX Security '24) 6 | Author: Abdullah Al Ishtiaq and Sarkar Snigdha Sarathi Das 7 | Contact: abdullah.ishtiaq@psu.edu 8 | 9 | Licensed under the Apache License, Version 2.0 (the "License"); 10 | you may not use this file except in compliance with the License. 
def main():
    """CLI entry point: build/evaluate/predict with the variational-inference dependency parser."""
    parser = argparse.ArgumentParser(description='Create Dependency Parser using Variational Inference.')
    # Global decoding options shared by every sub-command.
    parser.add_argument('--tree', action='store_true', help='whether to ensure well-formedness')
    parser.add_argument('--proj', action='store_true', help='whether to projectivise the data')
    parser.add_argument('--partial', action='store_true', help='whether partial annotation is included')
    parser.set_defaults(Parser=VIDependencyParser)
    commands = parser.add_subparsers(title='Commands', dest='mode')

    # -- train ------------------------------------------------------------
    cmd = commands.add_parser('train', help='Train a parser.')
    cmd.add_argument('--feat', '-f', choices=['tag', 'char', 'elmo', 'bert'], nargs='+', help='features to use')
    cmd.add_argument('--build', '-b', action='store_true', help='whether to build the model first')
    cmd.add_argument('--checkpoint', action='store_true', help='whether to load a checkpoint to restore training')
    cmd.add_argument('--encoder', choices=['lstm', 'bert'], default='lstm', help='encoder to use')
    cmd.add_argument('--punct', action='store_true', help='whether to include punctuation')
    cmd.add_argument('--max-len', type=int, help='max length of the sentences')
    cmd.add_argument('--buckets', default=32, type=int, help='max num of buckets to use')
    cmd.add_argument('--train', default='data/ptb/train.conllx', help='path to train file')
    cmd.add_argument('--dev', default='data/ptb/dev.conllx', help='path to dev file')
    cmd.add_argument('--test', default='data/ptb/test.conllx', help='path to test file')
    cmd.add_argument('--embed', default='data/glove.6B.100d.txt', help='path to pretrained embeddings')
    cmd.add_argument('--unk', default='unk', help='unk token in pretrained embeddings')
    cmd.add_argument('--n-embed', default=100, type=int, help='dimension of embeddings')
    cmd.add_argument('--bert', default='bert-base-cased', help='which BERT model to use')
    cmd.add_argument('--inference', default='mfvi', choices=['mfvi', 'lbp'], help='approximate inference methods')

    # -- evaluate ---------------------------------------------------------
    cmd = commands.add_parser('evaluate', help='Evaluate the specified parser and dataset.')
    cmd.add_argument('--punct', action='store_true', help='whether to include punctuation')
    cmd.add_argument('--buckets', default=8, type=int, help='max num of buckets to use')
    cmd.add_argument('--data', default='data/ptb/test.conllx', help='path to dataset')

    # -- predict ----------------------------------------------------------
    cmd = commands.add_parser('predict', help='Use a trained parser to make predictions.')
    cmd.add_argument('--buckets', default=8, type=int, help='max num of buckets to use')
    cmd.add_argument('--data', default='data/ptb/test.conllx', help='path to dataset')
    cmd.add_argument('--pred', default='pred.conllx', help='path to predicted result')
    cmd.add_argument('--prob', action='store_true', help='whether to output probs')

    # Dispatch to the shared supar command runner.
    parse(parser)
from .parsers import (BiaffineDependencyParser,
                      BiaffineSemanticDependencyParser, CRF2oDependencyParser,
                      CRFConstituencyParser, CRFDependencyParser, Parser,
                      VIConstituencyParser, VIDependencyParser,
                      VISemanticDependencyParser)
from .structs import (BiLexicalizedConstituencyCRF, ConstituencyCRF,
                      ConstituencyLBP, ConstituencyMFVI, Dependency2oCRF,
                      DependencyCRF, DependencyLBP, DependencyMFVI,
                      LinearChainCRF, MatrixTree, SemanticDependencyLBP,
                      SemanticDependencyMFVI)

# Public API of the supar package.
__all__ = ['BiaffineDependencyParser',
           'CRFDependencyParser',
           'CRF2oDependencyParser',
           'VIDependencyParser',
           'CRFConstituencyParser',
           'VIConstituencyParser',
           'BiaffineSemanticDependencyParser',
           'VISemanticDependencyParser',
           'Parser',
           'MatrixTree',
           'DependencyCRF',
           'Dependency2oCRF',
           'ConstituencyCRF',
           'BiLexicalizedConstituencyCRF',
           'LinearChainCRF',
           'DependencyLBP',
           'DependencyMFVI',
           'ConstituencyLBP',
           'ConstituencyMFVI',
           'SemanticDependencyLBP',
           'SemanticDependencyMFVI']

__version__ = '1.1.4'

# Registry mapping each parser's NAME attribute to its class.
PARSER = {cls.NAME: cls for cls in [BiaffineDependencyParser,
                                    CRFDependencyParser,
                                    CRF2oDependencyParser,
                                    VIDependencyParser,
                                    CRFConstituencyParser,
                                    VIConstituencyParser,
                                    BiaffineSemanticDependencyParser,
                                    VISemanticDependencyParser]}

# Mirrors hosting the pretrained model archives.
SRC = {'github': 'https://github.com/yzhangcs/parser/releases/download',
       'hlt': 'http://hlt.suda.edu.cn/~yzhang/supar'}
# Short alias -> archive basename for every released pretrained model.
NAME = {
    'biaffine-dep-en': 'ptb.biaffine.dep.lstm.char',
    'biaffine-dep-zh': 'ctb7.biaffine.dep.lstm.char',
    'crf2o-dep-en': 'ptb.crf2o.dep.lstm.char',
    'crf2o-dep-zh': 'ctb7.crf2o.dep.lstm.char',
    'biaffine-dep-roberta-en': 'ptb.biaffine.dep.roberta',
    'biaffine-dep-electra-zh': 'ctb7.biaffine.dep.electra',
    'biaffine-dep-xlmr': 'ud.biaffine.dep.xlmr',
    'crf-con-en': 'ptb.crf.con.lstm.char',
    'crf-con-zh': 'ctb7.crf.con.lstm.char',
    'crf-con-roberta-en': 'ptb.crf.con.roberta',
    'crf-con-electra-zh': 'ctb7.crf.con.electra',
    'crf-con-xlmr': 'spmrl.crf.con.xlmr',
    'biaffine-sdp-en': 'dm.biaffine.sdp.lstm.tag-char-lemma',
    'biaffine-sdp-zh': 'semeval16.biaffine.sdp.lstm.tag-char-lemma',
    'vi-sdp-en': 'dm.vi.sdp.lstm.tag-char-lemma',
    'vi-sdp-zh': 'semeval16.vi.sdp.lstm.tag-char-lemma',
    'vi-sdp-roberta-en': 'dm.vi.sdp.roberta',
    'vi-sdp-electra-zh': 'semeval16.vi.sdp.electra'
}
# Fully-qualified download URLs for model archives and config files, per mirror.
MODEL = {source: {alias: f"{base}/v1.1.0/{archive}.zip" for alias, archive in NAME.items()}
         for source, base in SRC.items()}
CONFIG = {source: {alias: f"{base}/v1.1.0/{archive}.ini" for alias, archive in NAME.items()}
          for source, base in SRC.items()}
class SharedDropout(nn.Module):
    r"""
    Dropout module whose mask is shared across one dimension.

    Vanilla dropout samples an independent mask for every position, while
    SharedDropout samples a single mask and broadcasts it along the sequence
    dimension, so every timestep drops the same channels.

    Args:
        p (float):
            The probability of an element to be zeroed. Default: 0.5.
        batch_first (bool):
            If ``True``, the input and output tensors are provided as
            ``[batch_size, seq_len, *]``. Default: ``True``.

    Examples:
        >>> x = torch.ones(1, 3, 5)
        >>> nn.Dropout()(x)
        tensor([[[0., 2., 2., 0., 0.],
                 [2., 2., 0., 2., 2.],
                 [2., 2., 2., 2., 0.]]])
        >>> SharedDropout()(x)
        tensor([[[2., 0., 2., 0., 2.],
                 [2., 0., 2., 0., 2.],
                 [2., 0., 2., 0., 2.]]])
    """

    def __init__(self, p=0.5, batch_first=True):
        super().__init__()

        self.p = p
        self.batch_first = batch_first

    def __repr__(self):
        info = f"p={self.p}"
        if self.batch_first:
            info += f", batch_first={self.batch_first}"

        return f"{self.__class__.__name__}({info})"

    def forward(self, x):
        r"""
        Args:
            x (~torch.Tensor):
                A tensor of any shape.
        Returns:
            A tensor of the same shape as `x`.
        """

        # Identity when not in training mode.
        if not self.training:
            return x

        # Sample one mask from the first timestep and broadcast it over
        # the sequence dimension.
        if self.batch_first:
            mask = self.get_mask(x[:, 0], self.p).unsqueeze(1)
        else:
            mask = self.get_mask(x[0], self.p)

        return x * mask

    @staticmethod
    def get_mask(x, p):
        # Inverted dropout: surviving entries are rescaled by 1/(1-p).
        return x.new_empty(x.shape).bernoulli_(1 - p) / (1 - p)


class IndependentDropout(nn.Module):
    r"""
    Applies independent dropout masks to each of :math:`N` input tensors.

    When :math:`N-M` of them are dropped at a position, the remaining
    :math:`M` are scaled by a factor of :math:`N/M` to compensate, and when
    all of them are dropped together, zeros are returned.

    Args:
        p (float):
            The probability of an element to be zeroed. Default: 0.5.

    Examples:
        >>> x, y = torch.ones(1, 3, 5), torch.ones(1, 3, 5)
        >>> x, y = IndependentDropout()(x, y)
        >>> x
        tensor([[[1., 1., 1., 1., 1.],
                 [0., 0., 0., 0., 0.],
                 [2., 2., 2., 2., 2.]]])
        >>> y
        tensor([[[1., 1., 1., 1., 1.],
                 [2., 2., 2., 2., 2.],
                 [0., 0., 0., 0., 0.]]])
    """

    def __init__(self, p=0.5):
        super().__init__()

        self.p = p

    def __repr__(self):
        return f"{self.__class__.__name__}(p={self.p})"

    def forward(self, *items):
        r"""
        Args:
            items (list[~torch.Tensor]):
                A list of tensors that have the same shape except the last dimension.
        Returns:
            The returned tensors are of the same shape as `items`.
        """

        # Identity when not in training mode.
        if not self.training:
            return items

        # One independent keep/drop mask per tensor, over the first two dims.
        keep = [item.new_empty(item.shape[:2]).bernoulli_(1 - self.p) for item in items]
        total = sum(keep)
        # Survivors are scaled by N / (#kept); the max() guards against
        # dividing by zero when everything is dropped at a position.
        scale = len(items) / total.max(torch.ones_like(total))
        return [item * (mask * scale).unsqueeze(-1) for item, mask in zip(items, keep)]
Control Plane 24 | CPA Conditional PSCell Addition 25 | CPC Conditional PSCell Change 26 | C-RNTI Cell RNTI 27 | CSI Channel State Information 28 | DAPS Dual Active Protocol Stack 29 | DC Dual Connectivity 30 | DCCH Dedicated Control Channel 31 | DCI Downlink Control Information 32 | DCP DCI with CRC scrambled by PS-RNTI 33 | DFN Direct Frame Number 34 | DL Downlink 35 | DL-PRS Downlink Positioning Reference Signal 36 | DL-SCH Downlink Shared Channel 37 | DM-RS Demodulation Reference Signal 38 | DRB (user) Data Radio Bearer 39 | DRX Discontinuous Reception 40 | DTCH Dedicated Traffic Channel 41 | EN-DC E-UTRA NR Dual Connectivity with E-UTRA connected to EPC 42 | EPC Evolved Packet Core 43 | EPS Evolved Packet System 44 | ETWS Earthquake and Tsunami Warning System 45 | E-UTRA Evolved Universal Terrestrial Radio Access 46 | E-UTRA/5GC E-UTRA connected to 5GC 47 | E-UTRA/EPC E-UTRA connected to EPC 48 | E-UTRAN Evolved Universal Terrestrial Radio Access Network 49 | FDD Frequency Division Duplex 50 | FFS For Further Study 51 | G-CS-RNTI Group Configured Scheduling RNTI 52 | GERAN GSM/EDGE Radio Access Network 53 | GIN Group ID for Network selection 54 | GNSS Global Navigation Satellite System 55 | G-RNTI Group RNTI 56 | GSM Global System for Mobile Communications 57 | HARQ Hybrid Automatic Repeat Request 58 | HRNN Human Readable Network Name 59 | HSDN High Speed Dedicated Network 60 | H-SFN Hyper SFN 61 | IAB Integrated Access and Backhaul 62 | IAB-DU IAB-node DU 63 | IAB-MT IAB Mobile Termination 64 | IDC In-Device Coexistence 65 | IE Information element 66 | IMSI International Mobile Subscriber Identity 67 | kB Kilobyte (1000 bytes) 68 | L1 Layer 1 69 | L2 Layer 2 70 | L3 Layer 3 71 | LBT Listen Before Talk 72 | MAC Medium Access Control 73 | MBS Multicast/Broadcast Service 74 | MBS FSAI MBS Frequency Selection Area Identity 75 | MCCH MBS Control Channel 76 | MCG Master Cell Group 77 | MDT Minimization of Drive Tests 78 | MIB Master Information Block 79 | MPE 
Maximum Permissible Exposure 80 | MRB MBS Radio Bearer 81 | MR-DC Multi-Radio Dual Connectivity 82 | MTCH MBS Traffic Channel 83 | MTSI Multimedia Telephony Service for IMS 84 | MUSIM Multi-Universal Subscriber Identity Module 85 | N/A Not Applicable 86 | NE-DC NR E-UTRA Dual Connectivity 87 | (NG)EN-DC E-UTRA NR Dual Connectivity (covering E-UTRA connected to EPC or 5GC) 88 | NGEN-DC E-UTRA NR Dual Connectivity with E-UTRA connected to 5GC 89 | NID Network Identifier 90 | NPN Non-Public Network 91 | NR-DC NR-NR Dual Connectivity 92 | NR/5GC NR connected to 5GC 93 | PCell Primary Cell 94 | PDCP Packet Data Convergence Protocol 95 | PDU Protocol Data Unit 96 | PEI Paging Early Indication 97 | PLMN Public Land Mobile Network 98 | PNI-NPN Public Network Integrated Non-Public Network 99 | posSIB Positioning SIB 100 | PPW PRS Processing Window 101 | PRS Positioning Reference Signal 102 | PSCell Primary SCG Cell 103 | PTM Point to Multipoint 104 | PTP Point to Point 105 | PWS Public Warning System 106 | QoE Quality of Experience 107 | QoS Quality of Service 108 | RAN Radio Access Network 109 | RAT Radio Access Technology 110 | RLC Radio Link Control 111 | RLM Radio Link Monitoring 112 | RMTC RSSI Measurement Timing Configuration 113 | RNA RAN-based Notification Area 114 | RNTI Radio Network Temporary Identifier 115 | ROHC Robust Header Compression 116 | RPLMN Registered Public Land Mobile Network 117 | RRC Radio Resource Control 118 | RS Reference Signal 119 | SBAS Satellite Based Augmentation System 120 | SCell Secondary Cell 121 | SCG Secondary Cell Group 122 | SCS Subcarrier Spacing 123 | SDT Small Data Transmission 124 | SFN System Frame Number 125 | SFTD SFN and Frame Timing Difference 126 | SI System Information 127 | SIB System Information Block 128 | SL Sidelink 129 | SLSS Sidelink Synchronisation Signal 130 | SNPN Stand-alone Non-Public Network 131 | SpCell Special Cell 132 | SRAP Sidelink Relay Adaptation Protocol 133 | SRB Signalling Radio Bearer 134 | SRS 
Sounding Reference Signal 135 | SSB Synchronization Signal Block 136 | TAG Timing Advance Group 137 | TDD Time Division Duplex 138 | TEG Timing Error Group 139 | TM Transparent Mode 140 | TMGI Temporary Mobile Group Identity 141 | U2N UE-to-Network 142 | UDC Uplink Data Compression 143 | UE User Equipment 144 | UL Uplink 145 | UM Unacknowledged Mode 146 | UP User Plane 147 | VR Virtual Reality 148 | -------------------------------------------------------------------------------- /keyword_extraction/noun_phrase_cleanup.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is the public release of the code of our paper titled 3 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 4 | Language Specifications" (USENIX Security '24) 5 | Author: Abdullah Al Ishtiaq, Syed Md Mukit Rashid, and Ali Ranjbar 6 | Contact: abdullah.ishtiaq@psu.edu 7 | 8 | Licensed under the Apache License, Version 2.0 (the "License"); 9 | you may not use this file except in compliance with the License. 10 | You may obtain a copy of the License at 11 | 12 | https://www.apache.org/licenses/LICENSE-2.0 13 | 14 | Unless required by applicable law or agreed to in writing, software 15 | distributed under the License is distributed on an "AS IS" BASIS, 16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | See the License for the specific language governing permissions and 18 | limitations under the License. 
19 | """ 20 | 21 | import pickle 22 | import pprint 23 | import enchant 24 | 25 | pp = pprint.PrettyPrinter(indent=4) 26 | dictionary = enchant.Dict("en_US") 27 | 28 | noun_phrases = dict() 29 | with open("assets/5g-rrc_small_lines.np.count.2.txt", "r") as f: 30 | lines = f.readlines() 31 | for line in lines: 32 | phrase_freq = int(line.split("\n")[0].strip().split(" ")[0]) 33 | phrase = "" 34 | for i in range(1, len(line.split("\n")[0].strip().split(" "))): 35 | phrase += line.split("\n")[0].strip().split(" ")[i] + " " 36 | 37 | noun_phrases[phrase.strip()] = phrase_freq 38 | 39 | 40 | def refine_noun_phrases(): 41 | punctuations = ["(", ")", "{", "}", "[", "]", ":", ",", ".", "\"", "#", "and", "or"] 42 | 43 | def break_terms(phrase_dict, delimiter): 44 | pl_1 = phrase_dict 45 | pl_1_r = dict() 46 | for p, pf in pl_1.items(): 47 | if delimiter in p: 48 | new_phrases = p.split(delimiter) 49 | new_phrases = [p for p in new_phrases if p != ""] 50 | for np in new_phrases: 51 | np = np.strip() 52 | if np not in pl_1_r.keys(): 53 | pl_1_r[np] = pf 54 | else: 55 | pl_1_r[np] += pf 56 | 57 | else: 58 | p = p.strip() 59 | if p not in pl_1_r.keys(): 60 | pl_1_r[p] = pf 61 | else: 62 | pl_1_r[p] += pf 63 | 64 | return pl_1_r 65 | 66 | pd_comma = break_terms(noun_phrases, " , ") 67 | pd_and = break_terms(pd_comma, " and ") 68 | pd_or = break_terms(pd_and, " or ") 69 | 70 | refined_phrases = dict() 71 | for phrase, phrase_freq in pd_or.items(): 72 | new_phrase = phrase 73 | delete = False 74 | words = phrase.split(" ") 75 | for i in range(len(words)): 76 | if i == 0 and (words[i].lower() == "a" or words[i].lower() == "an" or words[i].lower() == "the" 77 | or words[i].lower() == "any"): 78 | new_phrase = "" 79 | for word in words[1:]: 80 | new_phrase += word + " " 81 | new_phrase = new_phrase.strip() 82 | continue 83 | 84 | if i != 0 and (words[i].lower() == "a" or words[i].lower() == "an" or words[i].lower() == "the"): 85 | delete = True 86 | 87 | elif any([p in words[i] for p 
in punctuations]): 88 | delete = True 89 | 90 | if words[0].isnumeric() or new_phrase == "": 91 | delete = True 92 | 93 | if phrase_freq < 7: 94 | delete = True 95 | 96 | if len(new_phrase.split(" ")) == 1 and new_phrase.split(" ")[0] != "" \ 97 | and dictionary.check(new_phrase.split(" ")[0]): 98 | delete = True 99 | 100 | if any([p in new_phrase.split(" ")[0] for p in ["/", "\\", ","]]): 101 | delete = True 102 | 103 | if any([p in new_phrase.split(" ")[0].lower() for p in ["octet", "note", "bit"]]): 104 | delete = True 105 | 106 | if not delete: 107 | if new_phrase not in refined_phrases.keys(): 108 | refined_phrases[new_phrase] = phrase_freq 109 | else: 110 | refined_phrases[new_phrase] += phrase_freq 111 | 112 | keyword_dict_ = dict() 113 | 114 | for phrase, phrase_freq in sorted(refined_phrases.items(), key=lambda item: item[1], reverse=True): 115 | phrase = phrase.strip() 116 | phrase_key = str(phrase).replace("-", "_").replace("/", "_"). \ 117 | replace(" ", "_").replace("__", "_").replace("__", "_") 118 | keyword_dict_[phrase_key] = [phrase] 119 | 120 | return keyword_dict_ 121 | 122 | 123 | keyword_dict = refine_noun_phrases() 124 | 125 | a_file = open("keyword_dict_np.pkl", "wb") 126 | pickle.dump(keyword_dict, a_file) 127 | a_file.close() 128 | -------------------------------------------------------------------------------- /keyword_extraction/assets/definitions.txt: -------------------------------------------------------------------------------- 1 | AM MRB: An MRB associated with at least an AM RLC bearer for PTP transmission. 2 | BH RLC channel: An RLC channel between two nodes, which is used to transport backhaul packets. 3 | Broadcast MRB: A radio bearer configured for MBS broadcast delivery. 4 | CEIL: Mathematical function used to 'round up' i.e. to the nearest integer having a higher or equal value. 
5 | DAPS bearer: a bearer whose radio protocols are located in both the source gNB and the target gNB during DAPS handover to use both source gNB and target gNB resources. 6 | Dedicated signalling: Signalling sent on DCCH logical channel between the network and a single UE. 7 | Dormant BWP: The dormant BWP is one of downlink BWPs configured by the network via dedicated RRC signalling. 8 | In the dormant BWP, the UE stops monitoring PDCCH on/for the SCell, but continues performing CSI measurements, Automatic Gain Control (AGC) and beam management, if configured. For each serving cell other than the SpCell or PUCCH SCell, the network may configure one BWP as a dormant BWP. 9 | Field: The individual contents of an information element are referred to as fields. 10 | FLOOR: Mathematical function used to 'round down' i.e. to the nearest integer having a lower or equal value. 11 | Global cell identity: An identity to uniquely identifying an NR cell. It is consisted of cellIdentity and plmn-Identity of the first PLMN-Identity in plmn-IdentityList in SIB1. 12 | Information element: A structural element containing single or multiple fields is referred as information element. 13 | MBS Radio Bearer: A radio bearer that is configured for MBS delivery. 14 | Multicast/Broadcast Service: A point-to-multipoint service as defined in TS 23.247 . 15 | Multicast MRB: A radio bearer configured for MBS multicast delivery. 16 | NCSG: Network controlled small gap as defined in TS 38.133 . 17 | NPN-only Cell: A cell that is only available for normal service for NPNs' subscriber. An NPN-capable UE determines that a cell is NPN-only Cell by detecting that the cellReservedForOtherUse IE is set to true while the npn- IdentityInfoList IE is present in CellAccessRelatedInfo. 18 | NR sidelink communication: AS functionality enabling at least V2X Communication as defined in TS 23.287 , between two or more nearby UEs, using NR technology but not traversing any network node. 
19 | PNI-NPN identity: an identifier of a PNI-NPN comprising of a PLMN ID and a CAG -ID combination. 20 | Primary Cell: The MCG cell, operating on the primary frequency, in which the UE either performs the initial connection establishment procedure or initiates the connection re-establishment procedure. 21 | PC5 Relay RLC channel: An RLC channel between L2 U2N Remote UE and L2 U2N Relay UE, which is used to transport packets over PC5 for L2 UE-to-Network relay. 22 | Primary SCG Cell: For dual connectivity operation, the SCG cell in which the UE performs random access when performing the Reconfiguration with Sync procedure. 23 | Primary Timing Advance Group: Timing Advance Group containing the SpCell. 24 | PUCCH SCell: An SCell configured with PUCCH. 25 | PUSCH-Less SCell: An SCell configured without PUSCH. 26 | RedCap UE: A UE with reduced capabilities as specified in clause 4.2.21.1 in TS 38.306 . 27 | RLC bearer configuration: The lower layer part of the radio bearer configuration comprising the RLC and logical channel configurations. 28 | Secondary Cell: For a UE configured with CA, a cell providing additional radio resources on top of Special Cell. 29 | Secondary Cell Group: For a UE configured with dual connectivity, the subset of serving cells comprising of the PSCell and zero or more secondary cells. 30 | Serving Cell: For a UE in RRC_CONNECTED not configured with CA/DC there is only one serving cell comprising of the primary cell. For a UE in RRC_CONNECTED configured with CA/ DC the term 'serving cells' is used to denote the set of cells comprising of the Special Cell(s) and all secondary cells. 31 | Small Data Transmission: A procedure used for transmission of data and/or signalling over allowed radio bearers in RRC_INACTIVE state (i.e. without the UE transitioning to RRC_CONNECTED state). 32 | SNPN identity: an identifier of an SNPN comprising of a PLMN ID and an NID combination. 
33 | Special Cell: For Dual Connectivity operation the term Special Cell refers to the PCell of the MCG or the PSCell of the SCG, otherwise the term Special Cell refers to the PCell. 34 | Split SRB: In MR-DC, an SRB that supports transmission via MCG and SCG as well as duplication of RRC PDUs as defined in TS 37.340 . 35 | SSB Frequency: Frequency referring to the position of resource element RE=#0 (subcarrier #0) of resource block RB#10 of the SS block. 36 | U2N Relay UE: A UE that provides functionality to support connectivity to the network for U2N Remote UE(s). 37 | U2N Remote UE: A UE that communicates with the network via a U2N Relay UE. 38 | Uu Relay RLC channel: An RLC channel between L2 U2N Relay UE and gNB, which is used to transport packets over Uu for L2 UE-to-Network relay. 39 | UE Inactive AS Context: UE Inactive AS Context is stored when the connection is suspended and restored when the connection is resumed. It includes information as defined in clause 5.3.8.3. 40 | V2X sidelink communication: AS functionality enabling V2X Communication as defined in TS 23.285 , between nearby UEs, using E-UTRA technology but not traversing any network node. 41 | -------------------------------------------------------------------------------- /neutrex/supar/structs/dist.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | This is the public release of the code of our paper titled 4 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 5 | Language Specifications" (USENIX Security '24) 6 | Author: Abdullah Al Ishtiaq and Sarkar Snigdha Sarathi Das 7 | Contact: abdullah.ishtiaq@psu.edu 8 | 9 | Licensed under the Apache License, Version 2.0 (the "License"); 10 | you may not use this file except in compliance with the License. 
11 | You may obtain a copy of the License at 12 | 13 | https://www.apache.org/licenses/LICENSE-2.0 14 | 15 | Unless required by applicable law or agreed to in writing, software 16 | distributed under the License is distributed on an "AS IS" BASIS, 17 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | See the License for the specific language governing permissions and 19 | limitations under the License. 20 | """ 21 | 22 | import torch 23 | import torch.autograd as autograd 24 | from supar.structs.semiring import (CrossEntropySemiring, EntropySemiring, 25 | KLDivergenceSemiring, KMaxSemiring, 26 | LogSemiring, MaxSemiring, SampledSemiring) 27 | from torch.distributions.distribution import Distribution 28 | from torch.distributions.utils import lazy_property 29 | 30 | 31 | class StructuredDistribution(Distribution): 32 | r""" 33 | Base class for structured distribution :math:`p(y)` :cite:`eisner-2016-inside,goodman-1999-semiring,li-eisner-2009-first`. 34 | 35 | Args: 36 | scores (torch.Tensor): 37 | Log potentials, also for high-order cases. 38 | 39 | """ 40 | 41 | def __init__(self, scores, **kwargs): 42 | self.scores = scores.requires_grad_() if isinstance(scores, torch.Tensor) else [s.requires_grad_() for s in scores] 43 | self.kwargs = kwargs 44 | 45 | def __repr__(self): 46 | return f"{self.__class__.__name__}()" 47 | 48 | def __add__(self, other): 49 | return self.__class__(torch.stack((self.scores, other.scores), -1), lens=self.lens) 50 | 51 | @lazy_property 52 | def log_partition(self): 53 | r""" 54 | Computes the log partition function of the distribution :math:`p(y)`. 55 | """ 56 | 57 | return self.forward(LogSemiring) 58 | 59 | @lazy_property 60 | def marginals(self): 61 | r""" 62 | Computes marginal probabilities of the distribution :math:`p(y)`. 63 | """ 64 | 65 | return self.backward(self.log_partition.sum()) 66 | 67 | @lazy_property 68 | def max(self): 69 | r""" 70 | Computes the max score of the distribution :math:`p(y)`. 
71 | """ 72 | 73 | return self.forward(MaxSemiring) 74 | 75 | @lazy_property 76 | def argmax(self): 77 | r""" 78 | Computes :math:`\arg\max_y p(y)` of the distribution :math:`p(y)`. 79 | """ 80 | raise NotImplementedError 81 | 82 | @lazy_property 83 | def mode(self): 84 | return self.argmax 85 | 86 | def kmax(self, k): 87 | r""" 88 | Computes the k-max of the distribution :math:`p(y)`. 89 | """ 90 | 91 | return self.forward(KMaxSemiring(k)) 92 | 93 | def topk(self, k): 94 | r""" 95 | Computes the k-argmax of the distribution :math:`p(y)`. 96 | """ 97 | raise NotImplementedError 98 | 99 | def sample(self): 100 | r""" 101 | Obtains a structured sample from the distribution :math:`y \sim p(y)`. 102 | TODO: multi-sampling. 103 | """ 104 | 105 | return self.backward(self.forward(SampledSemiring).sum()).detach() 106 | 107 | @lazy_property 108 | def entropy(self): 109 | r""" 110 | Computes entropy :math:`H[p]` of the distribution :math:`p(y)`. 111 | """ 112 | 113 | return self.forward(EntropySemiring) 114 | 115 | def cross_entropy(self, other): 116 | r""" 117 | Computes cross-entropy :math:`H[p,q]` of self and another distribution. 118 | 119 | Args: 120 | other (~supar.structs.dist.StructuredDistribution): Comparison distribution. 121 | """ 122 | 123 | return (self + other).forward(CrossEntropySemiring) 124 | 125 | def kl(self, other): 126 | r""" 127 | Computes KL-divergence :math:`KL[p \parallel q]=H[p,q]-H[p]` of self and another distribution. 128 | 129 | Args: 130 | other (~supar.structs.dist.StructuredDistribution): Comparison distribution. 131 | """ 132 | 133 | return (self + other).forward(KLDivergenceSemiring) 134 | 135 | def log_prob(self, value, **kwargs): 136 | """ 137 | Computes log probability over values :math:`p(y)`. 
138 | """ 139 | 140 | return self.score(value, **kwargs) - self.log_partition 141 | 142 | def score(self, value): 143 | raise NotImplementedError 144 | 145 | @torch.enable_grad() 146 | def forward(self, semiring): 147 | raise NotImplementedError 148 | 149 | def backward(self, log_partition): 150 | return autograd.grad(log_partition, 151 | self.scores if isinstance(self.scores, torch.Tensor) else self.scores[0], 152 | create_graph=True)[0] 153 | -------------------------------------------------------------------------------- /keyword_extraction/constituency_parser.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is the public release of the code of our paper titled 3 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 4 | Language Specifications" (USENIX Security '24) 5 | Author: Abdullah Al Ishtiaq and Ali Ranjbar 6 | Contact: abdullah.ishtiaq@psu.edu 7 | 8 | Licensed under the Apache License, Version 2.0 (the "License"); 9 | you may not use this file except in compliance with the License. 10 | You may obtain a copy of the License at 11 | 12 | https://www.apache.org/licenses/LICENSE-2.0 13 | 14 | Unless required by applicable law or agreed to in writing, software 15 | distributed under the License is distributed on an "AS IS" BASIS, 16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | See the License for the specific language governing permissions and 18 | limitations under the License. 
19 | """ 20 | 21 | import argparse 22 | import stanza 23 | import sys 24 | import threading 25 | 26 | from stanza.models.common.bert_embedding import BERT_ARGS 27 | from stanza.models.common.doc import Document 28 | from stanza.models.constituency.parse_tree import Tree 29 | from tqdm import tqdm 30 | from typing import List 31 | 32 | 33 | s_print_lock = threading.Lock() 34 | 35 | 36 | def get_args() -> argparse.Namespace: 37 | _parser = argparse.ArgumentParser(description="Filter constituency tree") 38 | _parser.add_argument("-f", "--file", 39 | type=str, 40 | help="input file", 41 | required=True) 42 | _parser.add_argument("--label", 43 | type=str, 44 | help="label of the node to print", 45 | required=True) 46 | _parser.add_argument("-v", 47 | action=argparse.BooleanOptionalAction, 48 | help='verbose logging', 49 | default=False, 50 | required=False) 51 | group = _parser.add_mutually_exclusive_group() 52 | group.add_argument("-j", 53 | type=int, 54 | help="number of threads", 55 | default=8, 56 | required=False) 57 | group.add_argument("--threading", 58 | action=argparse.BooleanOptionalAction, 59 | help="use threading lib", 60 | default=True, 61 | required=False) 62 | _args = _parser.parse_args() 63 | 64 | return _args 65 | 66 | 67 | def dfs_collect_np(tree: Tree, result: List[str]) -> None: 68 | if not tree.children: 69 | result.append(tree.label) 70 | return 71 | for child in tree.children: 72 | dfs_collect_np(child, result) 73 | 74 | 75 | def dfs_filter(tree: Tree, label: str) -> None: 76 | if not tree.children: 77 | return 78 | for child in tree.children: 79 | dfs_filter(child, label) 80 | if child.label == label and child.children: 81 | np: List[str] = [] 82 | dfs_collect_np(child, np) 83 | with s_print_lock: 84 | print(" ".join(np), flush=True) 85 | 86 | 87 | def analyze_sentence(sen: str): 88 | try: 89 | text_doc: Document = stanza_pipeline(sen) 90 | for sentence in text_doc.sentences: 91 | tree: Tree = sentence.constituency 92 | dfs_filter(tree, 
args.label) 93 | except Exception as e: 94 | print(e) 95 | 96 | 97 | def join_all(_threads: List[threading.Thread], _bar: tqdm): 98 | for t in _threads: 99 | t.join() 100 | _bar.update(1) 101 | 102 | 103 | if __name__ == "__main__": 104 | args = get_args() 105 | 106 | model_name = "roberta-base" 107 | if model_name in BERT_ARGS.keys(): 108 | BERT_ARGS[model_name]["model_max_length"] = 1024 109 | else: 110 | BERT_ARGS[model_name] = {"model_max_length": 1024} 111 | 112 | stanza_pipeline = stanza.Pipeline(lang='en', processors='tokenize,mwt,pos,lemma,depparse,constituency', package={'constituency': 'wsj_bert'}, verbose=args.v) 113 | num_lines = sum(1 for line in open(args.file)) 114 | with open(args.file, "r") as f: 115 | with tqdm(total=num_lines, file=sys.stderr) as bar: 116 | if not args.threading: 117 | for line in f: 118 | analyze_sentence(line.rstrip()) 119 | bar.update(1) 120 | else: 121 | try: 122 | while True: 123 | threads: List[threading.Thread] = [] 124 | for _ in range(args.j): 125 | line = f.readline() 126 | if not line: 127 | break 128 | line = line.rstrip() 129 | x = threading.Thread(target=analyze_sentence, args=(line,)) 130 | threads.append(x) 131 | x.start() 132 | else: 133 | join_all(threads, bar) 134 | continue 135 | join_all(threads, bar) 136 | break 137 | except KeyboardInterrupt: 138 | join_all(threads, bar) 139 | -------------------------------------------------------------------------------- /synthesizers/script_msg_defs_4.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is the public release of the code of our paper titled 3 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 4 | Language Specifications" (USENIX Security '24) 5 | Author: Abdullah Al Ishtiaq 6 | Contact: abdullah.ishtiaq@psu.edu 7 | 8 | Licensed under the Apache License, Version 2.0 (the "License"); 9 | you may not use this file except in compliance with 
the License. 10 | You may obtain a copy of the License at 11 | 12 | https://www.apache.org/licenses/LICENSE-2.0 13 | 14 | Unless required by applicable law or agreed to in writing, software 15 | distributed under the License is distributed on an "AS IS" BASIS, 16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | See the License for the specific language governing permissions and 18 | limitations under the License. 19 | """ 20 | 21 | um_msg_list = { 22 | "attach_req", "attach_complete", "identity_resp", "auth_resp", "auth_failure", "sm_complete", "sm_reject", 23 | "tau_req", "tau_complete", "guti_realloc_complete", "service_req", "ext_service_req", "control_service_req", 24 | "uplink_nas_transport", "uplink_generic_nas_transport", 25 | "activate_dedicated_eps_bearer_reject", "activate_default_eps_bearer_reject", "activate_default_eps_bearer_accept", 26 | "activate_dedicated_eps_bearer_accept", "bearer_resource_allocation_req", "bearer_resource_modification_req", 27 | "deactivate_eps_bearer_accept", "esm_info_resp", "modify_eps_bearer_accept", "modify_eps_bearer_reject", 28 | "pdn_connectivity_req", "pdn_disconnect_req", "remote_ue_report", 29 | "uplink_signalling" 30 | } 31 | 32 | mu_msg_list = { 33 | "attach_accept", "attach_reject", "identity_req", "auth_req", "auth_reject", "sm_command", 34 | "tau_accept", "tau_reject", "guti_realloc_command", "service_accept", "service_reject", "cs_service_notification", 35 | "downlink_nas_transport", "downlink_generic_nas_transport", "emm_information", "paging", 36 | "activate_dedicated_eps_bearer_req", "activate_default_eps_bearer_req", "bearer_resource_allocation_reject", 37 | "bearer_resource_modification_reject", "deactivate_eps_bearer_req", "esm_info_req", "modify_eps_bearer_req", 38 | "notification_msg", "pdn_connectivity_reject", "pdn_disconnect_reject", "remote_ue_report_resp", 39 | "downlink_signalling" 40 | } 41 | 42 | both_dir_msg_list = { 43 | "detach_req", "detach_accept", 
"emm_status", "security_protected_nas_msg", 44 | "esm_dummy", "esm_status", "esm_data_transport", 45 | "nas_message", "initial_nas_message", "user_data" 46 | } 47 | 48 | emm_sublayer_msg_list = { 49 | "attach_req", "attach_accept", "attach_reject", "attach_complete", "auth_req", "auth_resp", "auth_failure", 50 | "auth_reject", "cs_service_notification", "detach_req", "detach_accept", "downlink_nas_transport", "paging", 51 | "uplink_nas_transport", "downlink_generic_nas_transport", "uplink_generic_nas_transport", "emm_information", 52 | "emm_status", "service_req", "service_accept", "service_reject", "ext_service_req", "control_service_req", 53 | "guti_realloc_command", "guti_realloc_complete", "identity_req", "identity_resp", "sm_command", "sm_complete", 54 | "sm_reject", "security_protected_nas_msg", "tau_req", "tau_accept", "tau_reject", "tau_complete", "registration_req" 55 | } 56 | 57 | esm_sublayer_msg_list = { 58 | "activate_dedicated_eps_bearer_req", "activate_dedicated_eps_bearer_accept", "activate_dedicated_eps_bearer_reject", 59 | "activate_default_eps_bearer_req", "activate_default_eps_bearer_accept", "activate_default_eps_bearer_reject", 60 | "bearer_resource_allocation_req", "bearer_resource_allocation_reject", "bearer_resource_modification_req", 61 | "bearer_resource_modification_reject", "deactivate_eps_bearer_req", "deactivate_eps_bearer_accept", 62 | "modify_eps_bearer_req", "modify_eps_bearer_accept", "modify_eps_bearer_reject", "esm_dummy", "esm_info_req", 63 | "esm_info_resp", "esm_status", "notification_msg", "pdn_connectivity_req", "pdn_connectivity_reject", 64 | "pdn_disconnect_req", "pdn_disconnect_reject", "remote_ue_report", "remote_ue_report_resp", "esm_data_transport" 65 | } 66 | 67 | special_msg_list = { 68 | "nas_message", "initial_nas_message", "user_data", "uplink_signalling", "downlink_signalling" 69 | } 70 | 71 | msg_response = { 72 | "identity_req": "identity_resp", 73 | "auth_req": "auth_resp", 74 | "sm_command": "sm_complete", 
75 | "guti_realloc_command": "guti_realloc_complete", 76 | "tau_req": "tau_accept", 77 | "tau_accept": "tau_complete", 78 | "esm_info_req": "esm_info_resp", 79 | "activate_default_eps_bearer_req": "activate_default_eps_bearer_req", 80 | "remote_ue_report": "remote_ue_report_resp" 81 | } 82 | 83 | mme_wait_for_message = { 84 | "attach_accept": "attach_resp", 85 | "identity_req": "identity_resp", 86 | "auth_req": "auth_resp", 87 | "sm_command": "sm_resp", 88 | "tau_accept": "tau_resp", 89 | "guti_realloc_command": "guti_realloc_resp" 90 | } 91 | 92 | check_mme_wait_for = { 93 | "attach_complete": "attach_resp", 94 | "identity_resp": "identity_resp", 95 | "auth_resp": "auth_resp", 96 | "auth_failure": "auth_resp", 97 | "sm_complete": "sm_resp", 98 | "sm_reject": "sm_resp", 99 | "tau_complete": "tau_resp", 100 | "guti_realloc_complete": "guti_realloc_resp" 101 | } 102 | 103 | -------------------------------------------------------------------------------- /neutrex/supar/structs/linearchain.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | This is the public release of the code of our paper titled 4 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 5 | Language Specifications" (USENIX Security '24) 6 | Author: Abdullah Al Ishtiaq and Sarkar Snigdha Sarathi Das 7 | Contact: abdullah.ishtiaq@psu.edu 8 | 9 | Licensed under the Apache License, Version 2.0 (the "License"); 10 | you may not use this file except in compliance with the License. 11 | You may obtain a copy of the License at 12 | 13 | https://www.apache.org/licenses/LICENSE-2.0 14 | 15 | Unless required by applicable law or agreed to in writing, software 16 | distributed under the License is distributed on an "AS IS" BASIS, 17 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
18 | See the License for the specific language governing permissions and 19 | limitations under the License. 20 | """ 21 | 22 | import torch 23 | from supar.structs.dist import StructuredDistribution 24 | from supar.structs.semiring import LogSemiring 25 | from torch.distributions.utils import lazy_property 26 | 27 | 28 | class LinearChainCRF(StructuredDistribution): 29 | r""" 30 | Linear-chain CRFs :cite:`lafferty-etal-2001-crf`. 31 | 32 | Args: 33 | scores (~torch.Tensor): ``[batch_size, seq_len, n_tags]``. 34 | Log potentials. 35 | trans (~torch.Tensor): ``[n_tags+1, n_tags+1]``. 36 | Transition scores. 37 | ``trans[-1, :-1]``/``trans[:-1, -1]`` represent transitions for start/end positions respectively. 38 | lens (~torch.LongTensor): ``[batch_size]``. 39 | Sentence lengths for masking. Default: ``None``. 40 | 41 | Examples: 42 | >>> from supar import LinearChainCRF 43 | >>> batch_size, seq_len, n_tags = 2, 5, 4 44 | >>> lens = torch.tensor([3, 4]) 45 | >>> value = torch.randint(n_tags, (batch_size, seq_len)) 46 | >>> s1 = LinearChainCRF(torch.randn(batch_size, seq_len, n_tags), 47 | torch.randn(n_tags+1, n_tags+1), 48 | lens) 49 | >>> s2 = LinearChainCRF(torch.randn(batch_size, seq_len, n_tags), 50 | torch.randn(n_tags+1, n_tags+1), 51 | lens) 52 | >>> s1.max 53 | tensor([4.4120, 8.9672], grad_fn=) 54 | >>> s1.argmax 55 | tensor([[2, 0, 3, 0, 0], 56 | [3, 3, 3, 2, 0]]) 57 | >>> s1.log_partition 58 | tensor([ 6.3486, 10.9106], grad_fn=) 59 | >>> s1.log_prob(value) 60 | tensor([ -8.1515, -10.5572], grad_fn=) 61 | >>> s1.entropy 62 | tensor([3.4150, 3.6549], grad_fn=) 63 | >>> s1.kl(s2) 64 | tensor([4.0333, 4.3807], grad_fn=) 65 | """ 66 | 67 | def __init__(self, scores, trans=None, lens=None): 68 | super().__init__(scores, lens=lens) 69 | 70 | batch_size, seq_len, self.n_tags = scores.shape[:3] 71 | self.lens = scores.new_full((batch_size,), seq_len).long() if lens is None else lens 72 | self.mask = self.lens.unsqueeze(-1).gt(self.lens.new_tensor(range(seq_len))) 
73 | 74 | self.trans = self.scores.new_full((self.n_tags+1, self.n_tags+1), LogSemiring.one) if trans is None else trans 75 | 76 | def __repr__(self): 77 | return f"{self.__class__.__name__}(n_tags={self.n_tags})" 78 | 79 | def __add__(self, other): 80 | return LinearChainCRF(torch.stack((self.scores, other.scores), -1), 81 | torch.stack((self.trans, other.trans), -1), 82 | self.lens) 83 | 84 | @lazy_property 85 | def argmax(self): 86 | return self.lens.new_zeros(self.mask.shape).masked_scatter_(self.mask, torch.where(self.backward(self.max.sum()))[2]) 87 | 88 | def topk(self, k): 89 | preds = torch.stack([torch.where(self.backward(i))[2] for i in self.kmax(k).sum(0)], -1) 90 | return self.lens.new_zeros(*self.mask.shape, k).masked_scatter_(self.mask.unsqueeze(-1), preds) 91 | 92 | def score(self, value): 93 | scores, mask, value = self.scores.transpose(0, 1), self.mask.t(), value.t() 94 | prev, succ = torch.cat((torch.full_like(value[:1], -1), value[:-1]), 0), value 95 | # [seq_len, batch_size] 96 | alpha = scores.gather(-1, value.unsqueeze(-1)).squeeze(-1) 97 | # [batch_size] 98 | alpha = LogSemiring.prod(LogSemiring.one_mask(LogSemiring.mul(alpha, self.trans[prev, succ]), ~mask), 0) 99 | alpha = alpha + self.trans[value.gather(0, self.lens.unsqueeze(0) - 1).squeeze(0), torch.full_like(value[0], -1)] 100 | return alpha 101 | 102 | def forward(self, semiring): 103 | # [seq_len, batch_size, n_tags, ...] 
104 | scores = semiring.convert(self.scores.transpose(0, 1)) 105 | trans = semiring.convert(self.trans) 106 | mask = self.mask.t() 107 | 108 | # [batch_size, n_tags] 109 | alpha = semiring.mul(trans[-1, :-1], scores[0]) 110 | for i in range(1, len(mask)): 111 | alpha[mask[i]] = semiring.mul(semiring.dot(alpha.unsqueeze(2), trans[:-1, :-1], 1), scores[i])[mask[i]] 112 | alpha = semiring.dot(alpha, trans[:-1, -1], 1) 113 | return semiring.unconvert(alpha) 114 | -------------------------------------------------------------------------------- /keyword_extraction/cellular_text_converter.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is the public release of the code of our paper titled 3 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 4 | Language Specifications" (USENIX Security '24) 5 | Author: Abdullah Al Ishtiaq and Syed Md Mukit Rashid 6 | Contact: abdullah.ishtiaq@psu.edu 7 | 8 | Licensed under the Apache License, Version 2.0 (the "License"); 9 | you may not use this file except in compliance with the License. 10 | You may obtain a copy of the License at 11 | 12 | https://www.apache.org/licenses/LICENSE-2.0 13 | 14 | Unless required by applicable law or agreed to in writing, software 15 | distributed under the License is distributed on an "AS IS" BASIS, 16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | See the License for the specific language governing permissions and 18 | limitations under the License. 
19 | """ 20 | 21 | import pprint 22 | 23 | import find_capital_keywords 24 | import gather_keyword_pdf 25 | import ie_from_pdf 26 | import enchant 27 | 28 | import re 29 | 30 | INPUT_FILENAME = 'assets/5g-rrc.txt' 31 | 32 | 33 | def convert_to_keyword(file_name, keyword_dict, suffix, check_all_upper=False): 34 | with open(file_name + ".txt", "r") as f: 35 | with open(file_name + "_" + suffix + ".txt", "w") as f2: 36 | s = f.readlines() 37 | for org_sen in s: 38 | sen = org_sen.split("\n")[0].replace("- ", "-") 39 | for k, v in sorted(keyword_dict.items(), key=lambda item: len(item[0]), reverse=True): 40 | for phrase in sorted(v, key=lambda item: len(item), reverse=True): 41 | indices = [m.start() for m in re.finditer(phrase.lower(), sen.lower())] 42 | while len(indices) > 0: 43 | idx = indices[0] 44 | if not (idx + len(phrase) < len(sen) and sen[idx + len(phrase)].isalpha()) and \ 45 | (not check_all_upper or 46 | sen[idx: idx + len(phrase)] == sen[idx:idx + len(phrase)].upper()): 47 | sen = sen[:idx] + k + "_" + sen[idx + len(phrase):] 48 | indices = [m.start() for m in re.finditer(phrase.lower(), sen.lower())] 49 | else: 50 | indices = indices[1:] 51 | 52 | f2.write(sen + "\n") 53 | 54 | return file_name + "_" + suffix 55 | 56 | 57 | def convert_firstquotes(file_name, suffix): 58 | quote_dict = dict() 59 | with open(file_name, "r") as f: 60 | with open(file_name + "_" + suffix + ".txt", "w") as f2: 61 | s = f.readlines() 62 | for org_sen in s: 63 | sen = org_sen.split("\n")[0].replace("- ", "-") 64 | split_by_quote = str(sen).split("\"") 65 | if len(split_by_quote) % 2 == 0: 66 | f2.write(sen + "\n") 67 | continue 68 | sen = "" 69 | if len(split_by_quote) > 1: 70 | for i in range(0, len(split_by_quote), 2): 71 | if i + 1 < len(split_by_quote): 72 | if len(split_by_quote[i + 1].strip()) > 1 \ 73 | and len(split_by_quote[i + 1].split(" ")) < 8: 74 | content = split_by_quote[i + 1].strip() 75 | key = content.replace(" ", "_").lower().replace("(", "").replace(")", 
"") 76 | if key not in quote_dict.keys(): 77 | quote_dict[key] = [content] 78 | else: 79 | pass 80 | sen += split_by_quote[i] + key + "_" 81 | else: 82 | if len(split_by_quote[i + 1].strip()) >= 2: 83 | sen = sen + split_by_quote[i] + "\"" + split_by_quote[i + 1] + "\"" 84 | else: 85 | sen = sen + split_by_quote[i] + "\"" + split_by_quote[i + 1] 86 | else: 87 | sen += split_by_quote[i] 88 | else: 89 | sen = org_sen.split("\n")[0].replace("- ", "-") 90 | 91 | f2.write(sen + "\n") 92 | 93 | return file_name + "_" + suffix, quote_dict 94 | 95 | 96 | dictionary = enchant.Dict("en_US") 97 | pp = pprint.PrettyPrinter(width=200) 98 | 99 | message_dict_pdf = gather_keyword_pdf.gather_messages_and_procedures()[0] 100 | procedures_dict_pdf = gather_keyword_pdf.gather_messages_and_procedures()[1] 101 | state_dict_pdf = gather_keyword_pdf.gather_state() 102 | variable_dict_pdf = gather_keyword_pdf.gather_vars() 103 | ie_dict_pdf_purified = ie_from_pdf.get_IE_keywords_dict(True) 104 | 105 | 106 | def find_dictionaries(): 107 | fn, quote_dict = convert_firstquotes(INPUT_FILENAME, 'quote') 108 | 109 | fn2, msg_dict_cap = find_capital_keywords.get_message_keywords(fn) 110 | fn3, state_dict_cap = find_capital_keywords.get_state_keywords(fn2) 111 | fn4, status_dict_cap = find_capital_keywords.get_status_keywords(fn3) 112 | fn5, mode_dict_cap = find_capital_keywords.get_mode_keywords(fn4) 113 | 114 | return msg_dict_cap, message_dict_pdf, procedures_dict_pdf, state_dict_pdf, ie_dict_pdf_purified, mode_dict_cap, state_dict_cap, \ 115 | status_dict_cap, variable_dict_pdf 116 | 117 | -------------------------------------------------------------------------------- /keyword_extraction/gather_keyword_pdf.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is the public release of the code of our paper titled 3 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 4 | Language 
Specifications" (USENIX Security '24) 5 | Author: Abdullah Al Ishtiaq and Syed Md Mukit Rashid 6 | Contact: abdullah.ishtiaq@psu.edu 7 | 8 | Licensed under the Apache License, Version 2.0 (the "License"); 9 | you may not use this file except in compliance with the License. 10 | You may obtain a copy of the License at 11 | 12 | https://www.apache.org/licenses/LICENSE-2.0 13 | 14 | Unless required by applicable law or agreed to in writing, software 15 | distributed under the License is distributed on an "AS IS" BASIS, 16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | See the License for the specific language governing permissions and 18 | limitations under the License. 19 | """ 20 | 21 | import os.path 22 | import pprint 23 | import PyPDF2 24 | 25 | pp = pprint.PrettyPrinter(width=200) 26 | INPUT_FILENAME = '5g-rrc.pdf' 27 | 28 | 29 | def get_definitions(): 30 | definition_keywords = dict() 31 | if not os.path.exists("assets/definitions.txt"): 32 | return definition_keywords 33 | 34 | with open("assets/definitions.txt", "r") as f: 35 | s = f.readlines() 36 | f.close() 37 | for org_sen in s: 38 | org_sen = org_sen.split("\n")[0] 39 | if ':' in org_sen: 40 | keyterm = org_sen.split(":")[0] 41 | if "note" not in keyterm.lower(): 42 | key = keyterm.lower().replace("\"", "").replace("\'", "").replace(" ", "_") 43 | definition_keywords[key + "_"] = [keyterm] 44 | 45 | return definition_keywords 46 | 47 | 48 | def gather_messages_and_procedures(): 49 | pdfFileObj = open(INPUT_FILENAME, 'rb') 50 | pdfReader = PyPDF2.PdfReader(pdfFileObj) 51 | message_dict = dict() 52 | procedures_dict = dict() 53 | 54 | last_section = "" 55 | for i in range(1, 24): 56 | pageObj = pdfReader.pages[i] 57 | lines = pageObj.extract_text().split("\n") 58 | for line in lines: 59 | if "..." 
not in line: 60 | continue 61 | 62 | line_splits = line.split() 63 | if len(line_splits) < 2: 64 | continue 65 | 66 | section = line_splits[0] 67 | 68 | if len(section) > 0 and section[0].isnumeric(): 69 | last_section = section 70 | 71 | section_splits = section.split(".") 72 | if not len(section_splits) == 3: 73 | continue 74 | 75 | if section_splits[0] == "5" and int(section_splits[1][0]) > 2: 76 | proc_text = " ".join(line_splits[1: -2]).replace(".", "") 77 | 78 | proc_key = proc_text.lower().replace("-", "_").replace(" ", "_").replace("/", "_") 79 | 80 | if "introduction" in proc_key or "void" in proc_key or proc_key == "" or proc_key == "general": 81 | continue 82 | 83 | if proc_key in procedures_dict and proc_text not in procedures_dict[proc_key]: 84 | procedures_dict[proc_key].append(proc_text) 85 | else: 86 | procedures_dict[proc_key] = [proc_text] 87 | 88 | elif last_section.startswith("6.2.2") and section == "–": 89 | msg_text = line_splits[1].replace(".", "") 90 | msg_key = msg_text.lower().replace("-", "_") 91 | if msg_key in message_dict and msg_text not in message_dict[msg_key]: 92 | message_dict[msg_key].append(msg_text) 93 | else: 94 | message_dict[msg_key] = [msg_text] 95 | 96 | pdfFileObj.close() 97 | return message_dict, procedures_dict 98 | 99 | 100 | def gather_vars(): 101 | pdfFileObj = open(INPUT_FILENAME, 'rb') 102 | pdfReader = PyPDF2.PdfReader(pdfFileObj) 103 | vars_dict = dict() 104 | 105 | last_section = "" 106 | for i in range(1, 24): 107 | pageObj = pdfReader.pages[i] 108 | lines = pageObj.extract_text().split("\n") 109 | for line in lines: 110 | if "..." 
not in line: 111 | continue 112 | 113 | line_splits = line.split() 114 | if len(line_splits) < 2: 115 | continue 116 | 117 | section = line_splits[0] 118 | 119 | if len(section) > 0 and section[0].isnumeric(): 120 | last_section = section 121 | 122 | elif last_section.startswith("7.4") and section == "–": 123 | var_text = line_splits[1].replace(".", "") 124 | var_key = var_text.lower().replace("-", "_") 125 | if var_key in vars_dict and var_text not in vars_dict[var_key]: 126 | vars_dict[var_key].append(var_text) 127 | else: 128 | vars_dict[var_key] = [var_text] 129 | 130 | pdfFileObj.close() 131 | return vars_dict 132 | 133 | 134 | def gather_state(): 135 | pdfFileObj = open(INPUT_FILENAME, 'rb') 136 | pdfReader = PyPDF2.PdfReader(pdfFileObj) 137 | state_dict = dict() 138 | 139 | pdfFileObj.close() 140 | return state_dict 141 | 142 | 143 | def get_abbreviations(): 144 | abbreviations_keyword = dict() 145 | if not os.path.exists("assets/abbreviations.txt"): 146 | return abbreviations_keyword 147 | 148 | with open("assets/abbreviations.txt", "r") as f: 149 | s = f.readlines() 150 | f.close() 151 | for line in s: 152 | line_ = line.split("\n")[0] 153 | words = line_.split(" ") 154 | abbreviation = words[0] 155 | meaning = line_.split(abbreviation)[1] 156 | abbreviations_keyword[abbreviation.replace("\'", "")] = [meaning.strip()] 157 | 158 | return abbreviations_keyword 159 | -------------------------------------------------------------------------------- /synthesizers/defs-saved.json: -------------------------------------------------------------------------------- 1 | { 2 | "id2agent": { 3 | "ue": ["UE", "subscriber", "user"], 4 | "mme": ["MME", "network", "Mobility Management Entity"], 5 | "core_network": ["core network"], 6 | "esm_entity": ["evolved packet system session management entity", "esm entity"], 7 | "amf": ["amf"], 8 | "emm_entity": ["EMM entity", "receiver"] 9 | }, 10 | 11 | "id2verb": { 12 | "set": ["set", "assign", "populate"], 13 | "reset": ["reset"], 
14 | "equal": ["equal"], 15 | "increase": ["increase", "increment"], 16 | "decrease": ["decrease", "decrement"], 17 | "send": ["send", "sent", "sending", "transmit", "return"], 18 | "pass": ["pass"], 19 | "forward": ["forward"], 20 | "respond": ["respond"], 21 | "process": ["process"], 22 | "receive": ["receive", "receipt", "reception"], 23 | "start": ["start", "restart"], 24 | "initiate": ["initiate", "initiating", "invoke"], 25 | "activate": ["activate"], 26 | "deactivate": ["deactivate"], 27 | "enable": ["enable", "enabling"], 28 | "disable": ["disable", "disabling"], 29 | "stop": ["stop", "stopped", "stopping", "stops", "terminate", "abort", "cancel"], 30 | "suspend": ["suspend", "pause"], 31 | "expire" : ["expire", "expiry", "timeout"], 32 | "add": ["add", "added", "adding", "adds"], 33 | "delete": ["delete", "remove", "erase"], 34 | "accept": ["accept"], 35 | "reject": ["reject"], 36 | "apply": ["apply"], 37 | "include": ["include", "contain", "including"], 38 | "ignore": ["ignore", "drop", "dropped", "dropping", "drops", "discard"], 39 | "wait": ["wait", "waited", "waiting", "waits"], 40 | "complete": ["complete", "completion", "success"], 41 | "establish": ["establish"], 42 | "create": ["create"], 43 | "derive": ["derive", "derivation"], 44 | "handover": ["handover"], 45 | "authenticate": ["authenticate"], 46 | "replace": ["replace", "overwrite"], 47 | "attempt": ["attempt", "try", "tried", "trying", "tries"], 48 | "perform": ["perform", "execut"], 49 | "save": ["store", "memorize", "save", "saved", "saves", "saving", "hold", "holds", "held", "holding", "keep", "keeps", "keeping", "kept"], 50 | "support": ["support"], 51 | "become": ["become"], 52 | "exist": ["exist", "have"], 53 | "generate": ["generate"], 54 | "release": ["release"], 55 | "consider": ["consider", "mark", "regard"], 56 | "find": ["find", "finding", "found", "finds"], 57 | "indicate": ["indicate"], 58 | "request": ["request"], 59 | "enter": ["enter"], 60 | "leave": ["leave", "leaves", 
"left"], 61 | "update": ["update"], 62 | "provide": ["provide", "give", "given", "giving", "gave", "gives"], 63 | "know": ["know", "knew", "known", "knowing", "has", "have"], 64 | "fail": ["fail", "fails", "failed", "failing", "unsuccessful"], 65 | "camp": ["camp", "camping", "camped", "camps"], 66 | "select": ["select", "choose", "chose", "chosen"], 67 | "operate": ["operate"], 68 | "take": ["take"], 69 | "use": ["use", "uses", "using", "taken into use", "used"], 70 | "exchange": ["exchange"], 71 | "continue": ["continue", "resume", "progress"], 72 | "change": ["change", "alter"], 73 | "wrap": ["wrap", "wraps", "wrapping", "wrapped"], 74 | "calculate": ["calculate", "compute"], 75 | "check": ["check"], 76 | "maintain": ["maintain"], 77 | "match": ["match"], 78 | "differ": ["differ"], 79 | "cipher": ["cipher", "encipher"], 80 | "protect": ["protect"], 81 | "configure": ["configure"], 82 | "require": ["require", "need", "needs", "needed", "needing", "decide"], 83 | "want": ["want", "wants", "wanted", "wanting", "wish", "wishes", "wished", "wishing"] 84 | }, 85 | 86 | "id2adj": { 87 | "ready": ["ready"], 88 | "running": ["running"], 89 | "available" : ["available"], 90 | "valid" : ["valid", "correct"], 91 | "invalid" : ["invalid", "incorrect"], 92 | "present": ["present"], 93 | "absent": ["absent"], 94 | "native": ["native"], 95 | "new": ["new"], 96 | "old": ["old"], 97 | "replayed": ["replayed"], 98 | "ciphered": ["ciphered"], 99 | "partially_ciphered": ["partially ciphered"], 100 | "unciphered": ["unciphered"], 101 | "integrity_protected": ["integrity protected"], 102 | "out_of_range": ["out of range"], 103 | "different": ["different"], 104 | "same": ["same"], 105 | "restricted": ["restricted"] 106 | }, 107 | 108 | "id2state" : { 109 | 110 | }, 111 | 112 | "id2message": { 113 | 114 | }, 115 | 116 | "id2procedure": { 117 | 118 | }, 119 | 120 | "id2event" : { 121 | 122 | 123 | }, 124 | 125 | "id2timer" : { 126 | 127 | }, 128 | 129 | "id2counter": { 130 | 131 | }, 132 
| 133 | "id2var" : { 134 | 135 | }, 136 | 137 | "id2mode": { 138 | 139 | }, 140 | 141 | "id2service":{ 142 | 143 | }, 144 | 145 | "id2field_val": { 146 | 147 | }, 148 | 149 | "id2msg_field": { 150 | 151 | }, 152 | 153 | "id2cause": { 154 | 155 | }, 156 | 157 | "id2misc": { 158 | 159 | }, 160 | 161 | "id2other": { 162 | "otherwise": ["otherwise"], 163 | "other": ["other"], 164 | "security" : ["security"], 165 | "response": ["response"], 166 | "counter": ["counter"], 167 | "maximum": ["maximum"], 168 | "minimum": ["minimum"], 169 | "last_counter": ["this counter", "that counter", "the counter"], 170 | "last_message": ["this message", "that message", "the message"], 171 | "last_msg_field": ["the ie", "this ie", "this information element", "this value"], 172 | "last_timer": ["the timer", "this timer"], 173 | "last_procedure": ["the procedure", "this procedure"] 174 | }, 175 | 176 | "id2num": { 177 | "zero": ["zero", "0"], 178 | "one": ["one", "1"], 179 | "two": ["two", "2"], 180 | "three": ["three", "3"], 181 | "four": ["four", "4"], 182 | "five": ["five", "5"], 183 | "six": ["six", "6"], 184 | "seven": ["seven", "7"], 185 | "eight": ["eight", "8"], 186 | "nine": ["nine", "9"], 187 | "ten": ["ten", "10"] 188 | } 189 | 190 | } -------------------------------------------------------------------------------- /synthesizers/script_msg_defs_5.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is the public release of the code of our paper titled 3 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 4 | Language Specifications" (USENIX Security '24) 5 | Author: Abdullah Al Ishtiaq 6 | Contact: abdullah.ishtiaq@psu.edu 7 | 8 | Licensed under the Apache License, Version 2.0 (the "License"); 9 | you may not use this file except in compliance with the License. 
10 | You may obtain a copy of the License at 11 | 12 | https://www.apache.org/licenses/LICENSE-2.0 13 | 14 | Unless required by applicable law or agreed to in writing, software 15 | distributed under the License is distributed on an "AS IS" BASIS, 16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | See the License for the specific language governing permissions and 18 | limitations under the License. 19 | """ 20 | 21 | um_msg_list = { 22 | "authentication_response", "authentication_failure", "registration_request", "registration_complete", 23 | "ul_nas_transport", "service_request", "configuration_update_complete", "identity_response", 24 | "notification_response", "security_mode_complete", "security_mode_reject", "control_plane_service_request", 25 | "network_slice_specific_authentication_complete", "relay_key_request", "relay_authentication_response", 26 | 27 | "pdu_session_establishment_request", "pdu_session_authentication_complete", "pdu_session_modification_request", 28 | "pdu_session_modification_complete", "pdu_session_modification_command_reject", "pdu_session_release_request", 29 | "pdu_session_release_complete", "remote_ue_report", 30 | 31 | "tracking_area_update_request", 32 | 33 | "uplink_data" 34 | } 35 | 36 | mu_msg_list = { 37 | "authentication_request", "authentication_result", "authentication_reject", "registration_accept", 38 | "registration_reject", "dl_nas_transport", "service_accept", "service_reject", "configuration_update_command", 39 | "identity_request", "notification_message", "security_mode_command", 40 | "network_slice_specific_authentication_command", "network_slice_specific_authentication_result", "relay_key_accept", 41 | "relay_key_reject", "relay_authentication_request", 42 | 43 | "pdu_session_establishment_accept", "pdu_session_establishment_reject", "pdu_session_authentication_command", 44 | "pdu_session_authentication_result", "pdu_session_modification_reject", "pdu_session_modification_command", 45 
| "pdu_session_release_reject", "pdu_session_release_command", "five_gsm_status_message", "remote_ue_report_resp", 46 | 47 | "activate_default_eps_bearer_context_request", "downlink_data" 48 | } 49 | 50 | both_dir_msg_list = { 51 | "deregistration_request", "deregistration_accept", "security_protected_5gs_nas_message", "five_gmm_status_message", 52 | "five_gsm_status_message", 53 | "nas_message", "initial_nas_message", "five_gmm_message", "five_gsm_message", "user_data", 54 | "detach_request" 55 | } 56 | 57 | emm_sublayer_msg_list = { 58 | "authentication_response", "authentication_failure", "registration_request", "registration_complete", 59 | "ul_nas_transport", "service_request", "configuration_update_complete", "identity_response", 60 | "notification_response", "security_mode_complete", "security_mode_reject", "control_plane_service_request", 61 | "network_slice_specific_authentication_complete", "relay_key_request", "relay_authentication_response", 62 | "authentication_request", "authentication_result", "authentication_reject", "registration_accept", 63 | "registration_reject", "dl_nas_transport", "service_accept", "service_reject", "configuration_update_command", 64 | "identity_request", "notification_message", "security_mode_command", 65 | "network_slice_specific_authentication_command", "network_slice_specific_authentication_result", "relay_key_accept", 66 | "relay_key_reject", "relay_authentication_request", "five_gmm_message" 67 | 68 | 69 | } 70 | 71 | esm_sublayer_msg_list = { 72 | "pdu_session_establishment_request", "pdu_session_authentication_complete", "pdu_session_modification_request", 73 | "pdu_session_modification_complete", "pdu_session_modification_command_reject", "pdu_session_release_request", 74 | "pdu_session_release_complete", "pdu_session_establishment_accept", "pdu_session_establishment_reject", 75 | "pdu_session_authentication_command", "pdu_session_authentication_result", "pdu_session_modification_reject", 76 | 
"pdu_session_modification_command", "pdu_session_release_reject", "pdu_session_release_command", 77 | "five_gsm_status_message", "remote_ue_report", "remote_ue_report_resp", "five_gsm_message" 78 | 79 | } 80 | 81 | special_msg_list = { 82 | "nas_message", "initial_nas_message", "user_data", "uplink_signalling", "uplink_data", "downlink_signalling", 83 | "downlink_data", "five_gmm_message", "five_gsm_message" 84 | } 85 | 86 | msg_response = { 87 | "authentication_request": "authentication_response", 88 | "registration_request": "registration_accept", 89 | "identity_request": "identity_response", 90 | "service_request": "service_accept", 91 | "control_plane_service_request": "service_accept", 92 | "relay_key_request": "relay_key_accept", 93 | "relay_authentication_request": "relay_authentication_response", 94 | "security_mode_command": "security_mode_complete", 95 | "configuration_update_command": "configuration_update_complete", 96 | "network_slice_specific_authentication_command": "network_slice_specific_authentication_complete", 97 | "pdu_session_authentication_command": "pdu_session_authentication_complete", 98 | "pdu_session_modification_command": "pdu_session_modification_complete", 99 | "pdu_session_release_command": "pdu_session_release_complete", 100 | "pdu_session_establishment_request": "pdu_session_establishment_accept", 101 | "remote_ue_report": "remote_ue_report_resp" 102 | } 103 | 104 | mme_wait_for_message = { 105 | "registration_accept": "registration_resp", 106 | "identity_request": "identity_resp", 107 | "authentication_request": "auth_resp", 108 | "security_mode_command": "sm_resp", 109 | "configuration_update_command": "conf_resp", 110 | "network_slice_specific_authentication_command": "network_slice_auth_resp" 111 | } 112 | 113 | check_mme_wait_for = { 114 | "registration_complete": "registration_resp", 115 | "identity_response": "identity_resp", 116 | "authentication_response": "auth_resp", 117 | "authentication_failure": "auth_resp", 118 | 
"security_mode_complete": "sm_resp", 119 | "security_mode_reject": "sm_resp", 120 | "configuration_update_complete": "conf_resp", 121 | "network_slice_specific_authentication_complete": "network_slice_auth_resp" 122 | } 123 | 124 | -------------------------------------------------------------------------------- /neutrex/tree_to_xml/tree_cleanup.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is the public release of the code of our paper titled 3 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 4 | Language Specifications" (USENIX Security '24) 5 | Author: Abdullah Al Ishtiaq and Sarkar Snigdha Sarathi Das 6 | Contact: abdullah.ishtiaq@psu.edu 7 | 8 | Licensed under the Apache License, Version 2.0 (the "License"); 9 | you may not use this file except in compliance with the License. 10 | You may obtain a copy of the License at 11 | 12 | https://www.apache.org/licenses/LICENSE-2.0 13 | 14 | Unless required by applicable law or agreed to in writing, software 15 | distributed under the License is distributed on an "AS IS" BASIS, 16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | See the License for the specific language governing permissions and 18 | limitations under the License. 
19 | """ 20 | 21 | import argparse 22 | import copy 23 | 24 | import nltk 25 | from collections import defaultdict 26 | from nltk.tree import ParentedTree 27 | 28 | 29 | restrictions = defaultdict(list) 30 | restrictions['top'] = ['', '', '', ''] 31 | restrictions[''] = ['', '', '', '', ''] 32 | restrictions[''] = ['', '', ''] 33 | restrictions[''] = ['', '', ''] 34 | 35 | 36 | def insert_ptree_forced(target_tree, position, chunk): 37 | target_tree.insert(position, ParentedTree.fromstring(str(chunk))) 38 | 39 | def remove_parent(parent): 40 | gParent = parent.parent() 41 | target_pos = None 42 | 43 | for i, k in enumerate(gParent): 44 | if k == parent: 45 | target_pos = i 46 | break 47 | else: 48 | raise Exception("parent not found in gparent! Probably implementation issue") 49 | 50 | parent_copy = copy.deepcopy(parent) 51 | parent_copy.reverse() 52 | 53 | for child in parent_copy: 54 | gParent.insert(target_pos, ParentedTree.fromstring(str(child))) 55 | 56 | gParent.remove(parent) 57 | 58 | 59 | def make_sibling(parent, chunk): 60 | gParent = parent.parent() 61 | target_pos = None 62 | for i, k in enumerate(gParent): 63 | if k == parent: 64 | target_pos = i 65 | break 66 | else: 67 | raise Exception("parent not found in gparent! 
Probably implementation issue") 68 | 69 | insert_ptree_forced(gParent, target_pos, chunk) 70 | 71 | parent.remove(chunk) 72 | 73 | def fix_other(parent: ParentedTree, node: ParentedTree) -> None: 74 | remove_parent(parent) 75 | 76 | 77 | 78 | def fix_top_simple(parent: ParentedTree, node_idx: int) -> None: 79 | 80 | new_ctl_tree = ParentedTree('', [ParentedTree.fromstring(str(parent[node_idx]))]) 81 | parent.remove(parent[node_idx]) 82 | parent.insert(node_idx, new_ctl_tree) 83 | 84 | 85 | def fix_top(parent: ParentedTree, node_idx: int) -> None: 86 | end_idx = node_idx 87 | for idx in range(node_idx, len(parent)): 88 | child_node = parent[idx] 89 | if isinstance(child_node, str) or child_node.label() == "": 90 | break 91 | end_idx = idx+1 92 | 93 | new_ctl_tree = ParentedTree('', [ParentedTree.fromstring(str(parent[idx])) for idx in range(node_idx, end_idx)]) 94 | 95 | del parent[node_idx:end_idx] 96 | parent.insert(node_idx, new_ctl_tree) 97 | 98 | 99 | 100 | 101 | def fix_action(parent, node): 102 | # case 1: action -> action 103 | if len(parent) == 1 and node.label() == '': 104 | parent.set_label('') 105 | 106 | # case 2: base case. that is if there is any control/action under action, they'll be made siblings 107 | # make sure after making sibling this parent is not empty, if it is, delete it 108 | else: 109 | make_sibling(parent, node) 110 | if len(parent) == 0: 111 | parent.parent().remove(parent) 112 | 113 | 114 | def fix_condition(parent, node): 115 | # does it have both actions and conditions? then it should probably be control! 116 | child_labels = [k.label() for k in parent] 117 | if '' in child_labels and '' in child_labels: 118 | parent.set_label('') 119 | return 120 | else: 121 | # base case: if there is any control/action under condition. make it a sibling. 122 | # once again check for empty parent. 
123 | make_sibling(parent, node) 124 | if len(parent) == 0: 125 | parent.parent().remove(parent) 126 | return 127 | 128 | 129 | def parse_fix(tree: ParentedTree, enable_top = True): 130 | q = [] 131 | q.append(tree) 132 | 133 | while len(q) > 0: 134 | parent = q.pop(0) 135 | parent_label = parent.label() 136 | 137 | for idx, node in enumerate(parent): 138 | if enable_top and parent_label == "top" and not isinstance(node, str) and \ 139 | node.label() in restrictions[parent_label]: 140 | fix_top(parent, idx) 141 | # as tree changed, start parsing from start 142 | parse_fix(tree) 143 | return 144 | 145 | if parent_label == "" and not isinstance(node, str) and node.label() in restrictions[parent_label]: 146 | fix_other(parent, node) 147 | # as tree changed, start parsing from start 148 | parse_fix(tree) 149 | return 150 | 151 | elif parent_label == '' and not isinstance(node, str) and node.label() in restrictions[parent_label]: 152 | # violation found try action fixes 153 | fix_action(parent, node) 154 | # as tree changed, start parsing from start 155 | parse_fix(tree) 156 | return 157 | 158 | elif parent_label == '' and not isinstance(node, str) and node.label() in restrictions[parent_label]: 159 | # violation in condition, try condition_fixes 160 | fix_condition(parent, node) 161 | parse_fix(tree) 162 | return 163 | 164 | else: 165 | if not isinstance(node, str): 166 | pass 167 | 168 | if not isinstance(node, str): 169 | q.append(node) 170 | else: 171 | pass 172 | 173 | 174 | def clean_tree(input_tree_str: str) -> str: 175 | input_tree = ParentedTree.convert(nltk.Tree.fromstring(input_tree_str)) 176 | 177 | parse_fix(input_tree, enable_top=False) 178 | parse_fix(input_tree, enable_top=True) 179 | 180 | return str(input_tree) 181 | -------------------------------------------------------------------------------- /neutrex/supar/modules/affine.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | 
This is the public release of the code of our paper titled 4 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 5 | Language Specifications" (USENIX Security '24) 6 | Author: Abdullah Al Ishtiaq and Sarkar Snigdha Sarathi Das 7 | Contact: abdullah.ishtiaq@psu.edu 8 | 9 | Licensed under the Apache License, Version 2.0 (the "License"); 10 | you may not use this file except in compliance with the License. 11 | You may obtain a copy of the License at 12 | 13 | https://www.apache.org/licenses/LICENSE-2.0 14 | 15 | Unless required by applicable law or agreed to in writing, software 16 | distributed under the License is distributed on an "AS IS" BASIS, 17 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | See the License for the specific language governing permissions and 19 | limitations under the License. 20 | """ 21 | 22 | import torch 23 | import torch.nn as nn 24 | 25 | 26 | class Biaffine(nn.Module): 27 | r""" 28 | Biaffine layer for first-order scoring :cite:`dozat-etal-2017-biaffine`. 29 | 30 | This function has a tensor of weights :math:`W` and bias terms if needed. 31 | The score :math:`s(x, y)` of the vector pair :math:`(x, y)` is computed as :math:`x^T W y / d^s`, 32 | where `d` and `s` are vector dimension and scaling factor respectively. 33 | :math:`x` and :math:`y` can be concatenated with bias terms. 34 | 35 | Args: 36 | n_in (int): 37 | The size of the input feature. 38 | n_out (int): 39 | The number of output channels. 40 | scale (float): 41 | Factor to scale the scores. Default: 0. 42 | bias_x (bool): 43 | If ``True``, adds a bias term for tensor :math:`x`. Default: ``True``. 44 | bias_y (bool): 45 | If ``True``, adds a bias term for tensor :math:`y`. Default: ``True``. 
46 | """ 47 | 48 | def __init__(self, n_in, n_out=1, scale=0, bias_x=True, bias_y=True): 49 | super().__init__() 50 | 51 | self.n_in = n_in 52 | self.n_out = n_out 53 | self.scale = scale 54 | self.bias_x = bias_x 55 | self.bias_y = bias_y 56 | self.weight = nn.Parameter(torch.Tensor(n_out, n_in+bias_x, n_in+bias_y)) 57 | 58 | self.reset_parameters() 59 | 60 | def __repr__(self): 61 | s = f"n_in={self.n_in}" 62 | if self.n_out > 1: 63 | s += f", n_out={self.n_out}" 64 | if self.scale != 0: 65 | s += f", scale={self.scale}" 66 | if self.bias_x: 67 | s += f", bias_x={self.bias_x}" 68 | if self.bias_y: 69 | s += f", bias_y={self.bias_y}" 70 | 71 | return f"{self.__class__.__name__}({s})" 72 | 73 | def reset_parameters(self): 74 | nn.init.zeros_(self.weight) 75 | 76 | def forward(self, x, y): 77 | r""" 78 | Args: 79 | x (torch.Tensor): ``[batch_size, seq_len, n_in]``. 80 | y (torch.Tensor): ``[batch_size, seq_len, n_in]``. 81 | 82 | Returns: 83 | ~torch.Tensor: 84 | A scoring tensor of shape ``[batch_size, n_out, seq_len, seq_len]``. 85 | If ``n_out=1``, the dimension for ``n_out`` will be squeezed automatically. 86 | """ 87 | 88 | if self.bias_x: 89 | x = torch.cat((x, torch.ones_like(x[..., :1])), -1) 90 | if self.bias_y: 91 | y = torch.cat((y, torch.ones_like(y[..., :1])), -1) 92 | # [batch_size, n_out, seq_len, seq_len] 93 | s = torch.einsum('bxi,oij,byj->boxy', x, self.weight, y) / self.n_in ** self.scale 94 | # remove dim 1 if n_out == 1 95 | s = s.squeeze(1) 96 | 97 | return s 98 | 99 | 100 | class Triaffine(nn.Module): 101 | r""" 102 | Triaffine layer for second-order scoring :cite:`zhang-etal-2020-efficient,wang-etal-2019-second`. 103 | 104 | This function has a tensor of weights :math:`W` and bias terms if needed. 105 | The score :math:`s(x, y, z)` of the vector triple :math:`(x, y, z)` is computed as :math:`x^T z^T W y / d^s`, 106 | where `d` and `s` are vector dimension and scaling factor respectively. 
107 | :math:`x` and :math:`y` can be concatenated with bias terms. 108 | 109 | Args: 110 | n_in (int): 111 | The size of the input feature. 112 | n_out (int): 113 | The number of output channels. 114 | scale (float): 115 | Factor to scale the scores. Default: 0. 116 | bias_x (bool): 117 | If ``True``, adds a bias term for tensor :math:`x`. Default: ``False``. 118 | bias_y (bool): 119 | If ``True``, adds a bias term for tensor :math:`y`. Default: ``False``. 120 | """ 121 | 122 | def __init__(self, n_in, n_out=1, scale=0, bias_x=False, bias_y=False): 123 | super().__init__() 124 | 125 | self.n_in = n_in 126 | self.n_out = n_out 127 | self.scale = scale 128 | self.bias_x = bias_x 129 | self.bias_y = bias_y 130 | self.weight = nn.Parameter(torch.Tensor(n_out, n_in+bias_x, n_in, n_in+bias_y)) 131 | 132 | self.reset_parameters() 133 | 134 | def __repr__(self): 135 | s = f"n_in={self.n_in}" 136 | if self.n_out > 1: 137 | s += f", n_out={self.n_out}" 138 | if self.scale != 0: 139 | s += f", scale={self.scale}" 140 | if self.bias_x: 141 | s += f", bias_x={self.bias_x}" 142 | if self.bias_y: 143 | s += f", bias_y={self.bias_y}" 144 | 145 | return f"{self.__class__.__name__}({s})" 146 | 147 | def reset_parameters(self): 148 | nn.init.zeros_(self.weight) 149 | 150 | def forward(self, x, y, z): 151 | r""" 152 | Args: 153 | x (torch.Tensor): ``[batch_size, seq_len, n_in]``. 154 | y (torch.Tensor): ``[batch_size, seq_len, n_in]``. 155 | z (torch.Tensor): ``[batch_size, seq_len, n_in]``. 156 | 157 | Returns: 158 | ~torch.Tensor: 159 | A scoring tensor of shape ``[batch_size, n_out, seq_len, seq_len, seq_len]``. 160 | If ``n_out=1``, the dimension for ``n_out`` will be squeezed automatically. 
161 | """ 162 | 163 | if self.bias_x: 164 | x = torch.cat((x, torch.ones_like(x[..., :1])), -1) 165 | if self.bias_y: 166 | y = torch.cat((y, torch.ones_like(y[..., :1])), -1) 167 | w = torch.einsum('bzk,oikj->bozij', z, self.weight) 168 | # [batch_size, n_out, seq_len, seq_len, seq_len] 169 | s = torch.einsum('bxi,bozij,byj->bozxy', x, w, y) / self.n_in ** self.scale 170 | # remove dim 1 if n_out == 1 171 | s = s.squeeze(1) 172 | 173 | return s 174 | -------------------------------------------------------------------------------- /neutrex/supar/utils/metric.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | This is the public release of the code of our paper titled 4 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 5 | Language Specifications" (USENIX Security '24) 6 | Author: Abdullah Al Ishtiaq and Sarkar Snigdha Sarathi Das 7 | Contact: abdullah.ishtiaq@psu.edu 8 | 9 | Licensed under the Apache License, Version 2.0 (the "License"); 10 | you may not use this file except in compliance with the License. 11 | You may obtain a copy of the License at 12 | 13 | https://www.apache.org/licenses/LICENSE-2.0 14 | 15 | Unless required by applicable law or agreed to in writing, software 16 | distributed under the License is distributed on an "AS IS" BASIS, 17 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | See the License for the specific language governing permissions and 19 | limitations under the License. 
20 | """ 21 | 22 | from collections import Counter 23 | 24 | 25 | class Metric(object): 26 | 27 | def __lt__(self, other): 28 | return self.score < other 29 | 30 | def __le__(self, other): 31 | return self.score <= other 32 | 33 | def __ge__(self, other): 34 | return self.score >= other 35 | 36 | def __gt__(self, other): 37 | return self.score > other 38 | 39 | @property 40 | def score(self): 41 | return 0. 42 | 43 | 44 | class AttachmentMetric(Metric): 45 | 46 | def __init__(self, eps=1e-12): 47 | super().__init__() 48 | 49 | self.eps = eps 50 | 51 | self.n = 0.0 52 | self.n_ucm = 0.0 53 | self.n_lcm = 0.0 54 | self.total = 0.0 55 | self.correct_arcs = 0.0 56 | self.correct_rels = 0.0 57 | 58 | def __repr__(self): 59 | s = f"UCM: {self.ucm:6.2%} LCM: {self.lcm:6.2%} " 60 | s += f"UAS: {self.uas:6.2%} LAS: {self.las:6.2%}" 61 | return s 62 | 63 | def __call__(self, arc_preds, rel_preds, arc_golds, rel_golds, mask): 64 | lens = mask.sum(1) 65 | arc_mask = arc_preds.eq(arc_golds) & mask 66 | rel_mask = rel_preds.eq(rel_golds) & arc_mask 67 | arc_mask_seq, rel_mask_seq = arc_mask[mask], rel_mask[mask] 68 | 69 | self.n += len(mask) 70 | self.n_ucm += arc_mask.sum(1).eq(lens).sum().item() 71 | self.n_lcm += rel_mask.sum(1).eq(lens).sum().item() 72 | 73 | self.total += len(arc_mask_seq) 74 | self.correct_arcs += arc_mask_seq.sum().item() 75 | self.correct_rels += rel_mask_seq.sum().item() 76 | return self 77 | 78 | @property 79 | def score(self): 80 | return self.las 81 | 82 | @property 83 | def ucm(self): 84 | return self.n_ucm / (self.n + self.eps) 85 | 86 | @property 87 | def lcm(self): 88 | return self.n_lcm / (self.n + self.eps) 89 | 90 | @property 91 | def uas(self): 92 | return self.correct_arcs / (self.total + self.eps) 93 | 94 | @property 95 | def las(self): 96 | return self.correct_rels / (self.total + self.eps) 97 | 98 | 99 | class SpanMetric(Metric): 100 | 101 | def __init__(self, eps=1e-12): 102 | super().__init__() 103 | 104 | self.n = 0.0 105 | self.n_ucm 
= 0.0 106 | self.n_lcm = 0.0 107 | self.utp = 0.0 108 | self.ltp = 0.0 109 | self.pred = 0.0 110 | self.gold = 0.0 111 | self.eps = eps 112 | 113 | def __call__(self, preds, golds): 114 | for pred, gold in zip(preds, golds): 115 | upred, ugold = Counter([tuple(span[:-1]) for span in pred]), Counter([tuple(span[:-1]) for span in gold]) 116 | lpred, lgold = Counter([tuple(span) for span in pred]), Counter([tuple(span) for span in gold]) 117 | utp, ltp = list((upred & ugold).elements()), list((lpred & lgold).elements()) 118 | self.n += 1 119 | self.n_ucm += len(utp) == len(pred) == len(gold) 120 | self.n_lcm += len(ltp) == len(pred) == len(gold) 121 | self.utp += len(utp) 122 | self.ltp += len(ltp) 123 | self.pred += len(pred) 124 | self.gold += len(gold) 125 | return self 126 | 127 | def __repr__(self): 128 | s = f"UCM: {self.ucm:6.2%} LCM: {self.lcm:6.2%} " 129 | s += f"UP: {self.up:6.2%} UR: {self.ur:6.2%} UF: {self.uf:6.2%} " 130 | s += f"LP: {self.lp:6.2%} LR: {self.lr:6.2%} LF: {self.lf:6.2%}" 131 | 132 | return s 133 | 134 | @property 135 | def score(self): 136 | return self.lf 137 | 138 | @property 139 | def ucm(self): 140 | return self.n_ucm / (self.n + self.eps) 141 | 142 | @property 143 | def lcm(self): 144 | return self.n_lcm / (self.n + self.eps) 145 | 146 | @property 147 | def up(self): 148 | return self.utp / (self.pred + self.eps) 149 | 150 | @property 151 | def ur(self): 152 | return self.utp / (self.gold + self.eps) 153 | 154 | @property 155 | def uf(self): 156 | return 2 * self.utp / (self.pred + self.gold + self.eps) 157 | 158 | @property 159 | def lp(self): 160 | return self.ltp / (self.pred + self.eps) 161 | 162 | @property 163 | def lr(self): 164 | return self.ltp / (self.gold + self.eps) 165 | 166 | @property 167 | def lf(self): 168 | return 2 * self.ltp / (self.pred + self.gold + self.eps) 169 | 170 | 171 | class ChartMetric(Metric): 172 | 173 | def __init__(self, eps=1e-12): 174 | super(ChartMetric, self).__init__() 175 | 176 | self.tp = 0.0 
177 | self.utp = 0.0 178 | self.pred = 0.0 179 | self.gold = 0.0 180 | self.eps = eps 181 | 182 | def __call__(self, preds, golds): 183 | pred_mask = preds.ge(0) 184 | gold_mask = golds.ge(0) 185 | span_mask = pred_mask & gold_mask 186 | self.pred += pred_mask.sum().item() 187 | self.gold += gold_mask.sum().item() 188 | self.tp += (preds.eq(golds) & span_mask).sum().item() 189 | self.utp += span_mask.sum().item() 190 | return self 191 | 192 | def __repr__(self): 193 | return f"UP: {self.up:6.2%} UR: {self.ur:6.2%} UF: {self.uf:6.2%} P: {self.p:6.2%} R: {self.r:6.2%} F: {self.f:6.2%}" 194 | 195 | @property 196 | def score(self): 197 | return self.f 198 | 199 | @property 200 | def up(self): 201 | return self.utp / (self.pred + self.eps) 202 | 203 | @property 204 | def ur(self): 205 | return self.utp / (self.gold + self.eps) 206 | 207 | @property 208 | def uf(self): 209 | return 2 * self.utp / (self.pred + self.gold + self.eps) 210 | 211 | @property 212 | def p(self): 213 | return self.tp / (self.pred + self.eps) 214 | 215 | @property 216 | def r(self): 217 | return self.tp / (self.gold + self.eps) 218 | 219 | @property 220 | def f(self): 221 | return 2 * self.tp / (self.pred + self.gold + self.eps) 222 | -------------------------------------------------------------------------------- /synthesizers/sympy_expression_builder.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is the public release of the code of our paper titled 3 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 4 | Language Specifications" (USENIX Security '24) 5 | Author: Abdullah Al Ishtiaq and Syed Md Mukit Rashid 6 | Contact: abdullah.ishtiaq@psu.edu 7 | 8 | Licensed under the Apache License, Version 2.0 (the "License"); 9 | you may not use this file except in compliance with the License. 
10 | You may obtain a copy of the License at 11 | 12 | https://www.apache.org/licenses/LICENSE-2.0 13 | 14 | Unless required by applicable law or agreed to in writing, software 15 | distributed under the License is distributed on an "AS IS" BASIS, 16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | See the License for the specific language governing permissions and 18 | limitations under the License. 19 | """ 20 | 21 | from sympy import Symbol, And, Or, Not, Equality 22 | from sympy import simplify_logic 23 | 24 | special_chars = ['&', '|', '!', '(', ')', '='] 25 | 26 | 27 | def find_infix_exp(string_exp): 28 | vars_list = [] 29 | enums_list = [] 30 | e = str(string_exp).strip().replace("\n", "").replace(" ", "") 31 | var_name = "" 32 | exp_list = [] 33 | last_operator = "" 34 | for i in range(len(e)): 35 | if e[i] not in special_chars: 36 | var_name += str(e[i]) 37 | if i == len(e) - 1 and var_name != "": 38 | exp_list.append(var_name) 39 | if last_operator == "=" or last_operator == "!=": 40 | enums_list.append(var_name) 41 | else: 42 | vars_list.append(var_name) 43 | else: 44 | if var_name != "": 45 | exp_list.append(var_name) 46 | 47 | if last_operator == "=" or last_operator == "!=": 48 | enums_list.append(var_name) 49 | else: 50 | vars_list.append(var_name) 51 | 52 | var_name = "" 53 | if e[i - 1] == '!' and e[i] == '=': 54 | exp_list.append("!=") 55 | last_operator = "!=" 56 | elif e[i] == "!" 
and e[i + 1] == "=": 57 | continue 58 | else: 59 | exp_list.append(e[i]) 60 | last_operator = str(e[i]) 61 | 62 | return exp_list, vars_list, enums_list 63 | 64 | 65 | def find_postfix_exp(infix_exp): 66 | stack = [] 67 | operators = ['&', '|', '!=', '!', '(', ')', '='] 68 | precedence = {'!': 1, '!=': 2, '=': 2, '&': 3, '|': 4} 69 | postfix_exp = [] 70 | for i in range(len(infix_exp)): 71 | if infix_exp[i] not in operators: 72 | postfix_exp.append(infix_exp[i]) 73 | continue 74 | 75 | if infix_exp[i] == '(': 76 | stack.append(infix_exp[i]) 77 | continue 78 | 79 | if infix_exp[i] == ')': 80 | while len(stack) != 0 and stack[-1] != '(': 81 | postfix_exp.append(stack.pop()) 82 | stack.pop() 83 | continue 84 | 85 | if infix_exp[i] in operators: 86 | if len(stack) == 0 or stack[-1] == '(': 87 | stack.append(infix_exp[i]) 88 | else: 89 | while len(stack) != 0 and stack[-1] != '(' and precedence[infix_exp[i]] >= precedence[stack[-1]]: 90 | postfix_exp.append(stack.pop()) 91 | stack.append(infix_exp[i]) 92 | 93 | while len(stack) != 0: 94 | postfix_exp.append(stack.pop()) 95 | 96 | return postfix_exp 97 | 98 | 99 | def evaluate_exp(postfix_exp): 100 | operators = ['&', '|', '!=', '!', '(', ')', '='] 101 | stack = [] 102 | for i in range(len(postfix_exp)): 103 | if postfix_exp[i] not in operators: 104 | stack.append(Symbol(str(postfix_exp[i]))) 105 | continue 106 | else: 107 | 108 | if postfix_exp[i] == '&': 109 | try: 110 | a = stack.pop() 111 | except: 112 | a = None 113 | try: 114 | b = stack.pop() 115 | except: 116 | b = None 117 | 118 | if "coin_toss" in str(a): 119 | a = None 120 | if "coin_toss" in str(b): 121 | b = None 122 | 123 | if a is not None and b is not None: 124 | c = And(b, a) 125 | stack.append(c) 126 | elif a is not None: 127 | stack.append(a) 128 | elif b is not None: 129 | stack.append(b) 130 | 131 | elif postfix_exp[i] == '|': 132 | try: 133 | a = stack.pop() 134 | except: 135 | a = None 136 | try: 137 | b = stack.pop() 138 | except: 139 | b = None 
140 | 141 | if "coin_toss" in str(a): 142 | a = None 143 | if "coin_toss" in str(b): 144 | b = None 145 | 146 | if a is not None and b is not None: 147 | c = Or(b, a) 148 | stack.append(c) 149 | elif a is not None: 150 | stack.append(a) 151 | elif b is not None: 152 | stack.append(b) 153 | elif postfix_exp[i] == '!': 154 | try: 155 | a = stack.pop() 156 | if "coin_toss" not in str(a): 157 | c = Not(a) 158 | stack.append(c) 159 | except IndexError: 160 | pass 161 | 162 | elif postfix_exp[i] == '=': 163 | try: 164 | a = stack.pop() 165 | b = stack.pop() 166 | if "coin_toss" not in str(a) or "coin_toss" not in str(b): 167 | c = Equality(b, a) 168 | stack.append(c) 169 | except IndexError: 170 | pass 171 | elif postfix_exp[i] == "!=": 172 | try: 173 | a = stack.pop() 174 | b = stack.pop() 175 | if "coin_toss" not in str(a) or "coin_toss" not in str(b): 176 | c = Not(Equality(b, a)) 177 | stack.append(c) 178 | except IndexError: 179 | pass 180 | if len(stack) == 0: 181 | return None 182 | return stack.pop() 183 | 184 | 185 | def get_sympy_simplified_expression(condition_str): 186 | infix, vars_list, enums_list = find_infix_exp(condition_str) 187 | postfix = find_postfix_exp(infix) 188 | exp = evaluate_exp(postfix) 189 | 190 | if exp is None: 191 | return "" 192 | 193 | result = str(simplify_logic(exp, force=True)) 194 | result = result.replace("~", "!") 195 | return result 196 | -------------------------------------------------------------------------------- /synthesizers/script_db_handler.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is the public release of the code of our paper titled 3 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 4 | Language Specifications" (USENIX Security '24) 5 | Author: Abdullah Al Ishtiaq 6 | Contact: abdullah.ishtiaq@psu.edu 7 | 8 | Licensed under the Apache License, Version 2.0 (the "License"); 9 | you may not 
use this file except in compliance with the License. 10 | You may obtain a copy of the License at 11 | 12 | https://www.apache.org/licenses/LICENSE-2.0 13 | 14 | Unless required by applicable law or agreed to in writing, software 15 | distributed under the License is distributed on an "AS IS" BASIS, 16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | See the License for the specific language governing permissions and 18 | limitations under the License. 19 | """ 20 | 21 | import time 22 | 23 | import sqlite3 24 | from sqlite3 import OperationalError, IntegrityError, DataError 25 | 26 | import script_config 27 | 28 | INF = 9999 29 | KEYWORD_DB_TABLE = script_config.keyword_db_table 30 | min_keyword_dist_cache = {} 31 | 32 | CONN_CLOSED = True 33 | 34 | 35 | def get_new_conn_cursor(): 36 | global CONN_CLOSED 37 | db_conn = sqlite3.connect("hermes.sqlite") 38 | db_cursor = db_conn.cursor() 39 | 40 | try: 41 | db_cursor.execute("CREATE TABLE {} (Substring TEXT NOT NULL, MatchedString TEXT NOT NULL, " 42 | "Keyword TEXT NOT NULL, Distance INTEGER NOT NULL, MatchedStringLen INTEGER NOT NULL, " 43 | "PRIMARY KEY (Substring, MatchedString));".format(KEYWORD_DB_TABLE)) 44 | except OperationalError: 45 | pass 46 | 47 | CONN_CLOSED = False 48 | return db_conn, db_cursor 49 | 50 | 51 | def db_commit(db_conn): 52 | db_conn.commit() 53 | 54 | 55 | def close_connection(db_conn, db_cursor): 56 | global CONN_CLOSED 57 | if not CONN_CLOSED: 58 | db_cursor.close() 59 | db_conn.close() 60 | CONN_CLOSED = True 61 | 62 | 63 | def check_conn_closed(): 64 | return CONN_CLOSED 65 | 66 | 67 | def update_substring_keyword_distance(db_conn, db_cursor, substring: str, matched_string: str, keyword: str, 68 | distance: int): 69 | if len(substring) > 180 or len(matched_string) > 180 or len(keyword) > 180: 70 | print("Length too long for :", substring, matched_string, keyword, distance) 71 | return 72 | 73 | sql = "UPDATE {} SET keyword = ?, Distance = ? 
WHERE Substring = ? AND MatchedString = ?".format(KEYWORD_DB_TABLE) 74 | val = (keyword, distance, substring, matched_string) 75 | try: 76 | db_cursor.execute(sql, val) 77 | except OperationalError: 78 | print("Sleeping for a bit...") 79 | time.sleep(5.0) 80 | update_substring_keyword_distance(db_conn, db_cursor, substring, matched_string, keyword, distance) 81 | 82 | 83 | def insert_substring_keyword_distance(db_conn, db_cursor, substring: str, matched_string: str, keyword: str, 84 | distance: int, force_update=False, thread_num=0): 85 | if len(substring) > 180 or len(matched_string) > 180 or len(keyword) > 180: 86 | print("Length too long for :", substring, matched_string, keyword, distance) 87 | return 88 | 89 | sql = "INSERT INTO {} (Substring, MatchedString, Keyword, Distance, MatchedStringLen) " \ 90 | "VALUES (?, ?, ?, ?, ?)".format(KEYWORD_DB_TABLE) 91 | val = (substring, matched_string, keyword, distance, len(matched_string)) 92 | 93 | try: 94 | db_cursor.execute(sql, val) 95 | except IntegrityError: 96 | if force_update: 97 | update_substring_keyword_distance(db_conn, db_cursor, substring, matched_string, keyword, distance) 98 | except DataError: 99 | print("DataError for :", val) 100 | return 101 | except OperationalError: 102 | print("Thread {}: Sleeping for a bit...".format(thread_num)) 103 | time.sleep(5.0) 104 | print("Thread {}: Resuming...".format(thread_num)) 105 | insert_substring_keyword_distance(db_conn, db_cursor, substring, matched_string, keyword, distance, 106 | force_update, thread_num=thread_num) 107 | 108 | 109 | def insert_substring_keyword_distance_batch(db_conn, db_cursor, insert_list, force_update=False, thread_num=0): 110 | for item in insert_list: 111 | insert_substring_keyword_distance(db_conn, db_cursor, item[0], item[1], item[2], item[3], 112 | force_update, thread_num=thread_num) 113 | db_commit(db_conn) 114 | 115 | 116 | def lookup_substring_keyword_distance(db_cursor, substring: str, matched_string: str) -> (str, int): 117 | 
sql = "SELECT Keyword, Distance FROM {} WHERE Substring = ? AND MatchedString = ?".format(KEYWORD_DB_TABLE) 118 | val = (substring, matched_string) 119 | db_cursor.execute(sql, val) 120 | db_result = db_cursor.fetchall() 121 | 122 | if len(db_result) > 0: 123 | return db_result[0][0], db_result[0][1] 124 | else: 125 | return "", INF 126 | 127 | 128 | def get_min_keyword_distance(db_cursor, substring: str) -> (str, str, int): 129 | if substring in min_keyword_dist_cache: 130 | return min_keyword_dist_cache[substring] 131 | 132 | sql = "select * from {} " \ 133 | "where Substring = ? " \ 134 | "and Distance = (select min(Distance) from {} where Substring = ?) " \ 135 | "order by MatchedStringLen DESC " \ 136 | "limit 1".format(KEYWORD_DB_TABLE, KEYWORD_DB_TABLE) 137 | val = (substring, substring) 138 | db_cursor.execute(sql, val) 139 | db_result = db_cursor.fetchall() 140 | 141 | if len(db_result) > 0: 142 | result = (db_result[0][1], db_result[0][2], db_result[0][3]) 143 | else: 144 | result = ("", "", INF) 145 | 146 | min_keyword_dist_cache[substring] = result 147 | return result 148 | 149 | 150 | def substring_in_db(db_cursor, substring: str) -> bool: 151 | sql = "select * from {} " \ 152 | "where Substring = ? " \ 153 | "limit 1".format(KEYWORD_DB_TABLE) 154 | val = (substring,) 155 | db_cursor.execute(sql, val) 156 | db_result = db_cursor.fetchall() 157 | return len(db_result) > 0 158 | 159 | 160 | def matched_string_in_db(db_cursor, matched_string: str) -> bool: 161 | sql = "select * from {} " \ 162 | "where MatchedString = ? 
" \ 163 | "limit 1".format(KEYWORD_DB_TABLE) 164 | val = (matched_string,) 165 | db_cursor.execute(sql, val) 166 | db_result = db_cursor.fetchall() 167 | return len(db_result) > 0 168 | 169 | 170 | def delete_matched_string(db_conn, db_cursor, matched_string: str): 171 | sql = "delete from {} " \ 172 | "where MatchedString = ?".format(KEYWORD_DB_TABLE) 173 | val = (matched_string,) 174 | try: 175 | db_cursor.execute(sql, val) 176 | except OperationalError: 177 | print("Sleeping for a bit...") 178 | time.sleep(5.0) 179 | delete_matched_string(db_conn, db_cursor, matched_string) 180 | -------------------------------------------------------------------------------- /keyword_extraction/categorize_keywords.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is the public release of the code of our paper titled 3 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 4 | Language Specifications" (USENIX Security '24) 5 | Author: Abdullah Al Ishtiaq and Syed Md Mukit Rashid 6 | Contact: abdullah.ishtiaq@psu.edu 7 | 8 | Licensed under the Apache License, Version 2.0 (the "License"); 9 | you may not use this file except in compliance with the License. 10 | You may obtain a copy of the License at 11 | 12 | https://www.apache.org/licenses/LICENSE-2.0 13 | 14 | Unless required by applicable law or agreed to in writing, software 15 | distributed under the License is distributed on an "AS IS" BASIS, 16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | See the License for the specific language governing permissions and 18 | limitations under the License. 
19 | """ 20 | 21 | import pickle 22 | import pprint 23 | 24 | pp = pprint.PrettyPrinter(width=200) 25 | 26 | 27 | def get_dictionaries(): 28 | a_file = open("keyword_dict_np_merged.pkl", "rb") 29 | output = pickle.load(a_file) 30 | keyword_dict = dict(output) 31 | a_file.close() 32 | 33 | message_suffixes = ['message', 'messages', 'request'] 34 | message_dictionary = dict() 35 | remove_list = [] 36 | for k, v in keyword_dict.items(): 37 | for phrase in v: 38 | for suffix in message_suffixes: 39 | if str(phrase).lower().endswith(suffix.lower()): 40 | message_dictionary[k] = v 41 | remove_list.append(k) 42 | 43 | for key in list(set(remove_list)): 44 | del keyword_dict[key] 45 | 46 | procedure_suffixes = ['procedure', 'procedures'] 47 | procedure_dictionary = dict() 48 | remove_list = [] 49 | for k, v in keyword_dict.items(): 50 | for phrase in v: 51 | for suffix in procedure_suffixes: 52 | if str(phrase).lower().endswith(suffix.lower()): 53 | procedure_dictionary[k] = v 54 | remove_list.append(k) 55 | 56 | for key in list(set(remove_list)): 57 | del keyword_dict[key] 58 | 59 | counter_suffixes = ['count', 'counter', 'counters'] 60 | counter_dictionary = dict() 61 | remove_list = [] 62 | for k, v in keyword_dict.items(): 63 | for phrase in v: 64 | for suffix in counter_suffixes: 65 | if str(phrase).lower().endswith(suffix.lower()): 66 | counter_dictionary[k] = v 67 | remove_list.append(k) 68 | 69 | for key in list(set(remove_list)): 70 | del keyword_dict[key] 71 | 72 | service_substrings = ['optimization', 'optimisation', 'service', 'services', 'bearer service', 'bearer services', 73 | 'signalling connection', 'PDN connection', 'RRC connection', 'RR Connection'] 74 | 75 | service_suffixes = ['bearer context', 'bearer contexts', 'connection', 'connections', 'capability'] 76 | 77 | service_dictionary = dict() 78 | remove_list = [] 79 | for k, v in keyword_dict.items(): 80 | for phrase in v: 81 | for substring in service_substrings: 82 | if str(substring) in 
str(phrase): 83 | service_dictionary[k] = v 84 | remove_list.append(k) 85 | 86 | for suffix in service_suffixes: 87 | if str(phrase).lower().endswith(suffix.lower()): 88 | service_dictionary[k] = v 89 | remove_list.append(k) 90 | 91 | for key in list(set(remove_list)): 92 | del keyword_dict[key] 93 | 94 | mode_suffixes = ['mode', 'modes'] 95 | mode_dictionary = dict() 96 | remove_list = [] 97 | for k, v in keyword_dict.items(): 98 | for phrase in v: 99 | for suffix in mode_suffixes: 100 | if suffix.lower() in str(phrase).lower(): 101 | mode_dictionary[k] = v 102 | remove_list.append(k) 103 | 104 | for key in list(set(remove_list)): 105 | del keyword_dict[key] 106 | 107 | ie_substrings = ['information element', 'information elements', ' ie', 'additional', 'type', 'message identity', 108 | 'policy', 'identifier', 'indication', 'indicator'] 109 | 110 | def contains_timer(phrase_): 111 | words = str(phrase_).split() 112 | for word in words: 113 | if word[0].lower() == 't' and str(word[1:]).isnumeric(): 114 | num = int(word[1:]) 115 | if num != 1: 116 | return True 117 | 118 | return False 119 | 120 | message_field_dictionary = dict() 121 | remove_list = [] 122 | for k, v in keyword_dict.items(): 123 | for phrase in v: 124 | for substring in ie_substrings: 125 | if str(substring).lower() in str(phrase).lower(): 126 | message_field_dictionary[k] = v 127 | remove_list.append(k) 128 | 129 | if "timer" in str(phrase).lower() and "value" in str(phrase).lower(): 130 | message_field_dictionary[k] = v 131 | remove_list.append(k) 132 | 133 | if contains_timer(phrase) and "value" in str(phrase).lower(): 134 | message_field_dictionary[k] = v 135 | remove_list.append(k) 136 | 137 | for key in list(set(remove_list)): 138 | del keyword_dict[key] 139 | 140 | timer_dictionary = dict() 141 | remove_list = [] 142 | for k, v in keyword_dict.items(): 143 | for phrase in v: 144 | if contains_timer(phrase): 145 | timer_dictionary[k] = v 146 | remove_list.append(k) 147 | 148 | for key in 
list(set(remove_list)): 149 | del keyword_dict[key] 150 | 151 | variable_suffixes = ['security context', 'security contexts', 'list', 'lists', 'key', 'keys'] 152 | variable_dictionary = dict() 153 | remove_list = [] 154 | for k, v in keyword_dict.items(): 155 | for phrase in v: 156 | for suffix in variable_suffixes: 157 | if str(phrase).lower().endswith(suffix.lower()): 158 | variable_dictionary[k] = v 159 | remove_list.append(k) 160 | 161 | for key in list(set(remove_list)): 162 | del keyword_dict[key] 163 | 164 | algorithm_suffixes = ['algorithm', 'algorithms'] 165 | algorithm_dictionary = dict() 166 | remove_list = [] 167 | for k, v in keyword_dict.items(): 168 | for phrase in v: 169 | for suffix in algorithm_suffixes: 170 | if str(phrase).lower().endswith(suffix.lower()): 171 | variable_dictionary[k] = v 172 | remove_list.append(k) 173 | 174 | elif len(phrase.split(" ")) > 2 and suffix == phrase.split(" ")[-2]: 175 | algorithm_dictionary[k] = v 176 | remove_list.append(k) 177 | 178 | for key in list(set(remove_list)): 179 | del keyword_dict[key] 180 | 181 | return message_dictionary, procedure_dictionary, message_field_dictionary, counter_dictionary, \ 182 | mode_dictionary, service_dictionary, timer_dictionary, variable_dictionary, algorithm_dictionary, keyword_dict --------------------------------------------------------------------------------