├── synthesizers ├── input.txt ├── defs.json ├── script_context_config.py ├── README.md ├── script_ir_end_dump.txt ├── script_config.py ├── CoreNLP_server.py ├── script_msg_helpers.py ├── common-defs.json ├── script_msg_defs_5_rrc.py ├── script_build_string_keyword_distance.py ├── script_msg_defs_4.py ├── defs-saved.json ├── script_msg_defs_5.py ├── sympy_expression_builder.py └── script_db_handler.py ├── keyword_extraction ├── 5g-rrc.pdf ├── combined.json ├── assets │ ├── manual_recategorization.txt │ ├── abbreviations.txt │ └── definitions.txt ├── cause_extraction.py ├── README.md ├── merge_keywords_np.py ├── ie_from_pdf.py ├── noun_phrase_cleanup.py ├── constituency_parser.py ├── cellular_text_converter.py ├── gather_keyword_pdf.py └── categorize_keywords.py ├── neutrex ├── supar │ ├── cmds │ │ ├── __init__.py │ │ ├── cmd.py │ │ ├── vi_con.py │ │ ├── crf_con.py │ │ ├── biaffine_sdp.py │ │ ├── vi_sdp.py │ │ ├── biaffine_dep.py │ │ ├── crf_dep.py │ │ ├── crf2o_dep.py │ │ └── vi_dep.py │ ├── utils │ │ ├── common.py │ │ ├── __init__.py │ │ ├── tokenizer.py │ │ ├── scripting.py │ │ ├── embedding.py │ │ ├── parallel.py │ │ ├── logging.py │ │ ├── config.py │ │ ├── vocab.py │ │ └── metric.py │ ├── modules │ │ ├── __init__.py │ │ ├── scalar_mix.py │ │ ├── mlp.py │ │ ├── dropout.py │ │ └── affine.py │ ├── models │ │ └── __init__.py │ ├── parsers │ │ └── __init__.py │ ├── structs │ │ ├── __init__.py │ │ ├── dist.py │ │ └── linearchain.py │ └── __init__.py ├── tests │ ├── test_fn.py │ ├── test_parse.py │ └── test_transform.py ├── README.md └── tree_to_xml │ ├── tree_to_xml.py │ └── tree_cleanup.py └── README.md /synthesizers/input.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /synthesizers/defs.json: -------------------------------------------------------------------------------- 1 | { 2 | } 
-------------------------------------------------------------------------------- /keyword_extraction/5g-rrc.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SyNSec-den/hermes-spec-to-fsm/HEAD/keyword_extraction/5g-rrc.pdf -------------------------------------------------------------------------------- /keyword_extraction/combined.json: -------------------------------------------------------------------------------- 1 | { 2 | "id2agent": {}, 3 | "id2verb": {}, 4 | "id2adj": {}, 5 | "id2state": {}, 6 | "id2message": {}, 7 | "id2procedure": {}, 8 | "id2event": {}, 9 | "id2timer": {}, 10 | "id2counter": {}, 11 | "id2var": {}, 12 | "id2mode": {}, 13 | "id2service": {}, 14 | "id2field_val": {}, 15 | "id2msg_field": {}, 16 | "id2cause": {}, 17 | "id2misc": {}, 18 | "id2other": {}, 19 | "id2num": {} 20 | } -------------------------------------------------------------------------------- /keyword_extraction/assets/manual_recategorization.txt: -------------------------------------------------------------------------------- 1 | # categories = [ message procedure messagefield state mode status service counter timer algorithm variable ] 2 | # lines with - indicate which category to search from for replacement, lines indicate which keyword to move and : indicates to which category 3 | # if no : is indicated, it will be moved to the last mentioned category used. 
4 | - definitions 5 | - abbreviation 6 | guti : variable 7 | - misc 8 | authentication_check : procedure 9 | imsi_attach : procedure 10 | imsi_detach : procedure 11 | plmn_search : procedure 12 | eps_update_status : status 13 | current_plmn : variable 14 | emm_cause_value : variable 15 | native_guti : variable 16 | plmn_identity : variable 17 | security_context_flag : variable 18 | selected_plmn : variable 19 | integrity_check : event 20 | -------------------------------------------------------------------------------- /neutrex/supar/cmds/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is the public release of the code of our paper titled 3 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 4 | Language Specifications" (USENIX Security '24) 5 | Author: Abdullah Al Ishtiaq and Sarkar Snigdha Sarathi Das 6 | Contact: abdullah.ishtiaq@psu.edu 7 | 8 | Licensed under the Apache License, Version 2.0 (the "License"); 9 | you may not use this file except in compliance with the License. 10 | You may obtain a copy of the License at 11 | 12 | https://www.apache.org/licenses/LICENSE-2.0 13 | 14 | Unless required by applicable law or agreed to in writing, software 15 | distributed under the License is distributed on an "AS IS" BASIS, 16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | See the License for the specific language governing permissions and 18 | limitations under the License. 
19 | """ 20 | -------------------------------------------------------------------------------- /synthesizers/script_context_config.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is the public release of the code of our paper titled 3 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 4 | Language Specifications" (USENIX Security '24) 5 | Author: Abdullah Al Ishtiaq 6 | Contact: abdullah.ishtiaq@psu.edu 7 | 8 | Licensed under the Apache License, Version 2.0 (the "License"); 9 | you may not use this file except in compliance with the License. 10 | You may obtain a copy of the License at 11 | 12 | https://www.apache.org/licenses/LICENSE-2.0 13 | 14 | Unless required by applicable law or agreed to in writing, software 15 | distributed under the License is distributed on an "AS IS" BASIS, 16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | See the License for the specific language governing permissions and 18 | limitations under the License. 19 | """ 20 | 21 | global_context = {} 22 | header_context = [] 23 | -------------------------------------------------------------------------------- /synthesizers/README.md: -------------------------------------------------------------------------------- 1 | # IRSynthesizer and FSMSynthesizer 2 | 3 | ## Requirements 4 | 5 | - python=3.7 6 | - stanza==1.4.2 7 | - nltk==3.8.1 8 | - tokenizers==0.13.3 9 | - torch==1.13.1 10 | - sympy==1.10.1 11 | - python-levenshtein==0.20.9 12 | 13 | 14 | ## Config 15 | 16 | - Update `./script_config.py` to select appropriate configuration. 17 | 18 | 19 | ## Input 20 | 21 | - Put input Hermes annotated document into `./input.txt` 22 | - Put extracted keywords into `./defs-saved.json` 23 | 24 | 25 | ## CoreNLP Server 26 | 27 | - Run `./CoreNLP_server.py` to start CoreNLP server and keep it running. 
28 | 29 | 30 | ## Keyword Preprocess 31 | 32 | - Run `./run-keyword-db-builder.py` to create database for keywords. 33 | 34 | 35 | ## Synthesizers 36 | 37 | - Run `./run-synthesizers.py` to run IRSynthesizer and FSMSynthesizer. 38 | 39 | 40 | ## Output 41 | 42 | - `./transitions.txt` outputs the transitions. 43 | - `./ir-out.xml` outputs the FSM in IR format. 44 | - `./smv-out.smv` FSM transpiled to nuXmv. 45 | 46 | 47 | 48 | -------------------------------------------------------------------------------- /synthesizers/script_ir_end_dump.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | UE 6 | MME 7 | FALSE 8 | 9 | 10 | 11 | MME 12 | UE 13 | FALSE 14 | 15 | 16 | 17 | 18 | 19 | chan_UM 20 | TRUE 21 | 22 | 23 | 24 | chan_MU 25 | TRUE 26 | 27 | 28 | 29 | 30 | 31 | 32 |
DEFINE
33 | range := 8; 34 |
35 | 36 |
37 | 38 | -------------------------------------------------------------------------------- /neutrex/supar/utils/common.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | This is the public release of the code of our paper titled 4 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 5 | Language Specifications" (USENIX Security '24) 6 | Author: Abdullah Al Ishtiaq and Sarkar Snigdha Sarathi Das 7 | Contact: abdullah.ishtiaq@psu.edu 8 | 9 | Licensed under the Apache License, Version 2.0 (the "License"); 10 | you may not use this file except in compliance with the License. 11 | You may obtain a copy of the License at 12 | 13 | https://www.apache.org/licenses/LICENSE-2.0 14 | 15 | Unless required by applicable law or agreed to in writing, software 16 | distributed under the License is distributed on an "AS IS" BASIS, 17 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | See the License for the specific language governing permissions and 19 | limitations under the License. 20 | """ 21 | 22 | PAD = '' 23 | UNK = '' 24 | BOS = '' 25 | EOS = '' 26 | 27 | MIN = -1e32 28 | -------------------------------------------------------------------------------- /synthesizers/script_config.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is the public release of the code of our paper titled 3 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 4 | Language Specifications" (USENIX Security '24) 5 | Author: Abdullah Al Ishtiaq 6 | Contact: abdullah.ishtiaq@psu.edu 7 | 8 | Licensed under the Apache License, Version 2.0 (the "License"); 9 | you may not use this file except in compliance with the License. 
10 | You may obtain a copy of the License at 11 | 12 | https://www.apache.org/licenses/LICENSE-2.0 13 | 14 | Unless required by applicable law or agreed to in writing, software 15 | distributed under the License is distributed on an "AS IS" BASIS, 16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | See the License for the specific language governing permissions and 18 | limitations under the License. 19 | """ 20 | 21 | GEN = "4g" #4g, 5g, 5g-rrc 22 | 23 | common_definitions = "common-defs.json" 24 | 25 | saved_nas_definitions = "defs-saved.json" 26 | nas_definitions = "defs.json" 27 | keyword_db_table = "SubstringKeywordDistance" 28 | 29 | 30 | -------------------------------------------------------------------------------- /neutrex/supar/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | This is the public release of the code of our paper titled 4 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 5 | Language Specifications" (USENIX Security '24) 6 | Author: Abdullah Al Ishtiaq and Sarkar Snigdha Sarathi Das 7 | Contact: abdullah.ishtiaq@psu.edu 8 | 9 | Licensed under the Apache License, Version 2.0 (the "License"); 10 | you may not use this file except in compliance with the License. 11 | You may obtain a copy of the License at 12 | 13 | https://www.apache.org/licenses/LICENSE-2.0 14 | 15 | Unless required by applicable law or agreed to in writing, software 16 | distributed under the License is distributed on an "AS IS" BASIS, 17 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | See the License for the specific language governing permissions and 19 | limitations under the License. 20 | """ 21 | 22 | from . 
import field, fn, metric, transform 23 | from .config import Config 24 | from .data import Dataset 25 | from .embedding import Embedding 26 | from .field import ChartField, Field, RawField, SubwordField 27 | from .transform import CoNLL, Transform, Tree 28 | from .vocab import Vocab 29 | 30 | __all__ = ['ChartField', 'CoNLL', 'Config', 'Dataset', 'Embedding', 'Field', 31 | 'RawField', 'SubwordField', 'Transform', 'Tree', 'Vocab', 'field', 'fn', 'metric', 'transform'] 32 | -------------------------------------------------------------------------------- /synthesizers/CoreNLP_server.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is the public release of the code of our paper titled 3 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 4 | Language Specifications" (USENIX Security '24) 5 | Author: Abdullah Al Ishtiaq 6 | Contact: abdullah.ishtiaq@psu.edu 7 | 8 | Licensed under the Apache License, Version 2.0 (the "License"); 9 | you may not use this file except in compliance with the License. 10 | You may obtain a copy of the License at 11 | 12 | https://www.apache.org/licenses/LICENSE-2.0 13 | 14 | Unless required by applicable law or agreed to in writing, software 15 | distributed under the License is distributed on an "AS IS" BASIS, 16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | See the License for the specific language governing permissions and 18 | limitations under the License. 
19 | """ 20 | 21 | import time 22 | 23 | import stanza 24 | from stanza.server import CoreNLPClient 25 | 26 | stanza.install_corenlp() 27 | 28 | 29 | def Main(): 30 | corenlp_client = CoreNLPClient( 31 | annotators=['tokenize', 'ssplit', 'pos', 'lemma', 'ner', 'parse', 'depparse', 'coref'], 32 | properties={'annotators': 'coref', 'coref.algorithm': 'neural'}, timeout=30000, 33 | memory='4G', endpoint='http://localhost:9001') 34 | 35 | while True: 36 | corenlp_client.ensure_alive() 37 | time.sleep(300) 38 | 39 | if __name__ == '__main__': 40 | Main() 41 | -------------------------------------------------------------------------------- /neutrex/tests/test_fn.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | This is the public release of the code of our paper titled 4 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 5 | Language Specifications" (USENIX Security '24) 6 | Author: Abdullah Al Ishtiaq and Sarkar Snigdha Sarathi Das 7 | Contact: abdullah.ishtiaq@psu.edu 8 | 9 | Licensed under the Apache License, Version 2.0 (the "License"); 10 | you may not use this file except in compliance with the License. 11 | You may obtain a copy of the License at 12 | 13 | https://www.apache.org/licenses/LICENSE-2.0 14 | 15 | Unless required by applicable law or agreed to in writing, software 16 | distributed under the License is distributed on an "AS IS" BASIS, 17 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | See the License for the specific language governing permissions and 19 | limitations under the License. 
20 | """ 21 | 22 | from supar.structs.fn import tarjan 23 | 24 | 25 | def test_tarjan(): 26 | sequences = [[4, 1, 2, 0, 4, 4, 8, 6, 8], 27 | [2, 5, 0, 3, 1, 5, 8, 6, 8], 28 | [2, 5, 0, 4, 1, 5, 8, 6, 8], 29 | [2, 5, 0, 4, 1, 9, 6, 5, 7]] 30 | answers = [None, [[2, 5, 1]], [[2, 5, 1]], [[2, 5, 1], [9, 7, 6]]] 31 | for sequence, answer in zip(sequences, answers): 32 | if answer is None: 33 | assert next(tarjan(sequence), None) == answer 34 | else: 35 | assert list(tarjan(sequence)) == answer 36 | -------------------------------------------------------------------------------- /neutrex/supar/utils/tokenizer.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | This is the public release of the code of our paper titled 4 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 5 | Language Specifications" (USENIX Security '24) 6 | Author: Abdullah Al Ishtiaq and Sarkar Snigdha Sarathi Das 7 | Contact: abdullah.ishtiaq@psu.edu 8 | 9 | Licensed under the Apache License, Version 2.0 (the "License"); 10 | you may not use this file except in compliance with the License. 11 | You may obtain a copy of the License at 12 | 13 | https://www.apache.org/licenses/LICENSE-2.0 14 | 15 | Unless required by applicable law or agreed to in writing, software 16 | distributed under the License is distributed on an "AS IS" BASIS, 17 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | See the License for the specific language governing permissions and 19 | limitations under the License. 
20 | """ 21 | 22 | 23 | class Tokenizer: 24 | 25 | def __init__(self, lang='en'): 26 | import stanza 27 | try: 28 | self.pipeline = stanza.Pipeline(lang=lang, processors='tokenize', verbose=False, tokenize_no_ssplit=True) 29 | except Exception: 30 | stanza.download(lang=lang, resources_url='stanford') 31 | self.pipeline = stanza.Pipeline(lang=lang, processors='tokenize', verbose=False, tokenize_no_ssplit=True) 32 | 33 | def __call__(self, text): 34 | return [i.text for i in self.pipeline(text).sentences[0].tokens] 35 | -------------------------------------------------------------------------------- /neutrex/supar/modules/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | This is the public release of the code of our paper titled 4 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 5 | Language Specifications" (USENIX Security '24) 6 | Author: Abdullah Al Ishtiaq and Sarkar Snigdha Sarathi Das 7 | Contact: abdullah.ishtiaq@psu.edu 8 | 9 | Licensed under the Apache License, Version 2.0 (the "License"); 10 | you may not use this file except in compliance with the License. 11 | You may obtain a copy of the License at 12 | 13 | https://www.apache.org/licenses/LICENSE-2.0 14 | 15 | Unless required by applicable law or agreed to in writing, software 16 | distributed under the License is distributed on an "AS IS" BASIS, 17 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | See the License for the specific language governing permissions and 19 | limitations under the License. 
20 | """ 21 | 22 | from .affine import Biaffine, Triaffine 23 | from .dropout import IndependentDropout, SharedDropout 24 | from .lstm import CharLSTM, VariationalLSTM 25 | from .mlp import MLP 26 | from .pretrained import ELMoEmbedding, TransformerEmbedding 27 | from .scalar_mix import ScalarMix 28 | from .transformer import RelativePositionTransformerEncoder, TransformerEncoder 29 | 30 | __all__ = ['MLP', 'TransformerEmbedding', 'Biaffine', 'CharLSTM', 'ELMoEmbedding', 'IndependentDropout', 31 | 'RelativePositionTransformerEncoder', 'ScalarMix', 'SharedDropout', 'TransformerEncoder', 'Triaffine', 32 | 'VariationalLSTM'] 33 | -------------------------------------------------------------------------------- /neutrex/supar/models/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | This is the public release of the code of our paper titled 4 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 5 | Language Specifications" (USENIX Security '24) 6 | Author: Abdullah Al Ishtiaq and Sarkar Snigdha Sarathi Das 7 | Contact: abdullah.ishtiaq@psu.edu 8 | 9 | Licensed under the Apache License, Version 2.0 (the "License"); 10 | you may not use this file except in compliance with the License. 11 | You may obtain a copy of the License at 12 | 13 | https://www.apache.org/licenses/LICENSE-2.0 14 | 15 | Unless required by applicable law or agreed to in writing, software 16 | distributed under the License is distributed on an "AS IS" BASIS, 17 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | See the License for the specific language governing permissions and 19 | limitations under the License. 
20 | """ 21 | 22 | from .const import CRFConstituencyModel, VIConstituencyModel 23 | from .dep import (BiaffineDependencyModel, CRF2oDependencyModel, 24 | CRFDependencyModel, VIDependencyModel) 25 | from .model import Model 26 | from .sdp import BiaffineSemanticDependencyModel, VISemanticDependencyModel 27 | 28 | __all__ = ['Model', 29 | 'BiaffineDependencyModel', 30 | 'CRFDependencyModel', 31 | 'CRF2oDependencyModel', 32 | 'VIDependencyModel', 33 | 'CRFConstituencyModel', 34 | 'VIConstituencyModel', 35 | 'BiaffineSemanticDependencyModel', 36 | 'VISemanticDependencyModel'] 37 | -------------------------------------------------------------------------------- /neutrex/supar/parsers/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | This is the public release of the code of our paper titled 4 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 5 | Language Specifications" (USENIX Security '24) 6 | Author: Abdullah Al Ishtiaq and Sarkar Snigdha Sarathi Das 7 | Contact: abdullah.ishtiaq@psu.edu 8 | 9 | Licensed under the Apache License, Version 2.0 (the "License"); 10 | you may not use this file except in compliance with the License. 11 | You may obtain a copy of the License at 12 | 13 | https://www.apache.org/licenses/LICENSE-2.0 14 | 15 | Unless required by applicable law or agreed to in writing, software 16 | distributed under the License is distributed on an "AS IS" BASIS, 17 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | See the License for the specific language governing permissions and 19 | limitations under the License. 
20 | """ 21 | 22 | from .const import CRFConstituencyParser, VIConstituencyParser 23 | from .dep import (BiaffineDependencyParser, CRF2oDependencyParser, 24 | CRFDependencyParser, VIDependencyParser) 25 | from .parser import Parser 26 | from .sdp import BiaffineSemanticDependencyParser, VISemanticDependencyParser 27 | 28 | __all__ = ['BiaffineDependencyParser', 29 | 'CRFDependencyParser', 30 | 'CRF2oDependencyParser', 31 | 'VIDependencyParser', 32 | 'CRFConstituencyParser', 33 | 'VIConstituencyParser', 34 | 'BiaffineSemanticDependencyParser', 35 | 'VISemanticDependencyParser', 36 | 'Parser'] 37 | -------------------------------------------------------------------------------- /neutrex/supar/structs/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | This is the public release of the code of our paper titled 4 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 5 | Language Specifications" (USENIX Security '24) 6 | Author: Abdullah Al Ishtiaq and Sarkar Snigdha Sarathi Das 7 | Contact: abdullah.ishtiaq@psu.edu 8 | 9 | Licensed under the Apache License, Version 2.0 (the "License"); 10 | you may not use this file except in compliance with the License. 11 | You may obtain a copy of the License at 12 | 13 | https://www.apache.org/licenses/LICENSE-2.0 14 | 15 | Unless required by applicable law or agreed to in writing, software 16 | distributed under the License is distributed on an "AS IS" BASIS, 17 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | See the License for the specific language governing permissions and 19 | limitations under the License. 
20 | """ 21 | 22 | from .dist import StructuredDistribution 23 | from .linearchain import LinearChainCRF 24 | from .tree import (BiLexicalizedConstituencyCRF, ConstituencyCRF, 25 | Dependency2oCRF, DependencyCRF, MatrixTree) 26 | from .vi import (ConstituencyLBP, ConstituencyMFVI, DependencyLBP, 27 | DependencyMFVI, SemanticDependencyLBP, SemanticDependencyMFVI) 28 | 29 | __all__ = ['StructuredDistribution', 30 | 'MatrixTree', 31 | 'DependencyCRF', 32 | 'Dependency2oCRF', 33 | 'ConstituencyCRF', 34 | 'BiLexicalizedConstituencyCRF', 35 | 'LinearChainCRF', 36 | 'DependencyMFVI', 37 | 'DependencyLBP', 38 | 'ConstituencyMFVI', 39 | 'ConstituencyLBP', 40 | 'SemanticDependencyMFVI', 41 | 'SemanticDependencyLBP', ] 42 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Hermes 2 | 3 | This is the official repository of the paper titled "[Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural Language Specifications](https://www.usenix.org/conference/usenixsecurity24/presentation/al-ishtiaq)" (USENIX Security '24). 4 | 5 | 6 | ## System 7 | - OS: Ubuntu 22.04.3 LTS 8 | - GPU: NVIDIA RTX A6000 9 | - CUDA Version: 12.2 10 | - NVIDIA Driver version: 535.86.05 11 | 12 | 13 | ## Components 14 | 15 | ### Annotated data 16 | 17 | `data` contains the annotated data for 4G NAS, 5G NAS and 5G RRC specifications. 18 | 19 | 20 | ### NEUTREX 21 | 22 | `neutrex` contains the implementation of NEUTREX. It also provides instructions to run it. 23 | 24 | 25 | ### Keyword Extractor 26 | 27 | `keyword_extraction` contains the implementation of Keyword Extractor from Hermes. 28 | It also contains the instructions on how to use the tool. 29 | 30 | 31 | ### Synthesizers 32 | 33 | `synthesizers` contains the implementation of IRSynthesizer and FSMSynthesizer. 
34 | It also provides instructions to use the tool. 35 | 36 | 37 | ## Citation 38 | 39 | ```bibtex 40 | @inproceedings {ishtiaq2023hermes, 41 | author = {Abdullah Al Ishtiaq and Sarkar Snigdha Sarathi Das and Syed Md Mukit Rashid and Ali Ranjbar and Kai Tu and Tianwei Wu and Zhezheng Song and Weixuan Wang and Mujtahid Akon and Rui Zhang and Syed Rafiul Hussain}, 42 | title = {Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural Language Specifications}, 43 | booktitle = {33rd USENIX Security Symposium (USENIX Security 24)}, 44 | year = {2024}, 45 | isbn = {978-1-939133-44-1}, 46 | address = {Philadelphia, PA}, 47 | pages = {4445--4462}, 48 | url = {https://www.usenix.org/conference/usenixsecurity24/presentation/al-ishtiaq}, 49 | publisher = {USENIX Association}, 50 | month = aug 51 | } 52 | ``` 53 | 54 | 55 | 56 | -------------------------------------------------------------------------------- /keyword_extraction/cause_extraction.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is the public release of the code of our paper titled 3 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 4 | Language Specifications" (USENIX Security '24) 5 | Author: Abdullah Al Ishtiaq and Syed Md Mukit Rashid 6 | Contact: abdullah.ishtiaq@psu.edu 7 | 8 | Licensed under the Apache License, Version 2.0 (the "License"); 9 | you may not use this file except in compliance with the License. 10 | You may obtain a copy of the License at 11 | 12 | https://www.apache.org/licenses/LICENSE-2.0 13 | 14 | Unless required by applicable law or agreed to in writing, software 15 | distributed under the License is distributed on an "AS IS" BASIS, 16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
17 | See the License for the specific language governing permissions and 18 | limitations under the License. 19 | """ 20 | 21 | import pprint 22 | 23 | pp = pprint.PrettyPrinter(indent=4) 24 | 25 | 26 | def create_cause_set(): 27 | causes = [] 28 | with open("assets/cause.txt", "r") as fr: 29 | lines = fr.readlines() 30 | for line in lines: 31 | if line.startswith("Cause #"): 32 | causes.append(line.split("\n")[0]) 33 | 34 | keyword_set = dict() 35 | for line in causes: 36 | tokens = line.split(" ") 37 | 38 | phrase_1 = str(tokens[0]) + " " + str(tokens[1]) 39 | phrase_2 = str(tokens[1]) 40 | phrase_3 = line.split(" ", 2)[2].replace("-", "").strip() 41 | phrase_4 = line.split(" ", 1)[1] 42 | 43 | key_str = phrase_1.replace(" ", "_").replace("#", "").lower() 44 | if key_str in keyword_set.keys(): 45 | keyword_set[key_str] = list(set(keyword_set[key_str] + [phrase_1, phrase_2, phrase_3, phrase_4])) 46 | else: 47 | keyword_set[key_str] = [phrase_1, phrase_2, phrase_3, phrase_4] 48 | 49 | return keyword_set 50 | 51 | -------------------------------------------------------------------------------- /neutrex/supar/utils/scripting.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is the public release of the code of our paper titled 3 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 4 | Language Specifications" (USENIX Security '24) 5 | Author: Abdullah Al Ishtiaq and Sarkar Snigdha Sarathi Das 6 | Contact: abdullah.ishtiaq@psu.edu 7 | 8 | Licensed under the Apache License, Version 2.0 (the "License"); 9 | you may not use this file except in compliance with the License. 
10 | You may obtain a copy of the License at 11 | 12 | https://www.apache.org/licenses/LICENSE-2.0 13 | 14 | Unless required by applicable law or agreed to in writing, software 15 | distributed under the License is distributed on an "AS IS" BASIS, 16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | See the License for the specific language governing permissions and 18 | limitations under the License. 19 | """ 20 | 21 | import nltk 22 | 23 | from utils.metric import SpanMetric 24 | from nltk import tree 25 | from utils.transform import Tree 26 | 27 | 28 | pred_path = './current_predictions.txt' 29 | gt_path = './current_predictions.txt' 30 | 31 | preds = [] 32 | gts = [] 33 | 34 | 35 | with open(pred_path, mode='r', encoding='utf8', newline='\n\n\n') as f: 36 | lines = f.readlines() 37 | for l in lines: 38 | preds.append(nltk.Tree.fromstring(l)) 39 | 40 | with open(gt_path, mode='r', encoding='utf8', newline='\n\n\n') as f: 41 | lines = f.readlines() 42 | for l in lines: 43 | gts.append(nltk.Tree.fromstring(l)) 44 | 45 | metric = SpanMetric() 46 | delete={'TOP', 'S1', '-NONE-', ',', ':', '``', "''", '.', '?', '!', '', '', '', '', '', '', '', '', '', '', '', ''} 47 | equal={'ADVP': 'PRT'} 48 | 49 | result = metric([Tree.factorize(tree, delete, equal) for tree in preds], 50 | [Tree.factorize(tree, delete, equal) for tree in gts]) 51 | 52 | print(result) -------------------------------------------------------------------------------- /neutrex/supar/utils/embedding.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | This is the public release of the code of our paper titled 4 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 5 | Language Specifications" (USENIX Security '24) 6 | Author: Abdullah Al Ishtiaq and Sarkar Snigdha Sarathi Das 7 | Contact: abdullah.ishtiaq@psu.edu 8 | 9 | Licensed under 
the Apache License, Version 2.0 (the "License"); 10 | you may not use this file except in compliance with the License. 11 | You may obtain a copy of the License at 12 | 13 | https://www.apache.org/licenses/LICENSE-2.0 14 | 15 | Unless required by applicable law or agreed to in writing, software 16 | distributed under the License is distributed on an "AS IS" BASIS, 17 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | See the License for the specific language governing permissions and 19 | limitations under the License. 20 | """ 21 | 22 | import torch 23 | 24 | 25 | class Embedding(object): 26 | 27 | def __init__(self, tokens, vectors, unk=None): 28 | self.tokens = tokens 29 | self.vectors = torch.tensor(vectors) 30 | self.pretrained = {w: v for w, v in zip(tokens, vectors)} 31 | self.unk = unk 32 | 33 | def __len__(self): 34 | return len(self.tokens) 35 | 36 | def __contains__(self, token): 37 | return token in self.pretrained 38 | 39 | @property 40 | def dim(self): 41 | return self.vectors.size(1) 42 | 43 | @property 44 | def unk_index(self): 45 | if self.unk is not None: 46 | return self.tokens.index(self.unk) 47 | else: 48 | raise AttributeError 49 | 50 | @classmethod 51 | def load(cls, path, unk=None): 52 | with open(path, 'r') as f: 53 | lines = [line for line in f] 54 | splits = [line.split() for line in lines] 55 | tokens, vectors = zip(*[(s[0], list(map(float, s[1:]))) 56 | for s in splits]) 57 | 58 | return cls(tokens, vectors, unk=unk) 59 | -------------------------------------------------------------------------------- /neutrex/supar/utils/parallel.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | This is the public release of the code of our paper titled 4 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 5 | Language Specifications" (USENIX Security '24) 6 | Author: Abdullah Al 
# -*- coding: utf-8 -*-

import os
from random import Random

import torch
import torch.distributed as dist
import torch.nn as nn


class DistributedDataParallel(nn.parallel.DistributedDataParallel):
    # DDP wrapper that transparently exposes the wrapped module's attributes,
    # so callers can use model methods without unwrapping `.module`.

    def __init__(self, module, **kwargs):
        super().__init__(module, **kwargs)

    def __getattr__(self, name):
        # nn.Module keeps parameters/submodules outside __dict__, hence the
        # super().__getattr__ call to fetch the wrapped module; prefer its
        # attributes, falling back to the DDP object's own.
        wrapped = super().__getattr__('module')
        if hasattr(wrapped, name):
            return getattr(wrapped, name)
        return super().__getattr__(name)


def init_device(device, local_rank=-1, backend='nccl', host=None, port=None):
    """Select visible GPUs and, when more than one device is visible,
    initialize distributed training for this process.

    Args:
        device: value assigned to CUDA_VISIBLE_DEVICES (e.g. '0' or '0,1').
        local_rank: this process's rank as passed by the launcher; -1 for
            single-process runs.
        backend: torch.distributed backend name (default 'nccl').
        host, port: master address/port; fall back to the MASTER_ADDR /
            MASTER_PORT environment variables, then to defaults.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = device
    if torch.cuda.device_count() > 1:
        host = host or os.environ.get('MASTER_ADDR', 'localhost')
        # Random(0) is seeded, so every participating process derives the
        # same default port without coordination.
        port = port or os.environ.get('MASTER_PORT', str(Random(0).randint(10000, 20000)))
        os.environ['MASTER_ADDR'] = host
        os.environ['MASTER_PORT'] = port
        dist.init_process_group(backend)
        torch.cuda.set_device(local_rank)


def is_master():
    # True when not running distributed at all, or when this is rank 0.
    return not dist.is_available() or not dist.is_initialized() or dist.get_rank() == 0
Download the following files to the `neutrex` folder: 6 | - model_4g_nas: https://drive.google.com/file/d/11-4ujqtQAwDf8p_7j_leG_hECiAaxw2t/view?usp=sharing 7 | - model_5g_nas: https://drive.google.com/file/d/1xHqhadH3mgjK9v_0eV7MWSRQAx6ZycJE/view?usp=sharing 8 | 9 | Download and unzip the following file containing `CellulaRoBERTa` to the `neutrex` folder: 10 | - saved_model.zip: https://drive.google.com/file/d/1R3A5zfM9z6aQzILrfh7aPkSzlQebu5iX/view?usp=sharing 11 | 12 | Download the following file to the `neutrex/data` folder: 13 | - glove.6B.100d.txt: https://drive.google.com/file/d/1qot1XbmuN6R7bwDmT7CwSZBV1Sh1X1VD/view?usp=sharing 14 | 15 | 16 | ### Requirements 17 | 18 | - python=3.7 19 | - dill==0.3.6 20 | - nltk==3.8.1 21 | - stanza==1.5.0 22 | - tokenizers==0.13.3 23 | - torch==1.13.1 24 | - transformers==4.30.1 25 | 26 | 27 | ## Preprocess 28 | 29 | Preprocess the text document with `neutrex/xml_to_tree/conversion.py`. 30 | It takes inputs from an `input.txt` file and 31 | will generate an `out_full.pid` file with preprocessed trees to be given as input to NEUTREX. 
32 | 33 | 34 | ## Commands 35 | 36 | train: 37 | ```sh 38 | python3 -u -m supar.cmds.crf_con train -b -d 0 -c crf-con-roberta-en -p model_4g_nas \ 39 | --train data/4g-nas.pid \ 40 | --dev data/5g-nas.pid \ 41 | --test data/5g-nas.pid \ 42 | --encoder=bert \ 43 | --bert=saved_model/ \ 44 | --lr=5e-5 \ 45 | --lr-rate=20 \ 46 | --epochs=200 \ 47 | --update-steps=4 48 | ``` 49 | 50 | predict: 51 | ```sh 52 | python3 -u -m supar.cmds.crf_con predict -d 0 -c crf-con-roberta-en -p model_4g_nas \ 53 | --data data/5g-nas.pid \ 54 | --pred pred_out.pid \ 55 | --encoder=bert \ 56 | --bert=saved_model/ 57 | ``` 58 | 59 | evaluate: 60 | ```sh 61 | python3 -u -m supar.cmds.crf_con evaluate -d 0 -c crf-con-roberta-en -p model_4g_nas \ 62 | --data data/5g-nas.pid \ 63 | --encoder=bert \ 64 | --bert=saved_model/ 65 | ``` 66 | 67 | 68 | ## Tree to XML 69 | 70 | The output trees from NEUTREX can be translated to XML formats with `neutrex/tree_to_xml/tree_to_xml.py`. 71 | It takes inputs from a `input.pid` file and will generate outputs to `output.txt`. 72 | 73 | 74 | ## Acknowledgement 75 | 76 | We acknowledge [SuPar](https://github.com/yzhangcs/parser) as the baseline implementation of NEUTREX. 77 | -------------------------------------------------------------------------------- /keyword_extraction/README.md: -------------------------------------------------------------------------------- 1 | # Keyword Extraction 2 | 3 | The folder shows keyword extraction for 5G RRC Release 17. It can be adapted for other specification documents. 4 | 5 | 6 | ## Required packages 7 | 8 | ```bash 9 | pip3 install stanza transformers nltk 10 | pip3 install PyEnchant 11 | pip3 install PyPDF2 12 | pip3 install tabula-py 13 | 14 | python3 -m nltk.downloader all-nltk 15 | ``` 16 | 17 | 18 | ## How to generate files in `assets` folder: 19 | 20 | ```bash 21 | # substitute '. ' and '; ' with '.\n' and ';\n' 22 | cat assets/5g-rrc.txt | sed 's/\. 
/\.\n/g' | sed 's/; /;\n/g' > assets/5g-rrc_small_lines.txt 23 | python3 constituency_parser.py -f assets/5g-rrc_small_lines.txt --label NP > assets/5g-rrc_small_lines.np.txt 24 | cat assets/5g-rrc_small_lines.np.txt | awk '{print tolower($0)}' | sort | uniq -c | sort -nr > assets/5g-rrc_small_lines.np.count.0.txt 25 | cat assets/5g-rrc_small_lines.np.txt | awk '{print tolower($0)}' | sed 's/^the \|^a \|^an //' | sed '/[],:;\(\){}[]/d' | grep -Evw '(and|or|but)' | sort | uniq -c | sort -nr > assets/5g-rrc_small_lines.np.count.1.txt 26 | cat assets/5g-rrc_small_lines.np.txt | awk '{print tolower($0)}' | sed 's/^the \|^a \|^an //' | sed '/[],:;\(\){}[]/d' | grep -Evw '(and|or|but)' > temp 27 | cat temp | grep 's$' | sed 's/.$//' | sort -u | grep -xFf temp | sed -e 's/$/s/' > temp.remove 28 | cat temp | grep -vxFf temp.remove > out.1 29 | cat temp | grep -xFf temp.remove | sed 's/.$//' > out.2 30 | cat out.1 out.2 | sort | uniq -c | sort -nr > assets/5g-rrc_small_lines.np.count.2.txt 31 | rm temp temp.remove out.1 out.2 32 | ``` 33 | 34 | 35 | ## Update the following files manually 36 | 37 | - `assets/abbreviations.txt` 38 | - `assets/definitions.txt` 39 | - `assets/cause.txt` 40 | - `assets/manual_recategorization.txt` 41 | - `gather_keyword_pdf.py: gather_messages_and_procedures, gather_state, gather_vars` 42 | - `ie_from_pdf.py: get_IE_toc` 43 | 44 | 45 | ## Run the following commands 46 | 47 | ```bash 48 | python3 noun_phrase_cleanup.py 49 | python3 merge_keywords_np.py 50 | python3 create_combined_dictionary.py 51 | python3 post_refinement_combined_keywords.py 52 | ``` 53 | 54 | Output: `combined.json` 55 | 56 | 57 | ## Note 58 | 59 | The output of automated keyword extraction and categorization may still contain some uncategorized keywords. 60 | In Hermes, we manually check and categorize them. 
61 | 62 | 63 | -------------------------------------------------------------------------------- /neutrex/tests/test_parse.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | This is the public release of the code of our paper titled 4 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 5 | Language Specifications" (USENIX Security '24) 6 | Author: Abdullah Al Ishtiaq and Sarkar Snigdha Sarathi Das 7 | Contact: abdullah.ishtiaq@psu.edu 8 | 9 | Licensed under the Apache License, Version 2.0 (the "License"); 10 | you may not use this file except in compliance with the License. 11 | You may obtain a copy of the License at 12 | 13 | https://www.apache.org/licenses/LICENSE-2.0 14 | 15 | Unless required by applicable law or agreed to in writing, software 16 | distributed under the License is distributed on an "AS IS" BASIS, 17 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | See the License for the specific language governing permissions and 19 | limitations under the License. 
# -*- coding: utf-8 -*-

import os

import supar
from supar import Parser


def test_parse():
    """Smoke-test every released non-transformer parser on raw and
    pre-tokenized sentences, removing each cached model afterwards."""
    raw = {'en': ['She enjoys playing tennis.', 'Too young too simple!'],
           'zh': '她喜欢打网球.',
           'de': 'Sie spielt gerne Tennis.',
           'fr': 'Elle aime jouer au tennis.',
           'ru': 'Она любит играть в теннис.',
           'he': 'היא נהנית לשחק טניס.'}
    tokenized = {'en': [['She', 'enjoys', 'playing', 'tennis', '.'], ['Too', 'young', 'too', 'simple', '!']],
                 'zh': ['她', '喜欢', '打', '网球', '.'],
                 'de': ['Sie', 'spielt', 'gerne', 'Tennis', '.'],
                 'fr': ['Elle', 'aime', 'jouer', 'au', 'tennis', '.'],
                 'ru': ['Она', 'любит', 'играть', 'в', 'теннис', '.'],
                 'he': ['היא', 'נהנית', 'לשחק', 'טניס', '.']}
    for name, model in supar.NAME.items():
        # Transformer-based checkpoints are too heavy for this smoke test.
        if any(tag in name for tag in ('xlmr', 'roberta', 'electra')):
            continue
        parser = Parser.load(name, reload=True)
        if name.endswith(('en', 'zh')):
            # Monolingual model: exercise both raw and pre-tokenized input.
            lang = name[-2:]
            parser.predict(raw[lang], prob=True, lang=lang)
            parser.predict(tokenized[lang], prob=True, lang=None)
        else:
            # Multilingual model: raw text per language, then every
            # tokenized input at once with no language hint.
            for lang in raw:
                parser.predict(raw[lang], prob=True, lang=lang)
            parser.predict(list(tokenized.values()), prob=True, lang=None)
        os.remove(os.path.join(os.path.expanduser('~/.cache/supar'), model))
# -*- coding: utf-8 -*-

import torch
from supar.utils import Config
from supar.utils.logging import init_logger, logger
from supar.utils.parallel import init_device

from pathlib import Path


def parse(parser):
    """Finish command-line parsing and dispatch to train/evaluate/predict.

    Args:
        parser: an argparse.ArgumentParser pre-populated by the per-command
            cmds module; a 'Parser' class and a 'mode' are expected among the
            parsed options (injected by the subcommand definitions).
    """
    parser.add_argument('--path', '-p', help='path to model file')
    parser.add_argument('--conf', '-c', default='', help='path to config file')
    parser.add_argument('--device', '-d', default='3', help='ID of GPU to use')
    parser.add_argument('--seed', '-s', default=1, type=int, help='seed for generating random numbers')
    parser.add_argument('--threads', '-t', default=16, type=int, help='max num of threads')
    parser.add_argument("--local_rank", type=int, default=-1, help='node rank for distributed training')
    # Parse twice: the first pass collects options unknown to this parser
    # (model-specific flags), the second folds them into the same namespace.
    args, unknown = parser.parse_known_args()
    args, unknown = parser.parse_known_args(unknown, args)
    args = Config.load(**vars(args), unknown=unknown)
    Parser = args.pop('Parser')

    torch.set_num_threads(args.threads)
    torch.manual_seed(args.seed)
    init_device(args.device, args.local_rank)
    # Append to an existing log when resuming from a checkpoint.
    init_logger(logger, f"{args.path}.{args.mode}.log", 'a' if args.get('checkpoint') else 'w')
    logger.info('\n' + str(args))

    if args.mode == 'train':
        parser = Parser.load(**args) if args.checkpoint else Parser.build(**args)
        # Touch the model path up front so later saves have a valid target.
        Path(args.path).touch()
        parser.train(**args)
    elif args.mode == 'evaluate':
        parser = Parser.load(**args)
        parser.evaluate(**args)
    elif args.mode == 'predict':
        parser = Parser.load(**args)
        parser.predict(**args)
from script_config import GEN

# Message definitions are generation-specific; import the table matching the
# configured generation (star import supplies the *_msg_list / *_wait_for /
# msg_response names used below).
if GEN == "5g":
    from script_msg_defs_5 import *
elif GEN == "4g":
    from script_msg_defs_4 import *
elif GEN == "5g-rrc":
    from script_msg_defs_5_rrc import *


def get_msg_direction(message_name: str) -> str:
    """Return the direction tag for *message_name*, or 'unk_msg' if unknown."""
    if message_name in um_msg_list:
        return "ue_to_mme"
    if message_name in mu_msg_list:
        return "mme_to_ue"
    if message_name in both_dir_msg_list:
        return "both_dir"
    return "unk_msg"


def get_msg_sublayer(message_name: str) -> str:
    """Return the protocol sublayer of *message_name*, or 'unk_msg' if unknown."""
    if message_name in emm_sublayer_msg_list:
        return "emm_sublayer"
    if message_name in esm_sublayer_msg_list:
        return "esm_sublayer"
    if message_name in special_msg_list:
        return "special"
    return "unk_msg"


def get_msg_response(message_name: str) -> str:
    """Return the expected response message, or 'unk_resp' if unmapped."""
    # dict.get replaces the original `in` test + index (double lookup).
    return msg_response.get(message_name, "unk_resp")


def get_mme_wait_for(msg: str):
    """Return the message the MME waits for after *msg*, or '' if unmapped."""
    return mme_wait_for_message.get(msg, "")


def get_check_mme_wait_for(msg: str):
    """Return the wait-for check entry for *msg*, or '' if unmapped."""
    return check_mme_wait_for.get(msg, "")


def check_valid_msg(msg: str):
    """True iff *msg* appears in any known message list."""
    # any() over the lists replaces the original if/return True/False chain.
    return any(msg in msg_list for msg_list in (
        um_msg_list, mu_msg_list, both_dir_msg_list,
        emm_sublayer_msg_list, esm_sublayer_msg_list, special_msg_list))
"or": "_OR_", 14 | "and": "_AND_" 15 | }, 16 | 17 | "conj_label": { 18 | "conj:and" : "_AND_", 19 | "conj:or" : "_OR_" 20 | }, 21 | 22 | "preposition" : { 23 | "from" : "_FROM_", 24 | "to" : "_TO_", 25 | "before" : "_BEFORE_", 26 | "after" : "_AFTER_", 27 | "for" : "_FOR_", 28 | "by" : "_BY_", 29 | "during": "_DURING_", 30 | "due to": "_DUE_TO_", 31 | "in": "_IN_", 32 | "of": "_OF_", 33 | "with": "_WITH_", 34 | "into": "_INTO_" 35 | }, 36 | 37 | "preposition_label" : { 38 | "nmod:from" : "_FROM_", 39 | "nmod:to" : "_TO_", 40 | "nmod:before" : "_BEFORE_", 41 | "nmod:after" : "_AFTER_", 42 | "nmod:for" : "_FOR_", 43 | "nmod:by" : "_BY_", 44 | "nmod:during": "_DURING_", 45 | "nmod:due_to": "_DUE_TO_", 46 | "nmod:in": "_IN_", 47 | "nmod:of": "_OF_", 48 | "nmod:with": "_WITH_", 49 | "nmod:except": "_EXCEPT_", 50 | "nmod:instead_of": "_INSTEAD_OF_", 51 | "obl:from" : "_FROM_", 52 | "obl:to" : "_TO_", 53 | "obl:before" : "_BEFORE_", 54 | "obl:after" : "_AFTER_", 55 | "obl:for" : "_FOR_", 56 | "obl:by" : "_BY_", 57 | "obl:during": "_DURING_", 58 | "obl:due_to": "_DUE_TO_", 59 | "obl:in": "_IN_", 60 | "obl:within": "_IN_", 61 | "obl:of": "_OF_", 62 | "obl:with": "_WITH_", 63 | "obl:except": "_EXCEPT_", 64 | "obl:except_for": "_EXCEPT_", 65 | "obl:into": "_INTO_" 66 | }, 67 | 68 | "mark": { 69 | "until": "_UNTIL_", 70 | "without": "_WITHOUT_", 71 | "before": "_BEFORE_", 72 | "unless": "_UNLESS_" 73 | }, 74 | 75 | 76 | "case": { 77 | "until": "_UNTIL_", 78 | "without": "_WITHOUT_", 79 | "before": "_BEFORE_", 80 | "except": "_EXCEPT_", 81 | "via": "_VIA_", 82 | "in": "_IN_" 83 | }, 84 | 85 | "special": { 86 | "specified": "_REFERENCE_", 87 | "section": "_SECTION_", 88 | "subsection": "_SUBSECTION_", 89 | "subclause": "_SUBCLAUSE_", 90 | "annex": "_ANNEX_", 91 | "ts": "_TS_" 92 | }, 93 | 94 | "number": { 95 | "zero": 0, 96 | "one": 1, 97 | "two": 2, 98 | "three": 3, 99 | "four": 4, 100 | "five": 5, 101 | "six": 6, 102 | "seven": 7, 103 | "eight": 8, 104 | "nine": 9, 105 | 
"ten": 10 106 | }, 107 | 108 | "ignore_list": [ 109 | "already", 110 | "subclause", 111 | "unchanged" 112 | ] 113 | 114 | } -------------------------------------------------------------------------------- /neutrex/supar/modules/scalar_mix.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | This is the public release of the code of our paper titled 4 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 5 | Language Specifications" (USENIX Security '24) 6 | Author: Abdullah Al Ishtiaq and Sarkar Snigdha Sarathi Das 7 | Contact: abdullah.ishtiaq@psu.edu 8 | 9 | Licensed under the Apache License, Version 2.0 (the "License"); 10 | you may not use this file except in compliance with the License. 11 | You may obtain a copy of the License at 12 | 13 | https://www.apache.org/licenses/LICENSE-2.0 14 | 15 | Unless required by applicable law or agreed to in writing, software 16 | distributed under the License is distributed on an "AS IS" BASIS, 17 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | See the License for the specific language governing permissions and 19 | limitations under the License. 20 | """ 21 | 22 | import torch 23 | import torch.nn as nn 24 | 25 | 26 | class ScalarMix(nn.Module): 27 | r""" 28 | Computes a parameterized scalar mixture of :math:`N` tensors, :math:`mixture = \gamma * \sum_{k}(s_k * tensor_k)` 29 | where :math:`s = \mathrm{softmax}(w)`, with :math:`w` and :math:`\gamma` scalar parameters. 30 | 31 | Args: 32 | n_layers (int): 33 | The number of layers to be mixed, i.e., :math:`N`. 34 | dropout (float): 35 | The dropout ratio of the layer weights. 36 | If dropout > 0, then for each scalar weight, adjusts its softmax weight mass to 0 37 | with the dropout probability (i.e., setting the unnormalized weight to -inf). 
38 | This effectively redistributes the dropped probability mass to all other weights. 39 | Default: 0. 40 | """ 41 | 42 | def __init__(self, n_layers, dropout=0): 43 | super().__init__() 44 | 45 | self.n_layers = n_layers 46 | 47 | self.weights = nn.Parameter(torch.zeros(n_layers)) 48 | self.gamma = nn.Parameter(torch.tensor([1.0])) 49 | self.dropout = nn.Dropout(dropout) 50 | 51 | def __repr__(self): 52 | s = f"n_layers={self.n_layers}" 53 | if self.dropout.p > 0: 54 | s += f", dropout={self.dropout.p}" 55 | 56 | return f"{self.__class__.__name__}({s})" 57 | 58 | def forward(self, tensors): 59 | r""" 60 | Args: 61 | tensors (list[~torch.Tensor]): 62 | :math:`N` tensors to be mixed. 63 | 64 | Returns: 65 | The mixture of :math:`N` tensors. 66 | """ 67 | 68 | normed_weights = self.dropout(self.weights.softmax(-1)) 69 | weighted_sum = sum(w * h for w, h in zip(normed_weights, tensors)) 70 | 71 | return self.gamma * weighted_sum 72 | -------------------------------------------------------------------------------- /neutrex/supar/modules/mlp.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | This is the public release of the code of our paper titled 4 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 5 | Language Specifications" (USENIX Security '24) 6 | Author: Abdullah Al Ishtiaq and Sarkar Snigdha Sarathi Das 7 | Contact: abdullah.ishtiaq@psu.edu 8 | 9 | Licensed under the Apache License, Version 2.0 (the "License"); 10 | you may not use this file except in compliance with the License. 11 | You may obtain a copy of the License at 12 | 13 | https://www.apache.org/licenses/LICENSE-2.0 14 | 15 | Unless required by applicable law or agreed to in writing, software 16 | distributed under the License is distributed on an "AS IS" BASIS, 17 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# -*- coding: utf-8 -*-

import torch.nn as nn
from supar.modules.dropout import SharedDropout


class MLP(nn.Module):
    r"""
    Applies a linear transformation together with a non-linear activation to the incoming tensor:
    :math:`y = \mathrm{Activation}(x A^T + b)`

    Args:
        n_in (~torch.Tensor):
            The size of each input feature.
        n_out (~torch.Tensor):
            The size of each output feature.
        dropout (float):
            If non-zero, introduces a :class:`SharedDropout` layer on the output with this dropout ratio. Default: 0.
        activation (bool):
            Whether to use activations. Default: True.
    """

    def __init__(self, n_in, n_out, dropout=0, activation=True):
        super().__init__()

        self.n_in = n_in
        self.n_out = n_out
        self.linear = nn.Linear(n_in, n_out)
        # Identity stands in for the activation when it is disabled.
        self.activation = nn.LeakyReLU(negative_slope=0.1) if activation else nn.Identity()
        self.dropout = SharedDropout(p=dropout)

        self.reset_parameters()

    def __repr__(self):
        details = [f"n_in={self.n_in}", f"n_out={self.n_out}"]
        if self.dropout.p > 0:
            details.append(f"dropout={self.dropout.p}")

        return f"{self.__class__.__name__}({', '.join(details)})"

    def reset_parameters(self):
        # Orthogonal weight initialization with a zero bias.
        nn.init.orthogonal_(self.linear.weight)
        nn.init.zeros_(self.linear.bias)

    def forward(self, x):
        r"""
        Args:
            x (~torch.Tensor):
                The size of each input feature is `n_in`.

        Returns:
            A tensor with the size of each output feature `n_out`.
        """

        return self.dropout(self.activation(self.linear(x)))
# -*- coding: utf-8 -*-

import logging
import os

from supar.utils.parallel import is_master
from tqdm import tqdm


def get_logger(name):
    # Thin wrapper kept as a stable import point for module loggers.
    return logging.getLogger(name)


class TqdmHandler(logging.StreamHandler):
    # Routes log records through tqdm.write so log lines do not mangle any
    # active progress bars.

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def emit(self, record):
        try:
            msg = self.format(record)
            tqdm.write(msg)
            self.flush()
        except (KeyboardInterrupt, SystemExit):
            # Never swallow interpreter-exit signals.
            raise
        except Exception:
            self.handleError(record)


def init_logger(logger,
                path=None,
                mode='w',
                level=None,
                handlers=None,
                verbose=True):
    """Configure root logging with a tqdm-aware handler and an optional
    file log.

    Args:
        logger: the logger whose effective level is set for this process.
        path: optional log-file path; parent directories are created.
        mode: file open mode ('w' fresh run, 'a' append when resuming).
        level: root logging level; defaults to WARNING.
        handlers: custom handlers; defaults to a TqdmHandler plus, when
            *path* is given, a FileHandler.
        verbose: if False this process logs at WARNING even when master.
    """
    level = level or logging.WARNING
    if not handlers:
        handlers = [TqdmHandler()]
        if path:
            os.makedirs(os.path.dirname(path) or './', exist_ok=True)
            handlers.append(logging.FileHandler(path, mode))
    logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s',
                        datefmt='%Y-%m-%d %H:%M:%S',
                        level=level,
                        handlers=handlers)
    # Only the master process (rank 0) logs at INFO; workers stay quiet.
    logger.setLevel(logging.INFO if is_master() and verbose else logging.WARNING)


def progress_bar(iterator,
                 ncols=None,
                 bar_format='{l_bar}{bar:18}| {n_fmt}/{total_fmt} {elapsed}<{remaining}, {rate_fmt}{postfix}',
                 leave=False,
                 **kwargs):
    # The bar is disabled entirely unless this is the master process and the
    # module logger is at INFO (i.e. verbose mode).
    return tqdm(iterator,
                ncols=ncols,
                bar_format=bar_format,
                ascii=True,
                disable=(not (logger.level == logging.INFO and is_master())),
                leave=leave,
                **kwargs)


logger = get_logger('supar')
from nltk.stem.porter import *
import pickle
import pprint
import enchant

pp = pprint.PrettyPrinter(width=200)


def get_merged_dict_myalgo(get_updated):
    """Merge noun-phrase keyword entries that differ only by the Porter stem
    of their final word (e.g. singular/plural variants), keeping the shorter
    spelling as the canonical key.

    Args:
        get_updated: if True, recompute the merge from keyword_dict_np.pkl
            and cache the result in keyword_dict_np_merged.pkl; if False,
            load the cached merged dictionary.

    Returns:
        The merged keyword dictionary.  (The original computed it but never
        returned it, which made the False branch a no-op.)
    """
    if get_updated:
        p_stemmer = PorterStemmer()

        # `with` guarantees the pickle file is closed even on error.
        with open("keyword_dict_np.pkl", "rb") as a_file:
            keyword_dict = dict(pickle.load(a_file))

        def merge_key(name):
            # Merge key: every word verbatim except the last, which is
            # stemmed — so only final-word variants are unified.
            words = str(name).split("_")
            return "".join(w + " " for w in words[:-1]) + p_stemmer.stem(words[-1])

        # Hoisted out of the O(n^2) comparison: stem each key exactly once
        # instead of once per pair.
        keys_to_compare = {k: merge_key(k) for k in keyword_dict}

        remove_list = []
        for k in keyword_dict:
            for other_k in keyword_dict:
                if keys_to_compare[k] != keys_to_compare[other_k]:
                    continue
                # Keep the shorter spelling; fold the longer one's values in.
                if len(k) > len(other_k):
                    remove_list.append(k)
                    keyword_dict[other_k] = keyword_dict[other_k] + keyword_dict[k]
                elif len(k) < len(other_k):
                    remove_list.append(other_k)
                    keyword_dict[k] = keyword_dict[k] + keyword_dict[other_k]

        for key in set(remove_list):
            del keyword_dict[key]

        # The symmetric double loop can append duplicates; dedupe each list.
        for k, v in keyword_dict.items():
            keyword_dict[k] = list(set(keyword_dict[k]))

        with open("keyword_dict_np_merged.pkl", "wb") as a_file:
            pickle.dump(keyword_dict, a_file)

    else:
        with open("keyword_dict_np_merged.pkl", "rb") as a_file:
            keyword_dict = dict(pickle.load(a_file))

    return keyword_dict


get_merged_dict_myalgo(True)
import pickle
import pprint

import PyPDF2
import numpy as np
import pandas
import enchant
from nltk.stem.porter import *

from tabula import read_pdf

# Change this file to accommodate new specs
INPUT_FILENAME = '5g-rrc.pdf'

pp = pprint.PrettyPrinter(width=150)
dictionary = enchant.Dict("en_US")


def get_IE_keywords_dict(get_updated):
    """Return the IE (information element) keyword dictionary.

    Args:
        get_updated: if True, rebuild the dictionary from the spec PDF's
            table of contents and cache it; if False, load the cached pickle.
    """
    if get_updated:
        keyword_dict_new = get_IE_toc()
        # `with` guarantees the cache file is closed even on error.
        with open("ie_from_pdf.pkl", "wb") as a_file:
            pickle.dump(keyword_dict_new, a_file)
    else:
        with open("ie_from_pdf.pkl", "rb") as a_file:
            keyword_dict_new = dict(pickle.load(a_file))

    return keyword_dict_new


def get_IE_toc():
    """Scan the spec's table-of-contents pages (5-24) for IE entries listed
    under section 6.3 and build a {normalized_name: [spellings]} dict.

    Returns:
        dict mapping lowercased, underscore-normalized IE names to the list
        of original spellings, with single common-English words removed.
    """
    ie_dict = dict()

    # Keep the PDF open for the whole scan; closed automatically afterwards.
    with open(INPUT_FILENAME, 'rb') as pdfFileObj:
        pdfReader = PyPDF2.PdfReader(pdfFileObj)

        last_section = ""
        for i in range(4, 24):
            pageObj = pdfReader.pages[i]
            lines = pageObj.extract_text().split("\n")
            for line in lines:
                # ToC rows contain dotted leaders; skip anything else.
                if "..." not in line:
                    continue

                line_splits = line.split()
                if len(line_splits) < 2:
                    continue

                section = line_splits[0]

                if len(section) > 0 and section[0].isnumeric():
                    last_section = section

                elif last_section.startswith("6.3") and section == "–":
                    # IE rows are continuation lines under a 6.3.x heading.
                    ie_text = line_splits[1].replace(".", "")
                    key = ie_text.lower().replace("-", "_")
                    # Fix: the original's two-way if/else reset the list to
                    # [ie_text] whenever the spelling was already recorded,
                    # discarding previously accumulated spellings.
                    if key not in ie_dict:
                        ie_dict[key] = [ie_text]
                    elif ie_text not in ie_dict[key]:
                        ie_dict[key].append(ie_text)

    # Drop single-token entries that are plain dictionary words; they are too
    # generic to be IE names.
    remove_list = []
    for k in ie_dict.keys():
        if len(ie_dict[k][0].split(" ")) == 1 and dictionary.check(ie_dict[k][0].split(" ")[0]):
            remove_list.append(k)

    for k in remove_list:
        del ie_dict[k]

    return ie_dict
19 | """ 20 | 21 | import nltk.tree 22 | from nltk import Tree 23 | import os 24 | 25 | from tree_cleanup import clean_tree 26 | 27 | INPUT_FILENAME = "input.pid" 28 | OUTPUT_FILENAME = "output.txt" 29 | 30 | 31 | def reverse_tag(tag): 32 | return tag[0] + "/" + tag[1:] 33 | 34 | 35 | def clean_text(text: str) -> str: 36 | text = text.replace("[ ", "(").replace(" ]", ")") # parenthesis 37 | text = text.replace("# ", "#") # cause 38 | text = text.replace("`` ", "\"").replace(" ''", "\"") # quotes 39 | text = text.replace(" ,", ",").replace(" .", ".").replace(" ;", ";").replace(" :", ":") # punctuations 40 | text = text.replace("& gt;", ">") 41 | text = text.replace(" (s)", "(s)") # special cases 42 | 43 | while " " in text: 44 | text = text.replace(" ", " ") 45 | text = text.strip() 46 | 47 | return text 48 | 49 | def xml_generator(tree: Tree): 50 | output_string = "" 51 | if type(tree) == nltk.tree.Tree and tree.height() > 2: 52 | for subtree in tree: 53 | if subtree.label() in ["", "", "", "", ""]: 54 | output_string += subtree.label() + " " + xml_generator(subtree) + reverse_tag( 55 | subtree.label()) + " " 56 | else: 57 | output_string += xml_generator(subtree) 58 | elif tree.height() == 2: 59 | for word in tree.leaves(): 60 | output_string += word + " " 61 | 62 | return output_string 63 | 64 | 65 | def convert_xml(input_filename, output_filename): 66 | 67 | input_file = open(input_filename, "r") 68 | lines = input_file.readlines() 69 | input_file.close() 70 | 71 | tree_strings = lines 72 | xml_lines = [] 73 | for nltk_tree in tree_strings: 74 | nltk_tree = clean_tree(nltk_tree) 75 | converted_text = xml_generator(Tree.fromstring(nltk_tree)) 76 | converted_text = clean_text(converted_text) 77 | 78 | xml_lines.append(converted_text) 79 | 80 | with open(output_filename, "w") as outfile: 81 | for line in xml_lines: 82 | outfile.write(line + "\n") 83 | outfile.close() 84 | 85 | 86 | if __name__ == '__main__': 87 | convert_xml(INPUT_FILENAME, OUTPUT_FILENAME) 88 | 89 
| -------------------------------------------------------------------------------- /neutrex/supar/utils/config.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | This is the public release of the code of our paper titled 4 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 5 | Language Specifications" (USENIX Security '24) 6 | Author: Abdullah Al Ishtiaq and Sarkar Snigdha Sarathi Das 7 | Contact: abdullah.ishtiaq@psu.edu 8 | 9 | Licensed under the Apache License, Version 2.0 (the "License"); 10 | you may not use this file except in compliance with the License. 11 | You may obtain a copy of the License at 12 | 13 | https://www.apache.org/licenses/LICENSE-2.0 14 | 15 | Unless required by applicable law or agreed to in writing, software 16 | distributed under the License is distributed on an "AS IS" BASIS, 17 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | See the License for the specific language governing permissions and 19 | limitations under the License. 
20 | """ 21 | 22 | import argparse 23 | import os 24 | from ast import literal_eval 25 | from configparser import ConfigParser 26 | 27 | import supar 28 | from supar.utils.fn import download 29 | 30 | 31 | class Config(object): 32 | 33 | def __init__(self, **kwargs): 34 | super(Config, self).__init__() 35 | 36 | self.update(kwargs) 37 | 38 | def __repr__(self): 39 | s = line = "-" * 20 + "-+-" + "-" * 30 + "\n" 40 | s += f"{'Param':20} | {'Value':^30}\n" + line 41 | for name, value in vars(self).items(): 42 | s += f"{name:20} | {str(value):^30}\n" 43 | s += line 44 | 45 | return s 46 | 47 | def __getitem__(self, key): 48 | return getattr(self, key) 49 | 50 | def __contains__(self, key): 51 | return hasattr(self, key) 52 | 53 | def __getstate__(self): 54 | return vars(self) 55 | 56 | def __setstate__(self, state): 57 | self.__dict__.update(state) 58 | 59 | def keys(self): 60 | return vars(self).keys() 61 | 62 | def items(self): 63 | return vars(self).items() 64 | 65 | def update(self, kwargs): 66 | for key in ('self', 'cls', '__class__'): 67 | kwargs.pop(key, None) 68 | kwargs.update(kwargs.pop('kwargs', dict())) 69 | for name, value in kwargs.items(): 70 | setattr(self, name, value) 71 | return self 72 | 73 | def get(self, key, default=None): 74 | return getattr(self, key) if hasattr(self, key) else default 75 | 76 | def pop(self, key, val=None): 77 | return self.__dict__.pop(key, val) 78 | 79 | @classmethod 80 | def load(cls, conf='', unknown=None, **kwargs): 81 | config = ConfigParser() 82 | config.read(conf if not conf or os.path.exists(conf) else download(supar.CONFIG['github'].get(conf, conf))) 83 | config = dict((name, literal_eval(value)) 84 | for section in config.sections() 85 | for name, value in config.items(section)) 86 | if unknown is not None: 87 | parser = argparse.ArgumentParser() 88 | for name, value in config.items(): 89 | parser.add_argument('--'+name.replace('_', '-'), type=type(value), default=value) 90 | 
config.update(vars(parser.parse_args(unknown))) 91 | config.update(kwargs) 92 | return cls(**config) 93 | -------------------------------------------------------------------------------- /synthesizers/script_msg_defs_5_rrc.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is the public release of the code of our paper titled 3 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 4 | Language Specifications" (USENIX Security '24) 5 | Author: Abdullah Al Ishtiaq 6 | Contact: abdullah.ishtiaq@psu.edu 7 | 8 | Licensed under the Apache License, Version 2.0 (the "License"); 9 | you may not use this file except in compliance with the License. 10 | You may obtain a copy of the License at 11 | 12 | https://www.apache.org/licenses/LICENSE-2.0 13 | 14 | Unless required by applicable law or agreed to in writing, software 15 | distributed under the License is distributed on an "AS IS" BASIS, 16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | See the License for the specific language governing permissions and 18 | limitations under the License. 
19 | """ 20 | 21 | um_msg_list = { 22 | "countercheckresponse", "dedicatedsibrequest", "uldedicatedmessagesegment", "ulinformationtransferirat", 23 | "ulinformationtransfermrdc", "failureinformation", "locationmeasurementindication", "mcgfailureinformation", 24 | "mbsinterestindication", "measurementreport", "measurementreportapplayer", "rrcresumerequest1", "rrcsetuprequest", 25 | "rrcresumerequest", "ueinformationresponse", "rrcreestablishmentrequest", "rrcreestablishmentcomplete", 26 | "rrcsetupcomplete", "rrcresumecomplete", "rrcreconfigurationcomplete", "rrcsysteminforequest", 27 | "ulinformationtransfer", "scgfailureinformationeutra", "scgfailureinformation", "securitymodefailure", 28 | "securitymodecomplete", "sidelinkueinformationnr", "ueassistanceinformation", "uecapabilityinformation", 29 | "rrcreconfigurationcompletesidelink_message" 30 | } 31 | 32 | mu_msg_list = { 33 | "rrcsetup", "countercheck", "dldedicatedmessagesegment", "dlinformationtransfermrdc", "dlinformationtransfer", 34 | "loggedmeasurementconfiguration", "mbsbroadcastconfiguration", "mib", "paging", "mobilityfromnrcommand", 35 | "ueinformationrequest", "systeminformation", "rrcreestablishment", "rrcreconfiguration", "rrcreject", "rrcresume", 36 | "rrcrelease", "securitymodecommand", "uecapabilityenquiry", "rrcreconfigurationsidelink_message" 37 | 38 | 39 | } 40 | 41 | both_dir_msg_list = { 42 | "iabotherinformation", "rrc_message", "nas_message", "warning_message" 43 | } 44 | 45 | 46 | special_msg_list = { 47 | "iabotherinformation", "rrc_message", "nas_message", "warning_message" 48 | "initial_nas_message", "user_data", "uplink_signalling", "uplink_data", "downlink_signalling", 49 | "downlink_data", "five_gmm_message", "five_gsm_message" 50 | } 51 | 52 | 53 | msg_response = { 54 | "countercheck": "countercheckresponse", 55 | "securitymodecommand": "securitymodecomplete", 56 | "uecapabilityenquiry": "uecapabilityinformation", 57 | "rrcreconfiguration": "rrcreconfigurationcomplete", 58 | 
"rrcreconfigurationsidelink_message": "rrcreconfigurationcompletesidelink_message", 59 | "rrcreestablishmentrequest": "rrcreestablishment", 60 | "rrcreestablishment": "rrcreestablishmentcomplete", 61 | "rrcsetuprequest": "rrcsetup", 62 | "rrcsetup": "rrcsetupcomplete", 63 | "rrcresumerequest": "rrcresume", 64 | "rrcresumerequest1": "rrcresume", 65 | "rrcresume": "rrcresume", 66 | 67 | } 68 | 69 | mme_wait_for_message = { 70 | } 71 | 72 | check_mme_wait_for = { 73 | 74 | } 75 | 76 | 77 | emm_sublayer_msg_list = { 78 | } 79 | 80 | esm_sublayer_msg_list = { 81 | } -------------------------------------------------------------------------------- /neutrex/supar/utils/vocab.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | This is the public release of the code of our paper titled 4 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 5 | Language Specifications" (USENIX Security '24) 6 | Author: Abdullah Al Ishtiaq and Sarkar Snigdha Sarathi Das 7 | Contact: abdullah.ishtiaq@psu.edu 8 | 9 | Licensed under the Apache License, Version 2.0 (the "License"); 10 | you may not use this file except in compliance with the License. 11 | You may obtain a copy of the License at 12 | 13 | https://www.apache.org/licenses/LICENSE-2.0 14 | 15 | Unless required by applicable law or agreed to in writing, software 16 | distributed under the License is distributed on an "AS IS" BASIS, 17 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | See the License for the specific language governing permissions and 19 | limitations under the License. 20 | """ 21 | 22 | from collections import defaultdict 23 | from collections.abc import Iterable 24 | 25 | 26 | class Vocab(object): 27 | r""" 28 | Defines a vocabulary object that will be used to numericalize a field. 
29 | 30 | Args: 31 | counter (~collections.Counter): 32 | :class:`~collections.Counter` object holding the frequencies of each value found in the data. 33 | min_freq (int): 34 | The minimum frequency needed to include a token in the vocabulary. Default: 1. 35 | specials (list[str]): 36 | The list of special tokens (e.g., pad, unk, bos and eos) that will be prepended to the vocabulary. Default: []. 37 | unk_index (int): 38 | The index of unk token. Default: 0. 39 | 40 | Attributes: 41 | itos: 42 | A list of token strings indexed by their numerical identifiers. 43 | stoi: 44 | A :class:`~collections.defaultdict` object mapping token strings to numerical identifiers. 45 | """ 46 | 47 | def __init__(self, counter, min_freq=1, specials=[], unk_index=0): 48 | self.itos = list(specials) 49 | self.stoi = defaultdict(lambda: unk_index) 50 | self.stoi.update({token: i for i, token in enumerate(self.itos)}) 51 | self.extend([token for token, freq in counter.items() 52 | if freq >= min_freq]) 53 | self.unk_index = unk_index 54 | self.n_init = len(self) 55 | 56 | def __len__(self): 57 | return len(self.itos) 58 | 59 | def __getitem__(self, key): 60 | if isinstance(key, str): 61 | return self.stoi[key] 62 | elif not isinstance(key, Iterable): 63 | return self.itos[key] 64 | elif isinstance(key[0], str): 65 | return [self.stoi[i] for i in key] 66 | else: 67 | return [self.itos[i] for i in key] 68 | 69 | def __contains__(self, token): 70 | return token in self.stoi 71 | 72 | def __getstate__(self): 73 | # avoid picking defaultdict 74 | attrs = dict(self.__dict__) 75 | # cast to regular dict 76 | attrs['stoi'] = dict(self.stoi) 77 | return attrs 78 | 79 | def __setstate__(self, state): 80 | stoi = defaultdict(lambda: self.unk_index) 81 | stoi.update(state['stoi']) 82 | state['stoi'] = stoi 83 | self.__dict__.update(state) 84 | 85 | def items(self): 86 | return self.stoi.items() 87 | 88 | def extend(self, tokens): 89 | self.itos.extend(sorted(set(tokens).difference(self.stoi))) 90 | 
self.stoi.update({token: i for i, token in enumerate(self.itos)}) 91 | -------------------------------------------------------------------------------- /neutrex/tests/test_transform.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | This is the public release of the code of our paper titled 4 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 5 | Language Specifications" (USENIX Security '24) 6 | Author: Abdullah Al Ishtiaq and Sarkar Snigdha Sarathi Das 7 | Contact: abdullah.ishtiaq@psu.edu 8 | 9 | Licensed under the Apache License, Version 2.0 (the "License"); 10 | you may not use this file except in compliance with the License. 11 | You may obtain a copy of the License at 12 | 13 | https://www.apache.org/licenses/LICENSE-2.0 14 | 15 | Unless required by applicable law or agreed to in writing, software 16 | distributed under the License is distributed on an "AS IS" BASIS, 17 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | See the License for the specific language governing permissions and 19 | limitations under the License. 
20 | """ 21 | 22 | import itertools 23 | 24 | import nltk 25 | from supar.utils import CoNLL, Tree 26 | 27 | 28 | class TestCoNLL: 29 | 30 | def istree_naive(self, sequence, proj=False, multiroot=True): 31 | if proj and not CoNLL.isprojective(sequence): 32 | return False 33 | roots = [i for i, head in enumerate(sequence, 1) if head == 0] 34 | if len(roots) == 0: 35 | return False 36 | if len(roots) > 1 and not multiroot: 37 | return False 38 | sequence = [-1] + sequence 39 | 40 | def track(sequence, visited, i): 41 | if visited[i]: 42 | return False 43 | visited[i] = True 44 | for j, head in enumerate(sequence[1:], 1): 45 | if head == i: 46 | track(sequence, visited, j) 47 | return True 48 | visited = [False]*len(sequence) 49 | for root in roots: 50 | if not track(sequence, visited, root): 51 | return False 52 | if any([not i for i in visited[1:]]): 53 | return False 54 | return True 55 | 56 | def test_isprojective(self): 57 | assert CoNLL.isprojective([2, 4, 2, 0, 5]) 58 | assert CoNLL.isprojective([3, -1, 0, -1, 3]) 59 | assert not CoNLL.isprojective([2, 4, 0, 3, 4]) 60 | assert not CoNLL.isprojective([4, -1, 0, -1, 4]) 61 | assert not CoNLL.isprojective([2, -1, -1, 1, 0]) 62 | assert not CoNLL.isprojective([0, 5, -1, -1, 4]) 63 | 64 | def test_istree(self): 65 | permutations = [list(sequence[:5]) for sequence in itertools.permutations(range(6))] 66 | for sequence in permutations: 67 | assert CoNLL.istree(sequence, False, False) == self.istree_naive(sequence, False, False), f"{sequence}" 68 | assert CoNLL.istree(sequence, False, True) == self.istree_naive(sequence, False, True), f"{sequence}" 69 | assert CoNLL.istree(sequence, True, False) == self.istree_naive(sequence, True, False), f"{sequence}" 70 | assert CoNLL.istree(sequence, True, True) == self.istree_naive(sequence, True, True), f"{sequence}" 71 | 72 | 73 | class TestTree: 74 | 75 | def test_tree(self): 76 | tree = nltk.Tree.fromstring(""" 77 | (TOP 78 | (S 79 | (NP (DT This) (NN time)) 80 | (, ,) 81 | 
(NP (DT the) (NNS firms)) 82 | (VP (VBD were) (ADJP (JJ ready))) 83 | (. .))) 84 | """) 85 | assert tree == Tree.build(tree, Tree.factorize(Tree.binarize(tree)[0])) 86 | -------------------------------------------------------------------------------- /neutrex/supar/cmds/vi_con.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | This is the public release of the code of our paper titled 4 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 5 | Language Specifications" (USENIX Security '24) 6 | Author: Abdullah Al Ishtiaq and Sarkar Snigdha Sarathi Das 7 | Contact: abdullah.ishtiaq@psu.edu 8 | 9 | Licensed under the Apache License, Version 2.0 (the "License"); 10 | you may not use this file except in compliance with the License. 11 | You may obtain a copy of the License at 12 | 13 | https://www.apache.org/licenses/LICENSE-2.0 14 | 15 | Unless required by applicable law or agreed to in writing, software 16 | distributed under the License is distributed on an "AS IS" BASIS, 17 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | See the License for the specific language governing permissions and 19 | limitations under the License. 
20 | """ 21 | 22 | import argparse 23 | 24 | from supar import VIConstituencyParser 25 | from supar.cmds.cmd import parse 26 | 27 | 28 | def main(): 29 | parser = argparse.ArgumentParser(description='Create Constituency Parser using Variational Inference.') 30 | parser.set_defaults(Parser=VIConstituencyParser) 31 | subparsers = parser.add_subparsers(title='Commands', dest='mode') 32 | # train 33 | subparser = subparsers.add_parser('train', help='Train a parser.') 34 | subparser.add_argument('--feat', '-f', choices=['tag', 'char', 'elmo', 'bert'], nargs='+', help='features to use') 35 | subparser.add_argument('--build', '-b', action='store_true', help='whether to build the model first') 36 | subparser.add_argument('--checkpoint', action='store_true', help='whether to load a checkpoint to restore training') 37 | subparser.add_argument('--encoder', choices=['lstm', 'bert'], default='lstm', help='encoder to use') 38 | subparser.add_argument('--max-len', type=int, help='max length of the sentences') 39 | subparser.add_argument('--buckets', default=32, type=int, help='max num of buckets to use') 40 | subparser.add_argument('--train', default='data/ptb/train.pid', help='path to train file') 41 | subparser.add_argument('--dev', default='data/ptb/dev.pid', help='path to dev file') 42 | subparser.add_argument('--test', default='data/ptb/test.pid', help='path to test file') 43 | subparser.add_argument('--embed', default='data/glove.6B.100d.txt', help='path to pretrained embeddings') 44 | subparser.add_argument('--unk', default='unk', help='unk token in pretrained embeddings') 45 | subparser.add_argument('--n-embed', default=100, type=int, help='dimension of embeddings') 46 | subparser.add_argument('--bert', default='bert-base-cased', help='which BERT model to use') 47 | subparser.add_argument('--inference', default='mfvi', choices=['mfvi', 'lbp'], help='approximate inference methods') 48 | # evaluate 49 | subparser = subparsers.add_parser('evaluate', help='Evaluate the 
specified parser and dataset.') 50 | subparser.add_argument('--buckets', default=8, type=int, help='max num of buckets to use') 51 | subparser.add_argument('--data', default='data/ptb/test.pid', help='path to dataset') 52 | # predict 53 | subparser = subparsers.add_parser('predict', help='Use a trained parser to make predictions.') 54 | subparser.add_argument('--buckets', default=8, type=int, help='max num of buckets to use') 55 | subparser.add_argument('--data', default='data/ptb/test.pid', help='path to dataset') 56 | subparser.add_argument('--pred', default='pred.pid', help='path to predicted result') 57 | subparser.add_argument('--prob', action='store_true', help='whether to output probs') 58 | parse(parser) 59 | 60 | 61 | if __name__ == "__main__": 62 | main() 63 | -------------------------------------------------------------------------------- /neutrex/supar/cmds/crf_con.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | This is the public release of the code of our paper titled 4 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 5 | Language Specifications" (USENIX Security '24) 6 | Author: Abdullah Al Ishtiaq and Sarkar Snigdha Sarathi Das 7 | Contact: abdullah.ishtiaq@psu.edu 8 | 9 | Licensed under the Apache License, Version 2.0 (the "License"); 10 | you may not use this file except in compliance with the License. 11 | You may obtain a copy of the License at 12 | 13 | https://www.apache.org/licenses/LICENSE-2.0 14 | 15 | Unless required by applicable law or agreed to in writing, software 16 | distributed under the License is distributed on an "AS IS" BASIS, 17 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | See the License for the specific language governing permissions and 19 | limitations under the License. 
20 | """ 21 | 22 | import argparse 23 | import sys 24 | from supar import CRFConstituencyParser 25 | from supar.cmds.cmd import parse 26 | 27 | sys.setrecursionlimit(8000) 28 | 29 | 30 | 31 | 32 | def main(): 33 | parser = argparse.ArgumentParser(description='Create CRF Constituency Parser.') 34 | parser.set_defaults(Parser=CRFConstituencyParser) 35 | parser.add_argument('--mbr', action='store_true', help='whether to use MBR decoding') 36 | subparsers = parser.add_subparsers(title='Commands', dest='mode') 37 | # train 38 | subparser = subparsers.add_parser('train', help='Train a parser.') 39 | subparser.add_argument('--feat', '-f', choices=['tag', 'char', 'elmo', 'bert'], nargs='+', help='features to use') 40 | subparser.add_argument('--build', '-b', action='store_true', help='whether to build the model first') 41 | subparser.add_argument('--checkpoint', action='store_true', help='whether to load a checkpoint to restore training') 42 | subparser.add_argument('--encoder', choices=['lstm', 'bert'], default='lstm', help='encoder to use') 43 | subparser.add_argument('--max-len', type=int, help='max length of the sentences') 44 | subparser.add_argument('--buckets', default=32, type=int, help='max num of buckets to use') 45 | subparser.add_argument('--train', default='data/ptb/train.pid', help='path to train file') 46 | subparser.add_argument('--dev', default='data/ptb/dev.pid', help='path to dev file') 47 | subparser.add_argument('--test', default='data/ptb/test.pid', help='path to test file') 48 | subparser.add_argument('--embed', default='data/glove.6B.100d.txt', help='path to pretrained embeddings') 49 | subparser.add_argument('--unk', default='unk', help='unk token in pretrained embeddings') 50 | subparser.add_argument('--n-embed', default=100, type=int, help='dimension of embeddings') 51 | subparser.add_argument('--bert', default='bert-base-cased', help='which BERT model to use') 52 | # evaluate 53 | subparser = subparsers.add_parser('evaluate', help='Evaluate the 
specified parser and dataset.') 54 | subparser.add_argument('--buckets', default=8, type=int, help='max num of buckets to use') 55 | subparser.add_argument('--data', default='data/ptb/test.pid', help='path to dataset') 56 | # predict 57 | subparser = subparsers.add_parser('predict', help='Use a trained parser to make predictions.') 58 | subparser.add_argument('--buckets', default=8, type=int, help='max num of buckets to use') 59 | subparser.add_argument('--data', default='data/ptb/test.pid', help='path to dataset') 60 | subparser.add_argument('--pred', default='pred.pid', help='path to predicted result') 61 | subparser.add_argument('--prob', action='store_true', help='whether to output probs') 62 | parse(parser) 63 | 64 | 65 | if __name__ == "__main__": 66 | main() 67 | -------------------------------------------------------------------------------- /synthesizers/script_build_string_keyword_distance.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is the public release of the code of our paper titled 3 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 4 | Language Specifications" (USENIX Security '24) 5 | Author: Abdullah Al Ishtiaq 6 | Contact: abdullah.ishtiaq@psu.edu 7 | 8 | Licensed under the Apache License, Version 2.0 (the "License"); 9 | you may not use this file except in compliance with the License. 10 | You may obtain a copy of the License at 11 | 12 | https://www.apache.org/licenses/LICENSE-2.0 13 | 14 | Unless required by applicable law or agreed to in writing, software 15 | distributed under the License is distributed on an "AS IS" BASIS, 16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | See the License for the specific language governing permissions and 18 | limitations under the License. 
19 | """ 20 | 21 | import copy 22 | from Levenshtein import distance as levenshtein_distance 23 | 24 | LOCAL_DISTANCE_THR = 3 25 | LOCAL_SHORT_THR = 5 26 | 27 | 28 | def build_string_distance(db_conn, db_cursor, text: str, keywords_dict: dict, skip_substr=False, 29 | skip_matched_string=False, update_existing=False, searched_strings=None, 30 | thread_num=0) -> None: 31 | if searched_strings is None: 32 | searched_strings = {} 33 | 34 | if len(keywords_dict) == 0: 35 | return 36 | 37 | from script_db_handler import insert_substring_keyword_distance_batch, db_commit, substring_in_db, \ 38 | matched_string_in_db 39 | 40 | max_key_len = max([len(item) for item in keywords_dict]) 41 | 42 | text = copy.deepcopy(text.lower()) 43 | text_len = len(text) 44 | 45 | for start_idx in range(text_len): 46 | insert_list = [] 47 | 48 | if text[start_idx] == "<" or text[start_idx] == ">": 49 | continue 50 | for end_idx in range(start_idx, text_len): 51 | if text[end_idx - 1] == "<" or text[end_idx - 1] == ">": 52 | break 53 | elif start_idx == end_idx: 54 | continue 55 | elif end_idx - start_idx > max_key_len + LOCAL_DISTANCE_THR: 56 | break 57 | 58 | substr = text[start_idx: end_idx] 59 | if substr.strip() == "": 60 | continue 61 | elif substr in searched_strings: 62 | continue 63 | elif skip_substr and substring_in_db(db_cursor, substr): 64 | searched_strings[substr] = 1 65 | continue 66 | 67 | searched_strings[substr] = 1 68 | 69 | for lookup_text in keywords_dict: 70 | lookup_len = len(lookup_text) 71 | if lookup_len != len(substr): 72 | continue 73 | elif skip_matched_string and matched_string_in_db(db_cursor, lookup_text): 74 | continue 75 | 76 | keyword = keywords_dict[lookup_text] 77 | 78 | if lookup_len < LOCAL_SHORT_THR: 79 | lookup_text = " " + lookup_text + " " 80 | substr = " " + substr + " " 81 | 82 | dist = levenshtein_distance(substr, lookup_text) 83 | 84 | lookup_text = lookup_text.strip() 85 | if lookup_len < LOCAL_SHORT_THR: 86 | substr = substr[1:-1] 87 | 88 | if 
dist > LOCAL_DISTANCE_THR or dist >= lookup_len: 89 | continue 90 | 91 | insert_list.append((substr, lookup_text, keyword, dist)) 92 | 93 | insert_substring_keyword_distance_batch(db_conn, db_cursor, insert_list, update_existing, thread_num=thread_num) 94 | db_commit(db_conn) 95 | -------------------------------------------------------------------------------- /neutrex/supar/cmds/biaffine_sdp.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | This is the public release of the code of our paper titled 4 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 5 | Language Specifications" (USENIX Security '24) 6 | Author: Abdullah Al Ishtiaq and Sarkar Snigdha Sarathi Das 7 | Contact: abdullah.ishtiaq@psu.edu 8 | 9 | Licensed under the Apache License, Version 2.0 (the "License"); 10 | you may not use this file except in compliance with the License. 11 | You may obtain a copy of the License at 12 | 13 | https://www.apache.org/licenses/LICENSE-2.0 14 | 15 | Unless required by applicable law or agreed to in writing, software 16 | distributed under the License is distributed on an "AS IS" BASIS, 17 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | See the License for the specific language governing permissions and 19 | limitations under the License. 
20 | """ 21 | 22 | import argparse 23 | 24 | from supar import BiaffineSemanticDependencyParser 25 | from supar.cmds.cmd import parse 26 | 27 | 28 | def main(): 29 | parser = argparse.ArgumentParser(description='Create Biaffine Semantic Dependency Parser.') 30 | parser.set_defaults(Parser=BiaffineSemanticDependencyParser) 31 | subparsers = parser.add_subparsers(title='Commands', dest='mode') 32 | # train 33 | subparser = subparsers.add_parser('train', help='Train a parser.') 34 | subparser.add_argument('--feat', '-f', choices=['tag', 'char', 'lemma', 'elmo', 'bert'], nargs='+', help='features to use') 35 | subparser.add_argument('--build', '-b', action='store_true', help='whether to build the model first') 36 | subparser.add_argument('--checkpoint', action='store_true', help='whether to load a checkpoint to restore training') 37 | subparser.add_argument('--encoder', choices=['lstm', 'bert'], default='lstm', help='encoder to use') 38 | subparser.add_argument('--max-len', type=int, help='max length of the sentences') 39 | subparser.add_argument('--buckets', default=32, type=int, help='max num of buckets to use') 40 | subparser.add_argument('--train', default='data/sdp/DM/train.conllu', help='path to train file') 41 | subparser.add_argument('--dev', default='data/sdp/DM/dev.conllu', help='path to dev file') 42 | subparser.add_argument('--test', default='data/sdp/DM/test.conllu', help='path to test file') 43 | subparser.add_argument('--embed', default='data/glove.6B.100d.txt', help='path to pretrained embeddings') 44 | subparser.add_argument('--unk', default='unk', help='unk token in pretrained embeddings') 45 | subparser.add_argument('--n-embed', default=100, type=int, help='dimension of embeddings') 46 | subparser.add_argument('--n-embed-proj', default=125, type=int, help='dimension of projected embeddings') 47 | subparser.add_argument('--bert', default='bert-base-cased', help='which BERT model to use') 48 | # evaluate 49 | subparser = 
subparsers.add_parser('evaluate', help='Evaluate the specified parser and dataset.') 50 | subparser.add_argument('--buckets', default=8, type=int, help='max num of buckets to use') 51 | subparser.add_argument('--data', default='data/sdp/DM/test.conllu', help='path to dataset') 52 | # predict 53 | subparser = subparsers.add_parser('predict', help='Use a trained parser to make predictions.') 54 | subparser.add_argument('--buckets', default=8, type=int, help='max num of buckets to use') 55 | subparser.add_argument('--data', default='data/sdp/DM/test.conllu', help='path to dataset') 56 | subparser.add_argument('--pred', default='pred.conllu', help='path to predicted result') 57 | subparser.add_argument('--prob', action='store_true', help='whether to output probs') 58 | parse(parser) 59 | 60 | 61 | if __name__ == "__main__": 62 | main() 63 | -------------------------------------------------------------------------------- /neutrex/supar/cmds/vi_sdp.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | This is the public release of the code of our paper titled 4 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 5 | Language Specifications" (USENIX Security '24) 6 | Author: Abdullah Al Ishtiaq and Sarkar Snigdha Sarathi Das 7 | Contact: abdullah.ishtiaq@psu.edu 8 | 9 | Licensed under the Apache License, Version 2.0 (the "License"); 10 | you may not use this file except in compliance with the License. 11 | You may obtain a copy of the License at 12 | 13 | https://www.apache.org/licenses/LICENSE-2.0 14 | 15 | Unless required by applicable law or agreed to in writing, software 16 | distributed under the License is distributed on an "AS IS" BASIS, 17 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | See the License for the specific language governing permissions and 19 | limitations under the License. 
20 | """ 21 | 22 | import argparse 23 | 24 | from supar import VISemanticDependencyParser 25 | from supar.cmds.cmd import parse 26 | 27 | 28 | def main(): 29 | parser = argparse.ArgumentParser(description='Create Semantic Dependency Parser using Variational Inference.') 30 | parser.set_defaults(Parser=VISemanticDependencyParser) 31 | subparsers = parser.add_subparsers(title='Commands', dest='mode') 32 | # train 33 | subparser = subparsers.add_parser('train', help='Train a parser.') 34 | subparser.add_argument('--feat', '-f', choices=['tag', 'char', 'lemma', 'elmo', 'bert'], nargs='+', help='features to use') 35 | subparser.add_argument('--build', '-b', action='store_true', help='whether to build the model first') 36 | subparser.add_argument('--checkpoint', action='store_true', help='whether to load a checkpoint to restore training') 37 | subparser.add_argument('--encoder', choices=['lstm', 'bert'], default='lstm', help='encoder to use') 38 | subparser.add_argument('--max-len', type=int, help='max length of the sentences') 39 | subparser.add_argument('--buckets', default=32, type=int, help='max num of buckets to use') 40 | subparser.add_argument('--train', default='data/sdp/DM/train.conllu', help='path to train file') 41 | subparser.add_argument('--dev', default='data/sdp/DM/dev.conllu', help='path to dev file') 42 | subparser.add_argument('--test', default='data/sdp/DM/test.conllu', help='path to test file') 43 | subparser.add_argument('--embed', default='data/glove.6B.100d.txt', help='path to pretrained embeddings') 44 | subparser.add_argument('--unk', default='unk', help='unk token in pretrained embeddings') 45 | subparser.add_argument('--n-embed', default=100, type=int, help='dimension of embeddings') 46 | subparser.add_argument('--n-embed-proj', default=125, type=int, help='dimension of projected embeddings') 47 | subparser.add_argument('--bert', default='bert-base-cased', help='which BERT model to use') 48 | subparser.add_argument('--inference', 
default='mfvi', choices=['mfvi', 'lbp'], help='approximate inference methods') 49 | # evaluate 50 | subparser = subparsers.add_parser('evaluate', help='Evaluate the specified parser and dataset.') 51 | subparser.add_argument('--buckets', default=8, type=int, help='max num of buckets to use') 52 | subparser.add_argument('--data', default='data/sdp/DM/test.conllu', help='path to dataset') 53 | # predict 54 | subparser = subparsers.add_parser('predict', help='Use a trained parser to make predictions.') 55 | subparser.add_argument('--buckets', default=8, type=int, help='max num of buckets to use') 56 | subparser.add_argument('--data', default='data/sdp/DM/test.conllu', help='path to dataset') 57 | subparser.add_argument('--pred', default='pred.conllu', help='path to predicted result') 58 | subparser.add_argument('--prob', action='store_true', help='whether to output probs') 59 | parse(parser) 60 | 61 | 62 | if __name__ == "__main__": 63 | main() 64 | -------------------------------------------------------------------------------- /neutrex/supar/cmds/biaffine_dep.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | This is the public release of the code of our paper titled 4 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 5 | Language Specifications" (USENIX Security '24) 6 | Author: Abdullah Al Ishtiaq and Sarkar Snigdha Sarathi Das 7 | Contact: abdullah.ishtiaq@psu.edu 8 | 9 | Licensed under the Apache License, Version 2.0 (the "License"); 10 | you may not use this file except in compliance with the License. 
def main():
    """CLI entry point: build/evaluate/predict with the biaffine dependency parser."""
    parser = argparse.ArgumentParser(description='Create Biaffine Dependency Parser.')
    # Global decoding options shared by every sub-command.
    parser.add_argument('--tree', action='store_true', help='whether to ensure well-formedness')
    parser.add_argument('--proj', action='store_true', help='whether to projectivize the data')
    parser.add_argument('--partial', action='store_true', help='whether partial annotation is included')
    parser.set_defaults(Parser=BiaffineDependencyParser)
    commands = parser.add_subparsers(title='Commands', dest='mode')

    # -- train ------------------------------------------------------------
    cmd = commands.add_parser('train', help='Train a parser.')
    cmd.add_argument('--feat', '-f', choices=['tag', 'char', 'elmo', 'bert'], nargs='+', help='features to use')
    cmd.add_argument('--build', '-b', action='store_true', help='whether to build the model first')
    cmd.add_argument('--checkpoint', action='store_true', help='whether to load a checkpoint to restore training')
    cmd.add_argument('--encoder', choices=['lstm', 'bert'], default='lstm', help='encoder to use')
    cmd.add_argument('--punct', action='store_true', help='whether to include punctuation')
    cmd.add_argument('--max-len', type=int, help='max length of the sentences')
    cmd.add_argument('--buckets', default=32, type=int, help='max num of buckets to use')
    cmd.add_argument('--train', default='data/ptb/train.conllx', help='path to train file')
    cmd.add_argument('--dev', default='data/ptb/dev.conllx', help='path to dev file')
    cmd.add_argument('--test', default='data/ptb/test.conllx', help='path to test file')
    cmd.add_argument('--embed', default='data/glove.6B.100d.txt', help='path to pretrained embeddings')
    cmd.add_argument('--unk', default='unk', help='unk token in pretrained embeddings')
    cmd.add_argument('--n-embed', default=100, type=int, help='dimension of embeddings')
    cmd.add_argument('--bert', default='bert-base-cased', help='which BERT model to use')

    # -- evaluate ---------------------------------------------------------
    cmd = commands.add_parser('evaluate', help='Evaluate the specified parser and dataset.')
    cmd.add_argument('--punct', action='store_true', help='whether to include punctuation')
    cmd.add_argument('--buckets', default=8, type=int, help='max num of buckets to use')
    cmd.add_argument('--data', default='data/ptb/test.conllx', help='path to dataset')

    # -- predict ----------------------------------------------------------
    cmd = commands.add_parser('predict', help='Use a trained parser to make predictions.')
    cmd.add_argument('--buckets', default=8, type=int, help='max num of buckets to use')
    cmd.add_argument('--data', default='data/ptb/test.conllx', help='path to dataset')
    cmd.add_argument('--pred', default='pred.conllx', help='path to predicted result')
    cmd.add_argument('--prob', action='store_true', help='whether to output probs')

    # Dispatch to the shared supar command runner.
    parse(parser)
def main():
    """CLI entry point: build/evaluate/predict with the first-order CRF dependency parser."""
    parser = argparse.ArgumentParser(description='Create first-order CRF Dependency Parser.')
    parser.set_defaults(Parser=CRFDependencyParser)
    # Global decoding options shared by every sub-command.
    parser.add_argument('--mbr', action='store_true', help='whether to use MBR decoding')
    parser.add_argument('--tree', action='store_true', help='whether to ensure well-formedness')
    parser.add_argument('--proj', action='store_true', help='whether to projectivize the data')
    parser.add_argument('--partial', action='store_true', help='whether partial annotation is included')
    commands = parser.add_subparsers(title='Commands', dest='mode')

    # -- train ------------------------------------------------------------
    cmd = commands.add_parser('train', help='Train a parser.')
    cmd.add_argument('--feat', '-f', choices=['tag', 'char', 'elmo', 'bert'], nargs='+', help='features to use')
    cmd.add_argument('--build', '-b', action='store_true', help='whether to build the model first')
    cmd.add_argument('--checkpoint', action='store_true', help='whether to load a checkpoint to restore training')
    cmd.add_argument('--encoder', choices=['lstm', 'bert'], default='lstm', help='encoder to use')
    cmd.add_argument('--punct', action='store_true', help='whether to include punctuation')
    cmd.add_argument('--max-len', type=int, help='max length of the sentences')
    cmd.add_argument('--buckets', default=32, type=int, help='max num of buckets to use')
    cmd.add_argument('--train', default='data/ptb/train.conllx', help='path to train file')
    cmd.add_argument('--dev', default='data/ptb/dev.conllx', help='path to dev file')
    cmd.add_argument('--test', default='data/ptb/test.conllx', help='path to test file')
    cmd.add_argument('--embed', default='data/glove.6B.100d.txt', help='path to pretrained embeddings')
    cmd.add_argument('--unk', default='unk', help='unk token in pretrained embeddings')
    cmd.add_argument('--n-embed', default=100, type=int, help='dimension of embeddings')
    cmd.add_argument('--bert', default='bert-base-cased', help='which BERT model to use')

    # -- evaluate ---------------------------------------------------------
    cmd = commands.add_parser('evaluate', help='Evaluate the specified parser and dataset.')
    cmd.add_argument('--punct', action='store_true', help='whether to include punctuation')
    cmd.add_argument('--buckets', default=8, type=int, help='max num of buckets to use')
    cmd.add_argument('--data', default='data/ptb/test.conllx', help='path to dataset')

    # -- predict ----------------------------------------------------------
    cmd = commands.add_parser('predict', help='Use a trained parser to make predictions.')
    cmd.add_argument('--buckets', default=8, type=int, help='max num of buckets to use')
    cmd.add_argument('--data', default='data/ptb/test.conllx', help='path to dataset')
    cmd.add_argument('--pred', default='pred.conllx', help='path to predicted result')
    cmd.add_argument('--prob', action='store_true', help='whether to output probs')

    # Dispatch to the shared supar command runner.
    parse(parser)
| if __name__ == "__main__": 67 | main() 68 | -------------------------------------------------------------------------------- /neutrex/supar/cmds/crf2o_dep.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | This is the public release of the code of our paper titled 4 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 5 | Language Specifications" (USENIX Security '24) 6 | Author: Abdullah Al Ishtiaq and Sarkar Snigdha Sarathi Das 7 | Contact: abdullah.ishtiaq@psu.edu 8 | 9 | Licensed under the Apache License, Version 2.0 (the "License"); 10 | you may not use this file except in compliance with the License. 11 | You may obtain a copy of the License at 12 | 13 | https://www.apache.org/licenses/LICENSE-2.0 14 | 15 | Unless required by applicable law or agreed to in writing, software 16 | distributed under the License is distributed on an "AS IS" BASIS, 17 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | See the License for the specific language governing permissions and 19 | limitations under the License. 
20 | """ 21 | 22 | import argparse 23 | 24 | from supar import CRF2oDependencyParser 25 | from supar.cmds.cmd import parse 26 | 27 | 28 | def main(): 29 | parser = argparse.ArgumentParser(description='Create second-order CRF Dependency Parser.') 30 | parser.set_defaults(Parser=CRF2oDependencyParser) 31 | parser.add_argument('--mbr', action='store_true', help='whether to use MBR decoding') 32 | parser.add_argument('--tree', action='store_true', help='whether to ensure well-formedness') 33 | parser.add_argument('--proj', action='store_true', help='whether to projectivize the data') 34 | parser.add_argument('--partial', action='store_true', help='whether partial annotation is included') 35 | subparsers = parser.add_subparsers(title='Commands', dest='mode') 36 | # train 37 | subparser = subparsers.add_parser('train', help='Train a parser.') 38 | subparser.add_argument('--feat', '-f', choices=['tag', 'char', 'elmo', 'bert'], nargs='+', help='features to use') 39 | subparser.add_argument('--build', '-b', action='store_true', help='whether to build the model first') 40 | subparser.add_argument('--checkpoint', action='store_true', help='whether to load a checkpoint to restore training') 41 | subparser.add_argument('--encoder', choices=['lstm', 'bert'], default='lstm', help='encoder to use') 42 | subparser.add_argument('--punct', action='store_true', help='whether to include punctuation') 43 | subparser.add_argument('--max-len', type=int, help='max length of the sentences') 44 | subparser.add_argument('--buckets', default=32, type=int, help='max num of buckets to use') 45 | subparser.add_argument('--train', default='data/ptb/train.conllx', help='path to train file') 46 | subparser.add_argument('--dev', default='data/ptb/dev.conllx', help='path to dev file') 47 | subparser.add_argument('--test', default='data/ptb/test.conllx', help='path to test file') 48 | subparser.add_argument('--embed', default='data/glove.6B.100d.txt', help='path to pretrained embeddings') 49 | 
subparser.add_argument('--unk', default='unk', help='unk token in pretrained embeddings') 50 | subparser.add_argument('--n-embed', default=100, type=int, help='dimension of embeddings') 51 | subparser.add_argument('--bert', default='bert-base-cased', help='which BERT model to use') 52 | # evaluate 53 | subparser = subparsers.add_parser('evaluate', help='Evaluate the specified parser and dataset.') 54 | subparser.add_argument('--punct', action='store_true', help='whether to include punctuation') 55 | subparser.add_argument('--buckets', default=8, type=int, help='max num of buckets to use') 56 | subparser.add_argument('--data', default='data/ptb/test.conllx', help='path to dataset') 57 | # predict 58 | subparser = subparsers.add_parser('predict', help='Use a trained parser to make predictions.') 59 | subparser.add_argument('--buckets', default=8, type=int, help='max num of buckets to use') 60 | subparser.add_argument('--data', default='data/ptb/test.conllx', help='path to dataset') 61 | subparser.add_argument('--pred', default='pred.conllx', help='path to predicted result') 62 | subparser.add_argument('--prob', action='store_true', help='whether to output probs') 63 | parse(parser) 64 | 65 | 66 | if __name__ == "__main__": 67 | main() 68 | -------------------------------------------------------------------------------- /neutrex/supar/cmds/vi_dep.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | This is the public release of the code of our paper titled 4 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 5 | Language Specifications" (USENIX Security '24) 6 | Author: Abdullah Al Ishtiaq and Sarkar Snigdha Sarathi Das 7 | Contact: abdullah.ishtiaq@psu.edu 8 | 9 | Licensed under the Apache License, Version 2.0 (the "License"); 10 | you may not use this file except in compliance with the License. 
def main():
    """CLI entry point: build/evaluate/predict with the variational-inference dependency parser."""
    parser = argparse.ArgumentParser(description='Create Dependency Parser using Variational Inference.')
    # Global decoding options shared by every sub-command.
    parser.add_argument('--tree', action='store_true', help='whether to ensure well-formedness')
    parser.add_argument('--proj', action='store_true', help='whether to projectivise the data')
    parser.add_argument('--partial', action='store_true', help='whether partial annotation is included')
    parser.set_defaults(Parser=VIDependencyParser)
    commands = parser.add_subparsers(title='Commands', dest='mode')

    # -- train ------------------------------------------------------------
    cmd = commands.add_parser('train', help='Train a parser.')
    cmd.add_argument('--feat', '-f', choices=['tag', 'char', 'elmo', 'bert'], nargs='+', help='features to use')
    cmd.add_argument('--build', '-b', action='store_true', help='whether to build the model first')
    cmd.add_argument('--checkpoint', action='store_true', help='whether to load a checkpoint to restore training')
    cmd.add_argument('--encoder', choices=['lstm', 'bert'], default='lstm', help='encoder to use')
    cmd.add_argument('--punct', action='store_true', help='whether to include punctuation')
    cmd.add_argument('--max-len', type=int, help='max length of the sentences')
    cmd.add_argument('--buckets', default=32, type=int, help='max num of buckets to use')
    cmd.add_argument('--train', default='data/ptb/train.conllx', help='path to train file')
    cmd.add_argument('--dev', default='data/ptb/dev.conllx', help='path to dev file')
    cmd.add_argument('--test', default='data/ptb/test.conllx', help='path to test file')
    cmd.add_argument('--embed', default='data/glove.6B.100d.txt', help='path to pretrained embeddings')
    cmd.add_argument('--unk', default='unk', help='unk token in pretrained embeddings')
    cmd.add_argument('--n-embed', default=100, type=int, help='dimension of embeddings')
    cmd.add_argument('--bert', default='bert-base-cased', help='which BERT model to use')
    cmd.add_argument('--inference', default='mfvi', choices=['mfvi', 'lbp'], help='approximate inference methods')

    # -- evaluate ---------------------------------------------------------
    cmd = commands.add_parser('evaluate', help='Evaluate the specified parser and dataset.')
    cmd.add_argument('--punct', action='store_true', help='whether to include punctuation')
    cmd.add_argument('--buckets', default=8, type=int, help='max num of buckets to use')
    cmd.add_argument('--data', default='data/ptb/test.conllx', help='path to dataset')

    # -- predict ----------------------------------------------------------
    cmd = commands.add_parser('predict', help='Use a trained parser to make predictions.')
    cmd.add_argument('--buckets', default=8, type=int, help='max num of buckets to use')
    cmd.add_argument('--data', default='data/ptb/test.conllx', help='path to dataset')
    cmd.add_argument('--pred', default='pred.conllx', help='path to predicted result')
    cmd.add_argument('--prob', action='store_true', help='whether to output probs')

    # Dispatch to the shared supar command runner.
    parse(parser)
from .parsers import (BiaffineDependencyParser,
                      BiaffineSemanticDependencyParser, CRF2oDependencyParser,
                      CRFConstituencyParser, CRFDependencyParser, Parser,
                      VIConstituencyParser, VIDependencyParser,
                      VISemanticDependencyParser)
from .structs import (BiLexicalizedConstituencyCRF, ConstituencyCRF,
                      ConstituencyLBP, ConstituencyMFVI, Dependency2oCRF,
                      DependencyCRF, DependencyLBP, DependencyMFVI,
                      LinearChainCRF, MatrixTree, SemanticDependencyLBP,
                      SemanticDependencyMFVI)

# Public API of the supar package.
__all__ = ['BiaffineDependencyParser',
           'CRFDependencyParser',
           'CRF2oDependencyParser',
           'VIDependencyParser',
           'CRFConstituencyParser',
           'VIConstituencyParser',
           'BiaffineSemanticDependencyParser',
           'VISemanticDependencyParser',
           'Parser',
           'MatrixTree',
           'DependencyCRF',
           'Dependency2oCRF',
           'ConstituencyCRF',
           'BiLexicalizedConstituencyCRF',
           'LinearChainCRF',
           'DependencyLBP',
           'DependencyMFVI',
           'ConstituencyLBP',
           'ConstituencyMFVI',
           'SemanticDependencyLBP',
           'SemanticDependencyMFVI']

__version__ = '1.1.4'

# Registry mapping each parser's NAME attribute to its class.
PARSER = {cls.NAME: cls for cls in [BiaffineDependencyParser,
                                    CRFDependencyParser,
                                    CRF2oDependencyParser,
                                    VIDependencyParser,
                                    CRFConstituencyParser,
                                    VIConstituencyParser,
                                    BiaffineSemanticDependencyParser,
                                    VISemanticDependencyParser]}

# Mirrors hosting the pretrained model archives.
SRC = {'github': 'https://github.com/yzhangcs/parser/releases/download',
       'hlt': 'http://hlt.suda.edu.cn/~yzhang/supar'}
# Short alias -> archive basename for every released pretrained model.
NAME = {
    'biaffine-dep-en': 'ptb.biaffine.dep.lstm.char',
    'biaffine-dep-zh': 'ctb7.biaffine.dep.lstm.char',
    'crf2o-dep-en': 'ptb.crf2o.dep.lstm.char',
    'crf2o-dep-zh': 'ctb7.crf2o.dep.lstm.char',
    'biaffine-dep-roberta-en': 'ptb.biaffine.dep.roberta',
    'biaffine-dep-electra-zh': 'ctb7.biaffine.dep.electra',
    'biaffine-dep-xlmr': 'ud.biaffine.dep.xlmr',
    'crf-con-en': 'ptb.crf.con.lstm.char',
    'crf-con-zh': 'ctb7.crf.con.lstm.char',
    'crf-con-roberta-en': 'ptb.crf.con.roberta',
    'crf-con-electra-zh': 'ctb7.crf.con.electra',
    'crf-con-xlmr': 'spmrl.crf.con.xlmr',
    'biaffine-sdp-en': 'dm.biaffine.sdp.lstm.tag-char-lemma',
    'biaffine-sdp-zh': 'semeval16.biaffine.sdp.lstm.tag-char-lemma',
    'vi-sdp-en': 'dm.vi.sdp.lstm.tag-char-lemma',
    'vi-sdp-zh': 'semeval16.vi.sdp.lstm.tag-char-lemma',
    'vi-sdp-roberta-en': 'dm.vi.sdp.roberta',
    'vi-sdp-electra-zh': 'semeval16.vi.sdp.electra'
}
# Fully-qualified download URLs for model archives and config files, per mirror.
MODEL = {source: {alias: f"{base}/v1.1.0/{archive}.zip" for alias, archive in NAME.items()}
         for source, base in SRC.items()}
CONFIG = {source: {alias: f"{base}/v1.1.0/{archive}.ini" for alias, archive in NAME.items()}
          for source, base in SRC.items()}
class SharedDropout(nn.Module):
    r"""
    Dropout module whose mask is shared across one dimension.

    Vanilla dropout samples an independent mask for every position, while
    SharedDropout samples a single mask and broadcasts it along the sequence
    dimension, so every timestep drops the same channels.

    Args:
        p (float):
            The probability of an element to be zeroed. Default: 0.5.
        batch_first (bool):
            If ``True``, the input and output tensors are provided as
            ``[batch_size, seq_len, *]``. Default: ``True``.

    Examples:
        >>> x = torch.ones(1, 3, 5)
        >>> nn.Dropout()(x)
        tensor([[[0., 2., 2., 0., 0.],
                 [2., 2., 0., 2., 2.],
                 [2., 2., 2., 2., 0.]]])
        >>> SharedDropout()(x)
        tensor([[[2., 0., 2., 0., 2.],
                 [2., 0., 2., 0., 2.],
                 [2., 0., 2., 0., 2.]]])
    """

    def __init__(self, p=0.5, batch_first=True):
        super().__init__()

        self.p = p
        self.batch_first = batch_first

    def __repr__(self):
        info = f"p={self.p}"
        if self.batch_first:
            info += f", batch_first={self.batch_first}"

        return f"{self.__class__.__name__}({info})"

    def forward(self, x):
        r"""
        Args:
            x (~torch.Tensor):
                A tensor of any shape.
        Returns:
            A tensor of the same shape as `x`.
        """

        # Identity when not in training mode.
        if not self.training:
            return x

        # Sample one mask from the first timestep and broadcast it over
        # the sequence dimension.
        if self.batch_first:
            mask = self.get_mask(x[:, 0], self.p).unsqueeze(1)
        else:
            mask = self.get_mask(x[0], self.p)

        return x * mask

    @staticmethod
    def get_mask(x, p):
        # Inverted dropout: surviving entries are rescaled by 1/(1-p).
        return x.new_empty(x.shape).bernoulli_(1 - p) / (1 - p)


class IndependentDropout(nn.Module):
    r"""
    Applies independent dropout masks to each of :math:`N` input tensors.

    When :math:`N-M` of them are dropped at a position, the remaining
    :math:`M` are scaled by a factor of :math:`N/M` to compensate, and when
    all of them are dropped together, zeros are returned.

    Args:
        p (float):
            The probability of an element to be zeroed. Default: 0.5.

    Examples:
        >>> x, y = torch.ones(1, 3, 5), torch.ones(1, 3, 5)
        >>> x, y = IndependentDropout()(x, y)
        >>> x
        tensor([[[1., 1., 1., 1., 1.],
                 [0., 0., 0., 0., 0.],
                 [2., 2., 2., 2., 2.]]])
        >>> y
        tensor([[[1., 1., 1., 1., 1.],
                 [2., 2., 2., 2., 2.],
                 [0., 0., 0., 0., 0.]]])
    """

    def __init__(self, p=0.5):
        super().__init__()

        self.p = p

    def __repr__(self):
        return f"{self.__class__.__name__}(p={self.p})"

    def forward(self, *items):
        r"""
        Args:
            items (list[~torch.Tensor]):
                A list of tensors that have the same shape except the last dimension.
        Returns:
            The returned tensors are of the same shape as `items`.
        """

        # Identity when not in training mode.
        if not self.training:
            return items

        # One independent keep/drop mask per tensor, over the first two dims.
        keep = [item.new_empty(item.shape[:2]).bernoulli_(1 - self.p) for item in items]
        total = sum(keep)
        # Survivors are scaled by N / (#kept); the max() guards against
        # dividing by zero when everything is dropped at a position.
        scale = len(items) / total.max(torch.ones_like(total))
        return [item * (mask * scale).unsqueeze(-1) for item, mask in zip(items, keep)]
Control Plane 24 | CPA Conditional PSCell Addition 25 | CPC Conditional PSCell Change 26 | C-RNTI Cell RNTI 27 | CSI Channel State Information 28 | DAPS Dual Active Protocol Stack 29 | DC Dual Connectivity 30 | DCCH Dedicated Control Channel 31 | DCI Downlink Control Information 32 | DCP DCI with CRC scrambled by PS-RNTI 33 | DFN Direct Frame Number 34 | DL Downlink 35 | DL-PRS Downlink Positioning Reference Signal 36 | DL-SCH Downlink Shared Channel 37 | DM-RS Demodulation Reference Signal 38 | DRB (user) Data Radio Bearer 39 | DRX Discontinuous Reception 40 | DTCH Dedicated Traffic Channel 41 | EN-DC E-UTRA NR Dual Connectivity with E-UTRA connected to EPC 42 | EPC Evolved Packet Core 43 | EPS Evolved Packet System 44 | ETWS Earthquake and Tsunami Warning System 45 | E-UTRA Evolved Universal Terrestrial Radio Access 46 | E-UTRA/5GC E-UTRA connected to 5GC 47 | E-UTRA/EPC E-UTRA connected to EPC 48 | E-UTRAN Evolved Universal Terrestrial Radio Access Network 49 | FDD Frequency Division Duplex 50 | FFS For Further Study 51 | G-CS-RNTI Group Configured Scheduling RNTI 52 | GERAN GSM/EDGE Radio Access Network 53 | GIN Group ID for Network selection 54 | GNSS Global Navigation Satellite System 55 | G-RNTI Group RNTI 56 | GSM Global System for Mobile Communications 57 | HARQ Hybrid Automatic Repeat Request 58 | HRNN Human Readable Network Name 59 | HSDN High Speed Dedicated Network 60 | H-SFN Hyper SFN 61 | IAB Integrated Access and Backhaul 62 | IAB-DU IAB-node DU 63 | IAB-MT IAB Mobile Termination 64 | IDC In-Device Coexistence 65 | IE Information element 66 | IMSI International Mobile Subscriber Identity 67 | kB Kilobyte (1000 bytes) 68 | L1 Layer 1 69 | L2 Layer 2 70 | L3 Layer 3 71 | LBT Listen Before Talk 72 | MAC Medium Access Control 73 | MBS Multicast/Broadcast Service 74 | MBS FSAI MBS Frequency Selection Area Identity 75 | MCCH MBS Control Channel 76 | MCG Master Cell Group 77 | MDT Minimization of Drive Tests 78 | MIB Master Information Block 79 | MPE 
Maximum Permissible Exposure 80 | MRB MBS Radio Bearer 81 | MR-DC Multi-Radio Dual Connectivity 82 | MTCH MBS Traffic Channel 83 | MTSI Multimedia Telephony Service for IMS 84 | MUSIM Multi-Universal Subscriber Identity Module 85 | N/A Not Applicable 86 | NE-DC NR E-UTRA Dual Connectivity 87 | (NG)EN-DC E-UTRA NR Dual Connectivity (covering E-UTRA connected to EPC or 5GC) 88 | NGEN-DC E-UTRA NR Dual Connectivity with E-UTRA connected to 5GC 89 | NID Network Identifier 90 | NPN Non-Public Network 91 | NR-DC NR-NR Dual Connectivity 92 | NR/5GC NR connected to 5GC 93 | PCell Primary Cell 94 | PDCP Packet Data Convergence Protocol 95 | PDU Protocol Data Unit 96 | PEI Paging Early Indication 97 | PLMN Public Land Mobile Network 98 | PNI-NPN Public Network Integrated Non-Public Network 99 | posSIB Positioning SIB 100 | PPW PRS Processing Window 101 | PRS Positioning Reference Signal 102 | PSCell Primary SCG Cell 103 | PTM Point to Multipoint 104 | PTP Point to Point 105 | PWS Public Warning System 106 | QoE Quality of Experience 107 | QoS Quality of Service 108 | RAN Radio Access Network 109 | RAT Radio Access Technology 110 | RLC Radio Link Control 111 | RLM Radio Link Monitoring 112 | RMTC RSSI Measurement Timing Configuration 113 | RNA RAN-based Notification Area 114 | RNTI Radio Network Temporary Identifier 115 | ROHC Robust Header Compression 116 | RPLMN Registered Public Land Mobile Network 117 | RRC Radio Resource Control 118 | RS Reference Signal 119 | SBAS Satellite Based Augmentation System 120 | SCell Secondary Cell 121 | SCG Secondary Cell Group 122 | SCS Subcarrier Spacing 123 | SDT Small Data Transmission 124 | SFN System Frame Number 125 | SFTD SFN and Frame Timing Difference 126 | SI System Information 127 | SIB System Information Block 128 | SL Sidelink 129 | SLSS Sidelink Synchronisation Signal 130 | SNPN Stand-alone Non-Public Network 131 | SpCell Special Cell 132 | SRAP Sidelink Relay Adaptation Protocol 133 | SRB Signalling Radio Bearer 134 | SRS 
Sounding Reference Signal 135 | SSB Synchronization Signal Block 136 | TAG Timing Advance Group 137 | TDD Time Division Duplex 138 | TEG Timing Error Group 139 | TM Transparent Mode 140 | TMGI Temporary Mobile Group Identity 141 | U2N UE-to-Network 142 | UDC Uplink Data Compression 143 | UE User Equipment 144 | UL Uplink 145 | UM Unacknowledged Mode 146 | UP User Plane 147 | VR Virtual Reality 148 | -------------------------------------------------------------------------------- /keyword_extraction/noun_phrase_cleanup.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is the public release of the code of our paper titled 3 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 4 | Language Specifications" (USENIX Security '24) 5 | Author: Abdullah Al Ishtiaq, Syed Md Mukit Rashid, and Ali Ranjbar 6 | Contact: abdullah.ishtiaq@psu.edu 7 | 8 | Licensed under the Apache License, Version 2.0 (the "License"); 9 | you may not use this file except in compliance with the License. 10 | You may obtain a copy of the License at 11 | 12 | https://www.apache.org/licenses/LICENSE-2.0 13 | 14 | Unless required by applicable law or agreed to in writing, software 15 | distributed under the License is distributed on an "AS IS" BASIS, 16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | See the License for the specific language governing permissions and 18 | limitations under the License. 
19 | """ 20 | 21 | import pickle 22 | import pprint 23 | import enchant 24 | 25 | pp = pprint.PrettyPrinter(indent=4) 26 | dictionary = enchant.Dict("en_US") 27 | 28 | noun_phrases = dict() 29 | with open("assets/5g-rrc_small_lines.np.count.2.txt", "r") as f: 30 | lines = f.readlines() 31 | for line in lines: 32 | phrase_freq = int(line.split("\n")[0].strip().split(" ")[0]) 33 | phrase = "" 34 | for i in range(1, len(line.split("\n")[0].strip().split(" "))): 35 | phrase += line.split("\n")[0].strip().split(" ")[i] + " " 36 | 37 | noun_phrases[phrase.strip()] = phrase_freq 38 | 39 | 40 | def refine_noun_phrases(): 41 | punctuations = ["(", ")", "{", "}", "[", "]", ":", ",", ".", "\"", "#", "and", "or"] 42 | 43 | def break_terms(phrase_dict, delimiter): 44 | pl_1 = phrase_dict 45 | pl_1_r = dict() 46 | for p, pf in pl_1.items(): 47 | if delimiter in p: 48 | new_phrases = p.split(delimiter) 49 | new_phrases = [p for p in new_phrases if p != ""] 50 | for np in new_phrases: 51 | np = np.strip() 52 | if np not in pl_1_r.keys(): 53 | pl_1_r[np] = pf 54 | else: 55 | pl_1_r[np] += pf 56 | 57 | else: 58 | p = p.strip() 59 | if p not in pl_1_r.keys(): 60 | pl_1_r[p] = pf 61 | else: 62 | pl_1_r[p] += pf 63 | 64 | return pl_1_r 65 | 66 | pd_comma = break_terms(noun_phrases, " , ") 67 | pd_and = break_terms(pd_comma, " and ") 68 | pd_or = break_terms(pd_and, " or ") 69 | 70 | refined_phrases = dict() 71 | for phrase, phrase_freq in pd_or.items(): 72 | new_phrase = phrase 73 | delete = False 74 | words = phrase.split(" ") 75 | for i in range(len(words)): 76 | if i == 0 and (words[i].lower() == "a" or words[i].lower() == "an" or words[i].lower() == "the" 77 | or words[i].lower() == "any"): 78 | new_phrase = "" 79 | for word in words[1:]: 80 | new_phrase += word + " " 81 | new_phrase = new_phrase.strip() 82 | continue 83 | 84 | if i != 0 and (words[i].lower() == "a" or words[i].lower() == "an" or words[i].lower() == "the"): 85 | delete = True 86 | 87 | elif any([p in words[i] for p 
in punctuations]): 88 | delete = True 89 | 90 | if words[0].isnumeric() or new_phrase == "": 91 | delete = True 92 | 93 | if phrase_freq < 7: 94 | delete = True 95 | 96 | if len(new_phrase.split(" ")) == 1 and new_phrase.split(" ")[0] != "" \ 97 | and dictionary.check(new_phrase.split(" ")[0]): 98 | delete = True 99 | 100 | if any([p in new_phrase.split(" ")[0] for p in ["/", "\\", ","]]): 101 | delete = True 102 | 103 | if any([p in new_phrase.split(" ")[0].lower() for p in ["octet", "note", "bit"]]): 104 | delete = True 105 | 106 | if not delete: 107 | if new_phrase not in refined_phrases.keys(): 108 | refined_phrases[new_phrase] = phrase_freq 109 | else: 110 | refined_phrases[new_phrase] += phrase_freq 111 | 112 | keyword_dict_ = dict() 113 | 114 | for phrase, phrase_freq in sorted(refined_phrases.items(), key=lambda item: item[1], reverse=True): 115 | phrase = phrase.strip() 116 | phrase_key = str(phrase).replace("-", "_").replace("/", "_"). \ 117 | replace(" ", "_").replace("__", "_").replace("__", "_") 118 | keyword_dict_[phrase_key] = [phrase] 119 | 120 | return keyword_dict_ 121 | 122 | 123 | keyword_dict = refine_noun_phrases() 124 | 125 | a_file = open("keyword_dict_np.pkl", "wb") 126 | pickle.dump(keyword_dict, a_file) 127 | a_file.close() 128 | -------------------------------------------------------------------------------- /keyword_extraction/assets/definitions.txt: -------------------------------------------------------------------------------- 1 | AM MRB: An MRB associated with at least an AM RLC bearer for PTP transmission. 2 | BH RLC channel: An RLC channel between two nodes, which is used to transport backhaul packets. 3 | Broadcast MRB: A radio bearer configured for MBS broadcast delivery. 4 | CEIL: Mathematical function used to 'round up' i.e. to the nearest integer having a higher or equal value. 
5 | DAPS bearer: a bearer whose radio protocols are located in both the source gNB and the target gNB during DAPS handover to use both source gNB and target gNB resources. 6 | Dedicated signalling: Signalling sent on DCCH logical channel between the network and a single UE. 7 | Dormant BWP: The dormant BWP is one of downlink BWPs configured by the network via dedicated RRC signalling. 8 | In the dormant BWP, the UE stops monitoring PDCCH on/for the SCell, but continues performing CSI measurements, Automatic Gain Control (AGC) and beam management, if configured. For each serving cell other than the SpCell or PUCCH SCell, the network may configure one BWP as a dormant BWP. 9 | Field: The individual contents of an information element are referred to as fields. 10 | FLOOR: Mathematical function used to 'round down' i.e. to the nearest integer having a lower or equal value. 11 | Global cell identity: An identity to uniquely identifying an NR cell. It is consisted of cellIdentity and plmn-Identity of the first PLMN-Identity in plmn-IdentityList in SIB1. 12 | Information element: A structural element containing single or multiple fields is referred as information element. 13 | MBS Radio Bearer: A radio bearer that is configured for MBS delivery. 14 | Multicast/Broadcast Service: A point-to-multipoint service as defined in TS 23.247 . 15 | Multicast MRB: A radio bearer configured for MBS multicast delivery. 16 | NCSG: Network controlled small gap as defined in TS 38.133 . 17 | NPN-only Cell: A cell that is only available for normal service for NPNs' subscriber. An NPN-capable UE determines that a cell is NPN-only Cell by detecting that the cellReservedForOtherUse IE is set to true while the npn- IdentityInfoList IE is present in CellAccessRelatedInfo. 18 | NR sidelink communication: AS functionality enabling at least V2X Communication as defined in TS 23.287 , between two or more nearby UEs, using NR technology but not traversing any network node. 
19 | PNI-NPN identity: an identifier of a PNI-NPN comprising of a PLMN ID and a CAG -ID combination. 20 | Primary Cell: The MCG cell, operating on the primary frequency, in which the UE either performs the initial connection establishment procedure or initiates the connection re-establishment procedure. 21 | PC5 Relay RLC channel: An RLC channel between L2 U2N Remote UE and L2 U2N Relay UE, which is used to transport packets over PC5 for L2 UE-to-Network relay. 22 | Primary SCG Cell: For dual connectivity operation, the SCG cell in which the UE performs random access when performing the Reconfiguration with Sync procedure. 23 | Primary Timing Advance Group: Timing Advance Group containing the SpCell. 24 | PUCCH SCell: An SCell configured with PUCCH. 25 | PUSCH-Less SCell: An SCell configured without PUSCH. 26 | RedCap UE: A UE with reduced capabilities as specified in clause 4.2.21.1 in TS 38.306 . 27 | RLC bearer configuration: The lower layer part of the radio bearer configuration comprising the RLC and logical channel configurations. 28 | Secondary Cell: For a UE configured with CA, a cell providing additional radio resources on top of Special Cell. 29 | Secondary Cell Group: For a UE configured with dual connectivity, the subset of serving cells comprising of the PSCell and zero or more secondary cells. 30 | Serving Cell: For a UE in RRC_CONNECTED not configured with CA/DC there is only one serving cell comprising of the primary cell. For a UE in RRC_CONNECTED configured with CA/ DC the term 'serving cells' is used to denote the set of cells comprising of the Special Cell(s) and all secondary cells. 31 | Small Data Transmission: A procedure used for transmission of data and/or signalling over allowed radio bearers in RRC_INACTIVE state (i.e. without the UE transitioning to RRC_CONNECTED state). 32 | SNPN identity: an identifier of an SNPN comprising of a PLMN ID and an NID combination. 
33 | Special Cell: For Dual Connectivity operation the term Special Cell refers to the PCell of the MCG or the PSCell of the SCG, otherwise the term Special Cell refers to the PCell. 34 | Split SRB: In MR-DC, an SRB that supports transmission via MCG and SCG as well as duplication of RRC PDUs as defined in TS 37.340 . 35 | SSB Frequency: Frequency referring to the position of resource element RE=#0 (subcarrier #0) of resource block RB#10 of the SS block. 36 | U2N Relay UE: A UE that provides functionality to support connectivity to the network for U2N Remote UE(s). 37 | U2N Remote UE: A UE that communicates with the network via a U2N Relay UE. 38 | Uu Relay RLC channel: An RLC channel between L2 U2N Relay UE and gNB, which is used to transport packets over Uu for L2 UE-to-Network relay. 39 | UE Inactive AS Context: UE Inactive AS Context is stored when the connection is suspended and restored when the connection is resumed. It includes information as defined in clause 5.3.8.3. 40 | V2X sidelink communication: AS functionality enabling V2X Communication as defined in TS 23.285 , between nearby UEs, using E-UTRA technology but not traversing any network node. 41 | -------------------------------------------------------------------------------- /neutrex/supar/structs/dist.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | This is the public release of the code of our paper titled 4 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 5 | Language Specifications" (USENIX Security '24) 6 | Author: Abdullah Al Ishtiaq and Sarkar Snigdha Sarathi Das 7 | Contact: abdullah.ishtiaq@psu.edu 8 | 9 | Licensed under the Apache License, Version 2.0 (the "License"); 10 | you may not use this file except in compliance with the License. 
11 | You may obtain a copy of the License at 12 | 13 | https://www.apache.org/licenses/LICENSE-2.0 14 | 15 | Unless required by applicable law or agreed to in writing, software 16 | distributed under the License is distributed on an "AS IS" BASIS, 17 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | See the License for the specific language governing permissions and 19 | limitations under the License. 20 | """ 21 | 22 | import torch 23 | import torch.autograd as autograd 24 | from supar.structs.semiring import (CrossEntropySemiring, EntropySemiring, 25 | KLDivergenceSemiring, KMaxSemiring, 26 | LogSemiring, MaxSemiring, SampledSemiring) 27 | from torch.distributions.distribution import Distribution 28 | from torch.distributions.utils import lazy_property 29 | 30 | 31 | class StructuredDistribution(Distribution): 32 | r""" 33 | Base class for structured distribution :math:`p(y)` :cite:`eisner-2016-inside,goodman-1999-semiring,li-eisner-2009-first`. 34 | 35 | Args: 36 | scores (torch.Tensor): 37 | Log potentials, also for high-order cases. 38 | 39 | """ 40 | 41 | def __init__(self, scores, **kwargs): 42 | self.scores = scores.requires_grad_() if isinstance(scores, torch.Tensor) else [s.requires_grad_() for s in scores] 43 | self.kwargs = kwargs 44 | 45 | def __repr__(self): 46 | return f"{self.__class__.__name__}()" 47 | 48 | def __add__(self, other): 49 | return self.__class__(torch.stack((self.scores, other.scores), -1), lens=self.lens) 50 | 51 | @lazy_property 52 | def log_partition(self): 53 | r""" 54 | Computes the log partition function of the distribution :math:`p(y)`. 55 | """ 56 | 57 | return self.forward(LogSemiring) 58 | 59 | @lazy_property 60 | def marginals(self): 61 | r""" 62 | Computes marginal probabilities of the distribution :math:`p(y)`. 63 | """ 64 | 65 | return self.backward(self.log_partition.sum()) 66 | 67 | @lazy_property 68 | def max(self): 69 | r""" 70 | Computes the max score of the distribution :math:`p(y)`. 
71 | """ 72 | 73 | return self.forward(MaxSemiring) 74 | 75 | @lazy_property 76 | def argmax(self): 77 | r""" 78 | Computes :math:`\arg\max_y p(y)` of the distribution :math:`p(y)`. 79 | """ 80 | raise NotImplementedError 81 | 82 | @lazy_property 83 | def mode(self): 84 | return self.argmax 85 | 86 | def kmax(self, k): 87 | r""" 88 | Computes the k-max of the distribution :math:`p(y)`. 89 | """ 90 | 91 | return self.forward(KMaxSemiring(k)) 92 | 93 | def topk(self, k): 94 | r""" 95 | Computes the k-argmax of the distribution :math:`p(y)`. 96 | """ 97 | raise NotImplementedError 98 | 99 | def sample(self): 100 | r""" 101 | Obtains a structured sample from the distribution :math:`y \sim p(y)`. 102 | TODO: multi-sampling. 103 | """ 104 | 105 | return self.backward(self.forward(SampledSemiring).sum()).detach() 106 | 107 | @lazy_property 108 | def entropy(self): 109 | r""" 110 | Computes entropy :math:`H[p]` of the distribution :math:`p(y)`. 111 | """ 112 | 113 | return self.forward(EntropySemiring) 114 | 115 | def cross_entropy(self, other): 116 | r""" 117 | Computes cross-entropy :math:`H[p,q]` of self and another distribution. 118 | 119 | Args: 120 | other (~supar.structs.dist.StructuredDistribution): Comparison distribution. 121 | """ 122 | 123 | return (self + other).forward(CrossEntropySemiring) 124 | 125 | def kl(self, other): 126 | r""" 127 | Computes KL-divergence :math:`KL[p \parallel q]=H[p,q]-H[p]` of self and another distribution. 128 | 129 | Args: 130 | other (~supar.structs.dist.StructuredDistribution): Comparison distribution. 131 | """ 132 | 133 | return (self + other).forward(KLDivergenceSemiring) 134 | 135 | def log_prob(self, value, **kwargs): 136 | """ 137 | Computes log probability over values :math:`p(y)`. 
138 | """ 139 | 140 | return self.score(value, **kwargs) - self.log_partition 141 | 142 | def score(self, value): 143 | raise NotImplementedError 144 | 145 | @torch.enable_grad() 146 | def forward(self, semiring): 147 | raise NotImplementedError 148 | 149 | def backward(self, log_partition): 150 | return autograd.grad(log_partition, 151 | self.scores if isinstance(self.scores, torch.Tensor) else self.scores[0], 152 | create_graph=True)[0] 153 | -------------------------------------------------------------------------------- /keyword_extraction/constituency_parser.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is the public release of the code of our paper titled 3 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 4 | Language Specifications" (USENIX Security '24) 5 | Author: Abdullah Al Ishtiaq and Ali Ranjbar 6 | Contact: abdullah.ishtiaq@psu.edu 7 | 8 | Licensed under the Apache License, Version 2.0 (the "License"); 9 | you may not use this file except in compliance with the License. 10 | You may obtain a copy of the License at 11 | 12 | https://www.apache.org/licenses/LICENSE-2.0 13 | 14 | Unless required by applicable law or agreed to in writing, software 15 | distributed under the License is distributed on an "AS IS" BASIS, 16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | See the License for the specific language governing permissions and 18 | limitations under the License. 
19 | """ 20 | 21 | import argparse 22 | import stanza 23 | import sys 24 | import threading 25 | 26 | from stanza.models.common.bert_embedding import BERT_ARGS 27 | from stanza.models.common.doc import Document 28 | from stanza.models.constituency.parse_tree import Tree 29 | from tqdm import tqdm 30 | from typing import List 31 | 32 | 33 | s_print_lock = threading.Lock() 34 | 35 | 36 | def get_args() -> argparse.Namespace: 37 | _parser = argparse.ArgumentParser(description="Filter constituency tree") 38 | _parser.add_argument("-f", "--file", 39 | type=str, 40 | help="input file", 41 | required=True) 42 | _parser.add_argument("--label", 43 | type=str, 44 | help="label of the node to print", 45 | required=True) 46 | _parser.add_argument("-v", 47 | action=argparse.BooleanOptionalAction, 48 | help='verbose logging', 49 | default=False, 50 | required=False) 51 | group = _parser.add_mutually_exclusive_group() 52 | group.add_argument("-j", 53 | type=int, 54 | help="number of threads", 55 | default=8, 56 | required=False) 57 | group.add_argument("--threading", 58 | action=argparse.BooleanOptionalAction, 59 | help="use threading lib", 60 | default=True, 61 | required=False) 62 | _args = _parser.parse_args() 63 | 64 | return _args 65 | 66 | 67 | def dfs_collect_np(tree: Tree, result: List[str]) -> None: 68 | if not tree.children: 69 | result.append(tree.label) 70 | return 71 | for child in tree.children: 72 | dfs_collect_np(child, result) 73 | 74 | 75 | def dfs_filter(tree: Tree, label: str) -> None: 76 | if not tree.children: 77 | return 78 | for child in tree.children: 79 | dfs_filter(child, label) 80 | if child.label == label and child.children: 81 | np: List[str] = [] 82 | dfs_collect_np(child, np) 83 | with s_print_lock: 84 | print(" ".join(np), flush=True) 85 | 86 | 87 | def analyze_sentence(sen: str): 88 | try: 89 | text_doc: Document = stanza_pipeline(sen) 90 | for sentence in text_doc.sentences: 91 | tree: Tree = sentence.constituency 92 | dfs_filter(tree, 
args.label) 93 | except Exception as e: 94 | print(e) 95 | 96 | 97 | def join_all(_threads: List[threading.Thread], _bar: tqdm): 98 | for t in _threads: 99 | t.join() 100 | _bar.update(1) 101 | 102 | 103 | if __name__ == "__main__": 104 | args = get_args() 105 | 106 | model_name = "roberta-base" 107 | if model_name in BERT_ARGS.keys(): 108 | BERT_ARGS[model_name]["model_max_length"] = 1024 109 | else: 110 | BERT_ARGS[model_name] = {"model_max_length": 1024} 111 | 112 | stanza_pipeline = stanza.Pipeline(lang='en', processors='tokenize,mwt,pos,lemma,depparse,constituency', package={'constituency': 'wsj_bert'}, verbose=args.v) 113 | num_lines = sum(1 for line in open(args.file)) 114 | with open(args.file, "r") as f: 115 | with tqdm(total=num_lines, file=sys.stderr) as bar: 116 | if not args.threading: 117 | for line in f: 118 | analyze_sentence(line.rstrip()) 119 | bar.update(1) 120 | else: 121 | try: 122 | while True: 123 | threads: List[threading.Thread] = [] 124 | for _ in range(args.j): 125 | line = f.readline() 126 | if not line: 127 | break 128 | line = line.rstrip() 129 | x = threading.Thread(target=analyze_sentence, args=(line,)) 130 | threads.append(x) 131 | x.start() 132 | else: 133 | join_all(threads, bar) 134 | continue 135 | join_all(threads, bar) 136 | break 137 | except KeyboardInterrupt: 138 | join_all(threads, bar) 139 | -------------------------------------------------------------------------------- /synthesizers/script_msg_defs_4.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is the public release of the code of our paper titled 3 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 4 | Language Specifications" (USENIX Security '24) 5 | Author: Abdullah Al Ishtiaq 6 | Contact: abdullah.ishtiaq@psu.edu 7 | 8 | Licensed under the Apache License, Version 2.0 (the "License"); 9 | you may not use this file except in compliance with 
the License. 10 | You may obtain a copy of the License at 11 | 12 | https://www.apache.org/licenses/LICENSE-2.0 13 | 14 | Unless required by applicable law or agreed to in writing, software 15 | distributed under the License is distributed on an "AS IS" BASIS, 16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | See the License for the specific language governing permissions and 18 | limitations under the License. 19 | """ 20 | 21 | um_msg_list = { 22 | "attach_req", "attach_complete", "identity_resp", "auth_resp", "auth_failure", "sm_complete", "sm_reject", 23 | "tau_req", "tau_complete", "guti_realloc_complete", "service_req", "ext_service_req", "control_service_req", 24 | "uplink_nas_transport", "uplink_generic_nas_transport", 25 | "activate_dedicated_eps_bearer_reject", "activate_default_eps_bearer_reject", "activate_default_eps_bearer_accept", 26 | "activate_dedicated_eps_bearer_accept", "bearer_resource_allocation_req", "bearer_resource_modification_req", 27 | "deactivate_eps_bearer_accept", "esm_info_resp", "modify_eps_bearer_accept", "modify_eps_bearer_reject", 28 | "pdn_connectivity_req", "pdn_disconnect_req", "remote_ue_report", 29 | "uplink_signalling" 30 | } 31 | 32 | mu_msg_list = { 33 | "attach_accept", "attach_reject", "identity_req", "auth_req", "auth_reject", "sm_command", 34 | "tau_accept", "tau_reject", "guti_realloc_command", "service_accept", "service_reject", "cs_service_notification", 35 | "downlink_nas_transport", "downlink_generic_nas_transport", "emm_information", "paging", 36 | "activate_dedicated_eps_bearer_req", "activate_default_eps_bearer_req", "bearer_resource_allocation_reject", 37 | "bearer_resource_modification_reject", "deactivate_eps_bearer_req", "esm_info_req", "modify_eps_bearer_req", 38 | "notification_msg", "pdn_connectivity_reject", "pdn_disconnect_reject", "remote_ue_report_resp", 39 | "downlink_signalling" 40 | } 41 | 42 | both_dir_msg_list = { 43 | "detach_req", "detach_accept", 
"emm_status", "security_protected_nas_msg", 44 | "esm_dummy", "esm_status", "esm_data_transport", 45 | "nas_message", "initial_nas_message", "user_data" 46 | } 47 | 48 | emm_sublayer_msg_list = { 49 | "attach_req", "attach_accept", "attach_reject", "attach_complete", "auth_req", "auth_resp", "auth_failure", 50 | "auth_reject", "cs_service_notification", "detach_req", "detach_accept", "downlink_nas_transport", "paging", 51 | "uplink_nas_transport", "downlink_generic_nas_transport", "uplink_generic_nas_transport", "emm_information", 52 | "emm_status", "service_req", "service_accept", "service_reject", "ext_service_req", "control_service_req", 53 | "guti_realloc_command", "guti_realloc_complete", "identity_req", "identity_resp", "sm_command", "sm_complete", 54 | "sm_reject", "security_protected_nas_msg", "tau_req", "tau_accept", "tau_reject", "tau_complete", "registration_req" 55 | } 56 | 57 | esm_sublayer_msg_list = { 58 | "activate_dedicated_eps_bearer_req", "activate_dedicated_eps_bearer_accept", "activate_dedicated_eps_bearer_reject", 59 | "activate_default_eps_bearer_req", "activate_default_eps_bearer_accept", "activate_default_eps_bearer_reject", 60 | "bearer_resource_allocation_req", "bearer_resource_allocation_reject", "bearer_resource_modification_req", 61 | "bearer_resource_modification_reject", "deactivate_eps_bearer_req", "deactivate_eps_bearer_accept", 62 | "modify_eps_bearer_req", "modify_eps_bearer_accept", "modify_eps_bearer_reject", "esm_dummy", "esm_info_req", 63 | "esm_info_resp", "esm_status", "notification_msg", "pdn_connectivity_req", "pdn_connectivity_reject", 64 | "pdn_disconnect_req", "pdn_disconnect_reject", "remote_ue_report", "remote_ue_report_resp", "esm_data_transport" 65 | } 66 | 67 | special_msg_list = { 68 | "nas_message", "initial_nas_message", "user_data", "uplink_signalling", "downlink_signalling" 69 | } 70 | 71 | msg_response = { 72 | "identity_req": "identity_resp", 73 | "auth_req": "auth_resp", 74 | "sm_command": "sm_complete", 
75 | "guti_realloc_command": "guti_realloc_complete", 76 | "tau_req": "tau_accept", 77 | "tau_accept": "tau_complete", 78 | "esm_info_req": "esm_info_resp", 79 | "activate_default_eps_bearer_req": "activate_default_eps_bearer_req", 80 | "remote_ue_report": "remote_ue_report_resp" 81 | } 82 | 83 | mme_wait_for_message = { 84 | "attach_accept": "attach_resp", 85 | "identity_req": "identity_resp", 86 | "auth_req": "auth_resp", 87 | "sm_command": "sm_resp", 88 | "tau_accept": "tau_resp", 89 | "guti_realloc_command": "guti_realloc_resp" 90 | } 91 | 92 | check_mme_wait_for = { 93 | "attach_complete": "attach_resp", 94 | "identity_resp": "identity_resp", 95 | "auth_resp": "auth_resp", 96 | "auth_failure": "auth_resp", 97 | "sm_complete": "sm_resp", 98 | "sm_reject": "sm_resp", 99 | "tau_complete": "tau_resp", 100 | "guti_realloc_complete": "guti_realloc_resp" 101 | } 102 | 103 | -------------------------------------------------------------------------------- /neutrex/supar/structs/linearchain.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | This is the public release of the code of our paper titled 4 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 5 | Language Specifications" (USENIX Security '24) 6 | Author: Abdullah Al Ishtiaq and Sarkar Snigdha Sarathi Das 7 | Contact: abdullah.ishtiaq@psu.edu 8 | 9 | Licensed under the Apache License, Version 2.0 (the "License"); 10 | you may not use this file except in compliance with the License. 11 | You may obtain a copy of the License at 12 | 13 | https://www.apache.org/licenses/LICENSE-2.0 14 | 15 | Unless required by applicable law or agreed to in writing, software 16 | distributed under the License is distributed on an "AS IS" BASIS, 17 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
18 | See the License for the specific language governing permissions and 19 | limitations under the License. 20 | """ 21 | 22 | import torch 23 | from supar.structs.dist import StructuredDistribution 24 | from supar.structs.semiring import LogSemiring 25 | from torch.distributions.utils import lazy_property 26 | 27 | 28 | class LinearChainCRF(StructuredDistribution): 29 | r""" 30 | Linear-chain CRFs :cite:`lafferty-etal-2001-crf`. 31 | 32 | Args: 33 | scores (~torch.Tensor): ``[batch_size, seq_len, n_tags]``. 34 | Log potentials. 35 | trans (~torch.Tensor): ``[n_tags+1, n_tags+1]``. 36 | Transition scores. 37 | ``trans[-1, :-1]``/``trans[:-1, -1]`` represent transitions for start/end positions respectively. 38 | lens (~torch.LongTensor): ``[batch_size]``. 39 | Sentence lengths for masking. Default: ``None``. 40 | 41 | Examples: 42 | >>> from supar import LinearChainCRF 43 | >>> batch_size, seq_len, n_tags = 2, 5, 4 44 | >>> lens = torch.tensor([3, 4]) 45 | >>> value = torch.randint(n_tags, (batch_size, seq_len)) 46 | >>> s1 = LinearChainCRF(torch.randn(batch_size, seq_len, n_tags), 47 | torch.randn(n_tags+1, n_tags+1), 48 | lens) 49 | >>> s2 = LinearChainCRF(torch.randn(batch_size, seq_len, n_tags), 50 | torch.randn(n_tags+1, n_tags+1), 51 | lens) 52 | >>> s1.max 53 | tensor([4.4120, 8.9672], grad_fn=) 54 | >>> s1.argmax 55 | tensor([[2, 0, 3, 0, 0], 56 | [3, 3, 3, 2, 0]]) 57 | >>> s1.log_partition 58 | tensor([ 6.3486, 10.9106], grad_fn=) 59 | >>> s1.log_prob(value) 60 | tensor([ -8.1515, -10.5572], grad_fn=) 61 | >>> s1.entropy 62 | tensor([3.4150, 3.6549], grad_fn=) 63 | >>> s1.kl(s2) 64 | tensor([4.0333, 4.3807], grad_fn=) 65 | """ 66 | 67 | def __init__(self, scores, trans=None, lens=None): 68 | super().__init__(scores, lens=lens) 69 | 70 | batch_size, seq_len, self.n_tags = scores.shape[:3] 71 | self.lens = scores.new_full((batch_size,), seq_len).long() if lens is None else lens 72 | self.mask = self.lens.unsqueeze(-1).gt(self.lens.new_tensor(range(seq_len))) 
73 | 74 | self.trans = self.scores.new_full((self.n_tags+1, self.n_tags+1), LogSemiring.one) if trans is None else trans 75 | 76 | def __repr__(self): 77 | return f"{self.__class__.__name__}(n_tags={self.n_tags})" 78 | 79 | def __add__(self, other): 80 | return LinearChainCRF(torch.stack((self.scores, other.scores), -1), 81 | torch.stack((self.trans, other.trans), -1), 82 | self.lens) 83 | 84 | @lazy_property 85 | def argmax(self): 86 | return self.lens.new_zeros(self.mask.shape).masked_scatter_(self.mask, torch.where(self.backward(self.max.sum()))[2]) 87 | 88 | def topk(self, k): 89 | preds = torch.stack([torch.where(self.backward(i))[2] for i in self.kmax(k).sum(0)], -1) 90 | return self.lens.new_zeros(*self.mask.shape, k).masked_scatter_(self.mask.unsqueeze(-1), preds) 91 | 92 | def score(self, value): 93 | scores, mask, value = self.scores.transpose(0, 1), self.mask.t(), value.t() 94 | prev, succ = torch.cat((torch.full_like(value[:1], -1), value[:-1]), 0), value 95 | # [seq_len, batch_size] 96 | alpha = scores.gather(-1, value.unsqueeze(-1)).squeeze(-1) 97 | # [batch_size] 98 | alpha = LogSemiring.prod(LogSemiring.one_mask(LogSemiring.mul(alpha, self.trans[prev, succ]), ~mask), 0) 99 | alpha = alpha + self.trans[value.gather(0, self.lens.unsqueeze(0) - 1).squeeze(0), torch.full_like(value[0], -1)] 100 | return alpha 101 | 102 | def forward(self, semiring): 103 | # [seq_len, batch_size, n_tags, ...] 
104 | scores = semiring.convert(self.scores.transpose(0, 1)) 105 | trans = semiring.convert(self.trans) 106 | mask = self.mask.t() 107 | 108 | # [batch_size, n_tags] 109 | alpha = semiring.mul(trans[-1, :-1], scores[0]) 110 | for i in range(1, len(mask)): 111 | alpha[mask[i]] = semiring.mul(semiring.dot(alpha.unsqueeze(2), trans[:-1, :-1], 1), scores[i])[mask[i]] 112 | alpha = semiring.dot(alpha, trans[:-1, -1], 1) 113 | return semiring.unconvert(alpha) 114 | -------------------------------------------------------------------------------- /keyword_extraction/cellular_text_converter.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is the public release of the code of our paper titled 3 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 4 | Language Specifications" (USENIX Security '24) 5 | Author: Abdullah Al Ishtiaq and Syed Md Mukit Rashid 6 | Contact: abdullah.ishtiaq@psu.edu 7 | 8 | Licensed under the Apache License, Version 2.0 (the "License"); 9 | you may not use this file except in compliance with the License. 10 | You may obtain a copy of the License at 11 | 12 | https://www.apache.org/licenses/LICENSE-2.0 13 | 14 | Unless required by applicable law or agreed to in writing, software 15 | distributed under the License is distributed on an "AS IS" BASIS, 16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | See the License for the specific language governing permissions and 18 | limitations under the License. 
19 | """ 20 | 21 | import pprint 22 | 23 | import find_capital_keywords 24 | import gather_keyword_pdf 25 | import ie_from_pdf 26 | import enchant 27 | 28 | import re 29 | 30 | INPUT_FILENAME = 'assets/5g-rrc.txt' 31 | 32 | 33 | def convert_to_keyword(file_name, keyword_dict, suffix, check_all_upper=False): 34 | with open(file_name + ".txt", "r") as f: 35 | with open(file_name + "_" + suffix + ".txt", "w") as f2: 36 | s = f.readlines() 37 | for org_sen in s: 38 | sen = org_sen.split("\n")[0].replace("- ", "-") 39 | for k, v in sorted(keyword_dict.items(), key=lambda item: len(item[0]), reverse=True): 40 | for phrase in sorted(v, key=lambda item: len(item), reverse=True): 41 | indices = [m.start() for m in re.finditer(phrase.lower(), sen.lower())] 42 | while len(indices) > 0: 43 | idx = indices[0] 44 | if not (idx + len(phrase) < len(sen) and sen[idx + len(phrase)].isalpha()) and \ 45 | (not check_all_upper or 46 | sen[idx: idx + len(phrase)] == sen[idx:idx + len(phrase)].upper()): 47 | sen = sen[:idx] + k + "_" + sen[idx + len(phrase):] 48 | indices = [m.start() for m in re.finditer(phrase.lower(), sen.lower())] 49 | else: 50 | indices = indices[1:] 51 | 52 | f2.write(sen + "\n") 53 | 54 | return file_name + "_" + suffix 55 | 56 | 57 | def convert_firstquotes(file_name, suffix): 58 | quote_dict = dict() 59 | with open(file_name, "r") as f: 60 | with open(file_name + "_" + suffix + ".txt", "w") as f2: 61 | s = f.readlines() 62 | for org_sen in s: 63 | sen = org_sen.split("\n")[0].replace("- ", "-") 64 | split_by_quote = str(sen).split("\"") 65 | if len(split_by_quote) % 2 == 0: 66 | f2.write(sen + "\n") 67 | continue 68 | sen = "" 69 | if len(split_by_quote) > 1: 70 | for i in range(0, len(split_by_quote), 2): 71 | if i + 1 < len(split_by_quote): 72 | if len(split_by_quote[i + 1].strip()) > 1 \ 73 | and len(split_by_quote[i + 1].split(" ")) < 8: 74 | content = split_by_quote[i + 1].strip() 75 | key = content.replace(" ", "_").lower().replace("(", "").replace(")", 
"") 76 | if key not in quote_dict.keys(): 77 | quote_dict[key] = [content] 78 | else: 79 | pass 80 | sen += split_by_quote[i] + key + "_" 81 | else: 82 | if len(split_by_quote[i + 1].strip()) >= 2: 83 | sen = sen + split_by_quote[i] + "\"" + split_by_quote[i + 1] + "\"" 84 | else: 85 | sen = sen + split_by_quote[i] + "\"" + split_by_quote[i + 1] 86 | else: 87 | sen += split_by_quote[i] 88 | else: 89 | sen = org_sen.split("\n")[0].replace("- ", "-") 90 | 91 | f2.write(sen + "\n") 92 | 93 | return file_name + "_" + suffix, quote_dict 94 | 95 | 96 | dictionary = enchant.Dict("en_US") 97 | pp = pprint.PrettyPrinter(width=200) 98 | 99 | message_dict_pdf = gather_keyword_pdf.gather_messages_and_procedures()[0] 100 | procedures_dict_pdf = gather_keyword_pdf.gather_messages_and_procedures()[1] 101 | state_dict_pdf = gather_keyword_pdf.gather_state() 102 | variable_dict_pdf = gather_keyword_pdf.gather_vars() 103 | ie_dict_pdf_purified = ie_from_pdf.get_IE_keywords_dict(True) 104 | 105 | 106 | def find_dictionaries(): 107 | fn, quote_dict = convert_firstquotes(INPUT_FILENAME, 'quote') 108 | 109 | fn2, msg_dict_cap = find_capital_keywords.get_message_keywords(fn) 110 | fn3, state_dict_cap = find_capital_keywords.get_state_keywords(fn2) 111 | fn4, status_dict_cap = find_capital_keywords.get_status_keywords(fn3) 112 | fn5, mode_dict_cap = find_capital_keywords.get_mode_keywords(fn4) 113 | 114 | return msg_dict_cap, message_dict_pdf, procedures_dict_pdf, state_dict_pdf, ie_dict_pdf_purified, mode_dict_cap, state_dict_cap, \ 115 | status_dict_cap, variable_dict_pdf 116 | 117 | -------------------------------------------------------------------------------- /keyword_extraction/gather_keyword_pdf.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is the public release of the code of our paper titled 3 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 4 | Language 
Specifications" (USENIX Security '24) 5 | Author: Abdullah Al Ishtiaq and Syed Md Mukit Rashid 6 | Contact: abdullah.ishtiaq@psu.edu 7 | 8 | Licensed under the Apache License, Version 2.0 (the "License"); 9 | you may not use this file except in compliance with the License. 10 | You may obtain a copy of the License at 11 | 12 | https://www.apache.org/licenses/LICENSE-2.0 13 | 14 | Unless required by applicable law or agreed to in writing, software 15 | distributed under the License is distributed on an "AS IS" BASIS, 16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | See the License for the specific language governing permissions and 18 | limitations under the License. 19 | """ 20 | 21 | import os.path 22 | import pprint 23 | import PyPDF2 24 | 25 | pp = pprint.PrettyPrinter(width=200) 26 | INPUT_FILENAME = '5g-rrc.pdf' 27 | 28 | 29 | def get_definitions(): 30 | definition_keywords = dict() 31 | if not os.path.exists("assets/definitions.txt"): 32 | return definition_keywords 33 | 34 | with open("assets/definitions.txt", "r") as f: 35 | s = f.readlines() 36 | f.close() 37 | for org_sen in s: 38 | org_sen = org_sen.split("\n")[0] 39 | if ':' in org_sen: 40 | keyterm = org_sen.split(":")[0] 41 | if "note" not in keyterm.lower(): 42 | key = keyterm.lower().replace("\"", "").replace("\'", "").replace(" ", "_") 43 | definition_keywords[key + "_"] = [keyterm] 44 | 45 | return definition_keywords 46 | 47 | 48 | def gather_messages_and_procedures(): 49 | pdfFileObj = open(INPUT_FILENAME, 'rb') 50 | pdfReader = PyPDF2.PdfReader(pdfFileObj) 51 | message_dict = dict() 52 | procedures_dict = dict() 53 | 54 | last_section = "" 55 | for i in range(1, 24): 56 | pageObj = pdfReader.pages[i] 57 | lines = pageObj.extract_text().split("\n") 58 | for line in lines: 59 | if "..." 
not in line: 60 | continue 61 | 62 | line_splits = line.split() 63 | if len(line_splits) < 2: 64 | continue 65 | 66 | section = line_splits[0] 67 | 68 | if len(section) > 0 and section[0].isnumeric(): 69 | last_section = section 70 | 71 | section_splits = section.split(".") 72 | if not len(section_splits) == 3: 73 | continue 74 | 75 | if section_splits[0] == "5" and int(section_splits[1][0]) > 2: 76 | proc_text = " ".join(line_splits[1: -2]).replace(".", "") 77 | 78 | proc_key = proc_text.lower().replace("-", "_").replace(" ", "_").replace("/", "_") 79 | 80 | if "introduction" in proc_key or "void" in proc_key or proc_key == "" or proc_key == "general": 81 | continue 82 | 83 | if proc_key in procedures_dict and proc_text not in procedures_dict[proc_key]: 84 | procedures_dict[proc_key].append(proc_text) 85 | else: 86 | procedures_dict[proc_key] = [proc_text] 87 | 88 | elif last_section.startswith("6.2.2") and section == "–": 89 | msg_text = line_splits[1].replace(".", "") 90 | msg_key = msg_text.lower().replace("-", "_") 91 | if msg_key in message_dict and msg_text not in message_dict[msg_key]: 92 | message_dict[msg_key].append(msg_text) 93 | else: 94 | message_dict[msg_key] = [msg_text] 95 | 96 | pdfFileObj.close() 97 | return message_dict, procedures_dict 98 | 99 | 100 | def gather_vars(): 101 | pdfFileObj = open(INPUT_FILENAME, 'rb') 102 | pdfReader = PyPDF2.PdfReader(pdfFileObj) 103 | vars_dict = dict() 104 | 105 | last_section = "" 106 | for i in range(1, 24): 107 | pageObj = pdfReader.pages[i] 108 | lines = pageObj.extract_text().split("\n") 109 | for line in lines: 110 | if "..." 
not in line: 111 | continue 112 | 113 | line_splits = line.split() 114 | if len(line_splits) < 2: 115 | continue 116 | 117 | section = line_splits[0] 118 | 119 | if len(section) > 0 and section[0].isnumeric(): 120 | last_section = section 121 | 122 | elif last_section.startswith("7.4") and section == "–": 123 | var_text = line_splits[1].replace(".", "") 124 | var_key = var_text.lower().replace("-", "_") 125 | if var_key in vars_dict and var_text not in vars_dict[var_key]: 126 | vars_dict[var_key].append(var_text) 127 | else: 128 | vars_dict[var_key] = [var_text] 129 | 130 | pdfFileObj.close() 131 | return vars_dict 132 | 133 | 134 | def gather_state(): 135 | pdfFileObj = open(INPUT_FILENAME, 'rb') 136 | pdfReader = PyPDF2.PdfReader(pdfFileObj) 137 | state_dict = dict() 138 | 139 | pdfFileObj.close() 140 | return state_dict 141 | 142 | 143 | def get_abbreviations(): 144 | abbreviations_keyword = dict() 145 | if not os.path.exists("assets/abbreviations.txt"): 146 | return abbreviations_keyword 147 | 148 | with open("assets/abbreviations.txt", "r") as f: 149 | s = f.readlines() 150 | f.close() 151 | for line in s: 152 | line_ = line.split("\n")[0] 153 | words = line_.split(" ") 154 | abbreviation = words[0] 155 | meaning = line_.split(abbreviation)[1] 156 | abbreviations_keyword[abbreviation.replace("\'", "")] = [meaning.strip()] 157 | 158 | return abbreviations_keyword 159 | -------------------------------------------------------------------------------- /synthesizers/defs-saved.json: -------------------------------------------------------------------------------- 1 | { 2 | "id2agent": { 3 | "ue": ["UE", "subscriber", "user"], 4 | "mme": ["MME", "network", "Mobility Management Entity"], 5 | "core_network": ["core network"], 6 | "esm_entity": ["evolved packet system session management entity", "esm entity"], 7 | "amf": ["amf"], 8 | "emm_entity": ["EMM entity", "receiver"] 9 | }, 10 | 11 | "id2verb": { 12 | "set": ["set", "assign", "populate"], 13 | "reset": ["reset"], 
14 | "equal": ["equal"], 15 | "increase": ["increase", "increment"], 16 | "decrease": ["decrease", "decrement"], 17 | "send": ["send", "sent", "sending", "transmit", "return"], 18 | "pass": ["pass"], 19 | "forward": ["forward"], 20 | "respond": ["respond"], 21 | "process": ["process"], 22 | "receive": ["receive", "receipt", "reception"], 23 | "start": ["start", "restart"], 24 | "initiate": ["initiate", "initiating", "invoke"], 25 | "activate": ["activate"], 26 | "deactivate": ["deactivate"], 27 | "enable": ["enable", "enabling"], 28 | "disable": ["disable", "disabling"], 29 | "stop": ["stop", "stopped", "stopping", "stops", "terminate", "abort", "cancel"], 30 | "suspend": ["suspend", "pause"], 31 | "expire" : ["expire", "expiry", "timeout"], 32 | "add": ["add", "added", "adding", "adds"], 33 | "delete": ["delete", "remove", "erase"], 34 | "accept": ["accept"], 35 | "reject": ["reject"], 36 | "apply": ["apply"], 37 | "include": ["include", "contain", "including"], 38 | "ignore": ["ignore", "drop", "dropped", "dropping", "drops", "discard"], 39 | "wait": ["wait", "waited", "waiting", "waits"], 40 | "complete": ["complete", "completion", "success"], 41 | "establish": ["establish"], 42 | "create": ["create"], 43 | "derive": ["derive", "derivation"], 44 | "handover": ["handover"], 45 | "authenticate": ["authenticate"], 46 | "replace": ["replace", "overwrite"], 47 | "attempt": ["attempt", "try", "tried", "trying", "tries"], 48 | "perform": ["perform", "execut"], 49 | "save": ["store", "memorize", "save", "saved", "saves", "saving", "hold", "holds", "held", "holding", "keep", "keeps", "keeping", "kept"], 50 | "support": ["support"], 51 | "become": ["become"], 52 | "exist": ["exist", "have"], 53 | "generate": ["generate"], 54 | "release": ["release"], 55 | "consider": ["consider", "mark", "regard"], 56 | "find": ["find", "finding", "found", "finds"], 57 | "indicate": ["indicate"], 58 | "request": ["request"], 59 | "enter": ["enter"], 60 | "leave": ["leave", "leaves", 
"left"], 61 | "update": ["update"], 62 | "provide": ["provide", "give", "given", "giving", "gave", "gives"], 63 | "know": ["know", "knew", "known", "knowing", "has", "have"], 64 | "fail": ["fail", "fails", "failed", "failing", "unsuccessful"], 65 | "camp": ["camp", "camping", "camped", "camps"], 66 | "select": ["select", "choose", "chose", "chosen"], 67 | "operate": ["operate"], 68 | "take": ["take"], 69 | "use": ["use", "uses", "using", "taken into use", "used"], 70 | "exchange": ["exchange"], 71 | "continue": ["continue", "resume", "progress"], 72 | "change": ["change", "alter"], 73 | "wrap": ["wrap", "wraps", "wrapping", "wrapped"], 74 | "calculate": ["calculate", "compute"], 75 | "check": ["check"], 76 | "maintain": ["maintain"], 77 | "match": ["match"], 78 | "differ": ["differ"], 79 | "cipher": ["cipher", "encipher"], 80 | "protect": ["protect"], 81 | "configure": ["configure"], 82 | "require": ["require", "need", "needs", "needed", "needing", "decide"], 83 | "want": ["want", "wants", "wanted", "wanting", "wish", "wishes", "wished", "wishing"] 84 | }, 85 | 86 | "id2adj": { 87 | "ready": ["ready"], 88 | "running": ["running"], 89 | "available" : ["available"], 90 | "valid" : ["valid", "correct"], 91 | "invalid" : ["invalid", "incorrect"], 92 | "present": ["present"], 93 | "absent": ["absent"], 94 | "native": ["native"], 95 | "new": ["new"], 96 | "old": ["old"], 97 | "replayed": ["replayed"], 98 | "ciphered": ["ciphered"], 99 | "partially_ciphered": ["partially ciphered"], 100 | "unciphered": ["unciphered"], 101 | "integrity_protected": ["integrity protected"], 102 | "out_of_range": ["out of range"], 103 | "different": ["different"], 104 | "same": ["same"], 105 | "restricted": ["restricted"] 106 | }, 107 | 108 | "id2state" : { 109 | 110 | }, 111 | 112 | "id2message": { 113 | 114 | }, 115 | 116 | "id2procedure": { 117 | 118 | }, 119 | 120 | "id2event" : { 121 | 122 | 123 | }, 124 | 125 | "id2timer" : { 126 | 127 | }, 128 | 129 | "id2counter": { 130 | 131 | }, 132 
| 133 | "id2var" : { 134 | 135 | }, 136 | 137 | "id2mode": { 138 | 139 | }, 140 | 141 | "id2service":{ 142 | 143 | }, 144 | 145 | "id2field_val": { 146 | 147 | }, 148 | 149 | "id2msg_field": { 150 | 151 | }, 152 | 153 | "id2cause": { 154 | 155 | }, 156 | 157 | "id2misc": { 158 | 159 | }, 160 | 161 | "id2other": { 162 | "otherwise": ["otherwise"], 163 | "other": ["other"], 164 | "security" : ["security"], 165 | "response": ["response"], 166 | "counter": ["counter"], 167 | "maximum": ["maximum"], 168 | "minimum": ["minimum"], 169 | "last_counter": ["this counter", "that counter", "the counter"], 170 | "last_message": ["this message", "that message", "the message"], 171 | "last_msg_field": ["the ie", "this ie", "this information element", "this value"], 172 | "last_timer": ["the timer", "this timer"], 173 | "last_procedure": ["the procedure", "this procedure"] 174 | }, 175 | 176 | "id2num": { 177 | "zero": ["zero", "0"], 178 | "one": ["one", "1"], 179 | "two": ["two", "2"], 180 | "three": ["three", "3"], 181 | "four": ["four", "4"], 182 | "five": ["five", "5"], 183 | "six": ["six", "6"], 184 | "seven": ["seven", "7"], 185 | "eight": ["eight", "8"], 186 | "nine": ["nine", "9"], 187 | "ten": ["ten", "10"] 188 | } 189 | 190 | } -------------------------------------------------------------------------------- /synthesizers/script_msg_defs_5.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is the public release of the code of our paper titled 3 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 4 | Language Specifications" (USENIX Security '24) 5 | Author: Abdullah Al Ishtiaq 6 | Contact: abdullah.ishtiaq@psu.edu 7 | 8 | Licensed under the Apache License, Version 2.0 (the "License"); 9 | you may not use this file except in compliance with the License. 
10 | You may obtain a copy of the License at 11 | 12 | https://www.apache.org/licenses/LICENSE-2.0 13 | 14 | Unless required by applicable law or agreed to in writing, software 15 | distributed under the License is distributed on an "AS IS" BASIS, 16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | See the License for the specific language governing permissions and 18 | limitations under the License. 19 | """ 20 | 21 | um_msg_list = { 22 | "authentication_response", "authentication_failure", "registration_request", "registration_complete", 23 | "ul_nas_transport", "service_request", "configuration_update_complete", "identity_response", 24 | "notification_response", "security_mode_complete", "security_mode_reject", "control_plane_service_request", 25 | "network_slice_specific_authentication_complete", "relay_key_request", "relay_authentication_response", 26 | 27 | "pdu_session_establishment_request", "pdu_session_authentication_complete", "pdu_session_modification_request", 28 | "pdu_session_modification_complete", "pdu_session_modification_command_reject", "pdu_session_release_request", 29 | "pdu_session_release_complete", "remote_ue_report", 30 | 31 | "tracking_area_update_request", 32 | 33 | "uplink_data" 34 | } 35 | 36 | mu_msg_list = { 37 | "authentication_request", "authentication_result", "authentication_reject", "registration_accept", 38 | "registration_reject", "dl_nas_transport", "service_accept", "service_reject", "configuration_update_command", 39 | "identity_request", "notification_message", "security_mode_command", 40 | "network_slice_specific_authentication_command", "network_slice_specific_authentication_result", "relay_key_accept", 41 | "relay_key_reject", "relay_authentication_request", 42 | 43 | "pdu_session_establishment_accept", "pdu_session_establishment_reject", "pdu_session_authentication_command", 44 | "pdu_session_authentication_result", "pdu_session_modification_reject", "pdu_session_modification_command", 45 
| "pdu_session_release_reject", "pdu_session_release_command", "five_gsm_status_message", "remote_ue_report_resp", 46 | 47 | "activate_default_eps_bearer_context_request", "downlink_data" 48 | } 49 | 50 | both_dir_msg_list = { 51 | "deregistration_request", "deregistration_accept", "security_protected_5gs_nas_message", "five_gmm_status_message", 52 | "five_gsm_status_message", 53 | "nas_message", "initial_nas_message", "five_gmm_message", "five_gsm_message", "user_data", 54 | "detach_request" 55 | } 56 | 57 | emm_sublayer_msg_list = { 58 | "authentication_response", "authentication_failure", "registration_request", "registration_complete", 59 | "ul_nas_transport", "service_request", "configuration_update_complete", "identity_response", 60 | "notification_response", "security_mode_complete", "security_mode_reject", "control_plane_service_request", 61 | "network_slice_specific_authentication_complete", "relay_key_request", "relay_authentication_response", 62 | "authentication_request", "authentication_result", "authentication_reject", "registration_accept", 63 | "registration_reject", "dl_nas_transport", "service_accept", "service_reject", "configuration_update_command", 64 | "identity_request", "notification_message", "security_mode_command", 65 | "network_slice_specific_authentication_command", "network_slice_specific_authentication_result", "relay_key_accept", 66 | "relay_key_reject", "relay_authentication_request", "five_gmm_message" 67 | 68 | 69 | } 70 | 71 | esm_sublayer_msg_list = { 72 | "pdu_session_establishment_request", "pdu_session_authentication_complete", "pdu_session_modification_request", 73 | "pdu_session_modification_complete", "pdu_session_modification_command_reject", "pdu_session_release_request", 74 | "pdu_session_release_complete", "pdu_session_establishment_accept", "pdu_session_establishment_reject", 75 | "pdu_session_authentication_command", "pdu_session_authentication_result", "pdu_session_modification_reject", 76 | 
"pdu_session_modification_command", "pdu_session_release_reject", "pdu_session_release_command", 77 | "five_gsm_status_message", "remote_ue_report", "remote_ue_report_resp", "five_gsm_message" 78 | 79 | } 80 | 81 | special_msg_list = { 82 | "nas_message", "initial_nas_message", "user_data", "uplink_signalling", "uplink_data", "downlink_signalling", 83 | "downlink_data", "five_gmm_message", "five_gsm_message" 84 | } 85 | 86 | msg_response = { 87 | "authentication_request": "authentication_response", 88 | "registration_request": "registration_accept", 89 | "identity_request": "identity_response", 90 | "service_request": "service_accept", 91 | "control_plane_service_request": "service_accept", 92 | "relay_key_request": "relay_key_accept", 93 | "relay_authentication_request": "relay_authentication_response", 94 | "security_mode_command": "security_mode_complete", 95 | "configuration_update_command": "configuration_update_complete", 96 | "network_slice_specific_authentication_command": "network_slice_specific_authentication_complete", 97 | "pdu_session_authentication_command": "pdu_session_authentication_complete", 98 | "pdu_session_modification_command": "pdu_session_modification_complete", 99 | "pdu_session_release_command": "pdu_session_release_complete", 100 | "pdu_session_establishment_request": "pdu_session_establishment_accept", 101 | "remote_ue_report": "remote_ue_report_resp" 102 | } 103 | 104 | mme_wait_for_message = { 105 | "registration_accept": "registration_resp", 106 | "identity_request": "identity_resp", 107 | "authentication_request": "auth_resp", 108 | "security_mode_command": "sm_resp", 109 | "configuration_update_command": "conf_resp", 110 | "network_slice_specific_authentication_command": "network_slice_auth_resp" 111 | } 112 | 113 | check_mme_wait_for = { 114 | "registration_complete": "registration_resp", 115 | "identity_response": "identity_resp", 116 | "authentication_response": "auth_resp", 117 | "authentication_failure": "auth_resp", 118 | 
"security_mode_complete": "sm_resp", 119 | "security_mode_reject": "sm_resp", 120 | "configuration_update_complete": "conf_resp", 121 | "network_slice_specific_authentication_complete": "network_slice_auth_resp" 122 | } 123 | 124 | -------------------------------------------------------------------------------- /neutrex/tree_to_xml/tree_cleanup.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is the public release of the code of our paper titled 3 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 4 | Language Specifications" (USENIX Security '24) 5 | Author: Abdullah Al Ishtiaq and Sarkar Snigdha Sarathi Das 6 | Contact: abdullah.ishtiaq@psu.edu 7 | 8 | Licensed under the Apache License, Version 2.0 (the "License"); 9 | you may not use this file except in compliance with the License. 10 | You may obtain a copy of the License at 11 | 12 | https://www.apache.org/licenses/LICENSE-2.0 13 | 14 | Unless required by applicable law or agreed to in writing, software 15 | distributed under the License is distributed on an "AS IS" BASIS, 16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | See the License for the specific language governing permissions and 18 | limitations under the License. 
19 | """ 20 | 21 | import argparse 22 | import copy 23 | 24 | import nltk 25 | from collections import defaultdict 26 | from nltk.tree import ParentedTree 27 | 28 | 29 | restrictions = defaultdict(list) 30 | restrictions['top'] = ['', '', '', ''] 31 | restrictions[''] = ['', '', '', '', ''] 32 | restrictions[''] = ['', '', ''] 33 | restrictions[''] = ['', '', ''] 34 | 35 | 36 | def insert_ptree_forced(target_tree, position, chunk): 37 | target_tree.insert(position, ParentedTree.fromstring(str(chunk))) 38 | 39 | def remove_parent(parent): 40 | gParent = parent.parent() 41 | target_pos = None 42 | 43 | for i, k in enumerate(gParent): 44 | if k == parent: 45 | target_pos = i 46 | break 47 | else: 48 | raise Exception("parent not found in gparent! Probably implementation issue") 49 | 50 | parent_copy = copy.deepcopy(parent) 51 | parent_copy.reverse() 52 | 53 | for child in parent_copy: 54 | gParent.insert(target_pos, ParentedTree.fromstring(str(child))) 55 | 56 | gParent.remove(parent) 57 | 58 | 59 | def make_sibling(parent, chunk): 60 | gParent = parent.parent() 61 | target_pos = None 62 | for i, k in enumerate(gParent): 63 | if k == parent: 64 | target_pos = i 65 | break 66 | else: 67 | raise Exception("parent not found in gparent! 
Probably implementation issue") 68 | 69 | insert_ptree_forced(gParent, target_pos, chunk) 70 | 71 | parent.remove(chunk) 72 | 73 | def fix_other(parent: ParentedTree, node: ParentedTree) -> None: 74 | remove_parent(parent) 75 | 76 | 77 | 78 | def fix_top_simple(parent: ParentedTree, node_idx: int) -> None: 79 | 80 | new_ctl_tree = ParentedTree('', [ParentedTree.fromstring(str(parent[node_idx]))]) 81 | parent.remove(parent[node_idx]) 82 | parent.insert(node_idx, new_ctl_tree) 83 | 84 | 85 | def fix_top(parent: ParentedTree, node_idx: int) -> None: 86 | end_idx = node_idx 87 | for idx in range(node_idx, len(parent)): 88 | child_node = parent[idx] 89 | if isinstance(child_node, str) or child_node.label() == "": 90 | break 91 | end_idx = idx+1 92 | 93 | new_ctl_tree = ParentedTree('', [ParentedTree.fromstring(str(parent[idx])) for idx in range(node_idx, end_idx)]) 94 | 95 | del parent[node_idx:end_idx] 96 | parent.insert(node_idx, new_ctl_tree) 97 | 98 | 99 | 100 | 101 | def fix_action(parent, node): 102 | # case 1: action -> action 103 | if len(parent) == 1 and node.label() == '': 104 | parent.set_label('') 105 | 106 | # case 2: base case. that is if there is any control/action under action, they'll be made siblings 107 | # make sure after making sibling this parent is not empty, if it is, delete it 108 | else: 109 | make_sibling(parent, node) 110 | if len(parent) == 0: 111 | parent.parent().remove(parent) 112 | 113 | 114 | def fix_condition(parent, node): 115 | # does it have both actions and conditions? then it should probably be control! 116 | child_labels = [k.label() for k in parent] 117 | if '' in child_labels and '' in child_labels: 118 | parent.set_label('') 119 | return 120 | else: 121 | # base case: if there is any control/action under condition. make it a sibling. 122 | # once again check for empty parent. 
123 | make_sibling(parent, node) 124 | if len(parent) == 0: 125 | parent.parent().remove(parent) 126 | return 127 | 128 | 129 | def parse_fix(tree: ParentedTree, enable_top = True): 130 | q = [] 131 | q.append(tree) 132 | 133 | while len(q) > 0: 134 | parent = q.pop(0) 135 | parent_label = parent.label() 136 | 137 | for idx, node in enumerate(parent): 138 | if enable_top and parent_label == "top" and not isinstance(node, str) and \ 139 | node.label() in restrictions[parent_label]: 140 | fix_top(parent, idx) 141 | # as tree changed, start parsing from start 142 | parse_fix(tree) 143 | return 144 | 145 | if parent_label == "" and not isinstance(node, str) and node.label() in restrictions[parent_label]: 146 | fix_other(parent, node) 147 | # as tree changed, start parsing from start 148 | parse_fix(tree) 149 | return 150 | 151 | elif parent_label == '' and not isinstance(node, str) and node.label() in restrictions[parent_label]: 152 | # violation found try action fixes 153 | fix_action(parent, node) 154 | # as tree changed, start parsing from start 155 | parse_fix(tree) 156 | return 157 | 158 | elif parent_label == '' and not isinstance(node, str) and node.label() in restrictions[parent_label]: 159 | # violation in condition, try condition_fixes 160 | fix_condition(parent, node) 161 | parse_fix(tree) 162 | return 163 | 164 | else: 165 | if not isinstance(node, str): 166 | pass 167 | 168 | if not isinstance(node, str): 169 | q.append(node) 170 | else: 171 | pass 172 | 173 | 174 | def clean_tree(input_tree_str: str) -> str: 175 | input_tree = ParentedTree.convert(nltk.Tree.fromstring(input_tree_str)) 176 | 177 | parse_fix(input_tree, enable_top=False) 178 | parse_fix(input_tree, enable_top=True) 179 | 180 | return str(input_tree) 181 | -------------------------------------------------------------------------------- /neutrex/supar/modules/affine.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | 
This is the public release of the code of our paper titled 4 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 5 | Language Specifications" (USENIX Security '24) 6 | Author: Abdullah Al Ishtiaq and Sarkar Snigdha Sarathi Das 7 | Contact: abdullah.ishtiaq@psu.edu 8 | 9 | Licensed under the Apache License, Version 2.0 (the "License"); 10 | you may not use this file except in compliance with the License. 11 | You may obtain a copy of the License at 12 | 13 | https://www.apache.org/licenses/LICENSE-2.0 14 | 15 | Unless required by applicable law or agreed to in writing, software 16 | distributed under the License is distributed on an "AS IS" BASIS, 17 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | See the License for the specific language governing permissions and 19 | limitations under the License. 20 | """ 21 | 22 | import torch 23 | import torch.nn as nn 24 | 25 | 26 | class Biaffine(nn.Module): 27 | r""" 28 | Biaffine layer for first-order scoring :cite:`dozat-etal-2017-biaffine`. 29 | 30 | This function has a tensor of weights :math:`W` and bias terms if needed. 31 | The score :math:`s(x, y)` of the vector pair :math:`(x, y)` is computed as :math:`x^T W y / d^s`, 32 | where `d` and `s` are vector dimension and scaling factor respectively. 33 | :math:`x` and :math:`y` can be concatenated with bias terms. 34 | 35 | Args: 36 | n_in (int): 37 | The size of the input feature. 38 | n_out (int): 39 | The number of output channels. 40 | scale (float): 41 | Factor to scale the scores. Default: 0. 42 | bias_x (bool): 43 | If ``True``, adds a bias term for tensor :math:`x`. Default: ``True``. 44 | bias_y (bool): 45 | If ``True``, adds a bias term for tensor :math:`y`. Default: ``True``. 
46 | """ 47 | 48 | def __init__(self, n_in, n_out=1, scale=0, bias_x=True, bias_y=True): 49 | super().__init__() 50 | 51 | self.n_in = n_in 52 | self.n_out = n_out 53 | self.scale = scale 54 | self.bias_x = bias_x 55 | self.bias_y = bias_y 56 | self.weight = nn.Parameter(torch.Tensor(n_out, n_in+bias_x, n_in+bias_y)) 57 | 58 | self.reset_parameters() 59 | 60 | def __repr__(self): 61 | s = f"n_in={self.n_in}" 62 | if self.n_out > 1: 63 | s += f", n_out={self.n_out}" 64 | if self.scale != 0: 65 | s += f", scale={self.scale}" 66 | if self.bias_x: 67 | s += f", bias_x={self.bias_x}" 68 | if self.bias_y: 69 | s += f", bias_y={self.bias_y}" 70 | 71 | return f"{self.__class__.__name__}({s})" 72 | 73 | def reset_parameters(self): 74 | nn.init.zeros_(self.weight) 75 | 76 | def forward(self, x, y): 77 | r""" 78 | Args: 79 | x (torch.Tensor): ``[batch_size, seq_len, n_in]``. 80 | y (torch.Tensor): ``[batch_size, seq_len, n_in]``. 81 | 82 | Returns: 83 | ~torch.Tensor: 84 | A scoring tensor of shape ``[batch_size, n_out, seq_len, seq_len]``. 85 | If ``n_out=1``, the dimension for ``n_out`` will be squeezed automatically. 86 | """ 87 | 88 | if self.bias_x: 89 | x = torch.cat((x, torch.ones_like(x[..., :1])), -1) 90 | if self.bias_y: 91 | y = torch.cat((y, torch.ones_like(y[..., :1])), -1) 92 | # [batch_size, n_out, seq_len, seq_len] 93 | s = torch.einsum('bxi,oij,byj->boxy', x, self.weight, y) / self.n_in ** self.scale 94 | # remove dim 1 if n_out == 1 95 | s = s.squeeze(1) 96 | 97 | return s 98 | 99 | 100 | class Triaffine(nn.Module): 101 | r""" 102 | Triaffine layer for second-order scoring :cite:`zhang-etal-2020-efficient,wang-etal-2019-second`. 103 | 104 | This function has a tensor of weights :math:`W` and bias terms if needed. 105 | The score :math:`s(x, y, z)` of the vector triple :math:`(x, y, z)` is computed as :math:`x^T z^T W y / d^s`, 106 | where `d` and `s` are vector dimension and scaling factor respectively. 
107 | :math:`x` and :math:`y` can be concatenated with bias terms. 108 | 109 | Args: 110 | n_in (int): 111 | The size of the input feature. 112 | n_out (int): 113 | The number of output channels. 114 | scale (float): 115 | Factor to scale the scores. Default: 0. 116 | bias_x (bool): 117 | If ``True``, adds a bias term for tensor :math:`x`. Default: ``False``. 118 | bias_y (bool): 119 | If ``True``, adds a bias term for tensor :math:`y`. Default: ``False``. 120 | """ 121 | 122 | def __init__(self, n_in, n_out=1, scale=0, bias_x=False, bias_y=False): 123 | super().__init__() 124 | 125 | self.n_in = n_in 126 | self.n_out = n_out 127 | self.scale = scale 128 | self.bias_x = bias_x 129 | self.bias_y = bias_y 130 | self.weight = nn.Parameter(torch.Tensor(n_out, n_in+bias_x, n_in, n_in+bias_y)) 131 | 132 | self.reset_parameters() 133 | 134 | def __repr__(self): 135 | s = f"n_in={self.n_in}" 136 | if self.n_out > 1: 137 | s += f", n_out={self.n_out}" 138 | if self.scale != 0: 139 | s += f", scale={self.scale}" 140 | if self.bias_x: 141 | s += f", bias_x={self.bias_x}" 142 | if self.bias_y: 143 | s += f", bias_y={self.bias_y}" 144 | 145 | return f"{self.__class__.__name__}({s})" 146 | 147 | def reset_parameters(self): 148 | nn.init.zeros_(self.weight) 149 | 150 | def forward(self, x, y, z): 151 | r""" 152 | Args: 153 | x (torch.Tensor): ``[batch_size, seq_len, n_in]``. 154 | y (torch.Tensor): ``[batch_size, seq_len, n_in]``. 155 | z (torch.Tensor): ``[batch_size, seq_len, n_in]``. 156 | 157 | Returns: 158 | ~torch.Tensor: 159 | A scoring tensor of shape ``[batch_size, n_out, seq_len, seq_len, seq_len]``. 160 | If ``n_out=1``, the dimension for ``n_out`` will be squeezed automatically. 
161 | """ 162 | 163 | if self.bias_x: 164 | x = torch.cat((x, torch.ones_like(x[..., :1])), -1) 165 | if self.bias_y: 166 | y = torch.cat((y, torch.ones_like(y[..., :1])), -1) 167 | w = torch.einsum('bzk,oikj->bozij', z, self.weight) 168 | # [batch_size, n_out, seq_len, seq_len, seq_len] 169 | s = torch.einsum('bxi,bozij,byj->bozxy', x, w, y) / self.n_in ** self.scale 170 | # remove dim 1 if n_out == 1 171 | s = s.squeeze(1) 172 | 173 | return s 174 | -------------------------------------------------------------------------------- /neutrex/supar/utils/metric.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | This is the public release of the code of our paper titled 4 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 5 | Language Specifications" (USENIX Security '24) 6 | Author: Abdullah Al Ishtiaq and Sarkar Snigdha Sarathi Das 7 | Contact: abdullah.ishtiaq@psu.edu 8 | 9 | Licensed under the Apache License, Version 2.0 (the "License"); 10 | you may not use this file except in compliance with the License. 11 | You may obtain a copy of the License at 12 | 13 | https://www.apache.org/licenses/LICENSE-2.0 14 | 15 | Unless required by applicable law or agreed to in writing, software 16 | distributed under the License is distributed on an "AS IS" BASIS, 17 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | See the License for the specific language governing permissions and 19 | limitations under the License. 
20 | """ 21 | 22 | from collections import Counter 23 | 24 | 25 | class Metric(object): 26 | 27 | def __lt__(self, other): 28 | return self.score < other 29 | 30 | def __le__(self, other): 31 | return self.score <= other 32 | 33 | def __ge__(self, other): 34 | return self.score >= other 35 | 36 | def __gt__(self, other): 37 | return self.score > other 38 | 39 | @property 40 | def score(self): 41 | return 0. 42 | 43 | 44 | class AttachmentMetric(Metric): 45 | 46 | def __init__(self, eps=1e-12): 47 | super().__init__() 48 | 49 | self.eps = eps 50 | 51 | self.n = 0.0 52 | self.n_ucm = 0.0 53 | self.n_lcm = 0.0 54 | self.total = 0.0 55 | self.correct_arcs = 0.0 56 | self.correct_rels = 0.0 57 | 58 | def __repr__(self): 59 | s = f"UCM: {self.ucm:6.2%} LCM: {self.lcm:6.2%} " 60 | s += f"UAS: {self.uas:6.2%} LAS: {self.las:6.2%}" 61 | return s 62 | 63 | def __call__(self, arc_preds, rel_preds, arc_golds, rel_golds, mask): 64 | lens = mask.sum(1) 65 | arc_mask = arc_preds.eq(arc_golds) & mask 66 | rel_mask = rel_preds.eq(rel_golds) & arc_mask 67 | arc_mask_seq, rel_mask_seq = arc_mask[mask], rel_mask[mask] 68 | 69 | self.n += len(mask) 70 | self.n_ucm += arc_mask.sum(1).eq(lens).sum().item() 71 | self.n_lcm += rel_mask.sum(1).eq(lens).sum().item() 72 | 73 | self.total += len(arc_mask_seq) 74 | self.correct_arcs += arc_mask_seq.sum().item() 75 | self.correct_rels += rel_mask_seq.sum().item() 76 | return self 77 | 78 | @property 79 | def score(self): 80 | return self.las 81 | 82 | @property 83 | def ucm(self): 84 | return self.n_ucm / (self.n + self.eps) 85 | 86 | @property 87 | def lcm(self): 88 | return self.n_lcm / (self.n + self.eps) 89 | 90 | @property 91 | def uas(self): 92 | return self.correct_arcs / (self.total + self.eps) 93 | 94 | @property 95 | def las(self): 96 | return self.correct_rels / (self.total + self.eps) 97 | 98 | 99 | class SpanMetric(Metric): 100 | 101 | def __init__(self, eps=1e-12): 102 | super().__init__() 103 | 104 | self.n = 0.0 105 | self.n_ucm 
= 0.0 106 | self.n_lcm = 0.0 107 | self.utp = 0.0 108 | self.ltp = 0.0 109 | self.pred = 0.0 110 | self.gold = 0.0 111 | self.eps = eps 112 | 113 | def __call__(self, preds, golds): 114 | for pred, gold in zip(preds, golds): 115 | upred, ugold = Counter([tuple(span[:-1]) for span in pred]), Counter([tuple(span[:-1]) for span in gold]) 116 | lpred, lgold = Counter([tuple(span) for span in pred]), Counter([tuple(span) for span in gold]) 117 | utp, ltp = list((upred & ugold).elements()), list((lpred & lgold).elements()) 118 | self.n += 1 119 | self.n_ucm += len(utp) == len(pred) == len(gold) 120 | self.n_lcm += len(ltp) == len(pred) == len(gold) 121 | self.utp += len(utp) 122 | self.ltp += len(ltp) 123 | self.pred += len(pred) 124 | self.gold += len(gold) 125 | return self 126 | 127 | def __repr__(self): 128 | s = f"UCM: {self.ucm:6.2%} LCM: {self.lcm:6.2%} " 129 | s += f"UP: {self.up:6.2%} UR: {self.ur:6.2%} UF: {self.uf:6.2%} " 130 | s += f"LP: {self.lp:6.2%} LR: {self.lr:6.2%} LF: {self.lf:6.2%}" 131 | 132 | return s 133 | 134 | @property 135 | def score(self): 136 | return self.lf 137 | 138 | @property 139 | def ucm(self): 140 | return self.n_ucm / (self.n + self.eps) 141 | 142 | @property 143 | def lcm(self): 144 | return self.n_lcm / (self.n + self.eps) 145 | 146 | @property 147 | def up(self): 148 | return self.utp / (self.pred + self.eps) 149 | 150 | @property 151 | def ur(self): 152 | return self.utp / (self.gold + self.eps) 153 | 154 | @property 155 | def uf(self): 156 | return 2 * self.utp / (self.pred + self.gold + self.eps) 157 | 158 | @property 159 | def lp(self): 160 | return self.ltp / (self.pred + self.eps) 161 | 162 | @property 163 | def lr(self): 164 | return self.ltp / (self.gold + self.eps) 165 | 166 | @property 167 | def lf(self): 168 | return 2 * self.ltp / (self.pred + self.gold + self.eps) 169 | 170 | 171 | class ChartMetric(Metric): 172 | 173 | def __init__(self, eps=1e-12): 174 | super(ChartMetric, self).__init__() 175 | 176 | self.tp = 0.0 
177 | self.utp = 0.0 178 | self.pred = 0.0 179 | self.gold = 0.0 180 | self.eps = eps 181 | 182 | def __call__(self, preds, golds): 183 | pred_mask = preds.ge(0) 184 | gold_mask = golds.ge(0) 185 | span_mask = pred_mask & gold_mask 186 | self.pred += pred_mask.sum().item() 187 | self.gold += gold_mask.sum().item() 188 | self.tp += (preds.eq(golds) & span_mask).sum().item() 189 | self.utp += span_mask.sum().item() 190 | return self 191 | 192 | def __repr__(self): 193 | return f"UP: {self.up:6.2%} UR: {self.ur:6.2%} UF: {self.uf:6.2%} P: {self.p:6.2%} R: {self.r:6.2%} F: {self.f:6.2%}" 194 | 195 | @property 196 | def score(self): 197 | return self.f 198 | 199 | @property 200 | def up(self): 201 | return self.utp / (self.pred + self.eps) 202 | 203 | @property 204 | def ur(self): 205 | return self.utp / (self.gold + self.eps) 206 | 207 | @property 208 | def uf(self): 209 | return 2 * self.utp / (self.pred + self.gold + self.eps) 210 | 211 | @property 212 | def p(self): 213 | return self.tp / (self.pred + self.eps) 214 | 215 | @property 216 | def r(self): 217 | return self.tp / (self.gold + self.eps) 218 | 219 | @property 220 | def f(self): 221 | return 2 * self.tp / (self.pred + self.gold + self.eps) 222 | -------------------------------------------------------------------------------- /synthesizers/sympy_expression_builder.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is the public release of the code of our paper titled 3 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 4 | Language Specifications" (USENIX Security '24) 5 | Author: Abdullah Al Ishtiaq and Syed Md Mukit Rashid 6 | Contact: abdullah.ishtiaq@psu.edu 7 | 8 | Licensed under the Apache License, Version 2.0 (the "License"); 9 | you may not use this file except in compliance with the License. 
10 | You may obtain a copy of the License at 11 | 12 | https://www.apache.org/licenses/LICENSE-2.0 13 | 14 | Unless required by applicable law or agreed to in writing, software 15 | distributed under the License is distributed on an "AS IS" BASIS, 16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | See the License for the specific language governing permissions and 18 | limitations under the License. 19 | """ 20 | 21 | from sympy import Symbol, And, Or, Not, Equality 22 | from sympy import simplify_logic 23 | 24 | special_chars = ['&', '|', '!', '(', ')', '='] 25 | 26 | 27 | def find_infix_exp(string_exp): 28 | vars_list = [] 29 | enums_list = [] 30 | e = str(string_exp).strip().replace("\n", "").replace(" ", "") 31 | var_name = "" 32 | exp_list = [] 33 | last_operator = "" 34 | for i in range(len(e)): 35 | if e[i] not in special_chars: 36 | var_name += str(e[i]) 37 | if i == len(e) - 1 and var_name != "": 38 | exp_list.append(var_name) 39 | if last_operator == "=" or last_operator == "!=": 40 | enums_list.append(var_name) 41 | else: 42 | vars_list.append(var_name) 43 | else: 44 | if var_name != "": 45 | exp_list.append(var_name) 46 | 47 | if last_operator == "=" or last_operator == "!=": 48 | enums_list.append(var_name) 49 | else: 50 | vars_list.append(var_name) 51 | 52 | var_name = "" 53 | if e[i - 1] == '!' and e[i] == '=': 54 | exp_list.append("!=") 55 | last_operator = "!=" 56 | elif e[i] == "!" 
and e[i + 1] == "=": 57 | continue 58 | else: 59 | exp_list.append(e[i]) 60 | last_operator = str(e[i]) 61 | 62 | return exp_list, vars_list, enums_list 63 | 64 | 65 | def find_postfix_exp(infix_exp): 66 | stack = [] 67 | operators = ['&', '|', '!=', '!', '(', ')', '='] 68 | precedence = {'!': 1, '!=': 2, '=': 2, '&': 3, '|': 4} 69 | postfix_exp = [] 70 | for i in range(len(infix_exp)): 71 | if infix_exp[i] not in operators: 72 | postfix_exp.append(infix_exp[i]) 73 | continue 74 | 75 | if infix_exp[i] == '(': 76 | stack.append(infix_exp[i]) 77 | continue 78 | 79 | if infix_exp[i] == ')': 80 | while len(stack) != 0 and stack[-1] != '(': 81 | postfix_exp.append(stack.pop()) 82 | stack.pop() 83 | continue 84 | 85 | if infix_exp[i] in operators: 86 | if len(stack) == 0 or stack[-1] == '(': 87 | stack.append(infix_exp[i]) 88 | else: 89 | while len(stack) != 0 and stack[-1] != '(' and precedence[infix_exp[i]] >= precedence[stack[-1]]: 90 | postfix_exp.append(stack.pop()) 91 | stack.append(infix_exp[i]) 92 | 93 | while len(stack) != 0: 94 | postfix_exp.append(stack.pop()) 95 | 96 | return postfix_exp 97 | 98 | 99 | def evaluate_exp(postfix_exp): 100 | operators = ['&', '|', '!=', '!', '(', ')', '='] 101 | stack = [] 102 | for i in range(len(postfix_exp)): 103 | if postfix_exp[i] not in operators: 104 | stack.append(Symbol(str(postfix_exp[i]))) 105 | continue 106 | else: 107 | 108 | if postfix_exp[i] == '&': 109 | try: 110 | a = stack.pop() 111 | except: 112 | a = None 113 | try: 114 | b = stack.pop() 115 | except: 116 | b = None 117 | 118 | if "coin_toss" in str(a): 119 | a = None 120 | if "coin_toss" in str(b): 121 | b = None 122 | 123 | if a is not None and b is not None: 124 | c = And(b, a) 125 | stack.append(c) 126 | elif a is not None: 127 | stack.append(a) 128 | elif b is not None: 129 | stack.append(b) 130 | 131 | elif postfix_exp[i] == '|': 132 | try: 133 | a = stack.pop() 134 | except: 135 | a = None 136 | try: 137 | b = stack.pop() 138 | except: 139 | b = None 
140 | 141 | if "coin_toss" in str(a): 142 | a = None 143 | if "coin_toss" in str(b): 144 | b = None 145 | 146 | if a is not None and b is not None: 147 | c = Or(b, a) 148 | stack.append(c) 149 | elif a is not None: 150 | stack.append(a) 151 | elif b is not None: 152 | stack.append(b) 153 | elif postfix_exp[i] == '!': 154 | try: 155 | a = stack.pop() 156 | if "coin_toss" not in str(a): 157 | c = Not(a) 158 | stack.append(c) 159 | except IndexError: 160 | pass 161 | 162 | elif postfix_exp[i] == '=': 163 | try: 164 | a = stack.pop() 165 | b = stack.pop() 166 | if "coin_toss" not in str(a) or "coin_toss" not in str(b): 167 | c = Equality(b, a) 168 | stack.append(c) 169 | except IndexError: 170 | pass 171 | elif postfix_exp[i] == "!=": 172 | try: 173 | a = stack.pop() 174 | b = stack.pop() 175 | if "coin_toss" not in str(a) or "coin_toss" not in str(b): 176 | c = Not(Equality(b, a)) 177 | stack.append(c) 178 | except IndexError: 179 | pass 180 | if len(stack) == 0: 181 | return None 182 | return stack.pop() 183 | 184 | 185 | def get_sympy_simplified_expression(condition_str): 186 | infix, vars_list, enums_list = find_infix_exp(condition_str) 187 | postfix = find_postfix_exp(infix) 188 | exp = evaluate_exp(postfix) 189 | 190 | if exp is None: 191 | return "" 192 | 193 | result = str(simplify_logic(exp, force=True)) 194 | result = result.replace("~", "!") 195 | return result 196 | -------------------------------------------------------------------------------- /synthesizers/script_db_handler.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is the public release of the code of our paper titled 3 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 4 | Language Specifications" (USENIX Security '24) 5 | Author: Abdullah Al Ishtiaq 6 | Contact: abdullah.ishtiaq@psu.edu 7 | 8 | Licensed under the Apache License, Version 2.0 (the "License"); 9 | you may not 
use this file except in compliance with the License. 10 | You may obtain a copy of the License at 11 | 12 | https://www.apache.org/licenses/LICENSE-2.0 13 | 14 | Unless required by applicable law or agreed to in writing, software 15 | distributed under the License is distributed on an "AS IS" BASIS, 16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | See the License for the specific language governing permissions and 18 | limitations under the License. 19 | """ 20 | 21 | import time 22 | 23 | import sqlite3 24 | from sqlite3 import OperationalError, IntegrityError, DataError 25 | 26 | import script_config 27 | 28 | INF = 9999 29 | KEYWORD_DB_TABLE = script_config.keyword_db_table 30 | min_keyword_dist_cache = {} 31 | 32 | CONN_CLOSED = True 33 | 34 | 35 | def get_new_conn_cursor(): 36 | global CONN_CLOSED 37 | db_conn = sqlite3.connect("hermes.sqlite") 38 | db_cursor = db_conn.cursor() 39 | 40 | try: 41 | db_cursor.execute("CREATE TABLE {} (Substring TEXT NOT NULL, MatchedString TEXT NOT NULL, " 42 | "Keyword TEXT NOT NULL, Distance INTEGER NOT NULL, MatchedStringLen INTEGER NOT NULL, " 43 | "PRIMARY KEY (Substring, MatchedString));".format(KEYWORD_DB_TABLE)) 44 | except OperationalError: 45 | pass 46 | 47 | CONN_CLOSED = False 48 | return db_conn, db_cursor 49 | 50 | 51 | def db_commit(db_conn): 52 | db_conn.commit() 53 | 54 | 55 | def close_connection(db_conn, db_cursor): 56 | global CONN_CLOSED 57 | if not CONN_CLOSED: 58 | db_cursor.close() 59 | db_conn.close() 60 | CONN_CLOSED = True 61 | 62 | 63 | def check_conn_closed(): 64 | return CONN_CLOSED 65 | 66 | 67 | def update_substring_keyword_distance(db_conn, db_cursor, substring: str, matched_string: str, keyword: str, 68 | distance: int): 69 | if len(substring) > 180 or len(matched_string) > 180 or len(keyword) > 180: 70 | print("Length too long for :", substring, matched_string, keyword, distance) 71 | return 72 | 73 | sql = "UPDATE {} SET keyword = ?, Distance = ? 
WHERE Substring = ? AND MatchedString = ?".format(KEYWORD_DB_TABLE) 74 | val = (keyword, distance, substring, matched_string) 75 | try: 76 | db_cursor.execute(sql, val) 77 | except OperationalError: 78 | print("Sleeping for a bit...") 79 | time.sleep(5.0) 80 | update_substring_keyword_distance(db_conn, db_cursor, substring, matched_string, keyword, distance) 81 | 82 | 83 | def insert_substring_keyword_distance(db_conn, db_cursor, substring: str, matched_string: str, keyword: str, 84 | distance: int, force_update=False, thread_num=0): 85 | if len(substring) > 180 or len(matched_string) > 180 or len(keyword) > 180: 86 | print("Length too long for :", substring, matched_string, keyword, distance) 87 | return 88 | 89 | sql = "INSERT INTO {} (Substring, MatchedString, Keyword, Distance, MatchedStringLen) " \ 90 | "VALUES (?, ?, ?, ?, ?)".format(KEYWORD_DB_TABLE) 91 | val = (substring, matched_string, keyword, distance, len(matched_string)) 92 | 93 | try: 94 | db_cursor.execute(sql, val) 95 | except IntegrityError: 96 | if force_update: 97 | update_substring_keyword_distance(db_conn, db_cursor, substring, matched_string, keyword, distance) 98 | except DataError: 99 | print("DataError for :", val) 100 | return 101 | except OperationalError: 102 | print("Thread {}: Sleeping for a bit...".format(thread_num)) 103 | time.sleep(5.0) 104 | print("Thread {}: Resuming...".format(thread_num)) 105 | insert_substring_keyword_distance(db_conn, db_cursor, substring, matched_string, keyword, distance, 106 | force_update, thread_num=thread_num) 107 | 108 | 109 | def insert_substring_keyword_distance_batch(db_conn, db_cursor, insert_list, force_update=False, thread_num=0): 110 | for item in insert_list: 111 | insert_substring_keyword_distance(db_conn, db_cursor, item[0], item[1], item[2], item[3], 112 | force_update, thread_num=thread_num) 113 | db_commit(db_conn) 114 | 115 | 116 | def lookup_substring_keyword_distance(db_cursor, substring: str, matched_string: str) -> (str, int): 117 | 
sql = "SELECT Keyword, Distance FROM {} WHERE Substring = ? AND MatchedString = ?".format(KEYWORD_DB_TABLE) 118 | val = (substring, matched_string) 119 | db_cursor.execute(sql, val) 120 | db_result = db_cursor.fetchall() 121 | 122 | if len(db_result) > 0: 123 | return db_result[0][0], db_result[0][1] 124 | else: 125 | return "", INF 126 | 127 | 128 | def get_min_keyword_distance(db_cursor, substring: str) -> (str, str, int): 129 | if substring in min_keyword_dist_cache: 130 | return min_keyword_dist_cache[substring] 131 | 132 | sql = "select * from {} " \ 133 | "where Substring = ? " \ 134 | "and Distance = (select min(Distance) from {} where Substring = ?) " \ 135 | "order by MatchedStringLen DESC " \ 136 | "limit 1".format(KEYWORD_DB_TABLE, KEYWORD_DB_TABLE) 137 | val = (substring, substring) 138 | db_cursor.execute(sql, val) 139 | db_result = db_cursor.fetchall() 140 | 141 | if len(db_result) > 0: 142 | result = (db_result[0][1], db_result[0][2], db_result[0][3]) 143 | else: 144 | result = ("", "", INF) 145 | 146 | min_keyword_dist_cache[substring] = result 147 | return result 148 | 149 | 150 | def substring_in_db(db_cursor, substring: str) -> bool: 151 | sql = "select * from {} " \ 152 | "where Substring = ? " \ 153 | "limit 1".format(KEYWORD_DB_TABLE) 154 | val = (substring,) 155 | db_cursor.execute(sql, val) 156 | db_result = db_cursor.fetchall() 157 | return len(db_result) > 0 158 | 159 | 160 | def matched_string_in_db(db_cursor, matched_string: str) -> bool: 161 | sql = "select * from {} " \ 162 | "where MatchedString = ? 
" \ 163 | "limit 1".format(KEYWORD_DB_TABLE) 164 | val = (matched_string,) 165 | db_cursor.execute(sql, val) 166 | db_result = db_cursor.fetchall() 167 | return len(db_result) > 0 168 | 169 | 170 | def delete_matched_string(db_conn, db_cursor, matched_string: str): 171 | sql = "delete from {} " \ 172 | "where MatchedString = ?".format(KEYWORD_DB_TABLE) 173 | val = (matched_string,) 174 | try: 175 | db_cursor.execute(sql, val) 176 | except OperationalError: 177 | print("Sleeping for a bit...") 178 | time.sleep(5.0) 179 | delete_matched_string(db_conn, db_cursor, matched_string) 180 | -------------------------------------------------------------------------------- /keyword_extraction/categorize_keywords.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is the public release of the code of our paper titled 3 | "Hermes: Unlocking Security Analysis of Cellular Network Protocols by Synthesizing Finite State Machines from Natural 4 | Language Specifications" (USENIX Security '24) 5 | Author: Abdullah Al Ishtiaq and Syed Md Mukit Rashid 6 | Contact: abdullah.ishtiaq@psu.edu 7 | 8 | Licensed under the Apache License, Version 2.0 (the "License"); 9 | you may not use this file except in compliance with the License. 10 | You may obtain a copy of the License at 11 | 12 | https://www.apache.org/licenses/LICENSE-2.0 13 | 14 | Unless required by applicable law or agreed to in writing, software 15 | distributed under the License is distributed on an "AS IS" BASIS, 16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | See the License for the specific language governing permissions and 18 | limitations under the License. 
19 | """ 20 | 21 | import pickle 22 | import pprint 23 | 24 | pp = pprint.PrettyPrinter(width=200) 25 | 26 | 27 | def get_dictionaries(): 28 | a_file = open("keyword_dict_np_merged.pkl", "rb") 29 | output = pickle.load(a_file) 30 | keyword_dict = dict(output) 31 | a_file.close() 32 | 33 | message_suffixes = ['message', 'messages', 'request'] 34 | message_dictionary = dict() 35 | remove_list = [] 36 | for k, v in keyword_dict.items(): 37 | for phrase in v: 38 | for suffix in message_suffixes: 39 | if str(phrase).lower().endswith(suffix.lower()): 40 | message_dictionary[k] = v 41 | remove_list.append(k) 42 | 43 | for key in list(set(remove_list)): 44 | del keyword_dict[key] 45 | 46 | procedure_suffixes = ['procedure', 'procedures'] 47 | procedure_dictionary = dict() 48 | remove_list = [] 49 | for k, v in keyword_dict.items(): 50 | for phrase in v: 51 | for suffix in procedure_suffixes: 52 | if str(phrase).lower().endswith(suffix.lower()): 53 | procedure_dictionary[k] = v 54 | remove_list.append(k) 55 | 56 | for key in list(set(remove_list)): 57 | del keyword_dict[key] 58 | 59 | counter_suffixes = ['count', 'counter', 'counters'] 60 | counter_dictionary = dict() 61 | remove_list = [] 62 | for k, v in keyword_dict.items(): 63 | for phrase in v: 64 | for suffix in counter_suffixes: 65 | if str(phrase).lower().endswith(suffix.lower()): 66 | counter_dictionary[k] = v 67 | remove_list.append(k) 68 | 69 | for key in list(set(remove_list)): 70 | del keyword_dict[key] 71 | 72 | service_substrings = ['optimization', 'optimisation', 'service', 'services', 'bearer service', 'bearer services', 73 | 'signalling connection', 'PDN connection', 'RRC connection', 'RR Connection'] 74 | 75 | service_suffixes = ['bearer context', 'bearer contexts', 'connection', 'connections', 'capability'] 76 | 77 | service_dictionary = dict() 78 | remove_list = [] 79 | for k, v in keyword_dict.items(): 80 | for phrase in v: 81 | for substring in service_substrings: 82 | if str(substring) in 
str(phrase): 83 | service_dictionary[k] = v 84 | remove_list.append(k) 85 | 86 | for suffix in service_suffixes: 87 | if str(phrase).lower().endswith(suffix.lower()): 88 | service_dictionary[k] = v 89 | remove_list.append(k) 90 | 91 | for key in list(set(remove_list)): 92 | del keyword_dict[key] 93 | 94 | mode_suffixes = ['mode', 'modes'] 95 | mode_dictionary = dict() 96 | remove_list = [] 97 | for k, v in keyword_dict.items(): 98 | for phrase in v: 99 | for suffix in mode_suffixes: 100 | if suffix.lower() in str(phrase).lower(): 101 | mode_dictionary[k] = v 102 | remove_list.append(k) 103 | 104 | for key in list(set(remove_list)): 105 | del keyword_dict[key] 106 | 107 | ie_substrings = ['information element', 'information elements', ' ie', 'additional', 'type', 'message identity', 108 | 'policy', 'identifier', 'indication', 'indicator'] 109 | 110 | def contains_timer(phrase_): 111 | words = str(phrase_).split() 112 | for word in words: 113 | if word[0].lower() == 't' and str(word[1:]).isnumeric(): 114 | num = int(word[1:]) 115 | if num != 1: 116 | return True 117 | 118 | return False 119 | 120 | message_field_dictionary = dict() 121 | remove_list = [] 122 | for k, v in keyword_dict.items(): 123 | for phrase in v: 124 | for substring in ie_substrings: 125 | if str(substring).lower() in str(phrase).lower(): 126 | message_field_dictionary[k] = v 127 | remove_list.append(k) 128 | 129 | if "timer" in str(phrase).lower() and "value" in str(phrase).lower(): 130 | message_field_dictionary[k] = v 131 | remove_list.append(k) 132 | 133 | if contains_timer(phrase) and "value" in str(phrase).lower(): 134 | message_field_dictionary[k] = v 135 | remove_list.append(k) 136 | 137 | for key in list(set(remove_list)): 138 | del keyword_dict[key] 139 | 140 | timer_dictionary = dict() 141 | remove_list = [] 142 | for k, v in keyword_dict.items(): 143 | for phrase in v: 144 | if contains_timer(phrase): 145 | timer_dictionary[k] = v 146 | remove_list.append(k) 147 | 148 | for key in 
list(set(remove_list)): 149 | del keyword_dict[key] 150 | 151 | variable_suffixes = ['security context', 'security contexts', 'list', 'lists', 'key', 'keys'] 152 | variable_dictionary = dict() 153 | remove_list = [] 154 | for k, v in keyword_dict.items(): 155 | for phrase in v: 156 | for suffix in variable_suffixes: 157 | if str(phrase).lower().endswith(suffix.lower()): 158 | variable_dictionary[k] = v 159 | remove_list.append(k) 160 | 161 | for key in list(set(remove_list)): 162 | del keyword_dict[key] 163 | 164 | algorithm_suffixes = ['algorithm', 'algorithms'] 165 | algorithm_dictionary = dict() 166 | remove_list = [] 167 | for k, v in keyword_dict.items(): 168 | for phrase in v: 169 | for suffix in algorithm_suffixes: 170 | if str(phrase).lower().endswith(suffix.lower()): 171 | variable_dictionary[k] = v 172 | remove_list.append(k) 173 | 174 | elif len(phrase.split(" ")) > 2 and suffix == phrase.split(" ")[-2]: 175 | algorithm_dictionary[k] = v 176 | remove_list.append(k) 177 | 178 | for key in list(set(remove_list)): 179 | del keyword_dict[key] 180 | 181 | return message_dictionary, procedure_dictionary, message_field_dictionary, counter_dictionary, \ 182 | mode_dictionary, service_dictionary, timer_dictionary, variable_dictionary, algorithm_dictionary, keyword_dict --------------------------------------------------------------------------------