├── ranking
    ├── model.bin
    ├── models
    │   ├── base_ranker.py
    │   └── mr_ranker.py
    ├── all_rules.txt
    ├── gaussian_binner.py
    ├── main.py
    └── features
    │   └── feature_extractor.py
├── .gitignore
├── DiscourseSimplification
    ├── README.md
    ├── src
    │   ├── main
    │   │   ├── java
    │   │   │   └── org
    │   │   │   │   └── lambda3
    │   │   │   │       └── text
    │   │   │   │           └── simplification
    │   │   │   │               └── discourse
    │   │   │   │                   ├── processing
    │   │   │   │                       ├── ProcessingType.java
    │   │   │   │                       ├── SentencePreprocessor.java
    │   │   │   │                       └── ExtendedDiscourseSimplifier.java
    │   │   │   │                   ├── utils
    │   │   │   │                       ├── IDGenerator.java
    │   │   │   │                       ├── ner
    │   │   │   │                       │   ├── NERStringParseException.java
    │   │   │   │                       │   ├── NERExtractionUtils.java
    │   │   │   │                       │   ├── tner
    │   │   │   │                       │   │   ├── TNERString.java
    │   │   │   │                       │   │   └── TNERToken.java
    │   │   │   │                       │   ├── NERToken.java
    │   │   │   │                       │   ├── NERTokenGroup.java
    │   │   │   │                       │   ├── NERString.java
    │   │   │   │                       │   └── NERStringParser.java
    │   │   │   │                       ├── parseTree
    │   │   │   │                       │   ├── ParseTreeException.java
    │   │   │   │                       │   ├── ParseTreeParser.java
    │   │   │   │                       │   └── ParseTreeVisualizer.java
    │   │   │   │                       ├── ConfigUtils.java
    │   │   │   │                       ├── IndexRange.java
    │   │   │   │                       ├── pos
    │   │   │   │                       │   ├── POSToken.java
    │   │   │   │                       │   └── POSTagger.java
    │   │   │   │                       ├── sentences
    │   │   │   │                       │   └── SentencesUtils.java
    │   │   │   │                       └── words
    │   │   │   │                       │   └── WordsUtils.java
    │   │   │   │                   ├── runner
    │   │   │   │                       └── discourse_tree
    │   │   │   │                       │   ├── extraction
    │   │   │   │                       │       ├── rules
    │   │   │   │                       │       │   ├── ListNP
    │   │   │   │                       │       │   │   ├── PreListNPExtractor.java
    │   │   │   │                       │       │   │   ├── PostListNPExtractor.java
    │   │   │   │                       │       │   │   └── ListNPExtractor.java
    │   │   │   │                       │       │   ├── ReferenceInitialAdverbialExtractor.java
    │   │   │   │                       │       │   ├── ReferenceInitialConjunctionExtractor.java
    │   │   │   │                       │       │   ├── ReferenceMedialAdverbialExtractor.java
    │   │   │   │                       │       │   ├── ReferenceFinalAdverbialExtractor.java
    │   │   │   │                       │       │   ├── LeadNPExtractor.java
    │   │   │   │                       │       │   ├── AdjectivalAdverbialInitialExtractor.java
    │   │   │   │                       │       │   ├── SubordinationPreExtractor.java
    │   │   │   │                       │       │   ├── PurposePreExtractor.java
    │   │   │   │                       │       │   ├── PurposePostExtractor.java
    │   │   │   │                       │       │   ├── SubordinationPostExtractor2.java
    │   │   │   │                       │       │   ├── SubordinationPostExtractor.java
    │   │   │   │                       │       │   ├── SubordinationPrePurposeExtractor.java
    │   │   │   │                       │       │   ├── SubordinationPostPurposeExtractor.java
    │   │   │   │                       │       │   ├── AdjectivalAdverbialMiddleFinalExtractor.java
    │   │   │   │                       │       │   ├── SharedNPPostCoordinationExtractor.java
    │   │   │   │                       │       │   ├── SubordinationPostAttributionExtractor2.java
    │   │   │   │                       │       │   ├── SubordinationPostAttributionExtractor.java
    │   │   │   │                       │       │   ├── NonRestrictiveRelativeClauseWhereExtractor.java
    │   │   │   │                       │       │   ├── RestrictiveParticipialExtractor.java
    │   │   │   │                       │       │   ├── SharedNPPreParticipalExtractor.java
    │   │   │   │                       │       │   ├── SharedNPPostParticipalExtractor.java
    │   │   │   │                       │       │   └── NonRestrictiveRelativeClausePrepWhichWhoExtractor.java
    │   │   │   │                       │       └── utils
    │   │   │   │                       │       │   └── TregexUtils.java
    │   │   │   │                       │   ├── model
    │   │   │   │                       │       ├── SentenceLeaf.java
    │   │   │   │                       │       ├── Invalidation.java
    │   │   │   │                       │       └── Leaf.java
    │   │   │   │                       │   └── Relation.java
    │   │   │   │                   ├── model
    │   │   │   │                       ├── TimeInformation.java
    │   │   │   │                       ├── serializer
    │   │   │   │                       │   ├── TreeSerializer.java
    │   │   │   │                       │   └── TreeDeserializer.java
    │   │   │   │                       ├── LinkedContext.java
    │   │   │   │                       ├── OutSentence.java
    │   │   │   │                       ├── Content.java
    │   │   │   │                       └── Element.java
    │   │   │   │                   └── App.java
    │   │   └── resources
    │   │   │   ├── logback.xml
    │   │   │   ├── attribution_verbs.conf
    │   │   │   └── cue_phrases.conf
    │   └── test
    │   │   └── java
    │   │       └── org
    │   │           └── lambda3
    │   │               └── text
    │   │                   └── simplification
    │   │                       └── discourse
    │   │                           ├── processing
    │   │                               ├── SentencePreprocessorTest.java
    │   │                               └── DiscourseSimplifierTest.java
    │   │                           └── utils
    │   │                               └── words
    │   │                                   └── WordUtilsTest.java
    └── DiscourseSimplification.iml
├── generate_candidates.py
└── README.md


/ranking/model.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mounicam/controllable_simplification/HEAD/ranking/model.bin


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__
2 | DiscourseSimplification/input.txt
3 | DiscourseSimplification/output*
4 | DiscourseSimplification/target
5 | sample_data
6 | 


--------------------------------------------------------------------------------
/DiscourseSimplification/README.md:
--------------------------------------------------------------------------------
 1 | [![Build Status](https://travis-ci.org/Lambda-3/DiscourseSimplification.svg?branch=master)](https://travis-ci.org/Lambda-3/DiscourseSimplification)
 2 | 
 3 | # Discourse Simplification
 4 | 
 5 | A project for simplifying sentences with respect to their discourse/rhetorical structures.
 6 | 
 7 | This is the core component of the [Graphene](https://github.com/Lambda-3/Graphene) project.
 8 | 
 9 | ## Setup
10 | 
11 |     mvn clean install -DskipTests
12 | 
13 | ### Run the program
14 | Create a new text file with the input
15 | 
16 |     vim input.txt
17 |      
18 | Run program
19 | 
20 |     mvn clean compile exec:java
21 |     
22 | Inspect output
23 | 
24 |     cat output_default.txt
25 |     cat output_flat.txt
26 | 
27 | ## Use as library
28 | See `App.java` for an example,
29 | or look at how it is used in the [Graphene](https://github.com/Lambda-3/Graphene) project.
30 |     
31 |    
32 | ## Contributors (alphabetical order)
33 | - Andre Freitas
34 | - Bernhard Bermeitinger
35 | - Christina Niklaus
36 | - Matthias Cetto
37 | - Siegfried Handschuh
38 | 


--------------------------------------------------------------------------------
/ranking/models/base_ranker.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import numpy as np
 3 | from torch.autograd import Variable
 4 | from gaussian_binner import GaussianBinner
 5 | 
 6 | 
 7 | class BaseRanker:
 8 |     def __init__(self, epochs, lr):
 9 |         self.epochs = epochs
10 |         self.model = None
11 |         self.lr = lr
12 | 
13 |         self.binner = GaussianBinner()
14 | 
15 |     def set_model(self, d_in, dropout=0.2):
16 |         h, d_out = 100, 1
17 |         self.model = torch.nn.Sequential(
18 |             torch.nn.Linear(d_in, h),
19 |             torch.nn.Tanh(),
20 |             torch.nn.Dropout(p=dropout),
21 |             torch.nn.Linear(h, h),
22 |             torch.nn.Tanh(),
23 |             torch.nn.Dropout(p=dropout),
24 |             torch.nn.Linear(h, h),
25 |             torch.nn.Tanh(),
26 |             torch.nn.Dropout(p=dropout),
27 |             torch.nn.Linear(h, d_out)
28 |         )
29 | 
30 |     def set_device(self, device):
31 |         self.model.to(device)
32 | 
33 |     def predict(self, test_x, test_segs):
34 |         test_x = self.binner.transform(np.array(test_x))
35 |         test_x = Variable(torch.FloatTensor(test_x))
36 |         return [score[0] for score in self.model(test_x).data.numpy()], test_segs
37 | 


--------------------------------------------------------------------------------
/DiscourseSimplification/src/main/java/org/lambda3/text/simplification/discourse/processing/ProcessingType.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * ==========================License-Start=============================
 3 |  * DiscourseSimplification : ProcessingType
 4 |  *
 5 |  * Copyright © 2017 Lambda³
 6 |  *
 7 |  * GNU General Public License 3
 8 |  * This program is free software: you can redistribute it and/or modify
 9 |  * it under the terms of the GNU General Public License as published by
10 |  * the Free Software Foundation, either version 3 of the License, or
11 |  * (at your option) any later version.
12 |  *
13 |  * This program is distributed in the hope that it will be useful,
14 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 |  * GNU General Public License for more details.
17 |  *
18 |  * You should have received a copy of the GNU General Public License
19 |  * along with this program.  If not, see http://www.gnu.org/licenses/.
20 |  * ==========================License-End==============================
21 |  */
22 | 
23 | package org.lambda3.text.simplification.discourse.processing;
24 | 
/**
 * Selects one of two processing modes: {@link #SEPARATE} or {@link #WHOLE}.
 *
 * NOTE(review): the enum itself does not define what each mode does; presumably it
 * controls whether input is handled piece by piece or as one unit - confirm
 * against the code that consumes this type.
 */
public enum ProcessingType {
    SEPARATE,
    WHOLE
}
32 | 


--------------------------------------------------------------------------------
/generate_candidates.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import dissim
 3 | import argparse
 4 | 
 5 | 
 6 | def main(args):
 7 |     cwd = os.getcwd()
 8 | 
 9 |     # Runs DisSim to generate candidates
10 |     os.system("cp " + args.input + " DiscourseSimplification/input.txt")
11 |     os.chdir('DiscourseSimplification')
12 |     os.system("mvn clean compile exec:java")
13 |     os.chdir(cwd)
14 |     dissim_candidates = dissim.generate_candidates(args.input, "DiscourseSimplification/output_dt.txt")
15 | 
16 |     # TODO: add neural splitter candidates.
17 | 
18 |     fpout = open(args.output, "w")
19 |     for candidates in dissim_candidates:
20 |         fpout.write("\t".join(candidates) + "\n")
21 |     fpout.close()
22 | 
23 | 
if __name__ == '__main__':
    # Command-line entry point: parse the two file paths and hand them to main().
    parser = argparse.ArgumentParser(
        description='Generate DisSim candidates that have undergone splitting and deletion.')
    # Both paths are mandatory; without them main() would fail later on a None path.
    parser.add_argument('--input', required=True,
                        help="Input sentences with one sentence in each line.")
    parser.add_argument('--output', required=True,
                        help="Candidates for each input sentence separated by tabs. \n"
                             "The format for each candidate is "
                             "<candidate>|||<DisSim|Transformer>|||<Rules applied to obtain the candidate>")
    args = parser.parse_args()
    main(args)
33 | 


--------------------------------------------------------------------------------
/DiscourseSimplification/src/main/java/org/lambda3/text/simplification/discourse/utils/IDGenerator.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * ==========================License-Start=============================
 3 |  * DiscourseSimplification : IDGenerator
 4 |  *
 5 |  * Copyright © 2017 Lambda³
 6 |  *
 7 |  * GNU General Public License 3
 8 |  * This program is free software: you can redistribute it and/or modify
 9 |  * it under the terms of the GNU General Public License as published by
10 |  * the Free Software Foundation, either version 3 of the License, or
11 |  * (at your option) any later version.
12 |  *
13 |  * This program is distributed in the hope that it will be useful,
14 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 |  * GNU General Public License for more details.
17 |  *
18 |  * You should have received a copy of the GNU General Public License
19 |  * along with this program.  If not, see http://www.gnu.org/licenses/.
20 |  * ==========================License-End==============================
21 |  */
22 | 
23 | package org.lambda3.text.simplification.discourse.utils;
24 | 
25 | import java.util.UUID;
26 | 
/**
 * Utility for creating random identifiers.
 */
public class IDGenerator {
    /**
     * Creates a random UUID and returns its string form with every {@code -} removed.
     *
     * @return the random UUID string without dashes
     */
    public static String generateUUID() {
        final String dashed = UUID.randomUUID().toString();
        return dashed.replace("-", "");
    }
}
32 | 


--------------------------------------------------------------------------------
/DiscourseSimplification/src/main/java/org/lambda3/text/simplification/discourse/utils/ner/NERStringParseException.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * ==========================License-Start=============================
 3 |  * DiscourseSimplification : NERStringParseException
 4 |  *
 5 |  * Copyright © 2017 Lambda³
 6 |  *
 7 |  * GNU General Public License 3
 8 |  * This program is free software: you can redistribute it and/or modify
 9 |  * it under the terms of the GNU General Public License as published by
10 |  * the Free Software Foundation, either version 3 of the License, or
11 |  * (at your option) any later version.
12 |  *
13 |  * This program is distributed in the hope that it will be useful,
14 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 |  * GNU General Public License for more details.
17 |  *
18 |  * You should have received a copy of the GNU General Public License
19 |  * along with this program.  If not, see http://www.gnu.org/licenses/.
20 |  * ==========================License-End==============================
21 |  */
22 | 
23 | package org.lambda3.text.simplification.discourse.utils.ner;
24 | 
/**
 * Checked exception carrying a caller-supplied message.
 */
public class NERStringParseException extends Exception {

    /**
     * @param msg the detail message, passed unchanged to {@link Exception}
     */
    public NERStringParseException(final String msg) {
        super(msg);
    }
}
34 | 


--------------------------------------------------------------------------------
/DiscourseSimplification/src/main/java/org/lambda3/text/simplification/discourse/utils/parseTree/ParseTreeException.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * ==========================License-Start=============================
 3 |  * DiscourseSimplification : ParseTreeException
 4 |  *
 5 |  * Copyright © 2017 Lambda³
 6 |  *
 7 |  * GNU General Public License 3
 8 |  * This program is free software: you can redistribute it and/or modify
 9 |  * it under the terms of the GNU General Public License as published by
10 |  * the Free Software Foundation, either version 3 of the License, or
11 |  * (at your option) any later version.
12 |  *
13 |  * This program is distributed in the hope that it will be useful,
14 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 |  * GNU General Public License for more details.
17 |  *
18 |  * You should have received a copy of the GNU General Public License
19 |  * along with this program.  If not, see http://www.gnu.org/licenses/.
20 |  * ==========================License-End==============================
21 |  */
22 | 
23 | package org.lambda3.text.simplification.discourse.utils.parseTree;
24 | 
/**
 * Checked exception whose message embeds, in double quotes, the text that failed to parse.
 */
public class ParseTreeException extends Exception {

    /**
     * @param text the text to embed in the message {@code Failed to parse text: "<text>"}
     */
    public ParseTreeException(final String text) {
        super(String.format("Failed to parse text: \"%s\"", text));
    }
}
34 | 


--------------------------------------------------------------------------------
/DiscourseSimplification/src/main/java/org/lambda3/text/simplification/discourse/runner/discourse_tree/extraction/rules/ListNP/PreListNPExtractor.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * ==========================License-Start=============================
 3 |  * DiscourseSimplification : PreListNPExtractor
 4 |  *
 5 |  * Copyright © 2017 Lambda³
 6 |  *
 7 |  * GNU General Public License 3
 8 |  * This program is free software: you can redistribute it and/or modify
 9 |  * it under the terms of the GNU General Public License as published by
10 |  * the Free Software Foundation, either version 3 of the License, or
11 |  * (at your option) any later version.
12 |  *
13 |  * This program is distributed in the hope that it will be useful,
14 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 |  * GNU General Public License for more details.
17 |  *
18 |  * You should have received a copy of the GNU General Public License
19 |  * along with this program.  If not, see http://www.gnu.org/licenses/.
20 |  * ==========================License-End==============================
21 |  */
22 | 
23 | package org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.rules.ListNP;
24 | 
25 | /**
26 |  *
27 |  */
28 | public class PreListNPExtractor extends ListNPExtractor {
29 | 
30 |     public PreListNPExtractor() {
31 |         super("ROOT <<: (S < (NP=np $.. VP))");
32 |     }
33 | }
34 | 


--------------------------------------------------------------------------------
/DiscourseSimplification/src/main/java/org/lambda3/text/simplification/discourse/model/TimeInformation.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * ==========================License-Start=============================
 3 |  * DiscourseSimplification : TimeInformation
 4 |  *
 5 |  * Copyright © 2017 Lambda³
 6 |  *
 7 |  * GNU General Public License 3
 8 |  * This program is free software: you can redistribute it and/or modify
 9 |  * it under the terms of the GNU General Public License as published by
10 |  * the Free Software Foundation, either version 3 of the License, or
11 |  * (at your option) any later version.
12 |  *
13 |  * This program is distributed in the hope that it will be useful,
14 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 |  * GNU General Public License for more details.
17 |  *
18 |  * You should have received a copy of the GNU General Public License
19 |  * along with this program.  If not, see http://www.gnu.org/licenses/.
20 |  * ==========================License-End==============================
21 |  */
22 | 
23 | package org.lambda3.text.simplification.discourse.model;
24 | 
/**
 * Holder for a single string value.
 */
public class TimeInformation {
	// Deliberately not final: the no-argument constructor leaves it unset.
	private String value;

	/**
	 * No-argument constructor kept for deserialization; {@code value} stays {@code null}.
	 */
	public TimeInformation() {
	}

	/**
	 * @param value the string to store
	 */
	public TimeInformation(final String value) {
		this.value = value;
	}

	/**
	 * @return the stored string, or {@code null} if none was set
	 */
	public String getValue() {
		return this.value;
	}
}
40 | 


--------------------------------------------------------------------------------
/DiscourseSimplification/src/main/java/org/lambda3/text/simplification/discourse/runner/discourse_tree/extraction/rules/ListNP/PostListNPExtractor.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * ==========================License-Start=============================
 3 |  * DiscourseSimplification : PostListNPExtractor
 4 |  *
 5 |  * Copyright © 2017 Lambda³
 6 |  *
 7 |  * GNU General Public License 3
 8 |  * This program is free software: you can redistribute it and/or modify
 9 |  * it under the terms of the GNU General Public License as published by
10 |  * the Free Software Foundation, either version 3 of the License, or
11 |  * (at your option) any later version.
12 |  *
13 |  * This program is distributed in the hope that it will be useful,
14 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 |  * GNU General Public License for more details.
17 |  *
18 |  * You should have received a copy of the GNU General Public License
19 |  * along with this program.  If not, see http://www.gnu.org/licenses/.
20 |  * ==========================License-End==============================
21 |  */
22 | 
23 | package org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.rules.ListNP;
24 | 
25 | /**
26 |  *
27 |  */
28 | public class PostListNPExtractor extends ListNPExtractor {
29 | 
30 |     public PostListNPExtractor() {
31 |         super("ROOT <<: (S < (NP $.. (VP << (NP=np))))");
32 |     }
33 | }
34 | 


--------------------------------------------------------------------------------
/DiscourseSimplification/src/main/java/org/lambda3/text/simplification/discourse/utils/ConfigUtils.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * ==========================License-Start=============================
 3 |  * DiscourseSimplification : ConfigUtils
 4 |  *
 5 |  * Copyright © 2017 Lambda³
 6 |  *
 7 |  * GNU General Public License 3
 8 |  * This program is free software: you can redistribute it and/or modify
 9 |  * it under the terms of the GNU General Public License as published by
10 |  * the Free Software Foundation, either version 3 of the License, or
11 |  * (at your option) any later version.
12 |  *
13 |  * This program is distributed in the hope that it will be useful,
14 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 |  * GNU General Public License for more details.
17 |  *
18 |  * You should have received a copy of the GNU General Public License
19 |  * along with this program.  If not, see http://www.gnu.org/licenses/.
20 |  * ==========================License-End==============================
21 |  */
22 | 
23 | package org.lambda3.text.simplification.discourse.utils;
24 | 
25 | import com.typesafe.config.Config;
26 | import com.typesafe.config.ConfigRenderOptions;
27 | 
28 | /**
29 |  *
30 |  */
31 | public class ConfigUtils {
32 | 	public static String prettyPrint(Config config) {
33 | 		return config == null
34 | 				? null
35 | 				: config.root().render(ConfigRenderOptions.concise().setFormatted(true));
36 | 	}
37 | }
38 | 


--------------------------------------------------------------------------------
/DiscourseSimplification/src/main/java/org/lambda3/text/simplification/discourse/runner/discourse_tree/model/SentenceLeaf.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * ==========================License-Start=============================
 3 |  * DiscourseSimplification : SentenceLeaf
 4 |  *
 5 |  * Copyright © 2017 Lambda³
 6 |  *
 7 |  * GNU General Public License 3
 8 |  * This program is free software: you can redistribute it and/or modify
 9 |  * it under the terms of the GNU General Public License as published by
10 |  * the Free Software Foundation, either version 3 of the License, or
11 |  * (at your option) any later version.
12 |  *
13 |  * This program is distributed in the hope that it will be useful,
14 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 |  * GNU General Public License for more details.
17 |  *
18 |  * You should have received a copy of the GNU General Public License
19 |  * along with this program.  If not, see http://www.gnu.org/licenses/.
20 |  * ==========================License-End==============================
21 |  */
22 | 
23 | package org.lambda3.text.simplification.discourse.runner.discourse_tree.model;
24 | 
25 | import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeException;
26 | 
27 | /**
28 |  *
29 |  */
30 | public class SentenceLeaf extends Leaf {
31 | 
32 |     public SentenceLeaf(String sentence, int sentenceIdx) throws ParseTreeException {
33 |         super("SENTENCE", sentence);
34 |         this.setRecursiveUnsetSentenceIdx(sentenceIdx);
35 |     }
36 | }
37 | 


--------------------------------------------------------------------------------
/ranking/all_rules.txt:
--------------------------------------------------------------------------------
 1 | PP_Shortening
 2 | ParticipialMiddleExtractor
 3 | ReferenceInitialAdverbialExtractor
 4 | PostListNPExtractor
 5 | RestrictiveParticipialExtractor
 6 | NonRestrictiveRelativeClausePrepWhichWhoExtractor
 7 | SubordinationPostExtractor
 8 | PrepositionalAttachedtoVPExtractor
 9 | SubordinationPostAttributionExtractor2
10 | SubordinationPostPurposeExtractor
11 | RestrictiveRelativeClauseWhoWhichExtractor
12 | AdjectivalAdverbialInitialExtractor
13 | SubordinationPrePurposeExtractor
14 | PurposePreExtractor
15 | NonRestrictiveRelativeClauseWhoseExtractor
16 | RestrictiveRelativeClauseWithoutRelativePronounExtractor
17 | NonRestrictiveAppositionExtractor
18 | PrepositionalMiddleFinalExtractor
19 | NonRestrictiveRelativeClauseWhoWhichExtractor
20 | NonRestrictiveRelativeClauseWhereExtractor
21 | SBAR_Shortening
22 | SharedNPPostParticipalExtractor
23 | SubordinationPreExtractor
24 | PrepositionalInitialExtractor
25 | XPOverXP
26 | PreListNPExtractor
27 | RestrictiveAppositionExtractor
28 | PreAttributionExtractor
29 | TimeExpressions
30 | CoordinationExtractor
31 | ReferenceFinalAdverbialExtractor
32 | SharedNPPreParticipalExtractor
33 | PurposePostExtractor
34 | ProjectionPrinciple
35 | NonRestrictiveRelativeClauseWhomExtractor
36 | PreposedAdjuncts
37 | AdjectivalAdverbialMiddleFinalExtractor
38 | RestrictiveRelativeClauseWhoseExtractor
39 | SubordinationPostAttributionExtractor
40 | SharedNPPostCoordinationExtractor
41 | ReferenceInitialConjunctionExtractor
42 | QuotedAttributionPostExtractor
43 | LeadNPExtractor
44 | QuotedAttributionPreExtractor
45 | ReferenceMedialAdverbialExtractor
46 | Transformer
47 | DisSim
48 | HT
49 | 


--------------------------------------------------------------------------------
/DiscourseSimplification/src/main/java/org/lambda3/text/simplification/discourse/utils/IndexRange.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * ==========================License-Start=============================
 3 |  * DiscourseSimplification : IndexRange
 4 |  *
 5 |  * Copyright © 2017 Lambda³
 6 |  *
 7 |  * GNU General Public License 3
 8 |  * This program is free software: you can redistribute it and/or modify
 9 |  * it under the terms of the GNU General Public License as published by
10 |  * the Free Software Foundation, either version 3 of the License, or
11 |  * (at your option) any later version.
12 |  *
13 |  * This program is distributed in the hope that it will be useful,
14 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 |  * GNU General Public License for more details.
17 |  *
18 |  * You should have received a copy of the GNU General Public License
19 |  * along with this program.  If not, see http://www.gnu.org/licenses/.
20 |  * ==========================License-End==============================
21 |  */
22 | 
23 | package org.lambda3.text.simplification.discourse.utils;
24 | 
/**
 * Immutable pair of integer indices, {@code fromIdx} and {@code toIdx}.
 */
public class IndexRange {
    private final int fromIdx;
    private final int toIdx;

    /**
     * @param fromIdx the first index of the pair
     * @param toIdx   the second index of the pair
     */
    public IndexRange(final int fromIdx, final int toIdx) {
        this.fromIdx = fromIdx;
        this.toIdx = toIdx;
    }

    /** @return the {@code fromIdx} given at construction */
    public int getFromIdx() {
        return this.fromIdx;
    }

    /** @return the {@code toIdx} given at construction */
    public int getToIdx() {
        return this.toIdx;
    }

    /** @return the pair formatted as {@code (fromIdx | toIdx)} */
    @Override
    public String toString() {
        final StringBuilder text = new StringBuilder();
        text.append('(').append(this.fromIdx);
        text.append(" | ");
        text.append(this.toIdx).append(')');
        return text.toString();
    }
}
50 | 


--------------------------------------------------------------------------------
/DiscourseSimplification/src/main/java/org/lambda3/text/simplification/discourse/utils/ner/NERExtractionUtils.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * ==========================License-Start=============================
 3 |  * DiscourseSimplification : NERExtractionUtils
 4 |  *
 5 |  * Copyright © 2017 Lambda³
 6 |  *
 7 |  * GNU General Public License 3
 8 |  * This program is free software: you can redistribute it and/or modify
 9 |  * it under the terms of the GNU General Public License as published by
10 |  * the Free Software Foundation, either version 3 of the License, or
11 |  * (at your option) any later version.
12 |  *
13 |  * This program is distributed in the hope that it will be useful,
14 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 |  * GNU General Public License for more details.
17 |  *
18 |  * You should have received a copy of the GNU General Public License
19 |  * along with this program.  If not, see http://www.gnu.org/licenses/.
20 |  * ==========================License-End==============================
21 |  */
22 | 
23 | package org.lambda3.text.simplification.discourse.utils.ner;
24 | 
25 | import org.lambda3.text.simplification.discourse.utils.IndexRange;
26 | 
27 | import java.util.ArrayList;
28 | import java.util.List;
29 | 
30 | /**
31 |  *
32 |  */
33 | public class NERExtractionUtils {
34 | 
35 |     public static List<IndexRange> getNERIndexRanges(NERString nerString) {
36 |         List<IndexRange> res = new ArrayList<>();
37 | 
38 |         for (NERTokenGroup group : nerString.getGroups()) {
39 |             res.add(new IndexRange(group.getFromTokenIndex(), group.getToTokenIndex()));
40 |         }
41 | 
42 |         return res;
43 |     }
44 | }
45 | 


--------------------------------------------------------------------------------
/DiscourseSimplification/src/main/java/org/lambda3/text/simplification/discourse/utils/ner/tner/TNERString.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * ==========================License-Start=============================
 3 |  * DiscourseSimplification : TNERString
 4 |  *
 5 |  * Copyright © 2017 Lambda³
 6 |  *
 7 |  * GNU General Public License 3
 8 |  * This program is free software: you can redistribute it and/or modify
 9 |  * it under the terms of the GNU General Public License as published by
10 |  * the Free Software Foundation, either version 3 of the License, or
11 |  * (at your option) any later version.
12 |  *
13 |  * This program is distributed in the hope that it will be useful,
14 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 |  * GNU General Public License for more details.
17 |  *
18 |  * You should have received a copy of the GNU General Public License
19 |  * along with this program.  If not, see http://www.gnu.org/licenses/.
20 |  * ==========================License-End==============================
21 |  */
22 | 
23 | package org.lambda3.text.simplification.discourse.utils.ner.tner;
24 | 
25 | import edu.stanford.nlp.trees.Tree;
26 | import org.lambda3.text.simplification.discourse.utils.ner.NERString;
27 | 
28 | import java.util.ArrayList;
29 | import java.util.List;
30 | 
31 | /**
32 |  *
33 |  */
34 | public class TNERString extends NERString {
35 |     private final Tree parseTree;
36 | 
37 |     public TNERString(List<TNERToken> tokens, Tree parseTree) {
38 |         super(new ArrayList<>(tokens));
39 |         this.parseTree = parseTree;
40 |         this.tokens.forEach(t -> ((TNERToken) t).setNerString(this));
41 |     }
42 | 
43 |     public Tree getParseTree() {
44 |         return parseTree;
45 |     }
46 | }
47 | 


--------------------------------------------------------------------------------
/DiscourseSimplification/src/main/java/org/lambda3/text/simplification/discourse/utils/pos/POSToken.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * ==========================License-Start=============================
 3 |  * DiscourseSimplification : POSToken
 4 |  *
 5 |  * Copyright © 2017 Lambda³
 6 |  *
 7 |  * GNU General Public License 3
 8 |  * This program is free software: you can redistribute it and/or modify
 9 |  * it under the terms of the GNU General Public License as published by
10 |  * the Free Software Foundation, either version 3 of the License, or
11 |  * (at your option) any later version.
12 |  *
13 |  * This program is distributed in the hope that it will be useful,
14 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 |  * GNU General Public License for more details.
17 |  *
18 |  * You should have received a copy of the GNU General Public License
19 |  * along with this program.  If not, see http://www.gnu.org/licenses/.
20 |  * ==========================License-End==============================
21 |  */
22 | 
23 | package org.lambda3.text.simplification.discourse.utils.pos;
24 | 
/**
 * Immutable value object for a single token together with its part-of-speech tag.
 */
public class POSToken {
    // Position assigned to the token by whoever created it.
    private final int index;
    // Surface text of the token.
    private final String text;
    // Part-of-speech tag of the token.
    private final String pos;

    /**
     * @param index position of the token
     * @param text  surface text of the token
     * @param pos   part-of-speech tag of the token
     */
    public POSToken(int index, String text, String pos) {
        this.index = index;
        this.text = text;
        this.pos = pos;
    }

    /**
     * @return the index given at construction
     */
    public int getIndex() {
        return index;
    }

    /**
     * @return the token text given at construction
     */
    public String getText() {
        return text;
    }

    /**
     * @return the part-of-speech tag given at construction
     */
    public String getPos() {
        return pos;
    }

    /**
     * Renders the token as {@code (index: pos, 'text')}.
     *
     * @return a human-readable representation of this token
     */
    @Override
    public String toString() {
        return "(" + index + ": " + pos + ", '" + text + "')";
    }
}
55 | 


--------------------------------------------------------------------------------
/ranking/gaussian_binner.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | import numpy as np
 4 | 
 5 | 
 6 | def gaussian(diff, sig):
 7 |     return np.exp(-np.power(diff, 2.) / (2 * sig * sig))
 8 | 
 9 | 
class GaussianBinner:
    """Replace leading feature columns by soft (Gaussian) histogram-bin memberships.

    ``fit`` derives, for each of the first ``features_to_be_binned`` columns, a set
    of bin centres from an equal-width histogram plus one extra centre on each side.
    ``transform`` then maps every value of such a column to its normalised Gaussian
    similarity to each centre and appends the result to the untouched columns.
    """

    def __init__(self, bins=10, w=0.2):
        """Store the configuration.

        ``bins`` is forwarded to ``np.histogram``; ``w`` scales the bin width to
        obtain the Gaussian sigma of each feature.
        """
        self.bin_values, self.sigmas = [], []
        self.bins = bins
        self.width = w
        # Stand-in denominator for rows whose similarities all underflow to zero.
        self.eps = 0.000001

    def fit(self, x, features_to_be_binned=7):
        """Compute bin centres and sigmas for the first ``features_to_be_binned`` columns of ``x``.

        ``x`` is indexed as ``x[:, index]``, i.e. it must be a 2-D NumPy array.
        Any statistics from an earlier call are discarded.
        """
        # Reset first: appending to the previous results would leave stale entries
        # at the front of the lists, and transform() would keep reading those.
        self.bin_values, self.sigmas = [], []

        for index in range(features_to_be_binned):
            bin_divisions = np.histogram(x[:, index], bins=self.bins)[1]
            bin_width = abs(bin_divisions[1] - bin_divisions[0])
            half_width = bin_width / 2.0

            bin_means = [(lower + upper) / 2.0
                         for lower, upper in zip(bin_divisions[:-1], bin_divisions[1:])]
            # One extra centre half a bin outside each end of the histogram range.
            bin_means.insert(0, bin_divisions[0] - half_width)
            bin_means.append(bin_divisions[-1] + half_width)

            self.bin_values.append(bin_means)
            self.sigmas.append(bin_width * self.width)

    def transform(self, x, features_to_be_binned=7):
        """Return ``x`` with its first ``features_to_be_binned`` columns expanded.

        The result starts with the columns of ``x`` from ``features_to_be_binned``
        onwards, followed by one group of columns per binned feature (one column
        per bin centre computed in ``fit``). Each group is normalised to sum to
        one per row; a zero sum is replaced by ``self.eps`` before dividing.
        """
        expanded_features = [x[:, features_to_be_binned:]]
        for index in range(features_to_be_binned):
            bin_means = np.array(self.bin_values[index])

            # Broadcasting (n, 1) against (k,) gives every value/centre distance
            # without depending on how many centres fit() produced.
            distances = x[:, index][:, np.newaxis] - bin_means
            projected_features = gaussian(distances, self.sigmas[index])

            sum_f = np.sum(projected_features, axis=1)
            sum_f[sum_f == 0] = self.eps
            expanded_features.append(projected_features / sum_f[:, np.newaxis])

        return np.concatenate(expanded_features, axis=1)
49 | 


--------------------------------------------------------------------------------
/DiscourseSimplification/src/main/java/org/lambda3/text/simplification/discourse/runner/discourse_tree/model/Invalidation.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * ==========================License-Start=============================
 3 |  * DiscourseSimplification : Invalidation
 4 |  *
 5 |  * Copyright © 2017 Lambda³
 6 |  *
 7 |  * GNU General Public License 3
 8 |  * This program is free software: you can redistribute it and/or modify
 9 |  * it under the terms of the GNU General Public License as published by
10 |  * the Free Software Foundation, either version 3 of the License, or
11 |  * (at your option) any later version.
12 |  *
13 |  * This program is distributed in the hope that it will be useful,
14 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 |  * GNU General Public License for more details.
17 |  *
18 |  * You should have received a copy of the GNU General Public License
19 |  * along with this program.  If not, see http://www.gnu.org/licenses/.
20 |  * ==========================License-End==============================
21 |  */
22 | 
23 | package org.lambda3.text.simplification.discourse.runner.discourse_tree.model;
24 | 
25 | import org.lambda3.text.simplification.discourse.utils.PrettyTreePrinter;
26 | 
27 | import java.util.ArrayList;
28 | import java.util.Collections;
29 | import java.util.List;
30 | 
31 | /**
32 |  *
33 |  */
34 | public class Invalidation extends DiscourseTree {
35 | 
36 |     public Invalidation() {
37 |         super("");
38 |     }
39 | 
40 |     // VISUALIZATION ///////////////////////////////////////////////////////////////////////////////////////////////////
41 | 
42 |     @Override
43 |     public List<String> getPTPCaption() {
44 |         return Collections.singletonList("INVALIDATED");
45 |     }
46 | 
47 |     @Override
48 |     public List<PrettyTreePrinter.Edge> getPTPEdges() {
49 |         return new ArrayList<>();
50 |     }
51 | }
52 | 


--------------------------------------------------------------------------------
/DiscourseSimplification/src/main/java/org/lambda3/text/simplification/discourse/model/serializer/TreeSerializer.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * ==========================License-Start=============================
 3 |  * DiscourseSimplification : TreeSerializer
 4 |  *
 5 |  * Copyright © 2017 Lambda³
 6 |  *
 7 |  * GNU General Public License 3
 8 |  * This program is free software: you can redistribute it and/or modify
 9 |  * it under the terms of the GNU General Public License as published by
10 |  * the Free Software Foundation, either version 3 of the License, or
11 |  * (at your option) any later version.
12 |  *
13 |  * This program is distributed in the hope that it will be useful,
14 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 |  * GNU General Public License for more details.
17 |  *
18 |  * You should have received a copy of the GNU General Public License
19 |  * along with this program.  If not, see http://www.gnu.org/licenses/.
20 |  * ==========================License-End==============================
21 |  */
22 | 
23 | package org.lambda3.text.simplification.discourse.model.serializer;
24 | 
25 | import com.fasterxml.jackson.core.JsonGenerator;
26 | import com.fasterxml.jackson.databind.SerializerProvider;
27 | import com.fasterxml.jackson.databind.ser.std.StdSerializer;
28 | import edu.stanford.nlp.trees.Tree;
29 | 
30 | import java.io.IOException;
31 | 
32 | /**
33 |  *
34 |  */
35 | public class TreeSerializer extends StdSerializer<Tree> {
36 | 
37 |     public TreeSerializer() {
38 |         this(null);
39 |     }
40 | 
41 |     protected TreeSerializer(Class<Tree> t) {
42 |         super(t);
43 |     }
44 | 
45 |     @Override
46 |     public void serialize(Tree value, JsonGenerator gen, SerializerProvider provider) throws IOException {
47 |         gen.writeString(value.pennString().trim().replaceAll("\\s+", " ").replaceAll("[\\n\\t]", ""));
48 |     }
49 | }
50 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Controllable Text Simplification with Explicit Paraphrasing
 2 | 
 3 | This repository contains the code and resources from the following [paper](https://arxiv.org/pdf/2010.11004.pdf). Our approach simplifies the given complex sentence in three steps:
 4 | 
 5 | 1. Generate candidates for an input sentence using [DisSim](https://www.aclweb.org/anthology/P19-1333.pdf) and a neural sentence splitter. DisSim is a rule-based approach proposed by Niklaus et al. (2019) that uses 35 syntactic rules to split a sentence.
 6 | 
 7 | 1. Rank the candidates that have undergone splitting and deletion based on the quality of simplification.  
 8 | 
 9 | 1. Pass the best ranked candidate to the paraphrase generation Transformer model.
10 | 
11 | 
12 | ## Candidate Generation: 
13 | 
14 | First, you need to install the DiscourseSimplification code. We use the same code from [this](https://github.com/Lambda-3/DiscourseSimplification) repo.
15 | 
16 | ```
17 | cd DiscourseSimplification
18 | mvn clean install -DskipTests
19 | ```
20 | 
21 | To generate the candidates, you can use the following command:
22 | 
23 | ```python3 generate_candidates.py --input <input filename> --output <candidate filename>```
24 |     
25 | ## Candidate Ranking: 
26 | 
27 | To rank the candidates generated in the previous step, you can use the following command:
28 | 
29 | ```
30 | python3 ranking/main.py --input <input filename> --candidates <candidate filename> --output <best ranked candidate filename>
31 | ```
32 | 
33 | ## Paraphrase Generation:
34 | 
35 | Coming Soon.
36 | 
37 | ## Citation
38 | Please cite the following paper if you use the above resources for your research:
39 | ```
40 | @InProceedings{NAACL-2021-Maddela,
41 |   author = 	"Maddela, Mounica and Alva-Manchego, Fernando and Xu, Wei",
42 |   title = 	"Controllable Text Simplification with Explicit Paraphrasing",
43 |   booktitle = 	"Proceedings of the North American Association for Computational Linguistics (NAACL)",
44 |   year = 	"2021",
45 | }
46 | ```
47 | 
48 | 


--------------------------------------------------------------------------------
/DiscourseSimplification/src/main/java/org/lambda3/text/simplification/discourse/utils/ner/NERToken.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * ==========================License-Start=============================
 3 |  * DiscourseSimplification : NERToken
 4 |  *
 5 |  * Copyright © 2017 Lambda³
 6 |  *
 7 |  * GNU General Public License 3
 8 |  * This program is free software: you can redistribute it and/or modify
 9 |  * it under the terms of the GNU General Public License as published by
10 |  * the Free Software Foundation, either version 3 of the License, or
11 |  * (at your option) any later version.
12 |  *
13 |  * This program is distributed in the hope that it will be useful,
14 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 |  * GNU General Public License for more details.
17 |  *
18 |  * You should have received a copy of the GNU General Public License
19 |  * along with this program.  If not, see http://www.gnu.org/licenses/.
20 |  * ==========================License-End==============================
21 |  */
22 | 
23 | package org.lambda3.text.simplification.discourse.utils.ner;
24 | 
25 | import edu.stanford.nlp.ling.Word;
26 | 
27 | /**
28 |  *
29 |  */
30 | public class NERToken {
31 |     protected final int index;
32 |     protected final String text;
33 |     protected final String category;
34 | 
35 |     public NERToken(int index, String text, String category) {
36 |         this.index = index;
37 |         this.text = text;
38 |         this.category = category;
39 |     }
40 | 
41 |     public int getIndex() {
42 |         return index;
43 |     }
44 | 
45 |     public String getText() {
46 |         return text;
47 |     }
48 | 
49 |     public Word getWord() {
50 |         return new Word(text);
51 |     }
52 | 
53 |     public String getCategory() {
54 |         return category;
55 |     }
56 | 
57 |     @Override
58 |     public String toString() {
59 |         return "(" + index + ": " + category + ", '" + text + "')";
60 |     }
61 | }
62 | 


--------------------------------------------------------------------------------
/DiscourseSimplification/src/main/java/org/lambda3/text/simplification/discourse/model/LinkedContext.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * ==========================License-Start=============================
 3 |  * DiscourseSimplification : LinkedContext
 4 |  *
 5 |  * Copyright © 2017 Lambda³
 6 |  *
 7 |  * GNU General Public License 3
 8 |  * This program is free software: you can redistribute it and/or modify
 9 |  * it under the terms of the GNU General Public License as published by
10 |  * the Free Software Foundation, either version 3 of the License, or
11 |  * (at your option) any later version.
12 |  *
13 |  * This program is distributed in the hope that it will be useful,
14 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 |  * GNU General Public License for more details.
17 |  *
18 |  * You should have received a copy of the GNU General Public License
19 |  * along with this program.  If not, see http://www.gnu.org/licenses/.
20 |  * ==========================License-End==============================
21 |  */
22 | 
23 | package org.lambda3.text.simplification.discourse.model;
24 | 
25 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.Relation;
26 | 
27 | public class LinkedContext {
28 | 	private String targetID;
29 | 	private Relation relation;
30 | 
31 | 	// for deserialization
32 | 	public LinkedContext() {
33 | 	}
34 | 
35 | 	public LinkedContext(String targetID, Relation relation) {
36 | 		this.targetID = targetID;
37 | 		this.relation = relation;
38 | 	}
39 | 
40 | 	public String getTargetID() {
41 | 		return targetID;
42 | 	}
43 | 
44 | 	public Element getTargetElement(SimplificationContent content) {
45 | 		return content.getElement(targetID);
46 | 	}
47 | 
48 | 	public Relation getRelation() {
49 | 		return relation;
50 | 	}
51 | 
52 | 	@Override
53 | 	public boolean equals(Object o) {
54 | 		return ((o instanceof LinkedContext)
55 | 			&& (((LinkedContext) o).targetID.equals(targetID))
56 | 			&& (((LinkedContext) o).relation.equals(relation)));
57 | 	}
58 | }
59 | 


--------------------------------------------------------------------------------
/ranking/main.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import argparse
 3 | from models import mr_ranker
 4 | from features.feature_extractor import FeatureExtractor
 5 | 
 6 | 
 7 | def rerank(segs, segs_feats, model):
 8 |     score_map = {}
 9 |     predicted_scores, segs = model.predict(segs_feats, segs)
10 |     for ind, seg in enumerate(segs):
11 |         score_map[seg] = predicted_scores[ind]
12 |     return sorted(score_map.keys(), key=score_map.__getitem__, reverse=True)
13 | 
14 | 
def main(args):
    """Rank the candidates of every input sentence and write the best one per line.

    ``args`` must provide ``input``, ``candidates``, ``model`` and ``output``
    (see the argument parser at the bottom of this file). For a sentence without
    candidates the source sentence itself is used. Nothing is written when
    ``args.output`` is ``None``.
    """
    feature_extractor = FeatureExtractor()
    test_feats, test_cands, test_src = feature_extractor.get_features(args.input, args.candidates)

    # NOTE(review): torch.load unpickles arbitrary Python objects - only point
    # --model at files from a trusted source.
    model = torch.load(args.model)
    model.model.eval()

    top_simplifications = []
    for i, (segs_feats, segs, src) in enumerate(zip(test_feats, test_cands, test_src)):

        # Coarse progress indicator.
        if i % 1000 == 0:
            print(i)

        if len(segs) == 0:
            top_simplifications.append([src])
        else:
            top_simplifications.append(rerank(segs, segs_feats, model))

    if args.output is not None:
        # The context manager closes the file even if a write fails part-way.
        with open(args.output, 'w') as fp:
            for segs in top_simplifications:
                fp.write(segs[0] + "\n")
43 | 
if __name__ == '__main__':
    # Command-line entry point: declare the options, parse them and run main().
    cli = argparse.ArgumentParser()
    cli.add_argument('--model', type=str, default='ranking/model.bin', dest='model')
    cli.add_argument("--output", type=str, dest="output", help="Best ranked candidate.")
    cli.add_argument('--input', help="Input sentences with one sentence in each line.")

    candidates_help = ("Candidates for each input sentence seperated by tabs. \n"
                       "The format for each candidate is "
                       "<candidate>|||<DisSim|Transformer>|||<Rules applied to obtain the candidate>")
    cli.add_argument('--candidates', help=candidates_help)

    main(cli.parse_args())
54 | 
55 | 


--------------------------------------------------------------------------------
/DiscourseSimplification/src/main/java/org/lambda3/text/simplification/discourse/model/serializer/TreeDeserializer.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * ==========================License-Start=============================
 3 |  * DiscourseSimplification : TreeDeSerializer
 4 |  *
 5 |  * Copyright © 2017 Lambda³
 6 |  *
 7 |  * GNU General Public License 3
 8 |  * This program is free software: you can redistribute it and/or modify
 9 |  * it under the terms of the GNU General Public License as published by
10 |  * the Free Software Foundation, either version 3 of the License, or
11 |  * (at your option) any later version.
12 |  *
13 |  * This program is distributed in the hope that it will be useful,
14 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 |  * GNU General Public License for more details.
17 |  *
18 |  * You should have received a copy of the GNU General Public License
19 |  * along with this program.  If not, see http://www.gnu.org/licenses/.
20 |  * ==========================License-End==============================
21 |  */
22 | 
23 | package org.lambda3.text.simplification.discourse.model.serializer;
24 | 
25 | import com.fasterxml.jackson.core.JsonParser;
26 | import com.fasterxml.jackson.core.JsonProcessingException;
27 | import com.fasterxml.jackson.databind.DeserializationContext;
28 | import com.fasterxml.jackson.databind.deser.std.StdDeserializer;
29 | import edu.stanford.nlp.trees.PennTreeReader;
30 | import edu.stanford.nlp.trees.Tree;
31 | 
32 | import java.io.IOException;
33 | import java.io.StringReader;
34 | 
35 | /**
36 |  *
37 |  */
38 | public class TreeDeserializer extends StdDeserializer<Tree> {
39 | 
40 |     public TreeDeserializer() {
41 |         this(null);
42 |     }
43 | 
44 |     protected TreeDeserializer(Class<?> vc) {
45 |         super(vc);
46 |     }
47 | 
48 |     @Override
49 |     public Tree deserialize(JsonParser p, DeserializationContext ctxt) throws IOException, JsonProcessingException {
50 |         String pennString = p.getValueAsString();
51 |         return new PennTreeReader(new StringReader(pennString)).readTree();
52 |     }
53 | }
54 | 


--------------------------------------------------------------------------------
/DiscourseSimplification/src/test/java/org/lambda3/text/simplification/discourse/processing/SentencePreprocessorTest.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * ==========================License-Start=============================
 3 |  * DiscourseSimplification : SentencePreprocessorTest
 4 |  *
 5 |  * Copyright © 2018 Lambda³
 6 |  *
 7 |  * GNU General Public License 3
 8 |  * This program is free software: you can redistribute it and/or modify
 9 |  * it under the terms of the GNU General Public License as published by
10 |  * the Free Software Foundation, either version 3 of the License, or
11 |  * (at your option) any later version.
12 |  *
13 |  * This program is distributed in the hope that it will be useful,
14 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 |  * GNU General Public License for more details.
17 |  *
18 |  * You should have received a copy of the GNU General Public License
19 |  * along with this program.  If not, see http://www.gnu.org/licenses/.
20 |  * ==========================License-End==============================
21 |  */
22 | 
23 | package org.lambda3.text.simplification.discourse.processing;
24 | 
25 | import com.typesafe.config.Config;
26 | import com.typesafe.config.ConfigFactory;
27 | import org.junit.jupiter.api.Assertions;
28 | import org.junit.jupiter.api.Test;
29 | 
30 | public class SentencePreprocessorTest {
31 |     private final Config config = ConfigFactory.load().getConfig("discourse-simplification");
32 |     private final SentencePreprocessor preprocessor = new SentencePreprocessor(config);
33 | 
34 |     @Test
35 |     void preprocessSentence() {
36 |         preprocessor.setRemoveBrackets(true);
37 | 
38 |         String sentence = "This is a test (in brackets) and [the last (one)].";
39 |         String sentence2 = "This is -LRB- a second test -RRB-.";
40 | 
41 |         String psentence = preprocessor.preprocessSentence(sentence);
42 |         Assertions.assertEquals("This is a test and .", psentence);
43 | 
44 |         String psentence2 = preprocessor.preprocessSentence(sentence2);
45 |         Assertions.assertEquals("This is .", psentence2);
46 |     }
47 | }
48 | 


--------------------------------------------------------------------------------
/DiscourseSimplification/src/test/java/org/lambda3/text/simplification/discourse/utils/words/WordUtilsTest.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * ==========================License-Start=============================
 3 |  * DiscourseSimplification : WordUtilsTest
 4 |  *
 5 |  * Copyright © 2017 Lambda³
 6 |  *
 7 |  * GNU General Public License 3
 8 |  * This program is free software: you can redistribute it and/or modify
 9 |  * it under the terms of the GNU General Public License as published by
10 |  * the Free Software Foundation, either version 3 of the License, or
11 |  * (at your option) any later version.
12 |  *
13 |  * This program is distributed in the hope that it will be useful,
14 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 |  * GNU General Public License for more details.
17 |  *
18 |  * You should have received a copy of the GNU General Public License
19 |  * along with this program.  If not, see http://www.gnu.org/licenses/.
20 |  * ==========================License-End==============================
21 |  */
22 | 
23 | package org.lambda3.text.simplification.discourse.utils.words;
24 | 
25 | import edu.stanford.nlp.ling.Word;
26 | import org.junit.jupiter.api.Assertions;
27 | import org.junit.jupiter.api.Test;
28 | import org.lambda3.text.simplification.discourse.utils.words.WordsUtils;
29 | 
30 | import java.util.Arrays;
31 | import java.util.List;
32 | 
33 | /**
34 |  *
35 |  */
36 | class WordUtilsTest {
37 | 
38 |     @Test
39 |     void wordsToProperSentence() throws Exception {
40 |         List<Word> words = Arrays.asList(
41 |                 new Word("."),
42 |                 new Word("."),
43 |                 new Word("hello"),
44 |                 new Word(","),
45 |                 new Word(","),
46 |                 new Word("this"),
47 |                 new Word("is"),
48 |                 new Word("a"),
49 |                 new Word("test"),
50 |                 new Word("."),
51 |                 new Word(".")
52 |         );
53 | 
54 |         String sentence = WordsUtils.wordsToProperSentenceString(words);
55 |         Assertions.assertEquals("Hello , this is a test .", sentence);
56 |     }
57 | }


--------------------------------------------------------------------------------
/DiscourseSimplification/src/main/java/org/lambda3/text/simplification/discourse/utils/pos/POSTagger.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * ==========================License-Start=============================
 3 |  * DiscourseSimplification : POSTagger
 4 |  *
 5 |  * Copyright © 2017 Lambda³
 6 |  *
 7 |  * GNU General Public License 3
 8 |  * This program is free software: you can redistribute it and/or modify
 9 |  * it under the terms of the GNU General Public License as published by
10 |  * the Free Software Foundation, either version 3 of the License, or
11 |  * (at your option) any later version.
12 |  *
13 |  * This program is distributed in the hope that it will be useful,
14 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 |  * GNU General Public License for more details.
17 |  *
18 |  * You should have received a copy of the GNU General Public License
19 |  * along with this program.  If not, see http://www.gnu.org/licenses/.
20 |  * ==========================License-End==============================
21 |  */
22 | 
23 | package org.lambda3.text.simplification.discourse.utils.pos;
24 | 
25 | import edu.stanford.nlp.tagger.maxent.MaxentTagger;
26 | 
27 | import java.util.ArrayList;
28 | import java.util.List;
29 | 
30 | /**
31 |  *
32 |  */
33 | public class POSTagger {
34 |     private static final MaxentTagger TAGGER = new MaxentTagger("edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger");
35 | 
36 |     public static List<POSToken> parse(String text) {
37 |         List<POSToken> tokens = new ArrayList<>();
38 | 
39 |         String posString = TAGGER.tagString(text);
40 | 
41 |         String[] posTokens = posString.split(" ");
42 | 
43 |         int idx = 0;
44 |         for (String posToken : posTokens) {
45 |             int sep_idx = posToken.lastIndexOf("_");
46 | 
47 |             // create text
48 |             String txt = posToken.substring(0, sep_idx);
49 |             String pos = posToken.substring(sep_idx + 1);
50 |             POSToken token = new POSToken(idx, txt, pos);
51 |             tokens.add(token);
52 | 
53 |             ++idx;
54 |         }
55 | 
56 |         return tokens;
57 |     }
58 | }
59 | 


--------------------------------------------------------------------------------
/DiscourseSimplification/src/main/resources/logback.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <!--
 3 |   ~ ==========================License-Start=============================
 4 |   ~ DiscourseSimplification : logback.xml
 5 |   ~
 6 |   ~ Copyright © 2017 Lambda³
 7 |   ~
 8 |   ~ GNU General Public License 3
 9 |   ~ This program is free software: you can redistribute it and/or modify
10 |   ~ it under the terms of the GNU General Public License as published by
11 |   ~ the Free Software Foundation, either version 3 of the License, or
12 |   ~ (at your option) any later version.
13 |   ~
14 |   ~ This program is distributed in the hope that it will be useful,
15 |   ~ but WITHOUT ANY WARRANTY; without even the implied warranty of
16 |   ~ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17 |   ~ GNU General Public License for more details.
18 |   ~
19 |   ~ You should have received a copy of the GNU General Public License
20 |   ~ along with this program.  If not, see http://www.gnu.org/licenses/.
21 |   ~ ==========================License-End==============================
22 |   -->
23 | 
24 | <!--suppress XmlUnboundNsPrefix, XmlUnboundNsPrefix -->
<configuration>
    <!-- Directory of the log file written by the FILE appender below. -->
    <property name="LOG_DIR" value="log" />

    <!-- Console output: the bare message followed by two line breaks. -->
    <appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
        <encoder>
            <!--<pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n</pattern>-->
            <pattern>%msg%n%n</pattern>
        </encoder>
    </appender>

    <!-- File output to ${LOG_DIR}/out.log with append set to false; same pattern as the console. -->
    <appender name="FILE" class="ch.qos.logback.core.FileAppender">
        <file>${LOG_DIR}/out.log</file>
        <append>false</append>
        <encoder>
            <!--<pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n</pattern>-->
            <pattern>%msg%n%n</pattern>
        </encoder>
    </appender>

    <!-- The processing package logs at debug level; everything else falls back to the root level. -->
    <logger name="org.lambda3.text.simplification.discourse.processing" level="debug"/>
    <!--<logger name="org.lambda3.text.simplification.discourse.tree" level="debug"/>-->

    <!-- Root logger: info level, sent to both appenders. -->
    <root level="info">
        <appender-ref ref="STDOUT"/>
        <appender-ref ref="FILE"/>
    </root>
</configuration>


--------------------------------------------------------------------------------
/DiscourseSimplification/src/main/java/org/lambda3/text/simplification/discourse/utils/ner/NERTokenGroup.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * ==========================License-Start=============================
 3 |  * DiscourseSimplification : NERTokenGroup
 4 |  *
 5 |  * Copyright © 2017 Lambda³
 6 |  *
 7 |  * GNU General Public License 3
 8 |  * This program is free software: you can redistribute it and/or modify
 9 |  * it under the terms of the GNU General Public License as published by
10 |  * the Free Software Foundation, either version 3 of the License, or
11 |  * (at your option) any later version.
12 |  *
13 |  * This program is distributed in the hope that it will be useful,
14 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 |  * GNU General Public License for more details.
17 |  *
18 |  * You should have received a copy of the GNU General Public License
19 |  * along with this program.  If not, see http://www.gnu.org/licenses/.
20 |  * ==========================License-End==============================
21 |  */
22 | 
23 | package org.lambda3.text.simplification.discourse.utils.ner;
24 | 
25 | import edu.stanford.nlp.ling.Word;
26 | 
27 | import java.util.List;
28 | import java.util.stream.Collectors;
29 | 
30 | /**
31 |  *
32 |  */
33 | class NERTokenGroup {
34 |     private final List<NERToken> tokens;
35 | 
36 |     public NERTokenGroup(List<NERToken> tokens) {
37 |         this.tokens = tokens;
38 |     }
39 | 
40 |     public int getFromTokenIndex() {
41 |         return tokens.get(0).index;
42 |     }
43 | 
44 |     public int getToTokenIndex() {
45 |         return tokens.get(tokens.size() - 1).index;
46 |     }
47 | 
48 |     public List<NERToken> getTokens() {
49 |         return tokens;
50 |     }
51 | 
52 |     private String getCategory() {
53 |         return tokens.get(0).getCategory();
54 |     }
55 | 
56 |     public boolean isNamedEntity() {
57 |         return !getCategory().equals(NERString.NO_CATEGORY);
58 |     }
59 | 
60 |     public List<Word> getWords() {
61 |         return tokens.stream().map(t -> new Word(t.getText())).collect(Collectors.toList());
62 |     }
63 | 
64 |     @Override
65 |     public String toString() {
66 |         return "[\n" + tokens.stream().map(t -> "\t" + t.toString()).collect(Collectors.joining("\n")) + "\n]";
67 |     }
68 | }
69 | 


--------------------------------------------------------------------------------
/DiscourseSimplification/src/main/java/org/lambda3/text/simplification/discourse/utils/ner/tner/TNERToken.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * ==========================License-Start=============================
 3 |  * DiscourseSimplification : TNERToken
 4 |  *
 5 |  * Copyright © 2017 Lambda³
 6 |  *
 7 |  * GNU General Public License 3
 8 |  * This program is free software: you can redistribute it and/or modify
 9 |  * it under the terms of the GNU General Public License as published by
10 |  * the Free Software Foundation, either version 3 of the License, or
11 |  * (at your option) any later version.
12 |  *
13 |  * This program is distributed in the hope that it will be useful,
14 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 |  * GNU General Public License for more details.
17 |  *
18 |  * You should have received a copy of the GNU General Public License
19 |  * along with this program.  If not, see http://www.gnu.org/licenses/.
20 |  * ==========================License-End==============================
21 |  */
22 | 
23 | package org.lambda3.text.simplification.discourse.utils.ner.tner;
24 | 
25 | import edu.stanford.nlp.trees.Tree;
26 | import org.lambda3.text.simplification.discourse.utils.ner.NERToken;
27 | 
28 | /**
29 |  *
30 |  */
31 | public class TNERToken extends NERToken {
32 | 
33 |     private final Tree leafNode;
34 |     private TNERString nerString;
35 |     private Tree posNode;
36 | 
37 |     public TNERToken(int index, String token, String category, Tree leafNode) {
38 |         super(index, token, category);
39 |         this.nerString = null;
40 |         this.leafNode = leafNode;
41 |         this.posNode = null; // wait until nerString is set
42 |     }
43 | 
44 |     public void setNerString(TNERString nerString) {
45 |         this.nerString = nerString;
46 |         this.posNode = leafNode.parent(getParseTree());
47 |     }
48 | 
49 |     private Tree getParseTree() {
50 |         return nerString.getParseTree();
51 |     }
52 | 
53 |     public Tree getLeafNode() {
54 |         return leafNode;
55 |     }
56 | 
57 |     public Tree getPosNode() {
58 |         return posNode;
59 |     }
60 | 
61 |     private String getPOSTag() {
62 |         return posNode.value();
63 |     }
64 | 
65 |     @Override
66 |     public String toString() {
67 |         return "(" + index + ": " + category + ", '" + text + "', " + getPOSTag() + ")";
68 |     }
69 | }
70 | 


--------------------------------------------------------------------------------
/DiscourseSimplification/src/main/java/org/lambda3/text/simplification/discourse/utils/parseTree/ParseTreeParser.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * ==========================License-Start=============================
 3 |  * DiscourseSimplification : ParseTreeParser
 4 |  *
 5 |  * Copyright © 2017 Lambda³
 6 |  *
 7 |  * GNU General Public License 3
 8 |  * This program is free software: you can redistribute it and/or modify
 9 |  * it under the terms of the GNU General Public License as published by
10 |  * the Free Software Foundation, either version 3 of the License, or
11 |  * (at your option) any later version.
12 |  *
13 |  * This program is distributed in the hope that it will be useful,
14 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 |  * GNU General Public License for more details.
17 |  *
18 |  * You should have received a copy of the GNU General Public License
19 |  * along with this program.  If not, see http://www.gnu.org/licenses/.
20 |  * ==========================License-End==============================
21 |  */
22 | 
23 | package org.lambda3.text.simplification.discourse.utils.parseTree;
24 | 
25 | import edu.stanford.nlp.ling.CoreLabel;
26 | import edu.stanford.nlp.parser.lexparser.LexicalizedParser;
27 | import edu.stanford.nlp.process.CoreLabelTokenFactory;
28 | import edu.stanford.nlp.process.PTBTokenizer;
29 | import edu.stanford.nlp.process.TokenizerFactory;
30 | import edu.stanford.nlp.trees.Tree;
31 | 
32 | import java.io.StringReader;
33 | import java.util.List;
34 | 
35 | /**
36 |  *
37 |  */
38 | public class ParseTreeParser {
39 | 
40 |     private static final TokenizerFactory<CoreLabel> TOKENIZER_FACTORY = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
41 |     private static final LexicalizedParser LEX_PARSER = LexicalizedParser.loadModel("edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz");
42 | 
43 |     static {
44 |         LEX_PARSER.setOptionFlags("-outputFormat", "penn,typedDependenciesCollapsed", "-retainTmpSubcategories");
45 |     }
46 | 
47 |     public static Tree parse(String text) throws ParseTreeException {
48 |         List<CoreLabel> rawWords = TOKENIZER_FACTORY.getTokenizer(new StringReader(text)).tokenize();
49 |         Tree bestParse = LEX_PARSER.parseTree(rawWords);
50 |         if (bestParse == null) {
51 |             throw new ParseTreeException(text);
52 |         }
53 | 
54 |         return bestParse;
55 |     }
56 | }
57 | 


--------------------------------------------------------------------------------
/DiscourseSimplification/src/main/java/org/lambda3/text/simplification/discourse/utils/sentences/SentencesUtils.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * ==========================License-Start=============================
 3 |  * DiscourseSimplification : SentencesUtils
 4 |  *
 5 |  * Copyright © 2017 Lambda³
 6 |  *
 7 |  * GNU General Public License 3
 8 |  * This program is free software: you can redistribute it and/or modify
 9 |  * it under the terms of the GNU General Public License as published by
10 |  * the Free Software Foundation, either version 3 of the License, or
11 |  * (at your option) any later version.
12 |  *
13 |  * This program is distributed in the hope that it will be useful,
14 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 |  * GNU General Public License for more details.
17 |  *
18 |  * You should have received a copy of the GNU General Public License
19 |  * along with this program.  If not, see http://www.gnu.org/licenses/.
20 |  * ==========================License-End==============================
21 |  */
22 | 
23 | package org.lambda3.text.simplification.discourse.utils.sentences;
24 | 
25 | import edu.stanford.nlp.ling.HasWord;
26 | import edu.stanford.nlp.ling.SentenceUtils;
27 | import edu.stanford.nlp.process.DocumentPreprocessor;
28 | 
29 | import java.io.*;
30 | import java.util.ArrayList;
31 | import java.util.List;
32 | 
33 | /**
34 |  *
35 |  */
36 | public class SentencesUtils {
37 | 
38 |     private static List<String> splitIntoSentences(Reader reader) {
39 |         List<String> res = new ArrayList<>();
40 | 
41 |         DocumentPreprocessor dp = new DocumentPreprocessor(reader);
42 |         for (List<HasWord> sentence : dp) {
43 |             res.add(SentenceUtils.listToString(sentence));
44 |         }
45 | 
46 |         return res;
47 |     }
48 | 
49 |     public static List<String> splitIntoSentences(String text) {
50 |         return splitIntoSentences(new StringReader(text));
51 |     }
52 | 
53 |     public static List<String> splitIntoSentencesFromFile(File file, boolean byLines) throws IOException {
54 |         if (byLines) {
55 |             List<String> res = new ArrayList<>();
56 | 
57 |             try (BufferedReader br = new BufferedReader(new FileReader(file))) {
58 |                 String line;
59 |                 while ((line = br.readLine()) != null) {
60 |                     res.add(line);
61 |                 }
62 |             }
63 | 
64 |             return res;
65 |         } else {
66 |             return splitIntoSentences(new BufferedReader(new FileReader(file)));
67 |         }
68 |     }
69 | }
70 | 


--------------------------------------------------------------------------------
/DiscourseSimplification/src/main/java/org/lambda3/text/simplification/discourse/utils/parseTree/ParseTreeVisualizer.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * ==========================License-Start=============================
 3 |  * DiscourseSimplification : ParseTreeVisualizer
 4 |  *
 5 |  * Copyright © 2017 Lambda³
 6 |  *
 7 |  * GNU General Public License 3
 8 |  * This program is free software: you can redistribute it and/or modify
 9 |  * it under the terms of the GNU General Public License as published by
10 |  * the Free Software Foundation, either version 3 of the License, or
11 |  * (at your option) any later version.
12 |  *
13 |  * This program is distributed in the hope that it will be useful,
14 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 |  * GNU General Public License for more details.
17 |  *
18 |  * You should have received a copy of the GNU General Public License
19 |  * along with this program.  If not, see http://www.gnu.org/licenses/.
20 |  * ==========================License-End==============================
21 |  */
22 | 
23 | package org.lambda3.text.simplification.discourse.utils.parseTree;
24 | 
25 | import edu.stanford.nlp.trees.Tree;
26 | import org.lambda3.text.simplification.discourse.utils.PrettyTreePrinter;
27 | 
28 | import java.util.ArrayList;
29 | import java.util.Arrays;
30 | import java.util.List;
31 | import java.util.stream.Collectors;
32 | 
33 | /**
34 |  *
35 |  */
36 | public class ParseTreeVisualizer {
37 | 
38 |     public static String prettyPrint(Tree parseTree) {
39 |         MyNode node = new MyNode(parseTree, parseTree);
40 |         return PrettyTreePrinter.prettyPrint(node, false);
41 |     }
42 | 
43 |     private static class MyNode implements PrettyTreePrinter.Node {
44 |         private final List<PrettyTreePrinter.Node> children;
45 |         private final String caption;
46 |         private final int nr;
47 | 
48 |         public MyNode(Tree parseNode, Tree anchor) {
49 |             this.caption = parseNode.value();
50 |             this.children = new ArrayList<>();
51 |             for (Tree childNode : parseNode.getChildrenAsList()) {
52 |                 this.children.add(new MyNode(childNode, anchor));
53 |             }
54 |             this.nr = parseNode.nodeNumber(anchor);
55 |         }
56 | 
57 |         @Override
58 |         public List<String> getPTPCaption() {
59 |             return Arrays.asList(caption, "#" + nr);
60 |         }
61 | 
62 |         @Override
63 |         public List<PrettyTreePrinter.Edge> getPTPEdges() {
64 |             return children.stream().map(c -> new PrettyTreePrinter.DefaultEdge("", c, true)).collect(Collectors.toList());
65 |         }
66 | 
67 |     }
68 | }
69 | 


--------------------------------------------------------------------------------
/DiscourseSimplification/src/main/java/org/lambda3/text/simplification/discourse/App.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * ==========================License-Start=============================
 3 |  * DiscourseSimplification : App
 4 |  *
 5 |  * Copyright © 2017 Lambda³
 6 |  *
 7 |  * GNU General Public License 3
 8 |  * This program is free software: you can redistribute it and/or modify
 9 |  * it under the terms of the GNU General Public License as published by
10 |  * the Free Software Foundation, either version 3 of the License, or
11 |  * (at your option) any later version.
12 |  *
13 |  * This program is distributed in the hope that it will be useful,
14 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 |  * GNU General Public License for more details.
17 |  *
18 |  * You should have received a copy of the GNU General Public License
19 |  * along with this program.  If not, see http://www.gnu.org/licenses/.
20 |  * ==========================License-End==============================
21 |  */
22 | 
23 | package org.lambda3.text.simplification.discourse;
24 | 
25 | import org.lambda3.text.simplification.discourse.processing.DiscourseSimplifier;
26 | import org.lambda3.text.simplification.discourse.processing.ProcessingType;
27 | import org.lambda3.text.simplification.discourse.model.SimplificationContent;
28 | import org.slf4j.LoggerFactory;
29 | 
30 | import java.io.BufferedWriter;
31 | import java.io.File;
32 | import java.io.FileWriter;
33 | import java.io.IOException;
34 | import java.util.Arrays;
35 | import java.util.List;
36 | import java.util.stream.Collectors;
37 | 
38 | public class App {
39 |     private static final org.slf4j.Logger LOGGER = LoggerFactory.getLogger(App.class);
40 |     private static final DiscourseSimplifier DISCOURSE_SIMPLIFIER = new DiscourseSimplifier();
41 | 
42 |     private static void saveLines(File file, List<String> lines) {
43 |         try (BufferedWriter bw = new BufferedWriter(new FileWriter(file))) {
44 |             bw.write(lines.stream().collect(Collectors.joining("\n")));
45 | 
46 |             // no need to close it.
47 |             //bw.close()
48 |         } catch (IOException e) {
49 |             e.printStackTrace();
50 |         }
51 |     }
52 | 
53 |     public static void main(String[] args) throws IOException {
54 |         SimplificationContent content = DISCOURSE_SIMPLIFIER.doDiscourseSimplification(new File("input.txt"), ProcessingType.SEPARATE, true);
55 |         content.serializeToJSON(new File("output.json"));
56 |         saveLines(new File("output_default.txt"), Arrays.asList(content.defaultFormat(false)));
57 |         saveLines(new File("output_flat.txt"), Arrays.asList(content.flatFormat(false)));
58 |         LOGGER.info("done");
59 |     }
60 | }
61 | 


--------------------------------------------------------------------------------
/DiscourseSimplification/src/main/java/org/lambda3/text/simplification/discourse/processing/SentencePreprocessor.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * ==========================License-Start=============================
 3 |  * DiscourseSimplification : SentencePreprocessor
 4 |  *
 5 |  * Copyright © 2018 Lambda³
 6 |  *
 7 |  * GNU General Public License 3
 8 |  * This program is free software: you can redistribute it and/or modify
 9 |  * it under the terms of the GNU General Public License as published by
10 |  * the Free Software Foundation, either version 3 of the License, or
11 |  * (at your option) any later version.
12 |  *
13 |  * This program is distributed in the hope that it will be useful,
14 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 |  * GNU General Public License for more details.
17 |  *
18 |  * You should have received a copy of the GNU General Public License
19 |  * along with this program.  If not, see http://www.gnu.org/licenses/.
20 |  * ==========================License-End==============================
21 |  */
22 | 
23 | package org.lambda3.text.simplification.discourse.processing;
24 | 
import com.typesafe.config.Config;
import edu.stanford.nlp.simple.Sentence;

import java.util.regex.Pattern;
27 | 
28 | public class SentencePreprocessor {
29 |     private static String ROUND_BRACKET_PATTERN = "\\([^\\(\\)]*?\\)";
30 |     private static String SQUARE_BRACKET_PATTERN = "\\[[^\\[\\]]*?\\]";
31 |     private static String CURLY_BRACKET_PATTERN = "\\{[^\\{\\}]*?\\}";
32 | 
33 |     private static String ROUND_BRACKET_PATTERN2 = "-LRB-((?!-LRB-|-RRB-).)*?-RRB-";
34 |     private static String SQUARE_BRACKET_PATTERN2 = "-LSB-((?!-LSB-|-RSB-).)*?-RSB-";
35 |     private static String CURLY_BRACKET_PATTERN2 = "-LCB-((?!-LCB-|-RCB-).)*?-RCB-";
36 | 
37 | 
38 |     private static String WHITESPACE_PATTERN = "\\s+";
39 | 
40 |     public boolean removeBrackets;
41 | 
42 |     public SentencePreprocessor(Config config) {
43 |         this.removeBrackets = config.getBoolean("remove-brackets");
44 |     }
45 | 
46 |     public void setRemoveBrackets(boolean removeBrackets) {
47 |         this.removeBrackets = removeBrackets;
48 |     }
49 | 
50 |     public String preprocessSentence(String sentence) {
51 |         String res = sentence;
52 | 
53 |         if (removeBrackets) {
54 |             res = sentence.replaceAll(ROUND_BRACKET_PATTERN, "")
55 |                     .replaceAll(SQUARE_BRACKET_PATTERN, "")
56 |                     .replaceAll(CURLY_BRACKET_PATTERN, "")
57 |                     .replaceAll(ROUND_BRACKET_PATTERN2, "")
58 |                     .replaceAll(SQUARE_BRACKET_PATTERN2, "")
59 |                     .replaceAll(CURLY_BRACKET_PATTERN2, "");
60 |         }
61 | 
62 |         res = res.replaceAll(WHITESPACE_PATTERN, " ");
63 |         return res;
64 |     }
65 | }
66 | 


--------------------------------------------------------------------------------
/DiscourseSimplification/src/main/java/org/lambda3/text/simplification/discourse/model/OutSentence.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * ==========================License-Start=============================
 3 |  * DiscourseSimplification : OutSentence
 4 |  *
 5 |  * Copyright © 2017 Lambda³
 6 |  *
 7 |  * GNU General Public License 3
 8 |  * This program is free software: you can redistribute it and/or modify
 9 |  * it under the terms of the GNU General Public License as published by
10 |  * the Free Software Foundation, either version 3 of the License, or
11 |  * (at your option) any later version.
12 |  *
13 |  * This program is distributed in the hope that it will be useful,
14 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 |  * GNU General Public License for more details.
17 |  *
18 |  * You should have received a copy of the GNU General Public License
19 |  * along with this program.  If not, see http://www.gnu.org/licenses/.
20 |  * ==========================License-End==============================
21 |  */
22 | 
23 | package org.lambda3.text.simplification.discourse.model;
24 | 
25 | import java.util.HashMap;
26 | import java.util.LinkedHashMap;
27 | import java.util.List;
28 | import java.util.stream.Collectors;
29 | 
30 | /**
31 |  *
32 |  */
33 | public class OutSentence {
34 |     private int sentenceIdx;
35 |     private String originalSentence;
36 |     private HashMap<String, Element> elementMap; // all extractions extracted from this sentence
37 | 
38 |     // for deserialization
39 |     public OutSentence() {
40 |     }
41 | 
42 |     public OutSentence(int sentenceIdx,String originalSentence) {
43 |         this.sentenceIdx = sentenceIdx;
44 |         this.originalSentence = originalSentence;
45 |         this.elementMap = new LinkedHashMap<>();
46 |     }
47 | 
48 |     public void addElement(Element element) {
49 |         if (sentenceIdx != element.getSentenceIdx()) {
50 |             throw new AssertionError("Element should not be added to this sentence");
51 |         }
52 |         elementMap.putIfAbsent(element.getId(), element);
53 |     }
54 | 
55 |     public int getSentenceIdx() {
56 |         return sentenceIdx;
57 |     }
58 | 
59 |     public String getOriginalSentence() {
60 |         return originalSentence;
61 |     }
62 | 
63 |     public Element getElement(String id) {
64 |         return elementMap.getOrDefault(id, null);
65 |     }
66 | 
67 |     public List<Element> getElements() {
68 |         return elementMap.values().stream().collect(Collectors.toList());
69 |     }
70 | 
71 |     @Override
72 |     public String toString() {
73 |         StringBuilder strb = new StringBuilder();
74 |         strb.append("# " + originalSentence + "\n");
75 |         getElements().forEach(e -> strb.append("\n" + e));
76 |         return strb.toString();
77 |     }
78 | }
79 | 


--------------------------------------------------------------------------------
/ranking/features/feature_extractor.py:
--------------------------------------------------------------------------------
 1 | import math
 2 | import random
 3 | import numpy as np
 4 | from collections import Counter
 5 | 
 6 | 
 7 | def jaccard_similarity(list1, list2):
 8 |     intersection = len(list(set(list1).intersection(list2)))
 9 |     union = (len(list1) + len(list2)) - intersection
10 |     return float(intersection) / union
11 | 
12 | 
def compression_ratio(comp, simp):
    """Return the word count of ``simp`` divided by the word count of ``comp``.

    Words are whitespace-separated tokens. Raises ``ZeroDivisionError`` when
    ``comp`` contains no words.
    """
    simp_len = len(simp.split())
    comp_len = len(comp.split())
    return float(simp_len) / comp_len
15 | 
16 | 
class FeatureExtractor:
    """Build feature vectors for candidate rewrites of source sentences.

    A candidate is a pair ``(sentence, rules)`` where ``rules`` is a
    whitespace-separated string of rule names. Rule names are mapped to
    positions through a vocabulary read from ``ranking/all_rules.txt``.
    """

    def __init__(self):
        # One rule name per line; the line index is the rule's position in
        # the one-hot part of the feature vector.
        self.rule_vocab = {}
        with open("ranking/all_rules.txt") as rules_file:
            for ind, line in enumerate(rules_file):
                self.rule_vocab[line.strip()] = ind

    def get_fv(self, cand, src):
        """Return the feature vector (a list of numbers) for one candidate.

        Layout, in order:
          0. number of parts of the lower-cased candidate split on "<sep>"
          1. number of words in ``src``
          2. ``jaccard_similarity`` of the lower-cased candidate and source words
          3. candidate word count divided by source word count
          4. candidate word count divided by the number of parts of the
             candidate split on "<SEP>" (case-sensitive, unlike feature 0)
          5. number of rules of the candidate
          6+. one-hot vector over ``self.rule_vocab`` marking the rules used

        Raises ``KeyError`` for a rule that is not in the vocabulary and
        ``ZeroDivisionError`` when ``src`` contains no words.
        """
        cand_sent = cand[0]

        fv = list()
        fv.append(len(cand_sent.lower().split("<sep>")))
        fv.append(len(src.split()) * 1.0)
        fv.append(jaccard_similarity(cand_sent.lower().split(), src.lower().split()))

        ratio_src_cand = len(cand_sent.split()) * 1.0 / len(src.split())
        fv.append(ratio_src_cand)

        fv.append(len(cand_sent.split()) * 1.0 / len(cand_sent.split("<SEP>")))

        rules = cand[1].split()
        fv.append(len(rules))

        rules_vec = [0] * len(self.rule_vocab)
        for rule in rules:
            rules_vec[self.rule_vocab[rule]] = 1
        fv.extend(rules_vec)
        return fv

    def filter_candidates(self, tuples):
        """Split ``(feature_vector, sentence)`` pairs into two parallel lists.

        Returns ``(sentences, feature_vectors)``; no pair is dropped.
        """
        cands = [cand_sent for _, cand_sent in tuples]
        feats = [fv for fv, _ in tuples]
        return cands, feats

    def get_features(self, input_file, cands_file):
        """Extract features for every candidate of every source sentence.

        ``input_file`` holds one source sentence per line. The matching line
        of ``cands_file`` holds its candidates, tab-separated, each written as
        ``sentence|||rules`` (anything after a second ``|||`` is ignored).
        Candidates without a ``|||`` separator are skipped, and a candidate
        sentence that already occurred on the same line is skipped as well.
        Reading stops at the end of the shorter file.

        Returns ``(all_features, all_cands, all_src)``, three lists with one
        entry per source sentence; ``all_features[i][j]`` is the feature
        vector of the candidate sentence ``all_cands[i][j]``.
        """
        print("Extracting features")
        all_src, all_features, all_cands = [], [], []
        with open(input_file) as src_lines, open(cands_file) as cand_lines:
            for src, candidates in zip(src_lines, cand_lines):

                src = src.strip()
                cands = candidates.strip().split("\t")
                cands = [tuple(cand.split("|||")[:2]) for cand in cands]

                tuples = []
                cand_sents = set()
                for cand in cands:
                    if len(cand) > 1 and cand[0] not in cand_sents:
                        # Remember the sentence so that later duplicates on
                        # this line are actually filtered out.
                        cand_sents.add(cand[0])
                        fv = self.get_fv(cand, src)
                        tuples.append((fv, cand[0]))
                cands, feature_vectors = self.filter_candidates(tuples)

                assert len(cands) == len(feature_vectors)
                all_src.append(src)
                all_cands.append(cands)
                all_features.append(feature_vectors)

        # Report the sizes of the first sentence only, falling back to 0 when
        # there is no sentence or the first sentence has no usable candidate.
        first_cands = len(all_features[0]) if all_features else 0
        first_dim = len(all_features[0][0]) if first_cands else 0
        print("Data size: ", len(all_features), first_cands, first_dim)
        print("Done extracting features")
        return all_features, all_cands, all_src
79 | 


--------------------------------------------------------------------------------
/ranking/models/mr_ranker.py:
--------------------------------------------------------------------------------
 1 | import torch, random
 2 | import numpy as np
 3 | from torch.autograd import Variable
 4 | from models.base_ranker import BaseRanker
 5 | 
 6 | 
 7 | class MRRanker(BaseRanker):
 8 |     def __init__(self, epochs, lr, device):
 9 |         self.device = device
10 |         super().__init__(epochs, lr)
11 | 
12 |     def train(self, all_features, all_labels):
13 |         train_x_1, train_x_2, train_y = self._get_pairwise_features(all_features, all_labels)
14 | 
15 |         self.set_model(train_x_1.size(1))
16 |         self.model.to(self.device)
17 |         self.model.training = True
18 | 
19 |         loss_fn = torch.nn.MarginRankingLoss(margin=1.0)
20 |         optimizer = torch.optim.Adam(self.model.parameters(), lr=self.lr)
21 | 
22 |         print("Started training.")
23 | 
24 |         batch_size = 4096 * 16
25 |         for epoch in range(self.epochs):
26 |             print("Final data size", train_x_1.size(0), train_x_1.size(1))
27 |             permutation = torch.randperm(train_x_1.size(0))
28 |             for i in range(0, train_x_1.size(0), batch_size):
29 |                 indices = permutation[i:i + batch_size]
30 |                 y_pred_1 = self.model(train_x_1[indices])
31 |                 y_pred_2 = self.model(train_x_2[indices])
32 |                 loss = loss_fn(y_pred_1, y_pred_2, train_y[indices])
33 | 
34 |                 print("Epoch ", epoch, "Loss", loss.data.cpu().numpy().tolist())
35 | 
36 |                 optimizer.zero_grad()
37 |                 loss.backward()
38 |                 optimizer.step()
39 | 
40 |         print("Done training.")
41 |         self.model.eval()
42 |         self.model.to("cpu")
43 | 
44 |     def _get_pairwise_features(self, all_features, all_labels):
45 |         train_labels = []
46 |         train_features_1, train_features_2 = [], []
47 | 
48 |         for feats, ls in zip(all_features, all_labels):
49 |             for i, sf1 in enumerate(feats):
50 |                 for j, sf2 in enumerate(feats):
51 |                     if abs(ls[i] - ls[j]) > 0.1:
52 |                         train_features_1.append(sf1)
53 |                         train_features_2.append(sf2)
54 |                         train_labels.append(float(np.sign(ls[i] - ls[j])))
55 | 
56 | 
57 |         self.binner.fit(np.array(train_features_1))
58 |         train_features_1 = self.binner.transform(np.array(train_features_1))
59 |         train_features_2 = self.binner.transform(np.array(train_features_2))
60 |         assert len(train_labels) == len(train_features_1) == len(train_features_2)
61 |         print("Pairwise data size: ", len(train_features_2))
62 | 
63 |         train_x_1 = Variable(torch.FloatTensor(train_features_1).to(self.device))
64 |         train_x_2 = Variable(torch.FloatTensor(train_features_2).to(self.device))
65 |         train_y = Variable(torch.FloatTensor(train_labels).to(self.device), requires_grad=False)
66 |         train_y = torch.unsqueeze(train_y, 1)
67 |         return  train_x_1, train_x_2, train_y
68 | 


--------------------------------------------------------------------------------
/DiscourseSimplification/src/main/java/org/lambda3/text/simplification/discourse/model/Content.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * ==========================License-Start=============================
 3 |  * DiscourseSimplification : Content
 4 |  *
 5 |  * Copyright © 2017 Lambda³
 6 |  *
 7 |  * GNU General Public License 3
 8 |  * This program is free software: you can redistribute it and/or modify
 9 |  * it under the terms of the GNU General Public License as published by
10 |  * the Free Software Foundation, either version 3 of the License, or
11 |  * (at your option) any later version.
12 |  *
13 |  * This program is distributed in the hope that it will be useful,
14 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 |  * GNU General Public License for more details.
17 |  *
18 |  * You should have received a copy of the GNU General Public License
19 |  * along with this program.  If not, see http://www.gnu.org/licenses/.
20 |  * ==========================License-End==============================
21 |  */
22 | 
23 | package org.lambda3.text.simplification.discourse.model;
24 | 
25 | import com.fasterxml.jackson.annotation.JsonAutoDetect;
26 | import com.fasterxml.jackson.annotation.PropertyAccessor;
27 | import com.fasterxml.jackson.core.JsonProcessingException;
28 | import com.fasterxml.jackson.databind.ObjectMapper;
29 | import com.fasterxml.jackson.databind.module.SimpleModule;
30 | import edu.stanford.nlp.trees.Tree;
31 | import org.lambda3.text.simplification.discourse.model.serializer.TreeDeserializer;
32 | import org.lambda3.text.simplification.discourse.model.serializer.TreeSerializer;
33 | 
34 | import java.io.File;
35 | import java.io.IOException;
36 | 
37 | public abstract class Content {
38 | 	private static final ObjectMapper MAPPER = new ObjectMapper();
39 | 	private static final SimpleModule MODULE = new SimpleModule();
40 | 
41 | 	static {
42 | 		MAPPER.setVisibility(PropertyAccessor.ALL, JsonAutoDetect.Visibility.NONE);
43 | 		MAPPER.setVisibility(PropertyAccessor.FIELD, JsonAutoDetect.Visibility.ANY);
44 | 
45 | 		// register custom de-/serializers
46 | 		MODULE.addSerializer(Tree.class, new TreeSerializer());
47 | 		MODULE.addDeserializer(Tree.class, new TreeDeserializer());
48 | 
49 | 		MAPPER.registerModule(MODULE);
50 | 	}
51 | 
52 | 	public static <T extends Content> T deserializeFromJSON(String json, Class<T> clazz) throws IOException {
53 | 		return MAPPER.readValue(json, clazz);
54 | 	}
55 | 
56 | 	public static <T extends Content> T deserializeFromJSON(File file, Class<T> clazz) throws IOException {
57 | 		return MAPPER.readValue(file, clazz);
58 | 	}
59 | 
60 | 	public String prettyPrintJSON() throws JsonProcessingException {
61 | 		return MAPPER.writerWithDefaultPrettyPrinter().writeValueAsString(this);
62 | 	}
63 | 
64 | 	public String serializeToJSON() throws JsonProcessingException {
65 | 		return MAPPER.writeValueAsString(this);
66 | 	}
67 | 
68 | 	public void serializeToJSON(File file) throws IOException {
69 | 		MAPPER.writeValue(file, this);
70 | 	}
71 | }
72 | 


--------------------------------------------------------------------------------
/DiscourseSimplification/src/main/java/org/lambda3/text/simplification/discourse/utils/ner/NERString.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * ==========================License-Start=============================
 3 |  * DiscourseSimplification : NERString
 4 |  *
 5 |  * Copyright © 2017 Lambda³
 6 |  *
 7 |  * GNU General Public License 3
 8 |  * This program is free software: you can redistribute it and/or modify
 9 |  * it under the terms of the GNU General Public License as published by
10 |  * the Free Software Foundation, either version 3 of the License, or
11 |  * (at your option) any later version.
12 |  *
13 |  * This program is distributed in the hope that it will be useful,
14 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 |  * GNU General Public License for more details.
17 |  *
18 |  * You should have received a copy of the GNU General Public License
19 |  * along with this program.  If not, see http://www.gnu.org/licenses/.
20 |  * ==========================License-End==============================
21 |  */
22 | 
23 | package org.lambda3.text.simplification.discourse.utils.ner;
24 | 
25 | import edu.stanford.nlp.ling.Word;
26 | 
27 | import java.util.ArrayList;
28 | import java.util.List;
29 | import java.util.stream.Collectors;
30 | 
31 | /**
32 |  *
33 |  */
34 | public class NERString {
35 |     public static final String NO_CATEGORY = "O";
36 | 
37 |     protected final List<NERToken> tokens;
38 |     private List<NERTokenGroup> groups;
39 | 
40 |     public NERString(List<NERToken> tokens) {
41 |         this.tokens = tokens;
42 |         this.createGroups();
43 |     }
44 | 
45 |     private void createGroups() {
46 |         this.groups = new ArrayList<>();
47 | 
48 |         String lastCategory = null;
49 |         List<NERToken> currGroupTokens = new ArrayList<>();
50 |         for (NERToken nerToken : this.tokens) {
51 | 
52 |             if ((lastCategory != null) && (!nerToken.getCategory().equals(lastCategory))) {
53 |                 // add
54 |                 this.groups.add(new NERTokenGroup(currGroupTokens));
55 |                 currGroupTokens = new ArrayList<>();
56 |             }
57 | 
58 |             currGroupTokens.add(nerToken);
59 |             lastCategory = nerToken.getCategory();
60 |         }
61 | 
62 |         // add
63 |         this.groups.add(new NERTokenGroup(currGroupTokens));
64 |     }
65 | 
66 |     public List<NERToken> getTokens() {
67 |         return tokens;
68 |     }
69 | 
70 |     public List<NERTokenGroup> getGroups() {
71 |         return groups;
72 |     }
73 | 
74 |     private List<Word> getWords(int fromIndex, int toIndex) {
75 |         return tokens.subList(fromIndex, toIndex).stream().map(t -> new Word(t.getText())).collect(Collectors.toList());
76 |     }
77 | 
78 |     public List<Word> getWords() {
79 |         return getWords(0, tokens.size());
80 |     }
81 | 
82 |     @Override
83 |     public String toString() {
84 |         return tokens.stream().map(NERToken::toString).collect(Collectors.joining("\n"));
85 |     }
86 | }
87 | 


--------------------------------------------------------------------------------
/DiscourseSimplification/src/test/java/org/lambda3/text/simplification/discourse/processing/DiscourseSimplifierTest.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * ==========================License-Start=============================
 3 |  * DiscourseSimplification : DiscourseSimplifierTest
 4 |  *
 5 |  * Copyright © 2017 Lambda³
 6 |  *
 7 |  * GNU General Public License 3
 8 |  * This program is free software: you can redistribute it and/or modify
 9 |  * it under the terms of the GNU General Public License as published by
10 |  * the Free Software Foundation, either version 3 of the License, or
11 |  * (at your option) any later version.
12 |  *
13 |  * This program is distributed in the hope that it will be useful,
14 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 |  * GNU General Public License for more details.
17 |  *
18 |  * You should have received a copy of the GNU General Public License
19 |  * along with this program.  If not, see http://www.gnu.org/licenses/.
20 |  * ==========================License-End==============================
21 |  */
22 | 
23 | package org.lambda3.text.simplification.discourse.processing;
24 | 
25 | import org.junit.jupiter.api.Assertions;
26 | import org.junit.jupiter.api.Test;
27 | import org.lambda3.text.simplification.discourse.model.OutSentence;
28 | import org.lambda3.text.simplification.discourse.model.SimplificationContent;
29 | import org.slf4j.LoggerFactory;
30 | 
31 | import java.io.File;
32 | import java.io.IOException;
33 | 
34 | /**
35 |  *
36 |  */
37 | class DiscourseSimplifierTest {
38 |     private org.slf4j.Logger log = LoggerFactory.getLogger(this.getClass());
39 |     private DiscourseSimplifier simplifier = new DiscourseSimplifier();
40 | 
41 |     @Test
42 |     void processSingleSentence() {
43 |         String text = "Peter went to Paris because he likes the city.";
44 |         SimplificationContent c = simplifier.doDiscourseSimplification(text, ProcessingType.WHOLE);
45 | 
46 |         Assertions.assertEquals(1, c.getSentences().size());
47 |         OutSentence sent = c.getSentences().get(0);
48 | 
49 |         Assertions.assertEquals(2, sent.getElements().size());
50 |     }
51 | 
52 |     @Test
53 |     void serializationTest() throws IOException {
54 |         String text = "After graduating from Columbia University in 1983, Barack Obama worked as a community organizer in Chicago.";
55 |         SimplificationContent c = simplifier.doDiscourseSimplification(text, ProcessingType.WHOLE);
56 | 
57 |         final String filename = "tmp-w8weg3q493ewqieh.json";
58 | 
59 |         log.info("SAVE TO FILE...");
60 |         c.serializeToJSON(new File(filename));
61 | 
62 |         log.info("LOAD FROM FILE...");
63 |         SimplificationContent loaded = SimplificationContent.deserializeFromJSON(new File(filename), SimplificationContent.class);
64 | 
65 |         log.info(loaded.prettyPrintJSON());
66 |         log.info("---------------------------------");
67 |         log.info(loaded.defaultFormat(false));
68 | 
69 |         log.info("DELETE FILE...");
70 |         File file = new File(filename);
71 |         file.delete();
72 |     }
73 | 
74 | }


--------------------------------------------------------------------------------
/DiscourseSimplification/src/main/resources/attribution_verbs.conf:
--------------------------------------------------------------------------------
  1 | attribution_verbs = [
  2 | comment,
  3 | have faith in,
  4 | consider,
  5 | demand,
  6 | apprise,
  7 | report,
  8 | evince,
  9 | identify,
 10 | enlighten,
 11 | utter,
 12 | ruminate,
 13 | give away,
 14 | discern,
 15 | hold,
 16 | acknowledge,
 17 | explain,
 18 | hypothesize,
 19 | forbid,
 20 | shout,
 21 | theorise,
 22 | betray,
 23 | turn down,
 24 | traverse ,
 25 | pipe up,
 26 | cogitate,
 27 | confide,
 28 | hope,
 29 | dispute,
 30 | notify,
 31 | conjecture,
 32 | televise,
 33 | signify ,
 34 | read,
 35 | propose,
 36 | void,
 37 | express,
 38 | perceive,
 39 | mention,
 40 | meditate,
 41 | insist,
 42 | presume,
 43 | judge,
 44 | compute,
 45 | speculate,
 46 | discuss,
 47 | counter,
 48 | reveal,
 49 | contradict,
 50 | conceive,
 51 | proclaim,
 52 | hypothesise,
 53 | ascertain,
 54 | signal,
 55 | mean,
 56 | respond,
 57 | prohibit,
 58 | signify,
 59 | weight,
 60 | urge,
 61 | repudiate,
 62 | pronounce,
 63 | deduce,
 64 | asseverate,
 65 | design,
 66 | expect,
 67 | critique,
 68 | adjudge,
 69 | enounce,
 70 | wonder,
 71 | educate,
 72 | detect,
 73 | deliberate,
 74 | confess,
 75 | rehearse,
 76 | publish,
 77 | verbalize,
 78 | veto,
 79 | state,
 80 | suspect,
 81 | disprove,
 82 | blur,
 83 | manifest,
 84 | disclose,
 85 | reiterate,
 86 | avow,
 87 | slur,
 88 | disagree,
 89 | communicate,
 90 | enunciate,
 91 | disallow,
 92 | disclaim,
 93 | contemplate,
 94 | reason,
 95 | brood,
 96 | imagine,
 97 | distinguish,
 98 | estimate,
 99 | narrate,
100 | surmise ,
101 | remark,
102 | theorize,
103 | clarify,
104 | study,
105 | disavow,
106 | keep back,
107 | recollect,
108 | display,
109 | admit,
110 | credit,
111 | belie,
112 | entertain,
113 | verbalise,
114 | dismiss,
115 | argue,
116 | think,
117 | recite,
118 | invalidate,
119 | abjure,
120 | speak up,
121 | feel,
122 | relate,
123 | renounce,
124 | articulate,
125 | assess,
126 | instruct,
127 | guess ,
128 | esteem,
129 | trust,
130 | teach,
131 | speak,
132 | ventilate,
133 | guess,
134 | edify,
135 | acquaint,
136 | connote,
137 | vocalize,
138 | question,
139 | mediate,
140 | submit,
141 | mark,
142 | indicate,
143 | iterate ,
144 | whisper,
145 | familiarize,
146 | tell,
147 | garble,
148 | offer,
149 | share,
150 | expose,
151 | regard,
152 | refuse,
153 | muse,
154 | clue,
155 | assert,
156 | observe,
157 | differentiate,
158 | argue against,
159 | recount,
160 | believe,
161 | count,
162 | reflect on,
163 | affirm,
164 | recall,
165 | anticipate,
166 | spill,
167 | controvert,
168 | air,
169 | warn,
170 | record,
171 | suppose,
172 | espouse,
173 | voice,
174 | declare,
175 | announce,
176 | exhibit,
177 | claim,
178 | gather,
179 | recognize,
180 | describe,
181 | influence,
182 | predicate,
183 | denote,
184 | say,
185 | deem,
186 | embrace,
187 | contest,
188 | sense,
189 | phrase,
190 | allege,
191 | publicise,
192 | surmise,
193 | ponder,
194 | discriminate,
195 | refute,
196 | agree,
197 | divulge,
198 | couch,
199 | note,
200 | discredit,
201 | reject,
202 | answer,
203 | oppose,
204 | advise,
205 | infer,
206 | bear in mind,
207 | repeat,
208 | intend,
209 | allow,
210 | mispronounce,
211 | reckon,
212 | familiarise,
213 | vocalise,
214 | make known,
215 | reflect,
216 | concede,
217 | purpose,
218 | recognise,
219 | recount ,
220 | disown,
221 | broadcast,
222 | deny,
223 | let slip,
224 | renounce ,
225 | remember,
226 | rationalize,
227 | assume,
228 | bid,
229 | register,
230 | make out,
231 | withhold,
232 | inform,
233 | command,
234 | unburden ,
235 | publicize,
236 | recant,
237 | order,
238 | talk,
239 | know,
240 | promote,
241 | advertise,
242 | swear,
243 | emphasize,
244 | underline,
245 | testify,
246 | cite,
247 | message,
248 | ask
249 | ]


--------------------------------------------------------------------------------
/DiscourseSimplification/src/main/java/org/lambda3/text/simplification/discourse/runner/discourse_tree/model/Leaf.java:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * ==========================License-Start=============================
  3 |  * DiscourseSimplification : Leaf
  4 |  *
  5 |  * Copyright © 2017 Lambda³
  6 |  *
  7 |  * GNU General Public License 3
  8 |  * This program is free software: you can redistribute it and/or modify
  9 |  * it under the terms of the GNU General Public License as published by
 10 |  * the Free Software Foundation, either version 3 of the License, or
 11 |  * (at your option) any later version.
 12 |  *
 13 |  * This program is distributed in the hope that it will be useful,
 14 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 15 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 16 |  * GNU General Public License for more details.
 17 |  *
 18 |  * You should have received a copy of the GNU General Public License
 19 |  * along with this program.  If not, see http://www.gnu.org/licenses/.
 20 |  * ==========================License-End==============================
 21 |  */
 22 | 
 23 | package org.lambda3.text.simplification.discourse.runner.discourse_tree.model;
 24 | 
 25 | import edu.stanford.nlp.trees.Tree;
 26 | import org.lambda3.text.simplification.discourse.utils.PrettyTreePrinter;
 27 | import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeException;
 28 | import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeExtractionUtils;
 29 | import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeParser;
 30 | import org.lambda3.text.simplification.discourse.utils.words.WordsUtils;
 31 | 
 32 | import java.util.ArrayList;
 33 | import java.util.Collections;
 34 | import java.util.List;
 35 | 
 36 | /**
 37 |  *
 38 |  */
 39 | public class Leaf extends DiscourseTree {
 40 |     private Tree parseTree;
 41 |     private boolean allowSplit; // true, if extraction-rules will be applied on the text
 42 |     private boolean toSimpleContext;
 43 | 
 44 |     public Leaf() {
 45 |         super("UNKNOWN");
 46 |     }
 47 | 
 48 |     public Leaf(String extractionRule, Tree parseTree) {
 49 |         super(extractionRule);
 50 |         this.parseTree = parseTree;
 51 |         this.allowSplit = true;
 52 |         this.toSimpleContext = false;
 53 |     }
 54 | 
 55 |     // not efficient -> prefer to use constructor with tree
 56 |     public Leaf(String extractionRule, String text) throws ParseTreeException {
 57 |         this(extractionRule, ParseTreeParser.parse(text));
 58 |     }
 59 | 
 60 |     public void dontAllowSplit() {
 61 |         this.allowSplit = false;
 62 |     }
 63 | 
 64 |     public Tree getParseTree() {
 65 |         return parseTree;
 66 |     }
 67 | 
 68 |     public void setParseTree(Tree parseTree) {
 69 |         this.parseTree = parseTree;
 70 |     }
 71 | 
 72 |     public String getText() {
 73 |         return WordsUtils.wordsToString(ParseTreeExtractionUtils.getContainingWords(parseTree));
 74 |     }
 75 | 
 76 |     public void setToSimpleContext(boolean toSimpleContext) {
 77 |         this.toSimpleContext = toSimpleContext;
 78 |     }
 79 | 
 80 |     public boolean isAllowSplit() {
 81 |         return allowSplit;
 82 |     }
 83 | 
 84 |     public boolean isToSimpleContext() {
 85 |         return toSimpleContext;
 86 |     }
 87 | 
 88 |     // VISUALIZATION ///////////////////////////////////////////////////////////////////////////////////////////////////
 89 | 
 90 |     @Override
 91 |     public List<String> getPTPCaption() {
 92 |         return Collections.singletonList("|||" + getText());
 93 |     }
 94 | 
 95 |     @Override
 96 |     public List<PrettyTreePrinter.Edge> getPTPEdges() {
 97 |         return new ArrayList<>();
 98 |     }
 99 | }
100 | 


--------------------------------------------------------------------------------
/DiscourseSimplification/src/main/java/org/lambda3/text/simplification/discourse/runner/discourse_tree/extraction/rules/ReferenceInitialAdverbialExtractor.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * ==========================License-Start=============================
 3 |  * DiscourseSimplification : ReferenceInitialAdverbialExtractor
 4 |  *
 5 |  * Copyright © 2017 Lambda³
 6 |  *
 7 |  * GNU General Public License 3
 8 |  * This program is free software: you can redistribute it and/or modify
 9 |  * it under the terms of the GNU General Public License as published by
10 |  * the Free Software Foundation, either version 3 of the License, or
11 |  * (at your option) any later version.
12 |  *
13 |  * This program is distributed in the hope that it will be useful,
14 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 |  * GNU General Public License for more details.
17 |  *
18 |  * You should have received a copy of the GNU General Public License
19 |  * along with this program.  If not, see http://www.gnu.org/licenses/.
20 |  * ==========================License-End==============================
21 |  */
22 | 
23 | package org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.rules;
24 | 
25 | import edu.stanford.nlp.ling.Word;
26 | import edu.stanford.nlp.trees.tregex.TregexMatcher;
27 | import edu.stanford.nlp.trees.tregex.TregexPattern;
28 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.Relation;
29 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.ExtractionRule;
30 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.Extraction;
31 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.model.Leaf;
32 | import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeException;
33 | import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeExtractionUtils;
34 | import org.lambda3.text.simplification.discourse.utils.words.WordsUtils;
35 | 
36 | import java.util.ArrayList;
37 | import java.util.Arrays;
38 | import java.util.List;
39 | import java.util.Optional;
40 | 
41 | /**
42 |  *
43 |  */
44 | public class ReferenceInitialAdverbialExtractor extends ExtractionRule {
45 | 
46 |     @Override
47 |     public Optional<Extraction> extract(Leaf leaf) throws ParseTreeException {
48 | 
49 |         TregexPattern p = TregexPattern.compile("ROOT <<: (S|SINV <, (ADVP|PP=adv))");
50 |         TregexMatcher matcher = p.matcher(leaf.getParseTree());
51 | 
52 |         if (matcher.findAt(leaf.getParseTree())) {
53 |             List<Word> cuePhraseWords = ParseTreeExtractionUtils.getContainingWords(matcher.getNode("adv"));
54 | 
55 |             // the right constituent
56 |             List<Word> words = new ArrayList<>();
57 |             words.addAll(ParseTreeExtractionUtils.getPrecedingWords(leaf.getParseTree(), matcher.getNode("adv"), false));
58 |             words.addAll(ParseTreeExtractionUtils.getFollowingWords(leaf.getParseTree(), matcher.getNode("adv"), false));
59 |             Leaf rightConstituent = new Leaf(getClass().getSimpleName(), WordsUtils.wordsToProperSentenceString(words));
60 | 
61 |             // relation
62 |             Optional<Relation> relation = classifer.classifyAdverbial(cuePhraseWords);
63 | 
64 |             // only if present
65 |             if (relation.isPresent()) {
66 |                 Extraction res = new Extraction(
67 |                     getClass().getSimpleName(),
68 |                     true,
69 |                     cuePhraseWords,
70 |                     relation.get(),
71 |                     true,
72 |                     Arrays.asList(rightConstituent)
73 |                 );
74 | 
75 |                 return Optional.of(res);
76 |             }
77 |         }
78 | 
79 |         return Optional.empty();
80 |     }
81 | }
82 | 


--------------------------------------------------------------------------------
/DiscourseSimplification/src/main/java/org/lambda3/text/simplification/discourse/runner/discourse_tree/extraction/rules/ReferenceInitialConjunctionExtractor.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * ==========================License-Start=============================
 3 |  * DiscourseSimplification : ReferenceInitialConjunctionExtractor
 4 |  *
 5 |  * Copyright © 2017 Lambda³
 6 |  *
 7 |  * GNU General Public License 3
 8 |  * This program is free software: you can redistribute it and/or modify
 9 |  * it under the terms of the GNU General Public License as published by
10 |  * the Free Software Foundation, either version 3 of the License, or
11 |  * (at your option) any later version.
12 |  *
13 |  * This program is distributed in the hope that it will be useful,
14 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 |  * GNU General Public License for more details.
17 |  *
18 |  * You should have received a copy of the GNU General Public License
19 |  * along with this program.  If not, see http://www.gnu.org/licenses/.
20 |  * ==========================License-End==============================
21 |  */
22 | 
23 | package org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.rules;
24 | 
25 | import edu.stanford.nlp.ling.Word;
26 | import edu.stanford.nlp.trees.tregex.TregexMatcher;
27 | import edu.stanford.nlp.trees.tregex.TregexPattern;
28 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.Relation;
29 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.Extraction;
30 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.ExtractionRule;
31 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.model.Leaf;
32 | import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeException;
33 | import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeExtractionUtils;
34 | import org.lambda3.text.simplification.discourse.utils.words.WordsUtils;
35 | 
36 | import java.util.ArrayList;
37 | import java.util.Arrays;
38 | import java.util.List;
39 | import java.util.Optional;
40 | 
41 | /**
42 |  *
43 |  */
44 | public class ReferenceInitialConjunctionExtractor extends ExtractionRule {
45 | 
46 |     @Override
47 |     public Optional<Extraction> extract(Leaf leaf) throws ParseTreeException {
48 | 
49 |         TregexPattern p = TregexPattern.compile("ROOT <<: (S|SINV <, (CC=cc))");
50 |         TregexMatcher matcher = p.matcher(leaf.getParseTree());
51 | 
52 |         if (matcher.findAt(leaf.getParseTree())) {
53 |             List<Word> cuePhraseWords = ParseTreeExtractionUtils.getContainingWords(matcher.getNode("cc"));
54 | 
55 |             // the right constituent
56 |             List<Word> words = new ArrayList<>();
57 |             words.addAll(ParseTreeExtractionUtils.getPrecedingWords(leaf.getParseTree(), matcher.getNode("cc"), false));
58 |             words.addAll(ParseTreeExtractionUtils.getFollowingWords(leaf.getParseTree(), matcher.getNode("cc"), false));
59 |             Leaf rightConstituent = new Leaf(getClass().getSimpleName(), WordsUtils.wordsToProperSentenceString(words));
60 | 
61 |             // relation
62 |             Optional<Relation> relation = classifer.classifyCoordinating(cuePhraseWords);
63 | 
64 |             // only if present
65 |             if (relation.isPresent()) {
66 |                 Extraction res = new Extraction(
67 |                     getClass().getSimpleName(),
68 |                     true,
69 |                     cuePhraseWords,
70 |                     relation.get(),
71 |                     true,
72 |                     Arrays.asList(rightConstituent)
73 |                 );
74 | 
75 |                 return Optional.of(res);
76 |             }
77 |         }
78 | 
79 |         return Optional.empty();
80 |     }
81 | }
82 | 


--------------------------------------------------------------------------------
/DiscourseSimplification/src/main/java/org/lambda3/text/simplification/discourse/runner/discourse_tree/extraction/rules/ReferenceMedialAdverbialExtractor.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * ==========================License-Start=============================
 3 |  * DiscourseSimplification : ReferenceMedialAdverbialExtractor
 4 |  *
 5 |  * Copyright © 2017 Lambda³
 6 |  *
 7 |  * GNU General Public License 3
 8 |  * This program is free software: you can redistribute it and/or modify
 9 |  * it under the terms of the GNU General Public License as published by
10 |  * the Free Software Foundation, either version 3 of the License, or
11 |  * (at your option) any later version.
12 |  *
13 |  * This program is distributed in the hope that it will be useful,
14 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 |  * GNU General Public License for more details.
17 |  *
18 |  * You should have received a copy of the GNU General Public License
19 |  * along with this program.  If not, see http://www.gnu.org/licenses/.
20 |  * ==========================License-End==============================
21 |  */
22 | 
23 | package org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.rules;
24 | 
25 | import edu.stanford.nlp.ling.Word;
26 | import edu.stanford.nlp.trees.tregex.TregexMatcher;
27 | import edu.stanford.nlp.trees.tregex.TregexPattern;
28 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.Relation;
29 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.ExtractionRule;
30 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.Extraction;
31 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.model.Leaf;
32 | import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeException;
33 | import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeExtractionUtils;
34 | import org.lambda3.text.simplification.discourse.utils.words.WordsUtils;
35 | 
36 | import java.util.ArrayList;
37 | import java.util.Arrays;
38 | import java.util.List;
39 | import java.util.Optional;
40 | 
41 | /**
42 |  *
43 |  */
44 | public class ReferenceMedialAdverbialExtractor extends ExtractionRule {
45 | 
46 |     @Override
47 |     public Optional<Extraction> extract(Leaf leaf) throws ParseTreeException {
48 | 
49 |         TregexPattern p = TregexPattern.compile("ROOT <<: (S|SINV < (ADVP|PP=adv , /,/=begin . /,/=end $,, NP $.. VP))");
50 |         TregexMatcher matcher = p.matcher(leaf.getParseTree());
51 | 
52 |         if (matcher.findAt(leaf.getParseTree())) {
53 |             List<Word> cuePhraseWords = ParseTreeExtractionUtils.getContainingWords(matcher.getNode("adv"));
54 | 
55 |             // the right constituent
56 |             List<Word> words = new ArrayList<>();
57 |             words.addAll(ParseTreeExtractionUtils.getPrecedingWords(leaf.getParseTree(), matcher.getNode("begin"), false));
58 |             words.addAll(ParseTreeExtractionUtils.getFollowingWords(leaf.getParseTree(), matcher.getNode("end"), false));
59 |             Leaf rightConstituent = new Leaf(getClass().getSimpleName(), WordsUtils.wordsToProperSentenceString(words));
60 | 
61 |             // relation
62 |             Optional<Relation> relation = classifer.classifyAdverbial(cuePhraseWords);
63 | 
64 |             // only if present
65 |             if (relation.isPresent()) {
66 |                 Extraction res = new Extraction(
67 |                     getClass().getSimpleName(),
68 |                     true,
69 |                     cuePhraseWords,
70 |                     relation.get(),
71 |                     true,
72 |                     Arrays.asList(rightConstituent)
73 |                 );
74 | 
75 |                 return Optional.of(res);
76 |             }
77 |         }
78 | 
79 |         return Optional.empty();
80 |     }
81 | }
82 | 


--------------------------------------------------------------------------------
/DiscourseSimplification/src/main/java/org/lambda3/text/simplification/discourse/runner/discourse_tree/extraction/rules/ReferenceFinalAdverbialExtractor.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * ==========================License-Start=============================
 3 |  * DiscourseSimplification : ReferenceFinalAdverbialExtractor
 4 |  *
 5 |  * Copyright © 2017 Lambda³
 6 |  *
 7 |  * GNU General Public License 3
 8 |  * This program is free software: you can redistribute it and/or modify
 9 |  * it under the terms of the GNU General Public License as published by
10 |  * the Free Software Foundation, either version 3 of the License, or
11 |  * (at your option) any later version.
12 |  *
13 |  * This program is distributed in the hope that it will be useful,
14 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 |  * GNU General Public License for more details.
17 |  *
18 |  * You should have received a copy of the GNU General Public License
19 |  * along with this program.  If not, see http://www.gnu.org/licenses/.
20 |  * ==========================License-End==============================
21 |  */
22 | 
23 | package org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.rules;
24 | 
25 | import edu.stanford.nlp.ling.Word;
26 | import edu.stanford.nlp.trees.tregex.TregexMatcher;
27 | import edu.stanford.nlp.trees.tregex.TregexPattern;
28 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.Relation;
29 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.Extraction;
30 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.ExtractionRule;
31 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.model.Leaf;
32 | import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeException;
33 | import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeExtractionUtils;
34 | import org.lambda3.text.simplification.discourse.utils.words.WordsUtils;
35 | 
36 | import java.util.ArrayList;
37 | import java.util.Arrays;
38 | import java.util.List;
39 | import java.util.Optional;
40 | 
41 | /**
42 |  *
43 |  */
44 | public class ReferenceFinalAdverbialExtractor extends ExtractionRule {
45 | 
46 |     @Override
47 |     public Optional<Extraction> extract(Leaf leaf) throws ParseTreeException {
48 | 
49 |         TregexPattern p = TregexPattern.compile("ROOT <<: (S=s < (VP <+(VP) (ADVP|PP=adv))) : (=s [<<- =adv | <<- (/\\./ , =adv)])");
50 |         TregexMatcher matcher = p.matcher(leaf.getParseTree());
51 | 
52 |         if (matcher.findAt(leaf.getParseTree())) {
53 |             List<Word> cuePhraseWords = ParseTreeExtractionUtils.getContainingWords(matcher.getNode("adv"));
54 | 
55 |             // the right constituent
56 |             List<Word> words = new ArrayList<>();
57 |             words.addAll(ParseTreeExtractionUtils.getPrecedingWords(leaf.getParseTree(), matcher.getNode("adv"), false));
58 |             words.addAll(ParseTreeExtractionUtils.getFollowingWords(leaf.getParseTree(), matcher.getNode("adv"), false));
59 |             Leaf rightConstituent = new Leaf(getClass().getSimpleName(), WordsUtils.wordsToProperSentenceString(words));
60 | 
61 |             // relation
62 |             Optional<Relation> relation = classifer.classifyAdverbial(cuePhraseWords);
63 | 
64 |             // only if present
65 |             if (relation.isPresent()) {
66 |                 Extraction res = new Extraction(
67 |                     getClass().getSimpleName(),
68 |                     true,
69 |                     cuePhraseWords,
70 |                     relation.get(),
71 |                     true,
72 |                     Arrays.asList(rightConstituent)
73 |                 );
74 | 
75 |                 return Optional.of(res);
76 |             }
77 |         }
78 | 
79 |         return Optional.empty();
80 |     }
81 | }
82 | 


--------------------------------------------------------------------------------
/DiscourseSimplification/src/main/java/org/lambda3/text/simplification/discourse/utils/ner/NERStringParser.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * ==========================License-Start=============================
 3 |  * DiscourseSimplification : NERStringParser
 4 |  *
 5 |  * Copyright © 2017 Lambda³
 6 |  *
 7 |  * GNU General Public License 3
 8 |  * This program is free software: you can redistribute it and/or modify
 9 |  * it under the terms of the GNU General Public License as published by
10 |  * the Free Software Foundation, either version 3 of the License, or
11 |  * (at your option) any later version.
12 |  *
13 |  * This program is distributed in the hope that it will be useful,
14 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 |  * GNU General Public License for more details.
17 |  *
18 |  * You should have received a copy of the GNU General Public License
19 |  * along with this program.  If not, see http://www.gnu.org/licenses/.
20 |  * ==========================License-End==============================
21 |  */
22 | 
23 | package org.lambda3.text.simplification.discourse.utils.ner;
24 | 
25 | import edu.stanford.nlp.ie.AbstractSequenceClassifier;
26 | import edu.stanford.nlp.ie.crf.CRFClassifier;
27 | import edu.stanford.nlp.trees.Tree;
28 | import org.lambda3.text.simplification.discourse.utils.ner.tner.TNERString;
29 | import org.lambda3.text.simplification.discourse.utils.ner.tner.TNERToken;
30 | import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeExtractionUtils;
31 | import org.lambda3.text.simplification.discourse.utils.words.WordsUtils;
32 | 
33 | import java.util.ArrayList;
34 | import java.util.List;
35 | 
36 | /**
37 |  *
38 |  */
39 | public class NERStringParser {
40 | 
41 |     private static final AbstractSequenceClassifier NER_CLASSIFIER = CRFClassifier.getClassifierNoExceptions("edu/stanford/nlp/models/ner/english.all.3class.distsim.crf.ser.gz");
42 | 
43 |     public static NERString parse(String text) {
44 |         List<NERToken> tokens = new ArrayList<>();
45 | 
46 |         String nerString = NER_CLASSIFIER.classifyToString(text);
47 |         String[] nerTokens = nerString.split(" ");
48 | 
49 |         int idx = 0;
50 |         for (String nerToken : nerTokens) {
51 |             int sep_idx = nerToken.lastIndexOf("/");
52 | 
53 |             // create text
54 |             String txt = nerToken.substring(0, sep_idx);
55 |             String category = nerToken.substring(sep_idx + 1);
56 |             NERToken token = new NERToken(idx, txt, category);
57 |             tokens.add(token);
58 | 
59 |             ++idx;
60 |         }
61 | 
62 |         return new NERString(tokens);
63 |     }
64 | 
65 |     public static TNERString parse(Tree parseTree) throws NERStringParseException {
66 |         List<TNERToken> tokens = new ArrayList<>();
67 | 
68 |         List<Integer> parseTreeLeafNumbers = ParseTreeExtractionUtils.getLeafNumbers(parseTree, parseTree);
69 |         String nerString = NER_CLASSIFIER.classifyToString(WordsUtils.wordsToString(parseTree.yieldWords()));
70 |         String[] nerTokens = nerString.split(" ");
71 | 
72 |         if (parseTreeLeafNumbers.size() != nerTokens.length) {
73 |             throw new NERStringParseException("Could not map NER string to parseTree");
74 |         }
75 | 
76 |         int idx = 0;
77 |         for (String nerToken : nerTokens) {
78 |             int sep_idx = nerToken.lastIndexOf("/");
79 | 
80 |             // create token
81 |             String text = nerToken.substring(0, sep_idx);
82 |             String category = nerToken.substring(sep_idx + 1);
83 |             TNERToken token = new TNERToken(idx, text, category, parseTree.getNodeNumber(parseTreeLeafNumbers.get(idx)));
84 |             tokens.add(token);
85 | 
86 |             ++idx;
87 |         }
88 | 
89 |         return new TNERString(tokens, parseTree);
90 |     }
91 | }
92 | 


--------------------------------------------------------------------------------
/DiscourseSimplification/src/main/java/org/lambda3/text/simplification/discourse/processing/ExtendedDiscourseSimplifier.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * ==========================License-Start=============================
 3 |  * DiscourseSimplification : ExtendedDiscourseSimplifier
 4 |  *
 5 |  * Copyright © 2017 Lambda³
 6 |  *
 7 |  * GNU General Public License 3
 8 |  * This program is free software: you can redistribute it and/or modify
 9 |  * it under the terms of the GNU General Public License as published by
10 |  * the Free Software Foundation, either version 3 of the License, or
11 |  * (at your option) any later version.
12 |  *
13 |  * This program is distributed in the hope that it will be useful,
14 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 |  * GNU General Public License for more details.
17 |  *
18 |  * You should have received a copy of the GNU General Public License
19 |  * along with this program.  If not, see http://www.gnu.org/licenses/.
20 |  * ==========================License-End==============================
21 |  */
22 | 
23 | package org.lambda3.text.simplification.discourse.processing;
24 | 
25 | import org.lambda3.text.simplification.discourse.model.SimplificationContent;
26 | import org.lambda3.text.simplification.discourse.utils.sentences.SentencesUtils;
27 | import org.slf4j.Logger;
28 | import org.slf4j.LoggerFactory;
29 | 
30 | import java.io.File;
31 | import java.io.FileNotFoundException;
32 | import java.io.IOException;
33 | import java.util.ArrayList;
34 | import java.util.Collections;
35 | import java.util.List;
36 | import java.util.stream.Collectors;
37 | 
38 | /**
39 |  *
40 |  */
41 | public class ExtendedDiscourseSimplifier extends DiscourseSimplifier {
42 |     private final Logger logger = LoggerFactory.getLogger(getClass());
43 | 
44 |     public static List<String> filterSentences(List<String> sentences, boolean shuffleSentences, Integer maxSentenceLength, Integer maxSentences) {
45 | 
46 |         // select sentences to doDiscourseSimplification
47 |         List<String> res = new ArrayList<>();
48 |         res.addAll(sentences);
49 | 
50 |         // shuffle
51 |         if (shuffleSentences) {
52 |             Collections.shuffle(res);
53 |         }
54 | 
55 |         // remove too long sentences
56 |         if (maxSentenceLength != null) {
57 |             res = res.stream().filter(s -> s.length() <= maxSentenceLength).collect(Collectors.toList());
58 |         }
59 | 
60 |         // limit number of sentences
61 |         if (maxSentences != null) {
62 |             if (res.size() > maxSentences) {
63 |                 res = res.subList(0, maxSentences);
64 |             }
65 |         }
66 | 
67 |         return res;
68 |     }
69 | 
70 |     public SimplificationContent process(File file, ProcessingType type, boolean shuffleSentences, Integer maxSentenceLength, Integer maxSentences) throws IOException {
71 |         return process(file, type, shuffleSentences, maxSentenceLength, maxSentences, false);
72 |     }
73 | 
74 |     public SimplificationContent process(File file, ProcessingType type, boolean shuffleSentences, Integer maxSentenceLength, Integer maxSentences, boolean separateLines) throws IOException {
75 |         return process(SentencesUtils.splitIntoSentencesFromFile(file, separateLines), type, shuffleSentences, maxSentenceLength, maxSentences);
76 |     }
77 | 
78 |     public SimplificationContent process(String text, ProcessingType type, boolean shuffleSentences, Integer maxSentenceLength, Integer maxSentences) {
79 |         return process(SentencesUtils.splitIntoSentences(text), type, shuffleSentences, maxSentenceLength, maxSentences);
80 |     }
81 | 
82 |     public SimplificationContent process(List<String> sentences, ProcessingType type, boolean shuffleSentences, Integer maxSentenceLength, Integer maxSentences) {
83 |         return doDiscourseSimplification(filterSentences(sentences, shuffleSentences, maxSentenceLength, maxSentences), type);
84 |     }
85 | }
86 | 


--------------------------------------------------------------------------------
/DiscourseSimplification/src/main/java/org/lambda3/text/simplification/discourse/runner/discourse_tree/extraction/rules/LeadNPExtractor.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * ==========================License-Start=============================
 3 |  * DiscourseSimplification : LeadNPExtractor
 4 |  *
 5 |  * Copyright © 2017 Lambda³
 6 |  *
 7 |  * GNU General Public License 3
 8 |  * This program is free software: you can redistribute it and/or modify
 9 |  * it under the terms of the GNU General Public License as published by
10 |  * the Free Software Foundation, either version 3 of the License, or
11 |  * (at your option) any later version.
12 |  *
13 |  * This program is distributed in the hope that it will be useful,
14 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 |  * GNU General Public License for more details.
17 |  *
18 |  * You should have received a copy of the GNU General Public License
19 |  * along with this program.  If not, see http://www.gnu.org/licenses/.
20 |  * ==========================License-End==============================
21 |  */
22 | 
23 | package org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.rules;
24 | 
25 | import edu.stanford.nlp.ling.Word;
26 | import edu.stanford.nlp.trees.tregex.TregexMatcher;
27 | import edu.stanford.nlp.trees.tregex.TregexPattern;
28 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.Relation;
29 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.Extraction;
30 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.ExtractionRule;
31 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.model.Leaf;
32 | import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeException;
33 | import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeExtractionUtils;
34 | import org.lambda3.text.simplification.discourse.utils.words.WordsUtils;
35 | 
36 | import java.util.ArrayList;
37 | import java.util.Arrays;
38 | import java.util.List;
39 | import java.util.Optional;
40 | 
41 | /**
42 |  *
43 |  */
44 | public class LeadNPExtractor extends ExtractionRule {
45 | 
46 | 	@Override
47 |     public Optional<Extraction> extract(Leaf leaf) throws ParseTreeException {
48 | 		
49 |         TregexPattern p = TregexPattern.compile("ROOT <<: (S|SINV <, (NP=np $+ (/,/ $+ NP & $++ VP=vp)))");
50 |         TregexMatcher matcher = p.matcher(leaf.getParseTree());
51 | 
52 |         while (matcher.findAt(leaf.getParseTree())) {
53 | 
54 |         	// rephrase
55 |             List<Word> leftConstituentWords = rephraseEnablement(matcher.getNode("np"), matcher.getNode("vp"));
56 |             Leaf leftConstituent = new Leaf(getClass().getSimpleName(), WordsUtils.wordsToProperSentenceString(leftConstituentWords));
57 |             leftConstituent.dontAllowSplit();
58 |             leftConstituent.setToSimpleContext(true);
59 | 
60 |             // the right, superordinate constituent
61 |             List<Word> rightConstituentWords = new ArrayList<>();
62 |             rightConstituentWords.addAll(ParseTreeExtractionUtils.getPrecedingWords(leaf.getParseTree(), matcher.getNode("np"), false));
63 |             rightConstituentWords.addAll(ParseTreeExtractionUtils.getFollowingWords(leaf.getParseTree(), matcher.getNode("np"), false));
64 |             Leaf rightConstituent = new Leaf(getClass().getSimpleName(), WordsUtils.wordsToProperSentenceString(rightConstituentWords));
65 | 
66 |             // relation
67 |             Relation relation = Relation.UNKNOWN_SUBORDINATION; //TODO
68 | 
69 |             Extraction res = new Extraction(
70 |                 getClass().getSimpleName(),
71 |                 false,
72 |                 null,
73 |                 relation,
74 |                 false,
75 |                 Arrays.asList(leftConstituent, rightConstituent)
76 |             );
77 | 
78 |             return Optional.of(res);
79 |         }
80 | 
81 |         return Optional.empty();
82 |     }
83 | }
84 | 
85 | 


--------------------------------------------------------------------------------
/DiscourseSimplification/src/main/java/org/lambda3/text/simplification/discourse/runner/discourse_tree/extraction/rules/AdjectivalAdverbialInitialExtractor.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * ==========================License-Start=============================
 3 |  * DiscourseSimplification : AdjectivalAdverbialInitialExtractor
 4 |  *
 5 |  * Copyright © 2017 Lambda³
 6 |  *
 7 |  * GNU General Public License 3
 8 |  * This program is free software: you can redistribute it and/or modify
 9 |  * it under the terms of the GNU General Public License as published by
10 |  * the Free Software Foundation, either version 3 of the License, or
11 |  * (at your option) any later version.
12 |  *
13 |  * This program is distributed in the hope that it will be useful,
14 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 |  * GNU General Public License for more details.
17 |  *
18 |  * You should have received a copy of the GNU General Public License
19 |  * along with this program.  If not, see http://www.gnu.org/licenses/.
20 |  * ==========================License-End==============================
21 |  */
22 | 
23 | package org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.rules;
24 | 
25 | import edu.stanford.nlp.ling.Word;
26 | import edu.stanford.nlp.trees.tregex.TregexMatcher;
27 | import edu.stanford.nlp.trees.tregex.TregexPattern;
28 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.Relation;
29 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.Extraction;
30 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.ExtractionRule;
31 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.model.Leaf;
32 | import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeException;
33 | import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeExtractionUtils;
34 | import org.lambda3.text.simplification.discourse.utils.words.WordsUtils;
35 | 
36 | import java.util.ArrayList;
37 | import java.util.Arrays;
38 | import java.util.List;
39 | import java.util.Optional;
40 | 
41 | /**
42 |  *
43 |  */
44 | public class AdjectivalAdverbialInitialExtractor extends ExtractionRule {
45 | 
46 | 	@Override
47 |     public Optional<Extraction> extract(Leaf leaf) throws ParseTreeException {
48 | 		
49 |         TregexPattern p = TregexPattern.compile("ROOT <<: (S|SINV <, (ADJP|ADVP=ad $+ (/,/ $++ VP=vp)))");
50 |         TregexMatcher matcher = p.matcher(leaf.getParseTree());
51 | 
52 |         while (matcher.findAt(leaf.getParseTree())) {
53 | 
54 |         	// rephrase
55 |             List<Word> leftConstituentWords = rephraseEnablement(matcher.getNode("ad"), matcher.getNode("vp"));
56 |             Leaf leftConstituent = new Leaf(getClass().getSimpleName(), WordsUtils.wordsToProperSentenceString(leftConstituentWords));
57 |             leftConstituent.dontAllowSplit();
58 |             leftConstituent.setToSimpleContext(true);
59 | 
60 |             // the right, superordinate constituent
61 |             List<Word> rightConstituentWords = new ArrayList<>();
62 |             rightConstituentWords.addAll(ParseTreeExtractionUtils.getPrecedingWords(leaf.getParseTree(), matcher.getNode("ad"), false));
63 |             rightConstituentWords.addAll(ParseTreeExtractionUtils.getFollowingWords(leaf.getParseTree(), matcher.getNode("ad"), false));
64 |             Leaf rightConstituent = new Leaf(getClass().getSimpleName(), WordsUtils.wordsToProperSentenceString(rightConstituentWords));
65 | 
66 |             // relation
67 |             Relation relation = Relation.UNKNOWN_SUBORDINATION; //TODO
68 | 
69 |             Extraction res = new Extraction(
70 |                 getClass().getSimpleName(),
71 |                 false,
72 |                 null,
73 |                 relation,
74 |                 false,
75 |                 Arrays.asList(leftConstituent, rightConstituent)
76 |             );
77 | 
78 |             return Optional.of(res);
79 |         }
80 | 
81 |         return Optional.empty();
82 |     }
83 | }
84 | 


--------------------------------------------------------------------------------
/DiscourseSimplification/src/main/java/org/lambda3/text/simplification/discourse/runner/discourse_tree/extraction/rules/SubordinationPreExtractor.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * ==========================License-Start=============================
 3 |  * DiscourseSimplification : SubordinationPreExtractor
 4 |  *
 5 |  * Copyright © 2017 Lambda³
 6 |  *
 7 |  * GNU General Public License 3
 8 |  * This program is free software: you can redistribute it and/or modify
 9 |  * it under the terms of the GNU General Public License as published by
10 |  * the Free Software Foundation, either version 3 of the License, or
11 |  * (at your option) any later version.
12 |  *
13 |  * This program is distributed in the hope that it will be useful,
14 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 |  * GNU General Public License for more details.
17 |  *
18 |  * You should have received a copy of the GNU General Public License
19 |  * along with this program.  If not, see http://www.gnu.org/licenses/.
20 |  * ==========================License-End==============================
21 |  */
22 | 
23 | package org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.rules;
24 | 
25 | import edu.stanford.nlp.ling.Word;
26 | import edu.stanford.nlp.trees.tregex.TregexMatcher;
27 | import edu.stanford.nlp.trees.tregex.TregexPattern;
28 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.Relation;
29 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.Extraction;
30 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.ExtractionRule;
31 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.model.Leaf;
32 | import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeException;
33 | import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeExtractionUtils;
34 | import org.lambda3.text.simplification.discourse.utils.words.WordsUtils;
35 | 
36 | import java.util.ArrayList;
37 | import java.util.Arrays;
38 | import java.util.List;
39 | import java.util.Optional;
40 | 
41 | /**
42 |  *
43 |  */
44 | public class SubordinationPreExtractor extends ExtractionRule {
45 | 
46 |     @Override
47 |     public Optional<Extraction> extract(Leaf leaf) throws ParseTreeException {
48 |         TregexPattern p = TregexPattern.compile("ROOT <<: (S|SINV < (SBAR=sbar < (S=s < (NP $.. VP)) $.. (NP $.. VP)))");
49 |         TregexMatcher matcher = p.matcher(leaf.getParseTree());
50 | 
51 |         while (matcher.findAt(leaf.getParseTree())) {
52 | 
53 |             // the left, subordinate constituent
54 |             List<Word> leftConstituentWords = ParseTreeExtractionUtils.getContainingWords(matcher.getNode("s"));
55 |             Leaf leftConstituent = new Leaf(getClass().getSimpleName(), WordsUtils.wordsToProperSentenceString(leftConstituentWords));
56 | 
57 |             // the right, superordinate constituent
58 |             List<Word> rightConstituentWords = new ArrayList<>();
59 |             rightConstituentWords.addAll(ParseTreeExtractionUtils.getPrecedingWords(leaf.getParseTree(), matcher.getNode("sbar"), false));
60 |             rightConstituentWords.addAll(ParseTreeExtractionUtils.getFollowingWords(leaf.getParseTree(), matcher.getNode("sbar"), false));
61 |             Leaf rightConstituent = new Leaf(getClass().getSimpleName(), WordsUtils.wordsToProperSentenceString(rightConstituentWords));
62 | 
63 |             // relation
64 |             List<Word> cuePhraseWords = ParseTreeExtractionUtils.getPrecedingWords(matcher.getNode("sbar"), matcher.getNode("s"), false);
65 |             Relation relation = classifer.classifySubordinating(cuePhraseWords).orElse(Relation.UNKNOWN_SUBORDINATION);
66 | 
67 |             Extraction res = new Extraction(
68 |                 getClass().getSimpleName(),
69 |                 false,
70 |                 cuePhraseWords,
71 |                 relation,
72 |                 false,
73 |                 Arrays.asList(leftConstituent, rightConstituent)
74 |             );
75 | 
76 |             return Optional.of(res);
77 |         }
78 | 
79 |         return Optional.empty();
80 |     }
81 | }
82 | 


--------------------------------------------------------------------------------
/DiscourseSimplification/src/main/java/org/lambda3/text/simplification/discourse/runner/discourse_tree/extraction/rules/PurposePreExtractor.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * ==========================License-Start=============================
 3 |  * DiscourseSimplification : PurposePreExtractor
 4 |  *
 5 |  * Copyright © 2017 Lambda³
 6 |  *
 7 |  * GNU General Public License 3
 8 |  * This program is free software: you can redistribute it and/or modify
 9 |  * it under the terms of the GNU General Public License as published by
10 |  * the Free Software Foundation, either version 3 of the License, or
11 |  * (at your option) any later version.
12 |  *
13 |  * This program is distributed in the hope that it will be useful,
14 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 |  * GNU General Public License for more details.
17 |  *
18 |  * You should have received a copy of the GNU General Public License
19 |  * along with this program.  If not, see http://www.gnu.org/licenses/.
20 |  * ==========================License-End==============================
21 |  */
22 | 
23 | package org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.rules;
24 | 
25 | import edu.stanford.nlp.ling.Word;
26 | import edu.stanford.nlp.trees.tregex.TregexMatcher;
27 | import edu.stanford.nlp.trees.tregex.TregexPattern;
28 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.Relation;
29 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.ExtractionRule;
30 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.Extraction;
31 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.model.Leaf;
32 | import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeException;
33 | import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeExtractionUtils;
34 | import org.lambda3.text.simplification.discourse.utils.words.WordsUtils;
35 | 
36 | import java.util.ArrayList;
37 | import java.util.Arrays;
38 | import java.util.List;
39 | import java.util.Optional;
40 | 
41 | /**
42 |  *
43 |  */
44 | public class PurposePreExtractor extends ExtractionRule {
45 | 
46 |     @Override
47 |     public Optional<Extraction> extract(Leaf leaf) throws ParseTreeException {
48 | 
49 |         TregexPattern p = TregexPattern.compile("ROOT <<: (S|SINV < (S=s <<, (VP <<, /(T|t)o/) $.. (NP $.. VP=vp)))");
50 |         TregexMatcher matcher = p.matcher(leaf.getParseTree());
51 | 
52 |         while (matcher.findAt(leaf.getParseTree())) {
53 | 
54 |             // the left, subordinate constituent
55 | //            List<Word> leftConstituentWords = ParseTreeExtractionUtils.getContainingWords(matcher.getNode("s"));
56 | 
57 |             // rephrase
58 |             List<Word> leftConstituentWords = rephraseEnablement(matcher.getNode("s"), matcher.getNode("vp"));
59 |             Leaf leftConstituent = new Leaf(getClass().getSimpleName(), WordsUtils.wordsToProperSentenceString(leftConstituentWords));
60 |             leftConstituent.dontAllowSplit();
61 |             leftConstituent.setToSimpleContext(true);
62 | 
63 |             // the right, superordinate constituent
64 |             List<Word> rightConstituentWords = new ArrayList<>();
65 |             rightConstituentWords.addAll(ParseTreeExtractionUtils.getPrecedingWords(leaf.getParseTree(), matcher.getNode("s"), false));
66 |             rightConstituentWords.addAll(ParseTreeExtractionUtils.getFollowingWords(leaf.getParseTree(), matcher.getNode("s"), false));
67 |             Leaf rightConstituent = new Leaf(getClass().getSimpleName(), WordsUtils.wordsToProperSentenceString(rightConstituentWords));
68 | 
69 |             // relation
70 |             Relation relation = Relation.PURPOSE;
71 | 
72 |             Extraction res = new Extraction(
73 |                 getClass().getSimpleName(),
74 |                 false,
75 |                 null,
76 |                 relation,
77 |                 false,
78 |                 Arrays.asList(leftConstituent, rightConstituent)
79 |             );
80 | 
81 |             return Optional.of(res);
82 |         }
83 | 
84 |         return Optional.empty();
85 |     }
86 | }
87 | 


--------------------------------------------------------------------------------
/DiscourseSimplification/src/main/java/org/lambda3/text/simplification/discourse/runner/discourse_tree/extraction/rules/PurposePostExtractor.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * ==========================License-Start=============================
 3 |  * DiscourseSimplification : PurposePostExtractor
 4 |  *
 5 |  * Copyright © 2017 Lambda³
 6 |  *
 7 |  * GNU General Public License 3
 8 |  * This program is free software: you can redistribute it and/or modify
 9 |  * it under the terms of the GNU General Public License as published by
10 |  * the Free Software Foundation, either version 3 of the License, or
11 |  * (at your option) any later version.
12 |  *
13 |  * This program is distributed in the hope that it will be useful,
14 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 |  * GNU General Public License for more details.
17 |  *
18 |  * You should have received a copy of the GNU General Public License
19 |  * along with this program.  If not, see http://www.gnu.org/licenses/.
20 |  * ==========================License-End==============================
21 |  */
22 | 
23 | package org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.rules;
24 | 
25 | import edu.stanford.nlp.ling.Word;
26 | import edu.stanford.nlp.trees.tregex.TregexMatcher;
27 | import edu.stanford.nlp.trees.tregex.TregexPattern;
28 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.Relation;
29 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.ExtractionRule;
30 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.Extraction;
31 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.model.Leaf;
32 | import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeException;
33 | import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeExtractionUtils;
34 | import org.lambda3.text.simplification.discourse.utils.words.WordsUtils;
35 | 
36 | import java.util.ArrayList;
37 | import java.util.Arrays;
38 | import java.util.List;
39 | import java.util.Optional;
40 | 
41 | /**
42 |  *
43 |  */
44 | public class PurposePostExtractor extends ExtractionRule {
45 | 
46 |     @Override
47 |     public Optional<Extraction> extract(Leaf leaf) throws ParseTreeException {
48 | 
49 |         TregexPattern p = TregexPattern.compile("ROOT <<: (S|SINV < (NP $.. (VP=vp <+(VP) (NP|PP $.. (S=s <<, (VP <<, /(T|t)o/))))))");
50 |         TregexMatcher matcher = p.matcher(leaf.getParseTree());
51 | 
52 |         while (matcher.findAt(leaf.getParseTree())) {
53 | 
54 |             // the left, superordinate constituent
55 |             List<Word> leftConstituentWords = new ArrayList<>();
56 |             leftConstituentWords.addAll(ParseTreeExtractionUtils.getPrecedingWords(leaf.getParseTree(), matcher.getNode("s"), false));
57 |             leftConstituentWords.addAll(ParseTreeExtractionUtils.getFollowingWords(leaf.getParseTree(), matcher.getNode("s"), false));
58 |             Leaf leftConstituent = new Leaf(getClass().getSimpleName(), WordsUtils.wordsToProperSentenceString(leftConstituentWords));
59 | 
60 |             // the right, subordinate constituent
61 | //            List<Word> rightConstituentWords = ParseTreeExtractionUtils.getContainingWords(matcher.getNode("s"));
62 | 
63 |             // rephrase
64 |             List<Word> rightConstituentWords = rephraseEnablement(matcher.getNode("s"), matcher.getNode("vp"));
65 |             Leaf rightConstituent = new Leaf(getClass().getSimpleName(), WordsUtils.wordsToProperSentenceString(rightConstituentWords));
66 |             rightConstituent.dontAllowSplit();
67 |             rightConstituent.setToSimpleContext(true);
68 | 
69 |             // relation
70 |             Relation relation = Relation.PURPOSE;
71 | 
72 |             Extraction res = new Extraction(
73 |                 getClass().getSimpleName(),
74 |                 false,
75 |                 null,
76 |                 relation,
77 |                 true,
78 |                 Arrays.asList(leftConstituent, rightConstituent)
79 |             );
80 | 
81 |             return Optional.of(res);
82 |         }
83 | 
84 |         return Optional.empty();
85 |     }
86 | }
87 | 


--------------------------------------------------------------------------------
/DiscourseSimplification/src/main/java/org/lambda3/text/simplification/discourse/runner/discourse_tree/extraction/rules/SubordinationPostExtractor2.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * ==========================License-Start=============================
 3 |  * DiscourseSimplification : SubordinationPostExtractor2
 4 |  *
 5 |  * Copyright © 2017 Lambda³
 6 |  *
 7 |  * GNU General Public License 3
 8 |  * This program is free software: you can redistribute it and/or modify
 9 |  * it under the terms of the GNU General Public License as published by
10 |  * the Free Software Foundation, either version 3 of the License, or
11 |  * (at your option) any later version.
12 |  *
13 |  * This program is distributed in the hope that it will be useful,
14 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 |  * GNU General Public License for more details.
17 |  *
18 |  * You should have received a copy of the GNU General Public License
19 |  * along with this program.  If not, see http://www.gnu.org/licenses/.
20 |  * ==========================License-End==============================
21 |  */
22 | 
23 | package org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.rules;
24 | 
25 | import edu.stanford.nlp.ling.Word;
26 | import edu.stanford.nlp.trees.tregex.TregexMatcher;
27 | import edu.stanford.nlp.trees.tregex.TregexPattern;
28 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.Relation;
29 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.ExtractionRule;
30 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.Extraction;
31 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.model.Leaf;
32 | import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeException;
33 | import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeExtractionUtils;
34 | import org.lambda3.text.simplification.discourse.utils.words.WordsUtils;
35 | 
36 | import java.util.ArrayList;
37 | import java.util.Arrays;
38 | import java.util.List;
39 | import java.util.Optional;
40 | 
41 | /**
42 |  * Extraction rule for a clause introduced by "that" which follows the main verb.
43 |  *
44 |  * <p>The pattern requires an NP followed by a VP that reaches, through a chain of VPs, an SBAR
45 |  * whose leftmost descendant matches {@code /that/}. The words preceding and following the SBAR are
46 |  * rephrased with {@code rephraseIntraSententialAttribution} and form a non-splittable leaf marked
47 |  * with {@code setToSimpleContext(true)}; the words of the S below the SBAR form the second leaf.
48 |  */
49 | public class SubordinationPostExtractor2 extends ExtractionRule {
50 | 
51 |     // Compiled once for the class instead of on every extract() call; the expression is constant.
52 |     private static final TregexPattern PATTERN = TregexPattern.compile(
53 |         "ROOT <<: (S|SINV < (NP $.. (VP <+(VP) (SBAR=sbar <<, /that/ < (S=s)))))");
54 | 
55 |     /**
56 |      * Applies the rule to the parse tree of {@code leaf}.
57 |      *
58 |      * @param leaf leaf whose parse tree is matched
59 |      * @return the extraction built from the first match, or an empty Optional if nothing matches
60 |      * @throws ParseTreeException if raised by one of the calls made here (never thrown directly)
61 |      */
62 |     @Override
63 |     public Optional<Extraction> extract(Leaf leaf) throws ParseTreeException {
64 |         TregexMatcher matcher = PATTERN.matcher(leaf.getParseTree());
65 | 
66 |         // At most one match is consumed, so a single findAt() check is all that is needed.
67 |         if (!matcher.findAt(leaf.getParseTree())) {
68 |             return Optional.empty();
69 |         }
70 | 
71 |         // the left constituent, which is the SUBORDINATE one in this rule: the words around the SBAR
72 |         List<Word> leftConstituentWords = new ArrayList<>();
73 |         leftConstituentWords.addAll(ParseTreeExtractionUtils.getPrecedingWords(leaf.getParseTree(), matcher.getNode("sbar"), false));
74 |         leftConstituentWords.addAll(ParseTreeExtractionUtils.getFollowingWords(leaf.getParseTree(), matcher.getNode("sbar"), false));
75 | 
76 |         // rephrase
77 |         leftConstituentWords = rephraseIntraSententialAttribution(leftConstituentWords);
78 |         Leaf leftConstituent = new Leaf(getClass().getSimpleName(), WordsUtils.wordsToProperSentenceString(leftConstituentWords));
79 |         leftConstituent.dontAllowSplit();
80 |         leftConstituent.setToSimpleContext(true);
81 | 
82 |         // the right constituent, which is the SUPERORDINATE one in this rule: the words of the S node
83 |         List<Word> rightConstituentWords = ParseTreeExtractionUtils.getContainingWords(matcher.getNode("s"));
84 |         Leaf rightConstituent = new Leaf(getClass().getSimpleName(), WordsUtils.wordsToProperSentenceString(rightConstituentWords));
85 | 
86 |         // relation: fixed, no cue phrase is classified by this rule
87 |         return Optional.of(new Extraction(
88 |             getClass().getSimpleName(),
89 |             false,
90 |             null,
91 |             Relation.UNKNOWN_SUBORDINATION,
92 |             false,
93 |             Arrays.asList(leftConstituent, rightConstituent)
94 |         ));
95 |     }
96 | }
97 | 


--------------------------------------------------------------------------------
/DiscourseSimplification/src/main/java/org/lambda3/text/simplification/discourse/runner/discourse_tree/extraction/rules/SubordinationPostExtractor.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * ==========================License-Start=============================
 3 |  * DiscourseSimplification : SubordinationPostExtractor
 4 |  *
 5 |  * Copyright © 2017 Lambda³
 6 |  *
 7 |  * GNU General Public License 3
 8 |  * This program is free software: you can redistribute it and/or modify
 9 |  * it under the terms of the GNU General Public License as published by
10 |  * the Free Software Foundation, either version 3 of the License, or
11 |  * (at your option) any later version.
12 |  *
13 |  * This program is distributed in the hope that it will be useful,
14 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 |  * GNU General Public License for more details.
17 |  *
18 |  * You should have received a copy of the GNU General Public License
19 |  * along with this program.  If not, see http://www.gnu.org/licenses/.
20 |  * ==========================License-End==============================
21 |  */
22 | 
23 | package org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.rules;
24 | 
25 | import edu.stanford.nlp.ling.Word;
26 | import edu.stanford.nlp.trees.tregex.TregexMatcher;
27 | import edu.stanford.nlp.trees.tregex.TregexPattern;
28 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.Relation;
29 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.Extraction;
30 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.ExtractionRule;
31 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.model.Leaf;
32 | import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeException;
33 | import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeExtractionUtils;
34 | import org.lambda3.text.simplification.discourse.utils.words.WordsUtils;
35 | 
36 | import java.util.ArrayList;
37 | import java.util.Arrays;
38 | import java.util.List;
39 | import java.util.Optional;
40 | 
41 | /**
42 |  * Extraction rule for a subordinate clause that follows the main verb.
43 |  *
44 |  * <p>The pattern requires an NP followed by a VP that reaches, through a chain of VPs, an SBAR
45 |  * containing a clause of the form {@code S < (NP $.. VP)}. The words preceding and following the
46 |  * SBAR form the first leaf and the words of the clause form the second leaf. The words returned by
47 |  * {@code getPrecedingWords(sbar, s, false)} serve as cue phrase for classifying the relation.
48 |  */
49 | public class SubordinationPostExtractor extends ExtractionRule {
50 | 
51 |     // Compiled once for the class instead of on every extract() call; the expression is constant.
52 |     private static final TregexPattern PATTERN = TregexPattern.compile(
53 |         "ROOT <<: (S|SINV < (NP $.. (VP <+(VP) (SBAR=sbar < (S=s < (NP $.. VP))))))");
54 | 
55 |     /**
56 |      * Applies the rule to the parse tree of {@code leaf}.
57 |      *
58 |      * @param leaf leaf whose parse tree is matched
59 |      * @return the extraction built from the first match, or an empty Optional if nothing matches
60 |      * @throws ParseTreeException if raised by one of the calls made here (never thrown directly)
61 |      */
62 |     @Override
63 |     public Optional<Extraction> extract(Leaf leaf) throws ParseTreeException {
64 |         TregexMatcher matcher = PATTERN.matcher(leaf.getParseTree());
65 | 
66 |         // At most one match is consumed, so a single findAt() check is all that is needed.
67 |         if (!matcher.findAt(leaf.getParseTree())) {
68 |             return Optional.empty();
69 |         }
70 | 
71 |         // the left, superordinate constituent
72 |         List<Word> leftConstituentWords = new ArrayList<>();
73 |         leftConstituentWords.addAll(ParseTreeExtractionUtils.getPrecedingWords(leaf.getParseTree(), matcher.getNode("sbar"), false));
74 |         leftConstituentWords.addAll(ParseTreeExtractionUtils.getFollowingWords(leaf.getParseTree(), matcher.getNode("sbar"), false));
75 |         Leaf leftConstituent = new Leaf(getClass().getSimpleName(), WordsUtils.wordsToProperSentenceString(leftConstituentWords));
76 | 
77 |         // the right, subordinate constituent
78 |         List<Word> rightConstituentWords = ParseTreeExtractionUtils.getContainingWords(matcher.getNode("s"));
79 |         Leaf rightConstituent = new Leaf(getClass().getSimpleName(), WordsUtils.wordsToProperSentenceString(rightConstituentWords));
80 | 
81 |         // relation: falls back to UNKNOWN_SUBORDINATION when the classifier returns an empty Optional
82 |         List<Word> cuePhraseWords = ParseTreeExtractionUtils.getPrecedingWords(matcher.getNode("sbar"), matcher.getNode("s"), false);
83 |         Relation relation = classifer.classifySubordinating(cuePhraseWords).orElse(Relation.UNKNOWN_SUBORDINATION);
84 | 
85 |         //TODO not always doDiscourseExtraction?
86 |         return Optional.of(new Extraction(
87 |             getClass().getSimpleName(),
88 |             false,
89 |             cuePhraseWords,
90 |             relation,
91 |             true,
92 |             Arrays.asList(leftConstituent, rightConstituent)
93 |         ));
94 |     }
95 | }
96 | 


--------------------------------------------------------------------------------
/DiscourseSimplification/src/main/java/org/lambda3/text/simplification/discourse/runner/discourse_tree/extraction/rules/SubordinationPrePurposeExtractor.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * ==========================License-Start=============================
 3 |  * DiscourseSimplification : SubordinationPrePurposeExtractor
 4 |  *
 5 |  * Copyright © 2017 Lambda³
 6 |  *
 7 |  * GNU General Public License 3
 8 |  * This program is free software: you can redistribute it and/or modify
 9 |  * it under the terms of the GNU General Public License as published by
10 |  * the Free Software Foundation, either version 3 of the License, or
11 |  * (at your option) any later version.
12 |  *
13 |  * This program is distributed in the hope that it will be useful,
14 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 |  * GNU General Public License for more details.
17 |  *
18 |  * You should have received a copy of the GNU General Public License
19 |  * along with this program.  If not, see http://www.gnu.org/licenses/.
20 |  * ==========================License-End==============================
21 |  */
22 | 
23 | package org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.rules;
24 | 
25 | import edu.stanford.nlp.ling.Word;
26 | import edu.stanford.nlp.trees.tregex.TregexMatcher;
27 | import edu.stanford.nlp.trees.tregex.TregexPattern;
28 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.Relation;
29 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.ExtractionRule;
30 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.Extraction;
31 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.model.Leaf;
32 | import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeException;
33 | import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeExtractionUtils;
34 | import org.lambda3.text.simplification.discourse.utils.words.WordsUtils;
35 | 
36 | import java.util.ArrayList;
37 | import java.util.Arrays;
38 | import java.util.List;
39 | import java.util.Optional;
40 | 
41 | /**
42 |  * Extraction rule for an infinitival clause ("to ...") that precedes the main clause.
43 |  *
44 |  * <p>The pattern requires an SBAR whose S starts with a VP whose leftmost word is "to" or "To",
45 |  * followed by an NP and a VP. The first leaf is produced by {@code rephraseEnablement(s, vp)}, is
46 |  * non-splittable and marked with {@code setToSimpleContext(true)}; the words preceding and
47 |  * following the SBAR form the second leaf. The relation is always {@code Relation.PURPOSE}.
48 |  */
49 | public class SubordinationPrePurposeExtractor extends ExtractionRule {
50 | 
51 |     // Compiled once for the class instead of on every extract() call; the expression is constant.
52 |     // The regex is anchored: Tregex tests /regex/ labels with find semantics, so an unanchored
53 |     // /(T|t)o/ would also accept any word that merely contains "to" (e.g. "stopping").
54 |     private static final TregexPattern PATTERN = TregexPattern.compile(
55 |         "ROOT <<: (S|SINV < (SBAR=sbar < (S=s <<, (VP <<, /^(T|t)o$/)) $.. (NP $.. VP=vp)))");
56 | 
57 |     /**
58 |      * Applies the rule to the parse tree of {@code leaf}.
59 |      *
60 |      * @param leaf leaf whose parse tree is matched
61 |      * @return the extraction built from the first match, or an empty Optional if nothing matches
62 |      * @throws ParseTreeException if raised by one of the calls made here (never thrown directly)
63 |      */
64 |     @Override
65 |     public Optional<Extraction> extract(Leaf leaf) throws ParseTreeException {
66 |         TregexMatcher matcher = PATTERN.matcher(leaf.getParseTree());
67 | 
68 |         // At most one match is consumed, so a single findAt() check is all that is needed.
69 |         if (!matcher.findAt(leaf.getParseTree())) {
70 |             return Optional.empty();
71 |         }
72 | 
73 |         // the left, subordinate constituent: built directly by the rephrasing helper
74 |         List<Word> leftConstituentWords = rephraseEnablement(matcher.getNode("s"), matcher.getNode("vp"));
75 |         Leaf leftConstituent = new Leaf(getClass().getSimpleName(), WordsUtils.wordsToProperSentenceString(leftConstituentWords));
76 |         leftConstituent.dontAllowSplit();
77 |         leftConstituent.setToSimpleContext(true);
78 | 
79 |         // the right, superordinate constituent
80 |         List<Word> rightConstituentWords = new ArrayList<>();
81 |         rightConstituentWords.addAll(ParseTreeExtractionUtils.getPrecedingWords(leaf.getParseTree(), matcher.getNode("sbar"), false));
82 |         rightConstituentWords.addAll(ParseTreeExtractionUtils.getFollowingWords(leaf.getParseTree(), matcher.getNode("sbar"), false));
83 |         Leaf rightConstituent = new Leaf(getClass().getSimpleName(), WordsUtils.wordsToProperSentenceString(rightConstituentWords));
84 | 
85 |         // relation: fixed, no cue phrase is classified by this rule
86 |         return Optional.of(new Extraction(
87 |             getClass().getSimpleName(),
88 |             false,
89 |             null,
90 |             Relation.PURPOSE,
91 |             false,
92 |             Arrays.asList(leftConstituent, rightConstituent)
93 |         ));
94 |     }
95 | }
96 | 


--------------------------------------------------------------------------------
/DiscourseSimplification/DiscourseSimplification.iml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <module org.jetbrains.idea.maven.project.MavenProjectsManager.isMavenModule="true" type="JAVA_MODULE" version="4">
 3 |   <component name="NewModuleRootManager" LANGUAGE_LEVEL="JDK_1_8">
 4 |     <output url="file://$MODULE_DIR$/target/classes" />
 5 |     <output-test url="file://$MODULE_DIR$/target/test-classes" />
 6 |     <content url="file://$MODULE_DIR$">
 7 |       <sourceFolder url="file://$MODULE_DIR$/src/main/java" isTestSource="false" />
 8 |       <sourceFolder url="file://$MODULE_DIR$/src/main/resources" type="java-resource" />
 9 |       <sourceFolder url="file://$MODULE_DIR$/src/test/java" isTestSource="true" />
10 |       <sourceFolder url="file://$MODULE_DIR$/src/test/resources" type="java-test-resource" />
11 |       <excludeFolder url="file://$MODULE_DIR$/target" />
12 |     </content>
13 |     <orderEntry type="inheritedJdk" />
14 |     <orderEntry type="sourceFolder" forTests="false" />
15 |     <orderEntry type="library" name="Maven: org.slf4j:slf4j-api:1.7.21" level="project" />
16 |     <orderEntry type="library" name="Maven: com.typesafe:config:1.3.1" level="project" />
17 |     <orderEntry type="library" name="Maven: com.fasterxml.jackson.core:jackson-annotations:2.8.9" level="project" />
18 |     <orderEntry type="library" name="Maven: com.fasterxml.jackson.core:jackson-core:2.8.9" level="project" />
19 |     <orderEntry type="library" name="Maven: com.fasterxml.jackson.core:jackson-databind:2.8.9" level="project" />
20 |     <orderEntry type="library" name="Maven: edu.stanford.nlp:stanford-corenlp:3.7.0" level="project" />
21 |     <orderEntry type="library" name="Maven: com.apple:AppleJavaExtensions:1.4" level="project" />
22 |     <orderEntry type="library" name="Maven: de.jollyday:jollyday:0.4.9" level="project" />
23 |     <orderEntry type="library" name="Maven: javax.xml.bind:jaxb-api:2.2.7" level="project" />
24 |     <orderEntry type="library" name="Maven: org.apache.commons:commons-lang3:3.3.1" level="project" />
25 |     <orderEntry type="library" name="Maven: org.apache.lucene:lucene-queryparser:4.10.3" level="project" />
26 |     <orderEntry type="library" name="Maven: org.apache.lucene:lucene-sandbox:4.10.3" level="project" />
27 |     <orderEntry type="library" name="Maven: org.apache.lucene:lucene-analyzers-common:4.10.3" level="project" />
28 |     <orderEntry type="library" name="Maven: org.apache.lucene:lucene-queries:4.10.3" level="project" />
29 |     <orderEntry type="library" name="Maven: org.apache.lucene:lucene-core:4.10.3" level="project" />
30 |     <orderEntry type="library" name="Maven: javax.servlet:javax.servlet-api:3.0.1" level="project" />
31 |     <orderEntry type="library" name="Maven: com.io7m.xom:xom:1.2.10" level="project" />
32 |     <orderEntry type="library" name="Maven: xml-apis:xml-apis:1.3.03" level="project" />
33 |     <orderEntry type="library" name="Maven: xerces:xercesImpl:2.8.0" level="project" />
34 |     <orderEntry type="library" name="Maven: xalan:xalan:2.7.0" level="project" />
35 |     <orderEntry type="library" name="Maven: joda-time:joda-time:2.9" level="project" />
36 |     <orderEntry type="library" name="Maven: com.googlecode.efficient-java-matrix-library:ejml:0.23" level="project" />
37 |     <orderEntry type="library" name="Maven: org.glassfish:javax.json:1.0.4" level="project" />
38 |     <orderEntry type="library" name="Maven: com.google.protobuf:protobuf-java:2.6.1" level="project" />
39 |     <orderEntry type="library" name="Maven: edu.stanford.nlp:stanford-corenlp:models:3.7.0" level="project" />
40 |     <orderEntry type="library" name="Maven: org.slf4j:slf4j-api:1.7.12" level="project" />
41 |     <orderEntry type="library" name="Maven: ch.qos.logback:logback-classic:1.1.8" level="project" />
42 |     <orderEntry type="library" name="Maven: ch.qos.logback:logback-core:1.1.8" level="project" />
43 |     <orderEntry type="library" scope="TEST" name="Maven: org.junit.jupiter:junit-jupiter-api:5.0.0-M3" level="project" />
44 |     <orderEntry type="library" scope="TEST" name="Maven: org.opentest4j:opentest4j:1.0.0-M1" level="project" />
45 |     <orderEntry type="library" scope="TEST" name="Maven: org.junit.platform:junit-platform-commons:1.0.0-M3" level="project" />
46 |   </component>
47 | </module>


--------------------------------------------------------------------------------
/DiscourseSimplification/src/main/java/org/lambda3/text/simplification/discourse/utils/words/WordsUtils.java:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * ==========================License-Start=============================
  3 |  * DiscourseSimplification : WordsUtils
  4 |  *
  5 |  * Copyright © 2017 Lambda³
  6 |  *
  7 |  * GNU General Public License 3
  8 |  * This program is free software: you can redistribute it and/or modify
  9 |  * it under the terms of the GNU General Public License as published by
 10 |  * the Free Software Foundation, either version 3 of the License, or
 11 |  * (at your option) any later version.
 12 |  *
 13 |  * This program is distributed in the hope that it will be useful,
 14 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 15 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 16 |  * GNU General Public License for more details.
 17 |  *
 18 |  * You should have received a copy of the GNU General Public License
 19 |  * along with this program.  If not, see http://www.gnu.org/licenses/.
 20 |  * ==========================License-End==============================
 21 |  */
 22 | 
 23 | package org.lambda3.text.simplification.discourse.utils.words;
 24 | 
 25 | import edu.stanford.nlp.ling.CoreLabel;
 26 | import edu.stanford.nlp.ling.SentenceUtils;
 27 | import edu.stanford.nlp.ling.Word;
 28 | import edu.stanford.nlp.process.CoreLabelTokenFactory;
 29 | import edu.stanford.nlp.process.PTBTokenizer;
 30 | import edu.stanford.nlp.simple.Sentence;
 31 | 
 32 | import java.io.StringReader;
 33 | import java.util.ArrayList;
 34 | import java.util.Arrays;
 35 | import java.util.Iterator;
 36 | import java.util.List;
 37 | 
 38 | /**
 39 |  * Static helpers for converting between strings and lists of {@link Word}s.
 40 |  */
 41 | public class WordsUtils {
 42 | 
 43 |     /**
 44 |      * Lemmatizes a single word.
 45 |      *
 46 |      * @param word the word whose value is wrapped in a {@link Sentence} of its own
 47 |      * @return a new word holding the lemma at index 0 of that sentence
 48 |      */
 49 |     public static Word lemmatize(Word word) {
 50 |         Sentence sentence = new Sentence(word.value());
 51 |         return new Word(sentence.lemma(0));
 52 |     }
 53 | 
 54 |     /**
 55 |      * Tokenizes a string with a {@link PTBTokenizer} created with an empty options string.
 56 |      *
 57 |      * @param sentence the text to tokenize
 58 |      * @return one {@link Word} per token, in the order produced by the tokenizer
 59 |      */
 60 |     public static List<Word> splitIntoWords(String sentence) {
 61 |         PTBTokenizer<CoreLabel> ptbt = new PTBTokenizer<>(new StringReader(sentence), new CoreLabelTokenFactory(), "");
 62 |         List<Word> words = new ArrayList<>();
 63 | 
 64 |         while (ptbt.hasNext()) {
 65 |             CoreLabel label = ptbt.next();
 66 |             words.add(new Word(label));
 67 |         }
 68 | 
 69 |         return words;
 70 |     }
 71 | 
 72 |     /**
 73 |      * Joins the words into one string using {@code SentenceUtils.listToString}.
 74 |      *
 75 |      * @param words the words to join
 76 |      * @return the joined string
 77 |      */
 78 |     public static String wordsToString(List<Word> words) {
 79 |         return SentenceUtils.listToString(words);
 80 |     }
 81 | 
 82 |     /**
 83 |      * Turns the words into a sentence string: stray '.' and ',' tokens are trimmed, a final '.' is
 84 |      * appended and the first token is capitalized.
 85 |      *
 86 |      * @param words the words of the sentence; the list itself is not modified
 87 |      * @return the normalized sentence as a string
 88 |      */
 89 |     public static String wordsToProperSentenceString(List<Word> words) {
 90 |         return wordsToString(wordsToProperSentence(words));
 91 |     }
 92 | 
 93 |     /**
 94 |      * Upper-cases the first {@code char} of the word's value.
 95 |      *
 96 |      * @param word the word to capitalize
 97 |      * @return a new word with the capitalized value; an empty value stays empty
 98 |      */
 99 |     private static Word capitalizeWord(Word word) {
100 |         String s = word.value();
101 |         if (s.length() > 0) {
102 |             // Locale.ROOT: the result must not depend on the JVM default locale (e.g. Turkish 'i')
103 |             s = s.substring(0, 1).toUpperCase(java.util.Locale.ROOT) + s.substring(1);
104 |         }
105 | 
106 |         return new Word(s);
107 |     }
108 | 
109 |     /**
110 |      * Lower-cases the word's value.
111 |      *
112 |      * @param word the word to convert
113 |      * @return a new word with the lower-cased value
114 |      */
115 |     public static Word lowercaseWord(Word word) {
116 |         // Locale.ROOT: the result must not depend on the JVM default locale (e.g. Turkish 'I')
117 |         return new Word(word.value().toLowerCase(java.util.Locale.ROOT));
118 |     }
119 | 
120 |     /**
121 |      * Builds the normalized word list behind {@link #wordsToProperSentenceString(List)}.
122 |      *
123 |      * @param words the input words; copied, never modified
124 |      * @return a new list that ends in "." and whose first element is capitalized
125 |      */
126 |     private static List<Word> wordsToProperSentence(List<Word> words) {
127 |         List<Word> res = new ArrayList<>(words);
128 | 
129 |         // trim '.' and ',' at beginning and the end and remove multiple, consecutive occurrences
130 |         for (String c : Arrays.asList(".", ",")) {
131 |             Word prev = null;
132 |             Iterator<Word> it = res.iterator();
133 |             while (it.hasNext()) {
134 |                 Word word = it.next();
135 |                 // drop c when it opens the list or directly repeats the previously visited token
136 |                 if (word.value().equals(c) && (prev == null || prev.value().equals(c))) {
137 |                     it.remove();
138 |                 }
139 |                 // prev tracks the visited token even if it was just removed
140 |                 prev = word;
141 |             }
142 |             if ((!res.isEmpty()) && (res.get(res.size() - 1).value().equals(c))) {
143 |                 res.remove(res.size() - 1);
144 |             }
145 |         }
146 | 
147 |         // add a '.' at the end
148 |         res.add(new Word("."));
149 | 
150 |         // capitalize first word; the list cannot be empty because of the '.' added above
151 |         res.set(0, capitalizeWord(res.get(0)));
152 | 
153 |         return res;
154 |     }
155 | }
156 | 


--------------------------------------------------------------------------------
/DiscourseSimplification/src/main/java/org/lambda3/text/simplification/discourse/runner/discourse_tree/extraction/rules/SubordinationPostPurposeExtractor.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * ==========================License-Start=============================
 3 |  * DiscourseSimplification : SubordinationPostPurposeExtractor
 4 |  *
 5 |  * Copyright © 2017 Lambda³
 6 |  *
 7 |  * GNU General Public License 3
 8 |  * This program is free software: you can redistribute it and/or modify
 9 |  * it under the terms of the GNU General Public License as published by
10 |  * the Free Software Foundation, either version 3 of the License, or
11 |  * (at your option) any later version.
12 |  *
13 |  * This program is distributed in the hope that it will be useful,
14 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 |  * GNU General Public License for more details.
17 |  *
18 |  * You should have received a copy of the GNU General Public License
19 |  * along with this program.  If not, see http://www.gnu.org/licenses/.
20 |  * ==========================License-End==============================
21 |  */
22 | 
23 | package org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.rules;
24 | 
25 | import edu.stanford.nlp.ling.Word;
26 | import edu.stanford.nlp.trees.tregex.TregexMatcher;
27 | import edu.stanford.nlp.trees.tregex.TregexPattern;
28 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.Relation;
29 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.ExtractionRule;
30 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.Extraction;
31 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.model.Leaf;
32 | import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeException;
33 | import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeExtractionUtils;
34 | import org.lambda3.text.simplification.discourse.utils.words.WordsUtils;
35 | 
36 | import java.util.ArrayList;
37 | import java.util.Arrays;
38 | import java.util.List;
39 | import java.util.Optional;
40 | 
41 | /**
42 |  * Extraction rule for an infinitival clause ("to ...") that follows the main verb.
43 |  *
44 |  * <p>The pattern requires an NP followed by a VP that reaches, through a chain of VPs, an SBAR
45 |  * whose S starts with a VP whose leftmost word is "to" or "To". The words preceding and following
46 |  * the SBAR form the first leaf; the second leaf is produced by {@code rephraseEnablement(s, vp)},
47 |  * is non-splittable and marked with {@code setToSimpleContext(true)}. The relation is always
48 |  * {@code Relation.PURPOSE}.
49 |  */
50 | public class SubordinationPostPurposeExtractor extends ExtractionRule {
51 | 
52 |     // Compiled once for the class instead of on every extract() call; the expression is constant.
53 |     // The regex is anchored: Tregex tests /regex/ labels with find semantics, so an unanchored
54 |     // /(T|t)o/ would also accept any word that merely contains "to" (e.g. "stopping").
55 |     private static final TregexPattern PATTERN = TregexPattern.compile(
56 |         "ROOT <<: (S|SINV < (NP $.. (VP=vp <+(VP) (SBAR=sbar < (S=s <<, (VP <<, /^(T|t)o$/))))))");
57 | 
58 |     /**
59 |      * Applies the rule to the parse tree of {@code leaf}.
60 |      *
61 |      * @param leaf leaf whose parse tree is matched
62 |      * @return the extraction built from the first match, or an empty Optional if nothing matches
63 |      * @throws ParseTreeException if raised by one of the calls made here (never thrown directly)
64 |      */
65 |     @Override
66 |     public Optional<Extraction> extract(Leaf leaf) throws ParseTreeException {
67 |         TregexMatcher matcher = PATTERN.matcher(leaf.getParseTree());
68 | 
69 |         // At most one match is consumed, so a single findAt() check is all that is needed.
70 |         if (!matcher.findAt(leaf.getParseTree())) {
71 |             return Optional.empty();
72 |         }
73 | 
74 |         // the left, superordinate constituent
75 |         List<Word> leftConstituentWords = new ArrayList<>();
76 |         leftConstituentWords.addAll(ParseTreeExtractionUtils.getPrecedingWords(leaf.getParseTree(), matcher.getNode("sbar"), false));
77 |         leftConstituentWords.addAll(ParseTreeExtractionUtils.getFollowingWords(leaf.getParseTree(), matcher.getNode("sbar"), false));
78 |         Leaf leftConstituent = new Leaf(getClass().getSimpleName(), WordsUtils.wordsToProperSentenceString(leftConstituentWords));
79 | 
80 |         // the right, subordinate constituent: built directly by the rephrasing helper
81 |         List<Word> rightConstituentWords = rephraseEnablement(matcher.getNode("s"), matcher.getNode("vp"));
82 |         Leaf rightConstituent = new Leaf(getClass().getSimpleName(), WordsUtils.wordsToProperSentenceString(rightConstituentWords));
83 |         rightConstituent.dontAllowSplit();
84 |         rightConstituent.setToSimpleContext(true);
85 | 
86 |         // relation: fixed, no cue phrase is classified by this rule
87 |         return Optional.of(new Extraction(
88 |             getClass().getSimpleName(),
89 |             false,
90 |             null,
91 |             Relation.PURPOSE,
92 |             true,
93 |             Arrays.asList(leftConstituent, rightConstituent)
94 |         ));
95 |     }
96 | }
97 | 


--------------------------------------------------------------------------------
/DiscourseSimplification/src/main/java/org/lambda3/text/simplification/discourse/runner/discourse_tree/extraction/rules/AdjectivalAdverbialMiddleFinalExtractor.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * ==========================License-Start=============================
 3 |  * DiscourseSimplification : AdjectivalAdverbialMiddleFinalExtractor
 4 |  *
 5 |  * Copyright © 2017 Lambda³
 6 |  *
 7 |  * GNU General Public License 3
 8 |  * This program is free software: you can redistribute it and/or modify
 9 |  * it under the terms of the GNU General Public License as published by
10 |  * the Free Software Foundation, either version 3 of the License, or
11 |  * (at your option) any later version.
12 |  *
13 |  * This program is distributed in the hope that it will be useful,
14 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 |  * GNU General Public License for more details.
17 |  *
18 |  * You should have received a copy of the GNU General Public License
19 |  * along with this program.  If not, see http://www.gnu.org/licenses/.
20 |  * ==========================License-End==============================
21 |  */
22 | 
23 | package org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.rules;
24 | 
25 | import edu.stanford.nlp.ling.Word;
26 | import edu.stanford.nlp.trees.tregex.TregexMatcher;
27 | import edu.stanford.nlp.trees.tregex.TregexPattern;
28 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.Relation;
29 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.Extraction;
30 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.ExtractionRule;
31 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.model.Leaf;
32 | import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeException;
33 | import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeExtractionUtils;
34 | import org.lambda3.text.simplification.discourse.utils.words.WordsUtils;
35 | 
36 | import java.util.ArrayList;
37 | import java.util.Arrays;
38 | import java.util.List;
39 | import java.util.Optional;
40 | 
41 | /**
42 |  * Extraction rule for an adjectival or adverbial phrase that directly follows a comma.
43 |  *
44 |  * <p>The pattern requires a VP directly below the clause plus, anywhere below the clause, a comma
45 |  * immediately followed by a sibling ADJP or ADVP, optionally followed immediately by a second comma.
46 |  * The first leaf is produced by {@code rephraseEnablement(ad, vp)}, is non-splittable and marked
47 |  * with {@code setToSimpleContext(true)}; the second leaf consists of the words preceding the first
48 |  * comma plus the words following the second comma (or the phrase itself if there is no second comma).
49 |  */
50 | public class AdjectivalAdverbialMiddleFinalExtractor extends ExtractionRule {
51 | 
52 |     // Compiled once for the class instead of on every extract() call; the expression is constant.
53 |     private static final TregexPattern PATTERN = TregexPattern.compile(
54 |         "ROOT <<: (S|SINV < VP=vp & << (/,/=comma1 $+ (ADJP|ADVP=ad ?$+ /,/=comma2)))");
55 | 
56 |     /**
57 |      * Applies the rule to the parse tree of {@code leaf}.
58 |      *
59 |      * @param leaf leaf whose parse tree is matched
60 |      * @return the extraction built from the first match, or an empty Optional if nothing matches
61 |      * @throws ParseTreeException if raised by one of the calls made here (never thrown directly)
62 |      */
63 |     @Override
64 |     public Optional<Extraction> extract(Leaf leaf) throws ParseTreeException {
65 |         TregexMatcher matcher = PATTERN.matcher(leaf.getParseTree());
66 | 
67 |         // At most one match is consumed, so a single findAt() check is all that is needed.
68 |         if (!matcher.findAt(leaf.getParseTree())) {
69 |             return Optional.empty();
70 |         }
71 | 
72 |         // rephrase
73 |         List<Word> leftConstituentWords = rephraseEnablement(matcher.getNode("ad"), matcher.getNode("vp"));
74 |         Leaf leftConstituent = new Leaf(getClass().getSimpleName(), WordsUtils.wordsToProperSentenceString(leftConstituentWords));
75 |         leftConstituent.dontAllowSplit();
76 |         leftConstituent.setToSimpleContext(true);
77 | 
78 |         // the right, superordinate constituent; comma2 is optional in the pattern, so the words are
79 |         // taken after comma2 when it was matched and after the phrase itself otherwise
80 |         String tailAnchor = (matcher.getNode("comma2") != null) ? "comma2" : "ad";
81 |         List<Word> rightConstituentWords = new ArrayList<>();
82 |         rightConstituentWords.addAll(ParseTreeExtractionUtils.getPrecedingWords(leaf.getParseTree(), matcher.getNode("comma1"), false));
83 |         rightConstituentWords.addAll(ParseTreeExtractionUtils.getFollowingWords(leaf.getParseTree(), matcher.getNode(tailAnchor), false));
84 |         Leaf rightConstituent = new Leaf(getClass().getSimpleName(), WordsUtils.wordsToProperSentenceString(rightConstituentWords));
85 | 
86 |         // relation
87 |         Relation relation = Relation.UNKNOWN_SUBORDINATION; //TODO
88 | 
89 |         return Optional.of(new Extraction(
90 |             getClass().getSimpleName(),
91 |             false,
92 |             null,
93 |             relation,
94 |             false,
95 |             Arrays.asList(leftConstituent, rightConstituent)
96 |         ));
97 |     }
98 | }
99 | 


--------------------------------------------------------------------------------
/DiscourseSimplification/src/main/java/org/lambda3/text/simplification/discourse/runner/discourse_tree/extraction/utils/TregexUtils.java:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * ==========================License-Start=============================
  3 |  * DiscourseSimplification : TregexUtils
  4 |  *
  5 |  * Copyright © 2017 Lambda³
  6 |  *
  7 |  * GNU General Public License 3
  8 |  * This program is free software: you can redistribute it and/or modify
  9 |  * it under the terms of the GNU General Public License as published by
 10 |  * the Free Software Foundation, either version 3 of the License, or
 11 |  * (at your option) any later version.
 12 |  *
 13 |  * This program is distributed in the hope that it will be useful,
 14 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 15 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 16 |  * GNU General Public License for more details.
 17 |  *
 18 |  * You should have received a copy of the GNU General Public License
 19 |  * along with this program.  If not, see http://www.gnu.org/licenses/.
 20 |  * ==========================License-End==============================
 21 |  */
 22 | 
 23 | package org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.utils;
 24 | 
 25 | import edu.stanford.nlp.trees.Tree;
 26 | import edu.stanford.nlp.trees.tregex.TregexMatcher;
 27 | import edu.stanford.nlp.trees.tregex.TregexPattern;
 28 | 
 29 | import java.util.ArrayList;
 30 | import java.util.HashMap;
 31 | import java.util.List;
 32 | 
 33 | /**
 34 |  *
 35 |  */
 36 | public class TregexUtils {
 37 | 
 38 |     public static List<MyMatch> sortedFindAt(Tree parseTree, TregexPattern p, List<String> groupsToOrder) {
 39 |         // Collects the matches reported by successive findAt(parseTree) calls, then sorts them.
 40 |         final List<MyMatch> collected = new ArrayList<>();
 41 | 
 42 |         for (TregexMatcher m = p.matcher(parseTree); m.findAt(parseTree); ) {
 43 |             // copy the named nodes of the current match into a map of their own
 44 |             final HashMap<String, Tree> nodesByName = new HashMap<>();
 45 |             for (String groupName : m.getNodeNames()) {
 46 |                 nodesByName.put(groupName, m.getNode(groupName));
 47 |             }
 48 |             collected.add(new MyMatch(nodesByName));
 49 |         }
 50 | 
 51 |         // ordering is delegated to MyMatch.Comparator, built from the tree and groupsToOrder
 52 |         collected.sort(new MyMatch.Comparator(parseTree, groupsToOrder));
 53 |         return collected;
 54 |     }
 55 | 
 56 |     public static List<MyMatch> sortedFind(Tree parseTree, TregexPattern p, List<String> groupsToOrder) {
 57 |         List<MyMatch> res = new ArrayList<>();
 58 | 
 59 |         TregexMatcher matcher = p.matcher(parseTree);
 60 |         while (matcher.find()) {
 61 |             HashMap<String, Tree> groups = new HashMap<>();
 62 |             for (String name : matcher.getNodeNames()) {
 63 |                 groups.put(name, matcher.getNode(name));
 64 |             }
 65 |             res.add(new MyMatch(groups));
 66 |         }
 67 | 
 68 |         // sort groups
 69 |         res.sort(new MyMatch.Comparator(parseTree, groupsToOrder));
 70 | 
 71 |         return res;
 72 |     }
 73 | 
 74 |     public static class MyMatch {
 75 |         private final HashMap<String, Tree> groups;
 76 | 
 77 |         public MyMatch(HashMap<String, Tree> groups) {
 78 |             this.groups = groups;
 79 |         }
 80 | 
 81 |         public Tree getNode(String name) {
 82 |             if (groups.containsKey(name)) {
 83 |                 return groups.get(name);
 84 |             } else {
 85 |                 throw new IllegalArgumentException("No discourse_tree for name: '" + name + "'");
 86 |             }
 87 |         }
 88 | 
 89 |         public static class Comparator implements java.util.Comparator<MyMatch> {
 90 |             private final Tree anchorTree;
 91 |             private final List<String> names;
 92 | 
 93 |             public Comparator(Tree anchorTree, List<String> names) {
 94 |                 this.anchorTree = anchorTree;
 95 |                 this.names = names;
 96 |             }
 97 | 
 98 |             @Override
 99 |             public int compare(MyMatch myMatch, MyMatch otherMatch) {
100 |                 int myMatchValue = 0;
101 |                 int otherMatchValue = 0;
102 |                 for (String name : names) {
103 |                     myMatchValue += myMatch.getNode(name).nodeNumber(anchorTree);
104 |                     otherMatchValue += otherMatch.getNode(name).nodeNumber(anchorTree);
105 |                 }
106 | 
107 |                 return myMatchValue - otherMatchValue;
108 |             }
109 |         }
110 |     }
111 | }
112 | 


--------------------------------------------------------------------------------
/DiscourseSimplification/src/main/java/org/lambda3/text/simplification/discourse/runner/discourse_tree/extraction/rules/SharedNPPostCoordinationExtractor.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * ==========================License-Start=============================
 3 |  * DiscourseSimplification : SharedNPPostCoordinationExtractor
 4 |  *
 5 |  * Copyright © 2017 Lambda³
 6 |  *
 7 |  * GNU General Public License 3
 8 |  * This program is free software: you can redistribute it and/or modify
 9 |  * it under the terms of the GNU General Public License as published by
10 |  * the Free Software Foundation, either version 3 of the License, or
11 |  * (at your option) any later version.
12 |  *
13 |  * This program is distributed in the hope that it will be useful,
14 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 |  * GNU General Public License for more details.
17 |  *
18 |  * You should have received a copy of the GNU General Public License
19 |  * along with this program.  If not, see http://www.gnu.org/licenses/.
20 |  * ==========================License-End==============================
21 |  */
22 | 
23 | package org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.rules;
24 | 
25 | import edu.stanford.nlp.ling.Word;
26 | import edu.stanford.nlp.trees.Tree;
27 | import edu.stanford.nlp.trees.tregex.TregexMatcher;
28 | import edu.stanford.nlp.trees.tregex.TregexPattern;
29 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.Relation;
30 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.ExtractionRule;
31 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.Extraction;
32 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.model.Leaf;
33 | import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeException;
34 | import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeExtractionUtils;
35 | import org.lambda3.text.simplification.discourse.utils.words.WordsUtils;
36 | 
37 | import java.util.ArrayList;
38 | import java.util.Arrays;
39 | import java.util.List;
40 | import java.util.Optional;
41 | 
42 | /**
43 |  *
44 |  */
45 | public class SharedNPPostCoordinationExtractor extends ExtractionRule {
46 | 
47 |     @Override
48 |     public Optional<Extraction> extract(Leaf leaf) throws ParseTreeException {
49 | 
50 |         TregexPattern p = TregexPattern.compile("ROOT <<: (S|SINV < (NP $.. (VP <+(VP) (VP > VP=vp $.. VP))))");
51 |         TregexMatcher matcher = p.matcher(leaf.getParseTree());
52 | 
53 |         while (matcher.findAt(leaf.getParseTree())) {
54 |             List<Tree> siblings = getSiblings(matcher.getNode("vp"), Arrays.asList("VP"));
55 | 
56 |             // constituents
57 |             List<Word> precedingWords = ParseTreeExtractionUtils.getPrecedingWords(leaf.getParseTree(), siblings.get(0), false);
58 |             List<Word> followingWords = ParseTreeExtractionUtils.getFollowingWords(leaf.getParseTree(), siblings.get(siblings.size() - 1), false);
59 | 
60 |             List<Leaf> constituents = new ArrayList<>();
61 |             for (Tree sibling : siblings) {
62 |                 List<Word> words = new ArrayList<Word>();
63 |                 words.addAll(precedingWords);
64 |                 words.addAll(ParseTreeExtractionUtils.getContainingWords(sibling));
65 |                 words.addAll(followingWords);
66 | 
67 |                 Leaf constituent = new Leaf(getClass().getSimpleName(), WordsUtils.wordsToProperSentenceString(words));
68 |                 constituents.add(constituent);
69 |             }
70 | 
71 |             List<Word> cuePhraseWords = null;
72 |             Relation relation = Relation.UNKNOWN_COORDINATION;
73 |             if (constituents.size() == 2) {
74 |                 cuePhraseWords = ParseTreeExtractionUtils.getWordsInBetween(leaf.getParseTree(), siblings.get(0), siblings.get(siblings.size() - 1), false, false);
75 |                 relation = classifer.classifyCoordinating(cuePhraseWords).orElse(Relation.UNKNOWN_COORDINATION);
76 |             }
77 | 
78 |             Extraction res = new Extraction(
79 |                 getClass().getSimpleName(),
80 |                 false,
81 |                 cuePhraseWords,
82 |                 relation,
83 |                 true,
84 |                 constituents
85 |             );
86 | 
87 |             return Optional.of(res);
88 |         }
89 | 
90 |         return Optional.empty();
91 |     }
92 | 
93 | }
94 | 


--------------------------------------------------------------------------------
/DiscourseSimplification/src/main/java/org/lambda3/text/simplification/discourse/runner/discourse_tree/extraction/rules/SubordinationPostAttributionExtractor2.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * ==========================License-Start=============================
 3 |  * DiscourseSimplification : SubordinationPostAttributionExtractor2
 4 |  *
 5 |  * Copyright © 2018 Lambda³
 6 |  *
 7 |  * GNU General Public License 3
 8 |  * This program is free software: you can redistribute it and/or modify
 9 |  * it under the terms of the GNU General Public License as published by
10 |  * the Free Software Foundation, either version 3 of the License, or
11 |  * (at your option) any later version.
12 |  *
13 |  * This program is distributed in the hope that it will be useful,
14 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 |  * GNU General Public License for more details.
17 |  *
18 |  * You should have received a copy of the GNU General Public License
19 |  * along with this program.  If not, see http://www.gnu.org/licenses/.
20 |  * ==========================License-End==============================
21 |  */
22 | 
23 | package org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.rules;
24 | 
25 | import edu.stanford.nlp.ling.Word;
26 | import edu.stanford.nlp.trees.tregex.TregexMatcher;
27 | import edu.stanford.nlp.trees.tregex.TregexPattern;
28 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.Relation;
29 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.Extraction;
30 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.ExtractionRule;
31 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.model.Leaf;
32 | import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeException;
33 | import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeExtractionUtils;
34 | import org.lambda3.text.simplification.discourse.utils.words.WordsUtils;
35 | 
36 | import java.util.ArrayList;
37 | import java.util.Arrays;
38 | import java.util.List;
39 | import java.util.Optional;
40 | 
41 | /**
42 |  *
43 |  */
44 | public class SubordinationPostAttributionExtractor2 extends ExtractionRule {
45 | 
46 |     @Override
47 |     public Optional<Extraction> extract(Leaf leaf) throws ParseTreeException {
48 |         TregexPattern p = TregexPattern.compile("ROOT <<: (S|SINV < (NP $.. (VP=vp <+(VP) (SBAR=sbar <<, /that/ < (S=s)))))");
49 |         TregexMatcher matcher = p.matcher(leaf.getParseTree());
50 | 
51 |         while (matcher.findAt(leaf.getParseTree())) {
52 | 
53 |             // the left, !subordinate! constituent
54 |             List<Word> leftConstituentWords = new ArrayList<>();
55 |             leftConstituentWords.addAll(ParseTreeExtractionUtils.getPrecedingWords(leaf.getParseTree(), matcher.getNode("sbar"), false));
56 |             leftConstituentWords.addAll(ParseTreeExtractionUtils.getFollowingWords(leaf.getParseTree(), matcher.getNode("sbar"), false));
57 | 
58 |             // rephrase
59 |             leftConstituentWords = rephraseIntraSententialAttribution(leftConstituentWords);
60 |             Leaf leftConstituent = new Leaf(getClass().getSimpleName(), WordsUtils.wordsToProperSentenceString(leftConstituentWords));
61 |             leftConstituent.dontAllowSplit();
62 |             leftConstituent.setToSimpleContext(true);
63 | 
64 |             // the right, !superordinate! constituent
65 |             List<Word> rightConstituentWords = ParseTreeExtractionUtils.getContainingWords(matcher.getNode("s"));
66 |             Leaf rightConstituent = new Leaf(getClass().getSimpleName(), WordsUtils.wordsToProperSentenceString(rightConstituentWords));
67 | 
68 |             // relation
69 |             Optional<Word> headVerb = getHeadVerb(matcher.getNode("vp"));
70 | 
71 |             // only extract if verb matches
72 |             if (headVerb.isPresent() && classifer.checkAttribution(headVerb.get())) {
73 |                 Relation relation = Relation.ATTRIBUTION;
74 | 
75 |                 Extraction res = new Extraction(
76 |                     getClass().getSimpleName(),
77 |                     false,
78 |                     null,
79 |                     relation,
80 |                     false,
81 |                     Arrays.asList(leftConstituent, rightConstituent)
82 |                 );
83 | 
84 |                 return Optional.of(res);
85 |             }
86 |         }
87 | 
88 |         return Optional.empty();
89 |     }
90 | }
91 | 


--------------------------------------------------------------------------------
/DiscourseSimplification/src/main/java/org/lambda3/text/simplification/discourse/runner/discourse_tree/extraction/rules/SubordinationPostAttributionExtractor.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * ==========================License-Start=============================
 3 |  * DiscourseSimplification : SubordinationPostAttributionExtractor
 4 |  *
 5 |  * Copyright © 2017 Lambda³
 6 |  *
 7 |  * GNU General Public License 3
 8 |  * This program is free software: you can redistribute it and/or modify
 9 |  * it under the terms of the GNU General Public License as published by
10 |  * the Free Software Foundation, either version 3 of the License, or
11 |  * (at your option) any later version.
12 |  *
13 |  * This program is distributed in the hope that it will be useful,
14 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 |  * GNU General Public License for more details.
17 |  *
18 |  * You should have received a copy of the GNU General Public License
19 |  * along with this program.  If not, see http://www.gnu.org/licenses/.
20 |  * ==========================License-End==============================
21 |  */
22 | 
23 | package org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.rules;
24 | 
25 | import edu.stanford.nlp.ling.Word;
26 | import edu.stanford.nlp.trees.tregex.TregexMatcher;
27 | import edu.stanford.nlp.trees.tregex.TregexPattern;
28 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.Relation;
29 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.Extraction;
30 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.ExtractionRule;
31 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.model.Leaf;
32 | import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeException;
33 | import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeExtractionUtils;
34 | import org.lambda3.text.simplification.discourse.utils.words.WordsUtils;
35 | 
36 | import java.util.ArrayList;
37 | import java.util.Arrays;
38 | import java.util.List;
39 | import java.util.Optional;
40 | 
41 | /**
42 |  *
43 |  */
44 | public class SubordinationPostAttributionExtractor extends ExtractionRule {
45 | 
46 |     @Override
47 |     public Optional<Extraction> extract(Leaf leaf) throws ParseTreeException {
48 |         TregexPattern p = TregexPattern.compile("ROOT <<: (S|SINV < (NP $.. (VP=vp <+(VP) (SBAR=sbar))))");
49 |         TregexMatcher matcher = p.matcher(leaf.getParseTree());
50 | 
51 |         while (matcher.findAt(leaf.getParseTree())) {
52 | 
53 |             System.out.println("Matched ............................... !!!!!!!!!");
54 | 
55 |             // the left, !subordinate! constituent
56 |             List<Word> leftConstituentWords = new ArrayList<>();
57 |             leftConstituentWords.addAll(ParseTreeExtractionUtils.getPrecedingWords(leaf.getParseTree(), matcher.getNode("sbar"), false));
58 |             leftConstituentWords.addAll(ParseTreeExtractionUtils.getFollowingWords(leaf.getParseTree(), matcher.getNode("sbar"), false));
59 | 
60 |             // rephrase
61 |             leftConstituentWords = rephraseIntraSententialAttribution(leftConstituentWords);
62 |             Leaf leftConstituent = new Leaf(getClass().getSimpleName(), WordsUtils.wordsToProperSentenceString(leftConstituentWords));
63 |             leftConstituent.dontAllowSplit();
64 |             leftConstituent.setToSimpleContext(true);
65 | 
66 |             // the right, !superordinate! constituent
67 |             List<Word> rightConstituentWords = ParseTreeExtractionUtils.getContainingWords(matcher.getNode("sbar"));
68 |             Leaf rightConstituent = new Leaf(getClass().getSimpleName(), WordsUtils.wordsToProperSentenceString(rightConstituentWords));
69 | 
70 |             // relation
71 |             Optional<Word> headVerb = getHeadVerb(matcher.getNode("vp"));
72 | 
73 |             // only extract if verb matches
74 |             if (headVerb.isPresent() && classifer.checkAttribution(headVerb.get())) {
75 |                 Relation relation = Relation.ATTRIBUTION;
76 | 
77 |                 Extraction res = new Extraction(
78 |                     getClass().getSimpleName(),
79 |                     false,
80 |                     null,
81 |                     relation,
82 |                     false,
83 |                     Arrays.asList(leftConstituent, rightConstituent)
84 |                 );
85 | 
86 |                 return Optional.of(res);
87 |             }
88 |         }
89 | 
90 |         return Optional.empty();
91 |     }
92 | }
93 | 


--------------------------------------------------------------------------------
/DiscourseSimplification/src/main/java/org/lambda3/text/simplification/discourse/model/Element.java:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * ==========================License-Start=============================
  3 |  * DiscourseSimplification : Element
  4 |  *
  5 |  * Copyright © 2017 Lambda³
  6 |  *
  7 |  * GNU General Public License 3
  8 |  * This program is free software: you can redistribute it and/or modify
  9 |  * it under the terms of the GNU General Public License as published by
 10 |  * the Free Software Foundation, either version 3 of the License, or
 11 |  * (at your option) any later version.
 12 |  *
 13 |  * This program is distributed in the hope that it will be useful,
 14 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 15 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 16 |  * GNU General Public License for more details.
 17 |  *
 18 |  * You should have received a copy of the GNU General Public License
 19 |  * along with this program.  If not, see http://www.gnu.org/licenses/.
 20 |  * ==========================License-End==============================
 21 |  */
 22 | 
 23 | package org.lambda3.text.simplification.discourse.model;
 24 | 
 25 | import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
 26 | import com.fasterxml.jackson.annotation.JsonProperty;
 27 | import edu.stanford.nlp.trees.Tree;
 28 | import org.lambda3.text.simplification.discourse.utils.IDGenerator;
 29 | import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeException;
 30 | import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeExtractionUtils;
 31 | import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeParser;
 32 | import org.lambda3.text.simplification.discourse.utils.words.WordsUtils;
 33 | 
 34 | import java.util.ArrayList;
 35 | import java.util.List;
 36 | 
 37 | /**
 38 |  *
 39 |  */
 40 | @JsonIgnoreProperties(ignoreUnknown = true)
 41 | public class Element {
 42 |     private String id;
 43 |     private Tree parseTree;
 44 |     private int sentenceIdx;
 45 |     private int contextLayer;
 46 |     private List<SimpleContext> simpleContexts;
 47 |     private List<LinkedContext> linkedContexts;
 48 | 
 49 |     // for deserialization
 50 |     public Element() {
 51 |     }
 52 | 
 53 |     public Element(Tree parseTree, int sentenceIdx, int contextLayer) {
 54 |         this.id = IDGenerator.generateUUID();
 55 |         this.parseTree = parseTree;
 56 |         this.sentenceIdx = sentenceIdx;
 57 |         this.contextLayer = contextLayer;
 58 |         this.simpleContexts = new ArrayList<>();
 59 |         this.linkedContexts = new ArrayList<>();
 60 |     }
 61 | 
 62 |     // not efficient -> prefer to use constructor with tree
 63 |     public Element(String text, int sentenceIdx, int contextLayer) throws ParseTreeException {
 64 |         this(ParseTreeParser.parse(text), sentenceIdx, contextLayer);
 65 |     }
 66 | 
 67 |     public void addLinkedContext(LinkedContext context) {
 68 |         if (!linkedContexts.contains(context)) {
 69 |             linkedContexts.add(context);
 70 |         }
 71 |     }
 72 | 
 73 |     public void addSimpleContext(SimpleContext context) {
 74 |         if (!simpleContexts.contains(context)) {
 75 |             simpleContexts.add(context);
 76 |         }
 77 |     }
 78 | 
 79 |     public String getId() {
 80 |         return id;
 81 |     }
 82 | 
 83 |     public Tree getParseTree() {
 84 |         return parseTree;
 85 |     }
 86 | 
 87 |     public void setParseTree(Tree parseTree) {
 88 |         this.parseTree = parseTree;
 89 |     }
 90 | 
 91 |     @JsonProperty("text")
 92 |     public String getText() {
 93 |         return WordsUtils.wordsToString(ParseTreeExtractionUtils.getContainingWords(parseTree));
 94 |     }
 95 | 
 96 |     public int getSentenceIdx() {
 97 |         return sentenceIdx;
 98 |     }
 99 | 
100 |     public int getContextLayer() {
101 |         return contextLayer;
102 |     }
103 | 
104 |     public List<SimpleContext> getSimpleContexts() {
105 |         return simpleContexts;
106 |     }
107 | 
108 |     public List<LinkedContext> getLinkedContexts() {
109 |         return linkedContexts;
110 |     }
111 | 
112 |     @Override
113 |     public String toString() {
114 |         StringBuilder strb = new StringBuilder();
115 |         strb.append(id + "     " + contextLayer + "     " + getText() + "\n");
116 |         getSimpleContexts().forEach(c -> strb.append("\tS:" + c.getRelation() + "    " + c.getText() + "\n"));
117 |         getLinkedContexts().forEach(c -> strb.append("\tL:" + c.getRelation() + "    " + c.getTargetID() + "\n"));
118 |         return strb.toString();
119 |     }
120 | }
121 | 


--------------------------------------------------------------------------------
/DiscourseSimplification/src/main/java/org/lambda3/text/simplification/discourse/runner/discourse_tree/extraction/rules/NonRestrictiveRelativeClauseWhereExtractor.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * ==========================License-Start=============================
 * DiscourseSimplification : NonRestrictiveRelativeClauseWhereExtractor
 4 |  *
 5 |  * Copyright © 2017 Lambda³
 6 |  *
 7 |  * GNU General Public License 3
 8 |  * This program is free software: you can redistribute it and/or modify
 9 |  * it under the terms of the GNU General Public License as published by
10 |  * the Free Software Foundation, either version 3 of the License, or
11 |  * (at your option) any later version.
12 |  *
13 |  * This program is distributed in the hope that it will be useful,
14 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 |  * GNU General Public License for more details.
17 |  *
18 |  * You should have received a copy of the GNU General Public License
19 |  * along with this program.  If not, see http://www.gnu.org/licenses/.
20 |  * ==========================License-End==============================
21 |  */
22 | 
23 | package org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.rules;
24 | 
25 | import edu.stanford.nlp.ling.Word;
26 | import edu.stanford.nlp.trees.tregex.TregexMatcher;
27 | import edu.stanford.nlp.trees.tregex.TregexPattern;
28 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.Relation;
29 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.Extraction;
30 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.ExtractionRule;
31 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.model.Leaf;
32 | import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeException;
33 | import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeExtractionUtils;
34 | import org.lambda3.text.simplification.discourse.utils.words.WordsUtils;
35 | 
36 | import java.util.ArrayList;
37 | import java.util.Arrays;
38 | import java.util.List;
39 | import java.util.Optional;
40 | 
41 | /**
42 |  *
43 |  */
44 | public class NonRestrictiveRelativeClauseWhereExtractor extends ExtractionRule {
45 | 	
46 | 	@Override
47 |     public Optional<Extraction> extract(Leaf leaf) throws ParseTreeException {
48 |         TregexPattern p = TregexPattern.compile("ROOT <<: (S|SINV << (/.*/=head < (NP|PP $+ (/,/=comma $+ (SBAR=sbar <, (WHADVP $+ S=s & <<: WRB) & ?$+ /,/=comma2)))))");
49 |         TregexMatcher matcher = p.matcher(leaf.getParseTree());
50 | 
51 |         while (matcher.findAt(leaf.getParseTree())) {
52 | 
53 |             // the left, superordinate constituent
54 |             List<Word> leftConstituentWords = new ArrayList<>();
55 |             leftConstituentWords.addAll(ParseTreeExtractionUtils.getPrecedingWords(leaf.getParseTree(), matcher.getNode("comma"), false));
56 |             if (matcher.getNode("comma2") != null) {
57 |             	leftConstituentWords.addAll(ParseTreeExtractionUtils.getFollowingWords(leaf.getParseTree(), matcher.getNode("comma2"), false));
58 |             } else {
59 |             	leftConstituentWords.addAll(ParseTreeExtractionUtils.getFollowingWords(leaf.getParseTree(), matcher.getNode("sbar"), false));
60 |             }
61 |             Leaf leftConstituent = new Leaf(getClass().getSimpleName(), WordsUtils.wordsToProperSentenceString(leftConstituentWords));
62 | 
63 |             // the right, subordinate constituent
64 |             List<Word> rightConstituentWords = new ArrayList<>();
65 |             rightConstituentWords.addAll(ParseTreeExtractionUtils.getContainingWords(matcher.getNode("s")));
66 |             Leaf rightConstituent = new Leaf(getClass().getSimpleName(), WordsUtils.wordsToProperSentenceString(rightConstituentWords));
67 | 
68 |             // relation
69 |             //List<Word> cuePhraseWords = ParseTreeExtractionUtils.getPrecedingWords(matcher.getNode("sbar"), matcher.getNode("s"), false);
70 |             //Relation relation = classifer.classifySubordinating(cuePhraseWords).orElse(Relation.UNKNOWN_SUBORDINATION);
71 | 
72 |             Relation relation = Relation.SPATIAL;
73 |             
74 |             //TODO not always doDiscourseExtraction?
75 |             Extraction res = new Extraction(
76 |                 getClass().getSimpleName(),
77 |                 false,
78 |                 null,//cuePhraseWords,
79 |                 relation,
80 |                 true,
81 |                 Arrays.asList(leftConstituent, rightConstituent)
82 |             );
83 | 
84 |             return Optional.of(res);
85 |         }
86 | 
87 |         return Optional.empty();
88 |     }
89 | 
90 | }
91 | 


--------------------------------------------------------------------------------
/DiscourseSimplification/src/main/java/org/lambda3/text/simplification/discourse/runner/discourse_tree/extraction/rules/RestrictiveParticipialExtractor.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * ==========================License-Start=============================
 * DiscourseSimplification : RestrictiveParticipialExtractor
 4 |  *
 5 |  * Copyright © 2017 Lambda³
 6 |  *
 7 |  * GNU General Public License 3
 8 |  * This program is free software: you can redistribute it and/or modify
 9 |  * it under the terms of the GNU General Public License as published by
10 |  * the Free Software Foundation, either version 3 of the License, or
11 |  * (at your option) any later version.
12 |  *
13 |  * This program is distributed in the hope that it will be useful,
14 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 |  * GNU General Public License for more details.
17 |  *
18 |  * You should have received a copy of the GNU General Public License
19 |  * along with this program.  If not, see http://www.gnu.org/licenses/.
20 |  * ==========================License-End==============================
21 |  */
22 | 
23 | package org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.rules;
24 | 
25 | import edu.stanford.nlp.ling.Word;
26 | import edu.stanford.nlp.trees.tregex.TregexMatcher;
27 | import edu.stanford.nlp.trees.tregex.TregexPattern;
28 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.Relation;
29 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.Extraction;
30 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.ExtractionRule;
31 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.model.Leaf;
32 | import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeException;
33 | import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeExtractionUtils;
34 | import org.lambda3.text.simplification.discourse.utils.words.WordsUtils;
35 | 
36 | import java.util.ArrayList;
37 | import java.util.Arrays;
38 | import java.util.List;
39 | import java.util.Optional;
40 | 
41 | /**
42 |  *
43 |  */
44 | public class RestrictiveParticipialExtractor extends ExtractionRule {
45 | 	
46 | 	@Override
47 |     public Optional<Extraction> extract(Leaf leaf) throws ParseTreeException {
48 |         //TregexPattern p = TregexPattern.compile("ROOT <<: (S=s << (NP=np <, (NP $+ (VP=vp <, VBN|VBG=vbgn)))) ");
49 | 		TregexPattern p = TregexPattern.compile("ROOT <<: (S=s < VP=mainverb &<< (NP|PP=head <, (NP=np $+ (VP=vp [<, (ADVP|PP $+ VBG|VBN=vbgn) | <, VBG|VBN=vbgn] )) & [> (PP !> S)| > (VP > S)]))");
50 |         TregexMatcher matcher = p.matcher(leaf.getParseTree());
51 | 
52 |         while (matcher.findAt(leaf.getParseTree())) { 
53 |         	
54 |             
55 |         	// the left, superordinate constituent
56 |             List<Word> leftConstituentWords = new ArrayList<>();
57 |             leftConstituentWords.addAll(ParseTreeExtractionUtils.getPrecedingWords(leaf.getParseTree(), matcher.getNode("vp"), false));
58 |             leftConstituentWords.addAll(ParseTreeExtractionUtils.getFollowingWords(leaf.getParseTree(), matcher.getNode("vp"), false));
59 |             Leaf leftConstituent = new Leaf(getClass().getSimpleName(), WordsUtils.wordsToProperSentenceString(leftConstituentWords));
60 |             
61 |             // the right, subordinate constituent 
62 |             List<Word> rightConstituentWords = new ArrayList<>();
63 |             rightConstituentWords.addAll(ParseTreeExtractionUtils.getContainingWords(matcher.getNode("np")));
64 |             rightConstituentWords.addAll(rephraseAppositionNonRes(matcher.getNode("mainverb"), matcher.getNode("np"), matcher.getNode("vbgn")));
65 |             rightConstituentWords.addAll(ParseTreeExtractionUtils.getFollowingWords(matcher.getNode("head"), matcher.getNode("vbgn"), false));
66 |             Leaf rightConstituent = new Leaf(getClass().getSimpleName(), WordsUtils.wordsToProperSentenceString(rightConstituentWords));
67 |             
68 | 
69 |             List<Word> cuePhraseWords = ParseTreeExtractionUtils.getPrecedingWords(matcher.getNode("vbgn"), matcher.getNode("s"), false);
70 |             Relation relation = classifer.classifySubordinating(cuePhraseWords).orElse(Relation.IDENTIFYING_DEFINITION);
71 | 
72 |             //TODO not always doDiscourseExtraction?
73 |             Extraction res = new Extraction(
74 |                 getClass().getSimpleName(),
75 |                 false,
76 |                 cuePhraseWords,
77 |                 relation,
78 |                 true,
79 |                 Arrays.asList(leftConstituent, rightConstituent)
80 |             );
81 | 
82 |             return Optional.of(res);
83 |         }
84 | 
85 |         return Optional.empty();
86 |     }
87 | 
88 | }
89 | 


--------------------------------------------------------------------------------
/DiscourseSimplification/src/main/java/org/lambda3/text/simplification/discourse/runner/discourse_tree/extraction/rules/SharedNPPreParticipalExtractor.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * ==========================License-Start=============================
 3 |  * DiscourseSimplification : SharedNPPreParticipalExtractor
 4 |  *
 5 |  * Copyright © 2017 Lambda³
 6 |  *
 7 |  * GNU General Public License 3
 8 |  * This program is free software: you can redistribute it and/or modify
 9 |  * it under the terms of the GNU General Public License as published by
10 |  * the Free Software Foundation, either version 3 of the License, or
11 |  * (at your option) any later version.
12 |  *
13 |  * This program is distributed in the hope that it will be useful,
14 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 |  * GNU General Public License for more details.
17 |  *
18 |  * You should have received a copy of the GNU General Public License
19 |  * along with this program.  If not, see http://www.gnu.org/licenses/.
20 |  * ==========================License-End==============================
21 |  */
22 | 
23 | package org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.rules;
24 | 
25 | import edu.stanford.nlp.ling.Word;
26 | import edu.stanford.nlp.trees.tregex.TregexMatcher;
27 | import edu.stanford.nlp.trees.tregex.TregexPattern;
28 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.Relation;
29 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.ExtractionRule;
30 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.Extraction;
31 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.model.Leaf;
32 | import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeException;
33 | import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeExtractionUtils;
34 | import org.lambda3.text.simplification.discourse.utils.words.WordsUtils;
35 | 
36 | import java.util.ArrayList;
37 | import java.util.Arrays;
38 | import java.util.List;
39 | import java.util.Optional;
40 | 
41 | /**
42 |  * Extraction rule for a participial clause (an S whose only child is a VP
43 |  * starting with a VBG/VBN token, optionally wrapped in PP/ADVP nodes) that
44 |  * precedes the NP and VP of the root clause. The NP's words are placed in
45 |  * front of the rephrased participial clause.
46 |  */
47 | public class SharedNPPreParticipalExtractor extends ExtractionRule {
48 | 
49 |     /**
50 |      * Applies the rule to the parse tree of {@code leaf}.
51 |      *
52 |      * @param leaf leaf whose parse tree is matched against the rule pattern
53 |      * @return an extraction holding the subordinate and the superordinate
54 |      *         constituent (in that order), or empty if the pattern fails
55 |      * @throws ParseTreeException presumably raised by the parse-tree helpers
56 |      */
57 |     @Override
58 |     public Optional<Extraction> extract(Leaf leaf) throws ParseTreeException {
59 |         final String ruleName = getClass().getSimpleName();
60 |         final String participal = "(__=node [== S=s | == (PP|ADVP <+(PP|ADVP) S=s)]) : (=s <: (VP <<, VBG|VBN=vbgn))";
61 |         final String tregex = "ROOT <<: (S|SINV < " + participal + ") : (=node $.. (NP=np $.. VP=vp))";
62 |         TregexMatcher m = TregexPattern.compile(tregex).matcher(leaf.getParseTree());
63 | 
64 |         // Only the first match at the root is used.
65 |         if (!m.findAt(leaf.getParseTree())) {
66 |             return Optional.empty();
67 |         }
68 | 
69 |         // cue phrase: words of the matched node in front of the participial S
70 |         List<Word> cue = ParseTreeExtractionUtils.getPrecedingWords(m.getNode("node"), m.getNode("s"), false);
71 | 
72 |         // subordinate constituent: leading words + NP + rephrased participial clause + trailing words
73 |         List<Word> subordinate = new ArrayList<>(ParseTreeExtractionUtils.getPrecedingWords(leaf.getParseTree(), m.getNode("node"), false));
74 |         subordinate.addAll(ParseTreeExtractionUtils.getContainingWords(m.getNode("np")));
75 |         subordinate.addAll(getRephrasedParticipalS(m.getNode("np"), m.getNode("vp"), m.getNode("s"), m.getNode("vbgn")));
76 |         subordinate.addAll(ParseTreeExtractionUtils.getFollowingWords(leaf.getParseTree(), m.getNode("vp"), false));
77 |         Leaf subordinateLeaf = new Leaf(ruleName, WordsUtils.wordsToProperSentenceString(subordinate));
78 | 
79 |         // superordinate constituent: the words before and after the matched node
80 |         List<Word> superordinate = new ArrayList<>(ParseTreeExtractionUtils.getPrecedingWords(leaf.getParseTree(), m.getNode("node"), false));
81 |         superordinate.addAll(ParseTreeExtractionUtils.getFollowingWords(leaf.getParseTree(), m.getNode("node"), false));
82 |         Leaf superordinateLeaf = new Leaf(ruleName, WordsUtils.wordsToProperSentenceString(superordinate));
83 | 
84 |         // NOTE(review): the fallback is a coordination relation although the
85 |         // subordinating classifier is queried; confirm that this is intended.
86 |         Relation relation = classifer.classifySubordinating(cue).orElse(Relation.UNKNOWN_COORDINATION);
87 | 
88 |         Extraction extraction = new Extraction(ruleName, false, cue, relation, false, Arrays.asList(subordinateLeaf, superordinateLeaf));
89 |         return Optional.of(extraction);
90 |     }
91 | 
92 | }
93 | 


--------------------------------------------------------------------------------
/DiscourseSimplification/src/main/java/org/lambda3/text/simplification/discourse/runner/discourse_tree/Relation.java:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * ==========================License-Start=============================
  3 |  * DiscourseSimplification : Relation
  4 |  *
  5 |  * Copyright © 2017 Lambda³
  6 |  *
  7 |  * GNU General Public License 3
  8 |  * This program is free software: you can redistribute it and/or modify
  9 |  * it under the terms of the GNU General Public License as published by
 10 |  * the Free Software Foundation, either version 3 of the License, or
 11 |  * (at your option) any later version.
 12 |  *
 13 |  * This program is distributed in the hope that it will be useful,
 14 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 15 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 16 |  * GNU General Public License for more details.
 17 |  *
 18 |  * You should have received a copy of the GNU General Public License
 19 |  * along with this program.  If not, see http://www.gnu.org/licenses/.
 20 |  * ==========================License-End==============================
 21 |  */
 22 | 
 23 | package org.lambda3.text.simplification.discourse.runner.discourse_tree;
 24 | 
 25 | import java.util.Optional;
 26 | 
 27 | /**
 28 |  * Relations that label how the constituents of a discourse extraction are
 29 |  * connected. A relation is either flagged as a coordination or not; the
 30 |  * cause/result and temporal before/after relations additionally know their
 31 |  * inverse and their coordinate/subordinate counterpart.
 32 |  */
 33 | public enum Relation {
 34 | 
 35 |     UNKNOWN,
 36 | 
 37 |     // Coordinations
 38 |     UNKNOWN_COORDINATION, // the default for coordination
 39 |     CONTRAST,
 40 |     CAUSE_C,
 41 |     RESULT_C,
 42 |     LIST,
 43 |     DISJUNCTION,
 44 |     TEMPORAL_AFTER_C,
 45 |     TEMPORAL_BEFORE_C,
 46 | 
 47 |     // Subordinations
 48 |     UNKNOWN_SUBORDINATION, // the default for subordination
 49 |     ATTRIBUTION,
 50 |     BACKGROUND,
 51 |     CAUSE,
 52 |     RESULT,
 53 |     CONDITION,
 54 |     ELABORATION,
 55 |     PURPOSE,
 56 |     TEMPORAL_AFTER,
 57 |     TEMPORAL_BEFORE,
 58 | 
 59 |     // for sentence simplification
 60 |     NOUN_BASED,
 61 |     SPATIAL,
 62 |     TEMPORAL,
 63 |     TEMPORAL_TIME, // indicating a particular instance on a time scale (e.g. "Next Sunday 2 pm").
 64 |     TEMPORAL_DURATION, // the amount of time between the two end-points of a time interval (e.g. "2 weeks").
 65 |     TEMPORAL_DATE, // particular date (e.g. "On 7 April 2013").
 66 |     TEMPORAL_SET, // periodic temporal sets representing times that occur with some frequency ("Every Tuesday").
 67 |     IDENTIFYING_DEFINITION,
 68 |     DESCRIBING_DEFINITION;
 69 | 
 70 |     // A constant cannot name a later constant in its own constructor call,
 71 |     // so the links between relations are wired up once all constants exist.
 72 |     static {
 73 |         UNKNOWN_COORDINATION.coordination = true;
 74 |         CONTRAST.coordination = true;
 75 |         CAUSE_C.coordination = true;
 76 |         RESULT_C.coordination = true;
 77 |         LIST.coordination = true;
 78 |         DISJUNCTION.coordination = true;
 79 |         TEMPORAL_AFTER_C.coordination = true;
 80 |         TEMPORAL_BEFORE_C.coordination = true;
 81 | 
 82 |         CAUSE.coordinateVersion = CAUSE_C;
 83 |         RESULT.coordinateVersion = RESULT_C;
 84 |         TEMPORAL_AFTER.coordinateVersion = TEMPORAL_AFTER_C;
 85 |         TEMPORAL_BEFORE.coordinateVersion = TEMPORAL_BEFORE_C;
 86 | 
 87 |         CAUSE_C.subordinateVersion = CAUSE;
 88 |         RESULT_C.subordinateVersion = RESULT;
 89 |         TEMPORAL_AFTER_C.subordinateVersion = TEMPORAL_AFTER;
 90 |         TEMPORAL_BEFORE_C.subordinateVersion = TEMPORAL_BEFORE;
 91 | 
 92 |         CAUSE_C.inverse = RESULT_C;
 93 |         RESULT_C.inverse = CAUSE_C;
 94 |         TEMPORAL_AFTER_C.inverse = TEMPORAL_BEFORE_C;
 95 |         TEMPORAL_BEFORE_C.inverse = TEMPORAL_AFTER_C;
 96 |         CAUSE.inverse = RESULT;
 97 |         RESULT.inverse = CAUSE;
 98 |         TEMPORAL_AFTER.inverse = TEMPORAL_BEFORE;
 99 |         TEMPORAL_BEFORE.inverse = TEMPORAL_AFTER;
100 |     }
101 | 
102 |     private boolean coordination;
103 |     private Relation regular; // class of context span (in subordination) or right span (coordination)
104 |     private Relation inverse; // class of core span (in subordination) or left span (coordination)
105 |     private Relation coordinateVersion; // optional
106 |     private Relation subordinateVersion; // optional
107 | 
108 |     // Defaults for every constant; the static block above overrides them
109 |     // for the relations that have special links.
110 |     Relation() {
111 |         this.coordination = false;
112 |         this.regular = this;
113 |         this.inverse = this; // own inverse unless the static block sets another one
114 |         this.coordinateVersion = null;
115 |         this.subordinateVersion = null;
116 |     }
117 | 
118 |     /**
119 |      * @return {@code true} if this relation is flagged as a coordination
120 |      */
121 |     public boolean isCoordination() {
122 |         return coordination;
123 |     }
124 | 
125 |     /**
126 |      * @return the regular relation, which is always this relation itself
127 |      */
128 |     public Relation getRegularRelation() {
129 |         return regular;
130 |     }
131 | 
132 |     /**
133 |      * Misspelled legacy name of {@link #getRegularRelation()}, kept so that
134 |      * existing callers continue to compile.
135 |      *
136 |      * @return the same value as {@link #getRegularRelation()}
137 |      */
138 |     public Relation getRegulatRelation() {
139 |         return getRegularRelation();
140 |     }
141 | 
142 |     /**
143 |      * @return the inverse relation; this relation itself when no dedicated
144 |      *         inverse is defined
145 |      */
146 |     public Relation getInverseRelation() {
147 |         return inverse;
148 |     }
149 | 
150 |     /**
151 |      * @return the coordinate counterpart, or empty if there is none
152 |      */
153 |     public Optional<Relation> getCoordinateVersion() {
154 |         return Optional.ofNullable(coordinateVersion);
155 |     }
156 | 
157 |     /**
158 |      * @return the subordinate counterpart, or empty if there is none
159 |      */
160 |     public Optional<Relation> getSubordinateVersion() {
161 |         return Optional.ofNullable(subordinateVersion);
162 |     }
163 | }
164 | 


--------------------------------------------------------------------------------
/DiscourseSimplification/src/main/java/org/lambda3/text/simplification/discourse/runner/discourse_tree/extraction/rules/ListNP/ListNPExtractor.java:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * ==========================License-Start=============================
  3 |  * DiscourseSimplification : ListNPExtractor
  4 |  *
  5 |  * Copyright © 2017 Lambda³
  6 |  *
  7 |  * GNU General Public License 3
  8 |  * This program is free software: you can redistribute it and/or modify
  9 |  * it under the terms of the GNU General Public License as published by
 10 |  * the Free Software Foundation, either version 3 of the License, or
 11 |  * (at your option) any later version.
 12 |  *
 13 |  * This program is distributed in the hope that it will be useful,
 14 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 15 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 16 |  * GNU General Public License for more details.
 17 |  *
 18 |  * You should have received a copy of the GNU General Public License
 19 |  * along with this program.  If not, see http://www.gnu.org/licenses/.
 20 |  * ==========================License-End==============================
 21 |  */
 22 | 
 23 | package org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.rules.ListNP;
 24 | 
 25 | import edu.stanford.nlp.ling.Word;
 26 | import edu.stanford.nlp.trees.tregex.TregexMatcher;
 27 | import edu.stanford.nlp.trees.tregex.TregexPattern;
 28 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.Extraction;
 29 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.ExtractionRule;
 30 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.utils.ListNPSplitter;
 31 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.model.Leaf;
 32 | import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeException;
 33 | import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeExtractionUtils;
 34 | import org.lambda3.text.simplification.discourse.utils.words.WordsUtils;
 35 | 
 36 | import java.util.ArrayList;
 37 | import java.util.List;
 38 | import java.util.Optional;
 39 | 
 40 | /**
 41 |  * Base class for rules that split a list-like noun phrase into separate
 42 |  * constituents, one per list element. The Tregex pattern is supplied by the
 43 |  * subclass and has to bind the candidate noun phrase to the name {@code np}.
 44 |  */
 45 | public abstract class ListNPExtractor extends ExtractionRule {
 46 |     private final String pattern;
 47 | 
 48 |     /**
 49 |      * @param pattern Tregex pattern source, compiled on each {@link #extract(Leaf)} call
 50 |      */
 51 |     public ListNPExtractor(String pattern) {
 52 |         this.pattern = pattern;
 53 |     }
 54 | 
 55 |     /**
 56 |      * Tries every root match of the pattern until {@link ListNPSplitter}
 57 |      * manages to split the matched noun phrase.
 58 |      *
 59 |      * @param leaf leaf whose parse tree is matched against the rule pattern
 60 |      * @return an extraction with one unsplittable constituent for the list
 61 |      *         introduction (if any) followed by one per list element, or
 62 |      *         empty if no match could be split
 63 |      * @throws ParseTreeException presumably raised by the parse-tree helpers
 64 |      */
 65 |     @Override
 66 |     public Optional<Extraction> extract(Leaf leaf) throws ParseTreeException {
 67 |         TregexMatcher matcher = TregexPattern.compile(pattern).matcher(leaf.getParseTree());
 68 | 
 69 |         while (matcher.findAt(leaf.getParseTree())) {
 70 |             Optional<ListNPSplitter.Result> split = ListNPSplitter.splitList(leaf.getParseTree(), matcher.getNode("np"));
 71 |             if (!split.isPresent()) {
 72 |                 continue; // this match could not be split, try the next one
 73 |             }
 74 |             ListNPSplitter.Result list = split.get();
 75 | 
 76 |             // the words around the noun phrase are repeated in every constituent
 77 |             List<Word> before = ParseTreeExtractionUtils.getPrecedingWords(leaf.getParseTree(), matcher.getNode("np"), false);
 78 |             List<Word> after = ParseTreeExtractionUtils.getFollowingWords(leaf.getParseTree(), matcher.getNode("np"), false);
 79 | 
 80 |             List<Leaf> constituents = new ArrayList<>();
 81 | 
 82 |             if (list.getIntroductionWords().isPresent()) {
 83 |                 List<Word> sentence = new ArrayList<>(before);
 84 |                 sentence.addAll(list.getIntroductionWords().get());
 85 |                 sentence.addAll(after);
 86 |                 constituents.add(unsplittableLeaf(sentence));
 87 |             }
 88 | 
 89 |             for (List<Word> element : list.getElementsWords()) {
 90 |                 List<Word> sentence = new ArrayList<>(before);
 91 |                 sentence.addAll(element);
 92 |                 sentence.addAll(after);
 93 |                 constituents.add(unsplittableLeaf(sentence));
 94 |             }
 95 | 
 96 |             return Optional.of(new Extraction(getClass().getSimpleName(), false, null, list.getRelation(), true, constituents));
 97 |         }
 98 | 
 99 |         return Optional.empty();
100 |     }
101 | 
102 |     // Wraps the words into a leaf named after the concrete rule and forbids splitting it.
103 |     private Leaf unsplittableLeaf(List<Word> words) {
104 |         Leaf constituent = new Leaf(getClass().getSimpleName(), WordsUtils.wordsToProperSentenceString(words));
105 |         constituent.dontAllowSplit();
106 |         return constituent;
107 |     }
108 | }
109 | 


--------------------------------------------------------------------------------
/DiscourseSimplification/src/main/java/org/lambda3/text/simplification/discourse/runner/discourse_tree/extraction/rules/SharedNPPostParticipalExtractor.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * ==========================License-Start=============================
 3 |  * DiscourseSimplification : SharedNPPostParticipalExtractor
 4 |  *
 5 |  * Copyright © 2017 Lambda³
 6 |  *
 7 |  * GNU General Public License 3
 8 |  * This program is free software: you can redistribute it and/or modify
 9 |  * it under the terms of the GNU General Public License as published by
10 |  * the Free Software Foundation, either version 3 of the License, or
11 |  * (at your option) any later version.
12 |  *
13 |  * This program is distributed in the hope that it will be useful,
14 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 |  * GNU General Public License for more details.
17 |  *
18 |  * You should have received a copy of the GNU General Public License
19 |  * along with this program.  If not, see http://www.gnu.org/licenses/.
20 |  * ==========================License-End==============================
21 |  */
22 | 
23 | package org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.rules;
24 | 
25 | import edu.stanford.nlp.ling.Word;
26 | import edu.stanford.nlp.trees.tregex.TregexMatcher;
27 | import edu.stanford.nlp.trees.tregex.TregexPattern;
28 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.Relation;
29 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.ExtractionRule;
30 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.Extraction;
31 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.model.Leaf;
32 | import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeException;
33 | import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeExtractionUtils;
34 | import org.lambda3.text.simplification.discourse.utils.words.WordsUtils;
35 | 
36 | import java.util.ArrayList;
37 | import java.util.Arrays;
38 | import java.util.List;
39 | import java.util.Optional;
40 | 
41 | /**
42 |  * Extraction rule for a participial clause (an S whose only child is a VP
43 |  * starting with a VBG/VBN token, optionally wrapped in PP/ADVP nodes) that
44 |  * follows an NP or PP inside the verb phrase of the root clause.
45 |  */
46 | public class SharedNPPostParticipalExtractor extends ExtractionRule {
47 | 
48 |     /**
49 |      * Applies the rule to the parse tree of {@code leaf}.
50 |      *
51 |      * @param leaf leaf whose parse tree is matched against the rule pattern
52 |      * @return an extraction holding the superordinate and the subordinate
53 |      *         constituent (in that order), or empty if the pattern fails
54 |      * @throws ParseTreeException presumably raised by the parse-tree helpers
55 |      */
56 |     @Override
57 |     public Optional<Extraction> extract(Leaf leaf) throws ParseTreeException {
58 |         final String ruleName = getClass().getSimpleName();
59 |         final String participal = "(__=node [== S=s | == (PP|ADVP <+(PP|ADVP) S=s)]) : (=s <: (VP <<, VBG|VBN=vbgn))";
60 |         final String tregex = "ROOT <<: (S|SINV < (NP=np $.. (VP=vp <+(VP) (NP|PP $.. " + participal + "))))";
61 |         TregexMatcher m = TregexPattern.compile(tregex).matcher(leaf.getParseTree());
62 | 
63 |         // Only the first match at the root is used.
64 |         if (!m.findAt(leaf.getParseTree())) {
65 |             return Optional.empty();
66 |         }
67 | 
68 |         // cue phrase: words of the matched node in front of the participial S
69 |         List<Word> cue = ParseTreeExtractionUtils.getPrecedingWords(m.getNode("node"), m.getNode("s"), false);
70 | 
71 |         // superordinate constituent: the words before and after the participial S
72 |         List<Word> superordinate = new ArrayList<>(ParseTreeExtractionUtils.getPrecedingWords(leaf.getParseTree(), m.getNode("s"), false));
73 |         superordinate.addAll(ParseTreeExtractionUtils.getFollowingWords(leaf.getParseTree(), m.getNode("s"), false));
74 |         Leaf superordinateLeaf = new Leaf(ruleName, WordsUtils.wordsToProperSentenceString(superordinate));
75 | 
76 |         // subordinate constituent: words before the VP + rephrased participial clause + words after the participial S
77 |         List<Word> subordinate = new ArrayList<>(ParseTreeExtractionUtils.getPrecedingWords(leaf.getParseTree(), m.getNode("vp"), false));
78 |         subordinate.addAll(getRephrasedParticipalS(m.getNode("np"), m.getNode("vp"), m.getNode("s"), m.getNode("vbgn")));
79 |         subordinate.addAll(ParseTreeExtractionUtils.getFollowingWords(leaf.getParseTree(), m.getNode("s"), false));
80 |         Leaf subordinateLeaf = new Leaf(ruleName, WordsUtils.wordsToProperSentenceString(subordinate));
81 | 
82 |         // NOTE(review): the fallback is a coordination relation although the
83 |         // subordinating classifier is queried; confirm that this is intended.
84 |         Relation relation = classifer.classifySubordinating(cue).orElse(Relation.UNKNOWN_COORDINATION);
85 | 
86 |         // The cue phrase only feeds the classifier; null is handed to the extraction.
87 |         Extraction extraction = new Extraction(ruleName, false, null, relation, true, Arrays.asList(superordinateLeaf, subordinateLeaf));
88 |         return Optional.of(extraction);
89 |     }
90 | 
91 | }
92 | 


--------------------------------------------------------------------------------
/DiscourseSimplification/src/main/java/org/lambda3/text/simplification/discourse/runner/discourse_tree/extraction/rules/NonRestrictiveRelativeClausePrepWhichWhoExtractor.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * ==========================License-Start=============================
 3 |  * DiscourseSimplification : NonRestrictiveRelativeClausePrepWhichWhoExtractor
 4 |  *
 5 |  * Copyright © 2017 Lambda³
 6 |  *
 7 |  * GNU General Public License 3
 8 |  * This program is free software: you can redistribute it and/or modify
 9 |  * it under the terms of the GNU General Public License as published by
10 |  * the Free Software Foundation, either version 3 of the License, or
11 |  * (at your option) any later version.
12 |  *
13 |  * This program is distributed in the hope that it will be useful,
14 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 |  * GNU General Public License for more details.
17 |  *
18 |  * You should have received a copy of the GNU General Public License
19 |  * along with this program.  If not, see http://www.gnu.org/licenses/.
20 |  * ==========================License-End==============================
21 |  */
22 | 
23 | package org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.rules;
24 | 
25 | import edu.stanford.nlp.ling.Word;
26 | import edu.stanford.nlp.trees.tregex.TregexMatcher;
27 | import edu.stanford.nlp.trees.tregex.TregexPattern;
28 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.Relation;
29 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.Extraction;
30 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.ExtractionRule;
31 | import org.lambda3.text.simplification.discourse.runner.discourse_tree.model.Leaf;
32 | import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeException;
33 | import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeExtractionUtils;
34 | import org.lambda3.text.simplification.discourse.utils.words.WordsUtils;
35 | 
36 | import java.util.ArrayList;
37 | import java.util.Arrays;
38 | import java.util.List;
39 | import java.util.Optional;
40 | 
41 | /**
42 |  * Extraction rule for a relative clause that is set off by a comma and
43 |  * introduced by a WHPP consisting of a preposition followed by a WHNP.
44 |  */
45 | public class NonRestrictiveRelativeClausePrepWhichWhoExtractor extends ExtractionRule {
46 | 
47 |     /**
48 |      * Applies the rule to the parse tree of {@code leaf}.
49 |      *
50 |      * @param leaf leaf whose parse tree is matched against the rule pattern
51 |      * @return an extraction holding the superordinate and the subordinate
52 |      *         constituent (in that order), or empty if the pattern fails
53 |      * @throws ParseTreeException presumably raised by the parse-tree helpers
54 |      */
55 |     @Override
56 |     public Optional<Extraction> extract(Leaf leaf) throws ParseTreeException {
57 |         final String ruleName = getClass().getSimpleName();
58 |         final String tregex = "ROOT <<: (S|SINV << (NP=head <, NP=np & < (/,/=comma $+ (SBAR=sbar <, (WHPP $+ S=s & <, IN=prep & <- WHNP) & ?$+ /,/=comma2))))";
59 |         TregexMatcher m = TregexPattern.compile(tregex).matcher(leaf.getParseTree());
60 | 
61 |         // Only the first match at the root is used.
62 |         if (!m.findAt(leaf.getParseTree())) {
63 |             return Optional.empty();
64 |         }
65 | 
66 |         // superordinate constituent: the words before the opening comma plus the words
67 |         // after the closing comma (comma2) if it was matched, otherwise after the SBAR
68 |         List<Word> superordinate = new ArrayList<>(ParseTreeExtractionUtils.getPrecedingWords(leaf.getParseTree(), m.getNode("comma"), false));
69 |         superordinate.addAll(ParseTreeExtractionUtils.getFollowingWords(leaf.getParseTree(), m.getNode("comma2") != null ? m.getNode("comma2") : m.getNode("sbar"), false));
70 |         Leaf superordinateLeaf = new Leaf(ruleName, WordsUtils.wordsToProperSentenceString(superordinate));
71 | 
72 |         // subordinate constituent: clause S + preposition + words between the NP and the opening comma
73 |         List<Word> subordinate = new ArrayList<>(ParseTreeExtractionUtils.getContainingWords(m.getNode("s")));
74 |         subordinate.addAll(ParseTreeExtractionUtils.getContainingWords(m.getNode("prep")));
75 |         subordinate.addAll(ParseTreeExtractionUtils.getWordsInBetween(leaf.getParseTree(), m.getNode("np"), m.getNode("comma"), true, false));
76 |         Leaf subordinateLeaf = new Leaf(ruleName, WordsUtils.wordsToProperSentenceString(subordinate));
77 | 
78 |         // relation: classified from the SBAR words in front of the clause S
79 |         List<Word> cue = ParseTreeExtractionUtils.getPrecedingWords(m.getNode("sbar"), m.getNode("s"), false);
80 |         Relation relation = classifer.classifySubordinating(cue).orElse(Relation.DESCRIBING_DEFINITION);
81 | 
82 |         //TODO not always doDiscourseExtraction?
83 |         Extraction extraction = new Extraction(ruleName, false, cue, relation, true, Arrays.asList(superordinateLeaf, subordinateLeaf));
84 |         return Optional.of(extraction);
85 |     }
86 | 
87 | }
88 | 


--------------------------------------------------------------------------------
/DiscourseSimplification/src/main/resources/cue_phrases.conf:
--------------------------------------------------------------------------------
  1 | cue_phrases {
  2 |     default_phrases {
  3 |         matching = contained
  4 |         phrases {
  5 |             // CONTRAST
  6 |             "although" = CONTRAST
  7 |             "but" = CONTRAST
  8 |             "but now" = CONTRAST
  9 |             "despite" = CONTRAST
 10 |             "even though" = CONTRAST
 11 |             "even when" = CONTRAST
 12 |             "except when" = CONTRAST
 13 |             "however" = CONTRAST
 14 |             "instead" = CONTRAST
 15 |             "rather" = CONTRAST
 16 |             "still" = CONTRAST
 17 |             "though" = CONTRAST
 18 |             "thus" = CONTRAST
 19 |             "until recently" = CONTRAST
 20 |             "while" = CONTRAST
 21 |             "yet" = CONTRAST
 22 | 
 23 |             // LIST
 24 |             "and" = LIST
 25 |             "in addition" = LIST
 26 |             "in addition to" = LIST
 27 |             "moreover" = LIST
 28 | 
 29 |             // DISJUNCTION
 30 |             "or" = DISJUNCTION
 31 | 
 32 |             // CAUSE
 33 | //          "largely because" = CAUSE_C
 34 |             "because" = CAUSE_C // changed from EXPLANATION TO CAUSE
 35 |             "since" = CAUSE_C // changed from TEMPORAL_AFTER TO CAUSE
 36 | 
 37 |             // RESULT
 38 |             "as a result" = RESULT_C
 39 |             "as a result of" = RESULT_C
 40 | 
 41 |             // TEMPORAL_AFTER
 42 |             "after" = TEMPORAL_AFTER_C
 43 |             "and after" = TEMPORAL_AFTER_C
 44 |             "next" = TEMPORAL_AFTER_C
 45 |             "then" = TEMPORAL_AFTER_C
 46 | 
 47 |             // TEMPORAL_BEFORE
 48 |             "before" = TEMPORAL_BEFORE_C
 49 |             "previously" = TEMPORAL_BEFORE_C  // changed from BACKGROUND TO TEMPORAL_BEFORE
 50 | 
 51 |             // BACKGROUND
 52 |             "as" = BACKGROUND
 53 |             "now" = BACKGROUND
 54 |             "once" = BACKGROUND
 55 |             "when" = BACKGROUND
 56 |             "with" = BACKGROUND
 57 |             "without" = BACKGROUND
 58 | 
 59 |             // CONDITION
 60 |             "if" = CONDITION
 61 |             "in case" = CONDITION
 62 |             "unless" = CONDITION
 63 |             "until" = CONDITION
 64 | 
 65 |             // ELABORATION
 66 |             "more provocatively" = ELABORATION
 67 |             "even before" = ELABORATION
 68 |             "for example" = ELABORATION
 69 |             "further" = ELABORATION
 70 |             "recently" = ELABORATION
 71 |             "since(\\W(.*?\\W)?)now" = ELABORATION
 72 |             "so" = ELABORATION
 73 |             "so far" = ELABORATION
 74 |             "where" = ELABORATION
 75 |             "whereby" = ELABORATION
 76 |             "whether" = ELABORATION
 77 | 
 78 | //          // EXPLANATION
 79 | //          "simply because" = EXPLANATION
 80 | //          "because of" = EXPLANATION
 81 | //          "indeed" = EXPLANATION
 82 | //          "so(\\W(.*?\\W)?)that" = EXPLANATION
 83 |         }
 84 |     }
 85 | 
 86 |     subordinating_phrases {
 87 |         matching = contained
 88 |         phrases = ${cue_phrases.default_phrases.phrases}
 89 |         phrases {
 90 |             // CAUSE
 91 | //          "largely because" = CAUSE
 92 |             "because" = CAUSE // changed from EXPLANATION TO CAUSE
 93 |             "since" = CAUSE // changed from TEMPORAL_AFTER TO CAUSE
 94 | 
 95 |             // RESULT
 96 |             "as a result" = RESULT
 97 |             "as a result of" = RESULT
 98 | 
 99 |             // TEMPORAL_AFTER
100 |             "after" = TEMPORAL_BEFORE
101 |             "and after" = TEMPORAL_BEFORE
102 |             "next" = TEMPORAL_AFTER
103 |             "then" = TEMPORAL_AFTER
104 | 
105 |             // TEMPORAL_BEFORE
106 |             "before" = TEMPORAL_AFTER
107 |             "previously" = TEMPORAL_AFTER  // changed from BACKGROUND TO TEMPORAL_BEFORE
108 |         }
109 |     }
110 | 
111 |     coordinating_phrases {
112 |         matching = contained
113 |         phrases = ${cue_phrases.default_phrases.phrases}
114 |     }
115 | 
116 |     adverbial_phrases {
117 |         matching = exact
118 |         phrases = ${cue_phrases.default_phrases.phrases}
119 |         phrases {
120 |             // CAUSE
121 | //          "largely because(\\W(.*?\\W)?)(this|that)" = CAUSE_C
122 |             "because(\\W(.*?\\W)?)(this|that)" = CAUSE_C // changed from EXPLANATION TO CAUSE
123 | 
124 |             // RESULT
125 |             "as a result(\\W(.*?\\W)?)(this|that)" = RESULT_C
126 |             "as a result of(\\W(.*?\\W)?)(this|that)" = RESULT_C
127 | 
128 |             // TEMPORAL_AFTER
129 |             "after(\\W(.*?\\W)?)(this|that)" = TEMPORAL_AFTER_C
130 |             "and after(\\W(.*?\\W)?)(this|that)" = TEMPORAL_AFTER_C
131 | 
132 |             // TEMPORAL_BEFORE
133 |             "before(\\W(.*?\\W)?)(this|that)" = TEMPORAL_BEFORE_C
134 |             "previously(\\W(.*?\\W)?)(this|that)" = TEMPORAL_BEFORE_C  // changed from BACKGROUND TO TEMPORAL_BEFORE
135 |         }
136 |     }
137 | }


--------------------------------------------------------------------------------