├── .gitignore
├── index.js
├── CHANGELOG.md
├── .travis.yml
├── CONTRIBUTORS.md
├── .npmignore
├── LICENSE.md
├── package.json
├── src
    ├── WeightedGraph.js
    ├── SummarizerManager.js
    ├── Summarizer.js
    └── Preprocesser.js
├── tests
    ├── test.txt
    ├── test2.txt
    └── SummarizerManager.test.js
└── README.md


/.gitignore:
--------------------------------------------------------------------------------
1 | node_modules/
2 | **/.DS_Store
3 | 


--------------------------------------------------------------------------------
/index.js:
--------------------------------------------------------------------------------
1 | module.exports.SummarizerManager = require("./src/SummarizerManager");


--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # Changelog
2 | 
3 | ## Version 1.0.1 - 1.0.7
4 | - Created initial project


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: node_js
2 | node_js:
3 |   - "8"
4 | script:
5 |   - npm test
6 | 
7 | 


--------------------------------------------------------------------------------
/CONTRIBUTORS.md:
--------------------------------------------------------------------------------
1 | node-summarizer contributors
2 | ============================================
3 | 
4 | [Swapnik Katkoori](https://github.com/SwapnikKatkoori)
5 | 
6 |   - Author and Maintainer
7 |   - Initial code and documentation
8 |  


--------------------------------------------------------------------------------
/.npmignore:
--------------------------------------------------------------------------------
 1 | # lcov
 2 | coverage/
 3 | *.log
 4 | .package.json
 5 | 
 6 | # artifacts & source
 7 | README.hbs
 8 | output.md
 9 | output/
10 | test/
11 | examples/
12 | lib-doc/
13 | 
14 | # dotfiles
15 | .travis.yml
16 | .eslintrc
17 | .eslintignore
18 | .editorconfig
19 | .babelrc
20 | .gitignore
21 | 


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
 1 | ISC License
 2 | 
 3 | Copyright (c) 2019, Swapnik Katkoori
 4 | 
 5 | Permission to use, copy, modify, and/or distribute this software for any
 6 | purpose with or without fee is hereby granted, provided that the above
 7 | copyright notice and this permission notice appear in all copies.
 8 | 
 9 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 | WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 | MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 | ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 | ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 | OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 | 


--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "node-summarizer",
 3 |   "version": "1.0.7",
 4 |   "description": "Text summarizer using Node.js",
 5 |   "main": "index.js",
 6 |   "scripts": {
 7 |     "test": "jest"
 8 |   },
 9 |   "repository": {
10 |     "type": "git",
11 |     "url": "git+https://github.com/SwapnikKatkoori/Text-Summarizer.git"
12 |   },
13 |   "keywords": [
14 |     "text",
15 |     "summary",
16 |     "summarize",
17 |     "textrank",
18 |     "sentiment",
19 |     "summarizer",
20 |     "summarization"
21 |   ],
22 |   "author": "Swapnik Katkoori <katkoor2@msu.edu>",
23 |   "license": "ISC",
24 |   "bugs": {
25 |     "url": "https://github.com/SwapnikKatkoori/Text-Summarizer/issues"
26 |   },
27 |   "homepage": "https://github.com/SwapnikKatkoori/Text-Summarizer#readme",
28 |   "dependencies": {
29 |     "sbd": "^1.0.15",
30 |     "natural": "^0.6.3",
31 |     "wordpos": "^1.2.0"
32 |   },
33 |   "devDependencies": {
34 |     "jest": "^24.7.1"
35 |   }
36 | }
37 | 


--------------------------------------------------------------------------------
/src/WeightedGraph.js:
--------------------------------------------------------------------------------
 1 | class Vertex{
 2 | 	constructor(value){
 3 | 		this.value = value;
 4 | 		this.adjacent = new Map();
 5 | 	}
 6 | }
 7 | 
 8 | class WeightedGraph{
 9 | 	constructor(){
10 | 		this.vertices_map = new Map();
11 | 		this.size = 0;
12 | 	}
13 | 
14 | 	addVertex(value){
15 | 		this.size+=1;
16 | 		let vertex_to_add = new Vertex(value);
17 | 		this.vertices_map.set(value, vertex_to_add);
18 | 		return vertex_to_add;
19 | 	}
20 | 
21 | 	getVertex(value){
22 | 		if (this.vertices_map.has(value)){
23 | 			return this.vertices_map.get(value);
24 | 		}
25 | 		return 
26 | 	}
27 | 
28 | 	addEdge(a, b, weight){
29 | 		if (!this.vertices_map.has(a)){
30 | 			this.addVertex(a);
31 | 		}
32 | 		if (!this.vertices_map.has(b)){
33 | 			this.addVertex(b);
34 | 		}
35 | 		this.vertices_map.get(a).adjacent.set(b, weight);
36 | 		this.vertices_map.get(b).adjacent.set(a, weight);
37 | 	}
38 | 
39 | 	getAllVertices(){
40 | 		let result_list = []
41 | 		this.vertices_map.forEach((value, key, map)=>{
42 | 			result_list.push(key);
43 | 		})
44 | 		return result_list;
45 | 	}
46 | }
47 | 
48 | module.exports.WeightedGraph = WeightedGraph;


--------------------------------------------------------------------------------
/tests/test.txt:
--------------------------------------------------------------------------------
 1 | Blake Griffin didn’t say much Saturday about the left knee injury that forced him to miss four of the final seven games of the regular season.
 2 | 
 3 | “We’re taking this day by day,” he said when asked about his availability for the Detroit Pistons' playoff opener against the Milwaukee Bucks on Sunday. “I’m not looking forward to a day that’s not right here in front of me.
 4 | 
 5 | “I’ll sit with our training staff and whoever else needs to be in on that decision and make that call.”
 6 | 
 7 | If the game were Saturday night, could Griffin go?
 8 | “It’s not Saturday night,” Griffin said.
 9 | 
10 | An official status report lists Griffin as questionable, but it’s apparent the Pistons are preparing to be without Griffin — at least for the beginning of the best-of-seven first-round series against the league’s best team.
11 | 
12 | “When you lose him and don’t have him going into Game 1, it’s a huge blow, but we are going to step up, defensively, offensively in all the areas with him not being there,” point guard Ish Smith said.
13 | 
14 | The Pistons were 2-5 in games Griffin didn’t play, so even winning one game against the league’s best team will be tough.
15 | 
16 | Griffin played in 75 games during the regular season, the most he’s played since the 2013-14 season.
17 | 
18 | He tweaked his left knee toward the end of the season, and he took himself out of the lineup after warmups before the Pistons’ 99-90 victory over the Portland Trail Blazers on March 30.
19 | 
20 | He missed the next two games before returning to score 45 points at Oklahoma City in a road loss April 5. But he struggled mightily the next two games, culminating in only five points in 18 minutes in Tuesday night’s 100-93 victory over the Memphis Grizzlies in the regular-season home finale.
21 | 
22 | He missed the season finale at the New York Knicks, the night the Pistons clinched a playoff spot.
23 | 
24 | Griffin, 30, was an All-Star and averaged 24.5 points, 7.5 rebounds and 5.4 assists per game this season.
25 | 
26 | Before speaking with reporters Saturday, Griffin walked on a treadmill at the practice facility in Auburn Hills. After finishing, he spoke briefly with Pistons public relations personnel before addressing the media.
27 | 
28 | He’s been receiving treatment almost nonstop the past couple days, and it’s obvious he’s determined to return to the floor, but he also expressed satisfaction with reaching the postseason, establishing a foundation in coach Dwane Casey’s first season.
29 | 
30 | “You never want to let your teammates down and not be out there, but at the same time we’ve built a foundation this season and we’ve started to play the right brand of basketball,” Griffin said. “Those types of things carry over.
31 | 
32 | “Our goal was the reach the playoffs and we accomplished that goal so at the end of the day, we’re right where we want to be.”


--------------------------------------------------------------------------------
/tests/test2.txt:
--------------------------------------------------------------------------------
 1 | President Trump and a California mayor traded barbs over Twitter on Saturday, prompted by the president's repeated threats to release detained immigrants into “sanctuary cities.”
 2 | 
 3 | The exchange between Trump and Oakland Mayor Libby Schaaf also appeared to be triggered, at least in part, by recent New York Times articles about the president's immigration policies.
 4 | 
 5 | "So interesting to see the Mayor of Oakland and other Sanctuary Cities NOT WANT our currently 'detained immigrants' after release due to the ridiculous court ordered 20 day rule," Trump tweeted.
 6 | 
 7 | Schaaf fired back, saying: “It’s time to stop fanning hate and division @realDonaldTrump - I’ve been consistent and clear: #Oakland welcomes all, no matter where you came from or how you got here.”
 8 | 
 9 | Trump fired off another tweet soon after: "Just out: The USA has the absolute legal right to have apprehended illegal immigrants transferred to Sanctuary Cities. We hereby demand that they be taken care of at the highest level, especially by the State of California, which is well known or [sic] its poor management & high taxes!"
10 | 
11 | In 2017, California passed a “sanctuary state” law limiting cooperation between local authorities and federal immigration officials. Administration officials said Trump's proposal to dump undocumented immigrants in "sanctuary cities" was floated and rejected. Trump insists he is still giving the idea strong consideration, according to the Los Angeles Times.
12 | 
13 | White House deputy press secretary Hogan Gidley said the administration was working with Department of Homeland Security (DHS) and Immigration and Customs Enforcement (ICE) to advance the plan.
14 | 
15 | "They have said they wanted all of these illegal aliens into their communities," Gidley said on Fox News' "Justice with Judge Jeanine." "We're working with DHS, we're working with ICE, to try and make sure that happens because after all, it's what they want. They should not say 'This is retribution politically,' they should say, 'This is an olive branch."
16 | 
17 | Schaaf told NPR on Saturday that her city would accept a busload of 5,000 migrants if it had to.
18 | 
19 | "My job as a mayor is to welcome people," she said. "I don't build walls. It's our job to welcome everyone into our city, ensure their safety, ensure that their families can thrive. And that is my job no matter where those people came from or how they got there."
20 | 
21 | "This is about an outrageous abuse of power," she continued. "The idea that you could use human beings, families as instruments of political payback to use public resources to exact retribution on your political enemies."
22 | 
23 | Mayors in other cities have also said they are willing to take in migrants.
24 | 
25 | “We have people who are routinely coming to this city. We have a whole infrastructure that’s built up to make sure that their rights are protected while the city of Chicago has, under the current administration, provided funding for various groups to help support asylum seekers and other people that are going through the immigration court system. I expect it will continue, if not expand upon, those kinds of resources,” said Chicago Mayor-elect Lori Lightfoot.


--------------------------------------------------------------------------------
/src/SummarizerManager.js:
--------------------------------------------------------------------------------
 1 | const Summarizer = require('./Summarizer').Summarizer;
 2 | const natural = require("natural");
 3 | 
 4 | class SummarizerManager{
 5 | 	constructor(string, number_of_sentences){
 6 | 		this.string = string;
 7 | 		this.number_of_sentences = number_of_sentences;
 8 | 		this.rank_summary = "";
 9 | 		this.frequency_summary = "";
10 | 	}
11 | 
12 | 	getSentiment(){
13 | 		let self = this;
14 | 		let Analyzer = require('natural').SentimentAnalyzer;
15 | 		let stemmer = require('natural').PorterStemmer;
16 | 		let analyzer = new Analyzer("English", stemmer, "afinn");
17 | 		return analyzer.getSentiment(self.string.split(" "));
18 | 		
19 | 	}
20 | 	getFrequencyReduction(){
21 | 		if (this.frequency_summary == ""){
22 | 			this.frequency_summary = this.getSummaryByFrequency().summary;
23 | 		}
24 | 		let dec = 1-(this.frequency_summary.length/this.string.length);
25 | 		let string_dec = String(dec);
26 | 		return {
27 | 			reduction: string_dec.slice(2,4)+"."+string_dec.slice(4,5)+"%",
28 | 			summary: this.frequency_summary
29 | 		};
30 | 	}
31 | 
32 | 	async getRankReduction(){
33 | 		if (this.rank_summary == ""){
34 | 			await this.getSummaryByRank();	
35 | 		}
36 | 		let dec = 1-(this.rank_summary.length/this.string.length);
37 | 		let string_dec = String(dec);
38 | 		return {
39 | 			reduction: string_dec.slice(2,4)+"."+string_dec.slice(4,5)+"%",
40 | 			summary: this.rank_summary
41 | 		}
42 | 
43 | 	}
44 | 
45 | 	async getRankReductionAsDec(){
46 | 		if (this.rank_summary == ""){
47 | 			await this.getSummaryByRank();
48 | 		}
49 | 		let dec = 1-(this.rank_summary.length/this.string.length);
50 | 		return {
51 | 			dec_reduction: dec,
52 | 			summary: this.rank_summary
53 | 		}
54 | 	}
55 | 
56 | 	getFrequencyReductionAsDec(){
57 | 		if (this.frequency_summary == ""){
58 | 			this.frequency_summary = this.getSummaryByFrequency().summary;
59 | 		}
60 | 		let dec = 1-(this.frequency_summary.length/this.string.length);
61 | 		return {
62 | 			dec_reduction: dec,
63 | 			summary: this.frequency_summary
64 | 		}
65 | 	}
66 | 
67 | 	getSummaryByFrequency(){
68 | 		try{
69 | 			let summarizer = new Summarizer(this.string, this.number_of_sentences);
70 | 			const summary_obj = summarizer.summarizeByFrequency();
71 | 			this.frequency_summary = summary_obj.summary;
72 | 			if(summary_obj.summary == ''){
73 | 				summary_obj.summary = Error("Not Enough similarities to be summarized, or the sentence is invalid."),
74 | 				summary_obj.sentence_list = Error("Not enough similarities to be summarized, or the sentence is invalid.")
75 | 			}
76 | 			return summary_obj;
77 | 		}catch(err){
78 | 			return Error("An invalid sentence was entered");
79 | 		}
80 | 
81 | 	}
82 | 
83 | 	async getSummaryByRank(){
84 | 		try{
85 | 			let summarizer = new Summarizer(this.string, this.number_of_sentences);
86 | 			const summary_obj = await summarizer.summarizeByRank();
87 | 			if(typeof(summary_obj.summary) === 'undefined' || summary_obj.summary == ''){
88 | 				summary_obj.summary = Error("Not Enough similarities to be summarized, or the sentence is invalid."),
89 | 				summary_obj.sentence_list = Error("Not enough similarities to be summarized, or the sentence is invalid.")
90 | 			}
91 | 			this.rank_summary = summary_obj.summary;
92 | 			return summary_obj;
93 | 		}catch(err){
94 | 			return Error("An invalid sentence was entered");
95 | 		}
96 | 	}
97 | }
98 | 
99 | module.exports = SummarizerManager;


--------------------------------------------------------------------------------
/tests/SummarizerManager.test.js:
--------------------------------------------------------------------------------
  1 | const SummarizerManager = require("../src/SummarizerManager");
  2 | let fs = require("fs");
  3 | let content = fs.readFileSync(__dirname + "/test.txt", 'utf8');
  4 | let content2 = fs.readFileSync(__dirname + "/test2.txt", 'utf8');
  5 | 
  6 | test('Gets the sentiment analysis', async () => {
  7 | 	let Summarizer = new SummarizerManager(content2, 3);
  8 | 	let summary_obj = await Summarizer.getSummaryByRank();
  9 |  	expect(typeof(Summarizer.getSentiment())).toBe("number");
 10 | });
 11 | 
 12 | test('Makes sure that there are no errors in the random walk',async ()=>{
 13 | 	jest.setTimeout(30000);
 14 | 
 15 | 	for(let i = 0; i<500; i++){
 16 | 		let Summarizer = new SummarizerManager(content,5);
 17 | 		let summary_obj = await Summarizer.getSummaryByRank();
 18 | 		expect(typeof(summary_obj.summary)).toBe('string');
 19 | 
 20 | 	}
 21 | })
 22 | 
 23 | test('Makes sure that there are no errors in frequency approach',()=>{
 24 | 	jest.setTimeout(30000);
 25 | 
 26 | 	for(let i = 0; i<500; i++){
 27 | 		let Summarizer = new SummarizerManager(content,5);
 28 | 		let summary_obj = Summarizer.getSummaryByFrequency();
 29 | 		expect(typeof(summary_obj.summary)).toBe('string');
 30 | 	}
 31 | })
 32 | 
 33 | test('Makes sure that it handles edge cases',async ()=>{
 34 | 	jest.setTimeout(30000);
 35 | 
 36 | 	let Summarizer = new SummarizerManager("...",0);
 37 | 	let summary_obj = Summarizer.getSummaryByFrequency();
 38 | 	expect(typeof(summary_obj.summary)).toBe('object');
 39 | 
 40 | 	for(let i = 0; i<15; i++){
 41 | 		summary_obj = Summarizer.getSummaryByFrequency();
 42 | 		expect(typeof(summary_obj.summary)).toBe('object');
 43 | 	}
 44 | 
 45 | 	let rank_summary = await Summarizer.getSummaryByRank();
 46 | 	expect(typeof(rank_summary.summary)).toBe('object');
 47 | 	
 48 | 	for(let i = 0; i<15; i++){
 49 | 		rank_summary = await Summarizer.getSummaryByRank();
 50 | 		expect(typeof(rank_summary.summary)).toBe('object');
 51 | 	}
 52 | 	
 53 | })
 54 | 
 55 | // test('Tests the getSentiment() function',async ()=>{
 56 | // 	jest.setTimeout(30000);
 57 | 
 58 | 
 59 | // })
 60 | 
 61 | test('Tests the getFrequencyReduction() function',async ()=>{
 62 | 	jest.setTimeout(30000);
 63 | 	let Summarizer = new SummarizerManager("This is a single sentence. This is a single sentence.", 1);
 64 | 	let reduction = Summarizer.getFrequencyReduction();
 65 | 	expect(reduction.reduction).toBe("50.9%");
 66 | 
 67 | 	let Summarizer2 = new SummarizerManager("This is a single sentence. This is a single sentence.", 1);
 68 | 	let summary = Summarizer2.getSummaryByFrequency();
 69 | 	let reduction2 = Summarizer.getFrequencyReduction();
 70 | 	expect(reduction2.reduction).toBe("50.9%");
 71 | 
 72 | 	let Summarizer3 = new SummarizerManager("This is a single sentence. This is a single sentence.", 1);
 73 | 	let summary2 = await Summarizer2.getSummaryByRank();
 74 | 	let reduction3 = Summarizer.getFrequencyReduction();
 75 | 	expect(reduction3.reduction).toBe("50.9%");
 76 | 
 77 | })
 78 | 
 79 | test('Tests the getRankReduction() function',async ()=>{
 80 | 	jest.setTimeout(30000);
 81 | 	let Summarizer = new SummarizerManager("This is a single sentence. This is a single sentence. This is not", 1);
 82 | 	let reduction = await Summarizer.getRankReduction();
 83 | 	expect(typeof(reduction.reduction)).toBe('string');
 84 | 
 85 | 	let Summarizer2 = new SummarizerManager("This is a single sentence. This is a single sentence.", 1);
 86 | 	let summary = await Summarizer2.getSummaryByRank();
 87 | 	let reduction2 = await Summarizer.getRankReduction();
 88 | 	expect(typeof(reduction2.reduction)).toBe('string');
 89 | 
 90 | 	let Summarizer3 = new SummarizerManager("This is a single sentence. This is a single sentence.", 1);
 91 | 	let summary2 = Summarizer2.getSummaryByFrequency();
 92 | 	Summarizer.getRankReduction().then((data)=>{
 93 | 		let reduction3 = data;
 94 | 		expect(typeof(reduction3.reduction)).toBe('string');
 95 | 	})
 96 | 	
 97 | 
 98 | })
 99 | 
// Placeholder: registers a test with an empty body, so it always passes
// and exercises nothing yet.
test("Final test to test everything",()=>{

})


--------------------------------------------------------------------------------
/src/Summarizer.js:
--------------------------------------------------------------------------------
  1 | const Preprocesser = require('./Preprocesser').Preprocesser;
  2 | 
  3 | class Summarizer{
  4 | 	constructor(string_to_process, number_of_sentences){
  5 | 		this.preprocesser = new Preprocesser();
  6 | 		this.number_of_sentences = number_of_sentences;
  7 | 		this.string_to_process = string_to_process;
  8 | 		this.new_length = 0;
  9 | 	}
 10 | 
 11 | 	//Takes in a list of sentences and weights and sorts by weight.
 12 | 	sortSentences(sentence_weights_list){
 13 | 		sentence_weights_list.sort((a,b)=>{
 14 | 			return b[0]-a[0];
 15 | 		})
 16 | 		return sentence_weights_list;
 17 | 	}
 18 | 
 19 | 	//Converts the textRank map into a list
 20 | 	textRankMapToList(text_rank_map){
 21 | 		let result_list = [];
 22 | 		text_rank_map.forEach((value, key, map)=>{
 23 | 			result_list.push([value,key]);
 24 | 		})
 25 | 
 26 | 		return result_list;
 27 | 	}
 28 | 
 29 | 	//Takes in a list of sorted sentences and a map of those sentences to the original sentences. Returns a string of the entire summary
 30 | 	summaryToString(sorted_sentences, clean_sentences){
 31 | 		const self = this;
 32 | 		let result_string = "";
 33 | 		let length_count = 0;
 34 | 		let count = self.number_of_sentences;
 35 | 		if(sorted_sentences.length < self.number_of_sentences){
 36 | 			count = sorted_sentences.length;
 37 | 		}
 38 | 		for(var i=0; i<count; i++){
 39 | 			length_count += sorted_sentences[i][1].split(" ").length;
 40 | 			result_string+=clean_sentences[1].get(sorted_sentences[i][1]);
 41 | 		}
 42 | 		this.new_length = length_count;
 43 | 		return result_string;
 44 | 	}
 45 |     // Takes in a list of sorted sentences and a map of those sentences to the original sentences. Returns an array of summarized sentences. 
 46 | 	summaryToArray(sorted_sentences, clean_sentences){
 47 | 		const self = this;
 48 | 		let result_array = [];
 49 | 		let length_count = 0;
 50 | 		let count = self.number_of_sentences;
 51 | 		if(sorted_sentences.length < self.number_of_sentences){
 52 | 			count = sorted_sentences.length;
 53 | 		}
 54 | 		for(var i=0; i<count; i++){
 55 | 			length_count += sorted_sentences[i][1].split(" ").length;
 56 | 			result_array.push(clean_sentences[1].get(sorted_sentences[i][1]));
 57 | 		}
 58 | 		this.new_length = length_count;
 59 | 		return result_array;
 60 | 	}
 61 | 
 62 | 	summarizeByFrequency(){
 63 | 		const self = this
 64 | 		const list_to_clean = self.preprocesser.paragraphToSentences(self.string_to_process);
 65 | 		const clean_sentences = self.preprocesser.cleanSentences(list_to_clean);
 66 | 		const tokenized = self.preprocesser.tokenizeSentences(clean_sentences[0]);
 67 | 		const weighted_map = self.preprocesser.getWeights(tokenized);
 68 | 		const sentence_weights_list = self.preprocesser.sentenceWeights(clean_sentences[0], weighted_map);
 69 | 		const sorted_sentences = self.sortSentences(sentence_weights_list);
 70 | 		
 71 | 		return {
 72 | 			summary: self.summaryToString(sorted_sentences, clean_sentences),
 73 | 			summaryArray: self.summaryToArray(sorted_sentences, clean_sentences),
 74 | 			sentence_list: list_to_clean,
 75 | 			weighted_map: weighted_map,
 76 | 			sorted_sentences: sorted_sentences
 77 | 		}
 78 | 	}
 79 | 
 80 | 	async summarizeByRank(){
 81 | 		const self = this;
 82 | 		const list_to_clean = self.preprocesser.paragraphToSentences(self.string_to_process);
 83 | 		const clean_sentences = self.preprocesser.cleanSentences(list_to_clean);
 84 | 		try{
 85 | 			const nouns_and_adjective_map = await self.preprocesser.nounsAndAdjectives(clean_sentences[0]);
 86 | 			let text_rank_graph = self.preprocesser.createTextRankGraph(nouns_and_adjective_map);
 87 | 			let text_rank_map = self.preprocesser.textRank(text_rank_graph);
 88 | 			let text_rank_list = self.sortSentences(self.textRankMapToList(text_rank_map));
 89 | 			return {
 90 | 				summary: self.summaryToString(text_rank_list, clean_sentences),
 91 | 				summaryArray: self.summaryToArray(text_rank_list, clean_sentences),
 92 | 				sentence_list: list_to_clean,
 93 | 				nouns_and_adjective_map: nouns_and_adjective_map
 94 | 			}
 95 | 		}catch(err){
 96 | 			console.log(err);
 97 | 		}
 98 | 	}
 99 | }
100 | 
101 | module.exports.Summarizer = Summarizer;


--------------------------------------------------------------------------------
/src/Preprocesser.js:
--------------------------------------------------------------------------------
  1 | const WordPos = require("wordpos");
  2 | const WeightedGraph = require('./WeightedGraph').WeightedGraph;
  3 | const sbd = require('sbd');
  4 | 
  5 | class Preprocesser{
  6 | 	constructor(){
  7 | 		this.tokenizer = sbd
  8 | 	}
  9 | 
 10 | 	//This method takes in a paragraph and returns a list of the sentences in the paragraph.
 11 | 	paragraphToSentences(string_to_process){
 12 | 		try{
 13 | 			const result = this.tokenizer.sentences(string_to_process, {});
 14 | 			return result;
 15 | 		}catch(err){
 16 | 			return Error("Cannot toeknize the given string.");
 17 | 		}
 18 | 	}
 19 | 
 20 | 	//Cleans the sentences by removing punctuation and lowercasing capital letters.
 21 | 	cleanSentences(list_to_clean){
 22 | 		let sentence_map = new Map();
 23 | 		const regex = /[&\/\\#,+()$~%.'":*?<>{}]/g;
 24 | 		for (let i = 0; i<list_to_clean.length; i++){
 25 | 			let original_sentence = list_to_clean[i];
 26 | 			list_to_clean[i] = list_to_clean[i].toLowerCase();
 27 | 			list_to_clean[i] = list_to_clean[i].replace(regex, "");
 28 | 			sentence_map.set(list_to_clean[i], original_sentence);
 29 | 		}
 30 | 		return [list_to_clean,sentence_map];
 31 | 	}
 32 | 
 33 | 	//Takes in a list of sentences and returns a list of all of the words in the sentences.
 34 | 	tokenizeSentences(list_of_sentences){
 35 | 		let new_array = new Array();
 36 | 		new_array = list_of_sentences
 37 | 		let result_list = [];
 38 | 		for (let i = 0; i<new_array.length; i++){
 39 | 			result_list = result_list.concat(new_array[i].split(" "));
 40 | 		}
 41 | 		return result_list;
 42 | 	}
 43 | 
 44 | 	//Takes in a list of words and calculates the frequencies of the words.
 45 | 	//Returns a list. The first item is a map of word->frequency. The second is the max frequency.
 46 | 	getFrequencyAndMax(list_of_words){
 47 | 		let frequency_map = new Map();
 48 | 		let max = 0
 49 | 		for (let i = 0; i<list_of_words.length; i++){
 50 | 			const word = list_of_words[i];
 51 | 			if (frequency_map.has(word)){
 52 | 				const new_val = frequency_map.get(word)+1;
 53 | 				frequency_map.set(word, new_val);
 54 | 				if (new_val>max){
 55 | 					max = new_val;
 56 | 				}
 57 | 			}else{
 58 | 				frequency_map.set(word, 1);
 59 | 			}
 60 | 		}
 61 | 		return [frequency_map, max];
 62 | 	}
 63 | 	
 64 | 	//Converts a frequency map into a map with "weights".
 65 | 	getWeights(list_of_words){
 66 | 		const frequencies_and_max = this.getFrequencyAndMax(list_of_words);
 67 | 		const frequencies_map = frequencies_and_max[0];
 68 | 		const max = frequencies_and_max[1];
 69 | 		frequencies_map.forEach((value,key,map)=>{
 70 | 			map.set(key, value/max);
 71 | 		});
 72 | 		return frequencies_map;
 73 | 	}
 74 | 
 75 | 
 76 | 	sentenceWeights(clean_sentences, weighted_map){
 77 | 		let weight_of_sentence = 0;
 78 | 		let sentence_weight_list = [];
 79 | 		let sentence = "";
 80 | 		for (let i = 0; i<clean_sentences.length; i++){
 81 | 			sentence = clean_sentences[i];
 82 | 			let word_list = sentence.split(" ");
 83 | 			weight_of_sentence = 0;
 84 | 			for (let j = 0; j<word_list.length; j++){
 85 | 				weight_of_sentence += weighted_map.get(word_list[j]);
 86 | 			}
 87 | 			sentence_weight_list.push([weight_of_sentence/word_list.length, sentence]);
 88 | 		}
 89 | 		return sentence_weight_list;
 90 | 	}
 91 | 
 92 | 	//Takes a list of sentences and returns a map of the each sentence to its nouns and adjectives
 93 | 	async nounsAndAdjectives(clean_sentences){
 94 | 		let nouns_and_adjectives_map = new Map();
 95 | 		let wordpos = new WordPos();
 96 | 		try{
 97 | 			for (let i = 0; i<clean_sentences.length; i++){
 98 | 				let adjectives = await wordpos.getAdjectives(clean_sentences[i]);
 99 | 				let nouns = await wordpos.getNouns(clean_sentences[i]);
100 | 				nouns_and_adjectives_map.set(clean_sentences[i],nouns.concat(adjectives));
101 | 			}
102 | 
103 | 			return await nouns_and_adjectives_map;
104 | 		}catch(err){
105 | 			console.log(err)
106 | 			return
107 | 		}
108 | 	}
109 | 
110 | 	//Used for the text rank summary. Takes two lists of words and gets the weight of the edge connecting the vertices.
111 | 	getEdgeWeights(list1, list2){
112 | 		let weight = 0;
113 | 		let intial = list1
114 | 		let other = list2
115 | 		if (list2.length >= list1.length){
116 | 			intial = list2
117 | 			other = list1
118 | 		}
119 | 		for(let i=0; i<intial.length; i++){
120 | 			if(other.includes(intial[i])){
121 | 				weight+=1;
122 | 			}
123 | 		}
124 | 
125 | 		return weight
126 | 	}
127 | 
128 | 	//Creates the graph for the textrank algorithm.
129 | 	createTextRankGraph(nouns_and_adjactive_map){
130 | 		let graph = new WeightedGraph();
131 | 		let key_list = [];
132 | 		let weight = 0
133 | 		nouns_and_adjactive_map.forEach((value,key,map)=>{
134 | 			key_list.push(key);
135 | 		})
136 | 		for(let i=0; i<key_list.length; i++){
137 | 			for(let j=i+1; j<key_list.length; j++){
138 | 				weight = this.getEdgeWeights(nouns_and_adjactive_map.get(key_list[i]), nouns_and_adjactive_map.get(key_list[j]));
139 | 				if(weight>0){
140 | 					graph.addEdge(key_list[i], key_list[j], weight);
141 | 				}
142 | 			}
143 | 
144 | 		}
145 | 		return graph;
146 | 	}
147 | 
148 | 	//TextRank algorithm.
149 | 	textRank(graph){
150 | 		let key_list = graph.getAllVertices();
151 | 		let text_rank_map = new Map();
152 | 		
153 | 		//random key to start with
154 | 		if (key_list.length == 0){
155 | 			return text_rank_map;
156 | 		}
157 | 		let key = key_list[Math.floor(Math.random()*key_list.length)];
158 | 		let vertex = graph.getVertex(key);
159 | 		let probability_list = [];
160 | 		//random walk 
161 | 		for (let i = 0; i < 10000; i++) {
162 | 			let full_weight = 0
163 | 		
164 | 			vertex.adjacent.forEach((value, key, map)=>{
165 | 				full_weight+=value;
166 | 			})
167 | 		
168 | 			vertex.adjacent.forEach((value, key, map)=>{
169 | 				for(let x = 0; x<value; x++){
170 | 					probability_list.push(key);
171 | 				}
172 | 			})
173 | 		
174 | 
175 | 			let sentence = probability_list[Math.floor(Math.random()*probability_list.length)];
176 | 			if(text_rank_map.has(sentence)){
177 | 				text_rank_map.set(sentence, text_rank_map.get(sentence)+1)
178 | 			}else{
179 | 				text_rank_map.set(sentence, 1);
180 | 			}
181 | 			let last_vertex = vertex;
182 | 			vertex = graph.getVertex(sentence);
183 | 			probability_list = [];
184 | 		}
185 | 		return text_rank_map;
186 | 		
187 | 	}
188 | 
189 | 
190 | }
191 | 
192 | 
193 | module.exports.Preprocesser = Preprocesser


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | [![Build Status](https://travis-ci.org/SwapnikKatkoori/node-summarizer.svg?branch=master)](https://travis-ci.org/SwapnikKatkoori/node-summarizer)
  2 | ![npm](https://img.shields.io/npm/v/node-summarizer.svg)
  3 | ![NPM](https://img.shields.io/npm/l/node-summarizer.svg)
  4 | ![npm](https://img.shields.io/npm/dw/node-summarizer.svg)
  5 | # node-summarizer
  6 | 
  7 | node-summarizer is a Node.js module that summarizes text into a specified number of sentences. This module uses two
  8 | different extractive summarization techniques: frequency based and textrank based. It also provides sentiment analysis on the 
  9 | given text, reduction percentage, as well as other information about the text and generated summary. Read the [How it Works](#desc) 
 10 | section for more information about the two different approaches used.
 11 | 
 12 | # Table of Contents
 13 | 
 14 | 1. [ Installation ](#install)
 15 | 
 16 | 2. [Usage](#usage)
 17 |     
 18 |     * [Initialization](#init)
 19 |     
 20 |     * [Getting a summary](#getsum)
 21 |     
 22 |     * [Getting a reduction percentage](#getred)
 23 |     
 24 |     * [Sentiment Analysis](#sentiment)
 25 |     
 26 |     * [Usage Notes](#note)
 27 |     
 28 | 3. [Method Details](#meth)
 29 | 4. [ How it Works ](#desc)
 30 | 5. [ Dependencies ](#depend)
 31 | 6. [ License ](#license)
 32 | 
 33 | <a name="install"></a>
 34 | ## 1. Install
 35 | 
 36 | To install using NPM
 37 | ```
 38 | npm i node-summarizer
 39 | ```
 40 | ## 2. Usage 
 41 | 
 42 | <a name="init"></a>
 43 | ### Initialization
 44 | 
 45 | To get started, initialize a SummarizerManager object.
 46 | 
 47 | ```
 48 | let SummarizerManager = require("node-summarizer").SummarizerManager;
 49 | 
 50 | let Summarizer = new SummarizerManager(text_to_summarize,number_of_sentences); 
 51 | ```
 52 | *Params*
 53 | - text_to_summarize: is a String of the text you want summarized.
 54 | 
 55 | - number_of_sentences: is an Int of how many sentences you want in the summary.
 56 | 
 57 | <a name="getsum"></a>
 58 | ### Getting a summary
 59 | 
 60 | There are two different approaches taken for summarization. Read the [How it Works](#desc) for more information on each of them. 
 61 | 
 62 | To get a **frequency summary**:
 63 | 
 64 | ```
 65 | let summary = Summarizer.getSummaryByFrequency().summary;
 66 | ```
 67 | - The getSummaryByFrequency() method returns an object with summary as one of the properties. More information on it in the [Method Details](#meth) section.
 68 | 
 69 | To get a **TextRank summary**:
 70 | 
 71 | ```
 72 | let summary = Summarizer.getSummaryByRank().then((summary_object)=>{
 73 |     return summary_object.summary
 74 | })
 75 | ```
 76 | - The getSummaryByRank() method returns a Promise. More information on it in the [Method Details](#meth) section.
 77 | 
 78 | <a name="getred"></a>
 79 | ### Getting a reduction percentage
 80 | 
 81 | To get the reduction percentage as a decimal:
 82 | 
 83 | ```
 84 | //If you want the reduction percentage of a frequency summary.
 85 | let reduction_percentage = Summarizer.getFrequencyReductionAsDec().dec_reduction;
 86 | 
 87 | //If you want the reduction percentage of a TextRank summary. Returns a promise.
 88 | let reduction_percentage = Summarizer.getRankReductionAsDec().then((reduction_obj)=>{
 89 |     return reduction_obj.dec_reduction;
 90 | })
 91 | ```
 92 | 
 93 | To get the reduction percentage as a string:
 94 | 
 95 | ```
 96 | //If you want the reduction percentage of a frequency summary.
 97 | let reduction_percentage = Summarizer.getFrequencyReduction().reduction;
 98 | 
 99 | //If you want the reduction percentage of a TextRank summary. Returns a promise.
100 | let reduction_percentage = Summarizer.getRankReduction().then((reduction_obj)=>{
101 |     return reduction_obj.reduction;
102 | })
103 | ```
104 | 
105 | <a name="sentiment"></a>
106 | ### Sentiment Analysis
107 | 
108 | To get the sentiment value of the string to be summarized:
109 | 
110 | ```
111 | let sentiment = Summarizer.getSentiment();
112 | ```
113 | 
114 | - This will return a Float.
115 | 
116 | <a name="note"></a>
117 | ### Usage Notes
118 | 
119 | - The easiest way to use this is to initialize the SummarizerManager object => Get a summary using one or both of the summarization methods => Get reduction percentage based on the summarization method used.
120 | 
121 | - The alternative way is to simply initialize the SummarizerManager object => call one of the reduction methods which automatically creates a summary if one doesn't exist and returns both the reduction percentage and new summary as an object. This is fine if all you need is the reduction percentage and summary.
122 | 
123 | - If the text cannot be split into sentences or if there are not enough sentences, the summary will be an Error().
124 | 
125 | <a name="meth"></a>
126 | ## 3. Method Details
127 | 
128 | More details on the available methods of the SummarizerManager class.
129 | 
130 | ### getSummaryByFrequency()
131 | 
132 | Once a SummarizerManager object has been initialized, calling this method will return an object with:
133 | 
134 | ```
135 | {
136 |     summary: "",    //String of the summary
137 |     sentence_list: [],  //List of all of the tokenized sentences in the given text
138 |     weighted_map: Map,  //Map of all of the tokenized words with their frequencies.
139 |     sorted_sentences: []   //A list of all of the sentences sorted by weights.
140 | }
141 | ```
142 | 
143 | ### getSummaryByRank()
144 | 
145 | This method returns a Promise. The result of the Promise is an object with:
146 | 
147 | ```
148 | {
149 |     summary: "",    //String of the summary
150 |     sentence_list: [],  //List of all of the tokenized sentences in the given text
151 |     nouns_and_adjactive_map: Map   //Map of all of the sentences with the values being a list of nouns and adjectives in the sentence
152 | }
153 | ```
154 | 
155 | ### getFrequencyReduction()
156 | 
157 | Once a SummarizerManager object has been initialized, calling this method will return an object with:
158 | 
159 | ```
160 | {
161 |     reduction: "",  //A String of the percentage of reduction ex. "50.1%"
162 |     summary: ""     //Current frequency summary
163 |     
164 | }
165 | ```
166 | - Calling this method without first calling the getSummaryByFrequency() method will still work. It will automatically create
167 | a frequency summary.
168 | 
169 | ### getRankReduction()
170 | 
171 | This method returns a Promise. The result of the Promise is an object with:
172 | 
173 | ```
174 | {
175 |     reduction: "",  //A String of the percentage of reduction ex. "50.1%"
176 |     summary: ""     //Current rank summary
177 |     
178 | }
179 | ```
180 | - Calling this method without first calling the getSummaryByRank() method will still work. It will just automatically create
181 | a rank summary.
182 | 
183 | ### getFrequencyReductionAsDec()
184 | 
185 | This method works the same way as getFrequencyReduction and returns an object with:
186 | 
187 | ```
188 | {
189 |     dec_reduction: Float,  //A float of the reduction ex. .50192
190 |     summary: ""     //Current frequency summary
191 |     
192 | }
193 | ```
194 | 
195 | ### getRankReductionAsDec()
196 | 
197 | This method works the same way as getRankReduction and returns a Promise. The result of the Promise is an object with:
198 | 
199 | ```
200 | {
201 |     dec_reduction: Float,  //A float of the reduction ex. .50192
202 |     summary: ""     //Current rank summary
203 |     
204 | }
205 | ```
206 | 
207 | ### getSentiment()
208 | 
209 | This method returns a Float of the sentiment value.
210 | 
211 | 
212 | <a name="desc"></a>
213 | ## 4. How it Works
214 | 
215 | How the text is summarized.
216 | 
217 | ### Frequency based algorithm:
218 | 
219 | This type of summary works best for text that is not too complicated. The advantage of this approach is that it is more efficient than the textrank implementation. It was heavily inspired by this [post](https://stackabuse.com/text-summarization-with-nltk-in-python/).
220 | 
221 | - Split the given text into sentences.
222 | 
223 | - Preprocess the sentences by removing all punctuation and making all letters lowercase.
224 | 
225 | - Make a list of all the words that occur in the text and find the frequency of the words.
226 | 
227 | - Take the calculated frequencies of the words and calculate the total weight of the original sentences.
228 | 
229 | 
230 | ### TextRank based algorithm:
231 | 
232 | While this approach costs more in terms of time complexity, it is better for getting the summary of things like newspaper articles and essays. Read more about it [here](https://web.eecs.umich.edu/~mihalcea/papers/mihalcea.emnlp04.pdf).
233 | 
234 | - Split the given text into sentences.
235 | 
236 | - Preprocess the sentences by removing all punctuation and making all letters lowercase.
237 | 
238 | - Make a map of all of the sentences with the key being the sentences themselves and the values being an array of the nouns and adjectives in the sentence.
239 | ex. {"The Detroit Pistons are a good basketball team" => [detroit, pistons, good, basketball, team], "A basketball is round"=>[basketball, round]}
240 | 
241 | - Make a weighted graph with edges that connect sentences with matching nouns or adjectives. The weight of each edge is the number of matching nouns and adjectives. In the example above, there would be two nodes connected with an edge of 1 for "basketball".
242 | 
243 | - Choose a random starting point in the graph and "walk" through it many times using the weight of each edge as a probability of which next vertex to go to. For example, a node with two neighbors, connected by an edge of weight 4 and an edge of weight 3, would have a 4/7 chance to go to the first neighbor and a 3/7 chance to go to the other.
244 | 
245 | - A count is kept of how many times a node is walked on.
246 | 
247 | - Sort the sentences.
248 | 
249 | <a name="depend"></a>
250 | ## 5. Dependencies
251 |   [natural](https://github.com/NaturalNode/natural)
252 |     
253 |    - Used for tokenizing sentences and sentiment analysis
254 |     
255 |   [wordpos](https://github.com/moos/wordpos)
256 |   
257 |    - Used to detect nouns and adjectives in a sentence in the TextRank algorithm.
258 |   
259 | <a name="license"></a>
260 | ## 6. License
261 | 
262 | This project is licensed under the terms of the ISC license.
263 | 


--------------------------------------------------------------------------------