├── Example.py
├── Example
    ├── Guess_Summ_1.txt
    ├── Guess_Summ_2.txt
    ├── Ref_Summ_1_1.txt
    ├── Ref_Summ_1_2.txt
    ├── Ref_Summ_2_1.txt
    ├── Ref_Summ_2_2.txt
    └── Ref_Summ_2_3.txt
├── PythonROUGE.py
└── README.txt


/Example.py:
--------------------------------------------------------------------------------
1 | import PythonROUGE
2 | 
# Each guess summary is scored against the reference summaries found at the
# same position in ref_summary_list.
guess_summary_list = ['Example/Guess_Summ_1.txt', 'Example/Guess_Summ_2.txt']
ref_summary_list = [
    ['Example/Ref_Summ_1_1.txt', 'Example/Ref_Summ_1_2.txt'],
    ['Example/Ref_Summ_2_1.txt', 'Example/Ref_Summ_2_2.txt', 'Example/Ref_Summ_2_3.txt'],
]
recall, precision, F_measure = PythonROUGE.PythonROUGE(
    guess_summary_list, ref_summary_list, ngram_order=2)

# A single pre-formatted argument prints the same text under Python 2 and 3
# (the bare `print a, b, c` statement is a syntax error on Python 3).
print('%s %s %s' % (recall, precision, F_measure))
8 | 


--------------------------------------------------------------------------------
/Example/Guess_Summ_1.txt:
--------------------------------------------------------------------------------
1 | They are tall and nice.
2 | 


--------------------------------------------------------------------------------
/Example/Guess_Summ_2.txt:
--------------------------------------------------------------------------------
1 | Summarization is a hard task.
2 | 


--------------------------------------------------------------------------------
/Example/Ref_Summ_1_1.txt:
--------------------------------------------------------------------------------
1 | He is happy, but she is sad. They are tall.
2 | 


--------------------------------------------------------------------------------
/Example/Ref_Summ_1_2.txt:
--------------------------------------------------------------------------------
1 | He is happy, but she is unhappy. They are tall.
2 | 


--------------------------------------------------------------------------------
/Example/Ref_Summ_2_1.txt:
--------------------------------------------------------------------------------
1 | Summarization is hard.
2 | 


--------------------------------------------------------------------------------
/Example/Ref_Summ_2_2.txt:
--------------------------------------------------------------------------------
1 | One hard task is summarization.
2 | 


--------------------------------------------------------------------------------
/Example/Ref_Summ_2_3.txt:
--------------------------------------------------------------------------------
1 | Summarization is a task.
2 | 


--------------------------------------------------------------------------------
/PythonROUGE.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Created on Mon Aug 13 10:31:58 2012
  3 | 
  4 | author: Miguel B. Almeida
  5 | mail: mba@priberam.pt
  6 | """
  7 | 
  8 | import os
  9 | import re
 10 | 
 11 | # Wrapper function to use ROUGE from Python easily
 12 | # Inputs:
 13 |     # guess_summ_list, a string with the absolute path to the file with your guess summary
 14 |     # ref_summ_list, a list of lists of paths to multiple reference summaries.
 15 |     # IMPORTANT: all the reference summaries must be in the same directory!
 16 |     # (optional) ngram_order, the order of the N-grams used to compute ROUGE
 17 |     # the default is 1 (unigrams)
 18 | # Output: a tuple of the form (recall,precision,F_measure)
 19 | #
 20 | # Example usage: PythonROUGE('/home/foo/my_guess_summary.txt',[/home/bar/my_ref_summary_1.txt,/home/bar/my_ref_summary_2.txt])
 21 | def PythonROUGE(guess_summ_list,ref_summ_list,ngram_order=2):
 22 |     """ Wrapper function to use ROUGE from Python easily. """
 23 | 
 24 |     # even though we ask that the first argument is a list,
 25 |     # if it is a single string we can handle it
 26 |     if type(guess_summ_list) == str:
 27 |         temp = list()
 28 |         temp.append(ref_summ_list)
 29 |         guess_summ_list = temp
 30 |         del temp
 31 |     
 32 |     # even though we ask that the second argument is a list of lists,
 33 |     # if it is a single string we can handle it
 34 | #    if type(ref_summ_list[0]) == str:
 35 | #        temp = list()
 36 | #        temp.append(ref_summ_list)
 37 | #        ref_summ_list = temp
 38 | #        del temp
 39 |     
 40 |     # this is the path to your ROUGE distribution
 41 |     ROUGE_path = '/home/miguel/PythonROUGE/RELEASE-1.5.5/ROUGE-1.5.5.pl'
 42 |     data_path = '/home/miguel/PythonROUGE/RELEASE-1.5.5/data'
 43 |     
 44 |     # these are the options used to call ROUGE
 45 |     # feel free to edit this is you want to call ROUGE with different options
 46 |     options = '-a -m -n ' + str(ngram_order)
 47 |     
 48 |     # this is a temporary XML file which will contain information
 49 |     # in the format ROUGE uses
 50 |     xml_path = 'temp.xml'
 51 |     xml_file = open(xml_path,'w')
 52 |     xml_file.write('<ROUGE-EVAL version="1.0">\n')
 53 |     for guess_summ_index,guess_summ_file in enumerate(guess_summ_list):
 54 |         xml_file.write('<EVAL ID="' + str(guess_summ_index+1) + '">\n')
 55 |         create_xml(xml_file,guess_summ_file,ref_summ_list[guess_summ_index])
 56 |         xml_file.write('</EVAL>\n')
 57 |     xml_file.write('</ROUGE-EVAL>\n')
 58 |     xml_file.close()
 59 |     
 60 |     
 61 |     # this is the file where the output of ROUGE will be stored
 62 |     ROUGE_output_path = 'ROUGE_result.txt'
 63 |     
 64 |     # this is where we run ROUGE itself
 65 |     exec_command = ROUGE_path + ' -e ' + data_path + ' ' + options + ' -x ' + xml_path + ' > ' + ROUGE_output_path
 66 |     os.system(exec_command)
 67 |     
 68 |     # here, we read the file with the ROUGE output and
 69 |     # look for the recall, precision, and F-measure scores
 70 |     recall_list = list()
 71 |     precision_list = list()
 72 |     F_measure_list = list()
 73 |     ROUGE_output_file = open(ROUGE_output_path,'r')
 74 |     for n in xrange(ngram_order):
 75 |         ROUGE_output_file.seek(0)
 76 |         for line in ROUGE_output_file:
 77 |             match = re.findall('X ROUGE-' + str(n+1) + ' Average_R: ([0-9.]+)',line)
 78 |             if match != []:
 79 |                 recall_list.append(float(match[0]))
 80 |             match = re.findall('X ROUGE-' + str(n+1) + ' Average_P: ([0-9.]+)',line)
 81 |             if match != []:
 82 |                 precision_list.append(float(match[0]))
 83 |             match = re.findall('X ROUGE-' + str(n+1) + ' Average_F: ([0-9.]+)',line)
 84 |             if match != []:
 85 |                 F_measure_list.append(float(match[0]))
 86 |     ROUGE_output_file.close()
 87 |     
 88 |     # remove temporary files which were created
 89 |     os.remove(xml_path)
 90 |     os.remove(ROUGE_output_path)
 91 | 
 92 |     return (recall_list,precision_list,F_measure_list)
 93 |     
 94 |     
 95 | # This is an auxiliary function
 96 | # It creates an XML file which ROUGE can read
 97 | # Don't ask me how ROUGE works, because I don't know!
 98 | def create_xml(xml_file,guess_summ_file,ref_summ_list):
 99 |     xml_file.write('<PEER-ROOT>\n')
100 |     guess_summ_dir = os.path.dirname(guess_summ_file)
101 |     xml_file.write(guess_summ_dir + '\n')
102 |     xml_file.write('</PEER-ROOT>\n')
103 |     xml_file.write('<MODEL-ROOT>\n')
104 |     ref_summ_dir = os.path.dirname(ref_summ_list[0] + '\n')
105 |     xml_file.write(ref_summ_dir + '\n')
106 |     xml_file.write('</MODEL-ROOT>\n')
107 |     xml_file.write('<INPUT-FORMAT TYPE="SPL">\n')
108 |     xml_file.write('</INPUT-FORMAT>\n')
109 |     xml_file.write('<PEERS>\n')
110 |     guess_summ_basename = os.path.basename(guess_summ_file)
111 |     xml_file.write('<P ID="X">' + guess_summ_basename + '</P>\n')
112 |     xml_file.write('</PEERS>\n')
113 |     xml_file.write('<MODELS>')
114 |     letter_list = ['A','B','C','D','E','F','G','H','I','J']
115 |     for ref_summ_index,ref_summ_file in enumerate(ref_summ_list):
116 |         ref_summ_basename = os.path.basename(ref_summ_file)
117 |         xml_file.write('<M ID="' + letter_list[ref_summ_index] + '">' + ref_summ_basename + '</M>\n')
118 |     
119 |     xml_file.write('</MODELS>\n')
120 |     
# This is only called if this file is executed as a script.
# It shows an example of usage: two guess summaries, each scored against
# its own list of reference summaries from the Example directory.
if __name__ == '__main__':
    guess_summary_list = ['Example/Guess_Summ_1.txt', 'Example/Guess_Summ_2.txt']
    ref_summ_list = [
        ['Example/Ref_Summ_1_1.txt', 'Example/Ref_Summ_1_2.txt'],
        ['Example/Ref_Summ_2_1.txt', 'Example/Ref_Summ_2_2.txt', 'Example/Ref_Summ_2_3.txt'],
    ]
    recall_list, precision_list, F_measure_list = PythonROUGE(guess_summary_list, ref_summ_list)
    # print(...) with a single argument behaves the same on Python 2 and 3
    print('recall = ' + str(recall_list))
    print('precision = ' + str(precision_list))
    print('F = ' + str(F_measure_list))


--------------------------------------------------------------------------------
/README.txt:
--------------------------------------------------------------------------------
 1 | Author: Miguel B. Almeida
 2 | Email: mba@priberam.pt
 3 | 
 4 | This is a simple Python wrapper for ROUGE. ROUGE is a Perl script for evaluation of summaries. You can obtain it from http://www.berouge.com/
 5 | 
 6 | 
 7 | INSTALLATION:
 8 | 1) Download ROUGE from http://www.berouge.com/
 9 | 2) Create a folder somewhere, say /home/you/PythonROUGE
10 | 3) Place the "RELEASE-1.5.5" folder you got from ROUGE inside that folder, i.e. /home/you/PythonROUGE/RELEASE-1.5.5
11 | 4) Place this package in parallel to the above, i.e. /home/you/PythonROUGE/PythonROUGE.py and /home/you/PythonROUGE/Example
12 | 5) Edit the "PythonROUGE.py" file and replace the lines starting with "ROUGE_path =" and "data_path =" with the appropriate paths
13 | 
14 | 
15 | TESTING:
16 | 1) Run in a terminal the command "python PythonROUGE.py"
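17 | 2) You should get output like the following (note that the current script prints each score as a list, with one value per n-gram order):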
18 | 
19 | recall = 0.3
20 | precision = 0.6
21 | F = 0.4
22 | 
23 | 
24 | USAGE:
25 | See the file "Example.py".
26 | 


--------------------------------------------------------------------------------