├── LICENSE ├── test ├── DataFiles │ ├── TC-A-2.response │ ├── TC-A.key │ ├── TC-M.key │ ├── TC-N.key │ ├── TC-A-10.response │ ├── TC-A-11.response │ ├── TC-A-12.response │ ├── TC-A-13.response │ ├── TC-A-4.response │ ├── TC-A-5.response │ ├── TC-A-6.response │ ├── TC-A-1.response │ ├── TC-A-3.response │ ├── TC-A-7.response │ ├── TC-A-8.response │ ├── TC-M-1.response │ ├── TC-M-2.response │ ├── TC-M-3.response │ ├── TC-M-4.response │ ├── TC-M-5.response │ ├── TC-M-6.response │ ├── TC-N-1.response │ ├── TC-N-2.response │ ├── TC-N-3.response │ ├── TC-N-4.response │ ├── TC-N-5.response │ ├── TC-N-6.response │ ├── TC-A-9.response │ ├── TC-F.key │ ├── TC-G.key │ ├── TC-H.key │ ├── TC-I.key │ ├── TC-J.key │ ├── TC-J-1.response │ ├── TC-F-1.response │ ├── TC-G-1.response │ ├── TC-H-1.response │ ├── TC-I-1.response │ ├── TC-K.key │ ├── TC-L.key │ ├── TC-L-1.response │ ├── TC-D.key │ ├── TC-E.key │ ├── TC-K-1.response │ ├── TC-D-1.response │ ├── TC-E-1.response │ ├── TC-B.key │ ├── TC-B-1.response │ ├── TC-C.key │ └── TC-C-1.response ├── test.pl ├── CorefMetricTest.pm ├── TestCases.README └── CorefMetricTestConfig.pm ├── scorer.pl ├── scorer.bat ├── README.rst └── lib ├── Algorithm ├── README.Munkres └── Munkres.pm ├── Cwd.pm ├── Math └── Combinatorics.pm └── CorScorer.pm /LICENSE: -------------------------------------------------------------------------------- 1 | LICENSE 2 | ------- 3 | 4 | This work is licensed under a [[Creative Commons Attribution ShareAlike 4.0 International License|http://creativecommons.org/licenses/by-sa/4.0/]]. 5 | -------------------------------------------------------------------------------- /test/DataFiles/TC-A-2.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 - 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 - 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c - 13 | test2 0 1 jnk - 14 | test2 0 2 d1 (2 15 | test2 0 3 d2 2) 16 | test2 0 4 jnk - 17 | test2 0 5 e (2) 18 | test2 0 6 jnk - 19 | test2 0 7 f1 - 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /test/DataFiles/TC-A.key: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 1) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (1) 13 | test2 0 1 jnk - 14 | test2 0 2 d1 (2 15 | test2 0 3 d2 2) 16 | test2 0 4 jnk - 17 | test2 0 5 e (2) 18 | test2 0 6 jnk - 19 | test2 0 7 f1 (2 20 | test2 0 8 f2 - 21 | test2 0 9 f3 2) 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /test/DataFiles/TC-M.key: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (0 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 0) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (0) 13 | test2 0 1 jnk - 14 | test2 0 2 d1 (0 15 | test2 0 3 d2 0) 16 | test2 0 4 jnk - 17 | test2 0 5 e (0) 18 | test2 0 6 jnk - 19 | test2 0 7 f1 (0 20 | test2 0 8 f2 - 21 | test2 0 9 f3 0) 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /test/DataFiles/TC-N.key: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 1) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (2) 13 | test2 0 1 jnk - 14 | test2 0 2 d1 (3 15 | test2 0 3 d2 3) 16 | test2 0 4 jnk - 17 | test2 0 5 e (4) 18 | test2 0 6 jnk - 19 | test2 0 7 f1 (5 20 | test2 0 8 f2 - 21 | test2 0 9 f3 5) 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /test/DataFiles/TC-A-10.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 1) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (2) 13 | test2 0 1 x - 14 | test2 0 2 d1 (3 15 | test2 0 3 d2 3) 16 | test2 0 4 z - 17 | test2 0 5 e (4) 18 | test2 0 6 y - 19 | test2 0 7 f1 (5 20 | test2 0 8 f2 - 21 | test2 0 9 f3 5) 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /test/DataFiles/TC-A-11.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (0 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 0) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (0) 13 | test2 0 1 x - 14 | test2 0 2 d1 (0 15 | test2 0 3 d2 0) 16 | test2 0 4 z - 17 | test2 0 5 e (0) 18 | test2 0 6 y - 19 | test2 0 7 f1 (0 20 | test2 0 8 f2 - 21 | test2 0 9 f3 0) 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /test/DataFiles/TC-A-12.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1 6 | test1 0 4 b2 1) 7 | test1 0 5 b3 - 8 | test1 0 6 b4 - 9 | test1 0 7 jnk (2) 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (3) 13 | test2 0 1 x - 14 | test2 0 2 d1 (4 15 | test2 0 3 d2 4) 16 | test2 0 4 z - 17 | test2 0 5 e (5) 18 | test2 0 6 y - 19 | test2 0 7 f1 (6) 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /test/DataFiles/TC-A-13.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (0 6 | test1 0 4 b2 0) 7 | test1 0 5 b3 - 8 | test1 0 6 b4 - 9 | test1 0 7 jnk (0) 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (0) 13 | test2 0 1 x - 14 | test2 0 2 d1 (0 15 | test2 0 3 d2 0) 16 | test2 0 4 z - 17 | test2 0 5 e (0) 18 | test2 0 6 y - 19 | test2 0 7 f1 (0) 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /test/DataFiles/TC-A-4.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 1) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (1) 13 | test2 0 1 x (1) 14 | test2 0 2 d1 (2 15 | test2 0 3 d2 2) 16 | test2 0 4 x (3) 17 | test2 0 5 e - 18 | test2 0 6 y (2) 19 | test2 0 7 f1 - 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /test/DataFiles/TC-A-5.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1 6 | test1 0 4 b2 (1 7 | test1 0 5 b3 1) 8 | test1 0 6 b4 1) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (1) 13 | test2 0 1 x (1) 14 | test2 0 2 d1 (2 15 | test2 0 3 d2 2) 16 | test2 0 4 z (3) 17 | test2 0 5 e - 18 | test2 0 6 y (2) 19 | test2 0 7 f1 - 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /test/DataFiles/TC-A-6.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1 6 | test1 0 4 b2 (3 7 | test1 0 5 b3 3) 8 | test1 0 6 b4 1) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (1) 13 | test2 0 1 x (1) 14 | test2 0 2 d1 (2 15 | test2 0 3 d2 2) 16 | test2 0 4 z (3) 17 | test2 0 5 e - 18 | test2 0 6 y (2) 19 | test2 0 7 f1 - 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /test/DataFiles/TC-A-1.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 1) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (1) 13 | test2 0 1 jnk - 14 | test2 0 2 d1 (2 15 | test2 0 3 d2 2) 16 | test2 0 4 jnk - 17 | test2 0 5 e (2) 18 | test2 0 6 jnk - 19 | test2 0 7 f1 (2 20 | test2 0 8 f2 - 21 | test2 0 9 f3 2) 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /test/DataFiles/TC-A-3.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 1) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (1) 13 | test2 0 1 x (1) 14 | test2 0 2 d1 (2 15 | test2 0 3 d2 2) 16 | test2 0 4 y (2) 17 | test2 0 5 e (2) 18 | test2 0 6 z (3) 19 | test2 0 7 f1 (2 20 | test2 0 8 f2 - 21 | test2 0 9 f3 2) 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /test/DataFiles/TC-A-7.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1(1 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 1)1) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (1) 13 | test2 0 1 x (1) 14 | test2 0 2 d1 (2 15 | test2 0 3 d2 2) 16 | test2 0 4 z (3) 17 | test2 0 5 e - 18 | test2 0 6 y (2) 19 | test2 0 7 f1 - 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /test/DataFiles/TC-A-8.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1(3 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 3)1) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (1) 13 | test2 0 1 x (1) 14 | test2 0 2 d1 (2 15 | test2 0 3 d2 2) 16 | test2 0 4 z (3) 17 | test2 0 5 e - 18 | test2 0 6 y (2) 19 | test2 0 7 f1 - 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /test/DataFiles/TC-M-1.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (0 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 0) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (0) 13 | test2 0 1 jnk - 14 | test2 0 2 d1 (0 15 | test2 0 3 d2 0) 16 | test2 0 4 jnk - 17 | test2 0 5 e (0) 18 | test2 0 6 jnk - 19 | test2 0 7 f1 (0 20 | test2 0 8 f2 - 21 | test2 0 9 f3 0) 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /test/DataFiles/TC-M-2.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 1) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (2) 13 | test2 0 1 jnk - 14 | test2 0 2 d1 (3 15 | test2 0 3 d2 3) 16 | test2 0 4 jnk - 17 | test2 0 5 e (4) 18 | test2 0 6 jnk - 19 | test2 0 7 f1 (5 20 | test2 0 8 f2 - 21 | test2 0 9 f3 5) 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /test/DataFiles/TC-M-3.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (0 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 0) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (1) 13 | test2 0 1 jnk - 14 | test2 0 2 d1 (1 15 | test2 0 3 d2 1) 16 | test2 0 4 jnk - 17 | test2 0 5 e (1) 18 | test2 0 6 jnk - 19 | test2 0 7 f1 (2 20 | test2 0 8 f2 - 21 | test2 0 9 f3 2) 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /test/DataFiles/TC-M-4.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (0 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 0) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (0) 13 | test2 0 1 jnk (0) 14 | test2 0 2 d1 - 15 | test2 0 3 d2 - 16 | test2 0 4 jnk (0) 17 | test2 0 5 e - 18 | test2 0 6 jnk (0) 19 | test2 0 7 f1 - 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /test/DataFiles/TC-M-5.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 1) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (2) 13 | test2 0 1 jnk (3) 14 | test2 0 2 d1 - 15 | test2 0 3 d2 - 16 | test2 0 4 jnk (4) 17 | test2 0 5 e - 18 | test2 0 6 jnk (5) 19 | test2 0 7 f1 - 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /test/DataFiles/TC-M-6.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (0 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 0) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (1) 13 | test2 0 1 jnk (1) 14 | test2 0 2 d1 - 15 | test2 0 3 d2 - 16 | test2 0 4 jnk (1) 17 | test2 0 5 e - 18 | test2 0 6 jnk (2) 19 | test2 0 7 f1 - 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /test/DataFiles/TC-N-1.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 1) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (2) 13 | test2 0 1 jnk - 14 | test2 0 2 d1 (3 15 | test2 0 3 d2 3) 16 | test2 0 4 jnk - 17 | test2 0 5 e (4) 18 | test2 0 6 jnk - 19 | test2 0 7 f1 (5 20 | test2 0 8 f2 - 21 | test2 0 9 f3 5) 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /test/DataFiles/TC-N-2.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (0 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 0) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (0) 13 | test2 0 1 jnk - 14 | test2 0 2 d1 (0 15 | test2 0 3 d2 0) 16 | test2 0 4 jnk - 17 | test2 0 5 e (0) 18 | test2 0 6 jnk - 19 | test2 0 7 f1 (0 20 | test2 0 8 f2 - 21 | test2 0 9 f3 0) 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /test/DataFiles/TC-N-3.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (0 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 0) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (1) 13 | test2 0 1 jnk - 14 | test2 0 2 d1 (1 15 | test2 0 3 d2 1) 16 | test2 0 4 jnk - 17 | test2 0 5 e (1) 18 | test2 0 6 jnk - 19 | test2 0 7 f1 (2 20 | test2 0 8 f2 - 21 | test2 0 9 f3 2) 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /test/DataFiles/TC-N-4.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 1) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (2) 13 | test2 0 1 jnk (3) 14 | test2 0 2 d1 - 15 | test2 0 3 d2 - 16 | test2 0 4 jnk (4) 17 | test2 0 5 e - 18 | test2 0 6 jnk (5) 19 | test2 0 7 f1 - 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /test/DataFiles/TC-N-5.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (0 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 0) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (0) 13 | test2 0 1 jnk (0) 14 | test2 0 2 d1 - 15 | test2 0 3 d2 - 16 | test2 0 4 jnk (0) 17 | test2 0 5 e - 18 | test2 0 6 jnk (0) 19 | test2 0 7 f1 - 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /test/DataFiles/TC-N-6.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (0 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 0) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (1) 13 | test2 0 1 jnk (1) 14 | test2 0 2 d1 - 15 | test2 0 3 d2 - 16 | test2 0 4 jnk (1) 17 | test2 0 5 e - 18 | test2 0 6 jnk (2) 19 | test2 0 7 f1 - 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /test/DataFiles/TC-A-9.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1(3(3(3(3(3(3(3(3(3(3 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 3)3)3)3)3)3)3)3)3)3)1) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (1) 13 | test2 0 1 x (1) 14 | test2 0 2 d1 (2 15 | test2 0 3 d2 2) 16 | test2 0 4 z (3) 17 | test2 0 5 e - 18 | test2 0 6 y (2) 19 | test2 0 7 f1 - 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /test/DataFiles/TC-F.key: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (1) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /test/DataFiles/TC-G.key: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (2) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (2) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /test/DataFiles/TC-H.key: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (1) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /test/DataFiles/TC-I.key: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (1) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /test/DataFiles/TC-J.key: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 - 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /test/DataFiles/TC-J-1.response: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 - 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 - 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /test/DataFiles/TC-F-1.response: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (2) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (2) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /test/DataFiles/TC-G-1.response: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (1) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /test/DataFiles/TC-H-1.response: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (1) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /test/DataFiles/TC-I-1.response: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (2) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (2) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /test/DataFiles/TC-K.key: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 - 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (1) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 (1) 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 (1) 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 (1) 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 (1) 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /test/DataFiles/TC-L.key: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (2) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 (2) 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 (2) 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 (2) 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /test/DataFiles/TC-L-1.response: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (2) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (2) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 (3) 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 (3) 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 (3) 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /test/DataFiles/TC-D.key: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (1) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 (1) 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 (2) 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 (2) 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 (3) 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 (3) 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 (3) 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 (3) 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 (3) 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /test/DataFiles/TC-E.key: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (1) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 (1) 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 (2) 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 (2) 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 (3) 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 (3) 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 (3) 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 (3) 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 (3) 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /test/DataFiles/TC-K-1.response: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (2) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 (2) 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 (2) 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 (3) 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 (3) 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 (3) 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /test/DataFiles/TC-D-1.response: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (1) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 (1) 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 (3) 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 (3) 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 (3) 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 (3) 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 (3) 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 (3) 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 (3) 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /test/DataFiles/TC-E-1.response: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (1) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 (1) 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 (2) 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 (2) 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 (1) 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 (1) 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 (1) 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 (1) 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 (1) 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /scorer.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | BEGIN { 4 | $d = $0; 5 | $d =~ s/\/[^\/][^\/]*$//g; 6 | 7 | if ($d eq $0) { 8 | unshift(@INC, "lib"); 9 | } 10 | else { 11 | unshift(@INC, $d . "/lib"); 12 | } 13 | } 14 | 15 | use strict; 16 | use CorScorer; 17 | 18 | if (@ARGV < 3) { 19 | print q| 20 | use: scorer.pl [name] 21 | 22 | metric: the metric desired to score the results: 23 | muc: MUCScorer (Vilain et al, 1995) 24 | bcub: B-Cubed (Bagga and Baldwin, 1998) 25 | ceafm: CEAF (Luo et al, 2005) using mention-based similarity 26 | ceafe: CEAF (Luo et al, 2005) using entity-based similarity 27 | blanc: BLANC 28 | all: uses all the metrics to score 29 | 30 | keys_file: file with expected coreference chains in SemEval format 31 | 32 | response_file: file with output of coreference system (SemEval format) 33 | 34 | name: [optional] the name of the document to score. If name is not 35 | given, all the documents in the dataset will be scored. If given 36 | name is "none" then all the documents are scored but only total 37 | results are shown. 38 | 39 | |; 40 | exit; 41 | } 42 | 43 | my $metric = shift(@ARGV); 44 | if ($metric !~ /^(muc|bcub|ceafm|ceafe|blanc|all)/i) { 45 | print "Invalid metric\n"; 46 | exit; 47 | } 48 | 49 | if ($metric eq 'all') { 50 | foreach my $m ('muc', 'bcub', 'ceafm', 'ceafe', 'blanc') { 51 | print "\nMETRIC $m:\n"; 52 | &CorScorer::Score($m, @ARGV); 53 | } 54 | } 55 | else { 56 | &CorScorer::Score($metric, @ARGV); 57 | } 58 | 59 | -------------------------------------------------------------------------------- /test/test.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | BEGIN { 4 | $d = $0; 5 | $d =~ s/\/[^\/][^\/]*$//g; 6 | push(@INC, $d); 7 | push(@INC, $d . "/../lib"); 8 | } 9 | 10 | use strict; 11 | use CorScorer; 12 | use CorefMetricTest; 13 | use CorefMetricTestConfig; 14 | 15 | my $error_tolerance = 1.e-4; 16 | my $script_dir = $0; 17 | $script_dir =~ s/\/[^\/][^\/]*$//g; 18 | 19 | foreach my $test_case (@CorefMetricTestConfig::TestCases) { 20 | my $id = $test_case->{'id'}; 21 | my @key_response_files = ($script_dir . "/" . $test_case->{'key_file'}, 22 | $script_dir . "/" . $test_case->{'response_file'}); 23 | print "\nTesting case ($id): keyFile=", $key_response_files[0], 24 | " responseFile=", $key_response_files[1], "\n"; 25 | my $expected_metrics = $test_case->{'expected_metrics'}; 26 | foreach my $metric_name (sort keys %$expected_metrics) { 27 | my $expected_values = $expected_metrics->{$metric_name}; 28 | *::SAVED_STDOUT = *STDOUT; 29 | *STDOUT = *::SUPRRES_STDOUT; 30 | my @actual_counts = &CorScorer::Score($metric_name, @key_response_files); 31 | # Compute R,P,and F1 from raw counts. 32 | my @actual_values = CorefMetricTest::ComputeScoreFromCounts(@actual_counts); 33 | *STDOUT = *::SAVED_STDOUT; 34 | my $diff = CorefMetricTest::DiffExpectedAndActual($expected_values, \@actual_values); 35 | printf " metric: %+10s", $metric_name; 36 | if ($diff < $error_tolerance) { 37 | print " => PASS\n"; 38 | } else { 39 | print " => FAIL\n"; 40 | print " Expected (recall, prec, F1) = (", join(" ", @$expected_values), ")\n"; 41 | print " Actual (recall, prec, F1) = (", join(" ", @actual_values), ")\n"; 42 | #exit(1); 43 | } 44 | } 45 | } 46 | 47 | -------------------------------------------------------------------------------- /scorer.bat: -------------------------------------------------------------------------------- 1 | @rem = '--*-Perl-*-- 2 | @echo off 3 | if "%OS%" == "Windows_NT" goto WinNT 4 | perl -x -S "%0" %1 %2 %3 %4 %5 %6 %7 %8 %9 5 | goto endofperl 6 | :WinNT 7 | perl -x -S %0 %* 8 | if NOT "%COMSPEC%" == "%SystemRoot%\system32\cmd.exe" goto endofperl 9 | if %errorlevel% == 9009 echo You do not have Perl in your PATH. 10 | if errorlevel 1 goto script_failed_so_exit_with_non_zero_val 2>nul 11 | goto endofperl 12 | @rem '; 13 | #!perl 14 | #line 15 15 | 16 | BEGIN { 17 | $d = $0; 18 | $d =~ s/\/[^\/][^\/]*$//g; 19 | push(@INC, $d."/lib"); 20 | } 21 | 22 | use strict; 23 | use CorScorer; 24 | 25 | if (@ARGV < 3) { 26 | print q| 27 | use: scorer.bat [name] 28 | 29 | metric: the metric desired to score the results: 30 | muc: MUCScorer (Vilain et al, 1995) 31 | bcub: B-Cubed (Bagga and Baldwin, 1998) 32 | ceafm: CEAF (Luo et al, 2005) using mention-based similarity 33 | ceafe: CEAF (Luo et al, 2005) using entity-based similarity 34 | all: uses all the metrics to score 35 | 36 | keys_file: file with expected coreference chains in SemEval format 37 | 38 | response_file: file with output of coreference system (SemEval format) 39 | 40 | name: [optional] the name of the document to score. If name is not 41 | given, all the documents in the dataset will be scored. If given 42 | name is "none" then all the documents are scored but only total 43 | results are shown. 44 | 45 | |; 46 | exit; 47 | } 48 | 49 | my $metric = shift (@ARGV); 50 | if ($metric !~ /^(muc|bcub|ceafm|ceafe|all)/i) { 51 | print "Invalid metric\n"; 52 | exit; 53 | } 54 | 55 | 56 | if ($metric eq 'all') { 57 | foreach my $m ('muc', 'bcub', 'ceafm', 'ceafe') { 58 | print "\nMETRIC $m:\n"; 59 | &CorScorer::Score( $m, @ARGV ); 60 | } 61 | } 62 | else { 63 | &CorScorer::Score( $metric, @ARGV ); 64 | } 65 | 66 | __END__ 67 | :endofperl 68 | -------------------------------------------------------------------------------- /test/DataFiles/TC-B.key: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (10043 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 - 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 - 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 - 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 10043) 15 | nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | nw/xinhua/00/chtb_0009 - 31 | nw/xinhua/00/chtb_0009 (10054 32 | nw/xinhua/00/chtb_0009 - 33 | nw/xinhua/00/chtb_0009 10054) 34 | nw/xinhua/00/chtb_0009 - 35 | nw/xinhua/00/chtb_0009 - 36 | nw/xinhua/00/chtb_0009 - 37 | nw/xinhua/00/chtb_0009 - 38 | nw/xinhua/00/chtb_0009 - 39 | nw/xinhua/00/chtb_0009 - 40 | nw/xinhua/00/chtb_0009 - 41 | nw/xinhua/00/chtb_0009 - 42 | nw/xinhua/00/chtb_0009 - 43 | nw/xinhua/00/chtb_0009 - 44 | nw/xinhua/00/chtb_0009 - 45 | nw/xinhua/00/chtb_0009 - 46 | nw/xinhua/00/chtb_0009 - 47 | nw/xinhua/00/chtb_0009 - 48 | nw/xinhua/00/chtb_0009 - 49 | nw/xinhua/00/chtb_0009 (10043) 50 | nw/xinhua/00/chtb_0009 - 51 | nw/xinhua/00/chtb_0009 - 52 | nw/xinhua/00/chtb_0009 - 53 | nw/xinhua/00/chtb_0009 - 54 | nw/xinhua/00/chtb_0009 - 55 | nw/xinhua/00/chtb_0009 - 56 | nw/xinhua/00/chtb_0009 - 57 | nw/xinhua/00/chtb_0009 - 58 | nw/xinhua/00/chtb_0009 - 59 | nw/xinhua/00/chtb_0009 - 60 | nw/xinhua/00/chtb_0009 - 61 | nw/xinhua/00/chtb_0009 - 62 | nw/xinhua/00/chtb_0009 - 63 | nw/xinhua/00/chtb_0009 - 64 | nw/xinhua/00/chtb_0009 (10054 65 | nw/xinhua/00/chtb_0009 10054) 66 | nw/xinhua/00/chtb_0009 - 67 | nw/xinhua/00/chtb_0009 - 68 | nw/xinhua/00/chtb_0009 (10054) 69 | nw/xinhua/00/chtb_0009 - 70 | nw/xinhua/00/chtb_0009 - 71 | nw/xinhua/00/chtb_0009 - 72 | nw/xinhua/00/chtb_0009 - 73 | 74 | #end document 75 | -------------------------------------------------------------------------------- /test/DataFiles/TC-B-1.response: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 - 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 - 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 - 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 - 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | nw/xinhua/00/chtb_0009 - 31 | nw/xinhua/00/chtb_0009 (10043 32 | nw/xinhua/00/chtb_0009 - 33 | nw/xinhua/00/chtb_0009 10043) 34 | nw/xinhua/00/chtb_0009 - 35 | nw/xinhua/00/chtb_0009 - 36 | nw/xinhua/00/chtb_0009 - 37 | nw/xinhua/00/chtb_0009 - 38 | nw/xinhua/00/chtb_0009 - 39 | nw/xinhua/00/chtb_0009 - 40 | nw/xinhua/00/chtb_0009 - 41 | nw/xinhua/00/chtb_0009 - 42 | nw/xinhua/00/chtb_0009 - 43 | nw/xinhua/00/chtb_0009 - 44 | nw/xinhua/00/chtb_0009 - 45 | nw/xinhua/00/chtb_0009 - 46 | nw/xinhua/00/chtb_0009 - 47 | nw/xinhua/00/chtb_0009 - 48 | nw/xinhua/00/chtb_0009 - 49 | nw/xinhua/00/chtb_0009 (10043) 50 | nw/xinhua/00/chtb_0009 - 51 | nw/xinhua/00/chtb_0009 - 52 | nw/xinhua/00/chtb_0009 - 53 | nw/xinhua/00/chtb_0009 - 54 | nw/xinhua/00/chtb_0009 - 55 | nw/xinhua/00/chtb_0009 - 56 | nw/xinhua/00/chtb_0009 (10043 57 | nw/xinhua/00/chtb_0009 - 58 | nw/xinhua/00/chtb_0009 - 59 | nw/xinhua/00/chtb_0009 - 60 | nw/xinhua/00/chtb_0009 10043) 61 | nw/xinhua/00/chtb_0009 - 62 | nw/xinhua/00/chtb_0009 - 63 | nw/xinhua/00/chtb_0009 - 64 | nw/xinhua/00/chtb_0009 (10054 65 | nw/xinhua/00/chtb_0009 10054) 66 | nw/xinhua/00/chtb_0009 - 67 | nw/xinhua/00/chtb_0009 - 68 | nw/xinhua/00/chtb_0009 (10054) 69 | nw/xinhua/00/chtb_0009 - 70 | nw/xinhua/00/chtb_0009 - 71 | nw/xinhua/00/chtb_0009 - 72 | nw/xinhua/00/chtb_0009 - 73 | 74 | #end document 75 | -------------------------------------------------------------------------------- /test/DataFiles/TC-C.key: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (10043 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 - 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 - 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 - 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 10043) 15 | nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | nw/xinhua/00/chtb_0009 - 31 | nw/xinhua/00/chtb_0009 (10054 32 | nw/xinhua/00/chtb_0009 - 33 | nw/xinhua/00/chtb_0009 10054) 34 | nw/xinhua/00/chtb_0009 - 35 | nw/xinhua/00/chtb_0009 - 36 | nw/xinhua/00/chtb_0009 - 37 | nw/xinhua/00/chtb_0009 - 38 | nw/xinhua/00/chtb_0009 - 39 | nw/xinhua/00/chtb_0009 - 40 | nw/xinhua/00/chtb_0009 - 41 | nw/xinhua/00/chtb_0009 - 42 | nw/xinhua/00/chtb_0009 - 43 | nw/xinhua/00/chtb_0009 - 44 | nw/xinhua/00/chtb_0009 - 45 | nw/xinhua/00/chtb_0009 - 46 | nw/xinhua/00/chtb_0009 - 47 | nw/xinhua/00/chtb_0009 - 48 | nw/xinhua/00/chtb_0009 - 49 | nw/xinhua/00/chtb_0009 (10043) 50 | nw/xinhua/00/chtb_0009 - 51 | nw/xinhua/00/chtb_0009 - 52 | nw/xinhua/00/chtb_0009 - 53 | nw/xinhua/00/chtb_0009 - 54 | nw/xinhua/00/chtb_0009 - 55 | nw/xinhua/00/chtb_0009 - 56 | nw/xinhua/00/chtb_0009 - 57 | nw/xinhua/00/chtb_0009 - 58 | nw/xinhua/00/chtb_0009 - 59 | nw/xinhua/00/chtb_0009 - 60 | nw/xinhua/00/chtb_0009 - 61 | nw/xinhua/00/chtb_0009 - 62 | nw/xinhua/00/chtb_0009 - 63 | nw/xinhua/00/chtb_0009 - 64 | nw/xinhua/00/chtb_0009 (10054 65 | nw/xinhua/00/chtb_0009 10054) 66 | nw/xinhua/00/chtb_0009 - 67 | nw/xinhua/00/chtb_0009 - 68 | nw/xinhua/00/chtb_0009 (10054) 69 | nw/xinhua/00/chtb_0009 - 70 | nw/xinhua/00/chtb_0009 - 71 | nw/xinhua/00/chtb_0009 (10060) 72 | nw/xinhua/00/chtb_0009 (10060) 73 | 74 | #end document 75 | -------------------------------------------------------------------------------- /test/DataFiles/TC-C-1.response: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 - 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 - 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 - 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 - 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | nw/xinhua/00/chtb_0009 - 31 | nw/xinhua/00/chtb_0009 (10043 32 | nw/xinhua/00/chtb_0009 - 33 | nw/xinhua/00/chtb_0009 10043) 34 | nw/xinhua/00/chtb_0009 - 35 | nw/xinhua/00/chtb_0009 - 36 | nw/xinhua/00/chtb_0009 - 37 | nw/xinhua/00/chtb_0009 - 38 | nw/xinhua/00/chtb_0009 - 39 | nw/xinhua/00/chtb_0009 - 40 | nw/xinhua/00/chtb_0009 - 41 | nw/xinhua/00/chtb_0009 - 42 | nw/xinhua/00/chtb_0009 - 43 | nw/xinhua/00/chtb_0009 - 44 | nw/xinhua/00/chtb_0009 - 45 | nw/xinhua/00/chtb_0009 - 46 | nw/xinhua/00/chtb_0009 - 47 | nw/xinhua/00/chtb_0009 - 48 | nw/xinhua/00/chtb_0009 - 49 | nw/xinhua/00/chtb_0009 (10043) 50 | nw/xinhua/00/chtb_0009 - 51 | nw/xinhua/00/chtb_0009 - 52 | nw/xinhua/00/chtb_0009 - 53 | nw/xinhua/00/chtb_0009 - 54 | nw/xinhua/00/chtb_0009 - 55 | nw/xinhua/00/chtb_0009 - 56 | nw/xinhua/00/chtb_0009 (10043 57 | nw/xinhua/00/chtb_0009 - 58 | nw/xinhua/00/chtb_0009 - 59 | nw/xinhua/00/chtb_0009 - 60 | nw/xinhua/00/chtb_0009 10043) 61 | nw/xinhua/00/chtb_0009 - 62 | nw/xinhua/00/chtb_0009 - 63 | nw/xinhua/00/chtb_0009 - 64 | nw/xinhua/00/chtb_0009 (10054 65 | nw/xinhua/00/chtb_0009 10054) 66 | nw/xinhua/00/chtb_0009 - 67 | nw/xinhua/00/chtb_0009 - 68 | nw/xinhua/00/chtb_0009 (10054) 69 | nw/xinhua/00/chtb_0009 - 70 | nw/xinhua/00/chtb_0009 - 71 | nw/xinhua/00/chtb_0009 (10060) 72 | nw/xinhua/00/chtb_0009 (10060) 73 | 74 | #end document 75 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | Reference Coreference Scorer 2 | ============================ 3 | 4 | DESCRIPTION 5 | ----------- 6 | 7 | This is the official implementation of the revised coreference scorer 8 | used for CoNLL-2011/2012 shared tasks on coreference resolution. It 9 | addresses issues that prevented the consistent scoring of predicted 10 | mentions in the past. 11 | 12 | 13 | VERSION 14 | ------- 15 | 16 | The current stable (official) version for scoring predicted mentions is **v8.01** 17 | 18 | CITATION 19 | -------- 20 | 21 | We would appreciate if you cite the paper when you use this scorer as 22 | some of us are academics or wanting to be academics, and citations 23 | matter. 24 | 25 | :: 26 | 27 | @InProceedings{pradhan-EtAl:2014:P14-2, 28 | author = {Pradhan, Sameer and Luo, Xiaoqiang and Recasens, Marta and Hovy, Eduard and Ng, Vincent and Strube, Michael}, 29 | title = {Scoring Coreference Partitions of Predicted Mentions: A Reference Implementation}, 30 | booktitle = {Proceedings of the 52nd Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)}, 31 | month = {June}, 32 | year = {2014}, 33 | address = {Baltimore, Maryland}, 34 | publisher = {Association for Computational Linguistics}, 35 | pages = {30--35}, 36 | url = {http://www.aclweb.org/anthology/P14-2006} 37 | } 38 | 39 | 40 | USAGE 41 | ----- 42 | 43 | :: 44 | 45 | perl scorer.pl [] 46 | 47 | 48 | : the metric desired to score the results. one of the following values: 49 | 50 | muc: MUCScorer (Vilain et al, 1995) 51 | bcub: B-Cubed (Bagga and Baldwin, 1998) 52 | ceafm: CEAF (Luo et al., 2005) using mention-based similarity 53 | ceafe: CEAF (Luo et al., 2005) using entity-based similarity 54 | blanc: BLANC (Luo et al., 2014) BLANC metric for gold and predicted mentions 55 | all: uses all the metrics to score 56 | 57 | : file with expected coreference chains in CoNLL-2011/2012 format 58 | 59 | : file with output of coreference system (CoNLL-2011/2012 format) 60 | 61 | : optional. The name of the document to score. If name is not 62 | given, all the documents in the dataset will be scored. If given 63 | name is "none" then all the documents are scored but only total 64 | results are shown. 65 | 66 | 67 | OUTPUT 68 | ------ 69 | 70 | The score subroutine returns an array with four values in this order: 71 | 72 | Coreference Score 73 | ~~~~~~~~~~~~~~~~~ 74 | 75 | :: 76 | 77 | Recall = recall_numerator / recall_denominator 78 | Precision = precision_numerator / precision_denominator 79 | F1 = 2 * Recall * Precision / (Recall + Precision) 80 | 81 | These values are to standard output when variable ``$VERBOSE`` is not null. 82 | 83 | 84 | Identification of Mentions 85 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 86 | 87 | A score for identification of mentions (recall, precision and F1) is 88 | also included. Mentions from system response are compared with key 89 | mentions. This version performs strict mention matching as was used in 90 | the CoNLL-2011 and 2012 shared tasks. 91 | 92 | AUTHORS 93 | ------- 94 | 95 | * Emili Sapena, Universitat Politècnica de Catalunya, http://www.lsi.upc.edu/~esapena, esapena lsi.upc.edu 96 | * Sameer Pradhan, http://cemantix.org, pradhan cemantix.org 97 | * Sebastian Martschat, sebastian.martschat h-its.org 98 | * Xiaoqiang Luo, xql google.com 99 | 100 | 101 | COPYRIGHT 102 | --------- 103 | 104 | :: 105 | 106 | 2009-2011, Emili Sapena esapena lsi.upc.edu 107 | 2011- Sameer Pradhan pradhan cemantix.org 108 | -------------------------------------------------------------------------------- /test/CorefMetricTest.pm: -------------------------------------------------------------------------------- 1 | package CorefMetricTest; 2 | use strict; 3 | use warnings; 4 | use Exporter; 5 | 6 | our @ISA= qw(Exporter); 7 | our @EXPORT = qw(ComputeScoreFromCounts DiffExpectedAndActual); 8 | 9 | ################################################################################ 10 | # Compute recall, precision and F1. 11 | # 12 | # Input: (numerator_counts_for_recall, denominator_counts_for_recall, 13 | # numerator_counts_for_precision, denominator_counts_for_precision) 14 | # Output: (recall, precision, F1) 15 | ################################################################################ 16 | sub ComputeScoreFromCounts { 17 | # The first 4 are also coref link counts when using BLANC. 18 | my ($recall_numerator, $recall_denominator, 19 | $precision_numerator, $precision_denominator, @noncoref_counts) = @_; 20 | # The coref recall, precision, and F1 when using BLANC. 21 | my ($recall, $precision, $F1) = 22 | RPFFromCounts($recall_numerator, $recall_denominator, 23 | $precision_numerator, $precision_denominator); 24 | 25 | # BLANC: @noncoref_counts= 26 | # (noncoref_numerator_recall, noncoref_denominator_recall, 27 | # noncoref_numerator_precision, noncoref_denominator_precision) 28 | if (scalar(@noncoref_counts) == 4) { 29 | ($recall, $precision, $F1) = CorScorer::ComputeBLANCFromCounts( 30 | $recall_numerator, $recall_denominator, $precision_denominator, 31 | $noncoref_counts[0], $noncoref_counts[1], $noncoref_counts[3]); 32 | } 33 | $recall = ($recall < 0) ? 0 : $recall; 34 | $precision = ($precision < 0) ? 0 : $precision; 35 | $F1 = ($F1 < 0) ? 0 : $F1; 36 | return ($recall, $precision, $F1); 37 | } 38 | 39 | sub RPFFromCounts 40 | { 41 | my ($recall_numerator, $recall_denominator, 42 | $precision_numerator, $precision_denominator, @nonCorefCounts) = @_; 43 | my ($recall, $precision, $F1) = (-1, -1, 0); 44 | if ($recall_denominator > 0) { 45 | $recall = $recall_numerator / $recall_denominator; 46 | } 47 | if ($precision_denominator > 0) { 48 | $precision = $precision_numerator / $precision_denominator; 49 | } 50 | 51 | if (($recall + $precision) > 0) { 52 | $F1 = 2 * $recall * $precision / ($recall + $precision); 53 | } 54 | 55 | return ($recall, $precision, $F1); 56 | } 57 | 58 | # deprecated -- see CorScorer::ComputeBLANCFromCounts(). 59 | sub ComputeBLANCRPF 60 | { 61 | my ($coref_recall, $coref_precision, $coref_F1, 62 | $noncoref_recall, $noncoref_precision, $noncoref_F1) = @_; 63 | 64 | my ($recall, $precision, $F1); 65 | 66 | if ($coref_recall < 0 && $noncoref_recall < 0) { 67 | # no key mention. 68 | $recall = $precision = $F1 = 0; 69 | } elsif ($coref_recall < 0) { 70 | # key: all links are non-coref (mentions are all singltons). 71 | $recall = $noncoref_recall; 72 | $precision = ($noncoref_precision < 0) ? 0 : $noncoref_precision; 73 | $F1 = $noncoref_F1; 74 | } elsif ($noncoref_recall < 0) { 75 | # key: all links are coref (all mentions are in one entity). 76 | $recall = $coref_recall; 77 | $precision = ($coref_precision < 0) ? 0 : $coref_precision; 78 | $F1 = $coref_F1; 79 | } else { 80 | #key contains both coref and non-coref links. 81 | if ($coref_precision < 0 && $noncoref_precision < 0) { 82 | # no response. 83 | $recall = $precision = $F1 = 0; 84 | } else { 85 | if ($coref_precision < 0) { 86 | # response: all links are non-coref, or response mentions are all 87 | # singletons. 88 | $coref_precision = 0; 89 | } elsif ($noncoref_precision < 0) { 90 | # response: all links are coref, or all mentions are in one entity. 91 | $noncoref_precision = 0; 92 | } 93 | $recall = ($coref_recall + $noncoref_recall)/2; 94 | $precision = ($coref_precision + $noncoref_precision)/2; 95 | $F1 = ($coref_F1 + $noncoref_F1)/2; 96 | } 97 | } 98 | 99 | return ($recall, $precision, $F1); 100 | } 101 | 102 | ############################################################################## 103 | # Compute the sum of the duifference between the expected recall, precision, 104 | # F1 and the actual one. 105 | ############################################################################## 106 | sub DiffExpectedAndActual { 107 | my ($expected, $actual) = @_; 108 | if (scalar(@$expected) != scalar(@$actual)) { 109 | print STDERR "Expected and actual have diff dimensions: \n"; 110 | print STDERR " Expected: ", join(" ", @$expected), "\n"; 111 | print STDERR " Actual: ", join(" ", @$actual), "\n"; 112 | return 1.0e5; 113 | } 114 | my $sum = 0.0; 115 | my $i = 0; 116 | foreach my $e (@$expected) { 117 | $sum += abs($e - $actual->[$i]); 118 | ++$i; 119 | } 120 | return $sum; 121 | } 122 | 123 | 1; 124 | 125 | -------------------------------------------------------------------------------- /lib/Algorithm/README.Munkres: -------------------------------------------------------------------------------- 1 | NAME 2 | Algorithm-Munkres : Perl extension for Munkres' solution to 3 | classical Assignment problem for square and rectangular matrices 4 | This module extends the solution of Assignment problem for square 5 | matrices to rectangular matrices by padding zeros. Thus a rectangular 6 | matrix is converted to square matrix by padding necessary zeros. 7 | 8 | SYNOPSIS 9 | use Algorithm::Munkres; 10 | 11 | @mat = ( 12 | [2, 4, 7, 9], 13 | [3, 9, 5, 1], 14 | [8, 2, 9, 7], 15 | ); 16 | 17 | assign(\@mat,\@out_mat); 18 | 19 | Then the @out_mat array will have the output as: (0,3,1,2), 20 | where 21 | 0th element indicates that 0th row is assigned 0th column i.e value=2 22 | 1st element indicates that 1st row is assigned 3rd column i.e.value=1 23 | 2nd element indicates that 2nd row is assigned 1st column.i.e.value=2 24 | 3rd element indicates that 3rd row is assigned 2nd column.i.e.value=0 25 | 26 | DESCRIPTION 27 | Assignment Problem: Given N jobs, N workers and the time taken by 28 | each worker to complete a job then how should the assignment of a 29 | Worker to a Job be done, so as to minimize the time taken. 30 | 31 | Thus if we have 3 jobs p,q,r and 3 workers x,y,z such that: 32 | x y z 33 | p 2 4 7 34 | q 3 9 5 35 | r 8 2 9 36 | 37 | where the cell values of the above matrix give the time required 38 | for the worker(given by column name) to complete the job(given by 39 | the row name) 40 | 41 | then possible solutions are: 42 | Total 43 | 1. 2, 9, 9 20 44 | 2. 2, 2, 5 9 45 | 3. 3, 4, 9 16 46 | 4. 3, 2, 7 12 47 | 5. 8, 9, 7 24 48 | 6. 8, 4, 5 17 49 | 50 | Thus (2) is the optimal solution for the above problem. 51 | This kind of brute-force approach of solving Assignment problem 52 | quickly becomes slow and bulky as N grows, because the number of 53 | possible solution are N! and thus the task is to evaluate each 54 | and then find the optimal solution.(If N=10, number of possible 55 | solutions: 3628800 !) 56 | Munkres' gives us a solution to this problem, which is implemented 57 | in this module. 58 | 59 | This module also solves Assignment problem for rectangular matrices 60 | (M x N) by converting them to square matrices by padding zeros. ex: 61 | If input matrix is: 62 | [2, 4, 7, 9], 63 | [3, 9, 5, 1], 64 | [8, 2, 9, 7] 65 | i.e 3 x 4 then we will convert it to 4 x 4 and the modified input 66 | matrix will be: 67 | [2, 4, 7, 9], 68 | [3, 9, 5, 1], 69 | [8, 2, 9, 7], 70 | [0, 0, 0, 0] 71 | 72 | EXPORT 73 | "assign" function by default. 74 | 75 | INPUT 76 | The input matrix should be in a two dimensional array(array of 77 | array) and the 'assign' subroutine expects a reference to this 78 | array and not the complete array. 79 | eg:assign(\@inp_mat, \@out_mat); 80 | The second argument to the assign subroutine is the reference 81 | to the output array. 82 | 83 | OUTPUT 84 | The assign subroutine expects references to two arrays as its 85 | input paramenters. The second parameter is the reference to the 86 | output array. This array is populated by assign subroutine. This 87 | array is single dimensional Nx1 matrix. 88 | For above example the output array returned will be: 89 | (0, 90 | 2, 91 | 1) 92 | 93 | where 94 | 0th element indicates that 0th row is assigned 0th column i.e value=2 95 | 1st element indicates that 1st row is assigned 2nd column i.e.value=5 96 | 2nd element indicates that 2nd row is assigned 1st column.i.e.value=2 97 | 98 | SEE ALSO 99 | 1. http://216.249.163.93/bob.pilgrim/445/munkres.html 100 | 101 | 2. Munkres, J. Algorithms for the assignment and transportation 102 | Problems. J. Siam 5 (Mar. 1957), 32-38 103 | 104 | 3. François Bourgeois and Jean-Claude Lassalle. 1971. 105 | An extension of the Munkres algorithm for the assignment 106 | problem to rectangular matrices. 107 | Communication ACM, 14(12):802-804 108 | 109 | AUTHOR 110 | Anagha Kulkarni, University of Minnesota Duluth 111 | kulka020 d.umn.edu 112 | 113 | Ted Pedersen, University of Minnesota Duluth 114 | tpederse d.umn.edu 115 | 116 | COPYRIGHT AND LICENSE 117 | Copyright (C) 2007-2008, Ted Pedersen and Anagha Kulkarni 118 | 119 | This program is free software; you can redistribute it and/or modify it 120 | under the terms of the GNU General Public License as published by the 121 | Free Software Foundation; either version 2 of the License, or (at your 122 | option) any later version. This program is distributed in the hope that 123 | it will be useful, but WITHOUT ANY WARRANTY; without even the implied 124 | warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 125 | GNU General Public License for more details. 126 | 127 | You should have received a copy of the GNU General Public License along 128 | with this program; if not, write to the Free Software Foundation, Inc., 129 | 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 130 | 131 | -------------------------------------------------------------------------------- /test/TestCases.README: -------------------------------------------------------------------------------- 1 | TC-A-1 - perfect: 2 | Key/Ref: {a} {bc} {def} 3 | Rsp/Sys: {a} {bc} {def} 4 | Expected: BCUB=1 [recall=6/6, prec=6/6] 5 | Expected: MUC=1 [recall=3/3=1, prec=3/3=1] 6 | Expected: CEAFm=1 [recall=6/6=1, prec=6/6=1] 7 | Expected: CEAFe=1 [recall=3/3=1, prec=3/3=1] 8 | Expected: BLANC=1 [recall_c=4/4=1, prec_c=4/4=1, recall_n=11/11=1, prec_n=11/11=1] 9 | 10 | TC-A-2 -- response with missing mentions/entities 11 | Key/Ref: {a} {bc} {def} 12 | Rsp/Sys: {a} {de} 13 | Expected: BCUB=.5599 [recall=7/18, prec=3/3] 14 | Expected: MUC=0.5 [recall=1/3, prec=1/1] 15 | Expected: CEAFm=6/9=0.67 [common=3, recall=3/6=0.5, Prec=3/3=1] 16 | Expected: CEAFe=3.6/5=0.72 [common=1+4/5=1.8, recall=1.8/3=0.6, Prec=1.8/2=0.9] 17 | Expected: BLANC=0.35 [recall_c=1/4, prec_c=1/1, recall_n=2/11, prec_n=2/2] 18 | 19 | TC-A-3 -- response with false-alarm mentions/entities 20 | Key/Ref: {a} {bc} {def} 21 | Rsp/Sys: {a} {bcx} {defy} {z} 22 | Expected: BCUB=.6748 [recall=6/6, prec=55/108] 23 | Expected: MUC=0.75 [recall=3/3, prec=3/5] 24 | Expected: CEAFm=12/15=0.8 [common=6, recall=6/6=1, prec=6/9=.67] 25 | Expected: CEAFe=3.6/5=0.76 [common=1+4/5+6/7=2.66, recall=2.66/3=0.89, Prec=2.66/4=0.66] 26 | Expected: BLANC=0.60 [recall_c=4/4, prec_c=4/9, recall_n=11/11, prec_n=11/27] 27 | 28 | 29 | TC-A-4 -- response with both missing and false-alarm mentions/entities 30 | Key/Ref: {a} {bc} {def} 31 | Rsp/Sys: {a} {bcx} {dy} {z} 32 | Expected: BCUB=.4683 [recall=5/9, prec=17/42] 33 | Expected: MUC=1/3=.33333 [recall=1/3, prec=1/3] 34 | Expected: CEAFm=8/13=0.62 [common=4 recall=4/6=0.67 prec=4/7=.57] 35 | Expected: CEAFe=4.4/7=0.63 [common=1+4/5+2/5=2.2, recall=2.2/3=0.73, Prec=2.2/4=0.55] 36 | Expected: BLANC=0.30 [recall_c=1/4, prec_c=1/4, recall_n=5/11, prec_n=5/17] 37 | 38 | TC-A-5 -- response with both missing and false-alarm mentions/entities, and overlapping mentions (capitalized letter: b and B). Overlapping mention B in the aligned entity. 39 | Key/Ref: {a} {bc} {def} 40 | Rsp/Sys: {a} {bcxB} {dy} {z} 41 | Expected: BCUB=.4 [recall=5/9, prec=5/16] 42 | Expected: MUC=2/7=.28571 [recall=1/3, prec=1/4] 43 | Expected: CEAFm=8/14=0.57 [common=4 recall=4/6=0.67 prec=4/8=.5] 44 | Expected: CEAFe=4.14/7=0.59 [common=1+4/6+2/5=2.07, recall=2.07/3=0.69, Prec=2.07/4=0.52] 45 | Expected: BLANC=0.25 [recall_c=1/4, prec_c=1/7, recall_n=5/11, prec_n=5/21] 46 | 47 | TC-A-6 -- response with both missing and false-alarm mentions/entities, and overlapping mentions (capitalized letter: b and B). Overlapping mention B in an unaligned entity. 48 | Key/Ref: {a} {bc} {def} 49 | Rsp/Sys: {a} {bcx} {dy} {Bz} 50 | Expected: BCUB=.4325 [recall=5/9, prec=17/48] 51 | Expected: MUC=2/7=.28571 [recall=1/3, prec=1/4] 52 | Expected: CEAFm=8/14=0.57 [common=4 recall=4/6=0.67 prec=4/8=.5] 53 | Expected: CEAFe=4.4/7=0.63 [common=1+4/5+2/5=2.2, recall=2.2/3=0.73, Prec=2.2/4=0.55] 54 | Expected: BLANC=0.26 [recall_c=1/4, prec_c=1/5, recall_n=5/11, prec_n=5/23] 55 | 56 | TC-A-7 -- response with both missing and false-alarm mentions/entities, and duplicate mentions (capitalized letter: b and B). Duplicate mention B in the same cluster entity (note: this is diff from TC5) -- this tests mention de-duplication. 57 | Key/Ref: {a} {bc} {def} 58 | Rsp/Sys: {a} {bcxB} {dy} {z} 59 | de-dup: {a} {bcx} {dy} {z} 60 | 61 | de-dup: 62 | Expected: BCUB=.4683 [recall=5/9, prec=17/42] 63 | Expected: MUC=1/3=.33333 [recall=1/3, prec=1/3] 64 | Expected: CEAFm=8/13=0.61538 [common=4, recall=4/6=0.66667, Prec=4/7=0.57143] 65 | Expected: CEAFe=4.14/7=0.62857 [common=1+4/5+2/5=2.2, recall=2.2/3=0.73333, Prec=2.2/4=0.55] 66 | Expected: BLANC=0.30 [recall_c=1/4, prec_c=1/4, recall_n=5/11, prec_n=5/17] 67 | 68 | if No de-dup: 69 | Expected: CEAFm=8/14=0.57 [common=4 recall=4/6=0.67 prec=4/8=.5] 70 | Expected: CEAFe=4.14/7=0.59 [common=1+4/6+2/5=2.07, recall=2.07/3=0.69, Prec=2.07/4=0.52] 71 | 72 | 73 | TC-A-8 -- response with both missing and false-alarm mentions/entities, and duplicate mentions (capitalized letter: b and B). Duplicate mention B in a diff entity from b. 74 | Key/Ref: {a} {bc} {def} 75 | Rsp/Sys: {a} {bcx} {dy} {Bz} 76 | 77 | De-dup: 78 | Expected: BCUB=.4683 [recall=5/9, prec=17/42] 79 | Expected: MUC=1/3=.33333 [recall=1/3, prec=1/3] 80 | Expected: CEAFm=8/13=0.61538 [common=4 recall=4/6=0.67 prec=4/7=.57143] 81 | Expected: CEAFe=4.14/7=0.63 [common=1+4/5+2/5=2.2, recall=2.2/3=0.73, Prec=2.2/4=0.55] 82 | Expected: BLANC=0.30 [recall_c=1/4, prec_c=1/4, recall_n=5/11, prec_n=5/17] 83 | 84 | If no de-dup: 85 | Expected: CEAFm=8/14=0.57 [common=4 recall=4/6=0.67 prec=4/8=.5] 86 | Expected: CEAFe=4.14/7=0.63 [common=1+4/5+2/5=2.2, recall=2.2/3=0.73, Prec=2.2/4=0.55] 87 | 88 | TC-A-9 -- show B3 can be canned: "b" is repeated 10 times so precision approaches 1 89 | Key/Ref: {a} {bc} {def} 90 | Rsp/Sys: {a} {bcx} {dy} {Bx10z} 91 | de-dup Rsp/Sys: {a} {bcx} {dy} {z} 92 | 93 | De-dup: 94 | Expected: BCUB=.4683 [recall=5/9, prec=17/42] 95 | Expected: MUC=1/3=.33333 [recall=1/3, prec=1/3] 96 | Expected: CEAFm=8/14=0.57 [common=4 recall=4/6=0.67 prec=4/7=.57143] 97 | Expected: CEAFe=4.4/7=0.63 [common=1+4/5+2/5=2.2, recall=2.2/3=0.73, Prec=2.2/4=0.55] 98 | Expected: BLANC=0.30 [recall_c=1/4, prec_c=1/4, recall_n=5/11, prec_n=5/17] 99 | 100 | 101 | TC-A-10 - Gold mentions. Only singletons in the response. 102 | Key/Ref: {a} {bc} {def} 103 | Rsp/Sys: {a} {b} {c} {d} {e} {f} 104 | Expected: BCUB=.6667 [recall=3/6, prec=6/6] 105 | Expected: MUC=0 [recall=0, prec=0] 106 | Expected: BLANC=0.42 [recall_c=0/4, prec_c=0/0, f_c=0, recall_n=11/11, prec_n=11/15] 107 | 108 | 109 | TC-A-11 - Gold mentions. All mentions are coreferent in the response. 110 | Key/Ref: {a} {bc} {def} 111 | Rsp/Sys: {abcdef} 112 | 113 | Expected: BCUB=0.5599 [recall=6/6, prec=7/18] 114 | Expected: MUC=6/8=0.75 [recall=3/3, prec=3/5] 115 | Expected: BLANC=0.21 [recall_c=4/4, prec_c=4/15, recall_n=0/11, prec_n=0/0, f_n=0] 116 | 117 | 118 | TC-A-12 - System mentions. Only singletons in the response. 119 | Key/Ref: {a} {bc} {def} 120 | Rsp/Sys: {a} {x} {y} {c} {d} {e} {z} 121 | 122 | Expected: BCUB=0.4425 [recall=13/36, prec=4/7] 123 | Expected: MUC=0 [recall=0, prec=0] 124 | Expected: BLANC=0.16 [recall_c=0/4, prec_c=0/0, f_c=0, recall_n=5/11, prec_n=5/21] 125 | 126 | 127 | TC-A-13 - System mentions. All mentions are coreferent in the response. 128 | Key/Ref: {a} {bc} {def} 129 | Rsp/Sys: {axycdez} 130 | 131 | Expected: BCUB=0.19447 [recall=17/36, prec=6/49] 132 | Expected: MUC=2/9 [recall=1/3, prec=1/6] 133 | Expected: BLANC=0.04 [recall_c=1/4, prec_c=1/21, recall_n=0/11, prec_n=0/0, f_n=0] 134 | 135 | 136 | TC-B-1 -- spurious mention (x) and missing mention (a) in response; link (bc) is a key non-coref link and is an incorrect response coref link. 137 | 138 | Keys: {ab} {cde} 139 | Response: {bcx} {de} 140 | 141 | key coref links: C_k = {(ab), (cd), (de), (ce)} 142 | key non-coref links: N_k = {(ac), (ad), (ae), (bc), (bd), (be)} 143 | 144 | response coref links: C_r = {(bc), (bx), (cx), (de)} 145 | response non-coref links: N_r = {(bd), (be), (cd), (ce), (xd), (xe)} 146 | 147 | (I'll use ^ for set intersection) 148 | C_k ^ C_r = {(de)} => R_c = |C_k^C_r| / |C_k| = 1/4, P_c = 1/|C_r| = 1/4, F_c = 1/4 149 | N_k ^ N_r = {(bd), (be)} => R_n = |N_k^N_r|/|N_k| = 2/6, P_n = 2/|N_r| = 2/6, F_n = 1/3 150 | 151 | BLANC = 1/2 (F_c + F_n) = 7/24. 152 | 153 | 154 | 155 | 156 | TC-C-1 -- same as TC14 plus a new entity and its correct prediction shown. this was for testing the more than two entity case. 157 | 158 | Keys: {ab} {cde} {fg} 159 | Response: {bcx} {de} {fg} 160 | 161 | key coref links: C_k = {(ab), (cd), (de), (ce), (fg)}} 162 | key non-coref links: N_k = {(ac), (ad), (ae), (bc), (bd), (be), (af), (ag), (bf), (bg), (cf), (cg), (df), (dg), (ef), (eg)} 163 | 164 | response coref links: C_r = {(bc), (bx), (cx), (de), (fg)} 165 | response non-coref links: N_r = {(bd), (be), (cd), (ce), (xd), (xe), (bf), (bg), (cf), (cg), (xf), (xg), (df), (dg), (ef), (eg)} 166 | 167 | (I'll use ^ for set intersection) 168 | C_k ^ C_r = {(de), (fg)} => R_c = |C_k^C_r| / |C_k| = 2/5, P_c = 2/|C_r| = 2/5, F_c = 2/5 = 0.40 169 | N_k ^ N_r = {(bd), (be), (bf), (bg), (cf), (cg), (df), (dg), (ef), (eg)} => R_n = |N_k^N_r|/|N_k| = 10/16, P_n = 10/|N_r| = 10/16, F_n = 10/16 = 0.625 170 | 171 | BLANC = 1/2 (F_c + F_n) = 0.5125 172 | 173 | 174 | 175 | # ------------ examples from the B-CUBED paper 176 | 177 | TC-D-1 -- merging one small cluster with a big cluster 178 | 179 | key: {12345} {67} {89ABC} 180 | --- 181 | 182 | 1-2-3-4-5 183 | 184 | 6-7 185 | 186 | 8-9-A-B-C 187 | 188 | 189 | 190 | response: {12345} {6789ABC} 191 | --------- 192 | 193 | 1-2-3-4-5 194 | 195 | 6-7 196 | | 197 | 8-9-A-B-C 198 | 199 | 200 | Expected: BCUB [r=12/12, p=16/21, f=0.864864865] 201 | Expected: MUC [r=9/9, p=9/10, f=0.947368421] 202 | 203 | 204 | 205 | TC-E-1 -- merging two big clusters 206 | 207 | 208 | key: {12345} {67} {89ABC} 209 | --- 210 | 211 | 1-2-3-4-5 212 | 213 | 6-7 214 | 215 | 8-9-A-B-C 216 | 217 | 218 | 219 | response: {123456789ABC} {67} 220 | --------- 221 | 222 | 1-2-3-4-5 223 | | 224 | 6-7 | 225 | | 226 | 8-9-A-B-C 227 | 228 | 229 | Expected: BCUB [r=1, p=7/12, f=0.736842105] 230 | Expected: MUC [r=9/9, p=9/10, f=0.947368421] 231 | 232 | 233 | # ---------- examples from the MUC paper 234 | 235 | TC-F-1 -- 236 | 237 | key: {ABCD} ---- Links: A-B; B-C; C-D 238 | response: {AB} {CD} ---- Links: A-B; C-D 239 | 240 | Expected: MUC [r=2/3, p=2/2, f=2*(2/3)*(2/2)/(2/3+2/2)] 241 | 242 | 243 | 244 | TC-G-1 -- 245 | 246 | key: {AB} {CD} ---- Links: A-B; C-D 247 | response: {ABCD} ---- Links: A-B; B-C; C-D 248 | 249 | Expected: MUC [r=2/2, p=2/3, f=2*(2/2)*(2/3)/(2/2+2/3)] 250 | 251 | 252 | 253 | TC-H-1 -- 254 | 255 | key: {ABCD} ---- Links: A-B; B-C; B-D 256 | response: {ABCD} ---- Links: A-B; B-C; C-D 257 | 258 | Expected: MUC [r=1, p=1, f=1] 259 | 260 | 261 | 262 | TC-I-1 -- 263 | 264 | key: {ABCD} ---- Links: A-B; B-C; B-D 265 | response: {AB} {CD} ---- Links: A-B; C-D 266 | 267 | Expected: MUC [r=2/3, p=2/2, f=2*(2/3)*(2/2)/(2/3+2/2)] 268 | 269 | 270 | 271 | TC-J-1 -- 272 | 273 | key: {ABC} ---- Links: A-B; B-C 274 | response: {AC} ---- Links: A-C 275 | 276 | Expected: MUC [r=1/2, p=1/1, f=2*(1/2)*(1/1)/(1/2+1/1)] 277 | 278 | 279 | 280 | TC-K-1 -- 281 | 282 | key: {BCDEGHJ} ---- Links: B-C; C-D; D-E; E-G; G-H; H-J 283 | response: {ABC} {DEF} {GHI} ---- Links: A-B; B-C; D-E; E-F; G-H; H-I 284 | 285 | Expected: MUC [r=3/6, p=3/6, f=3/6] 286 | 287 | 288 | 289 | TC-L-1 -- 290 | 291 | key: {ABC} {DEFG} ---- Links: A-B; B-C; D-E; E-F; F-G 292 | response: {AB} {CD} {FGH} ---- Links: A-B; C-D; F-G; G-H 293 | 294 | Expected: MUC [r=2/5, p=2/4, f=2*(2/5)*(2/4)/(2/5+2/4)] 295 | 296 | 297 | TC-M-1 - Only coreferent mentions in the key. Gold mentions. Matching response. Since the key contains no non-coreference link, BLANC equals recall_c, prec_c, F_c. 298 | Key/Ref: {abcdef} 299 | Rsp/Sys: {abcdef} 300 | 301 | Expected: BCUB=1 302 | Expected: MUC=1 303 | Expected: CEAFm=1 304 | Expected: CEAFe=1 305 | Expected: BLANC=1 [recall_c=15/15=1, prec_c=15/15=1] 306 | 307 | 308 | TC-M-2 - Only coreferent mentions in the key. Gold mentions. Response contains only non-coreference links. 309 | Key/Ref: {abcdef} 310 | Rsp/Sys: {a} {b} {c} {d} {e} {f} 311 | 312 | Expected: MUC=0 313 | Expected: BLANC=0 [recall_c=0/15=0, prec_c=0/0=0] 314 | 315 | 316 | TC-M-3 - Only coreferent mentions in the key. Gold mentions. Response contains coreference and non-coreference links. 317 | Key/Ref: {abcdef} 318 | Rsp/Sys: {ab} {cde} {f} 319 | 320 | Expected: BLANC=0.42 [recall_c=4/15, prec_c=4/4=1] 321 | 322 | 323 | TC-M-4 - Only coreferent mentions in the key. System mentions: only coreferent mentions. Since the key contains no non-coreference link, BLANC equals recall_c, prec_c, F_c. 324 | Key/Ref: {abcdef} 325 | Rsp/Sys: {abcxyz} 326 | 327 | Expected: BLANC=0.20 [recall_c=3/15, prec_c=3/15] 328 | 329 | 330 | TC-M-5 - Only coreferent mentions in the key. System mentions: only singletons. 331 | Key/Ref: {abcdef} 332 | Rsp/Sys: {a} {b} {c} {x} {y} {z} 333 | 334 | Expected: MUC=0 335 | Expected: BLANC=0 [recall_c=0/15=0, prec_c=0/0=0] 336 | 337 | 338 | TC-M-6 - Only coreferent mentions in the key. System mentions: coreference and non-coreference links. 339 | Key/Ref: {abcdef} 340 | Rsp/Sys: {ab} {cxy} {z} 341 | 342 | Expected: BLANC=0.11 [recall_c=1/15, prec_c=1/4] 343 | 344 | 345 | TC-N-1 - Only singletons in the key. Gold mentions. Matching response. Since the key contains no coreference link, BLANC equals recall_n, prec_n, F_n. 346 | Key/Ref: {a} {b} {c} {d} {e} {f} 347 | Rsp/Sys: {a} {b} {c} {d} {e} {f} 348 | 349 | Expected: BCUB=1 350 | Expected: MUC=0 351 | Expected: CEAFm=1 352 | Expected: CEAFe=1 353 | Expected: BLANC=1 [recall_n=15/15=1, prec_n=15/15=1] 354 | 355 | 356 | TC-N-2 - Only singletons in the key. Gold mentions. Response contains only coreference links. 357 | Key/Ref: {a} {b} {c} {d} {e} {f} 358 | Rsp/Sys: {abcdef} 359 | 360 | Expected: BLANC=0 [recall_n=0/15=0, prec_n=0/0=0] 361 | 362 | 363 | TC-N-3 - Only singletons in the key. Gold mentions. Response contains coreference and non-coreference links. 364 | Key/Ref: {a} {b} {c} {d} {e} {f} 365 | Rsp/Sys: {ab} {cde} {f} 366 | 367 | Expected: BLANC=0.85 [recall_n=11/15, prec_n=11/11=1] 368 | 369 | 370 | TC-N-4 - Only singletons in the key. System mentions: only singletons. Since the key contains no coreference link, BLANC equals recall_n, prec_n, F_n. 371 | Key/Ref: {a} {b} {c} {d} {e} {f} 372 | Rsp/Sys: {a} {b} {c} {x} {y} {z} 373 | 374 | Expected: MUC=0 375 | Expected: BLANC=0.20 [recall_n=3/15, prec_n=3/15] 376 | 377 | 378 | TC-N-5 - Only singletons in the key. System mentions: only coreference links. 379 | Key/Ref: {a} {b} {c} {d} {e} {f} 380 | Rsp/Sys: {abcxyz} 381 | 382 | Expected: BLANC=0 [recall_n=0/15=0, prec_n=0/0=0] 383 | 384 | 385 | TC-N-6 - Only singletons in the key. Only coreferent mentions in the key. System mentions: coreference and non-coreference links. 386 | Key/Ref: {a} {b} {c} {d} {e} {f} 387 | Rsp/Sys: {ab} {cxy} {z} 388 | 389 | Expected: BLANC=0.15 [recall_n=2/15, prec_n=2/11] 390 | 391 | -------------------------------------------------------------------------------- /test/CorefMetricTestConfig.pm: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # This is the test configuration file. Test cases are stored in an 3 | # array, each element consisting of: 4 | # (1) id: a unique identifier for the test case. 5 | # (2) key_file: the key file to be tested in the CoNLL format. 6 | # (3) response_file: the response file to be tested in the CoNLL format. 7 | # (4) expected_metrics: is a hash label from a metric name (identical to those 8 | # used in the scorer.{pl|bat}) to an array of expected 9 | # metric values. All metrics have 3 expected numbers: 10 | # (recall, precision, F-measure). 11 | ################################################################################ 12 | 13 | package CorefMetricTestConfig; 14 | use strict; 15 | use warnings; 16 | use Exporter; 17 | 18 | our @ISA= qw( Exporter ); 19 | 20 | # these are exported by default. 21 | our @EXPORT = qw(TestCases); 22 | 23 | # 24 | # Values following metric names are [recall, precision, F1] 25 | # 26 | our @TestCases = ( 27 | { id => "A1", 28 | key_file => "DataFiles/TC-A.key", 29 | response_file => "DataFiles/TC-A-1.response", 30 | expected_metrics => { "muc" => [1, 1, 1], 31 | "bcub" => [6/6, 6/6, 1], 32 | "ceafm" => [1, 1, 1], 33 | "ceafe" => [1, 1, 1], 34 | "blanc" => [1, 1, 1] } 35 | }, 36 | { id => "A2", 37 | key_file => "DataFiles/TC-A.key", 38 | response_file => "DataFiles/TC-A-2.response", 39 | expected_metrics => { "muc" => [1/3, 1/1, 0.5], 40 | "bcub" => [(7/3)/6, 3/3, 14/25], 41 | "ceafm" => [0.5, 1, 0.66667], 42 | "ceafe" => [0.6, 0.9, 0.72], 43 | "blanc" => [0.21591, 1, 0.35385] } 44 | }, 45 | { id => "A3", 46 | key_file => "DataFiles/TC-A.key", 47 | response_file => "DataFiles/TC-A-3.response", 48 | expected_metrics => { "muc" => [3/3, 3/5, 0.75], 49 | "bcub" => [6/6, (4+7/12)/9, 110/163], 50 | "ceafm" => [1, 0.66667, 0.8], 51 | "ceafe" => [0.88571, 0.66429, 0.75918], 52 | "blanc" => [1, 0.42593, 0.59717] } 53 | }, 54 | { id => "A4", 55 | key_file => "DataFiles/TC-A.key", 56 | response_file => "DataFiles/TC-A-4.response", 57 | expected_metrics => { "muc" => [1/3, 1/3, 1/3], 58 | "bcub" => [(3+1/3)/6, (1+4/3+1/2)/7, 2*(5/9)*(17/42)/((5/9)+(17/42))], 59 | "ceafm" => [0.66667, 0.57143, 0.61538], 60 | "ceafe" => [0.73333, 0.55, 0.62857], 61 | "blanc" => [0.35227, 0.27206, 0.30357] } 62 | }, 63 | { id => "A5", 64 | key_file => "DataFiles/TC-A.key", 65 | response_file => "DataFiles/TC-A-5.response", 66 | expected_metrics => { "muc" => [1/3, 1/4, 2/7], 67 | "bcub" => [(3+1/3)/6, 2.5/8, 2*(5/9)*(5/16)/((5/9)+(5/16))], 68 | "ceafm" => [0.66667, 0.5, 0.57143], 69 | "ceafe" => [0.68889, 0.51667, 0.59048], 70 | "blanc" => [0.35227, 0.19048, 0.24716] } 71 | }, 72 | { id => "A6", 73 | key_file => "DataFiles/TC-A.key", 74 | response_file => "DataFiles/TC-A-6.response", 75 | expected_metrics => { "muc" => [1/3, 1/4, 2/7], 76 | "bcub" => [(10/3)/6, (1+4/3+1/2)/8, 2*(5/9)*(17/48)/((5/9)+(17/48))], 77 | "ceafm" => [0.66667, 0.5, 0.57143], 78 | "ceafe" => [0.73333, 0.55, 0.62857], 79 | "blanc" => [0.35227, 0.20870, 0.25817] } 80 | }, 81 | { id => "A7", 82 | key_file => "DataFiles/TC-A.key", 83 | response_file => "DataFiles/TC-A-7.response", 84 | expected_metrics => { "muc" => [1/3, 1/3, 1/3], 85 | "bcub" => [(10/3)/6, (1+4/3+1/2)/7, 2*(5/9)*(17/42)/((5/9)+(17/42))], 86 | "ceafm" => [0.66667, 0.57143, 0.61538], 87 | "ceafe" => [0.73333, 0.55, 0.62857], 88 | "blanc" => [0.35227, 0.27206, 0.30357] } 89 | }, 90 | { id => "A8", 91 | key_file => "DataFiles/TC-A.key", 92 | response_file => "DataFiles/TC-A-8.response", 93 | expected_metrics => { "muc" => [1/3, 1/3, 1/3], 94 | "bcub" => [(10/3)/6, (1+4/3+1/2)/7, 2*(5/9)*(17/42)/((5/9)+(17/42))], 95 | "ceafm" => [0.66667, 0.57143, 0.61538], 96 | "ceafe" => [0.73333, 0.55, 0.62857], 97 | "blanc" => [0.35227, 0.27206, 0.30357] } 98 | }, 99 | { id => "A9", 100 | key_file => "DataFiles/TC-A.key", 101 | response_file => "DataFiles/TC-A-9.response", 102 | expected_metrics => { "muc" => [1/3, 1/3, 1/3], 103 | "bcub" => [(10/3)/6, (1+4/3+1/2)/7, 2*(5/9)*(17/42)/((5/9)+(17/42))], 104 | "ceafm" => [0.66667, 0.57143, 0.61538], 105 | "ceafe" => [0.73333, 0.55, 0.62857], 106 | "blanc" => [0.35227, 0.27206, 0.30357] } 107 | }, 108 | { id => "A10", 109 | key_file => "DataFiles/TC-A.key", 110 | response_file => "DataFiles/TC-A-10.response", 111 | expected_metrics => { "muc" => [0, 0, 0], 112 | "bcub" => [3/6, 6/6, 2/3], 113 | #”ceafm" => [1, 1, 1], 114 | #”ceafe" => [1, 1, 1], 115 | "blanc" => [0.5, 0.36667, 0.42308] } 116 | }, 117 | { id => "A11", 118 | key_file => "DataFiles/TC-A.key", 119 | response_file => "DataFiles/TC-A-11.response", 120 | expected_metrics => { "muc" => [3/3, 3/5, 6/8], 121 | "bcub" => [6/6, (1/6+2*2/6+3*3/6)/6, 14/25], 122 | #”ceafm" => [1, 1, 1], 123 | #”ceafe" => [1, 1, 1], 124 | "blanc" => [0.5, 0.13333, 0.21053] } 125 | }, 126 | { id => "A12", 127 | key_file => "DataFiles/TC-A.key", 128 | response_file => "DataFiles/TC-A-12.response", 129 | expected_metrics => { "muc" => [0, 0, 0], 130 | "bcub" => [(1+1/2+2/3)/6, 4/7, 2*(13/36)*(4/7)/((13/36)+(4/7))], 131 | #”ceafm" => [1, 1, 1], 132 | #”ceafe" => [1, 1, 1], 133 | "blanc" => [0.22727, 0.11905, 0.15625] } 134 | }, 135 | { id => "A13", 136 | key_file => "DataFiles/TC-A.key", 137 | response_file => "DataFiles/TC-A-13.response", 138 | expected_metrics => { "muc" => [1/3, 1/6, 2/9], 139 | "bcub" => [(1+1/2+2*2/3)/6, (1/7+1/7+2*2/7)/7, 2*(17/36)*(6/49)/((17/36)+(6/49))], 140 | #”ceafm" => [1, 1, 1], 141 | #”ceafe" => [1, 1, 1], 142 | "blanc" => [0.125, 0.02381, 0.04] } 143 | }, 144 | { id => "B1", 145 | key_file => "DataFiles/TC-B.key", 146 | response_file => "DataFiles/TC-B-1.response", 147 | expected_metrics => { #"muc" => [1, 1, 1], 148 | #"bcub" => [1, 1, 1], 149 | #”ceafm" => [1, 1, 1], 150 | #”ceafe" => [1, 1, 1], 151 | "blanc" => [1/2 * (1/4 + 1/3), 1/2 * (1/4 + 1/3), 1/2 * (1/4 + 1/3)] } 152 | }, 153 | { id => "C1", 154 | key_file => "DataFiles/TC-C.key", 155 | response_file => "DataFiles/TC-C-1.response", 156 | expected_metrics => { #"muc" => [1, 1, 1], 157 | #"bcub" => [1, 1, 1], 158 | #”ceafm" => [1, 1, 1], 159 | #”ceafe" => [1, 1, 1], 160 | "blanc" => [1/2 * (2/5 + 10/16), 1/2 * (2/5 + 10/16), 1/2 * (2/5 + 10/16)] } 161 | }, 162 | { id => "D1", 163 | key_file => "DataFiles/TC-D.key", 164 | response_file => "DataFiles/TC-D-1.response", 165 | expected_metrics => { "muc" => [9/9, 9/10, 2*(9/9)*(9/10)/(9/9+9/10)], 166 | "bcub" => [12/12, 16/21, 2*(12/12)*(16/21)/(12/12+16/21)], 167 | #"ceafm" => [1, 1, 1], 168 | #"ceafe" => [1, 1, 1], 169 | #"blanc" => [1, 1, 1] 170 | } 171 | }, 172 | { id => "E1", 173 | key_file => "DataFiles/TC-E.key", 174 | response_file => "DataFiles/TC-E-1.response", 175 | expected_metrics => { "muc" => [9/9, 9/10, 2*(9/9)*(9/10)/(9/9+9/10)], 176 | "bcub" => [1, 7/12, 2*1*(7/12)/(1+7/12)], 177 | #"ceafm" => [1, 1, 1], 178 | #"ceafe" => [1, 1, 1], 179 | #"blanc" => [1, 1, 1] 180 | } 181 | }, 182 | { id => "F1", 183 | key_file => "DataFiles/TC-F.key", 184 | response_file => "DataFiles/TC-F-1.response", 185 | expected_metrics => { "muc" => [2/3, 2/2, 2*(2/3)*(2/2)/(2/3+2/2)] , 186 | #"bcub" => , 187 | #"ceafm" => , 188 | #"ceafe" => , 189 | #"blanc" => 190 | } 191 | }, 192 | { id => "G1", 193 | key_file => "DataFiles/TC-G.key", 194 | response_file => "DataFiles/TC-G-1.response", 195 | expected_metrics => { "muc" => [2/2, 2/3, 2*(2/2)*(2/3)/(2/2+2/3)], 196 | #"bcub" => , 197 | #"ceafm" => , 198 | #"ceafe" => , 199 | #"blanc" => 200 | } 201 | }, 202 | { id => "H1", 203 | key_file => "DataFiles/TC-H.key", 204 | response_file => "DataFiles/TC-H-1.response", 205 | expected_metrics => { "muc" => [1, 1, 1], 206 | #"bcub" => , 207 | #"ceafm" => , 208 | #"ceafe" => , 209 | #"blanc" => 210 | } 211 | }, 212 | { id => "I1", 213 | key_file => "DataFiles/TC-I.key", 214 | response_file => "DataFiles/TC-I-1.response", 215 | expected_metrics => { "muc" => [2/3, 2/2, 2*(2/3)*(2/2)/(2/3+2/2)], 216 | #"bcub" => , 217 | #"ceafm" => , 218 | #"ceafe" => , 219 | #"blanc" => 220 | } 221 | }, 222 | { id => "J1", 223 | key_file => "DataFiles/TC-J.key", 224 | response_file => "DataFiles/TC-J-1.response", 225 | expected_metrics => { "muc" => [1/2, 1/1, 2*(1/2)*(1/1)/(1/2+1/1)], 226 | #"bcub" => , 227 | #"ceafm" => , 228 | #"ceafe" => , 229 | #"blanc" => 230 | } 231 | }, 232 | { id => "K1", 233 | key_file => "DataFiles/TC-K.key", 234 | response_file => "DataFiles/TC-K-1.response", 235 | expected_metrics => { "muc" => [3/6, 3/6, 3/6], 236 | #"bcub" => , 237 | #"ceafm" => , 238 | #"ceafe" => , 239 | #"blanc" => 240 | } 241 | }, 242 | { id => "L1", 243 | key_file => "DataFiles/TC-L.key", 244 | response_file => "DataFiles/TC-L-1.response", 245 | expected_metrics => { "muc" => [2/5, 2/4, 2*(2/5)*(2/4)/(2/5+2/4)], 246 | #"bcub" => , 247 | #"ceafm" => , 248 | #"ceafe" => , 249 | #"blanc" => 250 | } 251 | }, 252 | { id => "M1", 253 | key_file => "DataFiles/TC-M.key", 254 | response_file => "DataFiles/TC-M-1.response", 255 | expected_metrics => { "muc" => [1, 1, 1], 256 | "bcub" => [1, 1, 1], 257 | "ceafm" => [1, 1, 1], 258 | "ceafe" => [1, 1, 1], 259 | "blanc" => [1, 1, 1] } 260 | }, 261 | { id => "M2", 262 | key_file => "DataFiles/TC-M.key", 263 | response_file => "DataFiles/TC-M-2.response", 264 | expected_metrics => { "muc" => [0, 0, 0], 265 | #"bcub" => , 266 | #"ceafm" => , 267 | #"ceafe" => , 268 | "blanc" => [0, 0, 0] } 269 | }, 270 | { id => "M3", 271 | key_file => "DataFiles/TC-M.key", 272 | response_file => "DataFiles/TC-M-3.response", 273 | expected_metrics => { #"muc" => , 274 | #"bcub" => , 275 | #"ceafm" => , 276 | #"ceafe" => , 277 | "blanc" => [0.26667, 1, 0.42105] } 278 | }, 279 | { id => "M4", 280 | key_file => "DataFiles/TC-M.key", 281 | response_file => "DataFiles/TC-M-4.response", 282 | expected_metrics => { #"muc" => , 283 | #"bcub" => , 284 | #"ceafm" => , 285 | #"ceafe" => , 286 | "blanc" => [0.2, 0.2, 0.2] } 287 | }, 288 | { id => "M5", 289 | key_file => "DataFiles/TC-M.key", 290 | response_file => "DataFiles/TC-M-5.response", 291 | expected_metrics => { "muc" => [0, 0, 0], 292 | #"bcub" => , 293 | #"ceafm" => , 294 | #"ceafe" => , 295 | "blanc" => [0, 0, 0] } 296 | }, 297 | { id => "M6", 298 | key_file => "DataFiles/TC-M.key", 299 | response_file => "DataFiles/TC-M-6.response", 300 | expected_metrics => { #"muc" => , 301 | #"bcub" => , 302 | #"ceafm" => , 303 | #"ceafe" => , 304 | "blanc" => [0.06667, 0.25, 0.10526] } 305 | }, 306 | { id => "N1", 307 | key_file => "DataFiles/TC-N.key", 308 | response_file => "DataFiles/TC-N-1.response", 309 | expected_metrics => { "muc" => [0, 0, 0], 310 | #"bcub" => [1, 1, 1], 311 | #"ceafm" => [1, 1, 1], 312 | #"ceafe" => [1, 1, 1], 313 | "blanc" => [1, 1, 1] } 314 | }, 315 | { id => "N2", 316 | key_file => "DataFiles/TC-N.key", 317 | response_file => "DataFiles/TC-N-2.response", 318 | expected_metrics => { "muc" => [0, 0, 0], 319 | #"bcub" => , 320 | #"ceafm" => , 321 | #"ceafe" => , 322 | "blanc" => [0, 0, 0] } 323 | }, 324 | { id => "N3", 325 | key_file => "DataFiles/TC-N.key", 326 | response_file => "DataFiles/TC-N-3.response", 327 | expected_metrics => { #"muc" => , 328 | #"bcub" => , 329 | #"ceafm" => , 330 | #"ceafe" => , 331 | "blanc" => [0.73333, 1, 0.84615] } 332 | }, 333 | { id => "N4", 334 | key_file => "DataFiles/TC-N.key", 335 | response_file => "DataFiles/TC-N-4.response", 336 | expected_metrics => { "muc" => [0, 0, 0], 337 | #"bcub" => , 338 | #"ceafm" => , 339 | #"ceafe" => , 340 | "blanc" => [0.2, 0.2, 0.2] } 341 | }, 342 | { id => "N5", 343 | key_file => "DataFiles/TC-N.key", 344 | response_file => "DataFiles/TC-N-5.response", 345 | expected_metrics => { #"muc" => , 346 | #"bcub" => , 347 | #"ceafm" => , 348 | #"ceafe" => , 349 | "blanc" => [0, 0, 0] } 350 | }, 351 | { id => "N6", 352 | key_file => "DataFiles/TC-N.key", 353 | response_file => "DataFiles/TC-N-6.response", 354 | expected_metrics => { #"muc" => , 355 | #"bcub" => , 356 | #"ceafm" => , 357 | #"ceafe" => , 358 | "blanc" => [0.13333, 0.18182, 0.15385] } 359 | } 360 | 361 | ); 362 | 363 | 1; 364 | -------------------------------------------------------------------------------- /lib/Algorithm/Munkres.pm: -------------------------------------------------------------------------------- 1 | package Algorithm::Munkres; 2 | 3 | use 5.006; 4 | use strict; 5 | use warnings; 6 | 7 | require Exporter; 8 | 9 | our @ISA = qw(Exporter); 10 | 11 | our @EXPORT = qw( assign ); 12 | 13 | our $VERSION = '0.08'; 14 | 15 | #Variables global to the package 16 | my @mat = (); 17 | my @mask = (); 18 | my @colcov = (); 19 | my @rowcov = (); 20 | my $Z0_row = 0; 21 | my $Z0_col = 0; 22 | my @path = (); 23 | 24 | #The exported subroutine. 25 | #Expected Input: Reference to the input matrix (MxN) 26 | #Output: Mx1 matrix, giving the column number of the value assigned to each row. (For more explaination refer perldoc) 27 | sub assign 28 | { 29 | #reference to the input matrix 30 | my $rmat = shift; 31 | my $rsolution_mat = shift; 32 | my ($row, $row_len) = (0,0); 33 | 34 | # re-initialize that global variables 35 | @mat = (); 36 | @mask = (); 37 | @colcov = (); 38 | @rowcov = (); 39 | $Z0_row = 0; 40 | $Z0_col = 0; 41 | @path = (); 42 | 43 | #variables local to the subroutine 44 | my $step = 0; 45 | my ($i, $j) = (0,0); 46 | 47 | #the input matrix 48 | my @inp_mat = @$rmat; 49 | 50 | #copy the orginal matrix, before applying the algorithm to the matrix 51 | foreach (@inp_mat) 52 | { 53 | push @mat, [ @$_ ]; 54 | } 55 | 56 | #check if the input matrix is well-formed i.e. either square or rectangle. 57 | $row_len = $#{$mat[0]}; 58 | foreach my $row (@mat) 59 | { 60 | if($row_len != $#$row) 61 | { 62 | die "Please check the input matrix.\nThe input matrix is not a well-formed matrix!\nThe input matrix has to be rectangular or square matrix.\n"; 63 | } 64 | } 65 | 66 | #check if the matrix is a square matrix, 67 | #if not convert it to square matrix by padding zeroes. 68 | if($#mat < $#{$mat[0]}) 69 | { 70 | # Add rows 71 | my $diff = $#{$mat[0]} - $#mat; 72 | for (1 .. $diff) 73 | { 74 | push @mat, [ (0) x @{$mat[0]} ]; 75 | } 76 | } 77 | elsif($#mat > $#{$mat[0]}) 78 | { 79 | # Add columns 80 | my $diff = $#mat - $#{$mat[0]}; 81 | for (0 .. $#mat) 82 | { 83 | push @{$mat[$_]}, (0) x $diff; 84 | } 85 | } 86 | 87 | #initialize mask, column cover and row cover matrices 88 | clear_covers(); 89 | 90 | for($i=0;$i<=$#mat;$i++) 91 | { 92 | push @mask, [ (0) x @mat ]; 93 | } 94 | 95 | #The algorithm can be grouped in 6 steps. 96 | &stepone(); 97 | &steptwo(); 98 | $step = &stepthree(); 99 | while($step == 4) 100 | { 101 | $step = &stepfour(); 102 | while($step == 6) 103 | { 104 | &stepsix(); 105 | $step = &stepfour(); 106 | } 107 | &stepfive(); 108 | $step = &stepthree(); 109 | } 110 | 111 | #create the output matrix 112 | for my $i (0 .. $#mat) 113 | { 114 | for my $j (0 .. $#{$mat[$i]}) 115 | { 116 | if($mask[$i][$j] == 1) 117 | { 118 | $rsolution_mat->[$i] = $j; 119 | } 120 | } 121 | } 122 | 123 | 124 | #Code for tracing------------------ 125 | <<'ee'; 126 | print "\nInput Matrix:\n"; 127 | for($i=0;$i<=$#mat;$i++) 128 | { 129 | for($j=0;$j<=$#mat;$j++) 130 | { 131 | print $mat[$i][$j] . "\t"; 132 | } 133 | print "\n"; 134 | } 135 | 136 | print "\nMask Matrix:\n"; 137 | for($i=0;$i<=$#mat;$i++) 138 | { 139 | for($j=0;$j<=$#mat;$j++) 140 | { 141 | print $mask[$i][$j] . "\t"; 142 | } 143 | print "\n"; 144 | } 145 | 146 | print "\nOutput Matrix:\n"; 147 | print "$_\n" for @$rsolution_mat; 148 | ee 149 | 150 | #---------------------------------- 151 | 152 | } 153 | 154 | #Step 1 - Find minimum value for every row and subtract this min from each element of the row. 155 | sub stepone 156 | { 157 | # print "Step 1 \n"; 158 | 159 | #Find the minimum value for every row 160 | for my $row (@mat) 161 | { 162 | my $min = $row->[0]; 163 | for (@$row) 164 | { 165 | $min = $_ if $min > $_; 166 | } 167 | 168 | #Subtract the minimum value of the row from each element of the row. 169 | @$row = map {$_ - $min} @$row; 170 | } 171 | # print "Step 1 end \n"; 172 | } 173 | 174 | #Step 2 - Star the zeroes, Create the mask and cover matrices. Re-initialize the cover matrices for next steps. 175 | #To star a zero: We search for a zero in the matrix and than cover the column and row in which it occurs. Now this zero is starred. 176 | #A next starred zero can occur only in those columns and rows which have not been previously covered by any other starred zero. 177 | sub steptwo 178 | { 179 | # print "Step 2 \n"; 180 | 181 | my ($i, $j) = (0,0); 182 | 183 | for($i=0;$i<=$#mat;$i++) 184 | { 185 | for($j=0;$j<=$#{$mat[$i]};$j++) 186 | { 187 | if($mat[$i][$j] == 0 && $colcov[$j] == 0 && $rowcov[$i] == 0) 188 | { 189 | $mask[$i][$j] = 1; 190 | $colcov[$j] = 1; 191 | $rowcov[$i] = 1; 192 | } 193 | } 194 | } 195 | #Re-initialize the cover matrices 196 | &clear_covers(); 197 | # print "Step 2 end\n"; 198 | } 199 | 200 | #Step 3 - Check if each column has a starred zero. If yes then the problem is solved else proceed to step 4 201 | sub stepthree 202 | { 203 | # print "Step 3 \n"; 204 | 205 | my $cnt = 0; 206 | 207 | for my $i (0 .. $#mat) 208 | { 209 | for my $j (0 .. $#mat) 210 | { 211 | if($mask[$i][$j] == 1) 212 | { 213 | $colcov[$j] = 1; 214 | $cnt++; 215 | } 216 | } 217 | } 218 | if($cnt > $#mat) 219 | { 220 | # print "Step 3 end. Next expected step 7 \n"; 221 | return 7; 222 | } 223 | else 224 | { 225 | # print "Step 3 end. Next expected step 4 \n"; 226 | return 4; 227 | } 228 | 229 | } 230 | 231 | #Step 4 - Try to find a zero which is not starred and whose columns and rows are not yet covered. 232 | #If such a zero found, prime it, try to find a starred zero in its row, 233 | # if not found proceed to step 5 234 | # else continue 235 | #Else proceed to step 6. 236 | sub stepfour 237 | { 238 | # print "Step 4 \n"; 239 | 240 | while(1) 241 | { 242 | my ($row, $col) = &find_a_zero(); 243 | if ($row < 0) 244 | { 245 | # No zeroes 246 | return 6; 247 | } 248 | 249 | $mask[$row][$col] = 2; 250 | my $star_col = &find_star_in_row($row); 251 | if ($star_col >= 0) 252 | { 253 | $col = $star_col; 254 | $rowcov[$row] = 1; 255 | $colcov[$col] = 0; 256 | } 257 | else 258 | { 259 | $Z0_row = $row; 260 | $Z0_col = $col; 261 | return 5; 262 | } 263 | } 264 | } 265 | 266 | #Tries to find yet uncovered zero 267 | sub find_a_zero 268 | { 269 | for my $i (0 .. $#mat) 270 | { 271 | next if $rowcov[$i]; 272 | 273 | for my $j (reverse(0 .. $#mat)) # Prefer large $j 274 | { 275 | next if $colcov[$j]; 276 | return ($i, $j) if $mat[$i][$j] == 0; 277 | } 278 | } 279 | 280 | return (-1, -1); 281 | } 282 | 283 | #Tries to find starred zero in the given row and returns the column number 284 | sub find_star_in_row 285 | { 286 | my $row = shift; 287 | 288 | for my $j (0 .. $#mat) 289 | { 290 | if($mask[$row][$j] == 1) 291 | { 292 | return $j; 293 | } 294 | } 295 | return -1; 296 | } 297 | 298 | #Step 5 - Try to find a starred zero in the column of the uncovered zero found in the step 4. 299 | #If starred zero found, try to find a prime zero in its row. 300 | #Continue finding starred zero in the column and primed zero in the row until, 301 | #we get to a primed zero which does not have a starred zero in its column. 302 | #At this point reduce the non-zero values of mask matrix by 1. i.e. change prime zeros to starred zeroes. 303 | #Clear the cover matrices and clear any primes i.e. values=2 from mask matrix. 304 | sub stepfive 305 | { 306 | # print "Step 5 \n"; 307 | 308 | my $cnt = 0; 309 | my $done = 0; 310 | 311 | $path[$cnt][0] = $Z0_row; 312 | $path[$cnt][1] = $Z0_col; 313 | 314 | while($done == 0) 315 | { 316 | my $row = &find_star_in_col($path[$cnt][1]); 317 | if($row > -1) 318 | { 319 | $cnt++; 320 | $path[$cnt][0] = $row; 321 | $path[$cnt][1] = $path[$cnt - 1][1]; 322 | } 323 | else 324 | { 325 | $done = 1; 326 | } 327 | if($done == 0) 328 | { 329 | my $col = &find_prime_in_row($path[$cnt][0]); 330 | $cnt++; 331 | $path[$cnt][0] = $path[$cnt - 1][0]; 332 | $path[$cnt][1] = $col; 333 | } 334 | } 335 | &convert_path($cnt); 336 | &clear_covers(); 337 | &erase_primes(); 338 | 339 | # print "Step 5 end \n"; 340 | } 341 | 342 | #Tries to find starred zero in the given column and returns the row number 343 | sub find_star_in_col 344 | { 345 | my $col = shift; 346 | 347 | for my $i (0 .. $#mat) 348 | { 349 | return $i if $mask[$i][$col] == 1; 350 | } 351 | 352 | return -1; 353 | } 354 | 355 | #Tries to find primed zero in the given row and returns the column number 356 | sub find_prime_in_row 357 | { 358 | my $row = shift; 359 | 360 | for my $j (0 .. $#mat) 361 | { 362 | return $j if $mask[$row][$j] == 2; 363 | } 364 | 365 | return -1; 366 | } 367 | 368 | #Reduces non-zero value in the mask matrix by 1. 369 | #i.e. converts all primes to stars and stars to none. 370 | sub convert_path 371 | { 372 | my $cnt = shift; 373 | 374 | for my $i (0 .. $cnt) 375 | { 376 | for ( $mask[$path[$i][0]][$path[$i][1]] ) { 377 | $_ = ( $_ == 1 ) ? 0 : 1; 378 | } 379 | } 380 | } 381 | 382 | #Clears cover matrices 383 | sub clear_covers 384 | { 385 | @rowcov = @colcov = (0) x @mat; 386 | } 387 | 388 | #Changes all primes i.e. values=2 to 0. 389 | sub erase_primes 390 | { 391 | for my $row (@mask) 392 | { 393 | for my $j (0 .. $#$row) 394 | { 395 | $row->[$j] = 0 if $row->[$j] == 2; 396 | } 397 | } 398 | } 399 | 400 | #Step 6 - Find the minimum value from the rows and columns which are currently not covered. 401 | #Subtract this minimum value from all the elements of the columns which are not covered. 402 | #Add this minimum value to all the elements of the rows which are covered. 403 | #Proceed to step 4. 404 | sub stepsix 405 | { 406 | # print "Step 6 \n"; 407 | my ($i, $j); 408 | my $minval = 0; 409 | 410 | $minval = &find_smallest(); 411 | 412 | for($i=0;$i<=$#mat;$i++) 413 | { 414 | for($j=0;$j<=$#{$mat[$i]};$j++) 415 | { 416 | if($rowcov[$i] == 1) 417 | { 418 | $mat[$i][$j] += $minval; 419 | } 420 | if($colcov[$j] == 0) 421 | { 422 | $mat[$i][$j] -= $minval; 423 | } 424 | } 425 | } 426 | 427 | # print "Step 6 end \n"; 428 | } 429 | 430 | #Finds the minimum value from all the matrix values which are not covered. 431 | sub find_smallest 432 | { 433 | my $minval; 434 | 435 | for my $i (0 .. $#mat) 436 | { 437 | next if $rowcov[$i]; 438 | 439 | for my $j (0 .. $#mat) 440 | { 441 | next if $colcov[$j]; 442 | if( !defined($minval) || $minval > $mat[$i][$j]) 443 | { 444 | $minval = $mat[$i][$j]; 445 | } 446 | } 447 | } 448 | return $minval; 449 | } 450 | 451 | 452 | 1; 453 | __END__ 454 | 455 | =head1 NAME 456 | 457 | Algorithm::Munkres - Perl extension for Munkres' solution to 458 | classical Assignment problem for square and rectangular matrices 459 | This module extends the solution of Assignment problem for square 460 | matrices to rectangular matrices by padding zeros. Thus a rectangular 461 | matrix is converted to square matrix by padding necessary zeros. 462 | 463 | =head1 SYNOPSIS 464 | 465 | use Algorithm::Munkres; 466 | 467 | @mat = ( 468 | [2, 4, 7, 9], 469 | [3, 9, 5, 1], 470 | [8, 2, 9, 7], 471 | ); 472 | 473 | assign(\@mat,\@out_mat); 474 | 475 | Then the @out_mat array will have the output as: (0,3,1,2), 476 | where 477 | 0th element indicates that 0th row is assigned 0th column i.e value=2 478 | 1st element indicates that 1st row is assigned 3rd column i.e.value=1 479 | 2nd element indicates that 2nd row is assigned 1st column.i.e.value=2 480 | 3rd element indicates that 3rd row is assigned 2nd column.i.e.value=0 481 | 482 | 483 | =head1 DESCRIPTION 484 | 485 | Assignment Problem: Given N jobs, N workers and the time taken by 486 | each worker to complete a job then how should the assignment of a 487 | Worker to a Job be done, so as to minimize the time taken. 488 | 489 | Thus if we have 3 jobs p,q,r and 3 workers x,y,z such that: 490 | x y z 491 | p 2 4 7 492 | q 3 9 5 493 | r 8 2 9 494 | 495 | where the cell values of the above matrix give the time required 496 | for the worker(given by column name) to complete the job(given by 497 | the row name) 498 | 499 | then possible solutions are: 500 | Total 501 | 1. 2, 9, 9 20 502 | 2. 2, 2, 5 9 503 | 3. 3, 4, 9 16 504 | 4. 3, 2, 7 12 505 | 5. 8, 9, 7 24 506 | 6. 8, 4, 5 17 507 | 508 | Thus (2) is the optimal solution for the above problem. 509 | This kind of brute-force approach of solving Assignment problem 510 | quickly becomes slow and bulky as N grows, because the number of 511 | possible solution are N! and thus the task is to evaluate each 512 | and then find the optimal solution.(If N=10, number of possible 513 | solutions: 3628800 !) 514 | Munkres' gives us a solution to this problem, which is implemented 515 | in this module. 516 | 517 | This module also solves Assignment problem for rectangular matrices 518 | (M x N) by converting them to square matrices by padding zeros. ex: 519 | If input matrix is: 520 | [2, 4, 7, 9], 521 | [3, 9, 5, 1], 522 | [8, 2, 9, 7] 523 | i.e 3 x 4 then we will convert it to 4 x 4 and the modified input 524 | matrix will be: 525 | [2, 4, 7, 9], 526 | [3, 9, 5, 1], 527 | [8, 2, 9, 7], 528 | [0, 0, 0, 0] 529 | 530 | =head1 EXPORT 531 | 532 | "assign" function by default. 533 | 534 | =head1 INPUT 535 | 536 | The input matrix should be in a two dimensional array(array of 537 | array) and the 'assign' subroutine expects a reference to this 538 | array and not the complete array. 539 | eg:assign(\@inp_mat, \@out_mat); 540 | The second argument to the assign subroutine is the reference 541 | to the output array. 542 | 543 | =head1 OUTPUT 544 | 545 | The assign subroutine expects references to two arrays as its 546 | input paramenters. The second parameter is the reference to the 547 | output array. This array is populated by assign subroutine. This 548 | array is single dimensional Nx1 matrix. 549 | For above example the output array returned will be: 550 | (0, 551 | 2, 552 | 1) 553 | 554 | where 555 | 0th element indicates that 0th row is assigned 0th column i.e value=2 556 | 1st element indicates that 1st row is assigned 2nd column i.e.value=5 557 | 2nd element indicates that 2nd row is assigned 1st column.i.e.value=2 558 | 559 | =head1 SEE ALSO 560 | 561 | 1. http://216.249.163.93/bob.pilgrim/445/munkres.html 562 | 563 | 2. Munkres, J. Algorithms for the assignment and transportation 564 | Problems. J. Siam 5 (Mar. 1957), 32-38 565 | 566 | 3. François Bourgeois and Jean-Claude Lassalle. 1971. 567 | An extension of the Munkres algorithm for the assignment 568 | problem to rectangular matrices. 569 | Communication ACM, 14(12):802-804 570 | 571 | =head1 AUTHOR 572 | 573 | Anagha Kulkarni, University of Minnesota Duluth 574 | kulka020 d.umn.edu 575 | 576 | Ted Pedersen, University of Minnesota Duluth 577 | tpederse d.umn.edu 578 | 579 | =head1 COPYRIGHT AND LICENSE 580 | 581 | Copyright (C) 2007-2008, Ted Pedersen and Anagha Kulkarni 582 | 583 | This program is free software; you can redistribute it and/or 584 | modify it under the terms of the GNU General Public License 585 | as published by the Free Software Foundation; either version 2 586 | of the License, or (at your option) any later version. 587 | This program is distributed in the hope that it will be useful, 588 | but WITHOUT ANY WARRANTY; without even the implied warranty of 589 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 590 | GNU General Public License for more details. 591 | 592 | You should have received a copy of the GNU General Public License 593 | along with this program; if not, write to the Free Software 594 | Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 595 | 596 | =cut 597 | -------------------------------------------------------------------------------- /lib/Cwd.pm: -------------------------------------------------------------------------------- 1 | package Cwd; 2 | 3 | =head1 NAME 4 | 5 | Cwd - get pathname of current working directory 6 | 7 | =head1 SYNOPSIS 8 | 9 | use Cwd; 10 | my $dir = getcwd; 11 | 12 | use Cwd 'abs_path'; 13 | my $abs_path = abs_path($file); 14 | 15 | =head1 DESCRIPTION 16 | 17 | This module provides functions for determining the pathname of the 18 | current working directory. It is recommended that getcwd (or another 19 | *cwd() function) be used in I code to ensure portability. 20 | 21 | By default, it exports the functions cwd(), getcwd(), fastcwd(), and 22 | fastgetcwd() (and, on Win32, getdcwd()) into the caller's namespace. 23 | 24 | 25 | =head2 getcwd and friends 26 | 27 | Each of these functions are called without arguments and return the 28 | absolute path of the current working directory. 29 | 30 | =over 4 31 | 32 | =item getcwd 33 | 34 | my $cwd = getcwd(); 35 | 36 | Returns the current working directory. 37 | 38 | Exposes the POSIX function getcwd(3) or re-implements it if it's not 39 | available. 40 | 41 | =item cwd 42 | 43 | my $cwd = cwd(); 44 | 45 | The cwd() is the most natural form for the current architecture. For 46 | most systems it is identical to `pwd` (but without the trailing line 47 | terminator). 48 | 49 | =item fastcwd 50 | 51 | my $cwd = fastcwd(); 52 | 53 | A more dangerous version of getcwd(), but potentially faster. 54 | 55 | It might conceivably chdir() you out of a directory that it can't 56 | chdir() you back into. If fastcwd encounters a problem it will return 57 | undef but will probably leave you in a different directory. For a 58 | measure of extra security, if everything appears to have worked, the 59 | fastcwd() function will check that it leaves you in the same directory 60 | that it started in. If it has changed it will C with the message 61 | "Unstable directory path, current directory changed 62 | unexpectedly". That should never happen. 63 | 64 | =item fastgetcwd 65 | 66 | my $cwd = fastgetcwd(); 67 | 68 | The fastgetcwd() function is provided as a synonym for cwd(). 69 | 70 | =item getdcwd 71 | 72 | my $cwd = getdcwd(); 73 | my $cwd = getdcwd('C:'); 74 | 75 | The getdcwd() function is also provided on Win32 to get the current working 76 | directory on the specified drive, since Windows maintains a separate current 77 | working directory for each drive. If no drive is specified then the current 78 | drive is assumed. 79 | 80 | This function simply calls the Microsoft C library _getdcwd() function. 81 | 82 | =back 83 | 84 | 85 | =head2 abs_path and friends 86 | 87 | These functions are exported only on request. They each take a single 88 | argument and return the absolute pathname for it. If no argument is 89 | given they'll use the current working directory. 90 | 91 | =over 4 92 | 93 | =item abs_path 94 | 95 | my $abs_path = abs_path($file); 96 | 97 | Uses the same algorithm as getcwd(). Symbolic links and relative-path 98 | components ("." and "..") are resolved to return the canonical 99 | pathname, just like realpath(3). 100 | 101 | =item realpath 102 | 103 | my $abs_path = realpath($file); 104 | 105 | A synonym for abs_path(). 106 | 107 | =item fast_abs_path 108 | 109 | my $abs_path = fast_abs_path($file); 110 | 111 | A more dangerous, but potentially faster version of abs_path. 112 | 113 | =back 114 | 115 | =head2 $ENV{PWD} 116 | 117 | If you ask to override your chdir() built-in function, 118 | 119 | use Cwd qw(chdir); 120 | 121 | then your PWD environment variable will be kept up to date. Note that 122 | it will only be kept up to date if all packages which use chdir import 123 | it from Cwd. 124 | 125 | 126 | =head1 NOTES 127 | 128 | =over 4 129 | 130 | =item * 131 | 132 | Since the path separators are different on some operating systems ('/' 133 | on Unix, ':' on MacPerl, etc...) we recommend you use the File::Spec 134 | modules wherever portability is a concern. 135 | 136 | =item * 137 | 138 | Actually, on Mac OS, the C, C and C 139 | functions are all aliases for the C function, which, on Mac OS, 140 | calls `pwd`. Likewise, the C function is an alias for 141 | C. 142 | 143 | =back 144 | 145 | =head1 AUTHOR 146 | 147 | Originally by the perl5-porters. 148 | 149 | Maintained by Ken Williams 150 | 151 | =head1 COPYRIGHT 152 | 153 | Copyright (c) 2004 by the Perl 5 Porters. All rights reserved. 154 | 155 | This program is free software; you can redistribute it and/or modify 156 | it under the same terms as Perl itself. 157 | 158 | Portions of the C code in this library are copyright (c) 1994 by the 159 | Regents of the University of California. All rights reserved. The 160 | license on this code is compatible with the licensing of the rest of 161 | the distribution - please see the source code in F for the 162 | details. 163 | 164 | =head1 SEE ALSO 165 | 166 | L 167 | 168 | =cut 169 | 170 | use strict; 171 | use Exporter; 172 | use vars qw(@ISA @EXPORT @EXPORT_OK $VERSION); 173 | 174 | $VERSION = '3.39_02'; 175 | my $xs_version = $VERSION; 176 | $VERSION =~ tr/_//; 177 | 178 | @ISA = qw/ Exporter /; 179 | @EXPORT = qw(cwd getcwd fastcwd fastgetcwd); 180 | push @EXPORT, qw(getdcwd) if $^O eq 'MSWin32'; 181 | @EXPORT_OK = qw(chdir abs_path fast_abs_path realpath fast_realpath); 182 | 183 | # sys_cwd may keep the builtin command 184 | 185 | # All the functionality of this module may provided by builtins, 186 | # there is no sense to process the rest of the file. 187 | # The best choice may be to have this in BEGIN, but how to return from BEGIN? 188 | 189 | if ($^O eq 'os2') { 190 | local $^W = 0; 191 | 192 | *cwd = defined &sys_cwd ? \&sys_cwd : \&_os2_cwd; 193 | *getcwd = \&cwd; 194 | *fastgetcwd = \&cwd; 195 | *fastcwd = \&cwd; 196 | 197 | *fast_abs_path = \&sys_abspath if defined &sys_abspath; 198 | *abs_path = \&fast_abs_path; 199 | *realpath = \&fast_abs_path; 200 | *fast_realpath = \&fast_abs_path; 201 | 202 | return 1; 203 | } 204 | 205 | # Need to look up the feature settings on VMS. The preferred way is to use the 206 | # VMS::Feature module, but that may not be available to dual life modules. 207 | 208 | my $use_vms_feature; 209 | BEGIN { 210 | if ($^O eq 'VMS') { 211 | if (eval { local $SIG{__DIE__}; require VMS::Feature; }) { 212 | $use_vms_feature = 1; 213 | } 214 | } 215 | } 216 | 217 | # Need to look up the UNIX report mode. This may become a dynamic mode 218 | # in the future. 219 | sub _vms_unix_rpt { 220 | my $unix_rpt; 221 | if ($use_vms_feature) { 222 | $unix_rpt = VMS::Feature::current("filename_unix_report"); 223 | } else { 224 | my $env_unix_rpt = $ENV{'DECC$FILENAME_UNIX_REPORT'} || ''; 225 | $unix_rpt = $env_unix_rpt =~ /^[ET1]/i; 226 | } 227 | return $unix_rpt; 228 | } 229 | 230 | # Need to look up the EFS character set mode. This may become a dynamic 231 | # mode in the future. 232 | sub _vms_efs { 233 | my $efs; 234 | if ($use_vms_feature) { 235 | $efs = VMS::Feature::current("efs_charset"); 236 | } else { 237 | my $env_efs = $ENV{'DECC$EFS_CHARSET'} || ''; 238 | $efs = $env_efs =~ /^[ET1]/i; 239 | } 240 | return $efs; 241 | } 242 | 243 | 244 | # If loading the XS stuff doesn't work, we can fall back to pure perl 245 | eval { 246 | if ( $] >= 5.006 ) { 247 | require XSLoader; 248 | XSLoader::load( __PACKAGE__, $xs_version); 249 | } else { 250 | require DynaLoader; 251 | push @ISA, 'DynaLoader'; 252 | __PACKAGE__->bootstrap( $xs_version ); 253 | } 254 | }; 255 | 256 | # Big nasty table of function aliases 257 | my %METHOD_MAP = 258 | ( 259 | VMS => 260 | { 261 | cwd => '_vms_cwd', 262 | getcwd => '_vms_cwd', 263 | fastcwd => '_vms_cwd', 264 | fastgetcwd => '_vms_cwd', 265 | abs_path => '_vms_abs_path', 266 | fast_abs_path => '_vms_abs_path', 267 | }, 268 | 269 | MSWin32 => 270 | { 271 | # We assume that &_NT_cwd is defined as an XSUB or in the core. 272 | cwd => '_NT_cwd', 273 | getcwd => '_NT_cwd', 274 | fastcwd => '_NT_cwd', 275 | fastgetcwd => '_NT_cwd', 276 | abs_path => 'fast_abs_path', 277 | realpath => 'fast_abs_path', 278 | }, 279 | 280 | dos => 281 | { 282 | cwd => '_dos_cwd', 283 | getcwd => '_dos_cwd', 284 | fastgetcwd => '_dos_cwd', 285 | fastcwd => '_dos_cwd', 286 | abs_path => 'fast_abs_path', 287 | }, 288 | 289 | # QNX4. QNX6 has a $os of 'nto'. 290 | qnx => 291 | { 292 | cwd => '_qnx_cwd', 293 | getcwd => '_qnx_cwd', 294 | fastgetcwd => '_qnx_cwd', 295 | fastcwd => '_qnx_cwd', 296 | abs_path => '_qnx_abs_path', 297 | fast_abs_path => '_qnx_abs_path', 298 | }, 299 | 300 | cygwin => 301 | { 302 | getcwd => 'cwd', 303 | fastgetcwd => 'cwd', 304 | fastcwd => 'cwd', 305 | abs_path => 'fast_abs_path', 306 | realpath => 'fast_abs_path', 307 | }, 308 | 309 | epoc => 310 | { 311 | cwd => '_epoc_cwd', 312 | getcwd => '_epoc_cwd', 313 | fastgetcwd => '_epoc_cwd', 314 | fastcwd => '_epoc_cwd', 315 | abs_path => 'fast_abs_path', 316 | }, 317 | 318 | MacOS => 319 | { 320 | getcwd => 'cwd', 321 | fastgetcwd => 'cwd', 322 | fastcwd => 'cwd', 323 | abs_path => 'fast_abs_path', 324 | }, 325 | ); 326 | 327 | $METHOD_MAP{NT} = $METHOD_MAP{MSWin32}; 328 | 329 | 330 | # Find the pwd command in the expected locations. We assume these 331 | # are safe. This prevents _backtick_pwd() consulting $ENV{PATH} 332 | # so everything works under taint mode. 333 | my $pwd_cmd; 334 | foreach my $try ('/bin/pwd', 335 | '/usr/bin/pwd', 336 | '/QOpenSys/bin/pwd', # OS/400 PASE. 337 | ) { 338 | 339 | if( -x $try ) { 340 | $pwd_cmd = $try; 341 | last; 342 | } 343 | } 344 | my $found_pwd_cmd = defined($pwd_cmd); 345 | unless ($pwd_cmd) { 346 | # Isn't this wrong? _backtick_pwd() will fail if somenone has 347 | # pwd in their path but it is not /bin/pwd or /usr/bin/pwd? 348 | # See [perl #16774]. --jhi 349 | $pwd_cmd = 'pwd'; 350 | } 351 | 352 | # Lazy-load Carp 353 | sub _carp { require Carp; Carp::carp(@_) } 354 | sub _croak { require Carp; Carp::croak(@_) } 355 | 356 | # The 'natural and safe form' for UNIX (pwd may be setuid root) 357 | sub _backtick_pwd { 358 | # Localize %ENV entries in a way that won't create new hash keys 359 | my @localize = grep exists $ENV{$_}, qw(PATH IFS CDPATH ENV BASH_ENV); 360 | local @ENV{@localize}; 361 | 362 | my $cwd = `$pwd_cmd`; 363 | # Belt-and-suspenders in case someone said "undef $/". 364 | local $/ = "\n"; 365 | # `pwd` may fail e.g. if the disk is full 366 | chomp($cwd) if defined $cwd; 367 | $cwd; 368 | } 369 | 370 | # Since some ports may predefine cwd internally (e.g., NT) 371 | # we take care not to override an existing definition for cwd(). 372 | 373 | unless ($METHOD_MAP{$^O}{cwd} or defined &cwd) { 374 | # The pwd command is not available in some chroot(2)'ed environments 375 | my $sep = $Config::Config{path_sep} || ':'; 376 | my $os = $^O; # Protect $^O from tainting 377 | 378 | 379 | # Try again to find a pwd, this time searching the whole PATH. 380 | if (defined $ENV{PATH} and $os ne 'MSWin32') { # no pwd on Windows 381 | my @candidates = split($sep, $ENV{PATH}); 382 | while (!$found_pwd_cmd and @candidates) { 383 | my $candidate = shift @candidates; 384 | $found_pwd_cmd = 1 if -x "$candidate/pwd"; 385 | } 386 | } 387 | 388 | # MacOS has some special magic to make `pwd` work. 389 | if( $os eq 'MacOS' || $found_pwd_cmd ) 390 | { 391 | *cwd = \&_backtick_pwd; 392 | } 393 | else { 394 | *cwd = \&getcwd; 395 | } 396 | } 397 | 398 | if ($^O eq 'cygwin') { 399 | # We need to make sure cwd() is called with no args, because it's 400 | # got an arg-less prototype and will die if args are present. 401 | local $^W = 0; 402 | my $orig_cwd = \&cwd; 403 | *cwd = sub { &$orig_cwd() } 404 | } 405 | 406 | 407 | # set a reasonable (and very safe) default for fastgetcwd, in case it 408 | # isn't redefined later (20001212 rspier) 409 | *fastgetcwd = \&cwd; 410 | 411 | # A non-XS version of getcwd() - also used to bootstrap the perl build 412 | # process, when miniperl is running and no XS loading happens. 413 | sub _perl_getcwd 414 | { 415 | abs_path('.'); 416 | } 417 | 418 | # By John Bazik 419 | # 420 | # Usage: $cwd = &fastcwd; 421 | # 422 | # This is a faster version of getcwd. It's also more dangerous because 423 | # you might chdir out of a directory that you can't chdir back into. 424 | 425 | sub fastcwd_ { 426 | my($odev, $oino, $cdev, $cino, $tdev, $tino); 427 | my(@path, $path); 428 | local(*DIR); 429 | 430 | my($orig_cdev, $orig_cino) = stat('.'); 431 | ($cdev, $cino) = ($orig_cdev, $orig_cino); 432 | for (;;) { 433 | my $direntry; 434 | ($odev, $oino) = ($cdev, $cino); 435 | CORE::chdir('..') || return undef; 436 | ($cdev, $cino) = stat('.'); 437 | last if $odev == $cdev && $oino == $cino; 438 | opendir(DIR, '.') || return undef; 439 | for (;;) { 440 | $direntry = readdir(DIR); 441 | last unless defined $direntry; 442 | next if $direntry eq '.'; 443 | next if $direntry eq '..'; 444 | 445 | ($tdev, $tino) = lstat($direntry); 446 | last unless $tdev != $odev || $tino != $oino; 447 | } 448 | closedir(DIR); 449 | return undef unless defined $direntry; # should never happen 450 | unshift(@path, $direntry); 451 | } 452 | $path = '/' . join('/', @path); 453 | if ($^O eq 'apollo') { $path = "/".$path; } 454 | # At this point $path may be tainted (if tainting) and chdir would fail. 455 | # Untaint it then check that we landed where we started. 456 | $path =~ /^(.*)\z/s # untaint 457 | && CORE::chdir($1) or return undef; 458 | ($cdev, $cino) = stat('.'); 459 | die "Unstable directory path, current directory changed unexpectedly" 460 | if $cdev != $orig_cdev || $cino != $orig_cino; 461 | $path; 462 | } 463 | if (not defined &fastcwd) { *fastcwd = \&fastcwd_ } 464 | 465 | 466 | # Keeps track of current working directory in PWD environment var 467 | # Usage: 468 | # use Cwd 'chdir'; 469 | # chdir $newdir; 470 | 471 | my $chdir_init = 0; 472 | 473 | sub chdir_init { 474 | if ($ENV{'PWD'} and $^O ne 'os2' and $^O ne 'dos' and $^O ne 'MSWin32') { 475 | my($dd,$di) = stat('.'); 476 | my($pd,$pi) = stat($ENV{'PWD'}); 477 | if (!defined $dd or !defined $pd or $di != $pi or $dd != $pd) { 478 | $ENV{'PWD'} = cwd(); 479 | } 480 | } 481 | else { 482 | my $wd = cwd(); 483 | $wd = Win32::GetFullPathName($wd) if $^O eq 'MSWin32'; 484 | $ENV{'PWD'} = $wd; 485 | } 486 | # Strip an automounter prefix (where /tmp_mnt/foo/bar == /foo/bar) 487 | if ($^O ne 'MSWin32' and $ENV{'PWD'} =~ m|(/[^/]+(/[^/]+/[^/]+))(.*)|s) { 488 | my($pd,$pi) = stat($2); 489 | my($dd,$di) = stat($1); 490 | if (defined $pd and defined $dd and $di == $pi and $dd == $pd) { 491 | $ENV{'PWD'}="$2$3"; 492 | } 493 | } 494 | $chdir_init = 1; 495 | } 496 | 497 | sub chdir { 498 | my $newdir = @_ ? shift : ''; # allow for no arg (chdir to HOME dir) 499 | $newdir =~ s|///*|/|g unless $^O eq 'MSWin32'; 500 | chdir_init() unless $chdir_init; 501 | my $newpwd; 502 | if ($^O eq 'MSWin32') { 503 | # get the full path name *before* the chdir() 504 | $newpwd = Win32::GetFullPathName($newdir); 505 | } 506 | 507 | return 0 unless CORE::chdir $newdir; 508 | 509 | if ($^O eq 'VMS') { 510 | return $ENV{'PWD'} = $ENV{'DEFAULT'} 511 | } 512 | elsif ($^O eq 'MacOS') { 513 | return $ENV{'PWD'} = cwd(); 514 | } 515 | elsif ($^O eq 'MSWin32') { 516 | $ENV{'PWD'} = $newpwd; 517 | return 1; 518 | } 519 | 520 | if (ref $newdir eq 'GLOB') { # in case a file/dir handle is passed in 521 | $ENV{'PWD'} = cwd(); 522 | } elsif ($newdir =~ m#^/#s) { 523 | $ENV{'PWD'} = $newdir; 524 | } else { 525 | my @curdir = split(m#/#,$ENV{'PWD'}); 526 | @curdir = ('') unless @curdir; 527 | my $component; 528 | foreach $component (split(m#/#, $newdir)) { 529 | next if $component eq '.'; 530 | pop(@curdir),next if $component eq '..'; 531 | push(@curdir,$component); 532 | } 533 | $ENV{'PWD'} = join('/',@curdir) || '/'; 534 | } 535 | 1; 536 | } 537 | 538 | 539 | sub _perl_abs_path 540 | { 541 | my $start = @_ ? shift : '.'; 542 | my($dotdots, $cwd, @pst, @cst, $dir, @tst); 543 | 544 | unless (@cst = stat( $start )) 545 | { 546 | _carp("stat($start): $!"); 547 | return ''; 548 | } 549 | 550 | unless (-d _) { 551 | # Make sure we can be invoked on plain files, not just directories. 552 | # NOTE that this routine assumes that '/' is the only directory separator. 553 | 554 | my ($dir, $file) = $start =~ m{^(.*)/(.+)$} 555 | or return cwd() . '/' . $start; 556 | 557 | # Can't use "-l _" here, because the previous stat was a stat(), not an lstat(). 558 | if (-l $start) { 559 | my $link_target = readlink($start); 560 | die "Can't resolve link $start: $!" unless defined $link_target; 561 | 562 | require File::Spec; 563 | $link_target = $dir . '/' . $link_target 564 | unless File::Spec->file_name_is_absolute($link_target); 565 | 566 | return abs_path($link_target); 567 | } 568 | 569 | return $dir ? abs_path($dir) . "/$file" : "/$file"; 570 | } 571 | 572 | $cwd = ''; 573 | $dotdots = $start; 574 | do 575 | { 576 | $dotdots .= '/..'; 577 | @pst = @cst; 578 | local *PARENT; 579 | unless (opendir(PARENT, $dotdots)) 580 | { 581 | # probably a permissions issue. Try the native command. 582 | require File::Spec; 583 | return File::Spec->rel2abs( $start, _backtick_pwd() ); 584 | } 585 | unless (@cst = stat($dotdots)) 586 | { 587 | _carp("stat($dotdots): $!"); 588 | closedir(PARENT); 589 | return ''; 590 | } 591 | if ($pst[0] == $cst[0] && $pst[1] == $cst[1]) 592 | { 593 | $dir = undef; 594 | } 595 | else 596 | { 597 | do 598 | { 599 | unless (defined ($dir = readdir(PARENT))) 600 | { 601 | _carp("readdir($dotdots): $!"); 602 | closedir(PARENT); 603 | return ''; 604 | } 605 | $tst[0] = $pst[0]+1 unless (@tst = lstat("$dotdots/$dir")) 606 | } 607 | while ($dir eq '.' || $dir eq '..' || $tst[0] != $pst[0] || 608 | $tst[1] != $pst[1]); 609 | } 610 | $cwd = (defined $dir ? "$dir" : "" ) . "/$cwd" ; 611 | closedir(PARENT); 612 | } while (defined $dir); 613 | chop($cwd) unless $cwd eq '/'; # drop the trailing / 614 | $cwd; 615 | } 616 | 617 | 618 | my $Curdir; 619 | sub fast_abs_path { 620 | local $ENV{PWD} = $ENV{PWD} || ''; # Guard against clobberage 621 | my $cwd = getcwd(); 622 | require File::Spec; 623 | my $path = @_ ? shift : ($Curdir ||= File::Spec->curdir); 624 | 625 | # Detaint else we'll explode in taint mode. This is safe because 626 | # we're not doing anything dangerous with it. 627 | ($path) = $path =~ /(.*)/; 628 | ($cwd) = $cwd =~ /(.*)/; 629 | 630 | unless (-e $path) { 631 | _croak("$path: No such file or directory"); 632 | } 633 | 634 | unless (-d _) { 635 | # Make sure we can be invoked on plain files, not just directories. 636 | 637 | my ($vol, $dir, $file) = File::Spec->splitpath($path); 638 | return File::Spec->catfile($cwd, $path) unless length $dir; 639 | 640 | if (-l $path) { 641 | my $link_target = readlink($path); 642 | die "Can't resolve link $path: $!" unless defined $link_target; 643 | 644 | $link_target = File::Spec->catpath($vol, $dir, $link_target) 645 | unless File::Spec->file_name_is_absolute($link_target); 646 | 647 | return fast_abs_path($link_target); 648 | } 649 | 650 | return $dir eq File::Spec->rootdir 651 | ? File::Spec->catpath($vol, $dir, $file) 652 | : fast_abs_path(File::Spec->catpath($vol, $dir, '')) . '/' . $file; 653 | } 654 | 655 | if (!CORE::chdir($path)) { 656 | _croak("Cannot chdir to $path: $!"); 657 | } 658 | my $realpath = getcwd(); 659 | if (! ((-d $cwd) && (CORE::chdir($cwd)))) { 660 | _croak("Cannot chdir back to $cwd: $!"); 661 | } 662 | $realpath; 663 | } 664 | 665 | # added function alias to follow principle of least surprise 666 | # based on previous aliasing. --tchrist 27-Jan-00 667 | *fast_realpath = \&fast_abs_path; 668 | 669 | 670 | # --- PORTING SECTION --- 671 | 672 | # VMS: $ENV{'DEFAULT'} points to default directory at all times 673 | # 06-Mar-1996 Charles Bailey bailey@newman.upenn.edu 674 | # Note: Use of Cwd::chdir() causes the logical name PWD to be defined 675 | # in the process logical name table as the default device and directory 676 | # seen by Perl. This may not be the same as the default device 677 | # and directory seen by DCL after Perl exits, since the effects 678 | # the CRTL chdir() function persist only until Perl exits. 679 | 680 | sub _vms_cwd { 681 | return $ENV{'DEFAULT'}; 682 | } 683 | 684 | sub _vms_abs_path { 685 | return $ENV{'DEFAULT'} unless @_; 686 | my $path = shift; 687 | 688 | my $efs = _vms_efs; 689 | my $unix_rpt = _vms_unix_rpt; 690 | 691 | if (defined &VMS::Filespec::vmsrealpath) { 692 | my $path_unix = 0; 693 | my $path_vms = 0; 694 | 695 | $path_unix = 1 if ($path =~ m#(?<=\^)/#); 696 | $path_unix = 1 if ($path =~ /^\.\.?$/); 697 | $path_vms = 1 if ($path =~ m#[\[<\]]#); 698 | $path_vms = 1 if ($path =~ /^--?$/); 699 | 700 | my $unix_mode = $path_unix; 701 | if ($efs) { 702 | # In case of a tie, the Unix report mode decides. 703 | if ($path_vms == $path_unix) { 704 | $unix_mode = $unix_rpt; 705 | } else { 706 | $unix_mode = 0 if $path_vms; 707 | } 708 | } 709 | 710 | if ($unix_mode) { 711 | # Unix format 712 | return VMS::Filespec::unixrealpath($path); 713 | } 714 | 715 | # VMS format 716 | 717 | my $new_path = VMS::Filespec::vmsrealpath($path); 718 | 719 | # Perl expects directories to be in directory format 720 | $new_path = VMS::Filespec::pathify($new_path) if -d $path; 721 | return $new_path; 722 | } 723 | 724 | # Fallback to older algorithm if correct ones are not 725 | # available. 726 | 727 | if (-l $path) { 728 | my $link_target = readlink($path); 729 | die "Can't resolve link $path: $!" unless defined $link_target; 730 | 731 | return _vms_abs_path($link_target); 732 | } 733 | 734 | # may need to turn foo.dir into [.foo] 735 | my $pathified = VMS::Filespec::pathify($path); 736 | $path = $pathified if defined $pathified; 737 | 738 | return VMS::Filespec::rmsexpand($path); 739 | } 740 | 741 | sub _os2_cwd { 742 | $ENV{'PWD'} = `cmd /c cd`; 743 | chomp $ENV{'PWD'}; 744 | $ENV{'PWD'} =~ s:\\:/:g ; 745 | return $ENV{'PWD'}; 746 | } 747 | 748 | sub _win32_cwd_simple { 749 | $ENV{'PWD'} = `cd`; 750 | chomp $ENV{'PWD'}; 751 | $ENV{'PWD'} =~ s:\\:/:g ; 752 | return $ENV{'PWD'}; 753 | } 754 | 755 | sub _win32_cwd { 756 | # Need to avoid taking any sort of reference to the typeglob or the code in 757 | # the optree, so that this tests the runtime state of things, as the 758 | # ExtUtils::MakeMaker tests for "miniperl" need to be able to fake things at 759 | # runtime by deleting the subroutine. *foo{THING} syntax on a symbol table 760 | # lookup avoids needing a string eval, which has been reported to cause 761 | # problems (for reasons that we haven't been able to get to the bottom of - 762 | # rt.cpan.org #56225) 763 | if (*{$DynaLoader::{boot_DynaLoader}}{CODE}) { 764 | $ENV{'PWD'} = Win32::GetCwd(); 765 | } 766 | else { # miniperl 767 | chomp($ENV{'PWD'} = `cd`); 768 | } 769 | $ENV{'PWD'} =~ s:\\:/:g ; 770 | return $ENV{'PWD'}; 771 | } 772 | 773 | *_NT_cwd = defined &Win32::GetCwd ? \&_win32_cwd : \&_win32_cwd_simple; 774 | 775 | sub _dos_cwd { 776 | if (!defined &Dos::GetCwd) { 777 | $ENV{'PWD'} = `command /c cd`; 778 | chomp $ENV{'PWD'}; 779 | $ENV{'PWD'} =~ s:\\:/:g ; 780 | } else { 781 | $ENV{'PWD'} = Dos::GetCwd(); 782 | } 783 | return $ENV{'PWD'}; 784 | } 785 | 786 | sub _qnx_cwd { 787 | local $ENV{PATH} = ''; 788 | local $ENV{CDPATH} = ''; 789 | local $ENV{ENV} = ''; 790 | $ENV{'PWD'} = `/usr/bin/fullpath -t`; 791 | chomp $ENV{'PWD'}; 792 | return $ENV{'PWD'}; 793 | } 794 | 795 | sub _qnx_abs_path { 796 | local $ENV{PATH} = ''; 797 | local $ENV{CDPATH} = ''; 798 | local $ENV{ENV} = ''; 799 | my $path = @_ ? shift : '.'; 800 | local *REALPATH; 801 | 802 | defined( open(REALPATH, '-|') || exec '/usr/bin/fullpath', '-t', $path ) or 803 | die "Can't open /usr/bin/fullpath: $!"; 804 | my $realpath = ; 805 | close REALPATH; 806 | chomp $realpath; 807 | return $realpath; 808 | } 809 | 810 | sub _epoc_cwd { 811 | $ENV{'PWD'} = EPOC::getcwd(); 812 | return $ENV{'PWD'}; 813 | } 814 | 815 | 816 | # Now that all the base-level functions are set up, alias the 817 | # user-level functions to the right places 818 | 819 | if (exists $METHOD_MAP{$^O}) { 820 | my $map = $METHOD_MAP{$^O}; 821 | foreach my $name (keys %$map) { 822 | local $^W = 0; # assignments trigger 'subroutine redefined' warning 823 | no strict 'refs'; 824 | *{$name} = \&{$map->{$name}}; 825 | } 826 | } 827 | 828 | # In case the XS version doesn't load. 829 | *abs_path = \&_perl_abs_path unless defined &abs_path; 830 | *getcwd = \&_perl_getcwd unless defined &getcwd; 831 | 832 | # added function alias for those of us more 833 | # used to the libc function. --tchrist 27-Jan-00 834 | *realpath = \&abs_path; 835 | 836 | 1; 837 | -------------------------------------------------------------------------------- /lib/Math/Combinatorics.pm: -------------------------------------------------------------------------------- 1 | =head1 NAME 2 | 3 | Math::Combinatorics - Perform combinations and permutations on lists 4 | 5 | =head1 SYNOPSIS 6 | 7 | Available as an object oriented API. 8 | 9 | use Math::Combinatorics; 10 | 11 | my @n = qw(a b c); 12 | my $combinat = Math::Combinatorics->new(count => 2, 13 | data => [@n], 14 | ); 15 | 16 | print "combinations of 2 from: ".join(" ",@n)."\n"; 17 | print "------------------------".("--" x scalar(@n))."\n"; 18 | while(my @combo = $combinat->next_combination){ 19 | print join(' ', @combo)."\n"; 20 | } 21 | 22 | print "\n"; 23 | 24 | print "permutations of 3 from: ".join(" ",@n)."\n"; 25 | print "------------------------".("--" x scalar(@n))."\n"; 26 | while(my @permu = $combinat->next_permutation){ 27 | print join(' ', @permu)."\n"; 28 | } 29 | 30 | output: 31 | 32 | Or available via exported functions 'permute', 'combine', and 'factorial'. 33 | 34 | use Math::Combinatorics; 35 | 36 | my @n = qw(a b c); 37 | print "combinations of 2 from: ".join(" ",@n)."\n"; 38 | print "------------------------".("--" x scalar(@n))."\n"; 39 | print join("\n", map { join " ", @$_ } combine(2,@n)),"\n"; 40 | print "\n"; 41 | print "permutations of 3 from: ".join(" ",@n)."\n"; 42 | print "------------------------".("--" x scalar(@n))."\n"; 43 | print join("\n", map { join " ", @$_ } permute(@n)),"\n"; 44 | 45 | 46 | Output: 47 | 48 | combinations of 2 from: a b c 49 | ------------------------------ 50 | a b 51 | a c 52 | b c 53 | 54 | permutations of 3 from: a b c 55 | ------------------------------ 56 | a b c 57 | a c b 58 | b a c 59 | b c a 60 | c a b 61 | c b a 62 | 63 | Output from both types of calls is the same, but the object-oriented approach consumes 64 | much less memory for large sets. 65 | 66 | =head1 DESCRIPTION 67 | 68 | Combinatorics is the branch of mathematics studying the enumeration, combination, 69 | and permutation of sets of elements and the mathematical relations that characterize 70 | their properties. As a jumping off point, refer to: 71 | 72 | http://mathworld.wolfram.com/Combinatorics.html 73 | 74 | This module provides a pure-perl implementation of nCk, nCRk, nPk, nPRk, !n and n! 75 | (combination, multiset, permutation, string, derangement, and factorial, respectively). 76 | Functional and object-oriented usages allow problems such as the following to be solved: 77 | 78 | =over 79 | 80 | =item combine - nCk 81 | 82 | http://mathworld.wolfram.com/Combination.html 83 | 84 | "Fun questions to ask the pizza parlor wait staff: how many possible combinations 85 | of 2 toppings can I get on my pizza?". 86 | 87 | =item derange - !n 88 | 89 | http://mathworld.wolfram.com/Derangement.html 90 | 91 | "A derangement of n ordered objects, denoted !n, is a permutation in which none of the 92 | objects appear in their "natural" (i.e., ordered) place." 93 | 94 | =item permute - nPk 95 | 96 | http://mathworld.wolfram.com/Permutation.html 97 | 98 | "Master Mind Game: ways to arrange pieces of different colors in a 99 | certain number of positions, without repetition of a color". 100 | 101 | =back 102 | 103 | Object-oriented usage additionally allows solving these problems by calling L 104 | with a B vector: 105 | 106 | =over 107 | 108 | =item string - nPRk 109 | 110 | http://mathworld.wolfram.com/String.html 111 | 112 | "Morse signals: diferent signals of 3 positions using the two symbols - and .". 113 | 114 | $o = Math::Combinatorics->new( count=>3 , data=>[qw(. -)] , frequency=>[3,3] ); 115 | while ( my @x = $o->next_multiset ) { 116 | my $p = Math::Combinatorics->new( data=>\@x , frequency=>[map{1} @x] ); 117 | while ( my @y = $p->next_string ) { 118 | #do something 119 | } 120 | } 121 | 122 | =item multiset/multichoose - nCRk 123 | 124 | http://mathworld.wolfram.com/Multiset.html 125 | 126 | "ways to extract 3 balls at once of a bag with 3 black and 3 white balls". 127 | 128 | $o = Math::Combinatorics->new( count=>3 , data=>[qw(white black)] , frequency=>[3,3] ); 129 | while ( my @x = $o->next_multiset ) { 130 | #do something 131 | } 132 | 133 | =back 134 | 135 | =head2 EXPORT 136 | 137 | the following export tags will bring a single method into the caller's 138 | namespace. no symbols are exported by default. see pod documentation below for 139 | method descriptions. 140 | 141 | combine 142 | derange 143 | multiset 144 | permute 145 | string 146 | factorial 147 | 148 | =head1 AUTHOR 149 | 150 | Allen Day , with algorithmic contributions from Christopher Eltschka and 151 | Tye. 152 | 153 | Copyright (c) 2004-2005 Allen Day. All rights reserved. This program is free software; you 154 | can redistribute it and/or modify it under the same terms as Perl itself. 155 | 156 | =head1 ACKNOWLEDGEMENTS 157 | 158 | A sincere thanks to everyone for helping to make this a better module. After initial 159 | development I've only had time to accept patches and improvements. Math::Combinatorics 160 | continues to be developed and improved by the community. Contributors of note include: 161 | 162 | For adding new features: Carlos Rica, David Coppit, Carlos Segre, Lyon Lemmens 163 | 164 | For bug reports: Ying Yang, Joerg Beyer, Marc Logghe, Yunheng Wang, 165 | Torsten Seemann, Gerrit Haase, Joern Behre, Lyon Lemmens, Federico Lucifredi 166 | 167 | =head1 BUGS / TODO 168 | 169 | Report them to the author. 170 | 171 | * Need more extensive unit tests. 172 | 173 | * tests for new()'s frequency argment 174 | 175 | * A known bug (more of a missing feature, actually) does not allow parameterization of k 176 | for nPk in permute(). it is assumed k == n. L for details. You can work 177 | around this by making calls to both L and L 178 | 179 | * Lots of really interesting stuff from Mathworld.Wolfram.com. MathWorld rocks! Expect 180 | to see implementation of more concepts from their site, e.g.: 181 | 182 | http://mathworld.wolfram.com/BellNumber.html 183 | http://mathworld.wolfram.com/StirlingNumberoftheSecondKind.html 184 | http://mathworld.wolfram.com/Word.html 185 | 186 | * Other combinatorics stuff 187 | http://en.wikipedia.org/wiki/Catalan_number 188 | http://en.wikipedia.org/wiki/Stirling_number 189 | 190 | =head1 SEE ALSO 191 | 192 | L 193 | 194 | L 195 | 196 | L (alas misnamed, it actually returns permutations on a string). 197 | 198 | http://perlmonks.thepen.com/29374.html 199 | 200 | http://groups.google.com/groups?selm=38568F79.13680B86%40physik.tu-muenchen.de&output=gplain 201 | 202 | 203 | =cut 204 | 205 | package Math::Combinatorics; 206 | 207 | use strict; 208 | use Data::Dumper; 209 | require Exporter; 210 | 211 | our @ISA = qw(Exporter); 212 | our @EXPORT = qw( combine derange factorial permute ); 213 | our $VERSION = '0.09'; 214 | 215 | =head1 EXPORTED FUNCTIONS 216 | 217 | =head2 combine() 218 | 219 | Usage : my @combinations = combine($k,@n); 220 | Function: implements nCk (n choose k), or n!/(k!*(n-k!)). 221 | returns all unique unorderd combinations of k items from set n. 222 | items in n are assumed to be character data, and are 223 | copied into the return data structure (see "Returns" below). 224 | Example : my @n = qw(a b c); 225 | my @c = combine(2,@n); 226 | print join "\n", map { join " ", @$_ } @c; 227 | # prints: 228 | # b c 229 | # a c 230 | # a b 231 | Returns : a list of arrays, where each array contains a unique combination 232 | of k items from n 233 | Args : a list of items to be combined 234 | Notes : data is internally assumed to be alphanumeric. this is necessary 235 | to efficiently generate combinations of large sets. if you need 236 | combinations of non-alphanumeric data, or on data 237 | C would not be appropriate, use the 238 | object-oriented API. See L and the B option. 239 | 240 | Identical items are assumed to be non-unique. That is, calling 241 | Cnew(data => [@n], count => $k); 252 | while(my(@combo) = $c->next_combination){ 253 | push @result, [@combo]; 254 | } 255 | 256 | return @result; 257 | } 258 | 259 | =head2 derange() 260 | 261 | Usage : my @deranges = derange(@n); 262 | Function: implements !n, a derangement of n items in which none of the 263 | items appear in their originally ordered place. 264 | Example : my @n = qw(a b c); 265 | my @d = derange(@n); 266 | print join "\n", map { join " ", @$_ } @d; 267 | # prints: 268 | # a c b 269 | # b a c 270 | # b c a 271 | # c a b 272 | # c b a 273 | Returns : a list of arrays, where each array contains a derangement of 274 | k items from n (where k == n). 275 | Args : a list of items to be deranged. 276 | Note : k should really be parameterizable. this will happen 277 | in a later version of the module. send me a patch to 278 | make that version come out sooner. 279 | Notes : data is internally assumed to be alphanumeric. this is necessary 280 | to efficiently generate combinations of large sets. if you need 281 | combinations of non-alphanumeric data, or on data 282 | C would not be appropriate, use the 283 | object-oriented API. See L, and the B option. 284 | 285 | =cut 286 | 287 | sub derange { 288 | my(@n) = @_; 289 | 290 | my @result = (); 291 | 292 | my $c = __PACKAGE__->new(data => [@n]); 293 | while(my(@derange) = $c->next_derangement){ 294 | push @result, [@derange]; 295 | } 296 | 297 | return @result; 298 | } 299 | 300 | =head2 next_derangement() 301 | 302 | Usage : my @derangement = $c->next_derangement(); 303 | Function: get derangements for @data. 304 | Returns : returns a permutation of items from @data (see L), 305 | where none of the items appear in their natural order. repeated calls 306 | retrieve all unique derangements of @data elements. a returned empty 307 | list signifies all derangements have been iterated. 308 | Args : none. 309 | 310 | =cut 311 | 312 | sub next_derangement { 313 | my $self = shift; 314 | my $data = $self->data(); 315 | 316 | my $cursor = $self->_permutation_cursor(); 317 | my $values = @$cursor; 318 | if($self->{pin}){ 319 | $self->{pin} = 0; 320 | 321 | my $i; 322 | for ($i = 1; $i < $values; $i += 2) { 323 | $$cursor[$i - 1] = $i; 324 | $$cursor[$i] = $i - 1; 325 | } 326 | if ($values % 2 != 0) { 327 | $$cursor[$values - 1] = $values - 3; 328 | $$cursor[$values - 2] = $values - 1; 329 | } 330 | goto RESULT; 331 | } 332 | else { 333 | my $values = @$cursor; 334 | my $i; 335 | my @found; # stores for each element if it has been found previously 336 | for ($i = 0; $i < $values; $i++) { $found[$i] = 0 } 337 | my $e; 338 | my $elemfound = 0; 339 | for ($i = $values - 1; $i > -1; $i--) { 340 | $found[$$cursor[$i]] = 1; 341 | if ($i > $values - 3) { # $values-1 or $values-2 342 | if ($i == $values - 2) { 343 | #print "i=$i (values-2)\n";## 344 | $e = $$cursor[$i + 1]; 345 | if ($e > $$cursor[$i] && $e != $i 346 | && $$cursor[$i] != $i + 1) { 347 | $$cursor[$i + 1] = $$cursor[$i]; 348 | $$cursor[$i] = $e; 349 | #print "!\n";## 350 | goto RESULT; 351 | } 352 | } 353 | next; 354 | } 355 | for ($e = $$cursor[$i] + 1; $e < $values; $e++) { 356 | if ($found[$e] && $e != $i) { 357 | $elemfound = 1; 358 | last; 359 | } 360 | } 361 | last if ($elemfound); 362 | } 363 | if ($elemfound) { 364 | $$cursor[$i] = $e; 365 | $found[$e] = 0; 366 | $i++; 367 | my $j; 368 | my @elems; 369 | for ($j = 0; $j < $values; $j++) { 370 | if ($found[$j]) { push(@elems, $j) } 371 | } 372 | for ($j = 0; $j < @elems; $j++) { 373 | if ($elems[$j] != $i) { 374 | # if the next is the last and it will be wrong: 375 | if ($j + 2 == @elems 376 | && $elems[$j + 1] == $i + 1) { 377 | # interchange them: 378 | $$cursor[$i] = $elems[$j + 1]; 379 | $$cursor[$i + 1] = $elems[$j]; 380 | last; 381 | } 382 | $$cursor[$i] = $elems[$j]; 383 | } 384 | elsif ($j + 1 < @elems) { 385 | # use the next element: 386 | $$cursor[$i] = $elems[$j + 1]; 387 | $elems[$j + 1] = $elems[$j]; 388 | } 389 | else { die() } 390 | $i++; 391 | } 392 | goto RESULT; 393 | } 394 | return (); 395 | } 396 | RESULT: 397 | # map cursor to data array 398 | my @result; 399 | foreach my $c (@$cursor){ 400 | push @result, $${ $data->[$c] }; 401 | } 402 | return @result; 403 | } 404 | 405 | =head2 factorial() 406 | 407 | Usage : my $f = factorial(4); #returns 24, or 4*3*2*1 408 | Function: calculates n! (n factorial). 409 | Returns : undef if n is non-integer or n < 0 410 | Args : a positive, non-zero integer 411 | Note : this function is used internally by combine() and permute() 412 | 413 | =cut 414 | 415 | sub factorial { 416 | my $n = shift; 417 | return undef unless $n >= 0 and $n == int($n); 418 | 419 | my $f; 420 | 421 | for($f = 1 ; $n > 0 ; $n--){ 422 | $f *= $n 423 | } 424 | 425 | return $f; 426 | } 427 | 428 | =head2 permute() 429 | 430 | Usage : my @permutations = permute(@n); 431 | Function: implements nPk (n permute k) (where k == n), or n!/(n-k)! 432 | returns all unique permutations of k items from set n 433 | (where n == k, see "Note" below). items in n are assumed to 434 | be character data, and are copied into the return data 435 | structure. 436 | Example : my @n = qw(a b c); 437 | my @p = permute(@n); 438 | print join "\n", map { join " ", @$_ } @p; 439 | # prints: 440 | # b a c 441 | # b c a 442 | # c b a 443 | # c a b 444 | # a c b 445 | # a b c 446 | Returns : a list of arrays, where each array contains a permutation of 447 | k items from n (where k == n). 448 | Args : a list of items to be permuted. 449 | Note : k should really be parameterizable. this will happen 450 | in a later version of the module. send me a patch to 451 | make that version come out sooner. 452 | Notes : data is internally assumed to be alphanumeric. this is necessary 453 | to efficiently generate combinations of large sets. if you need 454 | combinations of non-alphanumeric data, or on data 455 | C would not be appropriate, use the 456 | object-oriented API. See L, and the B option. 457 | 458 | Identical items are assumed to be non-unique. That is, calling 459 | Cnew(data => [@n]); 470 | while(my(@permu) = $c->next_permutation){ 471 | push @result, [@permu]; 472 | } 473 | 474 | return @result; 475 | } 476 | 477 | =head1 CONSTRUCTOR 478 | 479 | =cut 480 | 481 | =head2 new() 482 | 483 | Usage : my $c = Math::Combinatorics->new( count => 2, #treated as int 484 | data => [1,2,3,4] #arrayref or anonymous array 485 | ); 486 | Function: build a new Math::Combinatorics object. 487 | Returns : a Math::Combinatorics object 488 | Args : count - required for combinatoric functions/methods. number of elements to be 489 | present in returned set(s). 490 | data - required for combinatoric B permutagenic functions/methods. this is the 491 | set elements are chosen from. B: this array is modified in place; make 492 | a copy of your array if the order matters in the caller's space. 493 | frequency - optional vector of data frequencies. must be the same length as the B 494 | constructor argument. These two constructor calls here are equivalent: 495 | 496 | $a = 'a'; 497 | $b = 'b'; 498 | 499 | Math::Combinatorics->new( count=>2, data=>[\$a,\$a,\$a,\$a,\$a,\$b,\$b] ); 500 | Math::Combinatorics->new( count=>2, data=>[\$a,\$b], frequency=>[5,2] ); 501 | 502 | so why use this? sometimes it's useful to have multiple identical entities in 503 | a set (in set theory jargon, this is called a "bag", See L). 504 | compare - optional subroutine reference used in sorting elements of the set. examples: 505 | 506 | #appropriate for character elements 507 | compare => sub { $_[0] cmp $_[1] } 508 | #appropriate for numeric elements 509 | compare => sub { $_[0] <=> $_[1] } 510 | #appropriate for object elements, perhaps 511 | compare => sub { $_[0]->value <=> $_[1]->value } 512 | 513 | The default sort mechanism is based on references, and cannot be predicted. 514 | Improvements for a more flexible compare() mechanism are most welcome. 515 | 516 | =cut 517 | 518 | sub new { 519 | my($class,%arg) = @_; 520 | my $self = bless {}, $class; 521 | 522 | $self->{compare} = $arg{compare} || sub { $_[0] cmp $_[1] }; 523 | $self->{count} = $arg{count}; 524 | 525 | #convert bag to set 526 | my $freq = $arg{frequency}; 527 | if(ref($freq) eq 'ARRAY' and scalar(@$freq) == scalar(@{$arg{data}})){ 528 | $self->{frequency}++; 529 | my @bag = @{$arg{data}}; 530 | my @set = (); 531 | 532 | #allow '0 but defined' elements (Yunheng Wang) 533 | foreach my $type ( @bag ) { 534 | my $f = shift @$freq; 535 | next if $f < 1; 536 | for(1..$f){ 537 | #we push on a reference to make sure, for instance, that objects 538 | #are identical and not copied 539 | push @set, \$type; 540 | } 541 | } 542 | $arg{data} = \@set; 543 | } 544 | elsif(!ref($freq)){ 545 | $arg{data} = [map { \$_ } @{$arg{data}}]; 546 | } 547 | 548 | #warn join ' ', @{$arg{data}}; 549 | 550 | #OK, this is hokey, but I don't have time to fix it properly right now. 551 | #We want to allow both user-specified sorting as well as our own 552 | #reference-based internal sorting -- the latter only because unit tests 553 | #are failing if we don't have it. Additionally, we don't want to require 554 | #the triple derefernce necessary for comparison of the pristine data in 555 | #the user-supplied compare coderef. The solution for now is to do an 556 | #if/else. If you're staring at this please fix it! 557 | my $compare = $self->{compare}; 558 | if ( defined $arg{compare} ) { 559 | $self->{data} = [sort {&$compare($$$a,$$$b)} map {\$_} @{$arg{data}}]; 560 | } 561 | else { 562 | $self->{data} = [sort {&$compare($a,$b)} map {\$_} @{$arg{data}}]; 563 | } 564 | 565 | #warn Dumper($self->{data}); 566 | 567 | $self->{cin} = 1; 568 | $self->{pin} = 1; 569 | 570 | return $self; 571 | } 572 | 573 | =head1 OBJECT METHODS 574 | 575 | =cut 576 | 577 | =head2 next_combination() 578 | 579 | Usage : my @combo = $c->next_combination(); 580 | Function: get combinations of size $count from @data. 581 | Returns : returns a combination of $count items from @data (see L). 582 | repeated calls retrieve all unique combinations of $count elements. 583 | a returned empty list signifies all combinations have been iterated. 584 | Note : this method may only be used if a B argument is B 585 | given to L, otherwise use L. 586 | Args : none. 587 | 588 | =cut 589 | 590 | sub next_combination { 591 | my $self = shift; 592 | if ( $self->{frequency} ) { 593 | print STDERR "must use next_multiset() if 'frequency' argument passed to constructor\n"; 594 | return (); 595 | } 596 | return $self->_next_combination; 597 | } 598 | 599 | sub _next_combination { 600 | my $self = shift; 601 | my $data = $self->data(); 602 | my $combo_end = $self->count(); 603 | 604 | my $begin = 0; 605 | my $end = $#{$data} + 1; 606 | 607 | my @result; 608 | 609 | return () if scalar(@$data) < $self->count(); 610 | 611 | if($self->{cin}){ 612 | $self->{cin} = 0; 613 | 614 | for(0..$self->count-1){ 615 | push @result, $${ $data->[$_] }; 616 | } 617 | #warn 1; 618 | return @result; 619 | } 620 | 621 | if ($combo_end == $begin || $combo_end == $end) { 622 | return (); 623 | } 624 | 625 | my $combo = $combo_end; 626 | my $total_set; 627 | 628 | --$combo; 629 | $total_set = $self->upper_bound($combo_end,$end,$data->[$combo]); 630 | if ($total_set != $end) { 631 | $self->swap($combo,$total_set); 632 | 633 | for(0..$self->count-1){ 634 | push @result, $${ $data->[$_] }; 635 | } 636 | #warn 2; 637 | return @result; 638 | } 639 | 640 | --$total_set; 641 | $combo = $self->lower_bound($begin, $combo_end, $data->[$total_set]); 642 | 643 | if ($combo == $begin) { 644 | $self->rotate($begin, $combo_end, $end); 645 | #warn 3; 646 | return (); 647 | } 648 | 649 | my $combo_next = $combo; 650 | --$combo; 651 | $total_set = $self->upper_bound($combo_end, $end, $data->[$combo]); 652 | 653 | my $sort_pos = $end; 654 | $sort_pos += $combo_end - $total_set - 1; 655 | 656 | $self->rotate($combo_next, $total_set, $end); 657 | $self->rotate($combo, $combo_next, $end); 658 | $self->rotate($combo_end, $sort_pos, $end); 659 | 660 | for(0..$self->count-1){ 661 | push @result, $${ $data->[$_] }; 662 | } 663 | #warn 4; 664 | return @result; 665 | } 666 | 667 | =head2 next_multiset() 668 | 669 | Usage : my @multiset = $c->next_multiset(); 670 | Function: get multisets for @data. 671 | Returns : returns a multiset of items from @data (see L). 672 | a multiset is a special type of combination where the set from which 673 | combinations are drawn contains items that are indistinguishable. use 674 | L when a B argument is passed to L. 675 | repeated calls retrieve all unique multisets of @data elements. a 676 | returned empty list signifies all multisets have been iterated. 677 | Note : this method may only be used if a B argument is given to 678 | L, otherwise use L. 679 | Args : none. 680 | 681 | =cut 682 | 683 | sub next_multiset { 684 | my $self = shift; 685 | 686 | if ( ! $self->{frequency} ) { 687 | print STDERR "must use next_combination() if 'frequency' argument not passed to constructor\n"; 688 | return (); 689 | } 690 | 691 | my $data = $self->data(); 692 | my $compare = $self->compare(); 693 | 694 | while ( my @combo = $self->_next_combination ) { 695 | my $x = join '', map {scalar($$_)} sort @$data; 696 | my $y = join '', map {scalar($_) } sort @combo; 697 | 698 | next if $self->{'cache_multiset'}{$y}++; 699 | return @combo; 700 | } 701 | $self->{'cache_multiset'} = undef; 702 | return (); 703 | } 704 | 705 | =head2 next_permutation() 706 | 707 | Usage : my @permu = $c->next_permutation(); 708 | Function: get permutations of elements in @data. 709 | Returns : returns a permutation of items from @data (see L). 710 | repeated calls retrieve all unique permutations of @data elements. 711 | a returned empty list signifies all permutations have been iterated. 712 | Note : this method may only be used if a B argument is B 713 | given to L, otherwise use L. 714 | Args : none. 715 | 716 | =cut 717 | 718 | sub next_permutation { 719 | my $self = shift; 720 | if ( $self->{frequency} ) { 721 | print STDERR "must use next_string() if 'frequency' argument passed to constructor\n"; 722 | return (); 723 | } 724 | return $self->_next_permutation; 725 | } 726 | 727 | sub _next_permutation { 728 | my $self = shift; 729 | my $data = $self->data(); 730 | 731 | if($self->{pin}){ 732 | $self->{pin} = 0; 733 | return map {$$$_} @$data; 734 | } 735 | 736 | my $cursor = $self->_permutation_cursor(); 737 | 738 | my $last= $#{$cursor}; 739 | 740 | if($last < 1){ 741 | return (); 742 | } 743 | 744 | # Find last item not in reverse-sorted order: 745 | my $i = $last - 1; 746 | $i-- while 0 <= $i && $cursor->[$i] >= $cursor->[$i+1]; 747 | 748 | if($i == -1){ 749 | return (); 750 | } 751 | 752 | 753 | # Re-sort the reversely-sorted tail of the list: 754 | @{$cursor}[$i+1..$last] = reverse @{$cursor}[$i+1..$last] 755 | if $cursor->[$i+1] > $cursor->[$last]; 756 | 757 | # Find next item that will make us "greater": 758 | my $j = $i+1; 759 | $j++ while $cursor->[$i] >= $cursor->[$j]; 760 | 761 | # Swap: 762 | @{$cursor}[$i,$j] = @{$cursor}[$j,$i]; 763 | 764 | # map cursor to data array 765 | my @result; 766 | foreach my $c (@$cursor){ 767 | push @result, $${ $data->[$c] }; 768 | } 769 | return @result; 770 | } 771 | 772 | =head2 next_string() 773 | 774 | Usage : my @string = $c->next_string(); 775 | Function: get strings for @data. 776 | Returns : returns a multiset of items from @data (see L). 777 | a multiset is a special type of permutation where the set from which 778 | combinations are drawn contains items that are indistinguishable. use 779 | L when a B argument is passed to L. 780 | repeated calls retrieve all unique multisets of @data elements. a 781 | returned empty list signifies all strings have been iterated. 782 | Note : this method may only be used if a B argument is given to 783 | L, otherwise use L. 784 | Args : none. 785 | 786 | =cut 787 | 788 | sub next_string { 789 | my $self = shift; 790 | my $data = $self->data(); 791 | 792 | if ( ! $self->{frequency} ) { 793 | print STDERR "must use next_permutation() if 'frequency' argument not passed to constructor\n"; 794 | return (); 795 | } 796 | 797 | 798 | while ( my @permu = $self->_next_permutation ) { 799 | my $x = join '', map {scalar($$_)} @$data; 800 | my $y = join '', map {scalar($_) } @permu; 801 | 802 | next if $self->{'cache_string'}{$y}++; 803 | return @permu; 804 | } 805 | 806 | $self->{'cache_string'} = undef; 807 | return (); 808 | } 809 | 810 | =head1 INTERNAL FUNCTIONS AND METHODS 811 | 812 | =head2 sum() 813 | 814 | Usage : my $sum = sum(1,2,3); # returns 6 815 | Function: sums a list of integers. non-integer list elements are ignored 816 | Returns : sum of integer items in arguments passed in 817 | Args : a list of integers 818 | Note : this function is used internally by combine() 819 | 820 | =cut 821 | 822 | sub sum { 823 | my $sum = 0; 824 | foreach my $i (@_){ 825 | $sum += $i if $i == int($i); 826 | } 827 | return $sum; 828 | } 829 | 830 | =head2 compare() 831 | 832 | Usage : $obj->compare() 833 | Function: internal, undocumented. holds a comparison coderef. 834 | Returns : value of compare (a coderef) 835 | 836 | 837 | =cut 838 | 839 | sub compare { 840 | my($self,$val) = @_; 841 | return $self->{'compare'}; 842 | } 843 | 844 | 845 | =head2 count() 846 | 847 | Usage : $obj->count() 848 | Function: internal, undocumented. holds the "k" in nCk or nPk. 849 | Returns : value of count (an int) 850 | 851 | =cut 852 | 853 | sub count { 854 | my($self) = @_; 855 | return $self->{'count'}; 856 | } 857 | 858 | 859 | =head2 data() 860 | 861 | Usage : $obj->data() 862 | Function: internal, undocumented. holds the set "n" in nCk or nPk. 863 | Returns : value of data (an arrayref) 864 | 865 | =cut 866 | 867 | sub data { 868 | my($self) = @_; 869 | return $self->{'data'}; 870 | } 871 | 872 | 873 | =head2 swap() 874 | 875 | internal, undocumented. 876 | 877 | =cut 878 | 879 | sub swap { 880 | my $self = shift; 881 | my $first = shift; 882 | my $second = shift; 883 | my $data = $self->data(); 884 | 885 | my $temp = $data->[$first]; 886 | $data->[$first] = $data->[$second]; 887 | $data->[$second] = $temp; 888 | } 889 | 890 | =head2 reverse() 891 | 892 | internal, undocumented. 893 | 894 | =cut 895 | 896 | sub reverse { 897 | my $self = shift; 898 | my $first = shift; 899 | my $last = shift; 900 | my $data = $self->data(); 901 | 902 | while (1) { 903 | if ($first == $last || $first == --$last) { 904 | return; 905 | } else { 906 | $self->swap($first++, $last); 907 | } 908 | } 909 | } 910 | 911 | =head2 rotate() 912 | 913 | internal, undocumented. 914 | 915 | =cut 916 | 917 | sub rotate { 918 | my $self = shift; 919 | my $first = shift; 920 | my $middle = shift; 921 | my $last = shift; 922 | my $data = $self->data(); 923 | 924 | if ($first == $middle || $last == $middle) { 925 | return; 926 | } 927 | 928 | my $first2 = $middle; 929 | 930 | do { 931 | $self->swap($first++, $first2++); 932 | 933 | if ($first == $middle) { 934 | $middle = $first2; 935 | } 936 | } while ($first2 != $last); 937 | 938 | $first2 = $middle; 939 | 940 | while ($first2 != $last) { 941 | $self->swap($first++, $first2++); 942 | if ($first == $middle) { 943 | $middle = $first2; 944 | } elsif ($first2 == $last) { 945 | $first2 = $middle; 946 | } 947 | } 948 | } 949 | 950 | =head2 upper_bound() 951 | 952 | internal, undocumented. 953 | 954 | =cut 955 | 956 | sub upper_bound { 957 | my $self = shift; 958 | my $first = shift; 959 | my $last = shift; 960 | my $value = shift; 961 | my $compare = $self->compare(); 962 | my $data = $self->data(); 963 | 964 | my $len = $last - $first; 965 | my $half; 966 | my $middle; 967 | 968 | while ($len > 0) { 969 | $half = $len >> 1; 970 | $middle = $first; 971 | $middle += $half; 972 | 973 | if (&$compare($value,$data->[$middle]) == -1) { 974 | $len = $half; 975 | } else { 976 | $first = $middle; 977 | ++$first; 978 | $len = $len - $half - 1; 979 | } 980 | } 981 | 982 | return $first; 983 | } 984 | 985 | =head2 lower_bound() 986 | 987 | internal, undocumented. 988 | 989 | =cut 990 | 991 | sub lower_bound { 992 | my $self = shift; 993 | my $first = shift; 994 | my $last = shift; 995 | my $value = shift; 996 | my $compare = $self->compare(); 997 | my $data = $self->data(); 998 | 999 | my $len = $last - $first; 1000 | my $half; 1001 | my $middle; 1002 | 1003 | while ($len > 0) { 1004 | $half = $len >> 1; 1005 | $middle = $first; 1006 | $middle += $half; 1007 | 1008 | if (&$compare($data->[$middle],$value) == -1) { 1009 | $first = $middle; 1010 | ++$first; 1011 | $len = $len - $half - 1; 1012 | } else { 1013 | $len = $half; 1014 | } 1015 | } 1016 | 1017 | return $first; 1018 | } 1019 | 1020 | =head2 _permutation_cursor() 1021 | 1022 | Usage : $obj->_permutation_cursor() 1023 | Function: internal method. cursor on permutation iterator order. 1024 | Returns : value of _permutation_cursor (an arrayref) 1025 | Args : none 1026 | 1027 | =cut 1028 | 1029 | sub _permutation_cursor { 1030 | my($self,$val) = @_; 1031 | 1032 | if(!$self->{'_permutation_cursor'}){ 1033 | my $data = $self->data(); 1034 | my @tmp = (); 1035 | my $i = 0; 1036 | push @tmp, $i++ foreach @$data; 1037 | $self->{'_permutation_cursor'} = \@tmp; 1038 | } 1039 | 1040 | return $self->{'_permutation_cursor'}; 1041 | } 1042 | 1043 | 1; 1044 | 1045 | -------------------------------------------------------------------------------- /lib/CorScorer.pm: -------------------------------------------------------------------------------- 1 | package CorScorer; 2 | 3 | # Copyright (C) 2009-2011, Emili Sapena esapena lsi.upc.edu 4 | # 2011-2014, Sameer Pradhan childrens.harvard.edu 5 | # 6 | # This program is free software; you can redistribute it and/or modify it 7 | # under the terms of the GNU General Public License as published by the 8 | # Free Software Foundation; either version 2 of the License, or (at your 9 | # option) any later version. This program is distributed in the hope that 10 | # it will be useful, but WITHOUT ANY WARRANTY; without even the implied 11 | # warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | # GNU General Public License for more details. 13 | # 14 | # You should have received a copy of the GNU General Public License along 15 | # with this program; if not, write to the Free Software Foundation, Inc., 16 | # 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 17 | # 18 | # Modified in 2013 for v1.07 by Sebastian Martschat, 19 | # sebastian.martschat h-its.org 20 | # 21 | # Revised in July, 2013 by Xiaoqiang Luo (xql google.com) to create v6.0. 22 | # See comments under $VERSION for modifications. 23 | # 24 | # Revised in March, 2014 by Sameer Pradhan (sameer.pradhan childrens.harvard.edu) 25 | # to implement the BLANC metric for predicted mentions 26 | 27 | 28 | use strict; 29 | use Algorithm::Munkres; 30 | use Data::Dumper; 31 | 32 | #use Algorithm::Combinatorics qw(combinations); 33 | use Math::Combinatorics; 34 | use Cwd; 35 | 36 | our $VERSION = '8.01'; 37 | print "version: " . $VERSION . " " . Cwd::realpath(__FILE__) . "\n"; 38 | 39 | ## 40 | # 8.01 fixed a bug that crashed the the BLANC scoring when duplicate 41 | # (potentially singleton) mentions were present in the 42 | # response. as part of the fix, wee will allow a maximum of 10 43 | # duplicate mentions in response, but if there are more, than it 44 | # is a sign of a systematic error/manipulation and we will refuse 45 | # to score that run. 46 | 47 | # 8.0 added code to compute the BLANC metric (generalized for both gold 48 | # and system mentions (Luo et al., 2014) 49 | # 50 | # 7.0 Removed code to compute *_cs metrics 51 | # 52 | # 6.0 The directory hosting the scorer is under v6 and internal $VERSION is 53 | # set to "6.0." 54 | # Changes: 55 | # - 'ceafm', 'ceafe' and 'bcub' in the previous version are renamed 56 | # 'ceafm_cs', 'ceafe_cs', and 'bcub_cs', respectively. 57 | # - 'ceafm', 'ceafe' and 'bcub' are implemented without (Cai&Strube 2010) 58 | # modification. These metrics can handle twinless mentions and entities 59 | # just fine. 60 | # 61 | # 1.07 Modifications to implement BCUB and CEAFM 62 | # exactly as proposed by (Cai & Strube, 2010). 63 | # 1.06 ? 64 | # 1.05 Modification of IdentifMentions in order to correctly evaluate the 65 | # outputs with detected mentions. Based on (Cai & Strubbe, 2010) 66 | # 1.04 Some output corrections in BLANC functions. Changed package name to "Scorer" 67 | # 1.03 Detects mentions that start in a document but do not end 68 | # 1.02 Corrected BCUB bug. It fails when the key file does not have any mention 69 | 70 | # global variables 71 | my $VERBOSE = 2; 72 | my $HEAD_COLUMN = 8; 73 | my $RESPONSE_COLUMN = -1; 74 | my $KEY_COLUMN = -1; 75 | 76 | # Score. Scores the results of a coreference resolution system 77 | # Input: Metric, keys file, response file, [name] 78 | # Metric: the metric desired to evaluate: 79 | # muc: MUCScorer (Vilain et al, 1995) 80 | # bcub: B-Cubed (Bagga and Baldwin, 1998) 81 | # ceafm: CEAF (Luo et al, 2005) using mention-based similarity 82 | # ceafe: CEAF (Luo et al, 2005) using entity-based similarity 83 | # keys file: file with expected coreference chains in SemEval format 84 | # response file: file with output of coreference system (SemEval format) 85 | # name: [optional] the name of the document to score. If name is not 86 | # given, all the documents in the dataset will be scored. 87 | # 88 | # Output: an array with numerators and denominators of recall and precision 89 | # (recall_num, recall_den, precision_num, precision_den) 90 | # 91 | # Final scores: 92 | # Recall = recall_num / recall_den 93 | # Precision = precision_num / precision_den 94 | # F1 = 2 * Recall * Precision / (Recall + Precision) 95 | sub Score { 96 | my ($metric, $kFile, $rFile, $name) = @_; 97 | our $repeated_mentions = 0; 98 | 99 | if (lc($metric) eq 'blanc') { 100 | return ScoreBLANC($kFile, $rFile, $name); 101 | } 102 | 103 | my %idenTotals = 104 | (recallDen => 0, recallNum => 0, precisionDen => 0, precisionNum => 0); 105 | my ($acumNR, $acumDR, $acumNP, $acumDP) = (0, 0, 0, 0); 106 | 107 | if (defined($name) && $name ne 'none') { 108 | print "$name:\n" if ($VERBOSE); 109 | my $keys = GetCoreference($kFile, $KEY_COLUMN, $name); 110 | my $response = GetCoreference($rFile, $RESPONSE_COLUMN, $name); 111 | my ( 112 | $keyChains, $keyChainsWithSingletonsFromResponse, 113 | $responseChains, $responseChainsWithoutMentionsNotInKey, 114 | $keyChainsOrig, $responseChainsOrig 115 | ) = IdentifMentions($keys, $response, \%idenTotals); 116 | ($acumNR, $acumDR, $acumNP, $acumDP) = Eval( 117 | $metric, $keyChains, 118 | $keyChainsWithSingletonsFromResponse, $responseChains, 119 | $responseChainsWithoutMentionsNotInKey, $keyChainsOrig, 120 | $responseChainsOrig 121 | ); 122 | } 123 | else { 124 | my $kIndexNames = GetFileNames($kFile); 125 | my $rIndexNames = GetFileNames($rFile); 126 | 127 | $VERBOSE = 0 if ($name eq 'none'); 128 | foreach my $iname (keys(%{$kIndexNames})) { 129 | my $keys = 130 | GetCoreference($kFile, $KEY_COLUMN, $iname, $kIndexNames->{$iname}); 131 | my $response = GetCoreference($rFile, $RESPONSE_COLUMN, $iname, 132 | $rIndexNames->{$iname}); 133 | 134 | print "$iname:\n" if ($VERBOSE); 135 | my ( 136 | $keyChains, $keyChainsWithSingletonsFromResponse, 137 | $responseChains, $responseChainsWithoutMentionsNotInKey, 138 | $keyChainsOrig, $responseChainsOrig 139 | ) = IdentifMentions($keys, $response, \%idenTotals); 140 | my ($nr, $dr, $np, $dp) = Eval( 141 | $metric, $keyChains, 142 | $keyChainsWithSingletonsFromResponse, $responseChains, 143 | $responseChainsWithoutMentionsNotInKey, $keyChainsOrig, 144 | $responseChainsOrig 145 | ); 146 | 147 | $acumNR += $nr; 148 | $acumDR += $dr; 149 | $acumNP += $np; 150 | $acumDP += $dp; 151 | } 152 | } 153 | 154 | if ($VERBOSE || $name eq 'none') { 155 | print "\n====== TOTALS =======\n"; 156 | print "Identification of Mentions: "; 157 | ShowRPF( 158 | $idenTotals{recallNum}, $idenTotals{recallDen}, 159 | $idenTotals{precisionNum}, $idenTotals{precisionDen} 160 | ); 161 | print "Coreference: "; 162 | ShowRPF($acumNR, $acumDR, $acumNP, $acumDP); 163 | } 164 | 165 | return ($acumNR, $acumDR, $acumNP, $acumDP); 166 | } 167 | 168 | sub GetIndex { 169 | my ($ind, $i) = @_; 170 | if (!defined($ind->{$i})) { 171 | my $n = $ind->{nexti} || 0; 172 | $ind->{$i} = $n; 173 | $n++; 174 | $ind->{nexti} = $n; 175 | } 176 | 177 | return $ind->{$i}; 178 | } 179 | 180 | # Get the coreference information from column $column of the file $file 181 | # If $name is defined, only keys between "#begin document $name" and 182 | # "#end file $name" are taken. 183 | # The output is an array of entites, where each entity is an array 184 | # of mentions and each mention is an array with two values corresponding 185 | # to the mention's begin and end. For example: 186 | # @entities = ( [ [1,3], [45,45], [57,62] ], # <-- entity 0 187 | # [ [5,5], [25,27], [31,31] ], # <-- entity 1 188 | # ... 189 | # ); 190 | # entity 0 is composed of 3 mentions: from token 1 to 3, token 45 and 191 | # from token 57 to 62 (both included) 192 | # 193 | # if $name is not specified, the output is a hash including each file 194 | # found in the document: 195 | # $coref{$file} = \@entities 196 | sub GetCoreference { 197 | my ($file, $column, $name, $pos) = @_; 198 | my %coref; 199 | my %ind; 200 | 201 | open(F, $file) || die "Can not open $file: $!"; 202 | if ($pos) { 203 | seek(F, $pos, 0); 204 | } 205 | my $fName; 206 | my $getout = 0; 207 | do { 208 | # look for the begin of a file 209 | while (my $l = ) { 210 | chomp($l); 211 | $l =~ s/\r$//; # m$ format jokes 212 | if ($l =~ /^\#\s*begin document (.*?)$/) { 213 | if (defined($name)) { 214 | if ($name eq $1) { 215 | $fName = $name; 216 | $getout = 1; 217 | last; 218 | } 219 | } 220 | else { 221 | $fName = $1; 222 | last; 223 | } 224 | } 225 | } 226 | print "====> $fName:\n" if ($VERBOSE > 1); 227 | 228 | # Extract the keys from the file until #end is found 229 | my $lnumber = 0; 230 | my @entities; 231 | my @half; 232 | my @head; 233 | my @sentId; 234 | while (my $l = ) { 235 | chomp($l); 236 | $l =~ s/^\s+$//; 237 | next if ($l eq ''); 238 | if ($l =~ /\#\s*end document/) { 239 | foreach my $h (@half) { 240 | if (defined($h) && @$h) { 241 | die "Error: some mentions in the document do not close\n"; 242 | } 243 | } 244 | last; 245 | } 246 | my @columns = split(/\t/, $l); 247 | my $cInfo = $columns[$column]; 248 | push(@head, $columns[$HEAD_COLUMN]); 249 | push(@sentId, $columns[0]); 250 | if ($cInfo ne '_') { 251 | 252 | #discard double antecedent 253 | while ($cInfo =~ s/\((\d+\+\d)\)//) { 254 | print "Discarded ($1)\n" if ($VERBOSE > 1); 255 | } 256 | 257 | # one-token mention(s) 258 | while ($cInfo =~ s/\((\d+)\)//) { 259 | my $ie = GetIndex(\%ind, $1); 260 | push(@{$entities[$ie]}, [$lnumber, $lnumber, $lnumber]); 261 | print "+mention (entity $ie): ($lnumber,$lnumber)\n" 262 | if ($VERBOSE > 2); 263 | } 264 | 265 | # begin of mention(s) 266 | while ($cInfo =~ s/\((\d+)//) { 267 | my $ie = GetIndex(\%ind, $1); 268 | push(@{$half[$ie]}, $lnumber); 269 | print "+init mention (entity $ie): ($lnumber\n" if ($VERBOSE > 2); 270 | } 271 | 272 | # end of mention(s) 273 | while ($cInfo =~ s/(\d+)\)//) { 274 | my $numberie = $1; 275 | my $ie = GetIndex(\%ind, $numberie); 276 | my $start = pop(@{$half[$ie]}); 277 | if (defined($start)) { 278 | my $inim = $sentId[$start]; 279 | my $endm = $sentId[$lnumber]; 280 | my $tHead = $start; 281 | 282 | # the token whose head is outside the mention is the head of the mention 283 | for (my $t = $start ; $t <= $lnumber ; $t++) { 284 | if ($head[$t] < $inim || $head[$t] > $endm) { 285 | $tHead = $t; 286 | last; 287 | } 288 | } 289 | push(@{$entities[$ie]}, [$start, $lnumber, $tHead]); 290 | } 291 | else { 292 | die 293 | "Detected the end of a mention [$numberie]($ie) without begin (?,$lnumber)"; 294 | } 295 | print "+mention (entity $ie): ($start,$lnumber)\n" if ($VERBOSE > 2); 296 | 297 | } 298 | } 299 | $lnumber++; 300 | } 301 | 302 | # verbose 303 | if ($VERBOSE > 1) { 304 | print "File $fName:\n"; 305 | for (my $e = 0 ; $e < scalar(@entities) ; $e++) { 306 | print "Entity $e:"; 307 | foreach my $mention (@{$entities[$e]}) { 308 | print " ($mention->[0],$mention->[1])"; 309 | } 310 | print "\n"; 311 | } 312 | } 313 | 314 | $coref{$fName} = \@entities; 315 | } while (!$getout && !eof(F)); 316 | 317 | if (defined($name)) { 318 | return $coref{$name}; 319 | } 320 | return \%coref; 321 | } 322 | 323 | sub GetFileNames { 324 | my $file = shift; 325 | my %hash; 326 | my $last = 0; 327 | open(F, $file) || die "Can not open $file: $!"; 328 | while (my $l = ) { 329 | chomp($l); 330 | $l =~ s/\r$//; # m$ format jokes 331 | if ($l =~ /^\#\s*begin document (.*?)$/) { 332 | my $name = $1; 333 | $hash{$name} = $last; 334 | } 335 | $last = tell(F); 336 | } 337 | close(F); 338 | return \%hash; 339 | } 340 | 341 | sub IdentifMentions { 342 | my ($keys, $response, $totals) = @_; 343 | my @kChains; 344 | my @kChainsWithSingletonsFromResponse; 345 | my @rChains; 346 | my @rChainsWithoutMentionsNotInKey; 347 | my %id; 348 | my %map; 349 | my $idCount = 0; 350 | my @assigned; 351 | my @kChainsOrig = (); 352 | my @rChainsOrig = (); 353 | 354 | # for each mention found in keys an ID is generated 355 | foreach my $entity (@$keys) { 356 | foreach my $mention (@$entity) { 357 | if (defined($id{"$mention->[0],$mention->[1]"})) { 358 | print "Repeated mention in the key: $mention->[0], $mention->[1] ", 359 | $id{"$mention->[0],$mention->[1]"}, $idCount, "\n"; 360 | } 361 | $id{"$mention->[0],$mention->[1]"} = $idCount; 362 | $idCount++; 363 | } 364 | } 365 | 366 | # correct identification: Exact bound limits 367 | my $exact = 0; 368 | foreach my $entity (@$response) { 369 | 370 | my $i = 0; 371 | my @remove; 372 | 373 | foreach my $mention (@$entity) { 374 | if (defined($map{"$mention->[0],$mention->[1]"})) { 375 | print "Repeated mention in the response: $mention->[0], $mention->[1] ", 376 | $map{"$mention->[0],$mention->[1]"}, 377 | $id{"$mention->[0],$mention->[1]"}, 378 | "\n"; 379 | push(@remove, $i); 380 | $main::repeated_mentions++; 381 | 382 | if ($main::repeated_mentions > 10) 383 | { 384 | print STDERR "Found too many repeated mentions (> 10) in the response, so refusing to score. Please fix the output.\n"; 385 | exit 1; 386 | } 387 | 388 | } 389 | elsif (defined($id{"$mention->[0],$mention->[1]"}) 390 | && !$assigned[$id{"$mention->[0],$mention->[1]"}]) 391 | { 392 | $assigned[$id{"$mention->[0],$mention->[1]"}] = 1; 393 | $map{"$mention->[0],$mention->[1]"} = 394 | $id{"$mention->[0],$mention->[1]"}; 395 | $exact++; 396 | } 397 | $i++; 398 | } 399 | 400 | # Remove repeated mentions in the response 401 | foreach my $i (sort { $b <=> $a } (@remove)) { 402 | splice(@$entity, $i, 1); 403 | } 404 | } 405 | 406 | 407 | # now, lets remove any empty elements in the response array after removing 408 | # potential repeats 409 | my @another_remove = (); 410 | my $ii; 411 | 412 | foreach my $eentity (@$response) 413 | { 414 | if ( @$eentity == 0) 415 | { 416 | push(@another_remove, $ii); 417 | } 418 | $ii++; 419 | } 420 | 421 | foreach my $iii (sort { $b <=> $a } (@another_remove)) { 422 | splice(@$response, $iii, 1); 423 | } 424 | 425 | 426 | # Partial identificaiton: Inside bounds and including the head 427 | my $part = 0; 428 | 429 | # Each mention in response not included in keys has a new ID 430 | my $mresp = 0; 431 | foreach my $entity (@$response) { 432 | foreach my $mention (@$entity) { 433 | my $ini = $mention->[0]; 434 | my $end = $mention->[1]; 435 | if (!defined($map{"$mention->[0],$mention->[1]"})) { 436 | $map{"$mention->[0],$mention->[1]"} = $idCount; 437 | $idCount++; 438 | } 439 | $mresp++; 440 | } 441 | } 442 | 443 | if ($VERBOSE) { 444 | print "Total key mentions: " . scalar(keys(%id)) . "\n"; 445 | print "Total response mentions: " . scalar(keys(%map)) . "\n"; 446 | print "Strictly correct identified mentions: $exact\n"; 447 | print "Partially correct identified mentions: $part\n"; 448 | print "No identified: " . (scalar(keys(%id)) - $exact - $part) . "\n"; 449 | print "Invented: " . ($idCount - scalar(keys(%id))) . "\n"; 450 | } 451 | 452 | if (defined($totals)) { 453 | $totals->{recallDen} += scalar(keys(%id)); 454 | $totals->{recallNum} += $exact; 455 | $totals->{precisionDen} += scalar(keys(%map)); 456 | $totals->{precisionNum} += $exact; 457 | $totals->{precisionExact} += $exact; 458 | $totals->{precisionPart} += $part; 459 | } 460 | 461 | # The coreference chains arrays are generated again with ID of mentions 462 | # instead of token coordenates 463 | my $e = 0; 464 | foreach my $entity (@$keys) { 465 | foreach my $mention (@$entity) { 466 | push(@{$kChainsOrig[$e]}, $id{"$mention->[0],$mention->[1]"}); 467 | push(@{$kChains[$e]}, $id{"$mention->[0],$mention->[1]"}); 468 | } 469 | $e++; 470 | } 471 | $e = 0; 472 | foreach my $entity (@$response) { 473 | foreach my $mention (@$entity) { 474 | push(@{$rChainsOrig[$e]}, $map{"$mention->[0],$mention->[1]"}); 475 | push(@{$rChains[$e]}, $map{"$mention->[0],$mention->[1]"}); 476 | } 477 | $e++; 478 | } 479 | 480 | # In order to use the metrics as in (Cai & Strube, 2010): 481 | # 1. Include the non-detected key mentions into the response as singletons 482 | # 2. Discard the detected mentions not included in key resolved as singletons 483 | # 3a. For computing precision: put twinless system mentions in key 484 | # 3b. For computing recall: discard twinless system mentions in response 485 | 486 | my $kIndex = Indexa(\@kChains); 487 | my $rIndex = Indexa(\@rChains); 488 | 489 | # 1. Include the non-detected key mentions into the response as singletons 490 | my $addkey = 0; 491 | if (scalar(keys(%id)) - $exact - $part > 0) { 492 | foreach my $kc (@kChains) { 493 | foreach my $m (@$kc) { 494 | if (!defined($rIndex->{$m})) { 495 | push(@rChains, [$m]); 496 | $addkey++; 497 | } 498 | } 499 | } 500 | } 501 | 502 | @kChainsWithSingletonsFromResponse = @kChains; 503 | @rChainsWithoutMentionsNotInKey = []; 504 | 505 | # 2. Discard the detected mentions not included in key resolved as singletons 506 | my $delsin = 0; 507 | 508 | if ($idCount - scalar(keys(%id)) > 0) { 509 | foreach my $rc (@rChains) { 510 | if (scalar(@$rc) == 1) { 511 | if (!defined($kIndex->{$rc->[0]})) { 512 | @$rc = (); 513 | $delsin++; 514 | } 515 | } 516 | } 517 | } 518 | 519 | # 3a. For computing precision: put twinless system mentions in key as singletons 520 | my $addinv = 0; 521 | 522 | if ($idCount - scalar(keys(%id)) > 0) { 523 | foreach my $rc (@rChains) { 524 | if (scalar(@$rc) > 1) { 525 | foreach my $m (@$rc) { 526 | if (!defined($kIndex->{$m})) { 527 | push(@kChainsWithSingletonsFromResponse, [$m]); 528 | $addinv++; 529 | } 530 | } 531 | } 532 | } 533 | } 534 | 535 | # 3b. For computing recall: discard twinless system mentions in response 536 | my $delsys = 0; 537 | 538 | foreach my $rc (@rChains) { 539 | my @temprc; 540 | my $i = 0; 541 | 542 | foreach my $m (@$rc) { 543 | if (defined($kIndex->{$m})) { 544 | push(@temprc, $m); 545 | $i++; 546 | } 547 | else { 548 | $delsys++; 549 | } 550 | } 551 | 552 | if ($i > 0) { 553 | push(@rChainsWithoutMentionsNotInKey, \@temprc); 554 | } 555 | } 556 | 557 | # We clean the empty chains 558 | my @newrc; 559 | foreach my $rc (@rChains) { 560 | if (scalar(@$rc) > 0) { 561 | push(@newrc, $rc); 562 | } 563 | } 564 | @rChains = @newrc; 565 | 566 | return ( 567 | \@kChains, \@kChainsWithSingletonsFromResponse, 568 | \@rChains, \@rChainsWithoutMentionsNotInKey, 569 | \@kChainsOrig, \@rChainsOrig 570 | ); 571 | } 572 | 573 | sub Eval { 574 | my ($scorer, $keys, $keysPrecision, $response, $responseRecall, 575 | $keyChainsOrig, $responseChainsOrig) 576 | = @_; 577 | $scorer = lc($scorer); 578 | my ($nr, $dr, $np, $dp); 579 | if ($scorer eq 'muc') { 580 | ($nr, $dr, $np, $dp) = 581 | MUCScorer($keys, $keysPrecision, $response, $responseRecall); 582 | } 583 | elsif ($scorer eq 'bcub') { 584 | ($nr, $dr, $np, $dp) = BCUBED($keyChainsOrig, $responseChainsOrig); 585 | } 586 | elsif ($scorer eq 'ceafm') { 587 | ($nr, $dr, $np, $dp) = CEAF($keyChainsOrig, $responseChainsOrig, 1); 588 | } 589 | elsif ($scorer eq 'ceafe') { 590 | ($nr, $dr, $np, $dp) = CEAF($keyChainsOrig, $responseChainsOrig, 0); 591 | } 592 | else { 593 | die "Metric $scorer not implemented yet\n"; 594 | } 595 | return ($nr, $dr, $np, $dp); 596 | } 597 | 598 | # Indexes an array of arrays, in order to easily know the position of an element 599 | sub Indexa { 600 | my ($arrays) = @_; 601 | my %index; 602 | 603 | for (my $i = 0 ; $i < @$arrays ; $i++) { 604 | foreach my $e (@{$arrays->[$i]}) { 605 | $index{$e} = $i; 606 | } 607 | } 608 | return \%index; 609 | } 610 | 611 | # Consider the "links" within every coreference chain. For example, 612 | # chain A-B-C-D has 3 links: A-B, B-C and C-D. 613 | # Recall: num correct links / num expected links. 614 | # Precision: num correct links / num output links 615 | 616 | sub MUCScorer { 617 | my ($keys, $keysPrecision, $response, $responseRecall) = @_; 618 | 619 | my $kIndex = Indexa($keys); 620 | 621 | # Calculate correct links 622 | my $correct = 0; 623 | foreach my $rEntity (@$response) { 624 | next if (!defined($rEntity)); 625 | 626 | # for each possible pair 627 | for (my $i = 0 ; $i < @$rEntity ; $i++) { 628 | my $id_i = $rEntity->[$i]; 629 | for (my $j = $i + 1 ; $j < @$rEntity ; $j++) { 630 | my $id_j = $rEntity->[$j]; 631 | if ( defined($kIndex->{$id_i}) 632 | && defined($kIndex->{$id_j}) 633 | && $kIndex->{$id_i} == $kIndex->{$id_j}) 634 | { 635 | $correct++; 636 | last; 637 | } 638 | } 639 | } 640 | } 641 | 642 | # Links in key 643 | my $keylinks = 0; 644 | foreach my $kEntity (@$keys) { 645 | next if (!defined($kEntity)); 646 | $keylinks += scalar(@$kEntity) - 1 if (scalar(@$kEntity)); 647 | } 648 | 649 | # Links in response 650 | my $reslinks = 0; 651 | foreach my $rEntity (@$response) { 652 | next if (!defined($rEntity)); 653 | $reslinks += scalar(@$rEntity) - 1 if (scalar(@$rEntity)); 654 | } 655 | 656 | ShowRPF($correct, $keylinks, $correct, $reslinks) if ($VERBOSE); 657 | return ($correct, $keylinks, $correct, $reslinks); 658 | } 659 | 660 | # Compute precision for every mention in the response, and compute 661 | # recall for every mention in the keys 662 | sub BCUBED { 663 | my ($keys, $response) = @_; 664 | my $kIndex = Indexa($keys); 665 | my $rIndex = Indexa($response); 666 | my $acumP = 0; 667 | my $acumR = 0; 668 | foreach my $rChain (@$response) { 669 | foreach my $m (@$rChain) { 670 | my $kChain = (defined($kIndex->{$m})) ? $keys->[$kIndex->{$m}] : []; 671 | my $ci = 0; 672 | my $ri = scalar(@$rChain); 673 | my $ki = scalar(@$kChain); 674 | 675 | # common mentions in rChain and kChain => Ci 676 | foreach my $mr (@$rChain) { 677 | foreach my $mk (@$kChain) { 678 | if ($mr == $mk) { 679 | $ci++; 680 | last; 681 | } 682 | } 683 | } 684 | 685 | $acumP += $ci / $ri if ($ri); 686 | $acumR += $ci / $ki if ($ki); 687 | } 688 | } 689 | 690 | # Mentions in key 691 | my $keymentions = 0; 692 | foreach my $kEntity (@$keys) { 693 | $keymentions += scalar(@$kEntity); 694 | } 695 | 696 | # Mentions in response 697 | my $resmentions = 0; 698 | foreach my $rEntity (@$response) { 699 | $resmentions += scalar(@$rEntity); 700 | } 701 | 702 | ShowRPF($acumR, $keymentions, $acumP, $resmentions) if ($VERBOSE); 703 | return ($acumR, $keymentions, $acumP, $resmentions); 704 | } 705 | 706 | # type = 0: Entity-based 707 | # type = 1: Mention-based 708 | sub CEAF { 709 | my ($keys, $response, $type) = @_; 710 | 711 | my @sim; 712 | for (my $i = 0 ; $i < scalar(@$keys) ; $i++) { 713 | for (my $j = 0 ; $j < scalar(@$response) ; $j++) { 714 | if (defined($keys->[$i]) && defined($response->[$j])) { 715 | if ($type == 0) { # entity-based 716 | $sim[$i][$j] = 1 - SIMEntityBased($keys->[$i], $response->[$j]); 717 | 718 | # 1 - X => the library searches minima not maxima 719 | } 720 | elsif ($type == 1) { # mention-based 721 | $sim[$i][$j] = 1 - SIMMentionBased($keys->[$i], $response->[$j]); 722 | } 723 | } 724 | else { 725 | $sim[$i][$j] = 1; 726 | } 727 | } 728 | 729 | # fill the matrix when response chains are less than key ones 730 | for (my $j = scalar(@$response) ; $j < scalar(@$keys) ; $j++) { 731 | $sim[$i][$j] = 1; 732 | } 733 | 734 | #$denrec += SIMEntityBased($kChain->[$i], $kChain->[$i]); 735 | } 736 | 737 | my @out; 738 | 739 | # Munkres algorithm 740 | assign(\@sim, \@out); 741 | 742 | my $numerador = 0; 743 | my $denpre = 0; 744 | my $denrec = 0; 745 | 746 | # entity-based 747 | if ($type == 0) { 748 | foreach my $c (@$response) { 749 | $denpre++ if (defined($c) && scalar(@$c) > 0); 750 | } 751 | foreach my $c (@$keys) { 752 | $denrec++ if (defined($c) && scalar(@$c) > 0); 753 | } 754 | } 755 | 756 | # mention-based 757 | elsif ($type == 1) { 758 | foreach my $c (@$response) { 759 | $denpre += scalar(@$c) if (defined($c)); 760 | } 761 | foreach my $c (@$keys) { 762 | $denrec += scalar(@$c) if (defined($c)); 763 | } 764 | } 765 | 766 | for (my $i = 0 ; $i < scalar(@$keys) ; $i++) { 767 | $numerador += 1 - $sim[$i][$out[$i]]; 768 | } 769 | 770 | ShowRPF($numerador, $denrec, $numerador, $denpre) if ($VERBOSE); 771 | 772 | return ($numerador, $denrec, $numerador, $denpre); 773 | } 774 | 775 | sub SIMEntityBased { 776 | my ($a, $b) = @_; 777 | my $intersection = 0; 778 | 779 | # Common elements in A and B 780 | foreach my $ma (@$a) { 781 | next if (!defined($ma)); 782 | foreach my $mb (@$b) { 783 | next if (!defined($mb)); 784 | if ($ma == $mb) { 785 | $intersection++; 786 | last; 787 | } 788 | } 789 | } 790 | 791 | my $r = 0; 792 | my $d = scalar(@$a) + scalar(@$b); 793 | if ($d != 0) { 794 | $r = 2 * $intersection / $d; 795 | } 796 | 797 | return $r; 798 | } 799 | 800 | sub SIMMentionBased { 801 | my ($a, $b) = @_; 802 | my $intersection = 0; 803 | 804 | # Common elements in A and B 805 | foreach my $ma (@$a) { 806 | next if (!defined($ma)); 807 | foreach my $mb (@$b) { 808 | next if (!defined($mb)); 809 | if ($ma == $mb) { 810 | $intersection++; 811 | last; 812 | } 813 | } 814 | } 815 | 816 | return $intersection; 817 | } 818 | 819 | sub ShowRPF { 820 | my ($numrec, $denrec, $numpre, $denpre, $f1) = @_; 821 | 822 | my $precisio = $denpre ? $numpre / $denpre : 0; 823 | my $recall = $denrec ? $numrec / $denrec : 0; 824 | if (!defined($f1)) { 825 | $f1 = 0; 826 | if ($recall + $precisio) { 827 | $f1 = 2 * $precisio * $recall / ($precisio + $recall); 828 | } 829 | } 830 | 831 | print "Recall: ($numrec / $denrec) " . int($recall * 10000) / 100 . '%'; 832 | print "\tPrecision: ($numpre / $denpre) " 833 | . int($precisio * 10000) / 100 . '%'; 834 | print "\tF1: " . int($f1 * 10000) / 100 . "\%\n"; 835 | print 836 | "--------------------------------------------------------------------------\n"; 837 | } 838 | 839 | # NEW 840 | sub ScoreBLANC { 841 | my ($kFile, $rFile, $name) = @_; 842 | my ($acumNRa, $acumDRa, $acumNPa, $acumDPa) = (0, 0, 0, 0); 843 | my ($acumNRr, $acumDRr, $acumNPr, $acumDPr) = (0, 0, 0, 0); 844 | my %idenTotals = 845 | (recallDen => 0, recallNum => 0, precisionDen => 0, precisionNum => 0); 846 | 847 | if (defined($name) && $name ne 'none') { 848 | print "$name:\n" if ($VERBOSE); 849 | my $keys = GetCoreference($kFile, $KEY_COLUMN, $name); 850 | my $response = GetCoreference($rFile, $RESPONSE_COLUMN, $name); 851 | my ( 852 | $keyChains, $keyChainsWithSingletonsFromResponse, 853 | $responseChains, $responseChainsWithoutMentionsNotInKey, 854 | $keyChainsOrig, $responseChainsOrig 855 | ) = IdentifMentions($keys, $response, \%idenTotals); 856 | ( 857 | $acumNRa, $acumDRa, $acumNPa, $acumDPa, 858 | $acumNRr, $acumDRr, $acumNPr, $acumDPr 859 | ) = BLANC_Internal($keyChainsOrig, $responseChainsOrig); 860 | } 861 | else { 862 | my $kIndexNames = GetFileNames($kFile); 863 | my $rIndexNames = GetFileNames($rFile); 864 | 865 | $VERBOSE = 0 if ($name eq 'none'); 866 | foreach my $iname (keys(%{$kIndexNames})) { 867 | my $keys = 868 | GetCoreference($kFile, $KEY_COLUMN, $iname, $kIndexNames->{$iname}); 869 | my $response = GetCoreference($rFile, $RESPONSE_COLUMN, $iname, 870 | $rIndexNames->{$iname}); 871 | 872 | print "$name:\n" if ($VERBOSE); 873 | my ( 874 | $keyChains, $keyChainsWithSingletonsFromResponse, 875 | $responseChains, $responseChainsWithoutMentionsNotInKey, 876 | $keyChainsOrig, $responseChainsOrig 877 | ) = IdentifMentions($keys, $response, \%idenTotals); 878 | my ($nra, $dra, $npa, $dpa, $nrr, $drr, $npr, $dpr) = 879 | BLANC_Internal($keyChainsOrig, $responseChainsOrig); 880 | 881 | $acumNRa += $nra; 882 | $acumDRa += $dra; 883 | $acumNPa += $npa; 884 | $acumDPa += $dpa; 885 | $acumNRr += $nrr; 886 | $acumDRr += $drr; 887 | $acumNPr += $npr; 888 | $acumDPr += $dpr; 889 | } 890 | } 891 | 892 | if ($VERBOSE || $name eq 'none') { 893 | print "\n====== TOTALS =======\n"; 894 | print "Identification of Mentions: "; 895 | ShowRPF( 896 | $idenTotals{recallNum}, $idenTotals{recallDen}, 897 | $idenTotals{precisionNum}, $idenTotals{precisionDen} 898 | ); 899 | print "\nCoreference:\n"; 900 | print "Coreference links: "; 901 | ShowRPF($acumNRa, $acumDRa, $acumNPa, $acumDPa); 902 | print "Non-coreference links: "; 903 | ShowRPF($acumNRr, $acumDRr, $acumNPr, $acumDPr); 904 | print "BLANC: "; 905 | 906 | my $Ra = ($acumDRa) ? $acumNRa / $acumDRa : -1; 907 | my $Rr = ($acumDRr) ? $acumNRr / $acumDRr : -1; 908 | my $Pa = ($acumDPa) ? $acumNPa / $acumDPa : 0; 909 | my $Pr = ($acumDPr) ? $acumNPr / $acumDPr : 0; 910 | 911 | my $R = ($Ra + $Rr) / 2; 912 | my $P = ($Pa + $Pr) / 2; 913 | 914 | my $Fa = ($Pa + $Ra) ? 2 * $Pa * $Ra / ($Pa + $Ra) : 0; 915 | my $Fr = ($Pr + $Rr) ? 2 * $Pr * $Rr / ($Pr + $Rr) : 0; 916 | 917 | my $f1 = ($Fa + $Fr) / 2; 918 | 919 | if ($Ra == -1 && $Rr == -1) { 920 | $R = 0; 921 | $P = 0; 922 | $f1 = 0; 923 | } 924 | elsif ($Ra == -1) { 925 | $R = $Rr; 926 | $P = $Pr; 927 | $f1 = $Fr; 928 | } 929 | elsif ($Rr == -1) { 930 | $R = $Ra; 931 | $P = $Pa; 932 | $f1 = $Fa; 933 | } 934 | 935 | ShowRPF($R, 1, $P, 1, $f1); 936 | } 937 | return ( 938 | $acumNRa, $acumDRa, $acumNPa, $acumDPa, 939 | $acumNRr, $acumDRr, $acumNPr, $acumDPr 940 | ); 941 | } 942 | 943 | sub cartesian { 944 | my @C = map { [$_] } @{shift @_}; 945 | 946 | foreach (@_) { 947 | my @A = @$_; 948 | 949 | @C = map { 950 | my $n = $_; 951 | map { [$n, @$_] } @C 952 | } @A; 953 | } 954 | 955 | return @C; 956 | } 957 | 958 | sub BLANC_Internal { 959 | my ($keys, $response) = @_; 960 | my ($ga, $gr, $ba, $br) = (0, 0, 0, 0); 961 | my $key_coreference_links = {}; 962 | my $key_non_coreference_links = {}; 963 | my $response_coreference_links = {}; 964 | my $response_non_coreference_links = {}; 965 | 966 | print "list containing list of chains in key:\n" if ($VERBOSE > 2); 967 | print Dumper $keys if ($VERBOSE > 2); 968 | 969 | print "each key chain printed individually:\n" if ($VERBOSE > 2); 970 | 971 | if ($VERBOSE > 2) { 972 | foreach my $z (@$keys) { 973 | print Dumper $z; 974 | } 975 | } 976 | 977 | print "list containing list of chains in response:\n" if ($VERBOSE > 2); 978 | print Dumper $response if ($VERBOSE > 2); 979 | 980 | print "each response chain printed individually:\n" if ($VERBOSE > 2); 981 | 982 | if ($VERBOSE > 2) { 983 | foreach my $z (@$response) { 984 | print Dumper $z; 985 | } 986 | } 987 | 988 | print 989 | "---------------------------------------------------------------------------------" 990 | . "\n" 991 | if ($VERBOSE > 2); 992 | 993 | print "combinations of links for each chain in the key:\n" if ($VERBOSE > 2); 994 | for my $kkk (@$keys) { 995 | my $ccombinat = Math::Combinatorics->new( 996 | count => 2, 997 | data => [@$kkk], 998 | ); 999 | 1000 | while (my @zcombo = $ccombinat->next_combination) { 1001 | print Dumper [@zcombo] if ($VERBOSE > 2); 1002 | my @zzcombo = sort { $a <=> $b } @zcombo; 1003 | 1004 | $key_coreference_links->{$zzcombo[0] . "-" . $zzcombo[1]} = 1; 1005 | } 1006 | 1007 | print 1008 | "................................................................................\n" 1009 | if ($VERBOSE > 2); 1010 | } 1011 | 1012 | print Dumper $key_coreference_links if ($VERBOSE > 2); 1013 | print 1014 | "********************************************************************************\n" 1015 | if ($VERBOSE > 2); 1016 | 1017 | print 1018 | "---------------------------------------------------------------------------------" 1019 | . "\n" 1020 | if ($VERBOSE > 2); 1021 | print "combinations of links for each chain in the response:\n" 1022 | if ($VERBOSE > 2); 1023 | for my $rrr (@$response) { 1024 | my $ccombinat = Math::Combinatorics->new( 1025 | count => 2, 1026 | data => [@$rrr], 1027 | ); 1028 | 1029 | while (my @zcombo = $ccombinat->next_combination) { 1030 | print Dumper [@zcombo] if ($VERBOSE > 2); 1031 | my @zzcombo = sort { $a <=> $b } @zcombo; 1032 | 1033 | $response_coreference_links->{$zzcombo[0] . "-" . $zzcombo[1]} = 1; 1034 | } 1035 | 1036 | print 1037 | "................................................................................\n" 1038 | if ($VERBOSE > 2); 1039 | } 1040 | 1041 | print Dumper $response_coreference_links if ($VERBOSE > 2); 1042 | print 1043 | "********************************************************************************\n" 1044 | if ($VERBOSE > 2); 1045 | 1046 | my $number_chains_in_key = @$keys; 1047 | print "number chains in key: " . $number_chains_in_key . "\n" 1048 | if ($VERBOSE > 2); 1049 | 1050 | my @s = (0 .. $number_chains_in_key - 1); 1051 | my $ss = join(' ', @s); 1052 | my @n = split(' ', $ss); 1053 | 1054 | my $combinat = Math::Combinatorics->new( 1055 | count => 2, 1056 | data => [@n], 1057 | ); 1058 | 1059 | print "combinations of 2 from: " . join(" ", @n) . "\n" if ($VERBOSE > 2); 1060 | print "------------------------" . ("--" x scalar(@n)) . "\n" 1061 | if ($VERBOSE > 2); 1062 | 1063 | while (my @combo = $combinat->next_combination) { 1064 | 1065 | my @kcombo = (); 1066 | foreach my $comboo (@combo) { 1067 | push(@kcombo, @$keys[$comboo]); 1068 | } 1069 | 1070 | my $lkcombo = @kcombo; 1071 | print "length: " . $lkcombo . "\n" if ($VERBOSE > 2); 1072 | print "kcombo:\n" if ($VERBOSE > 2); 1073 | print "+++++\n" if ($VERBOSE > 2); 1074 | print Dumper [@kcombo] if ($VERBOSE > 2); 1075 | my @kccar = cartesian($kcombo[0], $kcombo[1]); 1076 | 1077 | foreach my $x (@kccar) { 1078 | print "--->>>>>>>>>>>>\n" if ($VERBOSE > 2); 1079 | print Dumper $x if ($VERBOSE > 2); 1080 | my @y = sort { $a <=> $b } @$x; 1081 | print Dumper [@y] if ($VERBOSE > 2); 1082 | $key_non_coreference_links->{@y[0] . "-" . @y[1]} = 1; 1083 | } 1084 | 1085 | print Dumper $key_non_coreference_links if ($VERBOSE > 2); 1086 | print "" . "\n" if ($VERBOSE > 2); 1087 | 1088 | print ".....\n" if ($VERBOSE > 2); 1089 | 1090 | print "\n" if ($VERBOSE > 2); 1091 | } 1092 | 1093 | print "\n" if ($VERBOSE > 2); 1094 | my $number_chains_in_response = @$response; 1095 | print "number chains in response: " . $number_chains_in_response . "\n" 1096 | if ($VERBOSE > 2); 1097 | 1098 | my @s = (0 .. $number_chains_in_response - 1); 1099 | my $ss = join(' ', @s); 1100 | my @n = split(' ', $ss); 1101 | 1102 | my $combinat = Math::Combinatorics->new( 1103 | count => 2, 1104 | data => [@n], 1105 | ); 1106 | 1107 | print "combinations of 2 from: " . join(" ", @n) . "\n" if ($VERBOSE > 2); 1108 | print "------------------------" . ("--" x scalar(@n)) . "\n" 1109 | if ($VERBOSE > 2); 1110 | 1111 | while (my @combo = $combinat->next_combination) { 1112 | my @kcombo = (); 1113 | foreach my $comboo (@combo) { 1114 | push(@kcombo, @$response[$comboo]); 1115 | } 1116 | 1117 | my $lkcombo = @kcombo; 1118 | print "length: " . $lkcombo . "\n" if ($VERBOSE > 2); 1119 | print "kcombo:\n" if ($VERBOSE > 2); 1120 | print "+++++\n" if ($VERBOSE > 2); 1121 | print Dumper [@kcombo] if ($VERBOSE > 2); 1122 | my @kccar = cartesian($kcombo[0], $kcombo[1]); 1123 | 1124 | foreach my $x (@kccar) { 1125 | print "--->>>>>>>>>>>>\n" if ($VERBOSE > 2); 1126 | print Dumper $x if ($VERBOSE > 2); 1127 | my @y = sort { $a <=> $b } @$x; 1128 | print Dumper [@y] if ($VERBOSE > 2); 1129 | $response_non_coreference_links->{@y[0] . "-" . @y[1]} = 1; 1130 | } 1131 | 1132 | print Dumper $response_non_coreference_links if ($VERBOSE > 2); 1133 | print "" . "\n" if ($VERBOSE > 2); 1134 | 1135 | print ".....\n" if ($VERBOSE > 2); 1136 | print "\n" if ($VERBOSE > 2); 1137 | } 1138 | 1139 | print "\n" if ($VERBOSE > 2); 1140 | 1141 | print 1142 | "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n" 1143 | if ($VERBOSE > 2); 1144 | print Dumper $key_coreference_links if ($VERBOSE > 2); 1145 | print Dumper $response_coreference_links if ($VERBOSE > 2); 1146 | print Dumper $key_non_coreference_links if ($VERBOSE > 2); 1147 | print Dumper $response_non_coreference_links if ($VERBOSE > 2); 1148 | print 1149 | "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n" 1150 | if ($VERBOSE > 2); 1151 | 1152 | my @union_cl = my @isect_cl = (); 1153 | my %union_cl = my %isect_cl = (); 1154 | 1155 | my @kcl = keys %$key_coreference_links; 1156 | my @rcl = keys %$response_coreference_links; 1157 | 1158 | print Dumper @kcl if ($VERBOSE > 2); 1159 | print 1160 | "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" 1161 | if ($VERBOSE > 2); 1162 | print Dumper @rcl if ($VERBOSE > 2); 1163 | print 1164 | "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" 1165 | if ($VERBOSE > 2); 1166 | 1167 | foreach my $e (@kcl, @rcl) { $union_cl{$e}++ && $isect_cl{$e}++ } 1168 | 1169 | @union_cl = keys %union_cl; 1170 | @isect_cl = keys %isect_cl; 1171 | 1172 | print Dumper @isect_cl if ($VERBOSE > 2); 1173 | print 1174 | "********************************************************************************\n" 1175 | if ($VERBOSE > 2); 1176 | 1177 | my @union_ncl = my @isect_ncl = (); 1178 | my %union_ncl = my %isect_ncl = (); 1179 | 1180 | my @kncl = keys %$key_non_coreference_links; 1181 | my @rncl = keys %$response_non_coreference_links; 1182 | 1183 | print Dumper @kncl if ($VERBOSE > 2); 1184 | print 1185 | "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" 1186 | if ($VERBOSE > 2); 1187 | print Dumper @rncl if ($VERBOSE > 2); 1188 | print 1189 | "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" 1190 | if ($VERBOSE > 2); 1191 | 1192 | foreach my $e (@kncl, @rncl) { $union_ncl{$e}++ && $isect_ncl{$e}++ } 1193 | 1194 | @union_ncl = keys %union_ncl; 1195 | @isect_ncl = keys %isect_ncl; 1196 | 1197 | print Dumper @isect_ncl if ($VERBOSE > 2); 1198 | print 1199 | "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n" 1200 | if ($VERBOSE > 2); 1201 | 1202 | my $num_isect_cl = @isect_cl; 1203 | print 1204 | " number of links in the intersection of key and response coreference links: " 1205 | . $num_isect_cl . "\n" 1206 | if ($VERBOSE > 2); 1207 | 1208 | my $num_isect_ncl = @isect_ncl; 1209 | print 1210 | "number of links in the intersection of key and response non-coreference links: " 1211 | . $num_isect_ncl . "\n" 1212 | if ($VERBOSE > 2); 1213 | 1214 | my $num_key_coreference_links = keys %$key_coreference_links; 1215 | print "number of key coreference links: " . $num_key_coreference_links . "\n" 1216 | if ($VERBOSE > 2); 1217 | 1218 | my $num_response_coreference_links = keys %$response_coreference_links; 1219 | print "number of response coreference links: " 1220 | . $num_response_coreference_links . "\n" 1221 | if ($VERBOSE > 2); 1222 | 1223 | my $num_key_non_coreference_links = keys %$key_non_coreference_links; 1224 | print "number of key non-coreference links: " 1225 | . $num_key_non_coreference_links . "\n" 1226 | if ($VERBOSE > 2); 1227 | 1228 | my $num_response_non_coreference_links = 1229 | keys %$response_non_coreference_links; 1230 | print "number of response non-coreference links: " 1231 | . $num_response_non_coreference_links . "\n" 1232 | if ($VERBOSE > 2); 1233 | 1234 | my ($r_blanc, $p_blanc, $f_blanc) = ComputeBLANCFromCounts( 1235 | $num_isect_cl, $num_key_coreference_links, 1236 | $num_response_coreference_links, $num_isect_ncl, 1237 | $num_key_non_coreference_links, $num_response_non_coreference_links 1238 | ); 1239 | 1240 | print " blanc recall: " . $r_blanc . "\n" if ($VERBOSE > 2); 1241 | print "blanc precision: " . $p_blanc . "\n" if ($VERBOSE > 2); 1242 | print " blanc score: " . $f_blanc . "\n" if ($VERBOSE > 2); 1243 | print 1244 | ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n" 1245 | if ($VERBOSE > 2); 1246 | 1247 | return ( 1248 | $num_isect_cl, $num_key_coreference_links, 1249 | $num_isect_cl, $num_response_coreference_links, 1250 | $num_isect_ncl, $num_key_non_coreference_links, 1251 | $num_isect_ncl, $num_response_non_coreference_links 1252 | ); 1253 | } 1254 | 1255 | ################################################################################ 1256 | # Compute BLANC recall, precision and F-measure from counts. 1257 | # Parameters: 1258 | # (#correct_coref_links, #key_coref_links, #response_coref_links, 1259 | # #correct_noncoref_links, #key_noncoref_links, #response_noncoref_links). 1260 | # Returns: (recall, precision, F-measure). 1261 | ################################################################################ 1262 | sub ComputeBLANCFromCounts { 1263 | my ( 1264 | $num_isect_cl, $num_key_coreference_links, 1265 | $num_response_coreference_links, $num_isect_ncl, 1266 | $num_key_non_coreference_links, $num_response_non_coreference_links 1267 | ) = @_; 1268 | 1269 | my $kcl_recall = 1270 | ($num_key_coreference_links == 0) 1271 | ? 0 1272 | : ($num_isect_cl / $num_key_coreference_links); 1273 | my $kcl_precision = 1274 | ($num_response_coreference_links == 0) 1275 | ? 0 1276 | : ($num_isect_cl / $num_response_coreference_links); 1277 | 1278 | print 1279 | "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n" 1280 | if ($VERBOSE > 2); 1281 | print " coreference recall: " . $kcl_recall . "\n" if ($VERBOSE > 2); 1282 | print " coreference precision: " . $kcl_precision . "\n" if ($VERBOSE > 2); 1283 | 1284 | my $fcl = 1285 | ($kcl_recall + $kcl_precision == 0) 1286 | ? 0 1287 | : (2 * $kcl_recall * $kcl_precision / ($kcl_recall + $kcl_precision)); 1288 | print " coreference f-score: " . $fcl . "\n" if ($VERBOSE > 2); 1289 | 1290 | my $kncl_recall = 1291 | ($num_key_non_coreference_links == 0) 1292 | ? 0 1293 | : ($num_isect_ncl / $num_key_non_coreference_links); 1294 | my $kncl_precision = 1295 | ($num_response_non_coreference_links == 0) 1296 | ? 0 1297 | : ($num_isect_ncl / $num_response_non_coreference_links); 1298 | 1299 | print 1300 | "--------------------------------------------------------------------------------\n" 1301 | if ($VERBOSE > 2); 1302 | print " non-coreference recall: " . $kncl_recall . "\n" if ($VERBOSE > 2); 1303 | print "non-coreference precision: " . $kncl_precision . "\n" 1304 | if ($VERBOSE > 2); 1305 | 1306 | my $fncl = 1307 | ($kncl_recall + $kncl_precision == 0) 1308 | ? 0 1309 | : (2 * $kncl_recall * $kncl_precision / ($kncl_recall + $kncl_precision)); 1310 | print " non-coreference f-score: " . $fncl . "\n" if ($VERBOSE > 2); 1311 | print 1312 | "--------------------------------------------------------------------------------\n" 1313 | if ($VERBOSE > 2); 1314 | 1315 | my $r_blanc = -1; 1316 | my $p_blanc = -1; 1317 | my $f_blanc = -1; 1318 | 1319 | if ($num_key_coreference_links == 0 && $num_key_non_coreference_links == 0) { 1320 | $r_blanc = 0; 1321 | $p_blanc = 0; 1322 | $f_blanc = 0; 1323 | } 1324 | elsif ($num_key_coreference_links == 0 || $num_key_non_coreference_links == 0) 1325 | { 1326 | if ($num_key_coreference_links == 0) { 1327 | $r_blanc = $kncl_recall; 1328 | $p_blanc = $kncl_precision; 1329 | $f_blanc = $fncl; 1330 | } 1331 | elsif ($num_key_non_coreference_links == 0) { 1332 | $r_blanc = $kcl_recall; 1333 | $p_blanc = $kcl_precision; 1334 | $f_blanc = $fcl; 1335 | } 1336 | } 1337 | else { 1338 | $r_blanc = ($kcl_recall + $kncl_recall) / 2; 1339 | $p_blanc = ($kcl_precision + $kncl_precision) / 2; 1340 | $f_blanc = ($fcl + $fncl) / 2; 1341 | } 1342 | 1343 | return ($r_blanc, $p_blanc, $f_blanc); 1344 | } 1345 | 1346 | 1; 1347 | --------------------------------------------------------------------------------