├── LICENSE
├── test
    ├── DataFiles
    │   ├── TC-A-2.response
    │   ├── TC-A.key
    │   ├── TC-M.key
    │   ├── TC-N.key
    │   ├── TC-A-10.response
    │   ├── TC-A-11.response
    │   ├── TC-A-12.response
    │   ├── TC-A-13.response
    │   ├── TC-A-4.response
    │   ├── TC-A-5.response
    │   ├── TC-A-6.response
    │   ├── TC-A-1.response
    │   ├── TC-A-3.response
    │   ├── TC-A-7.response
    │   ├── TC-A-8.response
    │   ├── TC-M-1.response
    │   ├── TC-M-2.response
    │   ├── TC-M-3.response
    │   ├── TC-M-4.response
    │   ├── TC-M-5.response
    │   ├── TC-M-6.response
    │   ├── TC-N-1.response
    │   ├── TC-N-2.response
    │   ├── TC-N-3.response
    │   ├── TC-N-4.response
    │   ├── TC-N-5.response
    │   ├── TC-N-6.response
    │   ├── TC-A-9.response
    │   ├── TC-F.key
    │   ├── TC-G.key
    │   ├── TC-H.key
    │   ├── TC-I.key
    │   ├── TC-J.key
    │   ├── TC-J-1.response
    │   ├── TC-F-1.response
    │   ├── TC-G-1.response
    │   ├── TC-H-1.response
    │   ├── TC-I-1.response
    │   ├── TC-K.key
    │   ├── TC-L.key
    │   ├── TC-L-1.response
    │   ├── TC-D.key
    │   ├── TC-E.key
    │   ├── TC-K-1.response
    │   ├── TC-D-1.response
    │   ├── TC-E-1.response
    │   ├── TC-B.key
    │   ├── TC-B-1.response
    │   ├── TC-C.key
    │   └── TC-C-1.response
    ├── test.pl
    ├── CorefMetricTest.pm
    ├── TestCases.README
    └── CorefMetricTestConfig.pm
├── scorer.pl
├── scorer.bat
├── README.rst
└── lib
    ├── Algorithm
        ├── README.Munkres
        └── Munkres.pm
    ├── Cwd.pm
    ├── Math
        └── Combinatorics.pm
    └── CorScorer.pm


/LICENSE:
--------------------------------------------------------------------------------
1 | LICENSE
2 | -------
3 | 
4 | This work is licensed under a [[Creative Commons Attribution ShareAlike 4.0 International License|http://creativecommons.org/licenses/by-sa/4.0/]].
5 | 


--------------------------------------------------------------------------------
/test/DataFiles/TC-A-2.response:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	-
 6 | test1	0	4	b2	-
 7 | test1	0	5	b3	-
 8 | test1	0	6	b4	-
 9 | test1	0	7	jnk	-
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	-
13 | test2	0	1	jnk	-
14 | test2	0	2	d1	(2
15 | test2	0	3	d2	2)
16 | test2	0	4	jnk	-
17 | test2	0	5	e	(2)
18 | test2	0	6	jnk	-
19 | test2	0	7	f1	-
20 | test2	0	8	f2	-
21 | test2	0	9	f3	-
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/test/DataFiles/TC-A.key:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	(1
 6 | test1	0	4	b2	-
 7 | test1	0	5	b3	-
 8 | test1	0	6	b4	1)
 9 | test1	0	7	jnk	-
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	(1)
13 | test2	0	1	jnk	-
14 | test2	0	2	d1	(2
15 | test2	0	3	d2	2)
16 | test2	0	4	jnk	-
17 | test2	0	5	e	(2)
18 | test2	0	6	jnk	-
19 | test2	0	7	f1	(2
20 | test2	0	8	f2	-
21 | test2	0	9	f3	2)
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/test/DataFiles/TC-M.key:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	(0
 6 | test1	0	4	b2	-
 7 | test1	0	5	b3	-
 8 | test1	0	6	b4	0)
 9 | test1	0	7	jnk	-
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	(0)
13 | test2	0	1	jnk	-
14 | test2	0	2	d1	(0
15 | test2	0	3	d2	0)
16 | test2	0	4	jnk	-
17 | test2	0	5	e	(0)
18 | test2	0	6	jnk	-
19 | test2	0	7	f1	(0
20 | test2	0	8	f2	-
21 | test2	0	9	f3	0)
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/test/DataFiles/TC-N.key:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	(1
 6 | test1	0	4	b2	-
 7 | test1	0	5	b3	-
 8 | test1	0	6	b4	1)
 9 | test1	0	7	jnk	-
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	(2)
13 | test2	0	1	jnk	-
14 | test2	0	2	d1	(3
15 | test2	0	3	d2	3)
16 | test2	0	4	jnk	-
17 | test2	0	5	e	(4)
18 | test2	0	6	jnk	-
19 | test2	0	7	f1	(5
20 | test2	0	8	f2	-
21 | test2	0	9	f3	5)
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/test/DataFiles/TC-A-10.response:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	(1
 6 | test1	0	4	b2	-
 7 | test1	0	5	b3	-
 8 | test1	0	6	b4	1)
 9 | test1	0	7	jnk	-
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	(2)
13 | test2	0	1	x	-
14 | test2	0	2	d1	(3
15 | test2	0	3	d2	3)
16 | test2	0	4	z	-
17 | test2	0	5	e	(4)
18 | test2	0	6	y	-
19 | test2	0	7	f1	(5
20 | test2	0	8	f2	-
21 | test2	0	9	f3	5)
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/test/DataFiles/TC-A-11.response:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	(0
 6 | test1	0	4	b2	-
 7 | test1	0	5	b3	-
 8 | test1	0	6	b4	0)
 9 | test1	0	7	jnk	-
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	(0)
13 | test2	0	1	x	-
14 | test2	0	2	d1	(0
15 | test2	0	3	d2	0)
16 | test2	0	4	z	-
17 | test2	0	5	e	(0)
18 | test2	0	6	y	-
19 | test2	0	7	f1	(0
20 | test2	0	8	f2	-
21 | test2	0	9	f3	0)
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/test/DataFiles/TC-A-12.response:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	(1
 6 | test1	0	4	b2	1)
 7 | test1	0	5	b3	-
 8 | test1	0	6	b4	-
 9 | test1	0	7	jnk	(2)
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	(3)
13 | test2	0	1	x	-
14 | test2	0	2	d1	(4
15 | test2	0	3	d2	4)
16 | test2	0	4	z	-
17 | test2	0	5	e	(5)
18 | test2	0	6	y	-
19 | test2	0	7	f1	(6)
20 | test2	0	8	f2	-
21 | test2	0	9	f3	-
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/test/DataFiles/TC-A-13.response:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	(0
 6 | test1	0	4	b2	0)
 7 | test1	0	5	b3	-
 8 | test1	0	6	b4	-
 9 | test1	0	7	jnk	(0)
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	(0)
13 | test2	0	1	x	-
14 | test2	0	2	d1	(0
15 | test2	0	3	d2	0)
16 | test2	0	4	z	-
17 | test2	0	5	e	(0)
18 | test2	0	6	y	-
19 | test2	0	7	f1	(0)
20 | test2	0	8	f2	-
21 | test2	0	9	f3	-
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/test/DataFiles/TC-A-4.response:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	(1
 6 | test1	0	4	b2	-
 7 | test1	0	5	b3	-
 8 | test1	0	6	b4	1)
 9 | test1	0	7	jnk	-
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	(1)
13 | test2	0	1	x	(1)
14 | test2	0	2	d1	(2
15 | test2	0	3	d2	2)
16 | test2	0	4	x	(3)
17 | test2	0	5	e	-
18 | test2	0	6	y	(2)
19 | test2	0	7	f1	-
20 | test2	0	8	f2	-
21 | test2	0	9	f3	-
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/test/DataFiles/TC-A-5.response:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	(1
 6 | test1	0	4	b2	(1
 7 | test1	0	5	b3	1)
 8 | test1	0	6	b4	1)
 9 | test1	0	7	jnk	-
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	(1)
13 | test2	0	1	x	(1)
14 | test2	0	2	d1	(2
15 | test2	0	3	d2	2)
16 | test2	0	4	z	(3)
17 | test2	0	5	e	-
18 | test2	0	6	y	(2)
19 | test2	0	7	f1	-
20 | test2	0	8	f2	-
21 | test2	0	9	f3	-
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/test/DataFiles/TC-A-6.response:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	(1
 6 | test1	0	4	b2	(3
 7 | test1	0	5	b3	3)
 8 | test1	0	6	b4	1)
 9 | test1	0	7	jnk	-
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	(1)
13 | test2	0	1	x	(1)
14 | test2	0	2	d1	(2
15 | test2	0	3	d2	2)
16 | test2	0	4	z	(3)
17 | test2	0	5	e	-
18 | test2	0	6	y	(2)
19 | test2	0	7	f1	-
20 | test2	0	8	f2	-
21 | test2	0	9	f3	-
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/test/DataFiles/TC-A-1.response:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	(1
 6 | test1	0	4	b2	-
 7 | test1	0	5	b3	-
 8 | test1	0	6	b4	1)
 9 | test1	0	7	jnk	-
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	(1)
13 | test2	0	1	jnk	-
14 | test2	0	2	d1	(2
15 | test2	0	3	d2	2)
16 | test2	0	4	jnk	-
17 | test2	0	5	e	(2)
18 | test2	0	6	jnk	-
19 | test2	0	7	f1	(2
20 | test2	0	8	f2	-
21 | test2	0	9	f3	2)
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/test/DataFiles/TC-A-3.response:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	(1
 6 | test1	0	4	b2	-
 7 | test1	0	5	b3	-
 8 | test1	0	6	b4	1)
 9 | test1	0	7	jnk	-
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	(1)
13 | test2	0	1	x	(1)
14 | test2	0	2	d1	(2
15 | test2	0	3	d2	2)
16 | test2	0	4	y	(2)
17 | test2	0	5	e	(2)
18 | test2	0	6	z	(3)
19 | test2	0	7	f1	(2
20 | test2	0	8	f2	-
21 | test2	0	9	f3	2)
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/test/DataFiles/TC-A-7.response:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	(1(1
 6 | test1	0	4	b2	-
 7 | test1	0	5	b3	-
 8 | test1	0	6	b4	1)1)
 9 | test1	0	7	jnk	-
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	(1)
13 | test2	0	1	x	(1)
14 | test2	0	2	d1	(2
15 | test2	0	3	d2	2)
16 | test2	0	4	z	(3)
17 | test2	0	5	e	-
18 | test2	0	6	y	(2)
19 | test2	0	7	f1	-
20 | test2	0	8	f2	-
21 | test2	0	9	f3	-
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/test/DataFiles/TC-A-8.response:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	(1(3
 6 | test1	0	4	b2	-
 7 | test1	0	5	b3	-
 8 | test1	0	6	b4	3)1)
 9 | test1	0	7	jnk	-
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	(1)
13 | test2	0	1	x	(1)
14 | test2	0	2	d1	(2
15 | test2	0	3	d2	2)
16 | test2	0	4	z	(3)
17 | test2	0	5	e	-
18 | test2	0	6	y	(2)
19 | test2	0	7	f1	-
20 | test2	0	8	f2	-
21 | test2	0	9	f3	-
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/test/DataFiles/TC-M-1.response:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	(0
 6 | test1	0	4	b2	-
 7 | test1	0	5	b3	-
 8 | test1	0	6	b4	0)
 9 | test1	0	7	jnk	-
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	(0)
13 | test2	0	1	jnk	-
14 | test2	0	2	d1	(0
15 | test2	0	3	d2	0)
16 | test2	0	4	jnk	-
17 | test2	0	5	e	(0)
18 | test2	0	6	jnk	-
19 | test2	0	7	f1	(0
20 | test2	0	8	f2	-
21 | test2	0	9	f3	0)
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/test/DataFiles/TC-M-2.response:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	(1
 6 | test1	0	4	b2	-
 7 | test1	0	5	b3	-
 8 | test1	0	6	b4	1)
 9 | test1	0	7	jnk	-
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	(2)
13 | test2	0	1	jnk	-
14 | test2	0	2	d1	(3
15 | test2	0	3	d2	3)
16 | test2	0	4	jnk	-
17 | test2	0	5	e	(4)
18 | test2	0	6	jnk	-
19 | test2	0	7	f1	(5
20 | test2	0	8	f2	-
21 | test2	0	9	f3	5)
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/test/DataFiles/TC-M-3.response:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	(0
 6 | test1	0	4	b2	-
 7 | test1	0	5	b3	-
 8 | test1	0	6	b4	0)
 9 | test1	0	7	jnk	-
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	(1)
13 | test2	0	1	jnk	-
14 | test2	0	2	d1	(1
15 | test2	0	3	d2	1)
16 | test2	0	4	jnk	-
17 | test2	0	5	e	(1)
18 | test2	0	6	jnk	-
19 | test2	0	7	f1	(2
20 | test2	0	8	f2	-
21 | test2	0	9	f3	2)
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/test/DataFiles/TC-M-4.response:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	(0
 6 | test1	0	4	b2	-
 7 | test1	0	5	b3	-
 8 | test1	0	6	b4	0)
 9 | test1	0	7	jnk	-
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	(0)
13 | test2	0	1	jnk	(0)
14 | test2	0	2	d1	-
15 | test2	0	3	d2	-
16 | test2	0	4	jnk	(0)
17 | test2	0	5	e	-
18 | test2	0	6	jnk	(0)
19 | test2	0	7	f1	-
20 | test2	0	8	f2	-
21 | test2	0	9	f3	-
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/test/DataFiles/TC-M-5.response:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	(1
 6 | test1	0	4	b2	-
 7 | test1	0	5	b3	-
 8 | test1	0	6	b4	1)
 9 | test1	0	7	jnk	-
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	(2)
13 | test2	0	1	jnk	(3)
14 | test2	0	2	d1	-
15 | test2	0	3	d2	-
16 | test2	0	4	jnk	(4)
17 | test2	0	5	e	-
18 | test2	0	6	jnk	(5)
19 | test2	0	7	f1	-
20 | test2	0	8	f2	-
21 | test2	0	9	f3	-
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/test/DataFiles/TC-M-6.response:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	(0
 6 | test1	0	4	b2	-
 7 | test1	0	5	b3	-
 8 | test1	0	6	b4	0)
 9 | test1	0	7	jnk	-
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	(1)
13 | test2	0	1	jnk	(1)
14 | test2	0	2	d1	-
15 | test2	0	3	d2	-
16 | test2	0	4	jnk	(1)
17 | test2	0	5	e	-
18 | test2	0	6	jnk	(2)
19 | test2	0	7	f1	-
20 | test2	0	8	f2	-
21 | test2	0	9	f3	-
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/test/DataFiles/TC-N-1.response:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	(1
 6 | test1	0	4	b2	-
 7 | test1	0	5	b3	-
 8 | test1	0	6	b4	1)
 9 | test1	0	7	jnk	-
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	(2)
13 | test2	0	1	jnk	-
14 | test2	0	2	d1	(3
15 | test2	0	3	d2	3)
16 | test2	0	4	jnk	-
17 | test2	0	5	e	(4)
18 | test2	0	6	jnk	-
19 | test2	0	7	f1	(5
20 | test2	0	8	f2	-
21 | test2	0	9	f3	5)
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/test/DataFiles/TC-N-2.response:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	(0
 6 | test1	0	4	b2	-
 7 | test1	0	5	b3	-
 8 | test1	0	6	b4	0)
 9 | test1	0	7	jnk	-
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	(0)
13 | test2	0	1	jnk	-
14 | test2	0	2	d1	(0
15 | test2	0	3	d2	0)
16 | test2	0	4	jnk	-
17 | test2	0	5	e	(0)
18 | test2	0	6	jnk	-
19 | test2	0	7	f1	(0
20 | test2	0	8	f2	-
21 | test2	0	9	f3	0)
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/test/DataFiles/TC-N-3.response:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	(0
 6 | test1	0	4	b2	-
 7 | test1	0	5	b3	-
 8 | test1	0	6	b4	0)
 9 | test1	0	7	jnk	-
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	(1)
13 | test2	0	1	jnk	-
14 | test2	0	2	d1	(1
15 | test2	0	3	d2	1)
16 | test2	0	4	jnk	-
17 | test2	0	5	e	(1)
18 | test2	0	6	jnk	-
19 | test2	0	7	f1	(2
20 | test2	0	8	f2	-
21 | test2	0	9	f3	2)
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/test/DataFiles/TC-N-4.response:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	(1
 6 | test1	0	4	b2	-
 7 | test1	0	5	b3	-
 8 | test1	0	6	b4	1)
 9 | test1	0	7	jnk	-
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	(2)
13 | test2	0	1	jnk	(3)
14 | test2	0	2	d1	-
15 | test2	0	3	d2	-
16 | test2	0	4	jnk	(4)
17 | test2	0	5	e	-
18 | test2	0	6	jnk	(5)
19 | test2	0	7	f1	-
20 | test2	0	8	f2	-
21 | test2	0	9	f3	-
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/test/DataFiles/TC-N-5.response:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	(0
 6 | test1	0	4	b2	-
 7 | test1	0	5	b3	-
 8 | test1	0	6	b4	0)
 9 | test1	0	7	jnk	-
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	(0)
13 | test2	0	1	jnk	(0)
14 | test2	0	2	d1	-
15 | test2	0	3	d2	-
16 | test2	0	4	jnk	(0)
17 | test2	0	5	e	-
18 | test2	0	6	jnk	(0)
19 | test2	0	7	f1	-
20 | test2	0	8	f2	-
21 | test2	0	9	f3	-
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/test/DataFiles/TC-N-6.response:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	(0
 6 | test1	0	4	b2	-
 7 | test1	0	5	b3	-
 8 | test1	0	6	b4	0)
 9 | test1	0	7	jnk	-
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	(1)
13 | test2	0	1	jnk	(1)
14 | test2	0	2	d1	-
15 | test2	0	3	d2	-
16 | test2	0	4	jnk	(1)
17 | test2	0	5	e	-
18 | test2	0	6	jnk	(2)
19 | test2	0	7	f1	-
20 | test2	0	8	f2	-
21 | test2	0	9	f3	-
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/test/DataFiles/TC-A-9.response:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	(1(3(3(3(3(3(3(3(3(3(3
 6 | test1	0	4	b2	-
 7 | test1	0	5	b3	-
 8 | test1	0	6	b4	3)3)3)3)3)3)3)3)3)3)1)
 9 | test1	0	7	jnk	-
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	(1)
13 | test2	0	1	x	(1)
14 | test2	0	2	d1	(2
15 | test2	0	3	d2	2)
16 | test2	0	4	z	(3)
17 | test2	0	5	e	-
18 | test2	0	6	y	(2)
19 | test2	0	7	f1	-
20 | test2	0	8	f2	-
21 | test2	0	9	f3	-
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/test/DataFiles/TC-F.key:
--------------------------------------------------------------------------------
 1 | #begin document (nw/xinhua/00/chtb_0009); part 000
 2 | nw/xinhua/00/chtb_0009 -
 3 | nw/xinhua/00/chtb_0009 (1)
 4 | nw/xinhua/00/chtb_0009 -
 5 | nw/xinhua/00/chtb_0009 (1)
 6 | nw/xinhua/00/chtb_0009 -
 7 | nw/xinhua/00/chtb_0009 (1)
 8 | nw/xinhua/00/chtb_0009 -
 9 | nw/xinhua/00/chtb_0009 (1)
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 -
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 -
14 | nw/xinhua/00/chtb_0009 -
15 | nw/xinhua/00/chtb_0009 -
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 -
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 -
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 -
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 -
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 -
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 -
30 | 
31 | #end document
32 | 


--------------------------------------------------------------------------------
/test/DataFiles/TC-G.key:
--------------------------------------------------------------------------------
 1 | #begin document (nw/xinhua/00/chtb_0009); part 000
 2 | nw/xinhua/00/chtb_0009 -
 3 | nw/xinhua/00/chtb_0009 (1)
 4 | nw/xinhua/00/chtb_0009 -
 5 | nw/xinhua/00/chtb_0009 (1)
 6 | nw/xinhua/00/chtb_0009 -
 7 | nw/xinhua/00/chtb_0009 (2)
 8 | nw/xinhua/00/chtb_0009 -
 9 | nw/xinhua/00/chtb_0009 (2)
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 -
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 -
14 | nw/xinhua/00/chtb_0009 -
15 | nw/xinhua/00/chtb_0009 -
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 -
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 -
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 -
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 -
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 -
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 -
30 | 
31 | #end document
32 | 


--------------------------------------------------------------------------------
/test/DataFiles/TC-H.key:
--------------------------------------------------------------------------------
 1 | #begin document (nw/xinhua/00/chtb_0009); part 000
 2 | nw/xinhua/00/chtb_0009 -
 3 | nw/xinhua/00/chtb_0009 (1)
 4 | nw/xinhua/00/chtb_0009 -
 5 | nw/xinhua/00/chtb_0009 (1)
 6 | nw/xinhua/00/chtb_0009 -
 7 | nw/xinhua/00/chtb_0009 (1)
 8 | nw/xinhua/00/chtb_0009 -
 9 | nw/xinhua/00/chtb_0009 (1)
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 -
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 -
14 | nw/xinhua/00/chtb_0009 -
15 | nw/xinhua/00/chtb_0009 -
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 -
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 -
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 -
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 -
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 -
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 -
30 | 
31 | #end document
32 | 


--------------------------------------------------------------------------------
/test/DataFiles/TC-I.key:
--------------------------------------------------------------------------------
 1 | #begin document (nw/xinhua/00/chtb_0009); part 000
 2 | nw/xinhua/00/chtb_0009 -
 3 | nw/xinhua/00/chtb_0009 (1)
 4 | nw/xinhua/00/chtb_0009 -
 5 | nw/xinhua/00/chtb_0009 (1)
 6 | nw/xinhua/00/chtb_0009 -
 7 | nw/xinhua/00/chtb_0009 (1)
 8 | nw/xinhua/00/chtb_0009 -
 9 | nw/xinhua/00/chtb_0009 (1)
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 -
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 -
14 | nw/xinhua/00/chtb_0009 -
15 | nw/xinhua/00/chtb_0009 -
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 -
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 -
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 -
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 -
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 -
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 -
30 | 
31 | #end document
32 | 


--------------------------------------------------------------------------------
/test/DataFiles/TC-J.key:
--------------------------------------------------------------------------------
 1 | #begin document (nw/xinhua/00/chtb_0009); part 000
 2 | nw/xinhua/00/chtb_0009 -
 3 | nw/xinhua/00/chtb_0009 (1)
 4 | nw/xinhua/00/chtb_0009 -
 5 | nw/xinhua/00/chtb_0009 (1)
 6 | nw/xinhua/00/chtb_0009 -
 7 | nw/xinhua/00/chtb_0009 (1)
 8 | nw/xinhua/00/chtb_0009 -
 9 | nw/xinhua/00/chtb_0009 -
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 -
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 -
14 | nw/xinhua/00/chtb_0009 -
15 | nw/xinhua/00/chtb_0009 -
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 -
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 -
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 -
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 -
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 -
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 -
30 | 
31 | #end document
32 | 


--------------------------------------------------------------------------------
/test/DataFiles/TC-J-1.response:
--------------------------------------------------------------------------------
 1 | #begin document (nw/xinhua/00/chtb_0009); part 000
 2 | nw/xinhua/00/chtb_0009 -
 3 | nw/xinhua/00/chtb_0009 (1)
 4 | nw/xinhua/00/chtb_0009 -
 5 | nw/xinhua/00/chtb_0009 -
 6 | nw/xinhua/00/chtb_0009 -
 7 | nw/xinhua/00/chtb_0009 (1)
 8 | nw/xinhua/00/chtb_0009 -
 9 | nw/xinhua/00/chtb_0009 -
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 -
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 -
14 | nw/xinhua/00/chtb_0009 -
15 | nw/xinhua/00/chtb_0009 -
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 -
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 -
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 -
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 -
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 -
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 -
30 | 
31 | #end document
32 | 


--------------------------------------------------------------------------------
/test/DataFiles/TC-F-1.response:
--------------------------------------------------------------------------------
 1 | #begin document (nw/xinhua/00/chtb_0009); part 000
 2 | nw/xinhua/00/chtb_0009 -
 3 | nw/xinhua/00/chtb_0009 (1)
 4 | nw/xinhua/00/chtb_0009 -
 5 | nw/xinhua/00/chtb_0009 (1)
 6 | nw/xinhua/00/chtb_0009 -
 7 | nw/xinhua/00/chtb_0009 (2)
 8 | nw/xinhua/00/chtb_0009 -
 9 | nw/xinhua/00/chtb_0009 (2)
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 -
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 -
14 | nw/xinhua/00/chtb_0009 -
15 | nw/xinhua/00/chtb_0009 -
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 -
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 -
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 -
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 -
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 -
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 -
30 | 
31 | #end document
32 | 


--------------------------------------------------------------------------------
/test/DataFiles/TC-G-1.response:
--------------------------------------------------------------------------------
 1 | #begin document (nw/xinhua/00/chtb_0009); part 000
 2 | nw/xinhua/00/chtb_0009 -
 3 | nw/xinhua/00/chtb_0009 (1)
 4 | nw/xinhua/00/chtb_0009 -
 5 | nw/xinhua/00/chtb_0009 (1)
 6 | nw/xinhua/00/chtb_0009 -
 7 | nw/xinhua/00/chtb_0009 (1)
 8 | nw/xinhua/00/chtb_0009 -
 9 | nw/xinhua/00/chtb_0009 (1)
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 -
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 -
14 | nw/xinhua/00/chtb_0009 -
15 | nw/xinhua/00/chtb_0009 -
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 -
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 -
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 -
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 -
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 -
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 -
30 | 
31 | #end document
32 | 


--------------------------------------------------------------------------------
/test/DataFiles/TC-H-1.response:
--------------------------------------------------------------------------------
 1 | #begin document (nw/xinhua/00/chtb_0009); part 000
 2 | nw/xinhua/00/chtb_0009 -
 3 | nw/xinhua/00/chtb_0009 (1)
 4 | nw/xinhua/00/chtb_0009 -
 5 | nw/xinhua/00/chtb_0009 (1)
 6 | nw/xinhua/00/chtb_0009 -
 7 | nw/xinhua/00/chtb_0009 (1)
 8 | nw/xinhua/00/chtb_0009 -
 9 | nw/xinhua/00/chtb_0009 (1)
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 -
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 -
14 | nw/xinhua/00/chtb_0009 -
15 | nw/xinhua/00/chtb_0009 -
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 -
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 -
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 -
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 -
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 -
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 -
30 | 
31 | #end document
32 | 


--------------------------------------------------------------------------------
/test/DataFiles/TC-I-1.response:
--------------------------------------------------------------------------------
 1 | #begin document (nw/xinhua/00/chtb_0009); part 000
 2 | nw/xinhua/00/chtb_0009 -
 3 | nw/xinhua/00/chtb_0009 (1)
 4 | nw/xinhua/00/chtb_0009 -
 5 | nw/xinhua/00/chtb_0009 (1)
 6 | nw/xinhua/00/chtb_0009 -
 7 | nw/xinhua/00/chtb_0009 (2)
 8 | nw/xinhua/00/chtb_0009 -
 9 | nw/xinhua/00/chtb_0009 (2)
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 -
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 -
14 | nw/xinhua/00/chtb_0009 -
15 | nw/xinhua/00/chtb_0009 -
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 -
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 -
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 -
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 -
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 -
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 -
30 | 
31 | #end document
32 | 


--------------------------------------------------------------------------------
/test/DataFiles/TC-K.key:
--------------------------------------------------------------------------------
 1 | #begin document (nw/xinhua/00/chtb_0009); part 000
 2 | nw/xinhua/00/chtb_0009 -
 3 | nw/xinhua/00/chtb_0009 -
 4 | nw/xinhua/00/chtb_0009 -
 5 | nw/xinhua/00/chtb_0009 (1)
 6 | nw/xinhua/00/chtb_0009 -
 7 | nw/xinhua/00/chtb_0009 (1)
 8 | nw/xinhua/00/chtb_0009 -
 9 | nw/xinhua/00/chtb_0009 (1)
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 (1)
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 -
14 | nw/xinhua/00/chtb_0009 -
15 | nw/xinhua/00/chtb_0009 (1)
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 (1)
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 (1)
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 -
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 -
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 -
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 -
30 | 
31 | #end document
32 | 


--------------------------------------------------------------------------------
/test/DataFiles/TC-L.key:
--------------------------------------------------------------------------------
 1 | #begin document (nw/xinhua/00/chtb_0009); part 000
 2 | nw/xinhua/00/chtb_0009 -
 3 | nw/xinhua/00/chtb_0009 (1)
 4 | nw/xinhua/00/chtb_0009 -
 5 | nw/xinhua/00/chtb_0009 (1)
 6 | nw/xinhua/00/chtb_0009 -
 7 | nw/xinhua/00/chtb_0009 (1)
 8 | nw/xinhua/00/chtb_0009 -
 9 | nw/xinhua/00/chtb_0009 (2)
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 (2)
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 (2)
14 | nw/xinhua/00/chtb_0009 -
15 | nw/xinhua/00/chtb_0009 (2)
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 -
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 -
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 -
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 -
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 -
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 -
30 | 
31 | #end document
32 | 


--------------------------------------------------------------------------------
/test/DataFiles/TC-L-1.response:
--------------------------------------------------------------------------------
 1 | #begin document (nw/xinhua/00/chtb_0009); part 000
 2 | nw/xinhua/00/chtb_0009 -
 3 | nw/xinhua/00/chtb_0009 (1)
 4 | nw/xinhua/00/chtb_0009 -
 5 | nw/xinhua/00/chtb_0009 (1)
 6 | nw/xinhua/00/chtb_0009 -
 7 | nw/xinhua/00/chtb_0009 (2)
 8 | nw/xinhua/00/chtb_0009 -
 9 | nw/xinhua/00/chtb_0009 (2)
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 -
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 (3)
14 | nw/xinhua/00/chtb_0009 -
15 | nw/xinhua/00/chtb_0009 (3)
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 (3)
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 -
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 -
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 -
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 -
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 -
30 | 
31 | #end document
32 | 


--------------------------------------------------------------------------------
/test/DataFiles/TC-D.key:
--------------------------------------------------------------------------------
 1 | #begin document (nw/xinhua/00/chtb_0009); part 000
 2 | nw/xinhua/00/chtb_0009 -
 3 | nw/xinhua/00/chtb_0009 (1)
 4 | nw/xinhua/00/chtb_0009 -
 5 | nw/xinhua/00/chtb_0009 (1)
 6 | nw/xinhua/00/chtb_0009 -
 7 | nw/xinhua/00/chtb_0009 (1)
 8 | nw/xinhua/00/chtb_0009 -
 9 | nw/xinhua/00/chtb_0009 (1)
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 (1)
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 -
14 | nw/xinhua/00/chtb_0009 -
15 | nw/xinhua/00/chtb_0009 (2)
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 (2)
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 (3)
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 (3)
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 (3)
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 (3)
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 (3)
30 | 
31 | #end document
32 | 


--------------------------------------------------------------------------------
/test/DataFiles/TC-E.key:
--------------------------------------------------------------------------------
 1 | #begin document (nw/xinhua/00/chtb_0009); part 000
 2 | nw/xinhua/00/chtb_0009 -
 3 | nw/xinhua/00/chtb_0009 (1)
 4 | nw/xinhua/00/chtb_0009 -
 5 | nw/xinhua/00/chtb_0009 (1)
 6 | nw/xinhua/00/chtb_0009 -
 7 | nw/xinhua/00/chtb_0009 (1)
 8 | nw/xinhua/00/chtb_0009 -
 9 | nw/xinhua/00/chtb_0009 (1)
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 (1)
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 -
14 | nw/xinhua/00/chtb_0009 -
15 | nw/xinhua/00/chtb_0009 (2)
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 (2)
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 (3)
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 (3)
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 (3)
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 (3)
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 (3)
30 | 
31 | #end document
32 | 


--------------------------------------------------------------------------------
/test/DataFiles/TC-K-1.response:
--------------------------------------------------------------------------------
 1 | #begin document (nw/xinhua/00/chtb_0009); part 000
 2 | nw/xinhua/00/chtb_0009 -
 3 | nw/xinhua/00/chtb_0009 (1)
 4 | nw/xinhua/00/chtb_0009 -
 5 | nw/xinhua/00/chtb_0009 (1)
 6 | nw/xinhua/00/chtb_0009 -
 7 | nw/xinhua/00/chtb_0009 (1)
 8 | nw/xinhua/00/chtb_0009 -
 9 | nw/xinhua/00/chtb_0009 (2)
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 (2)
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 (2)
14 | nw/xinhua/00/chtb_0009 -
15 | nw/xinhua/00/chtb_0009 (3)
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 (3)
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 (3)
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 -
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 -
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 -
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 -
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 -
30 | 
31 | #end document
32 | 


--------------------------------------------------------------------------------
/test/DataFiles/TC-D-1.response:
--------------------------------------------------------------------------------
 1 | #begin document (nw/xinhua/00/chtb_0009); part 000
 2 | nw/xinhua/00/chtb_0009 -
 3 | nw/xinhua/00/chtb_0009 (1)
 4 | nw/xinhua/00/chtb_0009 -
 5 | nw/xinhua/00/chtb_0009 (1)
 6 | nw/xinhua/00/chtb_0009 -
 7 | nw/xinhua/00/chtb_0009 (1)
 8 | nw/xinhua/00/chtb_0009 -
 9 | nw/xinhua/00/chtb_0009 (1)
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 (1)
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 -
14 | nw/xinhua/00/chtb_0009 -
15 | nw/xinhua/00/chtb_0009 (3)
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 (3)
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 (3)
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 (3)
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 (3)
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 (3)
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 (3)
30 | 
31 | #end document
32 | 


--------------------------------------------------------------------------------
/test/DataFiles/TC-E-1.response:
--------------------------------------------------------------------------------
 1 | #begin document (nw/xinhua/00/chtb_0009); part 000
 2 | nw/xinhua/00/chtb_0009 -
 3 | nw/xinhua/00/chtb_0009 (1)
 4 | nw/xinhua/00/chtb_0009 -
 5 | nw/xinhua/00/chtb_0009 (1)
 6 | nw/xinhua/00/chtb_0009 -
 7 | nw/xinhua/00/chtb_0009 (1)
 8 | nw/xinhua/00/chtb_0009 -
 9 | nw/xinhua/00/chtb_0009 (1)
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 (1)
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 -
14 | nw/xinhua/00/chtb_0009 -
15 | nw/xinhua/00/chtb_0009 (2)
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 (2)
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 (1)
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 (1)
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 (1)
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 (1)
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 (1)
30 | 
31 | #end document
32 | 


--------------------------------------------------------------------------------
/scorer.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | 
 3 | BEGIN {
 4 |   $d = $0;
 5 |   $d =~ s/\/[^\/][^\/]*$//g;
 6 | 
 7 |   if ($d eq $0) {
 8 |     unshift(@INC, "lib");
 9 |   }
10 |   else {
11 |     unshift(@INC, $d . "/lib");
12 |   }
13 | }
14 | 
15 | use strict;
16 | use CorScorer;
17 | 
18 | if (@ARGV < 3) {
19 |   print q|
20 | use: scorer.pl <metric> <keys_file> <response_file> [name]
21 | 
22 |   metric: the metric desired to score the results:
23 |     muc: MUCScorer (Vilain et al, 1995)
24 |     bcub: B-Cubed (Bagga and Baldwin, 1998)
25 |     ceafm: CEAF (Luo et al, 2005) using mention-based similarity
26 |     ceafe: CEAF (Luo et al, 2005) using entity-based similarity
27 |     blanc: BLANC
28 |     all: uses all the metrics to score
29 | 
30 |   keys_file: file with expected coreference chains in SemEval format
31 | 
32 |   response_file: file with output of coreference system (SemEval format)
33 | 
34 |   name: [optional] the name of the document to score. If name is not
35 |     given, all the documents in the dataset will be scored. If given
36 |     name is "none" then all the documents are scored but only total
37 |     results are shown.
38 | 
39 | |;
40 |   exit;
41 | }
42 | 
43 | my $metric = shift(@ARGV);
44 | if ($metric !~ /^(muc|bcub|ceafm|ceafe|blanc|all)/i) {
45 |   print "Invalid metric\n";
46 |   exit;
47 | }
48 | 
49 | if ($metric eq 'all') {
50 |   foreach my $m ('muc', 'bcub', 'ceafm', 'ceafe', 'blanc') {
51 |     print "\nMETRIC $m:\n";
52 |     &CorScorer::Score($m, @ARGV);
53 |   }
54 | }
55 | else {
56 |   &CorScorer::Score($metric, @ARGV);
57 | }
58 | 
59 | 


--------------------------------------------------------------------------------
/test/test.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | 
 3 | BEGIN {
 4 |     $d = $0;
 5 |     $d =~ s/\/[^\/][^\/]*$//g;
 6 |     push(@INC, $d);
 7 |     push(@INC, $d . "/../lib");
 8 | }
 9 | 
10 | use strict;
11 | use CorScorer;
12 | use CorefMetricTest;
13 | use CorefMetricTestConfig;
14 | 
15 | my $error_tolerance = 1.e-4;
16 | my $script_dir = $0;
17 | $script_dir =~ s/\/[^\/][^\/]*$//g;
18 | 
19 | foreach my $test_case (@CorefMetricTestConfig::TestCases) {
20 |   my $id = $test_case->{'id'};
21 |   my @key_response_files = ($script_dir . "/" . $test_case->{'key_file'}, 
22 |                             $script_dir . "/" . $test_case->{'response_file'});
23 |   print "\nTesting case ($id): keyFile=", $key_response_files[0], 
24 |         " responseFile=", $key_response_files[1], "\n";
25 |   my $expected_metrics = $test_case->{'expected_metrics'};
26 |   foreach my $metric_name (sort keys %$expected_metrics) {
27 |     my $expected_values = $expected_metrics->{$metric_name};
28 |     *::SAVED_STDOUT = *STDOUT;
29 |     *STDOUT = *::SUPRRES_STDOUT;
30 |     my @actual_counts = &CorScorer::Score($metric_name, @key_response_files);
31 |     # Compute R,P,and F1 from raw counts.
32 |     my @actual_values = CorefMetricTest::ComputeScoreFromCounts(@actual_counts);
33 |     *STDOUT = *::SAVED_STDOUT;
34 |     my $diff = CorefMetricTest::DiffExpectedAndActual($expected_values, \@actual_values);
35 |     printf "  metric: %+10s", $metric_name;
36 |     if ($diff < $error_tolerance) {
37 |       print " => PASS\n";
38 |     } else {
39 |       print " => FAIL\n";
40 |       print "    Expected (recall, prec, F1) = (", join(" ", @$expected_values), ")\n";
41 |       print "    Actual (recall, prec, F1) = (", join(" ", @actual_values), ")\n";
42 |       #exit(1);
43 |     }
44 |   }
45 | }
46 | 
47 | 


--------------------------------------------------------------------------------
/scorer.bat:
--------------------------------------------------------------------------------
 1 | @rem = '--*-Perl-*--
 2 | @echo off
 3 | if "%OS%" == "Windows_NT" goto WinNT
 4 | perl -x -S "%0" %1 %2 %3 %4 %5 %6 %7 %8 %9
 5 | goto endofperl
 6 | :WinNT
 7 | perl -x -S %0 %*
 8 | if NOT "%COMSPEC%" == "%SystemRoot%\system32\cmd.exe" goto endofperl
 9 | if %errorlevel% == 9009 echo You do not have Perl in your PATH.
10 | if errorlevel 1 goto script_failed_so_exit_with_non_zero_val 2>nul
11 | goto endofperl
12 | @rem ';
13 | #!perl
14 | #line 15
15 | 
16 | BEGIN {
17 |     $d = $0;
18 |     $d =~ s/\/[^\/][^\/]*$//g;
19 |     push(@INC, $d."/lib");
20 | }
21 | 
22 | use strict;
23 | use CorScorer;
24 | 
25 | if (@ARGV < 3) {
26 |   print q|
27 |   use: scorer.bat <metric> <keys_file> <response_file> [name]
28 |   
29 |   metric: the metric desired to score the results:
30 |      muc: MUCScorer (Vilain et al, 1995)
31 |      bcub: B-Cubed (Bagga and Baldwin, 1998)
32 |      ceafm: CEAF (Luo et al, 2005) using mention-based similarity
33 |      ceafe: CEAF (Luo et al, 2005) using entity-based similarity
34 |      all: uses all the metrics to score
35 |   
36 |   keys_file: file with expected coreference chains in SemEval format
37 |   
38 |   response_file: file with output of coreference system (SemEval format)
39 |   
40 |   name: [optional] the name of the document to score. If name is not
41 |      given, all the documents in the dataset will be scored. If given
42 |      name is "none" then all the documents are scored but only total
43 |      results are shown.
44 |   
45 |   |;
46 |   exit;
47 | }
48 | 
49 | my $metric = shift (@ARGV);
50 | if ($metric !~ /^(muc|bcub|ceafm|ceafe|all)/i) {
51 |   print "Invalid metric\n";
52 |   exit;
53 | }
54 | 
55 | 
56 | if ($metric eq 'all') {
57 |   foreach my $m ('muc', 'bcub', 'ceafm', 'ceafe') {
58 |     print "\nMETRIC $m:\n";
59 |     &CorScorer::Score( $m, @ARGV );
60 |   }
61 | }
62 | else {
63 |   &CorScorer::Score( $metric, @ARGV );
64 | }
65 | 
66 | __END__
67 | :endofperl
68 | 


--------------------------------------------------------------------------------
/test/DataFiles/TC-B.key:
--------------------------------------------------------------------------------
 1 | #begin document (nw/xinhua/00/chtb_0009); part 000
 2 | nw/xinhua/00/chtb_0009 -
 3 | nw/xinhua/00/chtb_0009 (10043
 4 | nw/xinhua/00/chtb_0009 -
 5 | nw/xinhua/00/chtb_0009 -
 6 | nw/xinhua/00/chtb_0009 -
 7 | nw/xinhua/00/chtb_0009 -
 8 | nw/xinhua/00/chtb_0009 -
 9 | nw/xinhua/00/chtb_0009 -
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 -
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 -
14 | nw/xinhua/00/chtb_0009 10043)
15 | nw/xinhua/00/chtb_0009 -
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 -
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 -
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 -
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 -
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 -
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 -
30 | nw/xinhua/00/chtb_0009 -
31 | nw/xinhua/00/chtb_0009 (10054
32 | nw/xinhua/00/chtb_0009 -
33 | nw/xinhua/00/chtb_0009 10054)
34 | nw/xinhua/00/chtb_0009 -
35 | nw/xinhua/00/chtb_0009 -
36 | nw/xinhua/00/chtb_0009 -
37 | nw/xinhua/00/chtb_0009 -
38 | nw/xinhua/00/chtb_0009 -
39 | nw/xinhua/00/chtb_0009 -
40 | nw/xinhua/00/chtb_0009 -
41 | nw/xinhua/00/chtb_0009 -
42 | nw/xinhua/00/chtb_0009 -
43 | nw/xinhua/00/chtb_0009 -
44 | nw/xinhua/00/chtb_0009 -
45 | nw/xinhua/00/chtb_0009 -
46 | nw/xinhua/00/chtb_0009 -
47 | nw/xinhua/00/chtb_0009 -
48 | nw/xinhua/00/chtb_0009 -
49 | nw/xinhua/00/chtb_0009 (10043)
50 | nw/xinhua/00/chtb_0009 -
51 | nw/xinhua/00/chtb_0009 -
52 | nw/xinhua/00/chtb_0009 -
53 | nw/xinhua/00/chtb_0009 -
54 | nw/xinhua/00/chtb_0009 -
55 | nw/xinhua/00/chtb_0009 -
56 | nw/xinhua/00/chtb_0009 -
57 | nw/xinhua/00/chtb_0009 -
58 | nw/xinhua/00/chtb_0009 -
59 | nw/xinhua/00/chtb_0009 -
60 | nw/xinhua/00/chtb_0009 -
61 | nw/xinhua/00/chtb_0009 -
62 | nw/xinhua/00/chtb_0009 -
63 | nw/xinhua/00/chtb_0009 -
64 | nw/xinhua/00/chtb_0009 (10054
65 | nw/xinhua/00/chtb_0009 10054)
66 | nw/xinhua/00/chtb_0009 -
67 | nw/xinhua/00/chtb_0009 -
68 | nw/xinhua/00/chtb_0009 (10054)
69 | nw/xinhua/00/chtb_0009 -
70 | nw/xinhua/00/chtb_0009 -
71 | nw/xinhua/00/chtb_0009 -
72 | nw/xinhua/00/chtb_0009 -
73 | 
74 | #end document
75 | 


--------------------------------------------------------------------------------
/test/DataFiles/TC-B-1.response:
--------------------------------------------------------------------------------
 1 | #begin document (nw/xinhua/00/chtb_0009); part 000
 2 | nw/xinhua/00/chtb_0009 -
 3 | nw/xinhua/00/chtb_0009 -
 4 | nw/xinhua/00/chtb_0009 -
 5 | nw/xinhua/00/chtb_0009 -
 6 | nw/xinhua/00/chtb_0009 -
 7 | nw/xinhua/00/chtb_0009 -
 8 | nw/xinhua/00/chtb_0009 -
 9 | nw/xinhua/00/chtb_0009 -
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 -
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 -
14 | nw/xinhua/00/chtb_0009 -
15 | nw/xinhua/00/chtb_0009 -
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 -
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 -
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 -
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 -
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 -
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 -
30 | nw/xinhua/00/chtb_0009 -
31 | nw/xinhua/00/chtb_0009 (10043
32 | nw/xinhua/00/chtb_0009 -
33 | nw/xinhua/00/chtb_0009 10043)
34 | nw/xinhua/00/chtb_0009 -
35 | nw/xinhua/00/chtb_0009 -
36 | nw/xinhua/00/chtb_0009 -
37 | nw/xinhua/00/chtb_0009 -
38 | nw/xinhua/00/chtb_0009 -
39 | nw/xinhua/00/chtb_0009 -
40 | nw/xinhua/00/chtb_0009 -
41 | nw/xinhua/00/chtb_0009 -
42 | nw/xinhua/00/chtb_0009 -
43 | nw/xinhua/00/chtb_0009 -
44 | nw/xinhua/00/chtb_0009 -
45 | nw/xinhua/00/chtb_0009 -
46 | nw/xinhua/00/chtb_0009 -
47 | nw/xinhua/00/chtb_0009 -
48 | nw/xinhua/00/chtb_0009 -
49 | nw/xinhua/00/chtb_0009 (10043)
50 | nw/xinhua/00/chtb_0009 -
51 | nw/xinhua/00/chtb_0009 -
52 | nw/xinhua/00/chtb_0009 -
53 | nw/xinhua/00/chtb_0009 -
54 | nw/xinhua/00/chtb_0009 -
55 | nw/xinhua/00/chtb_0009 -
56 | nw/xinhua/00/chtb_0009 (10043
57 | nw/xinhua/00/chtb_0009 -
58 | nw/xinhua/00/chtb_0009 -
59 | nw/xinhua/00/chtb_0009 -
60 | nw/xinhua/00/chtb_0009 10043)
61 | nw/xinhua/00/chtb_0009 -
62 | nw/xinhua/00/chtb_0009 -
63 | nw/xinhua/00/chtb_0009 -
64 | nw/xinhua/00/chtb_0009 (10054
65 | nw/xinhua/00/chtb_0009 10054)
66 | nw/xinhua/00/chtb_0009 -
67 | nw/xinhua/00/chtb_0009 -
68 | nw/xinhua/00/chtb_0009 (10054)
69 | nw/xinhua/00/chtb_0009 -
70 | nw/xinhua/00/chtb_0009 -
71 | nw/xinhua/00/chtb_0009 -
72 | nw/xinhua/00/chtb_0009 -
73 | 
74 | #end document
75 | 


--------------------------------------------------------------------------------
/test/DataFiles/TC-C.key:
--------------------------------------------------------------------------------
 1 | #begin document (nw/xinhua/00/chtb_0009); part 000
 2 | nw/xinhua/00/chtb_0009 -
 3 | nw/xinhua/00/chtb_0009 (10043
 4 | nw/xinhua/00/chtb_0009 -
 5 | nw/xinhua/00/chtb_0009 -
 6 | nw/xinhua/00/chtb_0009 -
 7 | nw/xinhua/00/chtb_0009 -
 8 | nw/xinhua/00/chtb_0009 -
 9 | nw/xinhua/00/chtb_0009 -
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 -
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 -
14 | nw/xinhua/00/chtb_0009 10043)
15 | nw/xinhua/00/chtb_0009 -
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 -
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 -
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 -
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 -
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 -
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 -
30 | nw/xinhua/00/chtb_0009 -
31 | nw/xinhua/00/chtb_0009 (10054
32 | nw/xinhua/00/chtb_0009 -
33 | nw/xinhua/00/chtb_0009 10054)
34 | nw/xinhua/00/chtb_0009 -
35 | nw/xinhua/00/chtb_0009 -
36 | nw/xinhua/00/chtb_0009 -
37 | nw/xinhua/00/chtb_0009 -
38 | nw/xinhua/00/chtb_0009 -
39 | nw/xinhua/00/chtb_0009 -
40 | nw/xinhua/00/chtb_0009 -
41 | nw/xinhua/00/chtb_0009 -
42 | nw/xinhua/00/chtb_0009 -
43 | nw/xinhua/00/chtb_0009 -
44 | nw/xinhua/00/chtb_0009 -
45 | nw/xinhua/00/chtb_0009 -
46 | nw/xinhua/00/chtb_0009 -
47 | nw/xinhua/00/chtb_0009 -
48 | nw/xinhua/00/chtb_0009 -
49 | nw/xinhua/00/chtb_0009 (10043)
50 | nw/xinhua/00/chtb_0009 -
51 | nw/xinhua/00/chtb_0009 -
52 | nw/xinhua/00/chtb_0009 -
53 | nw/xinhua/00/chtb_0009 -
54 | nw/xinhua/00/chtb_0009 -
55 | nw/xinhua/00/chtb_0009 -
56 | nw/xinhua/00/chtb_0009 -
57 | nw/xinhua/00/chtb_0009 -
58 | nw/xinhua/00/chtb_0009 -
59 | nw/xinhua/00/chtb_0009 -
60 | nw/xinhua/00/chtb_0009 -
61 | nw/xinhua/00/chtb_0009 -
62 | nw/xinhua/00/chtb_0009 -
63 | nw/xinhua/00/chtb_0009 -
64 | nw/xinhua/00/chtb_0009 (10054
65 | nw/xinhua/00/chtb_0009 10054)
66 | nw/xinhua/00/chtb_0009 -
67 | nw/xinhua/00/chtb_0009 -
68 | nw/xinhua/00/chtb_0009 (10054)
69 | nw/xinhua/00/chtb_0009 -
70 | nw/xinhua/00/chtb_0009 -
71 | nw/xinhua/00/chtb_0009 (10060)
72 | nw/xinhua/00/chtb_0009 (10060)
73 | 
74 | #end document
75 | 


--------------------------------------------------------------------------------
/test/DataFiles/TC-C-1.response:
--------------------------------------------------------------------------------
 1 | #begin document (nw/xinhua/00/chtb_0009); part 000
 2 | nw/xinhua/00/chtb_0009 -
 3 | nw/xinhua/00/chtb_0009 -
 4 | nw/xinhua/00/chtb_0009 -
 5 | nw/xinhua/00/chtb_0009 -
 6 | nw/xinhua/00/chtb_0009 -
 7 | nw/xinhua/00/chtb_0009 -
 8 | nw/xinhua/00/chtb_0009 -
 9 | nw/xinhua/00/chtb_0009 -
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 -
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 -
14 | nw/xinhua/00/chtb_0009 -
15 | nw/xinhua/00/chtb_0009 -
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 -
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 -
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 -
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 -
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 -
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 -
30 | nw/xinhua/00/chtb_0009 -
31 | nw/xinhua/00/chtb_0009 (10043
32 | nw/xinhua/00/chtb_0009 -
33 | nw/xinhua/00/chtb_0009 10043)
34 | nw/xinhua/00/chtb_0009 -
35 | nw/xinhua/00/chtb_0009 -
36 | nw/xinhua/00/chtb_0009 -
37 | nw/xinhua/00/chtb_0009 -
38 | nw/xinhua/00/chtb_0009 -
39 | nw/xinhua/00/chtb_0009 -
40 | nw/xinhua/00/chtb_0009 -
41 | nw/xinhua/00/chtb_0009 -
42 | nw/xinhua/00/chtb_0009 -
43 | nw/xinhua/00/chtb_0009 -
44 | nw/xinhua/00/chtb_0009 -
45 | nw/xinhua/00/chtb_0009 -
46 | nw/xinhua/00/chtb_0009 -
47 | nw/xinhua/00/chtb_0009 -
48 | nw/xinhua/00/chtb_0009 -
49 | nw/xinhua/00/chtb_0009 (10043)
50 | nw/xinhua/00/chtb_0009 -
51 | nw/xinhua/00/chtb_0009 -
52 | nw/xinhua/00/chtb_0009 -
53 | nw/xinhua/00/chtb_0009 -
54 | nw/xinhua/00/chtb_0009 -
55 | nw/xinhua/00/chtb_0009 -
56 | nw/xinhua/00/chtb_0009 (10043
57 | nw/xinhua/00/chtb_0009 -
58 | nw/xinhua/00/chtb_0009 -
59 | nw/xinhua/00/chtb_0009 -
60 | nw/xinhua/00/chtb_0009 10043)
61 | nw/xinhua/00/chtb_0009 -
62 | nw/xinhua/00/chtb_0009 -
63 | nw/xinhua/00/chtb_0009 -
64 | nw/xinhua/00/chtb_0009 (10054
65 | nw/xinhua/00/chtb_0009 10054)
66 | nw/xinhua/00/chtb_0009 -
67 | nw/xinhua/00/chtb_0009 -
68 | nw/xinhua/00/chtb_0009 (10054)
69 | nw/xinhua/00/chtb_0009 -
70 | nw/xinhua/00/chtb_0009 -
71 | nw/xinhua/00/chtb_0009 (10060)
72 | nw/xinhua/00/chtb_0009 (10060)
73 | 
74 | #end document
75 | 


--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
  1 | Reference Coreference Scorer
  2 | ============================
  3 | 
  4 | DESCRIPTION
  5 | -----------
  6 | 
  7 | This is the official implementation of the revised coreference scorer
  8 | used for CoNLL-2011/2012 shared tasks on coreference resolution. It
  9 | addresses issues that prevented the consistent scoring of predicted
 10 | mentions in the past.
 11 | 
 12 | 
 13 | VERSION
 14 | -------
 15 | 
 16 | The current stable (official) version for scoring predicted mentions is **v8.01**
 17 | 
 18 | CITATION
 19 | --------
 20 | 
 21 | We would appreciate if you cite the paper when you use this scorer as
 22 | some of us are academics or wanting to be academics, and citations
 23 | matter.
 24 | 
 25 |   ::
 26 | 
 27 |    @InProceedings{pradhan-EtAl:2014:P14-2,
 28 |      author    = {Pradhan, Sameer  and  Luo, Xiaoqiang  and  Recasens, Marta  and  Hovy, Eduard  and  Ng, Vincent  and  Strube, Michael},
 29 |      title     = {Scoring Coreference Partitions of Predicted Mentions: A Reference Implementation},
 30 |      booktitle = {Proceedings of the 52nd Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)},
 31 |      month     = {June},
 32 |      year      = {2014},
 33 |      address   = {Baltimore, Maryland},
 34 |      publisher = {Association for Computational Linguistics},
 35 |      pages     = {30--35},
 36 |      url       = {http://www.aclweb.org/anthology/P14-2006}
 37 |      }
 38 | 
 39 | 
 40 | USAGE
 41 | -----
 42 | 
 43 |   ::
 44 | 
 45 |      perl scorer.pl <metric> <key> <response> [<document-id>]
 46 | 
 47 | 
 48 |      <metric>: the metric desired to score the results. one of the following values:
 49 | 
 50 |      muc: MUCScorer (Vilain et al, 1995)
 51 |      bcub: B-Cubed (Bagga and Baldwin, 1998)
 52 |      ceafm: CEAF (Luo et al., 2005) using mention-based similarity
 53 |      ceafe: CEAF (Luo et al., 2005) using entity-based similarity
 54 |      blanc: BLANC (Luo et al., 2014) BLANC metric for gold and predicted mentions
 55 |      all: uses all the metrics to score
 56 | 
 57 |      <key>: file with expected coreference chains in CoNLL-2011/2012 format
 58 | 
 59 |      <response>: file with output of coreference system (CoNLL-2011/2012 format)
 60 |  
 61 |      <document-id>: optional. The name of the document to score. If name is not
 62 |                     given, all the documents in the dataset will be scored. If given
 63 |                     name is "none" then all the documents are scored but only total
 64 |                     results are shown.
 65 | 
 66 | 
 67 | OUTPUT
 68 | ------
 69 | 
 70 | The score subroutine returns an array with four values in this order:
 71 | 
 72 | Coreference Score
 73 | ~~~~~~~~~~~~~~~~~
 74 | 
 75 |   ::
 76 | 
 77 |     Recall = recall_numerator / recall_denominator
 78 |     Precision = precision_numerator / precision_denominator
 79 |     F1 = 2 * Recall * Precision / (Recall + Precision)
 80 | 
 81 | These values are to standard output when variable ``$VERBOSE`` is not null.
 82 | 
 83 | 
 84 | Identification of Mentions
 85 | ~~~~~~~~~~~~~~~~~~~~~~~~~~
 86 | 
 87 | A score for identification of mentions (recall, precision and F1) is
 88 | also included.  Mentions from system response are compared with key
 89 | mentions. This version performs strict mention matching as was used in
 90 | the CoNLL-2011 and 2012 shared tasks.
 91 | 
 92 | AUTHORS
 93 | -------
 94 | 
 95 | * Emili Sapena, Universitat Politècnica de Catalunya, http://www.lsi.upc.edu/~esapena, esapena <at> lsi.upc.edu
 96 | * Sameer Pradhan, http://cemantix.org, pradhan <at> cemantix.org
 97 | * Sebastian Martschat, sebastian.martschat <at> h-its.org
 98 | * Xiaoqiang Luo, xql <at> google.com
 99 | 
100 | 
101 | COPYRIGHT
102 | ---------
103 | 
104 |   ::
105 | 
106 |     2009-2011, Emili Sapena esapena <at> lsi.upc.edu
107 |     2011-      Sameer Pradhan pradhan <at> cemantix.org
108 | 


--------------------------------------------------------------------------------
/test/CorefMetricTest.pm:
--------------------------------------------------------------------------------
  1 | package CorefMetricTest;
  2 | use strict;
  3 | use warnings;
  4 | use Exporter;
  5 | 
  6 | our @ISA= qw(Exporter);
  7 | our @EXPORT = qw(ComputeScoreFromCounts DiffExpectedAndActual);
  8 | 
  9 | ################################################################################
 10 | # Compute recall, precision and F1.
 11 | # 
 12 | # Input: (numerator_counts_for_recall, denominator_counts_for_recall,
 13 | #         numerator_counts_for_precision, denominator_counts_for_precision)
 14 | # Output: (recall, precision, F1)
 15 | ################################################################################
 16 | sub ComputeScoreFromCounts {
 17 |   # The first 4 are also coref link counts when using BLANC.
 18 |   my ($recall_numerator, $recall_denominator, 
 19 |       $precision_numerator, $precision_denominator, @noncoref_counts) = @_;
 20 |   # The coref recall, precision, and F1 when using BLANC.
 21 |   my ($recall, $precision, $F1) = 
 22 |     RPFFromCounts($recall_numerator, $recall_denominator, 
 23 |                   $precision_numerator, $precision_denominator);
 24 | 
 25 |   # BLANC: @noncoref_counts=
 26 |   #   (noncoref_numerator_recall, noncoref_denominator_recall, 
 27 |   #    noncoref_numerator_precision, noncoref_denominator_precision) 
 28 |   if (scalar(@noncoref_counts) == 4) {
 29 |     ($recall, $precision, $F1) = CorScorer::ComputeBLANCFromCounts(
 30 | 	$recall_numerator, $recall_denominator, $precision_denominator,
 31 | 	$noncoref_counts[0], $noncoref_counts[1], $noncoref_counts[3]);
 32 |   }
 33 |   $recall = ($recall < 0) ? 0 : $recall;
 34 |   $precision = ($precision < 0) ? 0 : $precision;
 35 |   $F1 = ($F1 < 0) ? 0 : $F1;
 36 |   return ($recall, $precision, $F1);
 37 | }
 38 | 
 39 | sub RPFFromCounts
 40 | {
 41 |   my ($recall_numerator, $recall_denominator, 
 42 |       $precision_numerator, $precision_denominator, @nonCorefCounts) = @_;
 43 |   my ($recall, $precision, $F1) = (-1, -1, 0);
 44 |   if ($recall_denominator > 0) {
 45 |     $recall = $recall_numerator / $recall_denominator; 
 46 |   }
 47 |   if ($precision_denominator > 0) {
 48 |     $precision = $precision_numerator / $precision_denominator;
 49 |   }
 50 | 
 51 |   if (($recall + $precision) > 0) {
 52 |     $F1 = 2 * $recall * $precision / ($recall + $precision);
 53 |   }
 54 |   
 55 |   return ($recall, $precision, $F1);
 56 | }
 57 | 
 58 | # deprecated -- see CorScorer::ComputeBLANCFromCounts().
 59 | sub ComputeBLANCRPF
 60 | {
 61 |   my ($coref_recall, $coref_precision, $coref_F1,
 62 |       $noncoref_recall, $noncoref_precision, $noncoref_F1) = @_;
 63 | 
 64 |   my ($recall, $precision, $F1);
 65 | 
 66 |   if ($coref_recall < 0 && $noncoref_recall < 0) {
 67 |     # no key mention.
 68 |     $recall = $precision = $F1 = 0;
 69 |   } elsif ($coref_recall < 0) {
 70 |     # key: all links are non-coref (mentions are all singltons).
 71 |     $recall = $noncoref_recall;
 72 |     $precision = ($noncoref_precision < 0) ? 0 : $noncoref_precision;
 73 |     $F1 = $noncoref_F1;
 74 |   } elsif ($noncoref_recall < 0) {
 75 |     # key: all links are coref (all mentions are in one entity).
 76 |     $recall = $coref_recall;
 77 |     $precision = ($coref_precision < 0) ? 0 : $coref_precision;
 78 |     $F1 = $coref_F1;
 79 |   } else {
 80 |     #key contains both coref and non-coref links.
 81 |     if ($coref_precision < 0 && $noncoref_precision < 0) {
 82 |       # no response.
 83 |       $recall = $precision = $F1 = 0;
 84 |     } else {
 85 |       if ($coref_precision < 0) {
 86 |         # response: all links are non-coref, or response mentions are all
 87 |         # singletons.
 88 |         $coref_precision = 0;
 89 |       } elsif ($noncoref_precision < 0) {
 90 |         # response: all links are coref, or all mentions are in one entity.
 91 |         $noncoref_precision = 0;
 92 |       }
 93 |       $recall = ($coref_recall + $noncoref_recall)/2;
 94 |       $precision  = ($coref_precision + $noncoref_precision)/2;
 95 |       $F1 = ($coref_F1 + $noncoref_F1)/2;
 96 |     }
 97 |   }
 98 | 
 99 |   return ($recall, $precision, $F1);
100 | }
101 | 
102 | ##############################################################################
103 | # Compute the sum of the duifference between the expected recall, precision, 
104 | # F1 and the actual one. 
105 | ##############################################################################
106 | sub DiffExpectedAndActual {
107 |   my ($expected, $actual) = @_;
108 |   if (scalar(@$expected) != scalar(@$actual)) {
109 |     print STDERR "Expected and actual have diff dimensions: \n";
110 |     print STDERR "   Expected: ", join(" ", @$expected), "\n";
111 |     print STDERR "     Actual: ", join(" ", @$actual), "\n";
112 |     return 1.0e5;
113 |   }
114 |   my $sum = 0.0;
115 |   my $i = 0;
116 |   foreach my $e (@$expected) {
117 |     $sum += abs($e - $actual->[$i]);
118 |     ++$i;
119 |   }
120 |   return $sum;
121 | }
122 | 
123 | 1;
124 | 
125 | 


--------------------------------------------------------------------------------
/lib/Algorithm/README.Munkres:
--------------------------------------------------------------------------------
  1 | NAME
  2 |         Algorithm-Munkres : Perl extension for Munkres' solution to 
  3 |         classical Assignment problem for square and rectangular matrices 
  4 |         This module extends the solution of Assignment problem for square
  5 |         matrices to rectangular matrices by padding zeros. Thus a rectangular 
  6 |         matrix is converted to square matrix by padding necessary zeros.
  7 | 
  8 | SYNOPSIS
  9 |     use Algorithm::Munkres;
 10 | 
 11 |         @mat = (
 12 |              [2, 4, 7, 9],
 13 |              [3, 9, 5, 1],
 14 |              [8, 2, 9, 7],
 15 |              );
 16 | 
 17 |     assign(\@mat,\@out_mat);
 18 | 
 19 |         Then the @out_mat array will have the output as: (0,3,1,2),
 20 |         where 
 21 |         0th element indicates that 0th row is assigned 0th column i.e value=2
 22 |         1st element indicates that 1st row is assigned 3rd column i.e.value=1
 23 |         2nd element indicates that 2nd row is assigned 1st column.i.e.value=2
 24 |         3rd element indicates that 3rd row is assigned 2nd column.i.e.value=0
 25 | 
 26 | DESCRIPTION
 27 |         Assignment Problem: Given N jobs, N workers and the time taken by 
 28 |         each worker to complete a job then how should the assignment of a 
 29 |         Worker to a Job be done, so as to minimize the time taken. 
 30 | 
 31 |             Thus if we have 3 jobs p,q,r and 3 workers x,y,z such that:
 32 |                 x  y  z             
 33 |              p  2  4  7
 34 |              q  3  9  5
 35 |              r  8  2  9
 36 |         
 37 |             where the cell values of the above matrix give the time required
 38 |             for the worker(given by column name) to complete the job(given by 
 39 |             the row name) 
 40 |     
 41 |             then possible solutions are:    
 42 |                              Total
 43 |              1. 2, 9, 9       20
 44 |              2. 2, 2, 5        9
 45 |              3. 3, 4, 9       16
 46 |              4. 3, 2, 7       12
 47 |              5. 8, 9, 7       24
 48 |              6. 8, 4, 5       17
 49 | 
 50 |         Thus (2) is the optimal solution for the above problem.
 51 |         This kind of brute-force approach of solving Assignment problem 
 52 |         quickly becomes slow and bulky as N grows, because the number of 
 53 |         possible solution are N! and thus the task is to evaluate each 
 54 |         and then find the optimal solution.(If N=10, number of possible
 55 |         solutions: 3628800 !)
 56 |         Munkres' gives us a solution to this problem, which is implemented 
 57 |         in this module.
 58 | 
 59 |         This module also solves Assignment problem for rectangular matrices 
 60 |         (M x N) by converting them to square matrices by padding zeros. ex:
 61 |         If input matrix is:
 62 |              [2, 4, 7, 9],
 63 |              [3, 9, 5, 1],
 64 |              [8, 2, 9, 7]
 65 |         i.e 3 x 4 then we will convert it to 4 x 4 and the modified input 
 66 |         matrix will be:
 67 |              [2, 4, 7, 9],
 68 |              [3, 9, 5, 1],
 69 |              [8, 2, 9, 7],
 70 |              [0, 0, 0, 0]
 71 | 
 72 | EXPORT
 73 |         "assign" function by default.
 74 | 
 75 | INPUT
 76 |         The input matrix should be in a two dimensional array(array of 
 77 |         array) and the 'assign' subroutine expects a reference to this 
 78 |         array and not the complete array. 
 79 |         eg:assign(\@inp_mat, \@out_mat);
 80 |         The second argument to the assign subroutine is the reference 
 81 |         to the output array.
 82 | 
 83 | OUTPUT
 84 |         The assign subroutine expects references to two arrays as its 
 85 |         input paramenters. The second parameter is the reference to the
 86 |         output array. This array is populated by assign subroutine. This 
 87 |         array is single dimensional Nx1 matrix.
 88 |         For above example the output array returned will be:
 89 |          (0,
 90 |          2,
 91 |          1)
 92 | 
 93 |         where 
 94 |         0th element indicates that 0th row is assigned 0th column i.e value=2
 95 |         1st element indicates that 1st row is assigned 2nd column i.e.value=5
 96 |         2nd element indicates that 2nd row is assigned 1st column.i.e.value=2
 97 | 
 98 | SEE ALSO
 99 |         1. http://216.249.163.93/bob.pilgrim/445/munkres.html
100 | 
101 |         2. Munkres, J. Algorithms for the assignment and transportation 
102 |            Problems. J. Siam 5 (Mar. 1957), 32-38
103 | 
104 |         3. François Bourgeois and Jean-Claude Lassalle. 1971.
105 |            An extension of the Munkres algorithm for the assignment 
106 |            problem to rectangular matrices.
107 |            Communication ACM, 14(12):802-804
108 | 
109 | AUTHOR
110 |         Anagha Kulkarni, University of Minnesota Duluth
111 |         kulka020 <at> d.umn.edu
112 |         
113 |         Ted Pedersen, University of Minnesota Duluth
114 |         tpederse <at> d.umn.edu
115 | 
116 | COPYRIGHT AND LICENSE
117 |     Copyright (C) 2007-2008, Ted Pedersen and Anagha Kulkarni
118 | 
119 |     This program is free software; you can redistribute it and/or modify it
120 |     under the terms of the GNU General Public License as published by the
121 |     Free Software Foundation; either version 2 of the License, or (at your
122 |     option) any later version. This program is distributed in the hope that
123 |     it will be useful, but WITHOUT ANY WARRANTY; without even the implied
124 |     warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
125 |     GNU General Public License for more details.
126 | 
127 |     You should have received a copy of the GNU General Public License along
128 |     with this program; if not, write to the Free Software Foundation, Inc.,
129 |     59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
130 | 
131 | 


--------------------------------------------------------------------------------
/test/TestCases.README:
--------------------------------------------------------------------------------
  1 | TC-A-1 - perfect:
  2 | Key/Ref: {a} {bc} {def}
  3 | Rsp/Sys: {a} {bc} {def}
  4 | Expected: BCUB=1 [recall=6/6, prec=6/6]
  5 | Expected: MUC=1 [recall=3/3=1, prec=3/3=1]
  6 | Expected: CEAFm=1 [recall=6/6=1, prec=6/6=1]
  7 | Expected: CEAFe=1 [recall=3/3=1, prec=3/3=1]
  8 | Expected: BLANC=1 [recall_c=4/4=1, prec_c=4/4=1, recall_n=11/11=1, prec_n=11/11=1]
  9 | 
 10 | TC-A-2 -- response with missing mentions/entities
 11 | Key/Ref: {a} {bc} {def}
 12 | Rsp/Sys: {a} {de}
 13 | Expected: BCUB=.5599 [recall=7/18, prec=3/3]
 14 | Expected: MUC=0.5 [recall=1/3, prec=1/1]
 15 | Expected: CEAFm=6/9=0.67 [common=3, recall=3/6=0.5, Prec=3/3=1]
 16 | Expected: CEAFe=3.6/5=0.72 [common=1+4/5=1.8, recall=1.8/3=0.6, Prec=1.8/2=0.9]
 17 | Expected: BLANC=0.35 [recall_c=1/4, prec_c=1/1, recall_n=2/11, prec_n=2/2]
 18 | 
 19 | TC-A-3 -- response with false-alarm mentions/entities
 20 | Key/Ref: {a} {bc} {def}
 21 | Rsp/Sys: {a} {bcx} {defy} {z}
 22 | Expected: BCUB=.6748 [recall=6/6, prec=55/108]
 23 | Expected: MUC=0.75 [recall=3/3, prec=3/5]
 24 | Expected: CEAFm=12/15=0.8 [common=6, recall=6/6=1, prec=6/9=.67]
 25 | Expected: CEAFe=3.6/5=0.76 [common=1+4/5+6/7=2.66, recall=2.66/3=0.89, Prec=2.66/4=0.66]
 26 | Expected: BLANC=0.60 [recall_c=4/4, prec_c=4/9, recall_n=11/11, prec_n=11/27]
 27 | 
 28 | 
 29 | TC-A-4 -- response with both missing and false-alarm mentions/entities
 30 | Key/Ref: {a} {bc} {def}
 31 | Rsp/Sys: {a} {bcx} {dy} {z}
 32 | Expected: BCUB=.4683 [recall=5/9, prec=17/42]
 33 | Expected: MUC=1/3=.33333 [recall=1/3, prec=1/3]
 34 | Expected: CEAFm=8/13=0.62 [common=4 recall=4/6=0.67 prec=4/7=.57]
 35 | Expected: CEAFe=4.4/7=0.63 [common=1+4/5+2/5=2.2, recall=2.2/3=0.73, Prec=2.2/4=0.55]
 36 | Expected: BLANC=0.30 [recall_c=1/4, prec_c=1/4, recall_n=5/11, prec_n=5/17]
 37 | 
 38 | TC-A-5 -- response with both missing and false-alarm mentions/entities, and overlapping mentions (capitalized letter: b and B). Overlapping mention B in the aligned entity.
 39 | Key/Ref: {a} {bc} {def}
 40 | Rsp/Sys: {a} {bcxB} {dy} {z}
 41 | Expected: BCUB=.4 [recall=5/9, prec=5/16]
 42 | Expected: MUC=2/7=.28571 [recall=1/3, prec=1/4]
 43 | Expected: CEAFm=8/14=0.57 [common=4 recall=4/6=0.67 prec=4/8=.5]
 44 | Expected: CEAFe=4.14/7=0.59 [common=1+4/6+2/5=2.07, recall=2.07/3=0.69, Prec=2.07/4=0.52]
 45 | Expected: BLANC=0.25 [recall_c=1/4, prec_c=1/7, recall_n=5/11, prec_n=5/21]
 46 | 
 47 | TC-A-6 -- response with both missing and false-alarm mentions/entities, and overlapping mentions (capitalized letter: b and B). Overlapping mention B in an unaligned entity.
 48 | Key/Ref: {a} {bc} {def}
 49 | Rsp/Sys: {a} {bcx} {dy} {Bz}
 50 | Expected: BCUB=.4325 [recall=5/9, prec=17/48]
 51 | Expected: MUC=2/7=.28571 [recall=1/3, prec=1/4]
 52 | Expected: CEAFm=8/14=0.57 [common=4 recall=4/6=0.67 prec=4/8=.5]
 53 | Expected: CEAFe=4.4/7=0.63 [common=1+4/5+2/5=2.2, recall=2.2/3=0.73, Prec=2.2/4=0.55]
 54 | Expected: BLANC=0.26 [recall_c=1/4, prec_c=1/5, recall_n=5/11, prec_n=5/23]
 55 | 
 56 | TC-A-7 -- response with both missing and false-alarm mentions/entities, and duplicate mentions (capitalized letter: b and B). Duplicate mention B in the same cluster entity (note: this is diff from TC5) -- this tests mention de-duplication.
 57 | Key/Ref: {a} {bc} {def}
 58 | Rsp/Sys: {a} {bcxB} {dy} {z}
 59 |  de-dup: {a} {bcx} {dy} {z}
 60 | 
 61 | de-dup:
 62 | Expected: BCUB=.4683 [recall=5/9, prec=17/42]
 63 | Expected: MUC=1/3=.33333 [recall=1/3, prec=1/3]
 64 | Expected: CEAFm=8/13=0.61538 [common=4, recall=4/6=0.66667, Prec=4/7=0.57143]
 65 | Expected: CEAFe=4.14/7=0.62857 [common=1+4/5+2/5=2.2, recall=2.2/3=0.73333, Prec=2.2/4=0.55]
 66 | Expected: BLANC=0.30 [recall_c=1/4, prec_c=1/4, recall_n=5/11, prec_n=5/17]
 67 | 
 68 | if No de-dup: 
 69 | Expected: CEAFm=8/14=0.57 [common=4 recall=4/6=0.67 prec=4/8=.5]
 70 | Expected: CEAFe=4.14/7=0.59 [common=1+4/6+2/5=2.07, recall=2.07/3=0.69, Prec=2.07/4=0.52]
 71 | 
 72 | 
 73 | TC-A-8 -- response with both missing and false-alarm mentions/entities, and duplicate mentions (capitalized letter: b and B). Duplicate mention B in a diff entity from b.
 74 | Key/Ref: {a} {bc} {def}
 75 | Rsp/Sys: {a} {bcx} {dy} {Bz}
 76 | 
 77 | De-dup: 
 78 | Expected: BCUB=.4683 [recall=5/9, prec=17/42]
 79 | Expected: MUC=1/3=.33333 [recall=1/3, prec=1/3]
 80 | Expected: CEAFm=8/13=0.61538 [common=4 recall=4/6=0.67 prec=4/7=.57143]
 81 | Expected: CEAFe=4.14/7=0.63 [common=1+4/5+2/5=2.2, recall=2.2/3=0.73, Prec=2.2/4=0.55]
 82 | Expected: BLANC=0.30 [recall_c=1/4, prec_c=1/4, recall_n=5/11, prec_n=5/17]
 83 | 
 84 | If no de-dup: 
 85 | Expected: CEAFm=8/14=0.57 [common=4 recall=4/6=0.67 prec=4/8=.5]
 86 | Expected: CEAFe=4.14/7=0.63 [common=1+4/5+2/5=2.2, recall=2.2/3=0.73, Prec=2.2/4=0.55]
 87 | 
 88 | TC-A-9 -- show B3 can be canned: "b" is repeated 10 times so precision approaches 1
 89 | Key/Ref: {a} {bc} {def}
 90 | Rsp/Sys: {a} {bcx} {dy} {Bx10z}
 91 | de-dup Rsp/Sys: {a} {bcx} {dy} {z}
 92 | 
 93 | De-dup: 
 94 | Expected: BCUB=.4683 [recall=5/9, prec=17/42]
 95 | Expected: MUC=1/3=.33333 [recall=1/3, prec=1/3]
 96 | Expected: CEAFm=8/14=0.57 [common=4 recall=4/6=0.67 prec=4/7=.57143]
 97 | Expected: CEAFe=4.4/7=0.63 [common=1+4/5+2/5=2.2, recall=2.2/3=0.73, Prec=2.2/4=0.55]
 98 | Expected: BLANC=0.30 [recall_c=1/4, prec_c=1/4, recall_n=5/11, prec_n=5/17]
 99 | 
100 | 
101 | TC-A-10 - Gold mentions. Only singletons in the response.
102 | Key/Ref: {a} {bc} {def}
103 | Rsp/Sys: {a} {b} {c} {d} {e} {f}
104 | Expected: BCUB=.6667 [recall=3/6, prec=6/6]
105 | Expected: MUC=0 [recall=0, prec=0]
106 | Expected: BLANC=0.42 [recall_c=0/4, prec_c=0/0, f_c=0, recall_n=11/11, prec_n=11/15]
107 | 
108 | 
109 | TC-A-11 - Gold mentions. All mentions are coreferent in the response.
110 | Key/Ref: {a} {bc} {def}
111 | Rsp/Sys: {abcdef}
112 | 
113 | Expected: BCUB=0.5599 [recall=6/6, prec=7/18]
114 | Expected: MUC=6/8=0.75 [recall=3/3, prec=3/5]
115 | Expected: BLANC=0.21 [recall_c=4/4, prec_c=4/15, recall_n=0/11, prec_n=0/0, f_n=0]
116 | 
117 | 
118 | TC-A-12 - System mentions. Only singletons in the response.
119 | Key/Ref: {a} {bc} {def}
120 | Rsp/Sys: {a} {x} {y} {c} {d} {e} {z}
121 | 
122 | Expected: BCUB=0.4425 [recall=13/36, prec=4/7]
123 | Expected: MUC=0 [recall=0, prec=0]
124 | Expected: BLANC=0.16 [recall_c=0/4, prec_c=0/0, f_c=0, recall_n=5/11, prec_n=5/21]
125 | 
126 | 
127 | TC-A-13 - System mentions. All mentions are coreferent in the response.
128 | Key/Ref: {a} {bc} {def}
129 | Rsp/Sys: {axycdez}
130 | 
131 | Expected: BCUB=0.19447 [recall=17/36, prec=6/49]
132 | Expected: MUC=2/9 [recall=1/3, prec=1/6]
133 | Expected: BLANC=0.04 [recall_c=1/4, prec_c=1/21, recall_n=0/11, prec_n=0/0, f_n=0]
134 | 
135 | 
136 | TC-B-1 -- spurious mention (x) and missing mention (a) in response; link (bc) is a key non-coref link and is an incorrect response coref link.
137 | 
138 |     Keys: {ab} {cde} 
139 | Response: {bcx} {de}
140 | 
141 |     key coref links: C_k = {(ab), (cd), (de), (ce)}
142 | key non-coref links: N_k = {(ac), (ad), (ae), (bc), (bd), (be)}
143 | 
144 |     response coref links: C_r = {(bc), (bx), (cx), (de)}
145 | response non-coref links: N_r = {(bd), (be), (cd), (ce),  (xd), (xe)}
146 | 
147 | (I'll use ^ for set intersection)
148 | C_k ^ C_r = {(de)} => R_c = |C_k^C_r| / |C_k| = 1/4, P_c = 1/|C_r| = 1/4, F_c = 1/4
149 | N_k ^ N_r = {(bd), (be)} => R_n = |N_k^N_r|/|N_k| = 2/6,  P_n = 2/|N_r| = 2/6, F_n = 1/3
150 | 
151 | BLANC = 1/2 (F_c + F_n) = 7/24.
152 | 
153 | 
154 | 
155 | 
156 | 	TC-C-1 -- same as TC14 plus a new entity and its correct prediction shown. this was for testing the more than two entity case.
157 | 
158 |     Keys: {ab} {cde} {fg}
159 | Response: {bcx} {de} {fg}
160 | 
161 |          key coref links: C_k = {(ab), (cd), (de), (ce), (fg)}} 
162 |      key non-coref links: N_k = {(ac), (ad), (ae), (bc), (bd), (be), (af), (ag), (bf), (bg), (cf), (cg), (df), (dg), (ef), (eg)}
163 | 
164 |     response coref links: C_r = {(bc), (bx), (cx), (de), (fg)}
165 | response non-coref links: N_r = {(bd), (be), (cd), (ce), (xd), (xe), (bf), (bg), (cf), (cg), (xf), (xg), (df), (dg), (ef), (eg)}
166 | 
167 | (I'll use ^ for set intersection)
168 | C_k ^ C_r = {(de), (fg)} => R_c = |C_k^C_r| / |C_k| = 2/5, P_c = 2/|C_r| = 2/5, F_c = 2/5 = 0.40
169 | N_k ^ N_r = {(bd), (be), (bf), (bg), (cf), (cg), (df), (dg), (ef), (eg)} => R_n = |N_k^N_r|/|N_k| = 10/16,  P_n = 10/|N_r| = 10/16, F_n = 10/16 = 0.625
170 | 
171 | BLANC = 1/2 (F_c + F_n) = 0.5125
172 | 
173 | 
174 | 
175 | # ------------ examples from the B-CUBED paper
176 | 
177 | TC-D-1 -- merging one small cluster with a big cluster
178 | 
179 | key: {12345} {67} {89ABC}
180 | ---
181 | 
182 | 1-2-3-4-5
183 | 
184 | 6-7     
185 | 
186 | 8-9-A-B-C
187 | 
188 | 
189 | 
190 | response: {12345} {6789ABC}
191 | ---------
192 | 
193 | 1-2-3-4-5
194 | 
195 | 6-7     
196 |   |
197 |   8-9-A-B-C
198 | 
199 | 
200 | Expected: BCUB [r=12/12, p=16/21, f=0.864864865]
201 | Expected:  MUC [r=9/9, p=9/10, f=0.947368421]
202 | 
203 | 
204 | 
205 | TC-E-1 -- merging two big clusters
206 | 
207 | 
208 | key: {12345} {67} {89ABC}
209 | ---
210 | 
211 | 1-2-3-4-5
212 | 
213 | 6-7     
214 | 
215 | 8-9-A-B-C
216 | 
217 | 
218 | 
219 | response: {123456789ABC} {67}
220 | ---------
221 | 
222 | 1-2-3-4-5
223 |         |
224 | 6-7     |
225 |         |
226 |         8-9-A-B-C
227 | 
228 | 
229 | Expected: BCUB [r=1, p=7/12, f=0.736842105]
230 | Expected:  MUC [r=9/9, p=9/10, f=0.947368421]
231 | 
232 | 
233 | # ---------- examples from the MUC paper
234 | 
235 | TC-F-1 -- 
236 | 
237 |      key: {ABCD}    ---- Links: A-B; B-C; C-D
238 | response: {AB} {CD} ---- Links: A-B; C-D
239 | 
240 | Expected:  MUC [r=2/3, p=2/2, f=2*(2/3)*(2/2)/(2/3+2/2)]
241 | 
242 | 
243 | 
244 | TC-G-1 --
245 | 
246 |      key: {AB} {CD} ---- Links: A-B; C-D
247 | response: {ABCD}    ---- Links: A-B; B-C; C-D
248 | 
249 | Expected:  MUC [r=2/2, p=2/3, f=2*(2/2)*(2/3)/(2/2+2/3)]
250 | 
251 | 
252 | 
253 | TC-H-1 --
254 | 
255 |      key: {ABCD} ---- Links: A-B; B-C; B-D
256 | response: {ABCD} ---- Links: A-B; B-C; C-D
257 | 
258 | Expected:  MUC [r=1, p=1, f=1]
259 | 
260 | 
261 | 
262 | TC-I-1 --
263 | 
264 |      key: {ABCD}    ---- Links: A-B; B-C; B-D
265 | response: {AB} {CD} ---- Links: A-B; C-D
266 | 
267 | Expected:  MUC [r=2/3, p=2/2, f=2*(2/3)*(2/2)/(2/3+2/2)]
268 | 
269 | 
270 | 
271 | TC-J-1 --
272 | 
273 |      key: {ABC} ---- Links: A-B; B-C
274 | response: {AC}  ---- Links: A-C
275 | 
276 | Expected:  MUC [r=1/2, p=1/1, f=2*(1/2)*(1/1)/(1/2+1/1)]
277 | 
278 | 
279 | 
280 | TC-K-1 --
281 | 
282 |      key: {BCDEGHJ}         ---- Links: B-C; C-D; D-E; E-G; G-H; H-J
283 | response: {ABC} {DEF} {GHI} ---- Links: A-B; B-C; D-E; E-F; G-H; H-I
284 | 
285 | Expected:  MUC [r=3/6, p=3/6, f=3/6]
286 | 
287 | 
288 | 
289 | TC-L-1 --
290 | 
291 |      key: {ABC} {DEFG}    ---- Links: A-B; B-C; D-E; E-F; F-G
292 | response: {AB} {CD} {FGH} ---- Links: A-B; C-D; F-G; G-H
293 | 
294 | Expected:  MUC [r=2/5, p=2/4, f=2*(2/5)*(2/4)/(2/5+2/4)]
295 | 
296 | 
297 | TC-M-1 - Only coreferent mentions in the key. Gold mentions. Matching response. Since the key contains no non-coreference link, BLANC equals recall_c, prec_c, F_c.
298 | Key/Ref: {abcdef}
299 | Rsp/Sys: {abcdef}
300 | 
301 | Expected: BCUB=1
302 | Expected: MUC=1
303 | Expected: CEAFm=1
304 | Expected: CEAFe=1
305 | Expected: BLANC=1 [recall_c=15/15=1, prec_c=15/15=1]
306 | 
307 | 
308 | TC-M-2 - Only coreferent mentions in the key. Gold mentions. Response contains only non-coreference links. 
309 | Key/Ref: {abcdef}
310 | Rsp/Sys: {a} {b} {c} {d} {e} {f}
311 | 
312 | Expected: MUC=0
313 | Expected: BLANC=0 [recall_c=0/15=0, prec_c=0/0=0]
314 | 
315 | 
316 | TC-M-3 - Only coreferent mentions in the key. Gold mentions. Response contains coreference and non-coreference links. 
317 | Key/Ref: {abcdef}
318 | Rsp/Sys: {ab} {cde} {f}
319 | 
320 | Expected: BLANC=0.42 [recall_c=4/15, prec_c=4/4=1]
321 | 
322 | 
323 | TC-M-4 - Only coreferent mentions in the key. System mentions: only coreferent mentions. Since the key contains no non-coreference link, BLANC equals recall_c, prec_c, F_c.
324 | Key/Ref: {abcdef}
325 | Rsp/Sys: {abcxyz}
326 | 
327 | Expected: BLANC=0.20 [recall_c=3/15, prec_c=3/15]
328 | 
329 | 
330 | TC-M-5 - Only coreferent mentions in the key. System mentions: only singletons.
331 | Key/Ref: {abcdef}
332 | Rsp/Sys: {a} {b} {c} {x} {y} {z}
333 | 
334 | Expected: MUC=0
335 | Expected: BLANC=0 [recall_c=0/15=0, prec_c=0/0=0]
336 | 
337 | 
338 | TC-M-6 - Only coreferent mentions in the key. System mentions: coreference and non-coreference links. 
339 | Key/Ref: {abcdef}
340 | Rsp/Sys: {ab} {cxy} {z}
341 | 
342 | Expected: BLANC=0.11 [recall_c=1/15, prec_c=1/4]
343 | 
344 | 
345 | TC-N-1 - Only singletons in the key. Gold mentions. Matching response. Since the key contains no coreference link, BLANC equals recall_n, prec_n, F_n.
346 | Key/Ref: {a} {b} {c} {d} {e} {f}
347 | Rsp/Sys: {a} {b} {c} {d} {e} {f}
348 | 
349 | Expected: BCUB=1
350 | Expected: MUC=0
351 | Expected: CEAFm=1
352 | Expected: CEAFe=1
353 | Expected: BLANC=1 [recall_n=15/15=1, prec_n=15/15=1]
354 | 
355 | 
356 | TC-N-2 - Only singletons in the key. Gold mentions. Response contains only coreference links. 
357 | Key/Ref: {a} {b} {c} {d} {e} {f}
358 | Rsp/Sys: {abcdef}
359 | 
360 | Expected: BLANC=0 [recall_n=0/15=0, prec_n=0/0=0]
361 | 
362 | 
363 | TC-N-3 - Only singletons in the key. Gold mentions. Response contains coreference and non-coreference links. 
364 | Key/Ref: {a} {b} {c} {d} {e} {f}
365 | Rsp/Sys: {ab} {cde} {f}
366 | 
367 | Expected: BLANC=0.85 [recall_n=11/15, prec_n=11/11=1]
368 | 
369 | 
370 | TC-N-4 - Only singletons in the key. System mentions: only singletons. Since the key contains no coreference link, BLANC equals recall_n, prec_n, F_n.
371 | Key/Ref: {a} {b} {c} {d} {e} {f}
372 | Rsp/Sys: {a} {b} {c} {x} {y} {z}
373 | 
374 | Expected: MUC=0
375 | Expected: BLANC=0.20 [recall_n=3/15, prec_n=3/15]
376 | 
377 | 
378 | TC-N-5 - Only singletons in the key. System mentions: only coreference links. 
379 | Key/Ref: {a} {b} {c} {d} {e} {f}
380 | Rsp/Sys: {abcxyz}
381 | 
382 | Expected: BLANC=0 [recall_n=0/15=0, prec_n=0/0=0]
383 | 
384 | 
385 | TC-N-6 - Only singletons in the key. Only coreferent mentions in the key. System mentions: coreference and non-coreference links. 
386 | Key/Ref: {a} {b} {c} {d} {e} {f}
387 | Rsp/Sys: {ab} {cxy} {z}
388 | 
389 | Expected: BLANC=0.15 [recall_n=2/15, prec_n=2/11]
390 | 
391 | 


--------------------------------------------------------------------------------
/test/CorefMetricTestConfig.pm:
--------------------------------------------------------------------------------
  1 | ################################################################################
  2 | # This is the test configuration file. Test cases are stored in an 
  3 | # array, each element consisting of:
  4 | #   (1) id: a unique identifier for the test case.
  5 | #   (2) key_file: the key file to be tested in the CoNLL format.
  6 | #   (3) response_file: the response file to be tested in the CoNLL format.
  7 | #   (4) expected_metrics: is a hash label from a metric name (identical to those
  8 | #                         used in the scorer.{pl|bat}) to an array of expected
  9 | #                         metric values. All metrics have 3 expected numbers:
 10 | #                         (recall, precision, F-measure).
 11 | ################################################################################
 12 | 
 13 | package CorefMetricTestConfig;
 14 | use strict;
 15 | use warnings;
 16 | use Exporter;
 17 | 
 18 | our @ISA= qw( Exporter );
 19 | 
 20 | # these are exported by default.
 21 | our @EXPORT = qw(TestCases);
 22 | 
 23 | #
 24 | # Values following metric names are [recall, precision, F1]
 25 | #
 26 | our @TestCases = (
 27 | { id => "A1", 
 28 |   key_file => "DataFiles/TC-A.key",
 29 |   response_file => "DataFiles/TC-A-1.response",
 30 |   expected_metrics => { "muc" => [1, 1, 1], 
 31 |                         "bcub" => [6/6, 6/6, 1],
 32 |                         "ceafm" => [1, 1, 1],
 33 |                         "ceafe" => [1, 1, 1],
 34 |                         "blanc" => [1, 1, 1] }
 35 | },
 36 | { id => "A2", 
 37 |   key_file => "DataFiles/TC-A.key", 
 38 |   response_file => "DataFiles/TC-A-2.response",
 39 |   expected_metrics => { "muc" => [1/3, 1/1, 0.5], 
 40 |                         "bcub" => [(7/3)/6, 3/3, 14/25],
 41 |                         "ceafm" => [0.5, 1, 0.66667],
 42 |                         "ceafe" => [0.6, 0.9, 0.72],
 43 |                         "blanc" => [0.21591, 1, 0.35385] }
 44 | },
 45 | { id => "A3", 
 46 |   key_file => "DataFiles/TC-A.key", 
 47 |   response_file => "DataFiles/TC-A-3.response",
 48 |   expected_metrics => { "muc" => [3/3, 3/5, 0.75], 
 49 |                         "bcub" => [6/6, (4+7/12)/9, 110/163],
 50 |                         "ceafm" => [1, 0.66667, 0.8],
 51 |                         "ceafe" => [0.88571, 0.66429, 0.75918],
 52 |                         "blanc" => [1, 0.42593, 0.59717] }
 53 | },
 54 | { id => "A4", 
 55 |   key_file => "DataFiles/TC-A.key",
 56 |   response_file => "DataFiles/TC-A-4.response",
 57 |   expected_metrics => { "muc" => [1/3, 1/3, 1/3], 
 58 |                         "bcub" => [(3+1/3)/6, (1+4/3+1/2)/7, 2*(5/9)*(17/42)/((5/9)+(17/42))],
 59 |                         "ceafm" => [0.66667, 0.57143, 0.61538],
 60 |                         "ceafe" => [0.73333, 0.55, 0.62857],
 61 |                         "blanc" => [0.35227, 0.27206, 0.30357] }
 62 | },
 63 | { id => "A5", 
 64 |   key_file => "DataFiles/TC-A.key", 
 65 |   response_file => "DataFiles/TC-A-5.response",
 66 |   expected_metrics => { "muc" => [1/3, 1/4, 2/7], 
 67 |                         "bcub" => [(3+1/3)/6, 2.5/8, 2*(5/9)*(5/16)/((5/9)+(5/16))],
 68 |                         "ceafm" => [0.66667, 0.5, 0.57143],
 69 |                         "ceafe" => [0.68889, 0.51667, 0.59048],
 70 |                         "blanc" => [0.35227, 0.19048, 0.24716] }
 71 | },
 72 | { id => "A6", 
 73 |   key_file => "DataFiles/TC-A.key", 
 74 |   response_file => "DataFiles/TC-A-6.response",
 75 |   expected_metrics => { "muc" => [1/3, 1/4, 2/7],
 76 |                         "bcub" => [(10/3)/6, (1+4/3+1/2)/8, 2*(5/9)*(17/48)/((5/9)+(17/48))],
 77 |                         "ceafm" => [0.66667, 0.5, 0.57143],
 78 |                         "ceafe" => [0.73333, 0.55, 0.62857],
 79 |                         "blanc" => [0.35227, 0.20870, 0.25817] }
 80 | },
 81 | { id => "A7", 
 82 |   key_file => "DataFiles/TC-A.key", 
 83 |   response_file => "DataFiles/TC-A-7.response",
 84 |   expected_metrics => { "muc" => [1/3, 1/3, 1/3], 
 85 |                         "bcub" => [(10/3)/6, (1+4/3+1/2)/7, 2*(5/9)*(17/42)/((5/9)+(17/42))],
 86 |                         "ceafm" => [0.66667, 0.57143, 0.61538],
 87 |                         "ceafe" => [0.73333, 0.55, 0.62857],
 88 |                         "blanc" => [0.35227, 0.27206, 0.30357] }
 89 | },
 90 | { id => "A8", 
 91 |   key_file => "DataFiles/TC-A.key", 
 92 |   response_file => "DataFiles/TC-A-8.response",
 93 |   expected_metrics => { "muc" => [1/3, 1/3, 1/3], 
 94 |                         "bcub" => [(10/3)/6, (1+4/3+1/2)/7, 2*(5/9)*(17/42)/((5/9)+(17/42))],
 95 |                         "ceafm" => [0.66667, 0.57143, 0.61538],
 96 |                         "ceafe" => [0.73333, 0.55, 0.62857],
 97 |                         "blanc" => [0.35227, 0.27206, 0.30357] }
 98 | },
 99 | { id => "A9", 
100 |   key_file => "DataFiles/TC-A.key", 
101 |   response_file => "DataFiles/TC-A-9.response",
102 |   expected_metrics => { "muc" => [1/3, 1/3, 1/3],
103 |                         "bcub" => [(10/3)/6, (1+4/3+1/2)/7, 2*(5/9)*(17/42)/((5/9)+(17/42))],
104 |                         "ceafm" => [0.66667, 0.57143, 0.61538],
105 |                         "ceafe" => [0.73333, 0.55, 0.62857],
106 |                         "blanc" => [0.35227, 0.27206, 0.30357] }
107 | },
108 | { id => "A10", 
109 |   key_file => "DataFiles/TC-A.key", 
110 |   response_file => "DataFiles/TC-A-10.response",
111 |   expected_metrics => { "muc" => [0, 0, 0], 
112 |                         "bcub" => [3/6, 6/6, 2/3],
113 |                         #”ceafm" => [1, 1, 1],
114 |                         #”ceafe" => [1, 1, 1],
115 |                         "blanc" => [0.5, 0.36667, 0.42308] }
116 | },
117 | { id => "A11", 
118 |   key_file => "DataFiles/TC-A.key", 
119 |   response_file => "DataFiles/TC-A-11.response",
120 |   expected_metrics => { "muc" => [3/3, 3/5, 6/8], 
121 |                         "bcub" => [6/6, (1/6+2*2/6+3*3/6)/6, 14/25],
122 |                         #”ceafm" => [1, 1, 1],
123 |                         #”ceafe" => [1, 1, 1],
124 |                         "blanc" => [0.5, 0.13333, 0.21053] }
125 | },
126 | { id => "A12", 
127 |   key_file => "DataFiles/TC-A.key", 
128 |   response_file => "DataFiles/TC-A-12.response",
129 |   expected_metrics => { "muc" => [0, 0, 0], 
130 |                         "bcub" => [(1+1/2+2/3)/6, 4/7, 2*(13/36)*(4/7)/((13/36)+(4/7))],
131 |                         #”ceafm" => [1, 1, 1],
132 |                         #”ceafe" => [1, 1, 1],
133 |                         "blanc" => [0.22727, 0.11905, 0.15625] }
134 | },
135 | { id => "A13", 
136 |   key_file => "DataFiles/TC-A.key", 
137 |   response_file => "DataFiles/TC-A-13.response",
138 |   expected_metrics => { "muc" => [1/3, 1/6, 2/9], 
139 |                         "bcub" => [(1+1/2+2*2/3)/6, (1/7+1/7+2*2/7)/7, 2*(17/36)*(6/49)/((17/36)+(6/49))],
140 |                         #”ceafm" => [1, 1, 1],
141 |                         #”ceafe" => [1, 1, 1],
142 |                         "blanc" => [0.125, 0.02381, 0.04] }
143 | },
144 | { id => "B1", 
145 |   key_file => "DataFiles/TC-B.key", 
146 |   response_file => "DataFiles/TC-B-1.response",
147 |   expected_metrics => { #"muc" => [1, 1, 1], 
148 |                         #"bcub" => [1, 1, 1],
149 |                         #”ceafm" => [1, 1, 1],
150 |                         #”ceafe" => [1, 1, 1],
151 |                         "blanc" => [1/2 * (1/4 + 1/3), 1/2 * (1/4 + 1/3), 1/2 * (1/4 + 1/3)] }
152 | },
153 | { id => "C1", 
154 |   key_file => "DataFiles/TC-C.key", 
155 |   response_file => "DataFiles/TC-C-1.response",
156 |   expected_metrics => { #"muc" => [1, 1, 1], 
157 |                         #"bcub" => [1, 1, 1],
158 |                         #”ceafm" => [1, 1, 1],
159 |                         #”ceafe" => [1, 1, 1],
160 |                         "blanc" => [1/2 * (2/5 + 10/16), 1/2 * (2/5 + 10/16), 1/2 * (2/5 + 10/16)] }
161 | },
162 | { id => "D1", 
163 | 		key_file => "DataFiles/TC-D.key", 
164 | 		response_file => "DataFiles/TC-D-1.response",
165 |  		expected_metrics => { "muc" => [9/9, 9/10, 2*(9/9)*(9/10)/(9/9+9/10)], 
166 |                           "bcub" => [12/12, 16/21, 2*(12/12)*(16/21)/(12/12+16/21)],
167 |                           #"ceafm" => [1, 1, 1],
168 |                           #"ceafe" => [1, 1, 1],
169 |                           #"blanc" => [1, 1, 1]
170 |                         }
171 | },
172 | { id => "E1", 
173 | 		key_file => "DataFiles/TC-E.key", 
174 | 		response_file => "DataFiles/TC-E-1.response",
175 |  		expected_metrics => { "muc" => [9/9, 9/10, 2*(9/9)*(9/10)/(9/9+9/10)], 
176 |                           "bcub" => [1, 7/12, 2*1*(7/12)/(1+7/12)],
177 |                           #"ceafm" => [1, 1, 1],
178 |                           #"ceafe" => [1, 1, 1],
179 |                           #"blanc" => [1, 1, 1]
180 |                         }
181 | },
182 | { id => "F1", 
183 | 		key_file => "DataFiles/TC-F.key", 
184 | 		response_file => "DataFiles/TC-F-1.response",
185 |  		expected_metrics => { "muc" => [2/3, 2/2, 2*(2/3)*(2/2)/(2/3+2/2)] ,
186 |                           #"bcub" => ,
187 |                           #"ceafm" => ,
188 |                           #"ceafe" => ,
189 |                           #"blanc" => 
190 |                         }
191 | },
192 | { id => "G1", 
193 | 		key_file => "DataFiles/TC-G.key", 
194 | 		response_file => "DataFiles/TC-G-1.response",
195 |  		expected_metrics => { "muc" => [2/2, 2/3, 2*(2/2)*(2/3)/(2/2+2/3)],
196 |                           #"bcub" => ,
197 |                           #"ceafm" => ,
198 |                           #"ceafe" => ,
199 |                           #"blanc" => 
200 |                         }
201 | },
202 | { id => "H1", 
203 | 		key_file => "DataFiles/TC-H.key", 
204 | 		response_file => "DataFiles/TC-H-1.response",
205 |  		expected_metrics => { "muc" => [1, 1, 1],
206 |                           #"bcub" => ,
207 |                           #"ceafm" => ,
208 |                           #"ceafe" => ,
209 |                           #"blanc" => 
210 |                         }
211 | },
212 | { id => "I1", 
213 | 		key_file => "DataFiles/TC-I.key", 
214 | 		response_file => "DataFiles/TC-I-1.response",
215 |  		expected_metrics => { "muc" => [2/3, 2/2, 2*(2/3)*(2/2)/(2/3+2/2)],
216 |                           #"bcub" => ,
217 |                           #"ceafm" => ,
218 |                           #"ceafe" => ,
219 |                           #"blanc" => 
220 |                         }
221 | },
222 | { id => "J1", 
223 | 		key_file => "DataFiles/TC-J.key", 
224 | 		response_file => "DataFiles/TC-J-1.response",
225 |  		expected_metrics => { "muc" => [1/2, 1/1, 2*(1/2)*(1/1)/(1/2+1/1)],
226 |                           #"bcub" => ,
227 |                           #"ceafm" => ,
228 |                           #"ceafe" => ,
229 |                           #"blanc" => 
230 |                         }
231 | },
232 | { id => "K1", 
233 | 		key_file => "DataFiles/TC-K.key", 
234 | 		response_file => "DataFiles/TC-K-1.response",
235 |  		expected_metrics => { "muc" => [3/6, 3/6, 3/6],
236 |                           #"bcub" => ,
237 |                           #"ceafm" => ,
238 |                           #"ceafe" => ,
239 |                           #"blanc" => 
240 |                         }
241 | },
242 | { id => "L1", 
243 | 		key_file => "DataFiles/TC-L.key", 
244 | 		response_file => "DataFiles/TC-L-1.response",
245 |  		expected_metrics => { "muc" => [2/5, 2/4, 2*(2/5)*(2/4)/(2/5+2/4)],
246 |                           #"bcub" => ,
247 |                           #"ceafm" => ,
248 |                           #"ceafe" => ,
249 |                           #"blanc" => 
250 |                         }
251 | },
252 | { id => "M1", 
253 | 		key_file => "DataFiles/TC-M.key", 
254 | 		response_file => "DataFiles/TC-M-1.response",
255 |  		expected_metrics => { "muc" => [1, 1, 1],
256 |                           "bcub" => [1, 1, 1],
257 |                           "ceafm" => [1, 1, 1],
258 |                           "ceafe" => [1, 1, 1],
259 |                           "blanc" => [1, 1, 1] }
260 | },
261 | { id => "M2", 
262 | 		key_file => "DataFiles/TC-M.key", 
263 | 		response_file => "DataFiles/TC-M-2.response",
264 |  		expected_metrics => { "muc" => [0, 0, 0],
265 |                           #"bcub" => ,
266 |                           #"ceafm" => ,
267 |                           #"ceafe" => ,
268 |                           "blanc" => [0, 0, 0] }
269 | },
270 | { id => "M3", 
271 | 		key_file => "DataFiles/TC-M.key", 
272 | 		response_file => "DataFiles/TC-M-3.response",
273 |  		expected_metrics => { #"muc" => ,
274 |                           #"bcub" => ,
275 |                           #"ceafm" => ,
276 |                           #"ceafe" => ,
277 |                           "blanc" => [0.26667, 1, 0.42105] }
278 | },
279 | { id => "M4", 
280 | 		key_file => "DataFiles/TC-M.key", 
281 | 		response_file => "DataFiles/TC-M-4.response",
282 |  		expected_metrics => { #"muc" => ,
283 |                           #"bcub" => ,
284 |                           #"ceafm" => ,
285 |                           #"ceafe" => ,
286 |                           "blanc" => [0.2, 0.2, 0.2] }
287 | },
288 | { id => "M5", 
289 | 		key_file => "DataFiles/TC-M.key", 
290 | 		response_file => "DataFiles/TC-M-5.response",
291 |  		expected_metrics => { "muc" => [0, 0, 0],
292 |                           #"bcub" => ,
293 |                           #"ceafm" => ,
294 |                           #"ceafe" => ,
295 |                           "blanc" => [0, 0, 0] }
296 | },
297 | { id => "M6", 
298 | 		key_file => "DataFiles/TC-M.key", 
299 | 		response_file => "DataFiles/TC-M-6.response",
300 |  		expected_metrics => { #"muc" => ,
301 |                           #"bcub" => ,
302 |                           #"ceafm" => ,
303 |                           #"ceafe" => ,
304 |                           "blanc" => [0.06667, 0.25, 0.10526] }
305 | },
306 | { id => "N1", 
307 | 		key_file => "DataFiles/TC-N.key", 
308 | 		response_file => "DataFiles/TC-N-1.response",
309 |  		expected_metrics => { "muc" => [0, 0, 0],
310 |                           #"bcub" => [1, 1, 1],
311 |                           #"ceafm" => [1, 1, 1],
312 |                           #"ceafe" => [1, 1, 1],
313 |                           "blanc" => [1, 1, 1] }
314 | },
315 | { id => "N2", 
316 | 		key_file => "DataFiles/TC-N.key", 
317 | 		response_file => "DataFiles/TC-N-2.response",
318 |  		expected_metrics => { "muc" => [0, 0, 0],
319 |                           #"bcub" => ,
320 |                           #"ceafm" => ,
321 |                           #"ceafe" => ,
322 |                           "blanc" => [0, 0, 0] }
323 | },
324 | { id => "N3", 
325 | 		key_file => "DataFiles/TC-N.key", 
326 | 		response_file => "DataFiles/TC-N-3.response",
327 |  		expected_metrics => { #"muc" => ,
328 |                           #"bcub" => ,
329 |                           #"ceafm" => ,
330 |                           #"ceafe" => ,
331 |                           "blanc" => [0.73333, 1, 0.84615] }
332 | },
333 | { id => "N4", 
334 | 		key_file => "DataFiles/TC-N.key", 
335 | 		response_file => "DataFiles/TC-N-4.response",
336 |  		expected_metrics => { "muc" => [0, 0, 0],
337 |                           #"bcub" => ,
338 |                           #"ceafm" => ,
339 |                           #"ceafe" => ,
340 |                           "blanc" => [0.2, 0.2, 0.2] }
341 | },
342 | { id => "N5", 
343 | 		key_file => "DataFiles/TC-N.key", 
344 | 		response_file => "DataFiles/TC-N-5.response",
345 |  		expected_metrics => { #"muc" => ,
346 |                           #"bcub" => ,
347 |                           #"ceafm" => ,
348 |                           #"ceafe" => ,
349 |                           "blanc" => [0, 0, 0] }
350 | },
351 | { id => "N6", 
352 | 		key_file => "DataFiles/TC-N.key", 
353 | 		response_file => "DataFiles/TC-N-6.response",
354 |  		expected_metrics => { #"muc" => ,
355 |                           #"bcub" => ,
356 |                           #"ceafm" => ,
357 |                           #"ceafe" => ,
358 |                           "blanc" => [0.13333, 0.18182, 0.15385] }
359 | }
360 | 
361 | );
362 | 
363 | 1;
364 | 


--------------------------------------------------------------------------------
/lib/Algorithm/Munkres.pm:
--------------------------------------------------------------------------------
  1 | package Algorithm::Munkres;
  2 | 
  3 | use 5.006;
  4 | use strict;
  5 | use warnings;
  6 | 
  7 | require Exporter;
  8 | 
  9 | our @ISA = qw(Exporter);
 10 | 
 11 | our @EXPORT = qw( assign );
 12 | 
 13 | our $VERSION = '0.08';
 14 | 
 15 | #Variables global to the package
 16 | my @mat = ();
 17 | my @mask = ();
 18 | my @colcov = ();
 19 | my @rowcov = ();
 20 | my $Z0_row = 0;
 21 | my $Z0_col = 0;
 22 | my @path = ();
 23 | 
 24 | #The exported subroutine.
 25 | #Expected Input: Reference to the input matrix (MxN)
 26 | #Output: Mx1 matrix, giving the column number of the value assigned to each row. (For more explaination refer perldoc)
 27 | sub assign
 28 | {
 29 |     #reference to the input matrix
 30 |     my $rmat = shift;
 31 |     my $rsolution_mat = shift;
 32 |     my ($row, $row_len) = (0,0);
 33 | 
 34 |     # re-initialize that global variables
 35 |     @mat = ();
 36 |     @mask = ();
 37 |     @colcov = ();
 38 |     @rowcov = ();
 39 |     $Z0_row = 0;
 40 |     $Z0_col = 0;
 41 |     @path = ();
 42 | 
 43 |     #variables local to the subroutine
 44 |     my $step = 0;
 45 |     my ($i, $j) = (0,0);
 46 | 
 47 |     #the input matrix
 48 |     my @inp_mat = @$rmat;
 49 | 
 50 |     #copy the orginal matrix, before applying the algorithm to the matrix
 51 |     foreach (@inp_mat)
 52 |     {
 53 | 	push @mat, [ @$_ ];
 54 |     }
 55 | 
 56 |     #check if the input matrix is well-formed i.e. either square or rectangle.
 57 |     $row_len = $#{$mat[0]};
 58 |     foreach my $row (@mat)
 59 |     {
 60 | 	if($row_len != $#$row)
 61 | 	{
 62 | 	    die "Please check the input matrix.\nThe input matrix is not a well-formed matrix!\nThe input matrix has to be rectangular or square matrix.\n";
 63 | 	}
 64 |     }
 65 | 
 66 |     #check if the matrix is a square matrix, 
 67 |     #if not convert it to square matrix by padding zeroes.
 68 |     if($#mat < $#{$mat[0]})
 69 |     {
 70 | 	# Add rows
 71 | 	my $diff = $#{$mat[0]} - $#mat;
 72 | 	for (1 .. $diff)
 73 | 	{
 74 | 	    push @mat, [ (0) x @{$mat[0]} ];
 75 | 	}
 76 |     }
 77 |     elsif($#mat > $#{$mat[0]})
 78 |     {
 79 | 	# Add columns
 80 | 	my $diff = $#mat - $#{$mat[0]};
 81 | 	for (0 .. $#mat)
 82 | 	{
 83 | 	    push @{$mat[$_]}, (0) x $diff;
 84 | 	}
 85 |     }
 86 | 
 87 |     #initialize mask, column cover and row cover matrices
 88 |     clear_covers();
 89 | 
 90 |     for($i=0;$i<=$#mat;$i++)
 91 |     {
 92 |  	push @mask, [ (0) x @mat ];
 93 |     }
 94 | 
 95 |     #The algorithm can be grouped in 6 steps.
 96 |     &stepone();
 97 |     &steptwo();
 98 |     $step = &stepthree();
 99 |     while($step == 4)
100 |     {
101 | 	$step = &stepfour();
102 | 	while($step == 6)
103 | 	{
104 | 	    &stepsix();
105 | 	    $step = &stepfour();	    
106 | 	}
107 | 	&stepfive();
108 | 	$step = &stepthree();
109 |     }
110 | 
111 |     #create the output matrix
112 |     for my $i (0 .. $#mat)
113 |     {
114 | 	for my $j (0 .. $#{$mat[$i]})
115 | 	{
116 | 	    if($mask[$i][$j] == 1)
117 | 	    {
118 | 		$rsolution_mat->[$i] = $j;
119 | 	    }
120 | 	}
121 |     }
122 | 
123 | 
124 | #Code for tracing------------------
125 |     <<'ee';
126 |     print "\nInput Matrix:\n";
127 |     for($i=0;$i<=$#mat;$i++)
128 |     {
129 | 	for($j=0;$j<=$#mat;$j++)
130 | 	{
131 | 	    print $mat[$i][$j] . "\t";
132 | 	}
133 | 	print "\n";
134 |     }
135 |     
136 |     print "\nMask Matrix:\n";
137 |     for($i=0;$i<=$#mat;$i++)
138 |     {
139 | 	for($j=0;$j<=$#mat;$j++)
140 | 	{
141 | 	    print $mask[$i][$j] . "\t";
142 | 	}
143 | 	print "\n";
144 |     }
145 | 
146 |     print "\nOutput Matrix:\n";
147 |     print "$_\n" for @$rsolution_mat;
148 | ee
149 | 
150 | #----------------------------------
151 | 
152 | }
153 | 
154 | #Step 1 - Find minimum value for every row and subtract this min from each element of the row.
155 | sub stepone
156 | {
157 | #    print "Step 1 \n";
158 | 
159 |     #Find the minimum value for every row
160 |     for my $row (@mat)
161 |     {
162 | 	my $min = $row->[0];
163 | 	for (@$row)
164 | 	{
165 | 	    $min = $_ if $min > $_;
166 | 	}    
167 | 	
168 |         #Subtract the minimum value of the row from each element of the row.
169 | 	@$row = map {$_ - $min} @$row;
170 |     }
171 | #    print "Step 1 end \n";
172 | }
173 | 
174 | #Step 2 - Star the zeroes, Create the mask and cover matrices. Re-initialize the cover matrices for next steps.
175 | #To star a zero: We search for a zero in the matrix and than cover the column and row in which it occurs. Now this zero is starred.
176 | #A next starred zero can occur only in those columns and rows which have not been previously covered by any other starred zero.
177 | sub steptwo
178 | {
179 | #    print "Step 2 \n";
180 |  
181 |     my ($i, $j) = (0,0);
182 | 
183 |     for($i=0;$i<=$#mat;$i++)
184 |     {
185 | 	for($j=0;$j<=$#{$mat[$i]};$j++)
186 | 	{
187 | 	    if($mat[$i][$j] == 0 && $colcov[$j] == 0 && $rowcov[$i] == 0)
188 | 	    {
189 | 		$mask[$i][$j] = 1;
190 | 		$colcov[$j] = 1;
191 | 		$rowcov[$i] = 1;
192 | 	    }
193 | 	}
194 |     }
195 |     #Re-initialize the cover matrices
196 |     &clear_covers();
197 | #    print "Step 2 end\n";
198 | }
199 | 
200 | #Step 3 - Check if each column has a starred zero. If yes then the problem is solved else proceed to step 4
201 | sub stepthree
202 | {
203 | #    print "Step 3 \n";
204 | 
205 |     my $cnt = 0;
206 | 
207 |     for my $i (0 .. $#mat)
208 |     {
209 | 	for my $j (0 .. $#mat)
210 | 	{
211 | 	    if($mask[$i][$j] == 1)
212 | 	    {
213 | 		$colcov[$j] = 1;
214 | 		$cnt++;
215 | 	    }
216 | 	}
217 |     }
218 |     if($cnt > $#mat)
219 |     {
220 | #       print "Step 3 end. Next expected step 7 \n";
221 |        return 7;
222 |     }
223 |     else
224 |     {
225 | #       print "Step 3 end. Next expected step 4 \n";
226 |        return 4;
227 |     }
228 | 
229 | }
230 | 
231 | #Step 4 - Try to find a zero which is not starred and whose columns and rows are not yet covered. 
232 | #If such a zero found, prime it, try to find a starred zero in its row, 
233 | #                                                 if not found proceed to step 5 
234 | #                                                 else continue
235 | #Else proceed to step 6.
236 | sub stepfour
237 | {
238 | #    print "Step 4 \n";
239 | 
240 |     while(1)
241 |     {
242 | 	my ($row, $col) = &find_a_zero();
243 | 	if ($row < 0)
244 | 	{
245 | 	    # No zeroes
246 | 	    return 6;
247 | 	}
248 | 
249 | 	$mask[$row][$col] = 2;
250 | 	my $star_col = &find_star_in_row($row);
251 | 	if ($star_col >= 0)
252 | 	{
253 | 	    $col = $star_col;
254 | 	    $rowcov[$row] = 1;
255 | 	    $colcov[$col] = 0;
256 | 	}
257 | 	else
258 | 	{
259 | 	    $Z0_row = $row;
260 | 	    $Z0_col = $col;
261 | 	    return 5;
262 | 	}
263 |     }
264 | }
265 | 
266 | #Tries to find yet uncovered zero
267 | sub find_a_zero
268 | {
269 |     for my $i (0 .. $#mat)
270 |     {
271 | 	next if $rowcov[$i];
272 | 
273 | 	for my $j (reverse(0 .. $#mat))  # Prefer large $j
274 | 	{
275 | 	    next if $colcov[$j];
276 | 	    return ($i, $j) if $mat[$i][$j] == 0;
277 | 	}
278 |     }
279 | 
280 |     return (-1, -1);
281 | }
282 | 
283 | #Tries to find starred zero in the given row and returns the column number
284 | sub find_star_in_row
285 | {
286 |     my $row = shift;
287 | 
288 |     for my $j (0 .. $#mat)
289 |     {
290 | 	if($mask[$row][$j] == 1)
291 | 	{
292 | 	    return $j;
293 | 	}
294 |     }
295 |     return -1;
296 | }
297 | 
298 | #Step 5 - Try to find a starred zero in the column of the uncovered zero found in the step 4.
299 | #If starred zero found, try to find a prime zero in its row.
300 | #Continue finding starred zero in the column and primed zero in the row until, 
301 | #we get to a primed zero which does not have a starred zero in its column.
302 | #At this point reduce the non-zero values of mask matrix by 1. i.e. change prime zeros to starred zeroes.
303 | #Clear the cover matrices and clear any primes i.e. values=2 from mask matrix.
304 | sub stepfive
305 | {
306 | #    print "Step 5 \n";
307 | 
308 |     my $cnt = 0;
309 |     my $done = 0;
310 | 
311 |     $path[$cnt][0] = $Z0_row;
312 |     $path[$cnt][1] = $Z0_col;
313 |     
314 |     while($done == 0)
315 |     {
316 | 	my $row = &find_star_in_col($path[$cnt][1]);
317 | 	if($row > -1)
318 | 	{
319 | 	    $cnt++;
320 | 	    $path[$cnt][0] = $row;
321 | 	    $path[$cnt][1] = $path[$cnt - 1][1];
322 | 	}
323 | 	else
324 | 	{
325 | 	    $done = 1;
326 | 	}
327 | 	if($done == 0)
328 | 	{
329 | 	    my $col = &find_prime_in_row($path[$cnt][0]);
330 | 	    $cnt++;
331 | 	    $path[$cnt][0] = $path[$cnt - 1][0];
332 | 	    $path[$cnt][1] = $col;
333 | 	}
334 |     }
335 |     &convert_path($cnt);
336 |     &clear_covers();
337 |     &erase_primes();
338 | 
339 | #    print "Step 5 end \n";
340 | }
341 | 
342 | #Tries to find starred zero in the given column and returns the row number
343 | sub find_star_in_col
344 | {
345 |     my $col = shift;
346 | 
347 |     for my $i (0 .. $#mat)
348 |     {
349 | 	return $i if $mask[$i][$col] == 1;
350 |     }
351 |     
352 |     return -1;
353 | }
354 | 
355 | #Tries to find primed zero in the given row and returns the column number
356 | sub find_prime_in_row
357 | {
358 |     my $row = shift;
359 | 
360 |     for my $j (0 .. $#mat)
361 |     {
362 | 	return $j if $mask[$row][$j] == 2;
363 |     }
364 |     
365 |     return -1;
366 | }
367 | 
368 | #Reduces non-zero value in the mask matrix by 1.
369 | #i.e. converts all primes to stars and stars to none.
370 | sub convert_path
371 | {
372 |     my $cnt = shift;
373 | 
374 |     for my $i (0 .. $cnt)
375 |     {
376 | 	for ( $mask[$path[$i][0]][$path[$i][1]] ) {
377 | 	    $_ = ( $_ == 1 ) ? 0 : 1;
378 | 	}
379 |     }
380 | }
381 | 
382 | #Clears cover matrices
383 | sub clear_covers
384 | {
385 |     @rowcov = @colcov = (0) x @mat;
386 | }
387 | 
388 | #Changes all primes i.e. values=2 to 0.
389 | sub erase_primes
390 | {
391 |     for my $row (@mask)
392 |     {
393 | 	for my $j (0 .. $#$row)
394 | 	{
395 | 	    $row->[$j] = 0 if $row->[$j] == 2;
396 | 	}
397 |     }
398 | }
399 | 
400 | #Step 6 - Find the minimum value from the rows and columns which are currently not covered.
401 | #Subtract this minimum value from all the elements of the columns which are not covered.
402 | #Add this minimum value to all the elements of the rows which are covered.
403 | #Proceed to step 4.
404 | sub stepsix
405 | {
406 | #    print "Step 6 \n";
407 |     my ($i, $j);
408 |     my $minval = 0;
409 | 
410 |     $minval = &find_smallest();
411 |     
412 |     for($i=0;$i<=$#mat;$i++)
413 |     {
414 | 	for($j=0;$j<=$#{$mat[$i]};$j++)
415 | 	{
416 | 	    if($rowcov[$i] == 1)
417 | 	    {
418 | 		$mat[$i][$j] += $minval;
419 | 	    }
420 | 	    if($colcov[$j] == 0)
421 | 	    {
422 | 		$mat[$i][$j] -= $minval;
423 | 	    }
424 | 	}
425 |     }
426 | 
427 | #    print "Step 6 end \n";
428 | }
429 | 
430 | #Finds the minimum value from all the matrix values which are not covered.
431 | sub find_smallest
432 | {
433 |     my $minval;
434 | 
435 |     for my $i (0 .. $#mat)
436 |     {
437 | 	next if $rowcov[$i];
438 | 
439 | 	for my $j (0 .. $#mat)
440 | 	{
441 | 	    next if $colcov[$j];
442 | 	    if( !defined($minval) || $minval > $mat[$i][$j])
443 | 	    {
444 | 		$minval = $mat[$i][$j];
445 | 	    }
446 | 	}
447 |     }
448 |     return $minval;
449 | }
450 | 
451 | 
452 | 1;
453 | __END__
454 | 
455 | =head1 NAME
456 | 
457 |     Algorithm::Munkres - Perl extension for Munkres' solution to 
458 |     classical Assignment problem for square and rectangular matrices 
459 |     This module extends the solution of Assignment problem for square
460 |     matrices to rectangular matrices by padding zeros. Thus a rectangular 
461 |     matrix is converted to square matrix by padding necessary zeros.
462 | 
463 | =head1 SYNOPSIS
464 | 
465 | use Algorithm::Munkres;
466 | 
467 |     @mat = (
468 | 	 [2, 4, 7, 9],
469 | 	 [3, 9, 5, 1],
470 | 	 [8, 2, 9, 7],
471 | 	 );
472 | 
473 | assign(\@mat,\@out_mat);
474 | 
475 |     Then the @out_mat array will have the output as: (0,3,1,2),
476 |     where 
477 |     0th element indicates that 0th row is assigned 0th column i.e value=2
478 |     1st element indicates that 1st row is assigned 3rd column i.e.value=1
479 |     2nd element indicates that 2nd row is assigned 1st column.i.e.value=2
480 |     3rd element indicates that 3rd row is assigned 2nd column.i.e.value=0
481 | 
482 | 
483 | =head1 DESCRIPTION
484 | 
485 |     Assignment Problem: Given N jobs, N workers and the time taken by 
486 |     each worker to complete a job then how should the assignment of a 
487 |     Worker to a Job be done, so as to minimize the time taken. 
488 | 
489 | 	Thus if we have 3 jobs p,q,r and 3 workers x,y,z such that:
490 | 	    x  y  z		
491 | 	 p  2  4  7
492 | 	 q  3  9  5
493 | 	 r  8  2  9
494 |         
495 |         where the cell values of the above matrix give the time required
496 |         for the worker(given by column name) to complete the job(given by 
497 |         the row name) 
498 |     
499 | 	then possible solutions are:	
500 | 		 	 Total
501 | 	 1. 2, 9, 9       20
502 | 	 2. 2, 2, 5        9
503 | 	 3. 3, 4, 9       16
504 | 	 4. 3, 2, 7       12
505 | 	 5. 8, 9, 7       24
506 | 	 6. 8, 4, 5       17
507 | 
508 |     Thus (2) is the optimal solution for the above problem.
509 |     This kind of brute-force approach of solving Assignment problem 
510 |     quickly becomes slow and bulky as N grows, because the number of 
511 |     possible solution are N! and thus the task is to evaluate each 
512 |     and then find the optimal solution.(If N=10, number of possible
513 |     solutions: 3628800 !)
514 |     Munkres' gives us a solution to this problem, which is implemented 
515 |     in this module.
516 | 
517 |     This module also solves Assignment problem for rectangular matrices 
518 |     (M x N) by converting them to square matrices by padding zeros. ex:
519 |     If input matrix is:
520 | 	 [2, 4, 7, 9],
521 | 	 [3, 9, 5, 1],
522 | 	 [8, 2, 9, 7]
523 |     i.e 3 x 4 then we will convert it to 4 x 4 and the modified input 
524 |     matrix will be:
525 | 	 [2, 4, 7, 9],
526 | 	 [3, 9, 5, 1],
527 | 	 [8, 2, 9, 7],
528 |  	 [0, 0, 0, 0]
529 | 
530 | =head1 EXPORT
531 | 
532 |     "assign" function by default.
533 | 
534 | =head1 INPUT
535 | 
536 |     The input matrix should be in a two dimensional array(array of 
537 |     array) and the 'assign' subroutine expects a reference to this 
538 |     array and not the complete array. 
539 |     eg:assign(\@inp_mat, \@out_mat);
540 |     The second argument to the assign subroutine is the reference 
541 |     to the output array.
542 | 
543 | =head1 OUTPUT
544 | 
545 |     The assign subroutine expects references to two arrays as its 
546 |     input paramenters. The second parameter is the reference to the
547 |     output array. This array is populated by assign subroutine. This 
548 |     array is single dimensional Nx1 matrix.
549 |     For above example the output array returned will be:
550 |      (0,
551 |      2,
552 |      1)
553 | 
554 |     where 
555 |     0th element indicates that 0th row is assigned 0th column i.e value=2
556 |     1st element indicates that 1st row is assigned 2nd column i.e.value=5
557 |     2nd element indicates that 2nd row is assigned 1st column.i.e.value=2
558 | 
559 | =head1 SEE ALSO
560 | 
561 |     1. http://216.249.163.93/bob.pilgrim/445/munkres.html
562 | 
563 |     2. Munkres, J. Algorithms for the assignment and transportation 
564 |        Problems. J. Siam 5 (Mar. 1957), 32-38
565 | 
566 |     3. François Bourgeois and Jean-Claude Lassalle. 1971.
567 |        An extension of the Munkres algorithm for the assignment 
568 |        problem to rectangular matrices.
569 |        Communication ACM, 14(12):802-804
570 | 
571 | =head1 AUTHOR
572 | 
573 |     Anagha Kulkarni, University of Minnesota Duluth
574 |     kulka020 <at> d.umn.edu
575 | 	
576 |     Ted Pedersen, University of Minnesota Duluth
577 |     tpederse <at> d.umn.edu
578 | 
579 | =head1 COPYRIGHT AND LICENSE
580 | 
581 | Copyright (C) 2007-2008, Ted Pedersen and Anagha Kulkarni
582 | 
583 | This program is free software; you can redistribute it and/or
584 | modify it under the terms of the GNU General Public License
585 | as published by the Free Software Foundation; either version 2
586 | of the License, or (at your option) any later version.
587 | This program is distributed in the hope that it will be useful,
588 | but WITHOUT ANY WARRANTY; without even the implied warranty of
589 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
590 | GNU General Public License for more details.
591 | 
592 | You should have received a copy of the GNU General Public License
593 | along with this program; if not, write to the Free Software
594 | Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
595 | 
596 | =cut
597 | 


--------------------------------------------------------------------------------
/lib/Cwd.pm:
--------------------------------------------------------------------------------
  1 | package Cwd;
  2 | 
  3 | =head1 NAME
  4 | 
  5 | Cwd - get pathname of current working directory
  6 | 
  7 | =head1 SYNOPSIS
  8 | 
  9 |     use Cwd;
 10 |     my $dir = getcwd;
 11 | 
 12 |     use Cwd 'abs_path';
 13 |     my $abs_path = abs_path($file);
 14 | 
 15 | =head1 DESCRIPTION
 16 | 
 17 | This module provides functions for determining the pathname of the
 18 | current working directory.  It is recommended that getcwd (or another
 19 | *cwd() function) be used in I<all> code to ensure portability.
 20 | 
 21 | By default, it exports the functions cwd(), getcwd(), fastcwd(), and
 22 | fastgetcwd() (and, on Win32, getdcwd()) into the caller's namespace.  
 23 | 
 24 | 
 25 | =head2 getcwd and friends
 26 | 
 27 | Each of these functions are called without arguments and return the
 28 | absolute path of the current working directory.
 29 | 
 30 | =over 4
 31 | 
 32 | =item getcwd
 33 | 
 34 |     my $cwd = getcwd();
 35 | 
 36 | Returns the current working directory.
 37 | 
 38 | Exposes the POSIX function getcwd(3) or re-implements it if it's not
 39 | available.
 40 | 
 41 | =item cwd
 42 | 
 43 |     my $cwd = cwd();
 44 | 
 45 | The cwd() is the most natural form for the current architecture.  For
 46 | most systems it is identical to `pwd` (but without the trailing line
 47 | terminator).
 48 | 
 49 | =item fastcwd
 50 | 
 51 |     my $cwd = fastcwd();
 52 | 
 53 | A more dangerous version of getcwd(), but potentially faster.
 54 | 
 55 | It might conceivably chdir() you out of a directory that it can't
 56 | chdir() you back into.  If fastcwd encounters a problem it will return
 57 | undef but will probably leave you in a different directory.  For a
 58 | measure of extra security, if everything appears to have worked, the
 59 | fastcwd() function will check that it leaves you in the same directory
 60 | that it started in.  If it has changed it will C<die> with the message
 61 | "Unstable directory path, current directory changed
 62 | unexpectedly".  That should never happen.
 63 | 
 64 | =item fastgetcwd
 65 | 
 66 |   my $cwd = fastgetcwd();
 67 | 
 68 | The fastgetcwd() function is provided as a synonym for cwd().
 69 | 
 70 | =item getdcwd
 71 | 
 72 |     my $cwd = getdcwd();
 73 |     my $cwd = getdcwd('C:');
 74 | 
 75 | The getdcwd() function is also provided on Win32 to get the current working
 76 | directory on the specified drive, since Windows maintains a separate current
 77 | working directory for each drive.  If no drive is specified then the current
 78 | drive is assumed.
 79 | 
 80 | This function simply calls the Microsoft C library _getdcwd() function.
 81 | 
 82 | =back
 83 | 
 84 | 
 85 | =head2 abs_path and friends
 86 | 
 87 | These functions are exported only on request.  They each take a single
 88 | argument and return the absolute pathname for it.  If no argument is
 89 | given they'll use the current working directory.
 90 | 
 91 | =over 4
 92 | 
 93 | =item abs_path
 94 | 
 95 |   my $abs_path = abs_path($file);
 96 | 
 97 | Uses the same algorithm as getcwd().  Symbolic links and relative-path
 98 | components ("." and "..") are resolved to return the canonical
 99 | pathname, just like realpath(3).
100 | 
101 | =item realpath
102 | 
103 |   my $abs_path = realpath($file);
104 | 
105 | A synonym for abs_path().
106 | 
107 | =item fast_abs_path
108 | 
109 |   my $abs_path = fast_abs_path($file);
110 | 
111 | A more dangerous, but potentially faster version of abs_path.
112 | 
113 | =back
114 | 
115 | =head2 $ENV{PWD}
116 | 
117 | If you ask to override your chdir() built-in function, 
118 | 
119 |   use Cwd qw(chdir);
120 | 
121 | then your PWD environment variable will be kept up to date.  Note that
122 | it will only be kept up to date if all packages which use chdir import
123 | it from Cwd.
124 | 
125 | 
126 | =head1 NOTES
127 | 
128 | =over 4
129 | 
130 | =item *
131 | 
132 | Since the path separators are different on some operating systems ('/'
133 | on Unix, ':' on MacPerl, etc...) we recommend you use the File::Spec
134 | modules wherever portability is a concern.
135 | 
136 | =item *
137 | 
138 | Actually, on Mac OS, the C<getcwd()>, C<fastgetcwd()> and C<fastcwd()>
139 | functions are all aliases for the C<cwd()> function, which, on Mac OS,
140 | calls `pwd`.  Likewise, the C<abs_path()> function is an alias for
141 | C<fast_abs_path()>.
142 | 
143 | =back
144 | 
145 | =head1 AUTHOR
146 | 
147 | Originally by the perl5-porters.
148 | 
149 | Maintained by Ken Williams <KWILLIAMS@cpan.org>
150 | 
151 | =head1 COPYRIGHT
152 | 
153 | Copyright (c) 2004 by the Perl 5 Porters.  All rights reserved.
154 | 
155 | This program is free software; you can redistribute it and/or modify
156 | it under the same terms as Perl itself.
157 | 
158 | Portions of the C code in this library are copyright (c) 1994 by the
159 | Regents of the University of California.  All rights reserved.  The
160 | license on this code is compatible with the licensing of the rest of
161 | the distribution - please see the source code in F<Cwd.xs> for the
162 | details.
163 | 
164 | =head1 SEE ALSO
165 | 
166 | L<File::chdir>
167 | 
168 | =cut
169 | 
170 | use strict;
171 | use Exporter;
172 | use vars qw(@ISA @EXPORT @EXPORT_OK $VERSION);
173 | 
174 | $VERSION = '3.39_02';
175 | my $xs_version = $VERSION;
176 | $VERSION =~ tr/_//;
177 | 
178 | @ISA = qw/ Exporter /;
179 | @EXPORT = qw(cwd getcwd fastcwd fastgetcwd);
180 | push @EXPORT, qw(getdcwd) if $^O eq 'MSWin32';
181 | @EXPORT_OK = qw(chdir abs_path fast_abs_path realpath fast_realpath);
182 | 
183 | # sys_cwd may keep the builtin command
184 | 
185 | # All the functionality of this module may provided by builtins,
186 | # there is no sense to process the rest of the file.
187 | # The best choice may be to have this in BEGIN, but how to return from BEGIN?
188 | 
189 | if ($^O eq 'os2') {
190 |     local $^W = 0;
191 | 
192 |     *cwd                = defined &sys_cwd ? \&sys_cwd : \&_os2_cwd;
193 |     *getcwd             = \&cwd;
194 |     *fastgetcwd         = \&cwd;
195 |     *fastcwd            = \&cwd;
196 | 
197 |     *fast_abs_path      = \&sys_abspath if defined &sys_abspath;
198 |     *abs_path           = \&fast_abs_path;
199 |     *realpath           = \&fast_abs_path;
200 |     *fast_realpath      = \&fast_abs_path;
201 | 
202 |     return 1;
203 | }
204 | 
205 | # Need to look up the feature settings on VMS.  The preferred way is to use the
206 | # VMS::Feature module, but that may not be available to dual life modules.
207 | 
208 | my $use_vms_feature;
209 | BEGIN {
210 |     if ($^O eq 'VMS') {
211 |         if (eval { local $SIG{__DIE__}; require VMS::Feature; }) {
212 |             $use_vms_feature = 1;
213 |         }
214 |     }
215 | }
216 | 
217 | # Need to look up the UNIX report mode.  This may become a dynamic mode
218 | # in the future.
219 | sub _vms_unix_rpt {
220 |     my $unix_rpt;
221 |     if ($use_vms_feature) {
222 |         $unix_rpt = VMS::Feature::current("filename_unix_report");
223 |     } else {
224 |         my $env_unix_rpt = $ENV{'DECC$FILENAME_UNIX_REPORT'} || '';
225 |         $unix_rpt = $env_unix_rpt =~ /^[ET1]/i; 
226 |     }
227 |     return $unix_rpt;
228 | }
229 | 
230 | # Need to look up the EFS character set mode.  This may become a dynamic
231 | # mode in the future.
232 | sub _vms_efs {
233 |     my $efs;
234 |     if ($use_vms_feature) {
235 |         $efs = VMS::Feature::current("efs_charset");
236 |     } else {
237 |         my $env_efs = $ENV{'DECC$EFS_CHARSET'} || '';
238 |         $efs = $env_efs =~ /^[ET1]/i; 
239 |     }
240 |     return $efs;
241 | }
242 | 
243 | 
244 | # If loading the XS stuff doesn't work, we can fall back to pure perl
245 | eval {
246 |   if ( $] >= 5.006 ) {
247 |     require XSLoader;
248 |     XSLoader::load( __PACKAGE__, $xs_version);
249 |   } else {
250 |     require DynaLoader;
251 |     push @ISA, 'DynaLoader';
252 |     __PACKAGE__->bootstrap( $xs_version );
253 |   }
254 | };
255 | 
256 | # Big nasty table of function aliases
257 | my %METHOD_MAP =
258 |   (
259 |    VMS =>
260 |    {
261 |     cwd			=> '_vms_cwd',
262 |     getcwd		=> '_vms_cwd',
263 |     fastcwd		=> '_vms_cwd',
264 |     fastgetcwd		=> '_vms_cwd',
265 |     abs_path		=> '_vms_abs_path',
266 |     fast_abs_path	=> '_vms_abs_path',
267 |    },
268 | 
269 |    MSWin32 =>
270 |    {
271 |     # We assume that &_NT_cwd is defined as an XSUB or in the core.
272 |     cwd			=> '_NT_cwd',
273 |     getcwd		=> '_NT_cwd',
274 |     fastcwd		=> '_NT_cwd',
275 |     fastgetcwd		=> '_NT_cwd',
276 |     abs_path		=> 'fast_abs_path',
277 |     realpath		=> 'fast_abs_path',
278 |    },
279 | 
280 |    dos => 
281 |    {
282 |     cwd			=> '_dos_cwd',
283 |     getcwd		=> '_dos_cwd',
284 |     fastgetcwd		=> '_dos_cwd',
285 |     fastcwd		=> '_dos_cwd',
286 |     abs_path		=> 'fast_abs_path',
287 |    },
288 | 
289 |    # QNX4.  QNX6 has a $os of 'nto'.
290 |    qnx =>
291 |    {
292 |     cwd			=> '_qnx_cwd',
293 |     getcwd		=> '_qnx_cwd',
294 |     fastgetcwd		=> '_qnx_cwd',
295 |     fastcwd		=> '_qnx_cwd',
296 |     abs_path		=> '_qnx_abs_path',
297 |     fast_abs_path	=> '_qnx_abs_path',
298 |    },
299 | 
300 |    cygwin =>
301 |    {
302 |     getcwd		=> 'cwd',
303 |     fastgetcwd		=> 'cwd',
304 |     fastcwd		=> 'cwd',
305 |     abs_path		=> 'fast_abs_path',
306 |     realpath		=> 'fast_abs_path',
307 |    },
308 | 
309 |    epoc =>
310 |    {
311 |     cwd			=> '_epoc_cwd',
312 |     getcwd	        => '_epoc_cwd',
313 |     fastgetcwd		=> '_epoc_cwd',
314 |     fastcwd		=> '_epoc_cwd',
315 |     abs_path		=> 'fast_abs_path',
316 |    },
317 | 
318 |    MacOS =>
319 |    {
320 |     getcwd		=> 'cwd',
321 |     fastgetcwd		=> 'cwd',
322 |     fastcwd		=> 'cwd',
323 |     abs_path		=> 'fast_abs_path',
324 |    },
325 |   );
326 | 
327 | $METHOD_MAP{NT} = $METHOD_MAP{MSWin32};
328 | 
329 | 
330 | # Find the pwd command in the expected locations.  We assume these
331 | # are safe.  This prevents _backtick_pwd() consulting $ENV{PATH}
332 | # so everything works under taint mode.
333 | my $pwd_cmd;
334 | foreach my $try ('/bin/pwd',
335 | 		 '/usr/bin/pwd',
336 | 		 '/QOpenSys/bin/pwd', # OS/400 PASE.
337 | 		) {
338 | 
339 |     if( -x $try ) {
340 |         $pwd_cmd = $try;
341 |         last;
342 |     }
343 | }
344 | my $found_pwd_cmd = defined($pwd_cmd);
345 | unless ($pwd_cmd) {
346 |     # Isn't this wrong?  _backtick_pwd() will fail if somenone has
347 |     # pwd in their path but it is not /bin/pwd or /usr/bin/pwd?
348 |     # See [perl #16774]. --jhi
349 |     $pwd_cmd = 'pwd';
350 | }
351 | 
352 | # Lazy-load Carp
353 | sub _carp  { require Carp; Carp::carp(@_)  }
354 | sub _croak { require Carp; Carp::croak(@_) }
355 | 
356 | # The 'natural and safe form' for UNIX (pwd may be setuid root)
357 | sub _backtick_pwd {
358 |     # Localize %ENV entries in a way that won't create new hash keys
359 |     my @localize = grep exists $ENV{$_}, qw(PATH IFS CDPATH ENV BASH_ENV);
360 |     local @ENV{@localize};
361 |     
362 |     my $cwd = `$pwd_cmd`;
363 |     # Belt-and-suspenders in case someone said "undef $/".
364 |     local $/ = "\n";
365 |     # `pwd` may fail e.g. if the disk is full
366 |     chomp($cwd) if defined $cwd;
367 |     $cwd;
368 | }
369 | 
370 | # Since some ports may predefine cwd internally (e.g., NT)
371 | # we take care not to override an existing definition for cwd().
372 | 
373 | unless ($METHOD_MAP{$^O}{cwd} or defined &cwd) {
374 |     # The pwd command is not available in some chroot(2)'ed environments
375 |     my $sep = $Config::Config{path_sep} || ':';
376 |     my $os = $^O;  # Protect $^O from tainting
377 | 
378 | 
379 |     # Try again to find a pwd, this time searching the whole PATH.
380 |     if (defined $ENV{PATH} and $os ne 'MSWin32') {  # no pwd on Windows
381 | 	my @candidates = split($sep, $ENV{PATH});
382 | 	while (!$found_pwd_cmd and @candidates) {
383 | 	    my $candidate = shift @candidates;
384 | 	    $found_pwd_cmd = 1 if -x "$candidate/pwd";
385 | 	}
386 |     }
387 | 
388 |     # MacOS has some special magic to make `pwd` work.
389 |     if( $os eq 'MacOS' || $found_pwd_cmd )
390 |     {
391 | 	*cwd = \&_backtick_pwd;
392 |     }
393 |     else {
394 | 	*cwd = \&getcwd;
395 |     }
396 | }
397 | 
398 | if ($^O eq 'cygwin') {
399 |   # We need to make sure cwd() is called with no args, because it's
400 |   # got an arg-less prototype and will die if args are present.
401 |   local $^W = 0;
402 |   my $orig_cwd = \&cwd;
403 |   *cwd = sub { &$orig_cwd() }
404 | }
405 | 
406 | 
407 | # set a reasonable (and very safe) default for fastgetcwd, in case it
408 | # isn't redefined later (20001212 rspier)
409 | *fastgetcwd = \&cwd;
410 | 
411 | # A non-XS version of getcwd() - also used to bootstrap the perl build
412 | # process, when miniperl is running and no XS loading happens.
413 | sub _perl_getcwd
414 | {
415 |     abs_path('.');
416 | }
417 | 
418 | # By John Bazik
419 | #
420 | # Usage: $cwd = &fastcwd;
421 | #
422 | # This is a faster version of getcwd.  It's also more dangerous because
423 | # you might chdir out of a directory that you can't chdir back into.
424 |     
425 | sub fastcwd_ {
426 |     my($odev, $oino, $cdev, $cino, $tdev, $tino);
427 |     my(@path, $path);
428 |     local(*DIR);
429 | 
430 |     my($orig_cdev, $orig_cino) = stat('.');
431 |     ($cdev, $cino) = ($orig_cdev, $orig_cino);
432 |     for (;;) {
433 | 	my $direntry;
434 | 	($odev, $oino) = ($cdev, $cino);
435 | 	CORE::chdir('..') || return undef;
436 | 	($cdev, $cino) = stat('.');
437 | 	last if $odev == $cdev && $oino == $cino;
438 | 	opendir(DIR, '.') || return undef;
439 | 	for (;;) {
440 | 	    $direntry = readdir(DIR);
441 | 	    last unless defined $direntry;
442 | 	    next if $direntry eq '.';
443 | 	    next if $direntry eq '..';
444 | 
445 | 	    ($tdev, $tino) = lstat($direntry);
446 | 	    last unless $tdev != $odev || $tino != $oino;
447 | 	}
448 | 	closedir(DIR);
449 | 	return undef unless defined $direntry; # should never happen
450 | 	unshift(@path, $direntry);
451 |     }
452 |     $path = '/' . join('/', @path);
453 |     if ($^O eq 'apollo') { $path = "/".$path; }
454 |     # At this point $path may be tainted (if tainting) and chdir would fail.
455 |     # Untaint it then check that we landed where we started.
456 |     $path =~ /^(.*)\z/s		# untaint
457 | 	&& CORE::chdir($1) or return undef;
458 |     ($cdev, $cino) = stat('.');
459 |     die "Unstable directory path, current directory changed unexpectedly"
460 | 	if $cdev != $orig_cdev || $cino != $orig_cino;
461 |     $path;
462 | }
463 | if (not defined &fastcwd) { *fastcwd = \&fastcwd_ }
464 | 
465 | 
466 | # Keeps track of current working directory in PWD environment var
467 | # Usage:
468 | #	use Cwd 'chdir';
469 | #	chdir $newdir;
470 | 
471 | my $chdir_init = 0;
472 | 
473 | sub chdir_init {
474 |     if ($ENV{'PWD'} and $^O ne 'os2' and $^O ne 'dos' and $^O ne 'MSWin32') {
475 | 	my($dd,$di) = stat('.');
476 | 	my($pd,$pi) = stat($ENV{'PWD'});
477 | 	if (!defined $dd or !defined $pd or $di != $pi or $dd != $pd) {
478 | 	    $ENV{'PWD'} = cwd();
479 | 	}
480 |     }
481 |     else {
482 | 	my $wd = cwd();
483 | 	$wd = Win32::GetFullPathName($wd) if $^O eq 'MSWin32';
484 | 	$ENV{'PWD'} = $wd;
485 |     }
486 |     # Strip an automounter prefix (where /tmp_mnt/foo/bar == /foo/bar)
487 |     if ($^O ne 'MSWin32' and $ENV{'PWD'} =~ m|(/[^/]+(/[^/]+/[^/]+))(.*)|s) {
488 | 	my($pd,$pi) = stat($2);
489 | 	my($dd,$di) = stat($1);
490 | 	if (defined $pd and defined $dd and $di == $pi and $dd == $pd) {
491 | 	    $ENV{'PWD'}="$2$3";
492 | 	}
493 |     }
494 |     $chdir_init = 1;
495 | }
496 | 
497 | sub chdir {
498 |     my $newdir = @_ ? shift : '';	# allow for no arg (chdir to HOME dir)
499 |     $newdir =~ s|///*|/|g unless $^O eq 'MSWin32';
500 |     chdir_init() unless $chdir_init;
501 |     my $newpwd;
502 |     if ($^O eq 'MSWin32') {
503 | 	# get the full path name *before* the chdir()
504 | 	$newpwd = Win32::GetFullPathName($newdir);
505 |     }
506 | 
507 |     return 0 unless CORE::chdir $newdir;
508 | 
509 |     if ($^O eq 'VMS') {
510 | 	return $ENV{'PWD'} = $ENV{'DEFAULT'}
511 |     }
512 |     elsif ($^O eq 'MacOS') {
513 | 	return $ENV{'PWD'} = cwd();
514 |     }
515 |     elsif ($^O eq 'MSWin32') {
516 | 	$ENV{'PWD'} = $newpwd;
517 | 	return 1;
518 |     }
519 | 
520 |     if (ref $newdir eq 'GLOB') { # in case a file/dir handle is passed in
521 | 	$ENV{'PWD'} = cwd();
522 |     } elsif ($newdir =~ m#^/#s) {
523 | 	$ENV{'PWD'} = $newdir;
524 |     } else {
525 | 	my @curdir = split(m#/#,$ENV{'PWD'});
526 | 	@curdir = ('') unless @curdir;
527 | 	my $component;
528 | 	foreach $component (split(m#/#, $newdir)) {
529 | 	    next if $component eq '.';
530 | 	    pop(@curdir),next if $component eq '..';
531 | 	    push(@curdir,$component);
532 | 	}
533 | 	$ENV{'PWD'} = join('/',@curdir) || '/';
534 |     }
535 |     1;
536 | }
537 | 
538 | 
539 | sub _perl_abs_path
540 | {
541 |     my $start = @_ ? shift : '.';
542 |     my($dotdots, $cwd, @pst, @cst, $dir, @tst);
543 | 
544 |     unless (@cst = stat( $start ))
545 |     {
546 | 	_carp("stat($start): $!");
547 | 	return '';
548 |     }
549 | 
550 |     unless (-d _) {
551 |         # Make sure we can be invoked on plain files, not just directories.
552 |         # NOTE that this routine assumes that '/' is the only directory separator.
553 | 	
554 |         my ($dir, $file) = $start =~ m{^(.*)/(.+)$}
555 | 	    or return cwd() . '/' . $start;
556 | 	
557 | 	# Can't use "-l _" here, because the previous stat was a stat(), not an lstat().
558 | 	if (-l $start) {
559 | 	    my $link_target = readlink($start);
560 | 	    die "Can't resolve link $start: $!" unless defined $link_target;
561 | 	    
562 | 	    require File::Spec;
563 |             $link_target = $dir . '/' . $link_target
564 |                 unless File::Spec->file_name_is_absolute($link_target);
565 | 	    
566 | 	    return abs_path($link_target);
567 | 	}
568 | 	
569 | 	return $dir ? abs_path($dir) . "/$file" : "/$file";
570 |     }
571 | 
572 |     $cwd = '';
573 |     $dotdots = $start;
574 |     do
575 |     {
576 | 	$dotdots .= '/..';
577 | 	@pst = @cst;
578 | 	local *PARENT;
579 | 	unless (opendir(PARENT, $dotdots))
580 | 	{
581 | 	    # probably a permissions issue.  Try the native command.
582 | 	    require File::Spec;
583 | 	    return File::Spec->rel2abs( $start, _backtick_pwd() );
584 | 	}
585 | 	unless (@cst = stat($dotdots))
586 | 	{
587 | 	    _carp("stat($dotdots): $!");
588 | 	    closedir(PARENT);
589 | 	    return '';
590 | 	}
591 | 	if ($pst[0] == $cst[0] && $pst[1] == $cst[1])
592 | 	{
593 | 	    $dir = undef;
594 | 	}
595 | 	else
596 | 	{
597 | 	    do
598 | 	    {
599 | 		unless (defined ($dir = readdir(PARENT)))
600 | 	        {
601 | 		    _carp("readdir($dotdots): $!");
602 | 		    closedir(PARENT);
603 | 		    return '';
604 | 		}
605 | 		$tst[0] = $pst[0]+1 unless (@tst = lstat("$dotdots/$dir"))
606 | 	    }
607 | 	    while ($dir eq '.' || $dir eq '..' || $tst[0] != $pst[0] ||
608 | 		   $tst[1] != $pst[1]);
609 | 	}
610 | 	$cwd = (defined $dir ? "$dir" : "" ) . "/$cwd" ;
611 | 	closedir(PARENT);
612 |     } while (defined $dir);
613 |     chop($cwd) unless $cwd eq '/'; # drop the trailing /
614 |     $cwd;
615 | }
616 | 
617 | 
618 | my $Curdir;
619 | sub fast_abs_path {
620 |     local $ENV{PWD} = $ENV{PWD} || ''; # Guard against clobberage
621 |     my $cwd = getcwd();
622 |     require File::Spec;
623 |     my $path = @_ ? shift : ($Curdir ||= File::Spec->curdir);
624 | 
625 |     # Detaint else we'll explode in taint mode.  This is safe because
626 |     # we're not doing anything dangerous with it.
627 |     ($path) = $path =~ /(.*)/;
628 |     ($cwd)  = $cwd  =~ /(.*)/;
629 | 
630 |     unless (-e $path) {
631 |  	_croak("$path: No such file or directory");
632 |     }
633 | 
634 |     unless (-d _) {
635 |         # Make sure we can be invoked on plain files, not just directories.
636 | 	
637 | 	my ($vol, $dir, $file) = File::Spec->splitpath($path);
638 | 	return File::Spec->catfile($cwd, $path) unless length $dir;
639 | 
640 | 	if (-l $path) {
641 | 	    my $link_target = readlink($path);
642 | 	    die "Can't resolve link $path: $!" unless defined $link_target;
643 | 	    
644 | 	    $link_target = File::Spec->catpath($vol, $dir, $link_target)
645 |                 unless File::Spec->file_name_is_absolute($link_target);
646 | 	    
647 | 	    return fast_abs_path($link_target);
648 | 	}
649 | 	
650 | 	return $dir eq File::Spec->rootdir
651 | 	  ? File::Spec->catpath($vol, $dir, $file)
652 | 	  : fast_abs_path(File::Spec->catpath($vol, $dir, '')) . '/' . $file;
653 |     }
654 | 
655 |     if (!CORE::chdir($path)) {
656 |  	_croak("Cannot chdir to $path: $!");
657 |     }
658 |     my $realpath = getcwd();
659 |     if (! ((-d $cwd) && (CORE::chdir($cwd)))) {
660 |  	_croak("Cannot chdir back to $cwd: $!");
661 |     }
662 |     $realpath;
663 | }
664 | 
665 | # added function alias to follow principle of least surprise
666 | # based on previous aliasing.  --tchrist 27-Jan-00
667 | *fast_realpath = \&fast_abs_path;
668 | 
669 | 
670 | # --- PORTING SECTION ---
671 | 
672 | # VMS: $ENV{'DEFAULT'} points to default directory at all times
673 | # 06-Mar-1996  Charles Bailey  bailey@newman.upenn.edu
674 | # Note: Use of Cwd::chdir() causes the logical name PWD to be defined
675 | #   in the process logical name table as the default device and directory
676 | #   seen by Perl. This may not be the same as the default device
677 | #   and directory seen by DCL after Perl exits, since the effects
678 | #   the CRTL chdir() function persist only until Perl exits.
679 | 
680 | sub _vms_cwd {
681 |     return $ENV{'DEFAULT'};
682 | }
683 | 
684 | sub _vms_abs_path {
685 |     return $ENV{'DEFAULT'} unless @_;
686 |     my $path = shift;
687 | 
688 |     my $efs = _vms_efs;
689 |     my $unix_rpt = _vms_unix_rpt;
690 | 
691 |     if (defined &VMS::Filespec::vmsrealpath) {
692 |         my $path_unix = 0;
693 |         my $path_vms = 0;
694 | 
695 |         $path_unix = 1 if ($path =~ m#(?<=\^)/#);
696 |         $path_unix = 1 if ($path =~ /^\.\.?$/);
697 |         $path_vms = 1 if ($path =~ m#[\[<\]]#);
698 |         $path_vms = 1 if ($path =~ /^--?$/);
699 | 
700 |         my $unix_mode = $path_unix;
701 |         if ($efs) {
702 |             # In case of a tie, the Unix report mode decides.
703 |             if ($path_vms == $path_unix) {
704 |                 $unix_mode = $unix_rpt;
705 |             } else {
706 |                 $unix_mode = 0 if $path_vms;
707 |             }
708 |         }
709 | 
710 |         if ($unix_mode) {
711 |             # Unix format
712 |             return VMS::Filespec::unixrealpath($path);
713 |         }
714 | 
715 | 	# VMS format
716 | 
717 | 	my $new_path = VMS::Filespec::vmsrealpath($path);
718 | 
719 | 	# Perl expects directories to be in directory format
720 | 	$new_path = VMS::Filespec::pathify($new_path) if -d $path;
721 | 	return $new_path;
722 |     }
723 | 
724 |     # Fallback to older algorithm if correct ones are not
725 |     # available.
726 | 
727 |     if (-l $path) {
728 |         my $link_target = readlink($path);
729 |         die "Can't resolve link $path: $!" unless defined $link_target;
730 | 
731 |         return _vms_abs_path($link_target);
732 |     }
733 | 
734 |     # may need to turn foo.dir into [.foo]
735 |     my $pathified = VMS::Filespec::pathify($path);
736 |     $path = $pathified if defined $pathified;
737 | 	
738 |     return VMS::Filespec::rmsexpand($path);
739 | }
740 | 
741 | sub _os2_cwd {
742 |     $ENV{'PWD'} = `cmd /c cd`;
743 |     chomp $ENV{'PWD'};
744 |     $ENV{'PWD'} =~ s:\\:/:g ;
745 |     return $ENV{'PWD'};
746 | }
747 | 
748 | sub _win32_cwd_simple {
749 |     $ENV{'PWD'} = `cd`;
750 |     chomp $ENV{'PWD'};
751 |     $ENV{'PWD'} =~ s:\\:/:g ;
752 |     return $ENV{'PWD'};
753 | }
754 | 
755 | sub _win32_cwd {
756 |     # Need to avoid taking any sort of reference to the typeglob or the code in
757 |     # the optree, so that this tests the runtime state of things, as the
758 |     # ExtUtils::MakeMaker tests for "miniperl" need to be able to fake things at
759 |     # runtime by deleting the subroutine. *foo{THING} syntax on a symbol table
760 |     # lookup avoids needing a string eval, which has been reported to cause
761 |     # problems (for reasons that we haven't been able to get to the bottom of -
762 |     # rt.cpan.org #56225)
763 |     if (*{$DynaLoader::{boot_DynaLoader}}{CODE}) {
764 | 	$ENV{'PWD'} = Win32::GetCwd();
765 |     }
766 |     else { # miniperl
767 | 	chomp($ENV{'PWD'} = `cd`);
768 |     }
769 |     $ENV{'PWD'} =~ s:\\:/:g ;
770 |     return $ENV{'PWD'};
771 | }
772 | 
773 | *_NT_cwd = defined &Win32::GetCwd ? \&_win32_cwd : \&_win32_cwd_simple;
774 | 
775 | sub _dos_cwd {
776 |     if (!defined &Dos::GetCwd) {
777 |         $ENV{'PWD'} = `command /c cd`;
778 |         chomp $ENV{'PWD'};
779 |         $ENV{'PWD'} =~ s:\\:/:g ;
780 |     } else {
781 |         $ENV{'PWD'} = Dos::GetCwd();
782 |     }
783 |     return $ENV{'PWD'};
784 | }
785 | 
786 | sub _qnx_cwd {
787 | 	local $ENV{PATH} = '';
788 | 	local $ENV{CDPATH} = '';
789 | 	local $ENV{ENV} = '';
790 |     $ENV{'PWD'} = `/usr/bin/fullpath -t`;
791 |     chomp $ENV{'PWD'};
792 |     return $ENV{'PWD'};
793 | }
794 | 
795 | sub _qnx_abs_path {
796 | 	local $ENV{PATH} = '';
797 | 	local $ENV{CDPATH} = '';
798 | 	local $ENV{ENV} = '';
799 |     my $path = @_ ? shift : '.';
800 |     local *REALPATH;
801 | 
802 |     defined( open(REALPATH, '-|') || exec '/usr/bin/fullpath', '-t', $path ) or
803 |       die "Can't open /usr/bin/fullpath: $!";
804 |     my $realpath = <REALPATH>;
805 |     close REALPATH;
806 |     chomp $realpath;
807 |     return $realpath;
808 | }
809 | 
810 | sub _epoc_cwd {
811 |     $ENV{'PWD'} = EPOC::getcwd();
812 |     return $ENV{'PWD'};
813 | }
814 | 
815 | 
816 | # Now that all the base-level functions are set up, alias the
817 | # user-level functions to the right places
818 | 
819 | if (exists $METHOD_MAP{$^O}) {
820 |   my $map = $METHOD_MAP{$^O};
821 |   foreach my $name (keys %$map) {
822 |     local $^W = 0;  # assignments trigger 'subroutine redefined' warning
823 |     no strict 'refs';
824 |     *{$name} = \&{$map->{$name}};
825 |   }
826 | }
827 | 
828 | # In case the XS version doesn't load.
829 | *abs_path = \&_perl_abs_path unless defined &abs_path;
830 | *getcwd = \&_perl_getcwd unless defined &getcwd;
831 | 
832 | # added function alias for those of us more
833 | # used to the libc function.  --tchrist 27-Jan-00
834 | *realpath = \&abs_path;
835 | 
836 | 1;
837 | 


--------------------------------------------------------------------------------
/lib/Math/Combinatorics.pm:
--------------------------------------------------------------------------------
   1 | =head1 NAME
   2 | 
   3 | Math::Combinatorics - Perform combinations and permutations on lists
   4 | 
   5 | =head1 SYNOPSIS
   6 | 
   7 | Available as an object oriented API.
   8 | 
   9 |   use Math::Combinatorics;
  10 | 
  11 |   my @n = qw(a b c);
  12 |   my $combinat = Math::Combinatorics->new(count => 2,
  13 |                                           data => [@n],
  14 |                                          );
  15 | 
  16 |   print "combinations of 2 from: ".join(" ",@n)."\n";
  17 |   print "------------------------".("--" x scalar(@n))."\n";
  18 |   while(my @combo = $combinat->next_combination){
  19 |     print join(' ', @combo)."\n";
  20 |   }
  21 | 
  22 |   print "\n";
  23 | 
  24 |   print "permutations of 3 from: ".join(" ",@n)."\n";
  25 |   print "------------------------".("--" x scalar(@n))."\n";
  26 |   while(my @permu = $combinat->next_permutation){
  27 |     print join(' ', @permu)."\n";
  28 |   }
  29 | 
  30 |   output:
  31 | 
  32 | Or available via exported functions 'permute', 'combine', and 'factorial'.
  33 | 
  34 |   use Math::Combinatorics;
  35 | 
  36 |   my @n = qw(a b c);
  37 |   print "combinations of 2 from: ".join(" ",@n)."\n";
  38 |   print "------------------------".("--" x scalar(@n))."\n";
  39 |   print join("\n", map { join " ", @$_ } combine(2,@n)),"\n";
  40 |   print "\n";
  41 |   print "permutations of 3 from: ".join(" ",@n)."\n";
  42 |   print "------------------------".("--" x scalar(@n))."\n";
  43 |   print join("\n", map { join " ", @$_ } permute(@n)),"\n";
  44 | 
  45 | 
  46 | Output:
  47 | 
  48 |   combinations of 2 from: a b c
  49 |   ------------------------------
  50 |   a b
  51 |   a c
  52 |   b c
  53 | 
  54 |   permutations of 3 from: a b c
  55 |   ------------------------------
  56 |   a b c
  57 |   a c b
  58 |   b a c
  59 |   b c a
  60 |   c a b
  61 |   c b a
  62 | 
  63 | Output from both types of calls is the same, but the object-oriented approach consumes
  64 | much less memory for large sets.
  65 | 
  66 | =head1 DESCRIPTION
  67 | 
  68 | Combinatorics is the branch of mathematics studying the enumeration, combination,
  69 | and permutation of sets of elements and the mathematical relations that characterize
  70 | their properties.  As a jumping off point, refer to:
  71 | 
  72 |  http://mathworld.wolfram.com/Combinatorics.html
  73 | 
  74 | This module provides a pure-perl implementation of nCk, nCRk, nPk, nPRk, !n and n!
  75 | (combination, multiset, permutation, string, derangement, and factorial, respectively).
  76 | Functional and object-oriented usages allow problems such as the following to be solved:
  77 | 
  78 | =over
  79 | 
  80 | =item combine - nCk
  81 | 
  82 |  http://mathworld.wolfram.com/Combination.html
  83 | 
  84 | "Fun questions to ask the pizza parlor wait staff: how many possible combinations
  85 | of 2 toppings can I get on my pizza?".
  86 | 
  87 | =item derange - !n
  88 | 
  89 |  http://mathworld.wolfram.com/Derangement.html
  90 | 
  91 | "A derangement of n ordered objects, denoted !n, is a permutation in which none of the
  92 | objects appear in their "natural" (i.e., ordered) place."
  93 | 
  94 | =item permute - nPk
  95 | 
  96 |  http://mathworld.wolfram.com/Permutation.html
  97 | 
  98 | "Master Mind Game: ways to arrange pieces of different colors in a
  99 | certain number of positions, without repetition of a color".
 100 | 
 101 | =back
 102 | 
 103 | Object-oriented usage additionally allows solving these problems by calling L</new()>
 104 | with a B<frequency> vector:
 105 | 
 106 | =over
 107 | 
 108 | =item string - nPRk
 109 | 
 110 |  http://mathworld.wolfram.com/String.html
 111 | 
 112 | "Morse signals: diferent signals of 3 positions using the two symbols - and .".
 113 | 
 114 |  $o = Math::Combinatorics->new( count=>3 , data=>[qw(. -)] , frequency=>[3,3] );
 115 |  while ( my @x = $o->next_multiset ) {
 116 |    my $p = Math::Combinatorics->new( data=>\@x , frequency=>[map{1} @x] );
 117 |    while ( my @y = $p->next_string ) {
 118 |      #do something
 119 |    }
 120 |  }
 121 | 
 122 | =item multiset/multichoose - nCRk
 123 | 
 124 |  http://mathworld.wolfram.com/Multiset.html
 125 | 
 126 | "ways to extract 3 balls at once of a bag with 3 black and 3 white balls".
 127 | 
 128 |  $o = Math::Combinatorics->new( count=>3 , data=>[qw(white black)] , frequency=>[3,3] );
 129 |  while ( my @x = $o->next_multiset ) {
 130 |    #do something
 131 |  }
 132 | 
 133 | =back
 134 | 
 135 | =head2 EXPORT
 136 | 
 137 | the following export tags will bring a single method into the caller's
 138 | namespace.  no symbols are exported by default.  see pod documentation below for
 139 | method descriptions.
 140 | 
 141 |   combine
 142 |   derange
 143 |   multiset
 144 |   permute
 145 |   string
 146 |   factorial
 147 | 
 148 | =head1 AUTHOR
 149 | 
 150 | Allen Day <allenday@ucla.edu>, with algorithmic contributions from Christopher Eltschka and
 151 | Tye.
 152 | 
 153 | Copyright (c) 2004-2005 Allen Day. All rights reserved. This program is free software; you
 154 | can redistribute it and/or modify it under the same terms as Perl itself.
 155 | 
 156 | =head1 ACKNOWLEDGEMENTS
 157 | 
 158 | A sincere thanks to everyone for helping to make this a better module.  After initial
 159 | development I've only had time to accept patches and improvements.  Math::Combinatorics
 160 | continues to be developed and improved by the community.  Contributors of note include:
 161 | 
 162 | For adding new features: Carlos Rica, David Coppit, Carlos Segre, Lyon Lemmens
 163 | 
 164 | For bug reports: Ying Yang, Joerg Beyer, Marc Logghe, Yunheng Wang,
 165 | Torsten Seemann, Gerrit Haase, Joern Behre, Lyon Lemmens, Federico Lucifredi
 166 | 
 167 | =head1 BUGS / TODO
 168 | 
 169 | Report them to the author.
 170 | 
 171 |  * Need more extensive unit tests.
 172 | 
 173 |    * tests for new()'s frequency argment
 174 | 
 175 |  * A known bug (more of a missing feature, actually) does not allow parameterization of k
 176 |  for nPk in permute().  it is assumed k == n.  L</permute()> for details.  You can work
 177 |  around this by making calls to both L</permute()> and L</combine()>
 178 | 
 179 |  * Lots of really interesting stuff from Mathworld.Wolfram.com.  MathWorld rocks!  Expect
 180 |  to see implementation of more concepts from their site, e.g.:
 181 | 
 182 |    http://mathworld.wolfram.com/BellNumber.html
 183 |    http://mathworld.wolfram.com/StirlingNumberoftheSecondKind.html
 184 |    http://mathworld.wolfram.com/Word.html
 185 | 
 186 |  * Other combinatorics stuff
 187 |    http://en.wikipedia.org/wiki/Catalan_number
 188 |    http://en.wikipedia.org/wiki/Stirling_number
 189 | 
 190 | =head1 SEE ALSO
 191 | 
 192 | L<Set::Scalar>
 193 | 
 194 | L<Set::Bag>
 195 | 
 196 | L<String::Combination> (alas misnamed, it actually returns permutations on a string).
 197 | 
 198 |  http://perlmonks.thepen.com/29374.html
 199 | 
 200 |  http://groups.google.com/groups?selm=38568F79.13680B86%40physik.tu-muenchen.de&output=gplain
 201 | 
 202 | 
 203 | =cut
 204 | 
 205 | package Math::Combinatorics;
 206 | 
 207 | use strict;
 208 | use Data::Dumper;
 209 | require Exporter;
 210 | 
 211 | our @ISA = qw(Exporter);
 212 | our @EXPORT = qw( combine derange factorial permute );
 213 | our $VERSION = '0.09';
 214 | 
 215 | =head1 EXPORTED FUNCTIONS
 216 | 
 217 | =head2 combine()
 218 | 
 219 |  Usage   : my @combinations = combine($k,@n);
 220 |  Function: implements nCk (n choose k), or n!/(k!*(n-k!)).
 221 |            returns all unique unorderd combinations of k items from set n.
 222 |            items in n are assumed to be character data, and are
 223 |            copied into the return data structure (see "Returns" below).
 224 |  Example : my @n = qw(a b c);
 225 |            my @c = combine(2,@n);
 226 |            print join "\n", map { join " ", @$_ } @c;
 227 |            # prints:
 228 |            # b c
 229 |            # a c
 230 |            # a b
 231 |  Returns : a list of arrays, where each array contains a unique combination
 232 |            of k items from n
 233 |  Args    : a list of items to be combined
 234 |  Notes   : data is internally assumed to be alphanumeric.  this is necessary
 235 |            to efficiently generate combinations of large sets.  if you need
 236 |            combinations of non-alphanumeric data, or on data
 237 |            C<sort {$a cmp $b}> would not be appropriate, use the
 238 |            object-oriented API.  See L</new()> and the B<compare> option.
 239 | 
 240 |            Identical items are assumed to be non-unique.  That is, calling
 241 |            C<combine(1,'a','a') yields two sets: {a}, and {a}.  See
 242 |            L</next_multiset() if this is not the desired behavior.
 243 | 
 244 | =cut
 245 | 
 246 | sub combine {
 247 |   my($k,@n) = @_;
 248 | 
 249 |   my @result = ();
 250 | 
 251 |   my $c = __PACKAGE__->new(data => [@n], count => $k);
 252 |   while(my(@combo) = $c->next_combination){
 253 |     push @result, [@combo];
 254 |   }
 255 | 
 256 |   return @result;
 257 | }
 258 | 
 259 | =head2 derange()
 260 | 
 261 |  Usage   : my @deranges = derange(@n);
 262 |  Function: implements !n, a derangement of n items in which none of the
 263 |            items appear in their originally ordered place.
 264 |  Example : my @n = qw(a b c);
 265 |            my @d = derange(@n);
 266 |            print join "\n", map { join " ", @$_ } @d;
 267 |            # prints:
 268 |            # a c b
 269 |            # b a c
 270 |            # b c a
 271 |            # c a b
 272 |            # c b a
 273 |  Returns : a list of arrays, where each array contains a derangement of
 274 |            k items from n (where k == n).
 275 |  Args    : a list of items to be deranged.
 276 |  Note    : k should really be parameterizable.  this will happen
 277 |            in a later version of the module.  send me a patch to
 278 |            make that version come out sooner.
 279 |  Notes   : data is internally assumed to be alphanumeric.  this is necessary
 280 |            to efficiently generate combinations of large sets.  if you need
 281 |            combinations of non-alphanumeric data, or on data
 282 |            C<sort {$a cmp $b}> would not be appropriate, use the
 283 |            object-oriented API.  See L</new()>, and the B<compare> option.
 284 | 
 285 | =cut
 286 | 
 287 | sub derange {
 288 |   my(@n) = @_;
 289 | 
 290 |   my @result = ();
 291 | 
 292 |   my $c = __PACKAGE__->new(data => [@n]);
 293 |   while(my(@derange) = $c->next_derangement){
 294 |     push @result, [@derange];
 295 |   }
 296 | 
 297 |   return @result;
 298 | }
 299 | 
 300 | =head2 next_derangement()
 301 | 
 302 |  Usage   : my @derangement = $c->next_derangement();
 303 |  Function: get derangements for @data.
 304 |  Returns : returns a permutation of items from @data (see L</new()>),
 305 |            where none of the items appear in their natural order.  repeated calls
 306 |            retrieve all unique derangements of @data elements.  a returned empty
 307 |            list signifies all derangements have been iterated.
 308 |  Args    : none.
 309 | 
 310 | =cut
 311 | 
 312 | sub next_derangement {
 313 |   my $self = shift;
 314 |   my $data = $self->data();
 315 | 
 316 |   my $cursor = $self->_permutation_cursor();
 317 |   my $values = @$cursor;
 318 |   if($self->{pin}){
 319 |     $self->{pin} = 0;
 320 | 
 321 |     my $i;
 322 |     for ($i = 1; $i < $values; $i += 2) {
 323 |       $$cursor[$i - 1] = $i;
 324 |       $$cursor[$i]     = $i - 1;
 325 |     }
 326 |     if ($values % 2 != 0) {
 327 |       $$cursor[$values - 1] = $values - 3;
 328 |       $$cursor[$values - 2] = $values - 1;
 329 |     }
 330 |     goto RESULT;
 331 |   }
 332 |   else {
 333 |     my $values = @$cursor;
 334 |     my $i;
 335 |     my @found;  # stores for each element if it has been found previously
 336 |     for ($i = 0; $i < $values; $i++) { $found[$i] = 0 }
 337 |     my $e;
 338 |     my $elemfound = 0;
 339 |     for ($i = $values - 1; $i > -1; $i--) {
 340 |       $found[$$cursor[$i]] = 1;
 341 |       if ($i > $values - 3) { # $values-1 or $values-2
 342 |         if ($i == $values - 2) {
 343 |           #print "i=$i (values-2)\n";##
 344 |           $e = $$cursor[$i + 1];
 345 |           if ($e > $$cursor[$i] && $e != $i
 346 |               && $$cursor[$i] != $i + 1) {
 347 |             $$cursor[$i + 1] = $$cursor[$i];
 348 |             $$cursor[$i] = $e;
 349 |             #print "!\n";##
 350 |             goto RESULT;
 351 |           }
 352 |         }
 353 |         next;
 354 |       }
 355 |       for ($e = $$cursor[$i] + 1; $e < $values; $e++) {
 356 |         if ($found[$e] && $e != $i) {
 357 |           $elemfound = 1;
 358 |           last;
 359 |         }
 360 |       }
 361 |       last if ($elemfound);
 362 |     }
 363 |     if ($elemfound) {
 364 |       $$cursor[$i] = $e;
 365 |       $found[$e] = 0;
 366 |       $i++;
 367 |       my $j;
 368 |       my @elems;
 369 |       for ($j = 0; $j < $values; $j++) {
 370 |         if ($found[$j]) { push(@elems, $j) }
 371 |       }
 372 |       for ($j = 0; $j < @elems; $j++) {
 373 |         if ($elems[$j] != $i) {
 374 |           # if the next is the last and it will be wrong:
 375 |           if ($j + 2 == @elems
 376 |               && $elems[$j + 1] == $i + 1) {
 377 |             # interchange them:
 378 |             $$cursor[$i] = $elems[$j + 1];
 379 |             $$cursor[$i + 1] = $elems[$j];
 380 |             last;
 381 |           }
 382 |           $$cursor[$i] = $elems[$j];
 383 |         }
 384 |         elsif ($j + 1 < @elems) {
 385 |           # use the next element:
 386 |           $$cursor[$i] = $elems[$j + 1];
 387 |           $elems[$j + 1] = $elems[$j];
 388 |         }
 389 |         else { die() }
 390 |         $i++;
 391 |       }
 392 |       goto RESULT;
 393 |     }
 394 |     return ();
 395 |   }
 396 |   RESULT:
 397 |   # map cursor to data array
 398 |   my @result;
 399 |   foreach my $c (@$cursor){
 400 |     push @result, $${ $data->[$c] };
 401 |   }
 402 |   return @result;
 403 | }
 404 | 
 405 | =head2 factorial()
 406 | 
 407 |  Usage   : my $f = factorial(4); #returns 24, or 4*3*2*1
 408 |  Function: calculates n! (n factorial).
 409 |  Returns : undef if n is non-integer or n < 0
 410 |  Args    : a positive, non-zero integer
 411 |  Note    : this function is used internally by combine() and permute()
 412 | 
 413 | =cut
 414 | 
 415 | sub factorial {
 416 |   my $n = shift;
 417 |   return undef unless $n >= 0 and $n == int($n);
 418 | 
 419 |   my $f;
 420 | 
 421 |   for($f = 1 ; $n > 0 ; $n--){
 422 |     $f *= $n
 423 |   }
 424 | 
 425 |   return $f;
 426 | }
 427 | 
 428 | =head2 permute()
 429 | 
 430 |  Usage   : my @permutations = permute(@n);
 431 |  Function: implements nPk (n permute k) (where k == n), or n!/(n-k)!
 432 |             returns all unique permutations of k items from set n
 433 |            (where n == k, see "Note" below).  items in n are assumed to
 434 |            be character data, and are copied into the return data
 435 |            structure.
 436 |  Example : my @n = qw(a b c);
 437 |            my @p = permute(@n);
 438 |            print join "\n", map { join " ", @$_ } @p;
 439 |            # prints:
 440 |            # b a c
 441 |            # b c a
 442 |            # c b a
 443 |            # c a b
 444 |            # a c b
 445 |            # a b c
 446 |  Returns : a list of arrays, where each array contains a permutation of
 447 |            k items from n (where k == n).
 448 |  Args    : a list of items to be permuted.
 449 |  Note    : k should really be parameterizable.  this will happen
 450 |            in a later version of the module.  send me a patch to
 451 |            make that version come out sooner.
 452 |  Notes   : data is internally assumed to be alphanumeric.  this is necessary
 453 |            to efficiently generate combinations of large sets.  if you need
 454 |            combinations of non-alphanumeric data, or on data
 455 |            C<sort {$a cmp $b}> would not be appropriate, use the
 456 |            object-oriented API.  See L</new()>, and the B<compare> option.
 457 | 
 458 |            Identical items are assumed to be non-unique.  That is, calling
 459 |            C<permute('a','a') yields two sets: {a,a}, and {a,a}.  See
 460 |            L</next_string() if this is not the desired behavior.
 461 | 
 462 | =cut
 463 | 
 464 | sub permute {
 465 |   my(@n) = @_;
 466 | 
 467 |   my @result = ();
 468 | 
 469 |   my $c = __PACKAGE__->new(data => [@n]);
 470 |   while(my(@permu) = $c->next_permutation){
 471 |     push @result, [@permu];
 472 |   }
 473 | 
 474 |   return @result;
 475 | }
 476 | 
 477 | =head1 CONSTRUCTOR
 478 | 
 479 | =cut
 480 | 
 481 | =head2 new()
 482 | 
 483 |  Usage   : my $c = Math::Combinatorics->new( count => 2,       #treated as int
 484 |                                              data => [1,2,3,4] #arrayref or anonymous array
 485 |                                            );
 486 |  Function: build a new Math::Combinatorics object.
 487 |  Returns : a Math::Combinatorics object
 488 |  Args    : count     - required for combinatoric functions/methods.  number of elements to be
 489 |                        present in returned set(s).
 490 |            data      - required for combinatoric B<AND> permutagenic functions/methods.  this is the
 491 |                        set elements are chosen from.  B<NOTE>: this array is modified in place; make
 492 |                        a copy of your array if the order matters in the caller's space.
 493 |            frequency - optional vector of data frequencies.  must be the same length as the B<data>
 494 |                        constructor argument.  These two constructor calls here are equivalent:
 495 | 
 496 |                          $a = 'a';
 497 |                          $b = 'b';
 498 | 
 499 |                          Math::Combinatorics->new( count=>2, data=>[\$a,\$a,\$a,\$a,\$a,\$b,\$b] );
 500 |                          Math::Combinatorics->new( count=>2, data=>[\$a,\$b], frequency=>[5,2] );
 501 | 
 502 |                        so why use this?  sometimes it's useful to have multiple identical entities in
 503 |                        a set (in set theory jargon, this is called a "bag", See L<Set::Bag>).
 504 |            compare   - optional subroutine reference used in sorting elements of the set.  examples:
 505 | 
 506 |                        #appropriate for character elements
 507 |                        compare => sub { $_[0] cmp $_[1] }
 508 |                        #appropriate for numeric elements
 509 |                        compare => sub { $_[0] <=> $_[1] }
 510 |                        #appropriate for object elements, perhaps
 511 |                        compare => sub { $_[0]->value <=> $_[1]->value }
 512 | 
 513 |                      The default sort mechanism is based on references, and cannot be predicted.
 514 |                      Improvements for a more flexible compare() mechanism are most welcome.
 515 | 
 516 | =cut
 517 | 
 518 | sub new {
 519 |   my($class,%arg) = @_;
 520 |   my $self = bless {}, $class;
 521 | 
 522 |   $self->{compare} = $arg{compare} || sub { $_[0] cmp $_[1] };
 523 |   $self->{count}   = $arg{count};
 524 | 
 525 |   #convert bag to set
 526 |   my $freq            = $arg{frequency};
 527 |   if(ref($freq) eq 'ARRAY' and scalar(@$freq) == scalar(@{$arg{data}})){
 528 |     $self->{frequency}++;
 529 |     my @bag = @{$arg{data}};
 530 |     my @set = ();
 531 | 
 532 |     #allow '0 but defined' elements (Yunheng Wang)
 533 |     foreach my $type ( @bag ) {
 534 |       my $f = shift @$freq;
 535 |       next if $f < 1;
 536 |       for(1..$f){
 537 |         #we push on a reference to make sure, for instance, that objects
 538 |         #are identical and not copied
 539 |         push @set, \$type;
 540 |       }
 541 |     }
 542 |     $arg{data} = \@set;
 543 |   }
 544 |   elsif(!ref($freq)){
 545 |     $arg{data} = [map { \$_ } @{$arg{data}}];
 546 |   }
 547 | 
 548 | #warn join ' ', @{$arg{data}};
 549 | 
 550 |   #OK, this is hokey, but I don't have time to fix it properly right now.
 551 |   #We want to allow both user-specified sorting as well as our own
 552 |   #reference-based internal sorting -- the latter only because unit tests
 553 |   #are failing if we don't have it.  Additionally, we don't want to require
 554 |   #the triple derefernce necessary for comparison of the pristine data in
 555 |   #the user-supplied compare coderef.  The solution for now is to do an
 556 |   #if/else.  If you're staring at this please fix it!
 557 |   my $compare = $self->{compare};
 558 |   if ( defined $arg{compare} ) {
 559 |     $self->{data} = [sort {&$compare($$$a,$$$b)} map {\$_} @{$arg{data}}];
 560 |   }
 561 |   else {
 562 |     $self->{data} = [sort {&$compare($a,$b)} map {\$_} @{$arg{data}}];
 563 |   }
 564 | 
 565 | #warn Dumper($self->{data});
 566 | 
 567 |   $self->{cin} = 1;
 568 |   $self->{pin} = 1;
 569 | 
 570 |   return $self;
 571 | }
 572 | 
 573 | =head1 OBJECT METHODS
 574 | 
 575 | =cut
 576 | 
 577 | =head2 next_combination()
 578 | 
 579 |  Usage   : my @combo = $c->next_combination();
 580 |  Function: get combinations of size $count from @data.
 581 |  Returns : returns a combination of $count items from @data (see L</new()>).
 582 |            repeated calls retrieve all unique combinations of $count elements.
 583 |            a returned empty list signifies all combinations have been iterated.
 584 |  Note    : this method may only be used if a B<frequency> argument is B<NOT>
 585 |            given to L</new()>, otherwise use L</next_multiset()>.
 586 |  Args    : none.
 587 | 
 588 | =cut
 589 | 
 590 | sub next_combination {
 591 |   my $self = shift;
 592 |   if ( $self->{frequency} ) {
 593 |     print STDERR "must use next_multiset() if 'frequency' argument passed to constructor\n";
 594 |     return ();
 595 |   }
 596 |   return $self->_next_combination;
 597 | }
 598 | 
 599 | sub _next_combination {
 600 |   my $self = shift;
 601 |   my $data = $self->data();
 602 |   my $combo_end = $self->count();
 603 | 
 604 |   my $begin = 0;
 605 |   my $end = $#{$data} + 1;
 606 | 
 607 |   my @result;
 608 | 
 609 |   return () if scalar(@$data) < $self->count();
 610 | 
 611 |   if($self->{cin}){
 612 |     $self->{cin} = 0;
 613 | 
 614 |     for(0..$self->count-1){
 615 |       push @result, $${ $data->[$_] };
 616 |     }
 617 | #warn 1;
 618 |     return @result;
 619 |   }
 620 | 
 621 |   if ($combo_end == $begin || $combo_end == $end) {
 622 |     return ();
 623 |   }
 624 | 
 625 |   my $combo = $combo_end;
 626 |   my $total_set;
 627 | 
 628 |   --$combo;
 629 |   $total_set = $self->upper_bound($combo_end,$end,$data->[$combo]);
 630 |   if ($total_set != $end) {
 631 |     $self->swap($combo,$total_set);
 632 | 
 633 |     for(0..$self->count-1){
 634 |       push @result, $${ $data->[$_] };
 635 |     }
 636 | #warn 2;
 637 |     return @result;
 638 |   }
 639 | 
 640 |   --$total_set;
 641 |   $combo = $self->lower_bound($begin, $combo_end, $data->[$total_set]);
 642 | 
 643 |   if ($combo == $begin) {
 644 |     $self->rotate($begin, $combo_end, $end);
 645 | #warn 3;
 646 |     return ();
 647 |   }
 648 | 
 649 |   my $combo_next = $combo;
 650 |   --$combo;
 651 |   $total_set = $self->upper_bound($combo_end, $end, $data->[$combo]);
 652 | 
 653 |   my $sort_pos = $end;
 654 |   $sort_pos += $combo_end - $total_set - 1;
 655 | 
 656 |   $self->rotate($combo_next, $total_set, $end);
 657 |   $self->rotate($combo, $combo_next, $end);
 658 |   $self->rotate($combo_end, $sort_pos, $end);
 659 | 
 660 |   for(0..$self->count-1){
 661 |     push @result, $${ $data->[$_] };
 662 |   }
 663 | #warn 4;
 664 |   return @result;
 665 | }
 666 | 
 667 | =head2 next_multiset()
 668 | 
 669 |  Usage   : my @multiset = $c->next_multiset();
 670 |  Function: get multisets for @data.
 671 |  Returns : returns a multiset of items from @data (see L</new()>).
 672 |            a multiset is a special type of combination where the set from which
 673 |            combinations are drawn contains items that are indistinguishable.  use
 674 |            L</next_multiset()> when a B<frequency> argument is passed to L</new()>.
 675 |            repeated calls retrieve all unique multisets of @data elements.  a
 676 |            returned empty list signifies all multisets have been iterated.
 677 |  Note    : this method may only be used if a B<frequency> argument is given to
 678 |            L</new()>, otherwise use L</next_combination()>.
 679 |  Args    : none.
 680 | 
 681 | =cut
 682 | 
 683 | sub next_multiset {
 684 |   my $self = shift;
 685 | 
 686 |   if ( ! $self->{frequency} ) {
 687 |     print STDERR "must use next_combination() if 'frequency' argument not passed to constructor\n";
 688 |     return ();
 689 |   }
 690 | 
 691 |   my $data = $self->data();
 692 |   my $compare = $self->compare();
 693 | 
 694 |   while ( my @combo = $self->_next_combination ) {
 695 |     my $x = join '', map {scalar($$_)} sort @$data;
 696 |     my $y = join '', map {scalar($_) } sort @combo;
 697 | 
 698 |     next if $self->{'cache_multiset'}{$y}++;
 699 |     return @combo;
 700 |   }
 701 |   $self->{'cache_multiset'} = undef;
 702 |   return ();
 703 | }
 704 | 
 705 | =head2 next_permutation()
 706 | 
 707 |  Usage   : my @permu = $c->next_permutation();
 708 |  Function: get permutations of elements in @data.
 709 |  Returns : returns a permutation of items from @data (see L</new()>).
 710 |            repeated calls retrieve all unique permutations of @data elements.
 711 |            a returned empty list signifies all permutations have been iterated.
 712 |  Note    : this method may only be used if a B<frequency> argument is B<NOT>
 713 |            given to L</new()>, otherwise use L</next_string()>.
 714 |  Args    : none.
 715 | 
 716 | =cut
 717 | 
 718 | sub next_permutation {
 719 |   my $self = shift;
 720 |   if ( $self->{frequency} ) {
 721 |     print STDERR "must use next_string() if 'frequency' argument passed to constructor\n";
 722 |     return ();
 723 |   }
 724 |   return $self->_next_permutation;
 725 | }
 726 | 
 727 | sub _next_permutation {
 728 |   my $self = shift;
 729 |   my $data = $self->data();
 730 | 
 731 |   if($self->{pin}){
 732 |     $self->{pin} = 0;
 733 |     return map {$$$_} @$data;
 734 |   }
 735 | 
 736 |   my $cursor = $self->_permutation_cursor();
 737 | 
 738 |   my $last= $#{$cursor};
 739 | 
 740 |   if($last < 1){
 741 |     return ();
 742 |   }
 743 | 
 744 |   # Find last item not in reverse-sorted order:
 745 |   my $i = $last - 1;
 746 |   $i-- while  0 <= $i  &&  $cursor->[$i] >= $cursor->[$i+1];
 747 | 
 748 |   if($i == -1){
 749 |     return ();
 750 |   }
 751 | 
 752 | 
 753 |   # Re-sort the reversely-sorted tail of the list:
 754 |   @{$cursor}[$i+1..$last] = reverse @{$cursor}[$i+1..$last]
 755 |     if $cursor->[$i+1] > $cursor->[$last];
 756 | 
 757 |   # Find next item that will make us "greater":
 758 |   my $j = $i+1;
 759 |   $j++ while  $cursor->[$i] >= $cursor->[$j];
 760 | 
 761 |   # Swap:
 762 |   @{$cursor}[$i,$j] = @{$cursor}[$j,$i];
 763 | 
 764 |   # map cursor to data array
 765 |   my @result;
 766 |   foreach my $c (@$cursor){
 767 |     push @result, $${ $data->[$c] };
 768 |   }
 769 |   return @result;
 770 | }
 771 | 
 772 | =head2 next_string()
 773 | 
 774 |  Usage   : my @string = $c->next_string();
 775 |  Function: get strings for @data.
 776 |  Returns : returns a multiset of items from @data (see L</new()>).
 777 |            a multiset is a special type of permutation where the set from which
 778 |            combinations are drawn contains items that are indistinguishable.  use
 779 |            L</next_permutation()> when a B<frequency> argument is passed to L</new()>.
 780 |            repeated calls retrieve all unique multisets of @data elements.  a
 781 |            returned empty list signifies all strings have been iterated.
 782 |  Note    : this method may only be used if a B<frequency> argument is given to
 783 |            L</new()>, otherwise use L</next_permutation()>.
 784 |  Args    : none.
 785 | 
 786 | =cut
 787 | 
 788 | sub next_string {
 789 |   my $self = shift;
 790 |   my $data = $self->data();
 791 | 
 792 |   if ( ! $self->{frequency} ) {
 793 |     print STDERR "must use next_permutation() if 'frequency' argument not passed to constructor\n";
 794 |     return ();
 795 |   }
 796 | 
 797 | 
 798 |   while ( my @permu = $self->_next_permutation ) {
 799 |     my $x = join '', map {scalar($$_)} @$data;
 800 |     my $y = join '', map {scalar($_) } @permu;
 801 | 
 802 |     next if $self->{'cache_string'}{$y}++;
 803 |     return @permu;
 804 |   }
 805 | 
 806 |   $self->{'cache_string'} = undef;
 807 |   return ();
 808 | }
 809 | 
 810 | =head1 INTERNAL FUNCTIONS AND METHODS
 811 | 
 812 | =head2 sum()
 813 | 
 814 |  Usage   : my $sum = sum(1,2,3); # returns 6
 815 |  Function: sums a list of integers.  non-integer list elements are ignored
 816 |  Returns : sum of integer items in arguments passed in
 817 |  Args    : a list of integers
 818 |  Note    : this function is used internally by combine()
 819 | 
 820 | =cut
 821 | 
 822 | sub sum {
 823 |   my $sum = 0;
 824 |   foreach my $i (@_){
 825 |     $sum += $i if $i == int($i);
 826 |   }
 827 |   return $sum;
 828 | }
 829 | 
 830 | =head2 compare()
 831 | 
 832 |  Usage   : $obj->compare()
 833 |  Function: internal, undocumented.  holds a comparison coderef.
 834 |  Returns : value of compare (a coderef)
 835 | 
 836 | 
 837 | =cut
 838 | 
 839 | sub compare {
 840 |   my($self,$val) = @_;
 841 |   return $self->{'compare'};
 842 | }
 843 | 
 844 | 
 845 | =head2 count()
 846 | 
 847 |  Usage   : $obj->count()
 848 |  Function: internal, undocumented.  holds the "k" in nCk or nPk.
 849 |  Returns : value of count (an int)
 850 | 
 851 | =cut
 852 | 
 853 | sub count {
 854 |   my($self) = @_;
 855 |   return $self->{'count'};
 856 | }
 857 | 
 858 | 
 859 | =head2 data()
 860 | 
 861 |  Usage   : $obj->data()
 862 |  Function: internal, undocumented.  holds the set "n" in nCk or nPk.
 863 |  Returns : value of data (an arrayref)
 864 | 
 865 | =cut
 866 | 
 867 | sub data {
 868 |   my($self) = @_;
 869 |   return $self->{'data'};
 870 | }
 871 | 
 872 | 
 873 | =head2 swap()
 874 | 
 875 | internal, undocumented.
 876 | 
 877 | =cut
 878 | 
 879 | sub swap {
 880 |   my $self = shift;
 881 |   my $first = shift;
 882 |   my $second = shift;
 883 |   my $data = $self->data();
 884 | 
 885 |   my $temp = $data->[$first];
 886 |   $data->[$first] = $data->[$second];
 887 |   $data->[$second] = $temp;
 888 | }
 889 | 
 890 | =head2 reverse()
 891 | 
 892 | internal, undocumented.
 893 | 
 894 | =cut
 895 | 
 896 | sub reverse {
 897 |   my $self = shift;
 898 |   my $first = shift;
 899 |   my $last = shift;
 900 |   my $data = $self->data();
 901 | 
 902 |   while (1) {
 903 |     if ($first == $last || $first == --$last) {
 904 |       return;
 905 |     } else {
 906 |       $self->swap($first++, $last);
 907 |     }
 908 |   }
 909 | }
 910 | 
 911 | =head2 rotate()
 912 | 
 913 | internal, undocumented.
 914 | 
 915 | =cut
 916 | 
 917 | sub rotate {
 918 |   my $self = shift;
 919 |   my $first = shift;
 920 |   my $middle = shift;
 921 |   my $last = shift;
 922 |   my $data = $self->data();
 923 | 
 924 |   if ($first == $middle || $last == $middle) {
 925 |     return;
 926 |   }
 927 | 
 928 |   my $first2 = $middle;
 929 | 
 930 |   do {
 931 |     $self->swap($first++, $first2++);
 932 | 
 933 |     if ($first == $middle) {
 934 |       $middle = $first2;
 935 |     }
 936 |   } while ($first2 != $last);
 937 | 
 938 |   $first2 = $middle;
 939 | 
 940 |   while ($first2 != $last) {
 941 |     $self->swap($first++, $first2++);
 942 |     if ($first == $middle) {
 943 |       $middle = $first2;
 944 |     } elsif ($first2 == $last) {
 945 |       $first2 = $middle;
 946 |     }
 947 |   }
 948 | }
 949 | 
 950 | =head2 upper_bound()
 951 | 
 952 | internal, undocumented.
 953 | 
 954 | =cut
 955 | 
 956 | sub upper_bound {
 957 |   my $self = shift;
 958 |   my $first = shift;
 959 |   my $last = shift;
 960 |   my $value = shift;
 961 |   my $compare = $self->compare();
 962 |   my $data = $self->data();
 963 | 
 964 |   my $len = $last - $first;
 965 |   my $half;
 966 |   my $middle;
 967 | 
 968 |   while ($len > 0) {
 969 |     $half = $len >> 1;
 970 |     $middle = $first;
 971 |     $middle += $half;
 972 | 
 973 |     if (&$compare($value,$data->[$middle]) == -1) {
 974 |       $len = $half;
 975 |     } else {
 976 |       $first = $middle;
 977 |       ++$first;
 978 |       $len = $len - $half - 1;
 979 |     }
 980 |   }
 981 | 
 982 |   return $first;
 983 | }
 984 | 
 985 | =head2 lower_bound()
 986 | 
 987 | internal, undocumented.
 988 | 
 989 | =cut
 990 | 
 991 | sub lower_bound {
 992 |   my $self = shift;
 993 |   my $first = shift;
 994 |   my $last = shift;
 995 |   my $value = shift;
 996 |   my $compare = $self->compare();
 997 |   my $data = $self->data();
 998 | 
 999 |   my $len = $last - $first;
1000 |   my $half;
1001 |   my $middle;
1002 | 
1003 |   while ($len > 0) {
1004 |     $half = $len >> 1;
1005 |     $middle = $first;
1006 |     $middle += $half;
1007 | 
1008 |     if (&$compare($data->[$middle],$value) == -1) {
1009 |       $first = $middle;
1010 |       ++$first;
1011 |       $len = $len - $half - 1;
1012 |     } else {
1013 |       $len = $half;
1014 |     }
1015 |   }
1016 | 
1017 |   return $first;
1018 | }
1019 | 
1020 | =head2 _permutation_cursor()
1021 | 
1022 |  Usage   : $obj->_permutation_cursor()
1023 |  Function: internal method.  cursor on permutation iterator order.
1024 |  Returns : value of _permutation_cursor (an arrayref)
1025 |  Args    : none
1026 | 
1027 | =cut
1028 | 
1029 | sub _permutation_cursor {
1030 |   my($self,$val) = @_;
1031 | 
1032 |   if(!$self->{'_permutation_cursor'}){
1033 |     my $data = $self->data();
1034 |     my @tmp = ();
1035 |     my $i = 0;
1036 |     push @tmp, $i++ foreach @$data;
1037 |     $self->{'_permutation_cursor'} = \@tmp;
1038 |   }
1039 | 
1040 |   return $self->{'_permutation_cursor'};
1041 | }
1042 | 
1043 | 1;
1044 | 
1045 | 


--------------------------------------------------------------------------------
/lib/CorScorer.pm:
--------------------------------------------------------------------------------
   1 | package CorScorer;
   2 | 
   3 | # Copyright (C) 2009-2011, Emili Sapena esapena <at> lsi.upc.edu
   4 | #               2011-2014, Sameer Pradhan <at> childrens.harvard.edu
   5 | #
   6 | # This program is free software; you can redistribute it and/or modify it
   7 | # under the terms of the GNU General Public License as published by the
   8 | # Free Software Foundation; either version 2 of the License, or (at your
   9 | # option) any later version. This program is distributed in the hope that
  10 | # it will be useful, but WITHOUT ANY WARRANTY; without even the implied
  11 | # warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12 | # GNU General Public License for more details.
  13 | #
  14 | # You should have received a copy of the GNU General Public License along
  15 | # with this program; if not, write to the Free Software Foundation, Inc.,
  16 | # 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  17 | #
  18 | # Modified in 2013 for v1.07 by Sebastian Martschat,
  19 | #   sebastian.martschat <at> h-its.org
  20 | #
  21 | # Revised in July, 2013 by Xiaoqiang Luo (xql <at> google.com) to create v6.0.
  22 | # See comments under $VERSION for modifications.
  23 | #
  24 | # Revised in March, 2014 by Sameer Pradhan (sameer.pradhan <at> childrens.harvard.edu)
  25 | # to implement the BLANC metric for predicted mentions
  26 | 
  27 | 
  28 | use strict;
  29 | use Algorithm::Munkres;
  30 | use Data::Dumper;
  31 | 
  32 | #use Algorithm::Combinatorics qw(combinations);
  33 | use Math::Combinatorics;
  34 | use Cwd;
  35 | 
  36 | our $VERSION = '8.01';
  37 | print "version: " . $VERSION . " " . Cwd::realpath(__FILE__) . "\n";
  38 | 
  39 | ##
  40 | # 8.01 fixed a bug that crashed the the BLANC scoring when duplicate
  41 | #      (potentially singleton) mentions were present in the
  42 | #      response. as part of the fix, wee will allow a maximum of 10
  43 | #      duplicate mentions in response, but if there are more, than it
  44 | #      is a sign of a systematic error/manipulation and we will refuse
  45 | #      to score that run.
  46 | 
  47 | #  8.0 added code to compute the BLANC metric (generalized for both gold
  48 | #      and system mentions (Luo et al., 2014)
  49 | #
  50 | #  7.0 Removed code to compute *_cs metrics
  51 | #
  52 | #  6.0 The directory hosting the scorer is under v6 and internal $VERSION is
  53 | #      set to "6.0."
  54 | #      Changes:
  55 | #      - 'ceafm', 'ceafe' and 'bcub' in the previous version are renamed
  56 | #        'ceafm_cs', 'ceafe_cs', and 'bcub_cs', respectively.
  57 | #      - 'ceafm', 'ceafe' and 'bcub' are implemented without (Cai&Strube 2010)
  58 | #         modification. These metrics can handle twinless mentions and entities
  59 | #         just fine.
  60 | #
  61 | # 1.07 Modifications to implement BCUB and CEAFM
  62 | #      exactly as proposed by (Cai & Strube, 2010).
  63 | # 1.06 ?
  64 | # 1.05 Modification of IdentifMentions in order to correctly evaluate the
  65 | #     outputs with detected mentions. Based on (Cai & Strubbe, 2010)
  66 | # 1.04 Some output corrections in BLANC functions. Changed package name to "Scorer"
  67 | # 1.03 Detects mentions that start in a document but do not end
  68 | # 1.02 Corrected BCUB bug. It fails when the key file does not have any mention
  69 | 
  70 | # global variables
  71 | my $VERBOSE         = 2;
  72 | my $HEAD_COLUMN     = 8;
  73 | my $RESPONSE_COLUMN = -1;
  74 | my $KEY_COLUMN      = -1;
  75 | 
  76 | # Score. Scores the results of a coreference resolution system
  77 | # Input: Metric, keys file, response file, [name]
  78 | #        Metric: the metric desired to evaluate:
  79 | #                muc: MUCScorer (Vilain et al, 1995)
  80 | #                bcub: B-Cubed (Bagga and Baldwin, 1998)
  81 | #                ceafm: CEAF (Luo et al, 2005) using mention-based similarity
  82 | #                ceafe: CEAF (Luo et al, 2005) using entity-based similarity
  83 | #        keys file: file with expected coreference chains in SemEval format
  84 | #        response file: file with output of coreference system (SemEval format)
  85 | #        name: [optional] the name of the document to score. If name is not
  86 | #              given, all the documents in the dataset will be scored.
  87 | #
  88 | # Output: an array with numerators and denominators of recall and precision
  89 | #         (recall_num, recall_den, precision_num, precision_den)
  90 | #
  91 | #   Final scores:
  92 | # Recall = recall_num / recall_den
  93 | # Precision = precision_num / precision_den
  94 | # F1 = 2 * Recall * Precision / (Recall + Precision)
  95 | sub Score {
  96 |   my ($metric, $kFile, $rFile, $name) = @_;
  97 | 	our $repeated_mentions = 0;
  98 | 
  99 |   if (lc($metric) eq 'blanc') {
 100 |     return ScoreBLANC($kFile, $rFile, $name);
 101 |   }
 102 | 
 103 |   my %idenTotals =
 104 |     (recallDen => 0, recallNum => 0, precisionDen => 0, precisionNum => 0);
 105 |   my ($acumNR, $acumDR, $acumNP, $acumDP) = (0, 0, 0, 0);
 106 | 
 107 |   if (defined($name) && $name ne 'none') {
 108 |     print "$name:\n" if ($VERBOSE);
 109 |     my $keys     = GetCoreference($kFile, $KEY_COLUMN,      $name);
 110 |     my $response = GetCoreference($rFile, $RESPONSE_COLUMN, $name);
 111 |     my (
 112 |       $keyChains, $keyChainsWithSingletonsFromResponse,
 113 |       $responseChains, $responseChainsWithoutMentionsNotInKey,
 114 |       $keyChainsOrig, $responseChainsOrig
 115 |     ) = IdentifMentions($keys, $response, \%idenTotals);
 116 |     ($acumNR, $acumDR, $acumNP, $acumDP) = Eval(
 117 |       $metric,                                $keyChains,
 118 |       $keyChainsWithSingletonsFromResponse,   $responseChains,
 119 |       $responseChainsWithoutMentionsNotInKey, $keyChainsOrig,
 120 |       $responseChainsOrig
 121 |     );
 122 |   }
 123 |   else {
 124 |     my $kIndexNames = GetFileNames($kFile);
 125 |     my $rIndexNames = GetFileNames($rFile);
 126 | 
 127 |     $VERBOSE = 0 if ($name eq 'none');
 128 |     foreach my $iname (keys(%{$kIndexNames})) {
 129 |       my $keys =
 130 |         GetCoreference($kFile, $KEY_COLUMN, $iname, $kIndexNames->{$iname});
 131 |       my $response = GetCoreference($rFile, $RESPONSE_COLUMN, $iname,
 132 |         $rIndexNames->{$iname});
 133 | 
 134 |       print "$iname:\n" if ($VERBOSE);
 135 |       my (
 136 |         $keyChains,      $keyChainsWithSingletonsFromResponse,
 137 |         $responseChains, $responseChainsWithoutMentionsNotInKey,
 138 |         $keyChainsOrig,  $responseChainsOrig
 139 |       ) = IdentifMentions($keys, $response, \%idenTotals);
 140 |       my ($nr, $dr, $np, $dp) = Eval(
 141 |         $metric,                                $keyChains,
 142 |         $keyChainsWithSingletonsFromResponse,   $responseChains,
 143 |         $responseChainsWithoutMentionsNotInKey, $keyChainsOrig,
 144 |         $responseChainsOrig
 145 |       );
 146 | 
 147 |       $acumNR += $nr;
 148 |       $acumDR += $dr;
 149 |       $acumNP += $np;
 150 |       $acumDP += $dp;
 151 |     }
 152 |   }
 153 | 
 154 |   if ($VERBOSE || $name eq 'none') {
 155 |     print "\n====== TOTALS =======\n";
 156 |     print "Identification of Mentions: ";
 157 |     ShowRPF(
 158 |       $idenTotals{recallNum},    $idenTotals{recallDen},
 159 |       $idenTotals{precisionNum}, $idenTotals{precisionDen}
 160 |     );
 161 |     print "Coreference: ";
 162 |     ShowRPF($acumNR, $acumDR, $acumNP, $acumDP);
 163 |   }
 164 | 
 165 |   return ($acumNR, $acumDR, $acumNP, $acumDP);
 166 | }
 167 | 
 168 | sub GetIndex {
 169 |   my ($ind, $i) = @_;
 170 |   if (!defined($ind->{$i})) {
 171 |     my $n = $ind->{nexti} || 0;
 172 |     $ind->{$i} = $n;
 173 |     $n++;
 174 |     $ind->{nexti} = $n;
 175 |   }
 176 | 
 177 |   return $ind->{$i};
 178 | }
 179 | 
 180 | # Get the coreference information from column $column of the file $file
 181 | # If $name is defined, only keys between "#begin document $name" and
 182 | # "#end file $name" are taken.
 183 | # The output is an array of entites, where each entity is an array
 184 | # of mentions and each mention is an array with two values corresponding
 185 | # to the mention's begin and end. For example:
 186 | # @entities = ( [ [1,3], [45,45], [57,62] ], # <-- entity 0
 187 | #               [ [5,5], [25,27], [31,31] ], # <-- entity 1
 188 | # ...
 189 | # );
 190 | # entity 0 is composed of 3 mentions: from token 1 to 3, token 45 and
 191 | # from token 57 to 62 (both included)
 192 | #
 193 | # if $name is not specified, the output is a hash including each file
 194 | # found in the document:
 195 | # $coref{$file} = \@entities
 196 | sub GetCoreference {
 197 |   my ($file, $column, $name, $pos) = @_;
 198 |   my %coref;
 199 |   my %ind;
 200 | 
 201 |   open(F, $file) || die "Can not open $file: $!";
 202 |   if ($pos) {
 203 |     seek(F, $pos, 0);
 204 |   }
 205 |   my $fName;
 206 |   my $getout = 0;
 207 |   do {
 208 |     # look for the begin of a file
 209 |     while (my $l = <F>) {
 210 |       chomp($l);
 211 |       $l =~ s/\r$//;    # m$ format jokes
 212 |       if ($l =~ /^\#\s*begin document (.*?)$/) {
 213 |         if (defined($name)) {
 214 |           if ($name eq $1) {
 215 |             $fName  = $name;
 216 |             $getout = 1;
 217 |             last;
 218 |           }
 219 |         }
 220 |         else {
 221 |           $fName = $1;
 222 |           last;
 223 |         }
 224 |       }
 225 |     }
 226 |     print "====> $fName:\n" if ($VERBOSE > 1);
 227 | 
 228 |     # Extract the keys from the file until #end is found
 229 |     my $lnumber = 0;
 230 |     my @entities;
 231 |     my @half;
 232 |     my @head;
 233 |     my @sentId;
 234 |     while (my $l = <F>) {
 235 |       chomp($l);
 236 | 			$l =~ s/^\s+$//;
 237 |       next if ($l eq '');
 238 |       if ($l =~ /\#\s*end document/) {
 239 |         foreach my $h (@half) {
 240 |           if (defined($h) && @$h) {
 241 |             die "Error: some mentions in the document do not close\n";
 242 |           }
 243 |         }
 244 |         last;
 245 |       }
 246 |       my @columns = split(/\t/, $l);
 247 |       my $cInfo = $columns[$column];
 248 |       push(@head,   $columns[$HEAD_COLUMN]);
 249 |       push(@sentId, $columns[0]);
 250 |       if ($cInfo ne '_') {
 251 | 
 252 |         #discard double antecedent
 253 |         while ($cInfo =~ s/\((\d+\+\d)\)//) {
 254 |           print "Discarded ($1)\n" if ($VERBOSE > 1);
 255 |         }
 256 | 
 257 |         # one-token mention(s)
 258 |         while ($cInfo =~ s/\((\d+)\)//) {
 259 |           my $ie = GetIndex(\%ind, $1);
 260 |           push(@{$entities[$ie]}, [$lnumber, $lnumber, $lnumber]);
 261 |           print "+mention (entity $ie): ($lnumber,$lnumber)\n"
 262 |             if ($VERBOSE > 2);
 263 |         }
 264 | 
 265 |         # begin of mention(s)
 266 |         while ($cInfo =~ s/\((\d+)//) {
 267 |           my $ie = GetIndex(\%ind, $1);
 268 |           push(@{$half[$ie]}, $lnumber);
 269 |           print "+init mention (entity $ie): ($lnumber\n" if ($VERBOSE > 2);
 270 |         }
 271 | 
 272 |         # end of mention(s)
 273 |         while ($cInfo =~ s/(\d+)\)//) {
 274 |           my $numberie = $1;
 275 |           my $ie       = GetIndex(\%ind, $numberie);
 276 |           my $start    = pop(@{$half[$ie]});
 277 |           if (defined($start)) {
 278 |             my $inim  = $sentId[$start];
 279 |             my $endm  = $sentId[$lnumber];
 280 |             my $tHead = $start;
 281 | 
 282 |         # the token whose head is outside the mention is the head of the mention
 283 |             for (my $t = $start ; $t <= $lnumber ; $t++) {
 284 |               if ($head[$t] < $inim || $head[$t] > $endm) {
 285 |                 $tHead = $t;
 286 |                 last;
 287 |               }
 288 |             }
 289 |             push(@{$entities[$ie]}, [$start, $lnumber, $tHead]);
 290 |           }
 291 |           else {
 292 |             die
 293 | "Detected the end of a mention [$numberie]($ie) without begin (?,$lnumber)";
 294 |           }
 295 |           print "+mention (entity $ie): ($start,$lnumber)\n" if ($VERBOSE > 2);
 296 | 
 297 |         }
 298 |       }
 299 |       $lnumber++;
 300 |     }
 301 | 
 302 |     # verbose
 303 |     if ($VERBOSE > 1) {
 304 |       print "File $fName:\n";
 305 |       for (my $e = 0 ; $e < scalar(@entities) ; $e++) {
 306 |         print "Entity $e:";
 307 |         foreach my $mention (@{$entities[$e]}) {
 308 |           print " ($mention->[0],$mention->[1])";
 309 |         }
 310 |         print "\n";
 311 |       }
 312 |     }
 313 | 
 314 |     $coref{$fName} = \@entities;
 315 |   } while (!$getout && !eof(F));
 316 | 
 317 |   if (defined($name)) {
 318 |     return $coref{$name};
 319 |   }
 320 |   return \%coref;
 321 | }
 322 | 
 323 | sub GetFileNames {
 324 |   my $file = shift;
 325 |   my %hash;
 326 |   my $last = 0;
 327 |   open(F, $file) || die "Can not open $file: $!";
 328 |   while (my $l = <F>) {
 329 |     chomp($l);
 330 |     $l =~ s/\r$//;    # m$ format jokes
 331 |     if ($l =~ /^\#\s*begin document (.*?)$/) {
 332 |       my $name = $1;
 333 |       $hash{$name} = $last;
 334 |     }
 335 |     $last = tell(F);
 336 |   }
 337 |   close(F);
 338 |   return \%hash;
 339 | }
 340 | 
 341 | sub IdentifMentions {
 342 | 	my ($keys, $response, $totals) = @_;
 343 |   my @kChains;
 344 |   my @kChainsWithSingletonsFromResponse;
 345 |   my @rChains;
 346 |   my @rChainsWithoutMentionsNotInKey;
 347 |   my %id;
 348 |   my %map;
 349 |   my $idCount = 0;
 350 |   my @assigned;
 351 |   my @kChainsOrig = ();
 352 |   my @rChainsOrig = ();
 353 | 
 354 |   # for each mention found in keys an ID is generated
 355 |   foreach my $entity (@$keys) {
 356 |     foreach my $mention (@$entity) {
 357 |       if (defined($id{"$mention->[0],$mention->[1]"})) {
 358 |         print "Repeated mention in the key: $mention->[0], $mention->[1] ",
 359 |           $id{"$mention->[0],$mention->[1]"}, $idCount, "\n";
 360 |       }
 361 |       $id{"$mention->[0],$mention->[1]"} = $idCount;
 362 |       $idCount++;
 363 |     }
 364 |   }
 365 | 
 366 |   # correct identification: Exact bound limits
 367 |   my $exact = 0;
 368 |   foreach my $entity (@$response) {
 369 | 
 370 |     my $i = 0;
 371 |     my @remove;
 372 | 		
 373 |     foreach my $mention (@$entity) {
 374 |       if (defined($map{"$mention->[0],$mention->[1]"})) {
 375 |         print "Repeated mention in the response: $mention->[0], $mention->[1] ",
 376 |           $map{"$mention->[0],$mention->[1]"},
 377 |           $id{"$mention->[0],$mention->[1]"},
 378 |           "\n";
 379 |         push(@remove, $i);
 380 | 				$main::repeated_mentions++;
 381 | 
 382 | 				if ($main::repeated_mentions > 10)
 383 | 				{
 384 | 						print STDERR "Found too many repeated mentions (> 10) in the response, so refusing to score. Please fix the output.\n";
 385 | 						exit 1;
 386 | 				}
 387 | 
 388 |       }
 389 |       elsif (defined($id{"$mention->[0],$mention->[1]"})
 390 |         && !$assigned[$id{"$mention->[0],$mention->[1]"}])
 391 |       {
 392 |         $assigned[$id{"$mention->[0],$mention->[1]"}] = 1;
 393 |         $map{"$mention->[0],$mention->[1]"} =
 394 |           $id{"$mention->[0],$mention->[1]"};
 395 |         $exact++;
 396 |       }
 397 |       $i++;
 398 |     }
 399 | 
 400 |     # Remove repeated mentions in the response
 401 |     foreach my $i (sort { $b <=> $a } (@remove)) {
 402 |       splice(@$entity, $i, 1);
 403 |     }
 404 |   }
 405 | 
 406 | 
 407 | 	# now, lets remove any empty elements in the response array after removing
 408 | 	# potential repeats
 409 | 	my @another_remove = ();
 410 | 	my $ii;
 411 | 
 412 | 	foreach my $eentity (@$response)
 413 | 	{
 414 | 			if ( @$eentity == 0)
 415 | 			{
 416 | 					push(@another_remove, $ii);
 417 | 			}
 418 | 			$ii++;
 419 | 	}
 420 | 
 421 | 	foreach my $iii (sort { $b <=> $a } (@another_remove)) {
 422 |       splice(@$response, $iii, 1);
 423 | 	}
 424 | 
 425 | 
 426 |   # Partial identificaiton: Inside bounds and including the head
 427 |   my $part = 0;
 428 | 
 429 |   # Each mention in response not included in keys has a new ID
 430 |   my $mresp = 0;
 431 |   foreach my $entity (@$response) {
 432 |     foreach my $mention (@$entity) {
 433 |       my $ini = $mention->[0];
 434 |       my $end = $mention->[1];
 435 |       if (!defined($map{"$mention->[0],$mention->[1]"})) {
 436 |         $map{"$mention->[0],$mention->[1]"} = $idCount;
 437 |         $idCount++;
 438 |       }
 439 |       $mresp++;
 440 |     }
 441 |   }
 442 | 
 443 |   if ($VERBOSE) {
 444 |     print "Total key mentions: " . scalar(keys(%id)) . "\n";
 445 |     print "Total response mentions: " . scalar(keys(%map)) . "\n";
 446 |     print "Strictly correct identified mentions: $exact\n";
 447 |     print "Partially correct identified mentions: $part\n";
 448 |     print "No identified: " . (scalar(keys(%id)) - $exact - $part) . "\n";
 449 |     print "Invented: " . ($idCount - scalar(keys(%id))) . "\n";
 450 |   }
 451 | 
 452 |   if (defined($totals)) {
 453 |     $totals->{recallDen}      += scalar(keys(%id));
 454 |     $totals->{recallNum}      += $exact;
 455 |     $totals->{precisionDen}   += scalar(keys(%map));
 456 |     $totals->{precisionNum}   += $exact;
 457 |     $totals->{precisionExact} += $exact;
 458 |     $totals->{precisionPart}  += $part;
 459 |   }
 460 | 
 461 |   # The coreference chains arrays are generated again with ID of mentions
 462 |   # instead of token coordenates
 463 |   my $e = 0;
 464 |   foreach my $entity (@$keys) {
 465 |     foreach my $mention (@$entity) {
 466 |       push(@{$kChainsOrig[$e]}, $id{"$mention->[0],$mention->[1]"});
 467 |       push(@{$kChains[$e]},     $id{"$mention->[0],$mention->[1]"});
 468 |     }
 469 |     $e++;
 470 |   }
 471 |   $e = 0;
 472 |   foreach my $entity (@$response) {
 473 |     foreach my $mention (@$entity) {
 474 |       push(@{$rChainsOrig[$e]}, $map{"$mention->[0],$mention->[1]"});
 475 |       push(@{$rChains[$e]},     $map{"$mention->[0],$mention->[1]"});
 476 |     }
 477 |     $e++;
 478 |   }
 479 | 
 480 |   # In order to use the metrics as in (Cai & Strube, 2010):
 481 |   # 1. Include the non-detected key mentions into the response as singletons
 482 |   # 2. Discard the detected mentions not included in key resolved as singletons
 483 |   # 3a. For computing precision: put twinless system mentions in key
 484 |   # 3b. For computing recall: discard twinless system mentions in response
 485 | 
 486 |   my $kIndex = Indexa(\@kChains);
 487 |   my $rIndex = Indexa(\@rChains);
 488 | 
 489 |   # 1. Include the non-detected key mentions into the response as singletons
 490 |   my $addkey = 0;
 491 |   if (scalar(keys(%id)) - $exact - $part > 0) {
 492 |     foreach my $kc (@kChains) {
 493 |       foreach my $m (@$kc) {
 494 |         if (!defined($rIndex->{$m})) {
 495 |           push(@rChains, [$m]);
 496 |           $addkey++;
 497 |         }
 498 |       }
 499 |     }
 500 |   }
 501 | 
 502 |   @kChainsWithSingletonsFromResponse = @kChains;
 503 |   @rChainsWithoutMentionsNotInKey    = [];
 504 | 
 505 |   # 2. Discard the detected mentions not included in key resolved as singletons
 506 |   my $delsin = 0;
 507 | 
 508 |   if ($idCount - scalar(keys(%id)) > 0) {
 509 |     foreach my $rc (@rChains) {
 510 |       if (scalar(@$rc) == 1) {
 511 |         if (!defined($kIndex->{$rc->[0]})) {
 512 |           @$rc = ();
 513 |           $delsin++;
 514 |         }
 515 |       }
 516 |     }
 517 |   }
 518 | 
 519 | # 3a. For computing precision: put twinless system mentions in key as singletons
 520 |   my $addinv = 0;
 521 | 
 522 |   if ($idCount - scalar(keys(%id)) > 0) {
 523 |     foreach my $rc (@rChains) {
 524 |       if (scalar(@$rc) > 1) {
 525 |         foreach my $m (@$rc) {
 526 |           if (!defined($kIndex->{$m})) {
 527 |             push(@kChainsWithSingletonsFromResponse, [$m]);
 528 |             $addinv++;
 529 |           }
 530 |         }
 531 |       }
 532 |     }
 533 |   }
 534 | 
 535 |   # 3b. For computing recall: discard twinless system mentions in response
 536 |   my $delsys = 0;
 537 | 
 538 |   foreach my $rc (@rChains) {
 539 |     my @temprc;
 540 |     my $i = 0;
 541 | 
 542 |     foreach my $m (@$rc) {
 543 |       if (defined($kIndex->{$m})) {
 544 |         push(@temprc, $m);
 545 |         $i++;
 546 |       }
 547 |       else {
 548 |         $delsys++;
 549 |       }
 550 |     }
 551 | 
 552 |     if ($i > 0) {
 553 |       push(@rChainsWithoutMentionsNotInKey, \@temprc);
 554 |     }
 555 |   }
 556 | 
 557 |   # We clean the empty chains
 558 |   my @newrc;
 559 |   foreach my $rc (@rChains) {
 560 |     if (scalar(@$rc) > 0) {
 561 |       push(@newrc, $rc);
 562 |     }
 563 |   }
 564 |   @rChains = @newrc;
 565 | 
 566 |   return (
 567 |     \@kChains, \@kChainsWithSingletonsFromResponse,
 568 |     \@rChains, \@rChainsWithoutMentionsNotInKey,
 569 |     \@kChainsOrig, \@rChainsOrig
 570 |   );
 571 | }
 572 | 
 573 | sub Eval {
 574 |   my ($scorer, $keys, $keysPrecision, $response, $responseRecall,
 575 |     $keyChainsOrig, $responseChainsOrig)
 576 |     = @_;
 577 |   $scorer = lc($scorer);
 578 |   my ($nr, $dr, $np, $dp);
 579 |   if ($scorer eq 'muc') {
 580 |     ($nr, $dr, $np, $dp) =
 581 |       MUCScorer($keys, $keysPrecision, $response, $responseRecall);
 582 |   }
 583 |   elsif ($scorer eq 'bcub') {
 584 |     ($nr, $dr, $np, $dp) = BCUBED($keyChainsOrig, $responseChainsOrig);
 585 |   }
 586 |   elsif ($scorer eq 'ceafm') {
 587 |     ($nr, $dr, $np, $dp) = CEAF($keyChainsOrig, $responseChainsOrig, 1);
 588 |   }
 589 |   elsif ($scorer eq 'ceafe') {
 590 |     ($nr, $dr, $np, $dp) = CEAF($keyChainsOrig, $responseChainsOrig, 0);
 591 |   }
 592 |   else {
 593 |     die "Metric $scorer not implemented yet\n";
 594 |   }
 595 |   return ($nr, $dr, $np, $dp);
 596 | }
 597 | 
 598 | # Indexes an array of arrays, in order to easily know the position of an element
 599 | sub Indexa {
 600 |   my ($arrays) = @_;
 601 |   my %index;
 602 | 
 603 |   for (my $i = 0 ; $i < @$arrays ; $i++) {
 604 |     foreach my $e (@{$arrays->[$i]}) {
 605 |       $index{$e} = $i;
 606 |     }
 607 |   }
 608 |   return \%index;
 609 | }
 610 | 
 611 | # Consider the "links" within every coreference chain. For example,
 612 | # chain A-B-C-D has 3 links: A-B, B-C and C-D.
 613 | # Recall: num correct links / num expected links.
 614 | # Precision: num correct links / num output links
 615 | 
 616 | sub MUCScorer {
 617 |   my ($keys, $keysPrecision, $response, $responseRecall) = @_;
 618 | 
 619 |   my $kIndex = Indexa($keys);
 620 | 
 621 |   # Calculate correct links
 622 |   my $correct = 0;
 623 |   foreach my $rEntity (@$response) {
 624 |     next if (!defined($rEntity));
 625 | 
 626 |     # for each possible pair
 627 |     for (my $i = 0 ; $i < @$rEntity ; $i++) {
 628 |       my $id_i = $rEntity->[$i];
 629 |       for (my $j = $i + 1 ; $j < @$rEntity ; $j++) {
 630 |         my $id_j = $rEntity->[$j];
 631 |         if ( defined($kIndex->{$id_i})
 632 |           && defined($kIndex->{$id_j})
 633 |           && $kIndex->{$id_i} == $kIndex->{$id_j})
 634 |         {
 635 |           $correct++;
 636 |           last;
 637 |         }
 638 |       }
 639 |     }
 640 |   }
 641 | 
 642 |   # Links in key
 643 |   my $keylinks = 0;
 644 |   foreach my $kEntity (@$keys) {
 645 |     next if (!defined($kEntity));
 646 |     $keylinks += scalar(@$kEntity) - 1 if (scalar(@$kEntity));
 647 |   }
 648 | 
 649 |   # Links in response
 650 |   my $reslinks = 0;
 651 |   foreach my $rEntity (@$response) {
 652 |     next if (!defined($rEntity));
 653 |     $reslinks += scalar(@$rEntity) - 1 if (scalar(@$rEntity));
 654 |   }
 655 | 
 656 |   ShowRPF($correct, $keylinks, $correct, $reslinks) if ($VERBOSE);
 657 |   return ($correct, $keylinks, $correct, $reslinks);
 658 | }
 659 | 
 660 | # Compute precision for every mention in the response, and compute
 661 | # recall for every mention in the keys
 662 | sub BCUBED {
 663 |   my ($keys, $response) = @_;
 664 |   my $kIndex = Indexa($keys);
 665 |   my $rIndex = Indexa($response);
 666 |   my $acumP  = 0;
 667 |   my $acumR  = 0;
 668 |   foreach my $rChain (@$response) {
 669 |     foreach my $m (@$rChain) {
 670 |       my $kChain = (defined($kIndex->{$m})) ? $keys->[$kIndex->{$m}] : [];
 671 |       my $ci     = 0;
 672 |       my $ri     = scalar(@$rChain);
 673 |       my $ki     = scalar(@$kChain);
 674 | 
 675 |       # common mentions in rChain and kChain => Ci
 676 |       foreach my $mr (@$rChain) {
 677 |         foreach my $mk (@$kChain) {
 678 |           if ($mr == $mk) {
 679 |             $ci++;
 680 |             last;
 681 |           }
 682 |         }
 683 |       }
 684 | 
 685 |       $acumP += $ci / $ri if ($ri);
 686 |       $acumR += $ci / $ki if ($ki);
 687 |     }
 688 |   }
 689 | 
 690 |   # Mentions in key
 691 |   my $keymentions = 0;
 692 |   foreach my $kEntity (@$keys) {
 693 |     $keymentions += scalar(@$kEntity);
 694 |   }
 695 | 
 696 |   # Mentions in response
 697 |   my $resmentions = 0;
 698 |   foreach my $rEntity (@$response) {
 699 |     $resmentions += scalar(@$rEntity);
 700 |   }
 701 | 
 702 |   ShowRPF($acumR, $keymentions, $acumP, $resmentions) if ($VERBOSE);
 703 |   return ($acumR, $keymentions, $acumP, $resmentions);
 704 | }
 705 | 
 706 | # type = 0: Entity-based
 707 | # type = 1: Mention-based
 708 | sub CEAF {
 709 |   my ($keys, $response, $type) = @_;
 710 | 
 711 |   my @sim;
 712 |   for (my $i = 0 ; $i < scalar(@$keys) ; $i++) {
 713 |     for (my $j = 0 ; $j < scalar(@$response) ; $j++) {
 714 |       if (defined($keys->[$i]) && defined($response->[$j])) {
 715 |         if ($type == 0) {    # entity-based
 716 |           $sim[$i][$j] = 1 - SIMEntityBased($keys->[$i], $response->[$j]);
 717 | 
 718 |           # 1 - X => the library searches minima not maxima
 719 |         }
 720 |         elsif ($type == 1) {    # mention-based
 721 |           $sim[$i][$j] = 1 - SIMMentionBased($keys->[$i], $response->[$j]);
 722 |         }
 723 |       }
 724 |       else {
 725 |         $sim[$i][$j] = 1;
 726 |       }
 727 |     }
 728 | 
 729 |     # fill the matrix when response chains are less than key ones
 730 |     for (my $j = scalar(@$response) ; $j < scalar(@$keys) ; $j++) {
 731 |       $sim[$i][$j] = 1;
 732 |     }
 733 | 
 734 |     #$denrec += SIMEntityBased($kChain->[$i], $kChain->[$i]);
 735 |   }
 736 | 
 737 |   my @out;
 738 | 
 739 |   # Munkres algorithm
 740 |   assign(\@sim, \@out);
 741 | 
 742 |   my $numerador = 0;
 743 |   my $denpre    = 0;
 744 |   my $denrec    = 0;
 745 | 
 746 |   # entity-based
 747 |   if ($type == 0) {
 748 |     foreach my $c (@$response) {
 749 |       $denpre++ if (defined($c) && scalar(@$c) > 0);
 750 |     }
 751 |     foreach my $c (@$keys) {
 752 |       $denrec++ if (defined($c) && scalar(@$c) > 0);
 753 |     }
 754 |   }
 755 | 
 756 |   # mention-based
 757 |   elsif ($type == 1) {
 758 |     foreach my $c (@$response) {
 759 |       $denpre += scalar(@$c) if (defined($c));
 760 |     }
 761 |     foreach my $c (@$keys) {
 762 |       $denrec += scalar(@$c) if (defined($c));
 763 |     }
 764 |   }
 765 | 
 766 |   for (my $i = 0 ; $i < scalar(@$keys) ; $i++) {
 767 |     $numerador += 1 - $sim[$i][$out[$i]];
 768 |   }
 769 | 
 770 |   ShowRPF($numerador, $denrec, $numerador, $denpre) if ($VERBOSE);
 771 | 
 772 |   return ($numerador, $denrec, $numerador, $denpre);
 773 | }
 774 | 
 775 | sub SIMEntityBased {
 776 |   my ($a, $b) = @_;
 777 |   my $intersection = 0;
 778 | 
 779 |   # Common elements in A and B
 780 |   foreach my $ma (@$a) {
 781 |     next if (!defined($ma));
 782 |     foreach my $mb (@$b) {
 783 |       next if (!defined($mb));
 784 |       if ($ma == $mb) {
 785 |         $intersection++;
 786 |         last;
 787 |       }
 788 |     }
 789 |   }
 790 | 
 791 |   my $r = 0;
 792 |   my $d = scalar(@$a) + scalar(@$b);
 793 |   if ($d != 0) {
 794 |     $r = 2 * $intersection / $d;
 795 |   }
 796 | 
 797 |   return $r;
 798 | }
 799 | 
 800 | sub SIMMentionBased {
 801 |   my ($a, $b) = @_;
 802 |   my $intersection = 0;
 803 | 
 804 |   # Common elements in A and B
 805 |   foreach my $ma (@$a) {
 806 |     next if (!defined($ma));
 807 |     foreach my $mb (@$b) {
 808 |       next if (!defined($mb));
 809 |       if ($ma == $mb) {
 810 |         $intersection++;
 811 |         last;
 812 |       }
 813 |     }
 814 |   }
 815 | 
 816 |   return $intersection;
 817 | }
 818 | 
 819 | sub ShowRPF {
 820 |   my ($numrec, $denrec, $numpre, $denpre, $f1) = @_;
 821 | 
 822 |   my $precisio = $denpre ? $numpre / $denpre : 0;
 823 |   my $recall   = $denrec ? $numrec / $denrec : 0;
 824 |   if (!defined($f1)) {
 825 |     $f1 = 0;
 826 |     if ($recall + $precisio) {
 827 |       $f1 = 2 * $precisio * $recall / ($precisio + $recall);
 828 |     }
 829 |   }
 830 | 
 831 |   print "Recall: ($numrec / $denrec) " . int($recall * 10000) / 100 . '%';
 832 |   print "\tPrecision: ($numpre / $denpre) "
 833 |     . int($precisio * 10000) / 100 . '%';
 834 |   print "\tF1: " . int($f1 * 10000) / 100 . "\%\n";
 835 |   print
 836 | "--------------------------------------------------------------------------\n";
 837 | }
 838 | 
 839 | # NEW
 840 | sub ScoreBLANC {
 841 |   my ($kFile, $rFile, $name) = @_;
 842 |   my ($acumNRa, $acumDRa, $acumNPa, $acumDPa) = (0, 0, 0, 0);
 843 |   my ($acumNRr, $acumDRr, $acumNPr, $acumDPr) = (0, 0, 0, 0);
 844 |   my %idenTotals =
 845 |     (recallDen => 0, recallNum => 0, precisionDen => 0, precisionNum => 0);
 846 | 
 847 |   if (defined($name) && $name ne 'none') {
 848 |     print "$name:\n" if ($VERBOSE);
 849 |     my $keys     = GetCoreference($kFile, $KEY_COLUMN,      $name);
 850 |     my $response = GetCoreference($rFile, $RESPONSE_COLUMN, $name);
 851 |     my (
 852 |       $keyChains, $keyChainsWithSingletonsFromResponse,
 853 |       $responseChains, $responseChainsWithoutMentionsNotInKey,
 854 |       $keyChainsOrig, $responseChainsOrig
 855 |     ) = IdentifMentions($keys, $response, \%idenTotals);
 856 |     (
 857 |       $acumNRa, $acumDRa, $acumNPa, $acumDPa,
 858 |       $acumNRr, $acumDRr, $acumNPr, $acumDPr
 859 |     ) = BLANC_Internal($keyChainsOrig, $responseChainsOrig);
 860 |   }
 861 |   else {
 862 |     my $kIndexNames = GetFileNames($kFile);
 863 |     my $rIndexNames = GetFileNames($rFile);
 864 | 
 865 |     $VERBOSE = 0 if ($name eq 'none');
 866 |     foreach my $iname (keys(%{$kIndexNames})) {
 867 |       my $keys =
 868 |         GetCoreference($kFile, $KEY_COLUMN, $iname, $kIndexNames->{$iname});
 869 |       my $response = GetCoreference($rFile, $RESPONSE_COLUMN, $iname,
 870 |         $rIndexNames->{$iname});
 871 | 
 872 |       print "$name:\n" if ($VERBOSE);
 873 |       my (
 874 |         $keyChains,      $keyChainsWithSingletonsFromResponse,
 875 |         $responseChains, $responseChainsWithoutMentionsNotInKey,
 876 |         $keyChainsOrig,  $responseChainsOrig
 877 |       ) = IdentifMentions($keys, $response, \%idenTotals);
 878 |       my ($nra, $dra, $npa, $dpa, $nrr, $drr, $npr, $dpr) =
 879 |         BLANC_Internal($keyChainsOrig, $responseChainsOrig);
 880 | 
 881 |       $acumNRa += $nra;
 882 |       $acumDRa += $dra;
 883 |       $acumNPa += $npa;
 884 |       $acumDPa += $dpa;
 885 |       $acumNRr += $nrr;
 886 |       $acumDRr += $drr;
 887 |       $acumNPr += $npr;
 888 |       $acumDPr += $dpr;
 889 |     }
 890 |   }
 891 | 
 892 |   if ($VERBOSE || $name eq 'none') {
 893 |     print "\n====== TOTALS =======\n";
 894 |     print "Identification of Mentions: ";
 895 |     ShowRPF(
 896 |       $idenTotals{recallNum},    $idenTotals{recallDen},
 897 |       $idenTotals{precisionNum}, $idenTotals{precisionDen}
 898 |     );
 899 |     print "\nCoreference:\n";
 900 |     print "Coreference links: ";
 901 |     ShowRPF($acumNRa, $acumDRa, $acumNPa, $acumDPa);
 902 |     print "Non-coreference links: ";
 903 |     ShowRPF($acumNRr, $acumDRr, $acumNPr, $acumDPr);
 904 |     print "BLANC: ";
 905 | 
 906 |     my $Ra = ($acumDRa) ? $acumNRa / $acumDRa : -1;
 907 |     my $Rr = ($acumDRr) ? $acumNRr / $acumDRr : -1;
 908 |     my $Pa = ($acumDPa) ? $acumNPa / $acumDPa : 0;
 909 |     my $Pr = ($acumDPr) ? $acumNPr / $acumDPr : 0;
 910 | 
 911 |     my $R = ($Ra + $Rr) / 2;
 912 |     my $P = ($Pa + $Pr) / 2;
 913 | 
 914 |     my $Fa = ($Pa + $Ra) ? 2 * $Pa * $Ra / ($Pa + $Ra) : 0;
 915 |     my $Fr = ($Pr + $Rr) ? 2 * $Pr * $Rr / ($Pr + $Rr) : 0;
 916 | 
 917 |     my $f1 = ($Fa + $Fr) / 2;
 918 | 
 919 |     if ($Ra == -1 && $Rr == -1) {
 920 |       $R  = 0;
 921 |       $P  = 0;
 922 |       $f1 = 0;
 923 |     }
 924 |     elsif ($Ra == -1) {
 925 |       $R  = $Rr;
 926 |       $P  = $Pr;
 927 |       $f1 = $Fr;
 928 |     }
 929 |     elsif ($Rr == -1) {
 930 |       $R  = $Ra;
 931 |       $P  = $Pa;
 932 |       $f1 = $Fa;
 933 |     }
 934 | 
 935 |     ShowRPF($R, 1, $P, 1, $f1);
 936 |   }
 937 |   return (
 938 |     $acumNRa, $acumDRa, $acumNPa, $acumDPa,
 939 |     $acumNRr, $acumDRr, $acumNPr, $acumDPr
 940 |   );
 941 | }
 942 | 
 943 | sub cartesian {
 944 |   my @C = map { [$_] } @{shift @_};
 945 | 
 946 |   foreach (@_) {
 947 |     my @A = @$_;
 948 | 
 949 |     @C = map {
 950 |       my $n = $_;
 951 |       map { [$n, @$_] } @C
 952 |     } @A;
 953 |   }
 954 | 
 955 |   return @C;
 956 | }
 957 | 
 958 | sub BLANC_Internal {
 959 |   my ($keys, $response) = @_;
 960 |   my ($ga, $gr, $ba, $br) = (0, 0, 0, 0);
 961 |   my $key_coreference_links          = {};
 962 |   my $key_non_coreference_links      = {};
 963 |   my $response_coreference_links     = {};
 964 |   my $response_non_coreference_links = {};
 965 | 
 966 |   print "list containing list of chains in key:\n" if ($VERBOSE > 2);
 967 |   print Dumper $keys if ($VERBOSE > 2);
 968 | 
 969 |   print "each key chain printed individually:\n" if ($VERBOSE > 2);
 970 | 
 971 |   if ($VERBOSE > 2) {
 972 |     foreach my $z (@$keys) {
 973 |       print Dumper $z;
 974 |     }
 975 |   }
 976 | 
 977 |   print "list containing list of chains in response:\n" if ($VERBOSE > 2);
 978 |   print Dumper $response if ($VERBOSE > 2);
 979 | 
 980 |   print "each response chain printed individually:\n" if ($VERBOSE > 2);
 981 | 
 982 |   if ($VERBOSE > 2) {
 983 |     foreach my $z (@$response) {
 984 |       print Dumper $z;
 985 |     }
 986 |   }
 987 | 
 988 |   print
 989 | "---------------------------------------------------------------------------------"
 990 |     . "\n"
 991 |     if ($VERBOSE > 2);
 992 | 
 993 |   print "combinations of links for each chain in the key:\n" if ($VERBOSE > 2);
 994 |   for my $kkk (@$keys) {
 995 |     my $ccombinat = Math::Combinatorics->new(
 996 |       count => 2,
 997 |       data  => [@$kkk],
 998 |     );
 999 | 
1000 |     while (my @zcombo = $ccombinat->next_combination) {
1001 |       print Dumper [@zcombo] if ($VERBOSE > 2);
1002 |       my @zzcombo = sort { $a <=> $b } @zcombo;
1003 | 
1004 |       $key_coreference_links->{$zzcombo[0] . "-" . $zzcombo[1]} = 1;
1005 |     }
1006 | 
1007 |     print
1008 | "................................................................................\n"
1009 |       if ($VERBOSE > 2);
1010 |   }
1011 | 
1012 |   print Dumper $key_coreference_links if ($VERBOSE > 2);
1013 |   print
1014 | "********************************************************************************\n"
1015 |     if ($VERBOSE > 2);
1016 | 
1017 |   print
1018 | "---------------------------------------------------------------------------------"
1019 |     . "\n"
1020 |     if ($VERBOSE > 2);
1021 |   print "combinations of links for each chain in the response:\n"
1022 |     if ($VERBOSE > 2);
1023 |   for my $rrr (@$response) {
1024 |     my $ccombinat = Math::Combinatorics->new(
1025 |       count => 2,
1026 |       data  => [@$rrr],
1027 |     );
1028 | 
1029 |     while (my @zcombo = $ccombinat->next_combination) {
1030 |       print Dumper [@zcombo] if ($VERBOSE > 2);
1031 |       my @zzcombo = sort { $a <=> $b } @zcombo;
1032 | 
1033 |       $response_coreference_links->{$zzcombo[0] . "-" . $zzcombo[1]} = 1;
1034 |     }
1035 | 
1036 |     print
1037 | "................................................................................\n"
1038 |       if ($VERBOSE > 2);
1039 |   }
1040 | 
1041 |   print Dumper $response_coreference_links if ($VERBOSE > 2);
1042 |   print
1043 | "********************************************************************************\n"
1044 |     if ($VERBOSE > 2);
1045 | 
1046 |   my $number_chains_in_key = @$keys;
1047 |   print "number chains in key: " . $number_chains_in_key . "\n"
1048 |     if ($VERBOSE > 2);
1049 | 
1050 |   my @s  = (0 .. $number_chains_in_key - 1);
1051 |   my $ss = join(' ', @s);
1052 |   my @n  = split(' ', $ss);
1053 | 
1054 |   my $combinat = Math::Combinatorics->new(
1055 |     count => 2,
1056 |     data  => [@n],
1057 |   );
1058 | 
1059 |   print "combinations of 2 from: " . join(" ", @n) . "\n" if ($VERBOSE > 2);
1060 |   print "------------------------" . ("--" x scalar(@n)) . "\n"
1061 |     if ($VERBOSE > 2);
1062 | 
1063 |   while (my @combo = $combinat->next_combination) {
1064 | 
1065 |     my @kcombo = ();
1066 |     foreach my $comboo (@combo) {
1067 |       push(@kcombo, @$keys[$comboo]);
1068 |     }
1069 | 
1070 |     my $lkcombo = @kcombo;
1071 |     print "length: " . $lkcombo . "\n" if ($VERBOSE > 2);
1072 |     print "kcombo:\n"                  if ($VERBOSE > 2);
1073 |     print "+++++\n"                    if ($VERBOSE > 2);
1074 |     print Dumper [@kcombo] if ($VERBOSE > 2);
1075 |     my @kccar = cartesian($kcombo[0], $kcombo[1]);
1076 | 
1077 |     foreach my $x (@kccar) {
1078 |       print "--->>>>>>>>>>>>\n" if ($VERBOSE > 2);
1079 |       print Dumper $x if ($VERBOSE > 2);
1080 |       my @y = sort { $a <=> $b } @$x;
1081 |       print Dumper [@y] if ($VERBOSE > 2);
1082 |       $key_non_coreference_links->{@y[0] . "-" . @y[1]} = 1;
1083 |     }
1084 | 
1085 |     print Dumper $key_non_coreference_links if ($VERBOSE > 2);
1086 |     print "" . "\n" if ($VERBOSE > 2);
1087 | 
1088 |     print ".....\n" if ($VERBOSE > 2);
1089 | 
1090 |     print "\n" if ($VERBOSE > 2);
1091 |   }
1092 | 
1093 |   print "\n" if ($VERBOSE > 2);
1094 |   my $number_chains_in_response = @$response;
1095 |   print "number chains in response: " . $number_chains_in_response . "\n"
1096 |     if ($VERBOSE > 2);
1097 | 
1098 |   my @s  = (0 .. $number_chains_in_response - 1);
1099 |   my $ss = join(' ', @s);
1100 |   my @n  = split(' ', $ss);
1101 | 
1102 |   my $combinat = Math::Combinatorics->new(
1103 |     count => 2,
1104 |     data  => [@n],
1105 |   );
1106 | 
1107 |   print "combinations of 2 from: " . join(" ", @n) . "\n" if ($VERBOSE > 2);
1108 |   print "------------------------" . ("--" x scalar(@n)) . "\n"
1109 |     if ($VERBOSE > 2);
1110 | 
1111 |   while (my @combo = $combinat->next_combination) {
1112 |     my @kcombo = ();
1113 |     foreach my $comboo (@combo) {
1114 |       push(@kcombo, @$response[$comboo]);
1115 |     }
1116 | 
1117 |     my $lkcombo = @kcombo;
1118 |     print "length: " . $lkcombo . "\n" if ($VERBOSE > 2);
1119 |     print "kcombo:\n"                  if ($VERBOSE > 2);
1120 |     print "+++++\n"                    if ($VERBOSE > 2);
1121 |     print Dumper [@kcombo] if ($VERBOSE > 2);
1122 |     my @kccar = cartesian($kcombo[0], $kcombo[1]);
1123 | 
1124 |     foreach my $x (@kccar) {
1125 |       print "--->>>>>>>>>>>>\n" if ($VERBOSE > 2);
1126 |       print Dumper $x if ($VERBOSE > 2);
1127 |       my @y = sort { $a <=> $b } @$x;
1128 |       print Dumper [@y] if ($VERBOSE > 2);
1129 |       $response_non_coreference_links->{@y[0] . "-" . @y[1]} = 1;
1130 |     }
1131 | 
1132 |     print Dumper $response_non_coreference_links if ($VERBOSE > 2);
1133 |     print "" . "\n" if ($VERBOSE > 2);
1134 | 
1135 |     print ".....\n" if ($VERBOSE > 2);
1136 |     print "\n"      if ($VERBOSE > 2);
1137 |   }
1138 | 
1139 |   print "\n" if ($VERBOSE > 2);
1140 | 
1141 |   print
1142 | "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n"
1143 |     if ($VERBOSE > 2);
1144 |   print Dumper $key_coreference_links          if ($VERBOSE > 2);
1145 |   print Dumper $response_coreference_links     if ($VERBOSE > 2);
1146 |   print Dumper $key_non_coreference_links      if ($VERBOSE > 2);
1147 |   print Dumper $response_non_coreference_links if ($VERBOSE > 2);
1148 |   print
1149 | "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n"
1150 |     if ($VERBOSE > 2);
1151 | 
1152 |   my @union_cl = my @isect_cl = ();
1153 |   my %union_cl = my %isect_cl = ();
1154 | 
1155 |   my @kcl = keys %$key_coreference_links;
1156 |   my @rcl = keys %$response_coreference_links;
1157 | 
1158 |   print Dumper @kcl if ($VERBOSE > 2);
1159 |   print
1160 | "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n"
1161 |     if ($VERBOSE > 2);
1162 |   print Dumper @rcl if ($VERBOSE > 2);
1163 |   print
1164 | "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n"
1165 |     if ($VERBOSE > 2);
1166 | 
1167 |   foreach my $e (@kcl, @rcl) { $union_cl{$e}++ && $isect_cl{$e}++ }
1168 | 
1169 |   @union_cl = keys %union_cl;
1170 |   @isect_cl = keys %isect_cl;
1171 | 
1172 |   print Dumper @isect_cl if ($VERBOSE > 2);
1173 |   print
1174 | "********************************************************************************\n"
1175 |     if ($VERBOSE > 2);
1176 | 
1177 |   my @union_ncl = my @isect_ncl = ();
1178 |   my %union_ncl = my %isect_ncl = ();
1179 | 
1180 |   my @kncl = keys %$key_non_coreference_links;
1181 |   my @rncl = keys %$response_non_coreference_links;
1182 | 
1183 |   print Dumper @kncl if ($VERBOSE > 2);
1184 |   print
1185 | "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n"
1186 |     if ($VERBOSE > 2);
1187 |   print Dumper @rncl if ($VERBOSE > 2);
1188 |   print
1189 | "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n"
1190 |     if ($VERBOSE > 2);
1191 | 
1192 |   foreach my $e (@kncl, @rncl) { $union_ncl{$e}++ && $isect_ncl{$e}++ }
1193 | 
1194 |   @union_ncl = keys %union_ncl;
1195 |   @isect_ncl = keys %isect_ncl;
1196 | 
1197 |   print Dumper @isect_ncl if ($VERBOSE > 2);
1198 |   print
1199 | "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n"
1200 |     if ($VERBOSE > 2);
1201 | 
1202 |   my $num_isect_cl = @isect_cl;
1203 |   print
1204 | "    number of links in the intersection of key and response coreference links: "
1205 |     . $num_isect_cl . "\n"
1206 |     if ($VERBOSE > 2);
1207 | 
1208 |   my $num_isect_ncl = @isect_ncl;
1209 |   print
1210 | "number of links in the intersection of key and response non-coreference links: "
1211 |     . $num_isect_ncl . "\n"
1212 |     if ($VERBOSE > 2);
1213 | 
1214 |   my $num_key_coreference_links = keys %$key_coreference_links;
1215 |   print "number of key coreference links: " . $num_key_coreference_links . "\n"
1216 |     if ($VERBOSE > 2);
1217 | 
1218 |   my $num_response_coreference_links = keys %$response_coreference_links;
1219 |   print "number of response coreference links: "
1220 |     . $num_response_coreference_links . "\n"
1221 |     if ($VERBOSE > 2);
1222 | 
1223 |   my $num_key_non_coreference_links = keys %$key_non_coreference_links;
1224 |   print "number of key non-coreference links: "
1225 |     . $num_key_non_coreference_links . "\n"
1226 |     if ($VERBOSE > 2);
1227 | 
1228 |   my $num_response_non_coreference_links =
1229 |     keys %$response_non_coreference_links;
1230 |   print "number of response non-coreference links: "
1231 |     . $num_response_non_coreference_links . "\n"
1232 |     if ($VERBOSE > 2);
1233 | 
1234 |   my ($r_blanc, $p_blanc, $f_blanc) = ComputeBLANCFromCounts(
1235 |     $num_isect_cl,                   $num_key_coreference_links,
1236 |     $num_response_coreference_links, $num_isect_ncl,
1237 |     $num_key_non_coreference_links,  $num_response_non_coreference_links
1238 |   );
1239 | 
1240 |   print "   blanc recall: " . $r_blanc . "\n" if ($VERBOSE > 2);
1241 |   print "blanc precision: " . $p_blanc . "\n" if ($VERBOSE > 2);
1242 |   print "  blanc score: " . $f_blanc . "\n"   if ($VERBOSE > 2);
1243 |   print
1244 | ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n"
1245 |     if ($VERBOSE > 2);
1246 | 
1247 |   return (
1248 |     $num_isect_cl,  $num_key_coreference_links,
1249 |     $num_isect_cl,  $num_response_coreference_links,
1250 |     $num_isect_ncl, $num_key_non_coreference_links,
1251 |     $num_isect_ncl, $num_response_non_coreference_links
1252 |   );
1253 | }
1254 | 
1255 | ################################################################################
1256 | # Compute BLANC recall, precision and F-measure from counts.
1257 | # Parameters:
1258 | #    (#correct_coref_links, #key_coref_links, #response_coref_links,
1259 | #     #correct_noncoref_links, #key_noncoref_links, #response_noncoref_links).
1260 | # Returns: (recall, precision, F-measure).
1261 | ################################################################################
1262 | sub ComputeBLANCFromCounts {
1263 |   my (
1264 |     $num_isect_cl,                   $num_key_coreference_links,
1265 |     $num_response_coreference_links, $num_isect_ncl,
1266 |     $num_key_non_coreference_links,  $num_response_non_coreference_links
1267 |   ) = @_;
1268 | 
1269 |   my $kcl_recall =
1270 |     ($num_key_coreference_links == 0)
1271 |     ? 0
1272 |     : ($num_isect_cl / $num_key_coreference_links);
1273 |   my $kcl_precision =
1274 |     ($num_response_coreference_links == 0)
1275 |     ? 0
1276 |     : ($num_isect_cl / $num_response_coreference_links);
1277 | 
1278 |   print
1279 | "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n"
1280 |     if ($VERBOSE > 2);
1281 |   print "       coreference recall: " . $kcl_recall . "\n"    if ($VERBOSE > 2);
1282 |   print "    coreference precision: " . $kcl_precision . "\n" if ($VERBOSE > 2);
1283 | 
1284 |   my $fcl =
1285 |     ($kcl_recall + $kcl_precision == 0)
1286 |     ? 0
1287 |     : (2 * $kcl_recall * $kcl_precision / ($kcl_recall + $kcl_precision));
1288 |   print "      coreference f-score: " . $fcl . "\n" if ($VERBOSE > 2);
1289 | 
1290 |   my $kncl_recall =
1291 |     ($num_key_non_coreference_links == 0)
1292 |     ? 0
1293 |     : ($num_isect_ncl / $num_key_non_coreference_links);
1294 |   my $kncl_precision =
1295 |     ($num_response_non_coreference_links == 0)
1296 |     ? 0
1297 |     : ($num_isect_ncl / $num_response_non_coreference_links);
1298 | 
1299 |   print
1300 | "--------------------------------------------------------------------------------\n"
1301 |     if ($VERBOSE > 2);
1302 |   print "   non-coreference recall: " . $kncl_recall . "\n" if ($VERBOSE > 2);
1303 |   print "non-coreference precision: " . $kncl_precision . "\n"
1304 |     if ($VERBOSE > 2);
1305 | 
1306 |   my $fncl =
1307 |     ($kncl_recall + $kncl_precision == 0)
1308 |     ? 0
1309 |     : (2 * $kncl_recall * $kncl_precision / ($kncl_recall + $kncl_precision));
1310 |   print "  non-coreference f-score: " . $fncl . "\n" if ($VERBOSE > 2);
1311 |   print
1312 | "--------------------------------------------------------------------------------\n"
1313 |     if ($VERBOSE > 2);
1314 | 
1315 |   my $r_blanc = -1;
1316 |   my $p_blanc = -1;
1317 |   my $f_blanc = -1;
1318 | 
1319 |   if ($num_key_coreference_links == 0 && $num_key_non_coreference_links == 0) {
1320 |     $r_blanc = 0;
1321 |     $p_blanc = 0;
1322 |     $f_blanc = 0;
1323 |   }
1324 |   elsif ($num_key_coreference_links == 0 || $num_key_non_coreference_links == 0)
1325 |   {
1326 |     if ($num_key_coreference_links == 0) {
1327 |       $r_blanc = $kncl_recall;
1328 |       $p_blanc = $kncl_precision;
1329 |       $f_blanc = $fncl;
1330 |     }
1331 |     elsif ($num_key_non_coreference_links == 0) {
1332 |       $r_blanc = $kcl_recall;
1333 |       $p_blanc = $kcl_precision;
1334 |       $f_blanc = $fcl;
1335 |     }
1336 |   }
1337 |   else {
1338 |     $r_blanc = ($kcl_recall + $kncl_recall) / 2;
1339 |     $p_blanc = ($kcl_precision + $kncl_precision) / 2;
1340 |     $f_blanc = ($fcl + $fncl) / 2;
1341 |   }
1342 | 
1343 |   return ($r_blanc, $p_blanc, $f_blanc);
1344 | }
1345 | 
1346 | 1;
1347 | 


--------------------------------------------------------------------------------