├── README.md
├── metricAL.py
└── sample_rw.txt


/README.md:
--------------------------------------------------------------------------------
 1 | code for STACL (**S**imultaneous **T**ranslation with Integrated **A**nticipation and **C**ontrollable **L**atency) [paper](https://arxiv.org/abs/1810.08398)
 2 | 
 3 |  ### metric of Average Lagging (AL)  
 4 | details in Sec.4 of STACL paper
 5 | 
 6 | #### run sample 
 7 | ```bash
 8 | python metricAL.py
 9 | # output:
10 | # corpus mean: 5.96639835314598
11 | # weighted average: 5.486924076400874
12 | ```
13 | #### functions
14 | 1. `RW2AL(s, add_eos=False)`  
15 | - get `AL` value from a single **RW** sequence  
16 | - `s` is **RW** sequence, a string, in format of `0 0 0 1 1 0 1 0 1`, or `R R R W W R W R W`, flexible on blank/comma  
17 | - `add_eos` is used to add eos token for both src and tgt if you did not do it during RW generating (to add tail `0` and `1` into the RW sequence)  
18 | - output is a single value as `AL`  
19 | 
20 | 2. `RW2AL_file(file_rw, is_weight_ave=False)` 
21 | - get `AL` value from a file with **RW** sequences  
22 | - `file_rw` is the path to a **RW** sequence file  
23 | - `is_weight_ave` selects how per-sequence values are aggregated: `True` returns the average weighted by each sequence's READ (source) length, `False` returns the plain mean over the corpus  
24 | - output is a single value as `AL`  
25 | 
26 | 
27 | 


--------------------------------------------------------------------------------
/metricAL.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import os
 3 | import numpy as np
 4 | 
 5 | 
 6 | 
# Character translation table for str.translate: 'R'/'r' -> '0' and 'W'/'w' -> '1'.
trantab = str.maketrans('RrWw', '0011')
 8 | 
 9 | 
10 | # s is RW sequence, in format of '0 0 0 1 1 0 1 0 1', or 'R R R W W R W R W', flexible on blank/comma
def RW2AP(s, add_eos=False):
    """Compute the AP score (presumably "Average Proportion") of one RW sequence.

    The score is ``sum(g(t)) / (x * y)``, where ``g(t)`` is the number of
    READ actions that precede the t-th WRITE action, ``x`` is the total
    number of READs and ``y`` the total number of WRITEs.

    Args:
        s: RW sequence as a string such as ``'0 0 0 1 1 0 1 0 1'`` or
            ``'R R R W W R W R W'`` (``0``/``R``/``r`` = READ,
            ``1``/``W``/``w`` = WRITE). Blanks, commas and any other
            character that is not an action symbol are ignored.
        add_eos: if true, add an end-of-sentence token on both sides: one
            extra READ right after the last READ and one extra WRITE at the
            very end. Use it when the sequence was generated without <eos>.

    Returns:
        The AP value, ``0`` when the sequence has no READ or no WRITE, or
        ``None`` when ``s`` is not a string.
    """
    if not isinstance(s, str):
        return None

    # Keep only recognised actions, normalised to '0' (READ) / '1' (WRITE).
    # Dropping everything else keeps stray characters (e.g. tabs) from being
    # mistaken for WRITE actions further down.
    s = ''.join('0' if ch in '0Rr' else '1' for ch in s if ch in '0Rr1Ww')

    if add_eos:
        # Insert the source <eos> READ after the last READ and append the
        # target <eos> WRITE at the end of the sequence.
        idx = s.rfind('0')
        s = s[:idx + 1] + '0' + s[idx + 1:] + '1'

    x, y = s.count('0'), s.count('1')
    if x == 0 or y == 0:
        return 0

    reads_so_far = 0
    total = 0
    for action in s:
        if action == '0':
            reads_so_far += 1
        else:
            # A WRITE contributes the number of source tokens read so far.
            total += reads_so_far
    return total / x / y
28 | 
29 | 
30 | # s is RW sequence, in format of '0 0 0 1 1 0 1 0 1', or 'R R R W W R W R W', flexible on blank/comma
def RW2AL(s, add_eos=False):
    """Compute Average Lagging (AL) of one RW sequence.

    With ``x`` READs, ``y`` WRITEs and ``rate = y / x``, the value is the
    mean of ``g(t) - (t - 1) / rate`` over the WRITE steps ``t = 1..tau``,
    where ``g(t)`` is the number of READs preceding the t-th WRITE and
    ``tau`` is the first WRITE issued after every READ has happened (or the
    last WRITE, if no WRITE follows the final READ).

    Args:
        s: RW sequence as a string such as ``'0 0 0 1 1 0 1 0 1'`` or
            ``'R R R W W R W R W'`` (``0``/``R``/``r`` = READ,
            ``1``/``W``/``w`` = WRITE). Blanks, commas and any other
            character that is not an action symbol are ignored.
        add_eos: if true, add an end-of-sentence token on both sides: one
            extra READ right after the last READ and one extra WRITE at the
            very end. Use it when the sequence was generated without <eos>.

    Returns:
        The AL value, ``0`` when the sequence has no READ or no WRITE, or
        ``None`` when ``s`` is not a string.
    """
    if not isinstance(s, str):
        return None

    # Keep only recognised actions, normalised to '0' (READ) / '1' (WRITE).
    # Dropping everything else keeps stray characters (e.g. tabs) from being
    # mistaken for WRITE actions further down.
    s = ''.join('0' if ch in '0Rr' else '1' for ch in s if ch in '0Rr1Ww')

    if add_eos:
        # Insert the source <eos> READ after the last READ and append the
        # target <eos> WRITE at the end of the sequence.
        idx = s.rfind('0')
        s = s[:idx + 1] + '0' + s[idx + 1:] + '1'

    x, y = s.count('0'), s.count('1')
    if x == 0 or y == 0:
        return 0

    rate = y / x
    reads_so_far = 0
    lags = []  # g(t) for t = 1..tau
    for action in s:
        if action == '0':
            reads_so_far += 1
            continue
        lags.append(reads_so_far)
        if reads_so_far == x:
            # The whole source has been read: later WRITEs are not counted.
            break

    tau = len(lags)
    # (t - 1) / rate is the lag of an ideal translator that is perfectly in
    # sync with the source; AL averages the distance to that diagonal.
    return sum(g - (t - 1) / rate for t, g in enumerate(lags, 1)) / tau
52 | 
53 | 
54 | 
def RW2AL_file(file_rw, is_weight_ave=False):
    """Compute the corpus-level Average Lagging of a file of RW sequences.

    Each line of the file is treated as one RW sequence and scored with
    ``RW2AL`` (called without ``add_eos``).

    Args:
        file_rw: path to the RW sequence file, one sequence per line.
        is_weight_ave: if true, return the average of the per-line AL values
            weighted by each line's number of READ actions; otherwise return
            their plain mean.

    Returns:
        The aggregated AL value as computed by ``np.average``.
    """
    ALs, Lsrc = [], []
    # The context manager closes the file even if scoring a line fails.
    with open(file_rw, 'r') as f:
        for line in f:
            line = line.strip()
            al = RW2AL(line)
            if al is None:
                continue
            ALs.append(al)
            # Count READs in both the digit and the letter notation so that
            # files written as 'R W ...' do not end up with all-zero weights.
            Lsrc.append(sum(line.count(ch) for ch in '0Rr'))

    if is_weight_ave:
        return np.average(ALs, weights=Lsrc)
    return np.average(ALs)
66 | 
67 | 
if __name__ == "__main__":
    # Demo run: report both aggregates for the bundled sample file.
    sample_path = 'sample_rw.txt'
    for label, weighted in (("corpus mean:", False), ("weighted average:", True)):
        print(label, RW2AL_file(sample_path, is_weight_ave=weighted))
72 | 


--------------------------------------------------------------------------------