"""Latency metrics for STACL.

Code for STACL (Simultaneous Translation with Integrated Anticipation and
Controllable Latency), paper: https://arxiv.org/abs/1810.08398

Repository files: README.md, metricAL.py, sample_rw.txt

Metric of Average Lagging (AL)
------------------------------
Details are in Sec. 4 of the STACL paper.

Run the sample::

    python metricAL.py
    # output:
    # corpus mean: 5.96639835314598
    # weighted average: 5.486924076400874

Functions
---------
1. ``RW2AL(s, add_eos=False)``
   - get the AL value from a single RW sequence
   - ``s`` is the RW sequence, a string such as ``0 0 0 1 1 0 1 0 1`` or
     ``R R R W W R W R W``; blanks/commas between symbols are optional
   - ``add_eos`` adds an eos token for both src and tgt if that was not done
     while generating the RW sequence (adds a tail ``0`` and ``1``)
   - output is a single AL value

2. ``RW2AL_file(file_rw, is_weight_ave=False)``
   - get the AL value from a file of RW sequences
   - ``file_rw`` is the path to the RW sequence file
   - ``is_weight_ave`` selects the average weighted by READ length instead
     of the plain corpus mean
   - output is a single AL value
"""
import sys
import os
import numpy as np


# Maps the letter notation (R/r = READ, W/w = WRITE) onto the 0/1 notation.
trantab = str.maketrans('RrWw', '0011')


def _normalize_rw(s, add_eos=False):
    """Reduce an RW sequence to a bare string of '0' (READ) / '1' (WRITE).

    Args:
        s: RW sequence such as '0 0 0 1 1 0 1 0 1' or 'R R R W W R W R W'.
        add_eos: when True, insert one extra READ right after the last READ
            and append one extra WRITE, i.e. add an eos token on both the
            source and the target side.

    Returns:
        The cleaned string, or None when ``s`` is not a ``str``.
    """
    if not isinstance(s, str):
        return None
    # Keep only READ/WRITE symbols. Dropping every other character (blanks,
    # commas, tabs, ...) keeps the READ/WRITE counts and the per-symbol loops
    # of the metric functions in agreement with each other.
    bits = ''.join(ch for ch in s.translate(trantab) if ch in '01')
    if add_eos:
        # rfind returns -1 when there is no READ, which places the extra READ
        # at the very front of the sequence.
        idx = bits.rfind('0')
        bits = bits[:idx + 1] + '0' + bits[idx + 1:] + '1'
    return bits


def RW2AP(s, add_eos=False):
    """Return the mean number of READs preceding each WRITE, divided by the READ count.

    Presumably the "Average Proportion" latency metric - confirm against the
    paper. For every WRITE the number of READs seen so far is collected; the
    sum is divided by (number of READs * number of WRITEs).

    Args:
        s: RW sequence, e.g. '0 0 0 1 1 0 1 0 1' or 'R R R W W R W R W';
            characters other than 0/1/R/r/W/w are ignored.
        add_eos: add an eos READ and WRITE if the sequence was generated
            without them.

    Returns:
        The metric value, 0 when the sequence has no READ or no WRITE, or
        None when ``s`` is not a ``str``.
    """
    bits = _normalize_rw(s, add_eos)
    if bits is None:
        return None
    x, y = bits.count('0'), bits.count('1')
    if x == 0 or y == 0:
        return 0

    reads = 0
    lags = []  # READs consumed at the moment of each WRITE
    for symbol in bits:
        if symbol == '0':
            reads += 1
        else:
            lags.append(reads)
    return sum(lags) / x / y


def RW2AL(s, add_eos=False):
    """Return the Average Lagging (AL) of a single RW sequence.

    For each WRITE t (1-based), up to and including the first WRITE issued
    after all READs were consumed, the lag is the number of READs so far
    minus ``(t - 1) / rate`` with ``rate = #WRITE / #READ``. AL is the mean
    of those lags.

    Args:
        s: RW sequence, e.g. '0 0 0 1 1 0 1 0 1' or 'R R R W W R W R W';
            characters other than 0/1/R/r/W/w are ignored.
        add_eos: add an eos READ and WRITE if the sequence was generated
            without them.

    Returns:
        The AL value, 0 when the sequence has no READ or no WRITE, or None
        when ``s`` is not a ``str``.
    """
    bits = _normalize_rw(s, add_eos)
    if bits is None:
        return None
    x, y = bits.count('0'), bits.count('1')
    if x == 0 or y == 0:
        return 0

    rate = y / x
    reads = 0
    lags = []  # READs consumed at the moment of each counted WRITE
    for symbol in bits:
        if symbol == '0':
            reads += 1
            continue
        lags.append(reads)
        if reads == x:
            # The whole source has been read: this WRITE is the last one
            # that enters the average.
            break
    steps = len(lags)
    ideal = [(t - 1) / rate for t in range(1, steps + 1)]
    return sum(lag - ref for lag, ref in zip(lags, ideal)) / steps


def RW2AL_file(file_rw, is_weight_ave=False):
    """Return the corpus-level AL of a file holding one RW sequence per line.

    Lines without any READ or without any WRITE (blank lines included)
    contribute an AL of 0.

    Args:
        file_rw: path to the RW sequence file.
        is_weight_ave: when True, average the per-line AL values weighted by
            each line's READ count; otherwise return the plain mean.

    Returns:
        The averaged AL as returned by ``numpy.average``.

    Raises:
        ZeroDivisionError: from ``numpy.average`` when ``is_weight_ave`` is
            True and the READ counts sum to zero.
    """
    ALs, Lsrc = [], []
    with open(file_rw, 'r') as handle:
        for line in handle:
            line = line.strip()
            al = RW2AL(line)
            if al is not None:
                ALs.append(al)
                # Count READs on the normalized sequence so that lines in
                # 'R'/'W' notation receive their real weight.
                Lsrc.append(_normalize_rw(line).count('0'))

    if is_weight_ave:
        return np.average(ALs, weights=Lsrc)
    return np.average(ALs)


if __name__ == "__main__":
    file_rw = 'sample_rw.txt'
    print("corpus mean:", RW2AL_file(file_rw))
    print("weighted average:", RW2AL_file(file_rw, is_weight_ave=True))