├── setup.py
├── readme.md
├── sample.py
├── krippendorff_alpha.py
├── BERT.ipynb
└── krippendorff.py

/setup.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
from setuptools import setup

setup(
    name = 'krippendorff_alpha',
    version = '0.01',
    author = "Tamanna",
    author_email = "tam.tamanna18@gmail.com",
    maintainer = "Tamanna",
    maintainer_email = "tam.tamanna18@gmail.com",
    description = "Python implementation of Krippendorff's alpha measure for inter-rater agreement",
    license = "GNU General Public License v3 (GPLv3)",
    url = "https://github.com/tamanna18/NLP",
    py_modules = ["krippendorff_alpha"],
)
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
# Krippendorff

### Installation

```bash
pip install krippendorff
```

### Example usage

```python
import krippendorff
krippendorff.alpha(reliability_data=...)
```

### Alternative: NLTK's `agreement` module

Krippendorff's alpha can also be computed with NLTK:

```python
from nltk import agreement

coder1 = [1, 4, 1, 2, 4, 1, 1, 1, 5, 1]
coder2 = [1, 4, 1, 3, 4, 1, 1, 1, 4, 1]
coder3 = [1, 4, 1, 2, 4, 1, 1, 1, 5, 1]

# AnnotationTask expects (coder, item, label) triples
formatted_codes = ([[1, i, coder1[i]] for i in range(len(coder1))]
                   + [[2, i, coder2[i]] for i in range(len(coder2))]
                   + [[3, i, coder3[i]] for i in range(len(coder3))])

ratingtask = agreement.AnnotationTask(data=formatted_codes)
print("Krippendorff's alpha:", ratingtask.alpha())
```
--------------------------------------------------------------------------------
/sample.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
import krippendorff
import numpy as np


def main():
    print("Example from http://en.wikipedia.org/wiki/Krippendorff's_Alpha")
    print()
    reliability_data_str = (
        "* * * * * 3 4 1 2 1 1 3 3 * 3",  # coder A
        "1 * 2 1 3 3 4 3 * * * * * * *",  # coder B
        "* * 2 1 3 4 4 * 2 1 1 3 3 * 4",  # coder C
    )
    print('\n'.join(reliability_data_str))
    print()

    reliability_data = [[np.nan if v == '*' else int(v) for v in coder.split()] for coder in reliability_data_str]

    print("Krippendorff's alpha for nominal metric: ", krippendorff.alpha(reliability_data=reliability_data,
                                                                          level_of_measurement='nominal'))
    print("Krippendorff's alpha for interval metric: ", krippendorff.alpha(reliability_data=reliability_data))

    print()
    print()
    print("From value counts:")
    print()
    value_counts = np.array([[1, 0, 0, 0],
                             [0, 0, 0, 0],
                             [0, 2, 0, 0],
                             [2, 0, 0, 0],
                             [0, 0, 2, 0],
                             [0, 0, 2, 1],
                             [0, 0, 0, 3],
                             [1, 0, 1, 0],
                             [0, 2, 0, 0],
                             [2, 0, 0, 0],
                             [2, 0, 0, 0],
                             [0, 0, 2, 0],
                             [0, 0, 2, 0],
                             [0, 0, 0, 0],
                             [0, 0, 1, 1]])
    print(value_counts)
    print("Krippendorff's alpha for nominal metric: ", krippendorff.alpha(value_counts=value_counts,
                                                                          level_of_measurement='nominal'))
    print("Krippendorff's alpha for interval metric: ", krippendorff.alpha(value_counts=value_counts))


if __name__ == '__main__':
    main()
--------------------------------------------------------------------------------
/krippendorff_alpha.py:
--------------------------------------------------------------------------------
#! /usr/bin/env python
# -*- coding: utf-8 -*-
'''
Python implementation of Krippendorff's alpha -- inter-rater reliability

(c)2011-17 Thomas Grill (http://grrrr.org)

Python version >= 2.4 required
'''

from __future__ import print_function

try:
    import numpy as np
except ImportError:
    np = None


def nominal_metric(a, b):
    return a != b


def interval_metric(a, b):
    return (a-b)**2


def ratio_metric(a, b):
    return ((a-b)/(a+b))**2


def krippendorff_alpha(data, metric=interval_metric, force_vecmath=False, convert_items=float, missing_items=None):
    '''
    Calculate Krippendorff's alpha (inter-rater reliability):

    data is in the format
    [
        {unit1:value, unit2:value, ...},  # coder 1
        {unit1:value, unit3:value, ...},  # coder 2
        ...                               # more coders
    ]
    or it is a sequence of (masked) sequences (e.g. list, numpy.array, numpy.ma.array)
    with rows corresponding to coders and columns to items

    metric: function calculating the pairwise distance
    force_vecmath: force vector math for custom metrics (numpy required)
    convert_items: function for the type conversion of items (default: float)
    missing_items: indicator for missing items (default: None)
    '''

    # number of coders
    m = len(data)

    # set of constants identifying missing values
    if missing_items is None:
        maskitems = []
    else:
        maskitems = list(missing_items)
    if np is not None:
        maskitems.append(np.ma.masked_singleton)

    # convert input data to a dict of items
    units = {}
    for d in data:
        try:
            # check whether d behaves like a dict
            diter = d.items()
        except AttributeError:
            # d is assumed to be a sequence
            diter = enumerate(d)

        for it, g in diter:
            if g not in maskitems:
                try:
                    its = units[it]
                except KeyError:
                    its = []
                    units[it] = its
                its.append(convert_items(g))

    units = dict((it, d) for it, d in units.items() if len(d) > 1)  # units with pairable values
    n = sum(len(pv) for pv in units.values())  # number of pairable values

    if n == 0:
        raise ValueError("No items to compare.")

    np_metric = (np is not None) and ((metric in (interval_metric, nominal_metric, ratio_metric)) or force_vecmath)

    Do = 0.
    for grades in units.values():
        if np_metric:
            gr = np.asarray(grades)
            Du = sum(np.sum(metric(gr, gri)) for gri in gr)
        else:
            Du = sum(metric(gi, gj) for gi in grades for gj in grades)
        Do += Du/float(len(grades)-1)
    Do /= float(n)

    if Do == 0:
        return 1.

    De = 0.
    for g1 in units.values():
        if np_metric:
            d1 = np.asarray(g1)
            for g2 in units.values():
                De += sum(np.sum(metric(d1, gj)) for gj in g2)
        else:
            for g2 in units.values():
                De += sum(metric(gi, gj) for gi in g1 for gj in g2)
    De /= float(n*(n-1))

    return 1.-Do/De if (Do and De) else 1.
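
# Illustrative sketch (not part of the original module): the dict input format
# described in the docstring, with two coders who agree on every shared unit.
# "u3" is rated by only one coder, so it is dropped as unpairable; with perfect
# agreement on the remaining units the function returns 1.0.
#
#     ratings = [
#         {"u1": 1, "u2": 2},           # coder 1
#         {"u1": 1, "u2": 2, "u3": 3},  # coder 2
#     ]
#     assert krippendorff_alpha(ratings, metric=nominal_metric) == 1.0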

if __name__ == '__main__':
    print("Example from http://en.wikipedia.org/wiki/Krippendorff's_Alpha")

    data = (
        "* * * * * 3 4 1 2 1 1 3 3 * 3",  # coder A
        "1 * 2 1 3 3 4 3 * * * * * * *",  # coder B
        "* * 2 1 3 4 4 * 2 1 1 3 3 * 4",  # coder C
    )

    missing = '*'  # indicator for missing values
    array = [d.split() for d in data]  # convert to 2D list of string items

    print("nominal metric: %.3f" % krippendorff_alpha(array, nominal_metric, missing_items=missing))
    print("interval metric: %.3f" % krippendorff_alpha(array, interval_metric, missing_items=missing))
--------------------------------------------------------------------------------
/BERT.ipynb:
--------------------------------------------------------------------------------
import math
import re
from random import randrange, shuffle, random, randint
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim

# sample so that IsNext and NotNext are equally represented in the batch
def make_batch():
    batch = []
    positive = negative = 0
    while positive != batch_size/2 or negative != batch_size/2:
        tokens_a_index, tokens_b_index = randrange(len(sentences)), randrange(len(sentences))  # sample random sentence indices
        tokens_a, tokens_b = token_list[tokens_a_index], token_list[tokens_b_index]
        input_ids = [word_dict['[CLS]']] + tokens_a + [word_dict['[SEP]']] + tokens_b + [word_dict['[SEP]']]
        segment_ids = [0] * (1 + len(tokens_a) + 1) + [1] * (len(tokens_b) + 1)

        # Masked LM
        n_pred = min(max_pred, max(1, int(round(len(input_ids) * 0.15))))  # mask 15% of the tokens in the sequence
        cand_masked_pos = [i for i, token in enumerate(input_ids)
                           if token != word_dict['[CLS]'] and token != word_dict['[SEP]']]
        shuffle(cand_masked_pos)
        masked_tokens, masked_pos = [], []
        for pos in cand_masked_pos[:n_pred]:
            masked_pos.append(pos)
            masked_tokens.append(input_ids[pos])
            if random() < 0.8:  # 80%: replace with [MASK]
                input_ids[pos] = word_dict['[MASK]']
            elif random() < 0.5:  # 10%: replace with a random vocabulary word
                index = randint(0, vocab_size - 1)  # random index in vocabulary
                input_ids[pos] = word_dict[number_dict[index]]
            # remaining 10%: keep the original token

        # Zero-pad the sequence up to maxlen
        n_pad = maxlen - len(input_ids)
        input_ids.extend([0] * n_pad)
        segment_ids.extend([0] * n_pad)

        # Zero-pad the masked positions/tokens up to max_pred
        if max_pred > n_pred:
            n_pad = max_pred - n_pred
            masked_tokens.extend([0] * n_pad)
            masked_pos.extend([0] * n_pad)

        if tokens_a_index + 1 == tokens_b_index and positive < batch_size/2:
            batch.append([input_ids, segment_ids, masked_tokens, masked_pos, True])  # IsNext
            positive += 1
        elif tokens_a_index + 1 != tokens_b_index and negative < batch_size/2:
            batch.append([input_ids, segment_ids, masked_tokens, masked_pos, False])  # NotNext
            negative += 1
    return batch
# Preprocessing finished

def get_attn_pad_mask(seq_q, seq_k):
    batch_size, len_q = seq_q.size()
    batch_size, len_k = seq_k.size()
    # eq(0) marks PAD tokens
    pad_attn_mask = seq_k.data.eq(0).unsqueeze(1)  # batch_size x 1 x len_k(=len_q), True means masked
    return pad_attn_mask.expand(batch_size, len_q, len_k)  # batch_size x len_q x len_k

def gelu(x):
    "Implementation of the gelu activation function by Hugging Face"
    return x * 0.5 * (1.0 + torch.erf(x / math.sqrt(2.0)))

class Embedding(nn.Module):
    def __init__(self):
        super(Embedding, self).__init__()
        self.tok_embed = nn.Embedding(vocab_size, d_model)  # token embedding
        self.pos_embed = nn.Embedding(maxlen, d_model)  # position embedding
        self.seg_embed = nn.Embedding(n_segments, d_model)  # segment (token type) embedding
        self.norm = nn.LayerNorm(d_model)

    def forward(self, x, seg):
        seq_len = x.size(1)
        pos = torch.arange(seq_len, dtype=torch.long)
        pos = pos.unsqueeze(0).expand_as(x)  # (seq_len,) -> (batch_size, seq_len)
        embedding = self.tok_embed(x) + self.pos_embed(pos) + self.seg_embed(seg)
        return self.norm(embedding)

class ScaledDotProductAttention(nn.Module):
    def __init__(self):
        super(ScaledDotProductAttention, self).__init__()

    def forward(self, Q, K, V, attn_mask):
        scores = torch.matmul(Q, K.transpose(-1, -2)) / np.sqrt(d_k)  # scores : [batch_size x n_heads x len_q(=len_k) x len_k(=len_q)]
        scores.masked_fill_(attn_mask, -1e9)  # fill masked positions with a large negative value where mask is True
        attn = nn.Softmax(dim=-1)(scores)
        context = torch.matmul(attn, V)
        return context, attn

class MultiHeadAttention(nn.Module):
    def __init__(self):
        super(MultiHeadAttention, self).__init__()
        self.W_Q = nn.Linear(d_model, d_k * n_heads)
        self.W_K = nn.Linear(d_model, d_k * n_heads)
        self.W_V = nn.Linear(d_model, d_v * n_heads)
        # output projection and layer norm are created here rather than in
        # forward(), so their parameters are registered and actually trained
        self.proj = nn.Linear(n_heads * d_v, d_model)
        self.norm = nn.LayerNorm(d_model)

    def forward(self, Q, K, V, attn_mask):
        # q: [batch_size x len_q x d_model], k: [batch_size x len_k x d_model], v: [batch_size x len_k x d_model]
        residual, batch_size = Q, Q.size(0)
        # (B, S, D) -proj-> (B, S, D) -split-> (B, S, H, W) -trans-> (B, H, S, W)
        q_s = self.W_Q(Q).view(batch_size, -1, n_heads, d_k).transpose(1, 2)  # q_s: [batch_size x n_heads x len_q x d_k]
        k_s = self.W_K(K).view(batch_size, -1, n_heads, d_k).transpose(1, 2)  # k_s: [batch_size x n_heads x len_k x d_k]
        v_s = self.W_V(V).view(batch_size, -1, n_heads, d_v).transpose(1, 2)  # v_s: [batch_size x n_heads x len_k x d_v]

        attn_mask = attn_mask.unsqueeze(1).repeat(1, n_heads, 1, 1)  # attn_mask : [batch_size x n_heads x len_q x len_k]

        # context: [batch_size x n_heads x len_q x d_v], attn: [batch_size x n_heads x len_q(=len_k) x len_k(=len_q)]
        context, attn = ScaledDotProductAttention()(q_s, k_s, v_s, attn_mask)
        context = context.transpose(1, 2).contiguous().view(batch_size, -1, n_heads * d_v)  # context: [batch_size x len_q x n_heads * d_v]
        output = self.proj(context)
        return self.norm(output + residual), attn  # output: [batch_size x len_q x d_model]

class PoswiseFeedForwardNet(nn.Module):
    def __init__(self):
        super(PoswiseFeedForwardNet, self).__init__()
        self.fc1 = nn.Linear(d_model, d_ff)
        self.fc2 = nn.Linear(d_ff, d_model)

    def forward(self, x):
        # (batch_size, len_seq, d_model) -> (batch_size, len_seq, d_ff) -> (batch_size, len_seq, d_model)
        return self.fc2(gelu(self.fc1(x)))

class EncoderLayer(nn.Module):
    def __init__(self):
        super(EncoderLayer, self).__init__()
        self.enc_self_attn = MultiHeadAttention()
        self.pos_ffn = PoswiseFeedForwardNet()

    def forward(self, enc_inputs, enc_self_attn_mask):
        enc_outputs, attn = self.enc_self_attn(enc_inputs, enc_inputs, enc_inputs, enc_self_attn_mask)  # enc_inputs to same Q, K, V
        enc_outputs = self.pos_ffn(enc_outputs)  # enc_outputs: [batch_size x len_q x d_model]
        return enc_outputs, attn
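
# Shape walk-through for one EncoderLayer (illustrative note, using the
# hyper-parameters set in the __main__ block below: batch_size=6, maxlen=30,
# d_model=768, n_heads=12, d_k=d_v=64, d_ff=3072):
#   input [6, 30, 768] -> per-head Q/K/V [6, 12, 30, 64]
#   -> attention context [6, 12, 30, 64] -> concatenated heads [6, 30, 768]
#   -> position-wise FFN [6, 30, 3072] -> [6, 30, 768]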

class BERT(nn.Module):
    def __init__(self):
        super(BERT, self).__init__()
        self.embedding = Embedding()
        self.layers = nn.ModuleList([EncoderLayer() for _ in range(n_layers)])
        self.fc = nn.Linear(d_model, d_model)
        self.activ1 = nn.Tanh()
        self.linear = nn.Linear(d_model, d_model)
        self.activ2 = gelu
        self.norm = nn.LayerNorm(d_model)
        self.classifier = nn.Linear(d_model, 2)
        # the decoder weights are shared (tied) with the token embedding layer
        embed_weight = self.embedding.tok_embed.weight
        n_vocab, n_dim = embed_weight.size()
        self.decoder = nn.Linear(n_dim, n_vocab, bias=False)
        self.decoder.weight = embed_weight
        self.decoder_bias = nn.Parameter(torch.zeros(n_vocab))

    def forward(self, input_ids, segment_ids, masked_pos):
        output = self.embedding(input_ids, segment_ids)
        enc_self_attn_mask = get_attn_pad_mask(input_ids, input_ids)
        for layer in self.layers:
            output, enc_self_attn = layer(output, enc_self_attn_mask)
        # output : [batch_size, len, d_model], attn : [batch_size, n_heads, d_model, d_model]
        # the NSP prediction is made from the first token ([CLS])
        h_pooled = self.activ1(self.fc(output[:, 0]))  # [batch_size, d_model]
        logits_clsf = self.classifier(h_pooled)  # [batch_size, 2]

        masked_pos = masked_pos[:, :, None].expand(-1, -1, output.size(-1))  # [batch_size, max_pred, d_model]
        # gather the masked positions from the final transformer output
        h_masked = torch.gather(output, 1, masked_pos)  # [batch_size, max_pred, d_model]
        h_masked = self.norm(self.activ2(self.linear(h_masked)))
        logits_lm = self.decoder(h_masked) + self.decoder_bias  # [batch_size, max_pred, n_vocab]

        return logits_lm, logits_clsf

if __name__ == '__main__':
    # BERT parameters
    maxlen = 30  # maximum sequence length
    batch_size = 6
    max_pred = 5  # max number of tokens to predict
    n_layers = 6  # number of encoder layers
    n_heads = 12  # number of heads in multi-head attention
    d_model = 768  # embedding size
    d_ff = 768 * 4  # 4*d_model, feed-forward dimension
    d_k = d_v = 64  # dimension of K(=Q) and V
    n_segments = 2

    text = (
        'Hello, how are you? I am Romeo.\n'
        'Hello, Romeo My name is Juliet. Nice to meet you.\n'
        'Nice meet you too. How are you today?\n'
        'Great. My baseball team won the competition.\n'
        'Oh Congratulations, Juliet\n'
        'Thanks you Romeo'
    )
    sentences = re.sub("[.,!?\\-]", '', text.lower()).split('\n')  # strip '.', ',', '?', '!', '-'
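    # e.g. sentences[0] == 'hello how are you i am romeo'
    # Build the vocabulary next: ids 0-3 are reserved for the special tokens
    # [PAD], [CLS], [SEP] and [MASK]; every other word gets an id from 4 upwards.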
    word_list = list(set(" ".join(sentences).split()))
    word_dict = {'[PAD]': 0, '[CLS]': 1, '[SEP]': 2, '[MASK]': 3}
    for i, w in enumerate(word_list):
        word_dict[w] = i + 4
    number_dict = {i: w for i, w in enumerate(word_dict)}
    vocab_size = len(word_dict)

    token_list = list()
    for sentence in sentences:
        arr = [word_dict[s] for s in sentence.split()]
        token_list.append(arr)

    model = BERT()
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    batch = make_batch()
    input_ids, segment_ids, masked_tokens, masked_pos, isNext = map(torch.LongTensor, zip(*batch))

    for epoch in range(100):
        optimizer.zero_grad()
        logits_lm, logits_clsf = model(input_ids, segment_ids, masked_pos)
        loss_lm = criterion(logits_lm.transpose(1, 2), masked_tokens)  # for masked LM
        loss_lm = (loss_lm.float()).mean()
        loss_clsf = criterion(logits_clsf, isNext)  # for sentence classification
        loss = loss_lm + loss_clsf
        if (epoch + 1) % 10 == 0:
            print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))
        loss.backward()
        optimizer.step()

    # Predict masked tokens and isNext
    input_ids, segment_ids, masked_tokens, masked_pos, isNext = map(torch.LongTensor, zip(batch[0]))
    print(text)
    print([number_dict[w.item()] for w in input_ids[0] if number_dict[w.item()] != '[PAD]'])

    logits_lm, logits_clsf = model(input_ids, segment_ids, masked_pos)
    logits_lm = logits_lm.data.max(2)[1][0].data.numpy()
    print('masked tokens list : ', [pos.item() for pos in masked_tokens[0] if pos.item() != 0])
    print('predict masked tokens list : ', [pos for pos in logits_lm if pos != 0])

    logits_clsf = logits_clsf.data.max(1)[1].data.numpy()[0]
    print('isNext : ', True if isNext else False)
    print('predict isNext : ', True if logits_clsf else False)
--------------------------------------------------------------------------------
/krippendorff.py:
--------------------------------------------------------------------------------
"""
This module provides a function to compute Krippendorff's alpha, a statistical measure of the agreement achieved
when coding a set of units based on the values of a variable.

For more information, see: https://en.wikipedia.org/wiki/Krippendorff%27s_alpha

The module naming follows the one from the Wikipedia link.
"""
from typing import Any, Callable, Iterable, Optional, Sequence, Union

import numpy as np


def _nominal_metric(v1: np.ndarray, v2: np.ndarray, dtype: Any = np.float64, **kwargs) -> np.ndarray:  # noqa
    """Metric for nominal data."""
    return (v1 != v2).astype(dtype)


def _ordinal_metric(v1: np.ndarray, v2: np.ndarray, i1: np.ndarray, i2: np.ndarray,  # noqa
                    n_v: np.ndarray, dtype: Any = np.float64, **kwargs) -> np.ndarray:  # noqa
    """Metric for ordinal data."""
    i1, i2 = np.minimum(i1, i2), np.maximum(i1, i2)

    ranges = np.dstack((i1, i2 + 1))
    sums_between_indices = np.add.reduceat(np.append(n_v, 0), ranges.reshape(-1))[::2].reshape(*i1.shape)

    return (sums_between_indices - np.divide(n_v[i1] + n_v[i2], 2, dtype=dtype)) ** 2


def _interval_metric(v1: np.ndarray, v2: np.ndarray, dtype: Any = np.float64, **kwargs) -> np.ndarray:  # noqa
    """Metric for interval data."""
    return (v1 - v2).astype(dtype) ** 2


def _ratio_metric(v1: np.ndarray, v2: np.ndarray, dtype: Any = np.float64, **kwargs) -> np.ndarray:  # noqa
    """Metric for ratio data."""
    v1_plus_v2 = v1 + v2
    return np.divide(v1 - v2, v1_plus_v2, out=np.zeros(np.broadcast(v1, v2).shape), where=v1_plus_v2 != 0,
                     dtype=dtype) ** 2


def _coincidences(value_counts: np.ndarray, dtype: Any = np.float64) -> np.ndarray:
    """Coincidence matrix.

    Parameters
    ----------
    value_counts : ndarray, with shape (N, V)
        Number of coders that assigned a certain value to a determined unit, where N is the number of units
        and V is the value count.

    dtype : data-type
        Result and computation data-type.

    Returns
    -------
    o : ndarray, with shape (V, V)
        Coincidence matrix.
    """
    N, V = value_counts.shape
    pairable = np.maximum(value_counts.sum(axis=1), 2)
    diagonals = value_counts[:, np.newaxis, :] * np.eye(V)[np.newaxis, ...]
    unnormalized_coincidences = value_counts[..., np.newaxis] * value_counts[:, np.newaxis, :] - diagonals
    return np.divide(unnormalized_coincidences, (pairable - 1).reshape((-1, 1, 1)), dtype=dtype).sum(axis=0)


def _random_coincidences(n_v: np.ndarray, dtype: Any = np.float64) -> np.ndarray:
    """Random coincidence matrix.

    Parameters
    ----------
    n_v : ndarray, with shape (V,)
        Number of pairable elements for each value.

    dtype : data-type
        Result and computation data-type.

    Returns
    -------
    e : ndarray, with shape (V, V)
        Random coincidence matrix.
    """
    return np.divide(np.outer(n_v, n_v) - np.diagflat(n_v), n_v.sum() - 1, dtype=dtype)


def _distances(value_domain: np.ndarray, distance_metric: Callable[..., np.ndarray], n_v: np.ndarray,
               dtype: Any = np.float64) -> np.ndarray:
    """Distances of the different possible values.

    Parameters
    ----------
    value_domain : ndarray, with shape (V,)
        Possible values V the units can take.
        If the level of measurement is not nominal, it must be ordered.

    distance_metric : callable
        Callable that returns the distance between two given values.

    n_v : ndarray, with shape (V,)
        Number of pairable elements for each value.

    dtype : data-type
        Result and computation data-type.

    Returns
    -------
    d : ndarray, with shape (V, V)
        Distance matrix for each value pair.
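
    Examples
    --------
    Illustrative check (not part of the original docstring): with the interval
    metric, the result is simply the matrix of squared value differences
    (``n_v`` is only used by the ordinal metric).

    >>> _distances(np.array([1, 2, 3]), _interval_metric, np.array([2, 2, 2]))
    array([[0., 1., 4.],
           [1., 0., 1.],
           [4., 1., 0.]])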
    """
    indices = np.arange(len(value_domain))
    return distance_metric(value_domain[:, np.newaxis], value_domain[np.newaxis, :], i1=indices[:, np.newaxis],
                           i2=indices[np.newaxis, :], n_v=n_v, dtype=dtype)


def _distance_metric(level_of_measurement: Union[str, Callable[..., np.ndarray]]) -> Callable[..., np.ndarray]:
    """Distance metric callable for the given level of measurement.

    Parameters
    ----------
    level_of_measurement : string or callable
        Stevens' level of measurement of the variable.
        It must be one of 'nominal', 'ordinal', 'interval', 'ratio', or a callable.

    Returns
    -------
    metric : callable
        Distance callable.
    """
    return {
        'nominal': _nominal_metric,
        'ordinal': _ordinal_metric,
        'interval': _interval_metric,
        'ratio': _ratio_metric,
    }.get(level_of_measurement, level_of_measurement)


def _reliability_data_to_value_counts(reliability_data: np.ndarray, value_domain: np.ndarray) -> np.ndarray:
    """Return the value counts given the reliability data.

    Parameters
    ----------
    reliability_data : ndarray, with shape (M, N)
        Reliability data matrix which contains the rating coder i gave to unit j, where M is the number of raters
        and N is the unit count.
        Missing rates are represented with `np.nan`.

    value_domain : ndarray, with shape (V,)
        Possible values the units can take.

    Returns
    -------
    value_counts : ndarray, with shape (N, V)
        Number of coders that assigned a certain value to a determined unit, where N is the number of units
        and V is the value count.
    """
    return (reliability_data.T[..., np.newaxis] == value_domain[np.newaxis, np.newaxis, :]).sum(axis=1)  # noqa


def alpha(reliability_data: Optional[Iterable[Any]] = None, value_counts: Optional[np.ndarray] = None,
          value_domain: Optional[Sequence[Any]] = None,
          level_of_measurement: Union[str, Callable[..., Any]] = 'interval', dtype: Any = np.float64) -> float:
    """Compute Krippendorff's alpha.

    See https://en.wikipedia.org/wiki/Krippendorff%27s_alpha for more information.

    Parameters
    ----------
    reliability_data : array_like, with shape (M, N)
        Reliability data matrix which contains the rating coder i gave to unit j, where M is the number of raters
        and N is the unit count.
        Missing rates are represented with `np.nan`.
        If it's provided then `value_counts` must not be provided.

    value_counts : array_like, with shape (N, V)
        Number of coders that assigned a certain value to a determined unit, where N is the number of units
        and V is the value count.
        If it's provided then `reliability_data` must not be provided.

    value_domain : array_like, with shape (V,)
        Possible values the units can take.
        If the level of measurement is not nominal, it must be ordered.
        If `reliability_data` is provided, then the default value is the ordered list of unique rates that appear.
        Else, the default value is `list(range(V))`.

    level_of_measurement : string or callable
        Stevens' level of measurement of the variable.
        It must be one of 'nominal', 'ordinal', 'interval', 'ratio', or a callable.

    dtype : data-type
        Result and computation data-type.

    Returns
    -------
    alpha : float
        Scalar value of Krippendorff's alpha of type `dtype`.

    Examples
    --------
    >>> reliability_data = [[np.nan, np.nan, np.nan, np.nan, np.nan, 3, 4, 1, 2, 1, 1, 3, 3, np.nan, 3],
    ...                     [1, np.nan, 2, 1, 3, 3, 4, 3, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan],
    ...                     [np.nan, np.nan, 2, 1, 3, 4, 4, np.nan, 2, 1, 1, 3, 3, np.nan, 4]]
    >>> print(round(alpha(reliability_data=reliability_data, level_of_measurement='nominal'), 6))
    0.691358
    >>> print(round(alpha(reliability_data=reliability_data, level_of_measurement='interval'), 6))
    0.810845
    >>> value_counts = np.array([[1, 0, 0, 0],
    ...                          [0, 0, 0, 0],
    ...                          [0, 2, 0, 0],
    ...                          [2, 0, 0, 0],
    ...                          [0, 0, 2, 0],
    ...                          [0, 0, 2, 1],
    ...                          [0, 0, 0, 3],
    ...                          [1, 0, 1, 0],
    ...                          [0, 2, 0, 0],
    ...                          [2, 0, 0, 0],
    ...                          [2, 0, 0, 0],
    ...                          [0, 0, 2, 0],
    ...                          [0, 0, 2, 0],
    ...                          [0, 0, 0, 0],
    ...                          [0, 0, 1, 1]])
    >>> print(round(alpha(value_counts=value_counts, level_of_measurement='nominal'), 6))
    0.691358
    >>> # The following examples were extracted from
    >>> # https://www.statisticshowto.datasciencecentral.com/wp-content/uploads/2016/07/fulltext.pdf, page 8.
    >>> reliability_data = [[1, 2, 3, 3, 2, 1, 4, 1, 2, np.nan, np.nan, np.nan],
    ...                     [1, 2, 3, 3, 2, 2, 4, 1, 2, 5, np.nan, 3],
    ...                     [np.nan, 3, 3, 3, 2, 3, 4, 2, 2, 5, 1, np.nan],
    ...                     [1, 2, 3, 3, 2, 4, 4, 1, 2, 5, 1, np.nan]]
    >>> print(round(alpha(reliability_data, level_of_measurement='ordinal'), 3))
    0.815
    >>> print(round(alpha(reliability_data, level_of_measurement='ratio'), 3))
    0.797
    >>> reliability_data = [["very low", "low", "mid", "mid", "low", "very low", "high", "very low", "low", np.nan,
    ...                      np.nan, np.nan],
    ...                     ["very low", "low", "mid", "mid", "low", "low", "high", "very low", "low", "very high",
    ...                      np.nan, "mid"],
    ...                     [np.nan, "mid", "mid", "mid", "low", "mid", "high", "low", "low", "very high", "very low",
    ...                      np.nan],
    ...                     ["very low", "low", "mid", "mid", "low", "high", "high", "very low", "low", "very high",
    ...                      "very low", np.nan]]
    >>> print(round(alpha(reliability_data, level_of_measurement='ordinal',
    ...                   value_domain=["very low", "low", "mid", "high", "very high"]), 3))
    0.815
    """
    if (reliability_data is None) == (value_counts is None):
        raise ValueError("Either reliability_data or value_counts must be provided, but not both.")

    # Don't know if it's a list or numpy array. If it's the latter, the truth value is ambiguous. So, ask for None.
    if value_counts is None:
        reliability_data = np.asarray(reliability_data)

        if value_domain is None:
            value_domain = np.unique(reliability_data[~np.isnan(reliability_data)])
        else:
            value_domain = np.asarray(value_domain)
            assert np.isin(reliability_data, np.append(value_domain, np.nan)).all(), \
                "The reliability data contains out-of-domain values."
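
        # Convert the (M, N) coder-by-unit reliability matrix into the (N, V)
        # per-unit value counts that the rest of the computation operates on.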
        value_counts = _reliability_data_to_value_counts(reliability_data, value_domain)
    else:  # reliability_data is None
        value_counts = np.asarray(value_counts)

        if value_domain is None:
            value_domain = np.arange(value_counts.shape[1])
        else:
            value_domain = np.asarray(value_domain)
            assert value_counts.shape[1] == len(value_domain), \
                "The length of the value domain must be equal to the number of columns of value_counts."

    assert len(value_domain) > 1, "There has to be more than one value in the domain."

    distance_metric = _distance_metric(level_of_measurement)

    o = _coincidences(value_counts, dtype=dtype)
    n_v = o.sum(axis=0)
    e = _random_coincidences(n_v, dtype=dtype)
    d = _distances(value_domain, distance_metric, n_v, dtype=dtype)
    return 1 - (o * d).sum() / (e * d).sum()
--------------------------------------------------------------------------------
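
A quick cross-check sketch (not part of the repository): the same Wikipedia
example is fed to both implementations shipped here, the `krippendorff`
package and the standalone `krippendorff_alpha` module. Both should report
roughly 0.691 for the nominal metric.

import numpy as np
import krippendorff
from krippendorff_alpha import krippendorff_alpha, nominal_metric

coders = (
    "* * * * * 3 4 1 2 1 1 3 3 * 3",  # coder A
    "1 * 2 1 3 3 4 3 * * * * * * *",  # coder B
    "* * 2 1 3 4 4 * 2 1 1 3 3 * 4",  # coder C
)

# library version: np.nan marks missing ratings
reliability_data = [[np.nan if v == '*' else int(v) for v in c.split()] for c in coders]
print(krippendorff.alpha(reliability_data=reliability_data, level_of_measurement='nominal'))

# standalone version: '*' marks missing ratings
print(krippendorff_alpha([c.split() for c in coders], nominal_metric, missing_items='*'))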