Commit dcfc6ce3 by Martin Karlsson

### hw6

parent d55d6e6d
 *.jpg *.png
This diff is collapsed.
 import numpy as np class ConfusionMatrix: """ Simple confusion matrix class row is the true class, column is the predicted class """ def __init__(self, num_classes, class_names=None): self.n_classes = num_classes if class_names is None: self.class_names = map(str, range(num_classes)) else: self.class_names = class_names # find max class_name and pad max_len = max(map(len, self.class_names)) self.max_len = max_len for idx, name in enumerate(self.class_names): if len(self.class_names) < max_len: self.class_names[idx] = name + " "*(max_len-len(name)) self.mat = np.zeros((num_classes,num_classes),dtype='int') def __str__(self): # calucate row and column sums col_sum = np.sum(self.mat, axis=1) row_sum = np.sum(self.mat, axis=0) s = [] mat_str = self.mat.__str__() mat_str = mat_str.replace('[','').replace(']','').split('\n') for idx, row in enumerate(mat_str): if idx == 0: pad = " " else: pad = "" class_name = self.class_names[idx] class_name = " " + class_name + " |" row_str = class_name + pad + row row_str += " |" + str(col_sum[idx]) s.append(row_str) row_sum = [(self.max_len+4)*" "+" ".join(map(str, row_sum))] hline = [(1+self.max_len)*" "+"-"*len(row_sum[0])] s = hline + s + hline + row_sum # add linebreaks s_out = [line+'\n' for line in s] return "".join(s_out) def batch_add(self, targets, preds): assert targets.shape == preds.shape assert len(targets) == len(preds) assert max(targets) < self.n_classes assert max(preds) < self.n_classes targets = targets.flatten() preds = preds.flatten() for i in range(len(targets)): self.mat[targets[i], preds[i]] += 1 def get_errors(self): tp = np.asarray(np.diag(self.mat).flatten(),dtype='float') fn = np.asarray(np.sum(self.mat, axis=1).flatten(),dtype='float') - tp fp = np.asarray(np.sum(self.mat, axis=0).flatten(),dtype='float') - tp tn = np.asarray(np.sum(self.mat)*np.ones(self.n_classes).flatten(), dtype='float') - tp - fn - fp return tp, fn, fp, tn def accuracy(self): """ Calculates global accuracy :return: accuracy :example: >>> conf = ConfusionMatrix(3) >>> conf.batchAdd([0,0,1],[0,0,2]) >>> print conf.accuracy() """ tp, _, _, _ = self.get_errors() n_samples = np.sum(self.mat) return np.sum(tp) / n_samples def sensitivity(self): tp, tn, fp, fn = self.get_errors() res = tp / (tp + fn) res = res[~np.isnan(res)] return res def specificity(self): tp, tn, fp, fn = self.get_errors() res = tn / (tn + fp) res = res[~np.isnan(res)] return res def positive_predictive_value(self): tp, tn, fp, fn = self.get_errors() res = tp / (tp + fp) res = res[~np.isnan(res)] return res def negative_predictive_value(self): tp, tn, fp, fn = self.get_errors() res = tn / (tn + fn) res = res[~np.isnan(res)] return res def false_positive_rate(self): tp, tn, fp, fn = self.get_errors() res = fp / (fp + tn) res = res[~np.isnan(res)] return res def false_discovery_rate(self): tp, tn, fp, fn = self.get_errors() res = fp / (tp + fp) res = res[~np.isnan(res)] return res def F1(self): tp, tn, fp, fn = self.get_errors() res = (2*tp) / (2*tp + fp + fn) res = res[~np.isnan(res)] return res def matthews_correlation(self): tp, tn, fp, fn = self.get_errors() numerator = tp*tn - fp*fn denominator = np.sqrt((tp + fp)*(tp + fn)*(tn + fp)*(tn + fn)) res = numerator / denominator res = res[~np.isnan(res)] return res
 from __future__ import print_function import numpy as np target_to_text = { '0':'noll', '1':'ett', '2':'tva', '3':'tre', '4':'fyra', '5':'fem', '6':'sex', '7':'sju', '8':'atta', '9':'nio', } stop_character = start_character = '#' input_characters = " ".join(target_to_text.values()) valid_characters = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '#'] + \ list(set(input_characters)) def print_valid_characters(): l = '' for i,c in enumerate(valid_characters): l += "\'%s\'=%i,\t" % (c,i) print("Number of valid characters:", len(valid_characters)) print(l) ninput_chars = len(valid_characters) def get_batch(batch_size=100, min_digits = 3, max_digits=3): ''' Generates random sequences of integers and translates them to text i.e. 1->'one'. :param batch_size: number of samples to return :param min_digits: minimum length of target :param max_digits: maximum length of target ''' text_inputs = [] int_inputs = [] text_targets_in = [] text_targets_out = [] int_targets_in = [] int_targets_out = [] for i in range(batch_size): #convert integer into a list of digits tar_len = np.random.randint(min_digits,max_digits+1) text_target = inp_str = "".join(map(str,np.random.randint(0,10,tar_len))) text_target_in = start_character + text_target text_target_out = text_target + stop_character #generate the targets as a list of intergers int_target_in = map(lambda c: valid_characters.index(c), text_target_in) int_target_out = map(lambda c: valid_characters.index(c), text_target_out) #generate the text input text_input = " ".join(map(lambda k: target_to_text[k], inp_str)) #generate the inputs as a list of intergers int_input = map(lambda c: valid_characters.index(c), text_input) text_inputs.append(text_input) int_inputs.append(int_input) text_targets_in.append(text_target_in) text_targets_out.append(text_target_out) int_targets_in.append(int_target_in) int_targets_out.append(int_target_out) #create the input matrix, mask and seq_len - note that we zero pad the shorter sequences. max_input_len = max(map(len, int_inputs)) inputs = np.zeros((batch_size, max_input_len)) # input_masks = np.zeros((batch_size,max_input_len)) for (i,inp) in enumerate(int_inputs): cur_len = len(inp) inputs[i,:cur_len] = inp # input_masks[i,:cur_len] = 1 inputs_seqlen = np.asarray(map(len, int_inputs)) max_target_in_len = max(map(len, int_targets_in)) targets_in = np.zeros((batch_size, max_target_in_len)) targets_mask = np.zeros((batch_size, max_target_in_len)) for (i, tar) in enumerate(int_targets_in): cur_len = len(tar) targets_in[i, :cur_len] = tar targets_seqlen = np.asarray(map(len, int_targets_in)) max_target_out_len = max(map(len, int_targets_out)) targets_out = np.zeros((batch_size, max_target_in_len)) for (i,tar) in enumerate(int_targets_out): cur_len = len(tar) targets_out[i,:cur_len] = tar targets_mask[i,:cur_len] = 1 return inputs.astype('int32'), \ inputs_seqlen.astype('int32'), \ targets_in.astype('int32'), \ targets_out.astype('int32'), \ targets_seqlen.astype('int32'), \ targets_mask.astype('float32'), \ text_inputs, \ text_targets_in, \ text_targets_out if __name__ == '__main__': batch_size = 3 inputs, inputs_seqlen, targets_in, targets_out, targets_seqlen, targets_mask, \ text_inputs, text_targets_in, text_targets_out = \ get_batch(batch_size=batch_size, max_digits=2, min_digits=1) print("input types:", inputs.dtype, inputs_seqlen.dtype, targets_in.dtype, targets_out.dtype, targets_seqlen.dtype) print(print_valid_characters()) print("Stop/start character = #") for i in range(batch_size): print("\nSAMPLE",i) print("TEXT INPUTS:\t\t\t", text_inputs[i]) print("TEXT TARGETS INPUT:\t\t", text_targets_in[i]) print("TEXT TARGETS OUTPUT:\t\t", text_targets_out[i]) print("ENCODED INPUTS:\t\t\t", inputs[i]) print("INPUTS SEQUENCE LENGTH:\t\t", inputs_seqlen[i]) print("ENCODED TARGETS INPUT:\t\t", targets_in[i]) print("ENCODED TARGETS OUTPUT:\t\t", targets_out[i]) print("TARGETS SEQUENCE LENGTH:\t", targets_seqlen[i]) print("TARGETS MASK:\t\t\t", targets_mask[i])