What is a proper way to implement an evaluate method that works on already-extracted features?
I came up with something like this, but I have a feeling there is a better solution:
class MyImageClassifier(ImageClassifier):
    # verbose and batch_size mirror ImageClassifier.evaluate's signature
    def evaluateExtractedFeatures(self, extracted_features, metric='auto', verbose=True, batch_size=64):
        import turicreate as _tc
        from turicreate.toolkits.image_classifier._evaluation import Evaluation as _Evaluation

        if batch_size < 1:
            raise ValueError("'batch_size' must be greater than or equal to 1")
        # Evaluate the underlying classifier directly on the pre-extracted features
        # (extracted_features must already contain the deep-feature column and the target column)
        metrics = self.classifier.evaluate(extracted_features, metric=metric, with_predictions=True)
        predictions = metrics["predictions"]["probs"]
        state = self.__proxy__.get_state()
        labels = state["classes"]
        from turicreate.toolkits._evaluate_utils import (
            entropy,
            confidence,
            relative_confidence,
            get_confusion_matrix,
            hclusterSort,
            l2Dist,
        )
        evaluation_result = {k: metrics[k] for k in [
            'accuracy', 'f1_score', 'log_loss', 'precision',
            'recall', 'auc', 'roc_curve', 'confusion_matrix']}
        evaluation_result['num_test_examples'] = len(extracted_features)
        for k in ['num_classes', 'num_features', 'input_image_shape', 'num_examples',
                  'training_loss', 'training_time', 'model', 'max_iterations']:
            evaluation_result[k] = getattr(self, k)
        # Extend the given test data with per-row prediction statistics
        # (assumes the target column in extracted_features is named 'label')
        extended_test = extracted_features.add_column(predictions, 'probs')
        extended_test = extended_test.add_columns(
            [extended_test.apply(lambda d: labels[d['probs'].index(confidence(d['probs']))]),
             extended_test.apply(lambda d: entropy(d['probs'])),
             extended_test.apply(lambda d: confidence(d['probs'])),
             extended_test.apply(lambda d: relative_confidence(d['probs']))],
            ['predicted_label', 'entropy', 'confidence', 'relative_confidence'])
        extended_test = extended_test.add_column(
            extended_test.apply(lambda d: d['label'] == d['predicted_label']), 'correct')

        evaluation_result['model_name'] = state['model']
        # Calculate the confusion matrix
        sf_conf_mat = get_confusion_matrix(extended_test, labels)

        confidence_threshold = 0.5
        hesitant_threshold = 0.2
        evaluation_result['confidence_threshold'] = confidence_threshold
        evaluation_result['hesitant_threshold'] = hesitant_threshold
        evaluation_result['confidence_metric_for_threshold'] = 'relative_confidence'
        evaluation_result['conf_mat'] = list(sf_conf_mat)
        # Get sorted labels (sorted by hierarchical clustering of the confusion-matrix rows)
        vectors = [{'name': l,
                    'pos': list(sf_conf_mat[sf_conf_mat['target_label'] == l].sort('predicted_label')['norm_prob'])}
                   for l in labels]
        evaluation_result['sorted_labels'] = hclusterSort(vectors, l2Dist)[0]['name'].split("|")
        # Get recall and precision per label
        per_l = extended_test.groupby(['label'], {'count': _tc.aggregate.COUNT,
                                                  'correct_count': _tc.aggregate.SUM('correct')})
        per_l['recall'] = per_l.apply(lambda l: l['correct_count'] * 1.0 / l['count'])

        per_pl = extended_test.groupby(['predicted_label'], {'predicted_count': _tc.aggregate.COUNT,
                                                             'correct_count': _tc.aggregate.SUM('correct')})
        per_pl['precision'] = per_pl.apply(lambda l: l['correct_count'] * 1.0 / l['predicted_count'])
        per_pl = per_pl.rename({'predicted_label': 'label'})

        evaluation_result['label_metrics'] = list(
            per_l.join(per_pl, on='label', how='outer').select_columns(
                ['label', 'count', 'correct_count', 'predicted_count', 'recall', 'precision']))
        evaluation_result['labels'] = labels

        extended_test = extended_test.add_row_number('__idx').rename({'label': 'target_label'})
        evaluation_result['test_data'] = extended_test
        evaluation_result['feature'] = self.feature

        return _Evaluation(evaluation_result)
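
For reference, this is roughly how I intend to call it. The sketch below is only my assumption about usage: it presupposes that my_model is already an instance of MyImageClassifier, that the target column is named 'label', and it reuses the internal _extract_features helper, which is not part of the public API and may change between Turi Create versions:

import turicreate as tc

test_data = tc.SFrame('test_data.sframe')

# _extract_features is an internal helper that the built-in evaluate()/predict()
# use to run the feature extractor; reusing it here is an assumption on my part.
extracted = my_model._extract_features(test_data)
extracted['label'] = test_data['label']

# The extracted features can now be cached and re-evaluated repeatedly without
# re-running the (expensive) feature extraction step.
evaluation = my_model.evaluateExtractedFeatures(extracted, metric='auto')
print(evaluation['accuracy'])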