Chapter 5, Page 191 - Exercise for the adventurous reader

What would a proper solution for the evaluate method based on extracted features look like?

I figured out something like this, but I have a feeling there is a better solution:

class MyImageClassifier(ImageClassifier):
    def evaluateExtractedFeatures(self, extracted_features, metric='auto', verbose=True, batch_size=64):
        import turicreate as _tc
        from turicreate.toolkits.image_classifier._evaluation import Evaluation as _Evaluation

        if batch_size < 1:
            raise ValueError("'batch_size' must be greater than or equal to 1")

        # Evaluate the underlying classifier directly on the pre-extracted features,
        # asking for the per-row class probabilities as well.
        metrics = self.classifier.evaluate(extracted_features, metric=metric, with_predictions=True)

        predictions = metrics["predictions"]["probs"]
        state = self.__proxy__.get_state()
        labels = state["classes"]

        from turicreate.toolkits._evaluate_utils import (
            entropy,
            confidence,
            relative_confidence,
            get_confusion_matrix,
            hclusterSort,
            l2Dist
        )

        # Start the result dictionary from the standard metrics, then add some
        # metadata about the test set and the trained model.
        evaluation_result = {k: metrics[k] for k in ['accuracy', 'f1_score', 'log_loss', 'precision',
                                                     'recall', 'auc', 'roc_curve', 'confusion_matrix']}
        evaluation_result['num_test_examples'] = len(extracted_features)
        for k in ['num_classes', 'num_features', 'input_image_shape', 'num_examples',
                  'training_loss', 'training_time', 'model', 'max_iterations']:
            evaluation_result[k] = getattr(self, k)

        # Extend the given test data with the predicted probabilities and a few
        # per-row confidence measures.
        extended_test = extracted_features.add_column(predictions, 'probs')
        extended_test = extended_test.add_columns(
            [extended_test.apply(lambda d: labels[d['probs'].index(confidence(d['probs']))]),
             extended_test.apply(lambda d: entropy(d['probs'])),
             extended_test.apply(lambda d: confidence(d['probs'])),
             extended_test.apply(lambda d: relative_confidence(d['probs']))],
            ['predicted_label', 'entropy', 'confidence', 'relative_confidence'])
        extended_test = extended_test.add_column(
            extended_test.apply(lambda d: d['label'] == d['predicted_label']), 'correct')

        evaluation_result['model_name'] = state['model']
        # Calculate the confusion matrix
        sf_conf_mat = get_confusion_matrix(extended_test, labels)
        confidence_threshold = 0.5
        hesitant_threshold = 0.2
        evaluation_result['confidence_threshold'] = confidence_threshold
        evaluation_result['hesitant_threshold'] = hesitant_threshold
        evaluation_result['confidence_metric_for_threshold'] = 'relative_confidence'

        evaluation_result['conf_mat'] = list(sf_conf_mat)

        # Get sorted labels (sorted by hierarchical clustering of the confusion
        # matrix rows, so that similar classes end up next to each other)
        vectors = [{'name': l,
                    'pos': list(sf_conf_mat[sf_conf_mat['target_label'] == l].sort('predicted_label')['norm_prob'])}
                   for l in labels]
        evaluation_result['sorted_labels'] = hclusterSort(vectors, l2Dist)[0]['name'].split("|")

        # Get recall and precision per label
        per_l = extended_test.groupby(['label'], {'count': _tc.aggregate.COUNT, 'correct_count': _tc.aggregate.SUM('correct') })
        per_l['recall'] = per_l.apply(lambda l: l['correct_count']*1.0 / l['count'])

        per_pl = extended_test.groupby(['predicted_label'], {'predicted_count': _tc.aggregate.COUNT, 'correct_count': _tc.aggregate.SUM('correct') })
        per_pl['precision'] = per_pl.apply(lambda l: l['correct_count']*1.0 / l['predicted_count'])
        per_pl = per_pl.rename({'predicted_label': 'label'})
        evaluation_result['label_metrics'] = list(
            per_l.join(per_pl, on='label', how='outer').select_columns(
                ['label', 'count', 'correct_count', 'predicted_count', 'recall', 'precision']))
        evaluation_result['labels'] = labels

        # The interactive evaluation view expects a row index and the ground-truth
        # column to be named 'target_label'.
        extended_test = extended_test.add_row_number('__idx').rename({'label': 'target_label'})

        evaluation_result['test_data'] = extended_test
        evaluation_result['feature'] = self.feature

        return _Evaluation(evaluation_result)
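
For reference, this is roughly how I call it. A minimal sketch, assuming my_model is a trained instance of MyImageClassifier and test_features is the SFrame of extracted deep features that still contains the 'label' column (the method above hardcodes that column name). my_model and test_features are just my own placeholder names:

# Rough usage sketch -- 'my_model' and 'test_features' are placeholder names.
# 'test_features' must hold the extracted deep features plus the ground-truth
# 'label' column, since evaluateExtractedFeatures looks up d['label'] above.
evaluation = my_model.evaluateExtractedFeatures(test_features)

print(evaluation['accuracy'])   # the usual metrics are available in the result
evaluation.explore()            # should open the same interactive evaluation view
                                # you get from model.evaluate(test_data).explore()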