Music genre classifier¶
Classifiers¶
This notebook should be seen as the second step in a series of notebooks aimed at building an ML audio classifier.
We will start by looking into traditional ML classifiers, such as SVM, KNN, and Random Forest. Then, we can move on to more complex models, such as CNNs and RNNs. Lastly, we will see how our results compare against a pretrained model.
Goal¶
Train a high-performing ML classifier to predict the genre of a song.
Dataset¶
The dataset contains 1000 audio tracks, each 30 seconds long, spanning 10 genres with 100 tracks per genre. The tracks are all 22050 Hz, mono, 16-bit audio files in .wav format.
In preprocess.py, we convert the .wav files to MFCC features and store them as PyTorch tensors (mfcc.pt). Labels and file paths are stored as numpy arrays. (One of the 1000 files fails to load, which is why only 999 tracks appear below.)
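Since preprocess.py itself isn't shown here, a rough sketch of the conversion step might look like the following (assuming librosa for feature extraction; the actual script may differ):
import librosa
import torch

def wav_to_mfcc(path, sr=22050, n_mfcc=13):
    """Load a mono .wav file and return its MFCCs as a (num_frames, n_mfcc) tensor."""
    y, _ = librosa.load(path, sr=sr, mono=True)  # load/resample at 22050 Hz, mono
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)  # shape: (n_mfcc, num_frames)
    return torch.from_numpy(mfcc.T)  # shape: (num_frames, n_mfcc), matching mfcc.pt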
Source¶
https://www.kaggle.com/datasets/andradaolteanu/gtzan-dataset-music-genre-classification/ (accessed 2023-10-20)
Load data¶
import os
os.chdir('/Users/per.morten.halvorsen@schibsted.com/personal/music-genre-classifiers')
from functools import partial
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, f1_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import plotly.express as ex
import plotly.graph_objects as go
import plotly.io as pio
pio.renderers.default = 'notebook'
mfcc_tensor = torch.load("data/mfcc.pt")
covariance_tensor = torch.load("data/covariance.pt")
file_paths = np.load("data/file_paths.npy")
labels = np.load("data/labels.npy")
mfcc_tensor.shape
torch.Size([999, 2986, 13])
covariance_tensor.shape
torch.Size([999, 13, 13])
labels.shape
(999,)
file_paths.shape
(999,)
labels_to_int = {label: i for i, label in enumerate(np.unique(labels))}
int_to_labels = {i: label for label, i in labels_to_int.items()}
labels_to_int
{'blues': 0, 'classical': 1, 'country': 2, 'disco': 3, 'hiphop': 4, 'jazz': 5, 'metal': 6, 'pop': 7, 'reggae': 8, 'rock': 9}
Train test split¶
For a fair comparison, we need to do the train-test split early and use the same sets across all classifiers. We will use an 80-20 split. Ideally we would also stratify on the labels so that the label distribution is the same in both sets; note that the split below is unstratified, which introduces a slight class imbalance (visible in the distribution plot further down).
Further, we take 10% of the training set as a validation set, which we hold out for a final evaluation of the tuned models.
# Reshape the data into a 2D array (num_samples, num_features)
num_samples, num_frames, num_mfcc = mfcc_tensor.shape
mfcc_tensor_2d = np.reshape(mfcc_tensor, (num_samples, num_frames * num_mfcc))
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(mfcc_tensor_2d, labels, test_size=0.2, random_state=42)
# Get validation set
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.1, random_state=42)
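As an aside, train_test_split also accepts a stratify argument; a stratified variant of the split above (not used in this notebook) would keep the genre proportions identical across the sets:
# Stratified alternative (not used here): keeps genre proportions equal across splits
X_train_s, X_test_s, y_train_s, y_test_s = train_test_split(
    mfcc_tensor_2d, labels, test_size=0.2, random_state=42, stratify=labels
)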
pd.options.plotting.backend = "plotly"
uniques, counts = np.unique(y_train, return_counts=True)
print("Training set distribution:")
train_value_counts = pd.DataFrame(list(zip(uniques, counts)), columns=["genre", "count"]).sort_values(by="count", ascending=False)
train_value_counts.plot.bar(x="genre", y="count", title="Training set distribution").show()
train_value_counts
Training set distribution:
| | genre | count |
|---|---|---|
| 6 | metal | 80 |
| 8 | reggae | 77 |
| 0 | blues | 73 |
| 4 | hiphop | 73 |
| 9 | rock | 73 |
| 7 | pop | 72 |
| 3 | disco | 71 |
| 5 | jazz | 71 |
| 2 | country | 68 |
| 1 | classical | 61 |
Traditional ML classifiers¶
In this section, we will build three simple, traditional ML classifiers: SVM, KNN, and Random Forest. We will use the MFCC features as input, and the genre label as output.
Random Forest¶
A random forest classifier is a tree-based model that leverages the power of multiple decision trees to make predictions. Each tree is trained on a random subset of the data, and the final prediction aggregates the votes of all the trees. This helps to reduce overfitting, and makes the model more robust.
These models often perform well on tabular data, and are relatively easy to train.
We will use the RandomForestClassifier from sklearn.ensemble to train our model.
# Initialize the Random Forest classifier
rf_classifier = RandomForestClassifier(random_state=42)
# Train the classifier
rf_classifier.fit(X_train, y_train)
# Make predictions on the test data
predictions = rf_classifier.predict(X_test)
# Evaluate
rf_accuracy = accuracy_score(y_test, predictions)
rf_f1_score = f1_score(y_test, predictions, average="weighted")
rf_confusion_matrix = confusion_matrix(y_test, predictions, labels=list(labels_to_int))
rf_classification_report = classification_report(y_test, predictions)
print("Accuracy:", rf_accuracy)
print("F1 Score:", rf_f1_score)
print("Classification Report:\n", rf_classification_report)
Accuracy: 0.5
F1 Score: 0.4826347227651575
Classification Report:
              precision    recall  f1-score   support

       blues       0.42      0.45      0.43        22
   classical       0.96      0.93      0.95        28
     country       0.46      0.27      0.34        22
       disco       0.31      0.21      0.25        24
      hiphop       0.54      0.35      0.42        20
        jazz       0.42      0.42      0.42        19
       metal       0.41      0.92      0.56        12
         pop       0.58      0.90      0.70        21
      reggae       0.50      0.33      0.40        15
        rock       0.17      0.18      0.17        17

    accuracy                           0.50       200
   macro avg       0.48      0.50      0.47       200
weighted avg       0.50      0.50      0.48       200
We can plot a confusion matrix of our classifications to show which genres the model struggles with. Here, the predicted labels are on the x-axis, and the true labels are on the y-axis. The diagonal shows the number of correct predictions, and the off-diagonal elements show the number of incorrect predictions. The darker the color, the more predictions there are in that category. Note that the dataset has not been stratified, so there is a slight imbalance in the number of samples in each class.
def plot_confusion_matrix(cm, classes=list(labels_to_int), title='Confusion matrix', cmap=ex.colors.sequential.Blues):
    """
    Plots the confusion matrix as a heatmap.
    """
    fig = ex.imshow(cm, x=classes, y=classes, color_continuous_scale=cmap)
    fig.update_layout(title=title, xaxis_title="Predicted", yaxis_title="Actual")
    fig.show()
plot_confusion_matrix(rf_confusion_matrix, title="Confusion Matrix for Random Forest Classifier")
SVM¶
An SVM is a kernel-based supervised learning model that tries to find a (set of) hyperplane(s) that separates the data into classes. The hyperplane should maximize the margin between the classes. Kernels can be linear, polynomial, or radial basis functions (RBF). A deep dive on SVMs is beyond the scope of this notebook.
When using a one-versus-rest decision method, an SVM model compares one class against the rest of the classes, for each of the classes in the data. The data is transformed by the kernel function, with the aim of making the classes linearly separable. After the transformation, class separation is learned by selecting "support vectors": the data points lying closest to the decision boundary, which define the margin between the classes.
Our implementation will make use of the SVC module from sklearn.svm to train our model.
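One detail worth noting: sklearn's SVC trains one-vs-one classifiers internally and only shapes its decision function as one-vs-rest. An explicit one-vs-rest setup, as described above, could be built with the OneVsRestClassifier wrapper; a sketch, not used in the rest of this notebook:
from sklearn.multiclass import OneVsRestClassifier

# One binary SVM per genre, each trained to separate that genre from all the others
ovr_svm = OneVsRestClassifier(SVC(kernel='linear', C=1.0, random_state=42))
# ovr_svm.fit(X_train, y_train) would fit 10 binary classifiers, one per genre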
# Initialize the SVM classifier
svm_classifier = SVC(kernel='linear', C=1.0, random_state=42)
# Train the classifier
svm_classifier.fit(X_train, y_train)
# Make predictions on the test data
predictions = svm_classifier.predict(X_test)
# Evaluate
svm_accuracy = accuracy_score(y_test, predictions)
svm_f1_score = f1_score(y_test, predictions, average='weighted')
svm_confusion_matrix = confusion_matrix(y_test, predictions)
svm_classification_report = classification_report(y_test, predictions)
print("Accuracy:", svm_accuracy)
print("F1 Score:", svm_f1_score)
print("Classification Report:\n", svm_classification_report)
plot_confusion_matrix(svm_confusion_matrix, title="Confusion Matrix for SVM Classifier")
Accuracy: 0.585
F1 Score: 0.5796226458334879
Classification Report:
              precision    recall  f1-score   support

       blues       0.42      0.45      0.43        22
   classical       0.93      0.96      0.95        28
     country       0.53      0.36      0.43        22
       disco       0.54      0.54      0.54        24
      hiphop       0.53      0.45      0.49        20
        jazz       0.53      0.47      0.50        19
       metal       0.53      0.75      0.62        12
         pop       0.76      0.90      0.83        21
      reggae       0.62      0.53      0.57        15
        rock       0.26      0.29      0.28        17

    accuracy                           0.58       200
   macro avg       0.56      0.57      0.56       200
weighted avg       0.58      0.58      0.58       200
KNN¶
A KNN classifier is a simple model that classifies a data point based on the classes of its nearest neighbors. The number of neighbors to consider is a hyperparameter, and should be tuned to find the optimal value. The first music genre classification blog post I came across used a KNN classifier, so I thought it would be fun to try it here and compare it against the other simple models.
We will use the KNeighborsClassifier from sklearn.neighbors to train our model.
# Initialize the KNN classifier (n_neighbors defaults to 5)
knn_classifier = KNeighborsClassifier()
# Train the classifier
knn_classifier.fit(X_train, y_train)
# Make predictions on the test data
predictions = knn_classifier.predict(X_test)
# Evaluate
knn_accuracy = accuracy_score(y_test, predictions)
knn_f1_score = f1_score(y_test, predictions, average='weighted')
knn_confusion_matrix = confusion_matrix(y_test, predictions)
knn_classification_report = classification_report(y_test, predictions)
print("Accuracy:", knn_accuracy)
print("F1 Score:", knn_f1_score)
print("Classification Report:\n", knn_classification_report)
plot_confusion_matrix(knn_confusion_matrix, title="Confusion Matrix for KNN Classifier")
Accuracy: 0.31
F1 Score: 0.2670479172726008
Classification Report:
              precision    recall  f1-score   support

       blues       1.00      0.18      0.31        22
   classical       0.75      0.96      0.84        28
     country       0.50      0.09      0.15        22
       disco       0.00      0.00      0.00        24
      hiphop       1.00      0.10      0.18        20
        jazz       0.15      0.47      0.23        19
       metal       0.18      1.00      0.31        12
         pop       0.67      0.10      0.17        21
      reggae       1.00      0.07      0.12        15
        rock       0.13      0.18      0.15        17

    accuracy                           0.31       200
   macro avg       0.54      0.31      0.25       200
weighted avg       0.55      0.31      0.27       200
Out-of-the-box analysis¶
We've now trained three traditional ML classifiers out-of-the-box to classify music genres, i.e., using the default hyperparameter values for each of our three classifiers.
The SVM seems to be the most promising default model, with the highest accuracy and F1-score. When observing its confusion matrix, we also see a trend of dark cells along the diagonal, telling us that most predictions are correct. A perfect model would have dark blue only along the diagonal and white everywhere else.
Our worst classifier was KNN, which over-predicted metal and jazz across all classes (both have high recall but very low precision). It's interesting that KNN seemed to be by far the most popular model in online tutorials. Maybe that's because it's easy to understand and easy to implement, but it is not a good model for this task.
A (semi-)consistent pattern learned by all models was how to classify blues music. The worst model for classifying this genre was the Random Forest. While blues was still this model's most common prediction for blues tracks, jazz, country, and rock also received a few predictions on these data. One could reason that this makes sense, as these genres are closely related in sound and instrumentation.
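To put the out-of-the-box results side by side, we can tabulate the scores computed above:
# Side-by-side comparison of the default (untuned) classifiers
pd.DataFrame(
    {"accuracy": [rf_accuracy, svm_accuracy, knn_accuracy],
     "weighted F1": [rf_f1_score, svm_f1_score, knn_f1_score]},
    index=["Random Forest", "SVM", "KNN"],
)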
These were, however, just some simple observations after building our default models. It's possible that we can improve performance somewhat by optimizing hyperparameters. Let's see how much this helps our scores.
Hyperparameter optimization¶
We'll now use sklearn's GridSearchCV to tune the hyperparameters for each of these classifier types.
Here's a brief overview of each classifier's tunable hyperparameters:
RandomForestClassifier
- n_estimators: number of trees in the forest
- max_depth: maximum number of layers per tree
- min_samples_leaf: minimum number of samples required at each leaf node
- min_samples_split: minimum number of samples needed before a node splits

(For simplicity, we'll reduce the scope of our search to only n_estimators and max_depth.)
SVC
- C: how much to penalize the model for misclassifications
- kernel: the kernel function to use, one of ['linear', 'poly', 'rbf', 'sigmoid', 'precomputed']

(For simplicity, we'll stick to these two.)
KNN
- n_neighbors: number of neighbors to consider when determining a category
- weights: how to weight the neighbors ['uniform', 'distance']
- algorithm: ['auto', 'ball_tree', 'kd_tree', 'brute']

(For simplicity, we'll search over only n_neighbors and weights.)
def get_module_name(module):
    """
    Returns the name of the module.
    """
    return str(module).split(".")[-1].split("'")[0].split("(")[0]
get_module_name(KNeighborsClassifier)
'KNeighborsClassifier'
def grid_search(classifier, params, cv=5, X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test, return_full_metrics=False):
    # Create a GridSearchCV object with the specified parameter grid and classifier
    _grid_search = GridSearchCV(estimator=classifier, param_grid=params, cv=cv, n_jobs=-1, scoring="f1_macro")  # weighted-average F1 would be an alternative scorer
    # Perform grid search on your data
    _grid_search.fit(X_train, y_train)
    # Print the best parameters found by the grid search
    print("Best Parameters:", _grid_search.best_params_)
    # Make predictions using the best estimator
    predictions = _grid_search.predict(X_test)
    # Calculate metrics
    accuracy = accuracy_score(y_test, predictions)
    f1 = f1_score(y_test, predictions, average='weighted')
    conf_matrix = confusion_matrix(y_test, predictions)
    class_report = classification_report(y_test, predictions)
    print("Accuracy:", accuracy)
    print("F1 Score:", f1)
    print("Classification Report:\n", class_report)
    plot_confusion_matrix(conf_matrix, title=f"Confusion Matrix for {get_module_name(classifier)} Classifier")
    # Append classifier name and params for plotting
    _grid_search.name = get_module_name(classifier)
    _grid_search.params = params
    _grid_search.f1 = f1
    _grid_search.accuracy = accuracy
    if return_full_metrics:
        return accuracy, f1, conf_matrix, class_report, _grid_search
    return _grid_search
rf_grid_search = grid_search(RandomForestClassifier(random_state=42), params={'n_estimators': [10, 50, 100, 200, 300, 400, 500], 'max_depth': [None, 1, 5, 10, 15, 20]})
Best Parameters: {'max_depth': 15, 'n_estimators': 400}
Accuracy: 0.49
F1 Score: 0.46292371718924125
Classification Report:
              precision    recall  f1-score   support

       blues       0.38      0.45      0.42        22
   classical       0.87      0.96      0.92        28
     country       0.44      0.18      0.26        22
       disco       0.50      0.25      0.33        24
      hiphop       0.45      0.25      0.32        20
        jazz       0.46      0.32      0.37        19
       metal       0.41      0.92      0.56        12
         pop       0.56      0.95      0.70        21
      reggae       0.50      0.47      0.48        15
        rock       0.10      0.12      0.11        17

    accuracy                           0.49       200
   macro avg       0.47      0.49      0.45       200
weighted avg       0.49      0.49      0.46       200
rf_grid_search.refit_time_
18.9330792427063
# decision_function_shape='ovr' gives one-vs-rest decision values (SVC trains one-vs-one internally)
# would be interesting to test 'ovo' too
svm_grid_search = grid_search(SVC(decision_function_shape='ovr'), params={'kernel': ['rbf', 'linear', 'poly', 'sigmoid'], 'C': [0.1, 1, 5, 10, 13, 15, 20]})
/opt/homebrew/Caskroom/miniconda/base/envs/music-genre/lib/python3.11/site-packages/joblib/externals/loky/process_executor.py:752: UserWarning: A worker stopped while some jobs were given to the executor. This can be caused by a too short worker timeout or by a memory leak.
Best Parameters: {'C': 10, 'kernel': 'rbf'}
Accuracy: 0.615
F1 Score: 0.6107994176974292
Classification Report:
              precision    recall  f1-score   support

       blues       0.50      0.50      0.50        22
   classical       0.93      0.96      0.95        28
     country       0.50      0.41      0.45        22
       disco       0.60      0.50      0.55        24
      hiphop       0.63      0.60      0.62        20
        jazz       0.56      0.47      0.51        19
       metal       0.59      0.83      0.69        12
         pop       0.76      0.90      0.83        21
      reggae       0.62      0.53      0.57        15
        rock       0.29      0.35      0.32        17

    accuracy                           0.61       200
   macro avg       0.60      0.61      0.60       200
weighted avg       0.61      0.61      0.61       200
svm_grid_search.refit_time_
14.293503999710083
knn_grid_search = grid_search(KNeighborsClassifier(), params={'n_neighbors': [2, 5, 10, 20, 30, 50, 70, 100], "weights": ["uniform", "distance"]})
# unused param "algorithm": ["auto", "ball_tree", "kd_tree", "brute"]
Best Parameters: {'n_neighbors': 2, 'weights': 'distance'}
Accuracy: 0.36
F1 Score: 0.33877263939763935
Classification Report:
              precision    recall  f1-score   support

       blues       1.00      0.23      0.37        22
   classical       0.96      0.96      0.96        28
     country       0.60      0.14      0.22        22
       disco       0.18      0.08      0.11        24
      hiphop       1.00      0.05      0.10        20
        jazz       0.24      0.58      0.34        19
       metal       0.23      1.00      0.37        12
         pop       0.71      0.24      0.36        21
      reggae       1.00      0.07      0.12        15
        rock       0.12      0.29      0.17        17

    accuracy                           0.36       200
   macro avg       0.60      0.36      0.31       200
weighted avg       0.63      0.36      0.34       200
knn_grid_search.refit_time_
0.1570141315460205
def plot_grid_search(gs, score_col="mean_test_score", param_cols=None, verbose=0):
    """3D plot of grid search results: the two hyperparameters on the x and y axes, the score on the z axis."""
    gs_df = pd.DataFrame(gs.cv_results_)
    print("name", gs.name)
    print("accuracy", gs.accuracy)
    print("f1_score", gs.f1)
    print("best params", gs.best_params_)
    # get score column
    if score_col is None:
        score_col = [x for x in gs_df.columns if "score" in x].pop()
        print("score_col", score_col) if verbose > 1 else None
    # get param cols
    if param_cols is None:
        param_cols = [x for x in gs_df.columns if "param_" in x]
        print("param_cols", param_cols) if verbose > 1 else None
    # grid dimensions: one axis per hyperparameter
    x_size = len(gs_df[param_cols[0]].unique())
    y_size = len(gs_df[param_cols[1]].unique())
    # reshape the flat cv_results_ into axis vectors and a score grid
    x = gs_df[param_cols[0]].values.reshape(x_size, y_size).T[0]
    y = gs_df[param_cols[1]].values.reshape(x_size, y_size)[0]  # row 0 holds each unique value of the second param
    z = gs_df[score_col].values.reshape(x_size, y_size).T
    fig = go.Figure(
        data=[go.Surface(
            x=x, y=y, z=z,
            hovertemplate=f"{param_cols[0]}: {'%{x}'}<br>{param_cols[1]}: {'%{y}'}<br>{score_col}: {'%{z}'}<extra></extra>",
        )]
    )
    fig.update_layout(
        title=f"GridSearchCV Results for {gs.name} Classifier",
        scene=dict(
            xaxis_title=param_cols[0],
            yaxis_title=param_cols[1],
            zaxis_title=score_col,
        ),
        height=750,
    )
    if verbose:
        print("x", x)
        print("y", y)
        print("z", z)
    return fig
plot_grid_search(rf_grid_search)
name RandomForestClassifier
accuracy 0.49
f1_score 0.46292371718924125
best params {'max_depth': 15, 'n_estimators': 400}
plot_grid_search(svm_grid_search)
name SVC
accuracy 0.615
f1_score 0.6107994176974292
best params {'C': 10, 'kernel': 'rbf'}
knn_grid_search.name = "KNN"
plot_grid_search(knn_grid_search)
name KNN
accuracy 0.36
f1_score 0.33877263939763935
best params {'n_neighbors': 2, 'weights': 'distance'}
Validation¶
After finding the best hyperparameters with grid search and comparing the models on the test set, we evaluate the tuned models on the held-out validation set. This is the final model performance we will use to compare against other classifier types.
def compute_metrics(model, X_val=X_val, y_val=y_val, name=""):
    # Make predictions using the best estimator
    predictions = model.predict(X_val)
    # Calculate metrics
    accuracy = accuracy_score(y_val, predictions)
    f1 = f1_score(y_val, predictions, average='weighted')
    conf_matrix = confusion_matrix(y_val, predictions)
    class_report = classification_report(y_val, predictions)
    print("Accuracy:", accuracy)
    print("F1 Score:", f1)
    print("Classification Report:\n", class_report)
    plot_confusion_matrix(conf_matrix, title=f"Confusion Matrix for {name} Classifier")
    return accuracy, f1, conf_matrix, class_report
rf_accuracy, rf_f1, rf_conf_matrix, rf_class_report = compute_metrics(rf_grid_search, name="Random Forest")
svm_accuracy, svm_f1, svm_conf_matrix, svm_class_report = compute_metrics(svm_grid_search, name="SVM")
knn_accuracy, knn_f1, knn_conf_matrix, knn_class_report = compute_metrics(knn_grid_search, name="KNN")
Accuracy: 0.6375
F1 Score: 0.6379452923573936
Classification Report:
              precision    recall  f1-score   support

       blues       0.38      0.60      0.46         5
   classical       0.92      1.00      0.96        11
     country       0.86      0.60      0.71        10
       disco       0.00      0.00      0.00         5
      hiphop       0.33      0.57      0.42         7
        jazz       0.75      0.67      0.71         9
       metal       0.67      0.75      0.71         8
         pop       0.46      0.86      0.60         7
      reggae       1.00      0.50      0.67         8
        rock       1.00      0.50      0.67        10

    accuracy                           0.64        80
   macro avg       0.64      0.60      0.59        80
weighted avg       0.70      0.64      0.64        80
Accuracy: 0.65
F1 Score: 0.6311731111502279
Classification Report:
              precision    recall  f1-score   support

       blues       0.60      0.60      0.60         5
   classical       0.92      1.00      0.96        11
     country       1.00      0.30      0.46        10
       disco       0.00      0.00      0.00         5
      hiphop       0.45      0.71      0.56         7
        jazz       0.80      0.89      0.84         9
       metal       0.88      0.88      0.88         8
         pop       0.45      0.71      0.56         7
      reggae       0.62      0.62      0.62         8
        rock       0.50      0.50      0.50        10

    accuracy                           0.65        80
   macro avg       0.62      0.62      0.60        80
weighted avg       0.67      0.65      0.63        80
Accuracy: 0.45
F1 Score: 0.40871066991756655
Classification Report:
              precision    recall  f1-score   support

       blues       1.00      0.40      0.57         5
   classical       0.90      0.82      0.86        11
     country       1.00      0.10      0.18        10
       disco       0.33      0.20      0.25         5
      hiphop       1.00      0.14      0.25         7
        jazz       0.26      0.56      0.36         9
       metal       0.42      1.00      0.59         8
         pop       0.60      0.43      0.50         7
      reggae       0.00      0.00      0.00         8
        rock       0.32      0.60      0.41        10

    accuracy                           0.45        80
   macro avg       0.58      0.42      0.40        80
weighted avg       0.58      0.45      0.41        80
Compare models¶
We compare the three optimized models against two criteria: F1-score and training time. F1-score is a balanced measurement of a model's precision and recall, and is a good metric for comparing models when the dataset is imbalanced. Our dataset is not imbalanced to begin with, but our unstratified train-test split could have introduced some bias.
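For reference, the per-class F1 score is the harmonic mean of precision and recall,

$$F_1 = 2 \cdot \frac{\text{precision} \cdot \text{recall}}{\text{precision} + \text{recall}},$$

and the weighted average we report weights each class's F1 by its support (the number of true samples for that class).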
F1-score¶
The best F1 score measured after tuning belonged to the Random Forest model, though this varied from run to run, since not all models were initialized with a fixed random seed.
The grid search plots give us an idea of how model performance varies across different parameter values. It seems like we were getting close to the optimal values for the Random Forest and SVM models, but not for the KNN model. The KNN model also refit in a fraction of a second, which is expected: KNN is a lazy learner, so "fitting" amounts to little more than storing the training data.
After fine-tuning, the Random Forest and the SVM models were almost neck-and-neck on F1 and accuracy scores, with the Random Forest having a slight edge on F1. The improved performance on the validation set suggests that our fine-tuning was successful, and that we did not overfit to the test set. The KNN model performed better on the validation set than it did during the grid search, but still much worse than our other two classifiers.
Training time¶
Our fastest model by far is the KNN model, with a refit time well under a second. The SVM model refit in about 14 seconds, while the Random Forest model took about 19 seconds. All of these feel incredibly fast compared to typical neural networks. This tells us that these models can be very useful in situations where we need to train a model quickly, or on a smaller device with limited compute.
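These timings come from the refit_time_ attribute that GridSearchCV records (shown in the cells above); we can print them together:
# Refit times (seconds) recorded by GridSearchCV for each tuned model
for gs in (rf_grid_search, svm_grid_search, knn_grid_search):
    print(f"{gs.name}: {gs.refit_time_:.2f} s")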
Conclusion¶
The SVM and Random Forest models performed best on the validation set. Their lightweight nature and fast training times make them very useful in situations where we have limited compute resources to train on, or large amounts of data.