Galaxy Image Classification¶

Importing Libraries¶

import numpy as np
from sklearn.model_selection import train_test_split
from keras.preprocessing.image import ImageDataGenerator
from tqdm import tqdm #displays progress bar within for loops
import matplotlib.pyplot as plt
from skimage import io
import pickle
from skimage.transform import resize
import glob
import os
%matplotlib inline 
import warnings
warnings.filterwarnings('ignore')
from keras import regularizers, optimizers

from functions.model_functions import plot_confusion_matrix, predict_one_image_cnn

import tensorflow as tf
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

Using TensorFlow backend.

Size of all images¶

# Collect the distinct array shapes of every JPEG found directly under data/.
size = {io.imread(image_path).shape for image_path in glob.glob('data/*.jpg')}

size for all images are: {(424, 424, 3)}

Visualizing some of the training data¶

# Let's look at six of our galaxies from the 61578 images in our training set.
# Each entry is (class folder under data/train, GalaxyID); the panels fill a
# 2 x 3 grid in this order.
sample_galaxies = [
    ('spiral', 100023),
    ('elliptical', 100078),
    ('elliptical', 100123),
    ('spiral', 100143),
    ('spiral', 233622),
    ('spiral', 177755),
]

plt.figure(1, figsize=(12, 9))
for panel, (morphology, galaxy_id) in enumerate(sample_galaxies, start=1):
    plt.subplot(2, 3, panel)
    plt.imshow(plt.imread('data/train/{}/{}.jpg'.format(morphology, galaxy_id)))
    plt.title('Galaxy {}'.format(galaxy_id))

# tight_layout() has to run before show(): called afterwards it acts on a new,
# empty figure instead of the one that was just displayed.
plt.tight_layout()
plt.show()

<Figure size 432x288 with 0 Axes>

From the images above, as a scientist I can already tell the morphology of each of these galaxies; the question now is whether the computer will be able to classify them correctly. Let's first build a baseline model and then compare it to a CNN model.

Predictions.csv¶

import pandas as pd
import numpy as np

predictions = pd.read_csv('predictions.csv')
predictions.head()

There seem to be ambiguous headings for our columns, and the resources on Kaggle don't seem to help much. Essentially, the classes refer to the morphology of the galaxy.

#All of the classes in our Class1 add up to 1. These classes refer to the probability of
#the galaxy having the shape in Class1.1, Class1.2, or Class1.3.

(predictions['Class1.1'] + predictions['Class1.2'] + predictions['Class1.3']).head()

0    1.0
1    1.0
2    1.0
3    1.0
4    1.0
dtype: float64

#Dropping all the other columns which do not refer to the morphology. 
predictions = predictions[['GalaxyID','Class1.1', 'Class1.2', 'Class1.3']]
predictions.head(10)

Based on the images printed above, we can use our physics brain to determine what the columns stand for and rename the columns.

predictions.columns = ['GalaxyID', 'Elliptical', 'Spiral', 'Irregular']
predictions.head()

If the value in the Irregular column was the maximum value for that row, we labelled the galaxy as irregular.

Note: The following code below no longer works because the irregular folders are deleted.

# Report how many files each class folder holds, for the train and test splits.
for split in ('train', 'test'):
    for morphology in ('elliptical', 'spiral', 'irregular'):
        folder = 'data/{}/{}/'.format(split, morphology)
        n_files = len(os.listdir(folder))
        print('num in {} {} = {}'.format(split, morphology, n_files))

num in train elliptical = 13237
num in train spiral = 17526
num in train irregular = 26
num in test elliptical = 13456
num in test spiral = 17300
num in test irregular = 33

For this, we can remove the irregular images since they make up such a small portion of our overall dataset.

Now, let's delete the irregular galaxies from the predictions (because we deleted their images). Then we will drop the "Irregular" column.

len(predictions[(predictions.Irregular > predictions.Spiral) & (predictions.Irregular > predictions.Elliptical)])

59

predictions[(predictions.Irregular > predictions.Spiral) & (predictions.Irregular > predictions.Elliptical)].head()

# Remove (in place) every galaxy whose Irregular value strictly exceeds both its
# Spiral and its Elliptical value, then remove the Irregular column itself.
is_irregular = (
    (predictions.Irregular > predictions.Spiral)
    & (predictions.Irregular > predictions.Elliptical)
)
predictions.drop(predictions.index[is_irregular], inplace=True)

predictions.drop(['Irregular'], axis=1, inplace=True)

predictions.head()

import matplotlib.pyplot as plt
import numpy as np
import os, shutil
from keras import models
from keras import layers
from keras.layers import Dropout 
from sklearn.metrics import confusion_matrix, f1_score
np.random.seed(123)
from keras.models import load_model
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img

# Training-data generator: rescales pixel values by 1/255 and applies random
# rotation (rotation_range=90) and brightness (brightness_range 0.5 to 2)
# augmentation to the images it yields.
datagen = ImageDataGenerator(rescale=1./255,
                            rotation_range=90,
                            brightness_range=(0.5,2),
                            )

# Read images from the training folder, resized down to 106 x 106 px, in batches
# of up to 30000 images; class_mode='binary' gives one 0/1 label per image.
data_tr=datagen.flow_from_directory(
        'data/train', 
        target_size=(106, 106), # size each image is resized to, not its size on disk
        batch_size = 30000,
        class_mode='binary',
        seed = 123)

Found 30763 images belonging to 2 classes.

# Test-data generator: only rescales pixel values by 1/255 (no augmentation).
# Reads images from the testing folder, resized down to 106 x 106 px, in batches
# of up to 20000 images with one 0/1 label per image.
data_te = ImageDataGenerator(rescale=1./255).flow_from_directory( 
        'data/test', 
        target_size=(106, 106), 
        batch_size = 20000, 
        class_mode='binary',
        seed = 123)

Found 30756 images belonging to 2 classes.

# Defining x_train, y_train, and x_test, y_test by grabbing the first 30,000 batch for train 
# and 20,000 image batch for test

x_tr, y_tr = next(data_tr)
x_te, y_te = next(data_te)

#Displaying that our training data is binary

y_tr[:5]

array([1., 1., 0., 0., 1.], dtype=float32)

#Splitting our data using sklearn. 80% of our data is X_train, y_train - what we are training
#our model on. 20% of our data is X_val, y_val - what we are validating our model on.

from sklearn.model_selection import train_test_split
X_train, X_val, y_train, y_val = train_test_split(x_tr, y_tr, test_size=0.20, random_state=123)

len(x_te)

X_val.shape

CNN Architecture¶

# Binary CNN classifier for 106 x 106 RGB images; the single sigmoid unit at the
# end outputs the predicted probability of label 1.
cnn = models.Sequential()

# Convolutional block 1: two stride-2 convolutions (the first L2-regularized),
# then batch norm -> ReLU -> 2x2 max pooling -> dropout.
# input_shape is only needed on the first layer of a Sequential model; it used
# to be repeated on the second Conv2D, where it has no effect.
cnn.add(layers.Conv2D(64, (3, 3), kernel_regularizer=regularizers.l2(0.1),
                      input_shape=(106, 106, 3), padding='valid', strides=(2, 2)))
cnn.add(layers.Conv2D(64, (3, 3), padding='valid', strides=(2, 2)))
cnn.add(layers.BatchNormalization())
cnn.add(layers.Activation('relu'))
cnn.add(layers.MaxPooling2D((2, 2)))
cnn.add(Dropout(0.5))

# Convolutional block 2: one L2-regularized convolution, then
# batch norm -> ReLU -> 2x2 max pooling, flattened for the dense layers.
cnn.add(layers.Conv2D(64, (3, 3), kernel_regularizer=regularizers.l2(0.1)))
cnn.add(layers.BatchNormalization())
cnn.add(layers.Activation('relu'))
cnn.add(layers.MaxPooling2D((2, 2)))
cnn.add(layers.Flatten())

# Dense head: 128 units (batch norm before the ReLU) -> 32 units -> 1 sigmoid.
cnn.add(layers.Dense(128))
cnn.add(layers.BatchNormalization())
cnn.add(layers.Activation('relu'))

cnn.add(layers.Dense(32, activation='relu'))
cnn.add(layers.Dense(1, activation='sigmoid'))

# decay_rate = learning_rate / epochs
sgd = optimizers.SGD(lr=0.01, decay=0.0002, momentum=0.9, nesterov=False)
cnn.compile(loss='binary_crossentropy',
            optimizer=sgd,
            metrics=['acc'])

# Initialise the network from previously saved weights.
cnn.load_weights('model_weights/cnn11_only_lr.h5')

WARNING:tensorflow:From /anaconda3/lib/python3.7/site-packages/tensorflow/python/framework/op_def_library.py:263: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.
Instructions for updating:
Colocations handled automatically by placer.
WARNING:tensorflow:From /anaconda3/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:3445: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is deprecated and will be removed in a future version.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.

Fitting CNN Model¶

# Train for 50 epochs in mini-batches of 500 samples, evaluating on the
# validation split (X_val, y_val) after every epoch. The value returned by
# fit() is kept in cnn1.
cnn1 = cnn.fit(X_train, y_train,
               epochs=50,
               validation_data=(X_val, y_val),
               batch_size=500)

# summary() prints the layer table itself and returns None, so wrapping it in
# print() would add a stray "None" line after the table.
cnn.summary()

Let's see what our CNN is doing behind the scenes when it is fed this image:¶

# Printing one random image to compare with the hidden layers below.

plt.imshow(X_train[50])
plt.show()

Grabbing the outputs of all the layers of the CNN to visualize their activations¶

# layer_outputs = [layer.output for layer in cnn.layers[:8]]

layer_outputs = [layer.output for layer in cnn.layers]

cnn.layers

[<keras.layers.convolutional.Conv2D at 0x3c50b1630>,
 <keras.layers.convolutional.Conv2D at 0x3c540b5f8>,
 <keras.layers.normalization.BatchNormalization at 0x3c50bb0b8>,
 <keras.layers.core.Activation at 0x3c50a82b0>,
 <keras.layers.pooling.MaxPooling2D at 0x13ee3f400>,
 <keras.layers.core.Dropout at 0x3c50a51d0>,
 <keras.layers.convolutional.Conv2D at 0x3c542cf28>,
 <keras.layers.normalization.BatchNormalization at 0x3c50a8710>,
 <keras.layers.core.Activation at 0x3c547cd68>,
 <keras.layers.pooling.MaxPooling2D at 0x3c547ce10>,
 <keras.layers.core.Flatten at 0x3c54ba7f0>,
 <keras.layers.core.Dense at 0x3c54e7f28>,
 <keras.layers.normalization.BatchNormalization at 0x3c54cea90>,
 <keras.layers.core.Activation at 0x3c55a1dd8>,
 <keras.layers.core.Dense at 0x3c55a1e48>,
 <keras.layers.core.Dense at 0x3c55a1198>]

activation_model = models.Model(inputs=cnn.input, outputs=layer_outputs)

from keras import models

# from keras import models
from keras.preprocessing import image

# Position within X_train of the image whose activations are visualised (the
# same image that is displayed with plt.imshow(X_train[50])).
IMAGE_INDEX = 50


def plot_layer_activations(layer_activation, n_channels=20, n_cols=4):
    """Draw the first ``n_channels`` feature maps of one layer in a grid.

    Parameters
    ----------
    layer_activation : array indexed as [image, row, column, channel]
        Output of a single layer; only image 0 is drawn.
    n_channels : int
        Number of channels (feature maps) to draw, starting at channel 0.
    n_cols : int
        Number of grid columns; the rows are derived from ``n_channels``.
    """
    n_rows = -(-n_channels // n_cols)  # ceiling division
    # squeeze=False keeps `axes` two-dimensional even for a one-row grid.
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(12, 24), squeeze=False)
    for channel in range(n_channels):
        ax = axes[channel // n_cols, channel % n_cols]
        ax.matshow(layer_activation[0, :, :, channel], cmap='viridis')


# Run the activation model once, on just the image of interest. Previously
# predict() was re-run over the whole of X_train before each figure, which
# recomputed every layer's output for every training image only to plot one.
activations = activation_model.predict(X_train[IMAGE_INDEX:IMAGE_INDEX + 1])

# One figure per layer, for the same layer indices and in the same order as
# before.
for layer_index in (1, 7, 0, 5, 9, 3, 4):
    plot_layer_activations(activations[layer_index])

Confusion Matrix for CNN¶

cnn.evaluate(x_te, y_te)

20000/20000 [==============================] - 29s 1ms/step

[0.521887234210968, 0.79075]

predictions_cnn = cnn.predict(x_te)
predictions_cnn = np.around(predictions_cnn)

f1_score(y_te, predictions_cnn, average='macro')

0.7873114379632022

plt.figure()
plot_confusion_matrix(confusion_matrix(y_te, predictions_cnn), classes=['Elliptical', 'Spiral'])
plt.title('Confusion Matrix for CNN Model')
plt.savefig('CNN_ConfusionMatrix.png')

Confusion matrix, without normalization
[[6636 2109]
 [2076 9179]]

Let's test our model on some images of galaxies it has not seen yet.¶

def predictoneimage_cnn(cnn, path, target_size=(106, 106)):
    """Display the image stored at ``path`` and return the model's prediction.

    NOTE: the notebook cells below call ``predict_one_image_cnn`` (imported from
    functions.model_functions), not this local definition.

    Parameters
    ----------
    cnn : object with a ``predict`` method
        The model used for the prediction.
    path : str
        Location of the image file to load.
    target_size : tuple of int, optional
        Size the image is resized to on loading. Defaults to (106, 106), the
        value that used to be hard-coded here.

    Returns
    -------
    Whatever ``cnn.predict`` returns for a batch containing this single image.
    """
    img = load_img(path, target_size=target_size)
    plt.imshow(img)
    # Scale pixel values by 1/255, then add a leading axis so the single image
    # is passed to predict() as a batch of one.
    batch = np.expand_dims(img_to_array(img) / 255, axis=0)
    return cnn.predict(batch)

Keras' flow_from_directory method orders the class folders alphabetically, so elliptical comes first and is assigned label 0. So, 0 is elliptical and 1 is spiral.¶

predict_one_image_cnn(cnn, 'data/train/elliptical/100078.jpg')

array([[0.29584587]], dtype=float32)

predict_one_image_cnn(cnn, 'data/test/elliptical/564639.jpg')

array([[0.09924649]], dtype=float32)

The CNN model classified the above galaxy as an elliptical, which is correct since this test image came from the elliptical folder.¶

ROC Curve for CNN Model¶

from sklearn.metrics import roc_curve

fpr_cnn, tpr_cnn, thresholds = roc_curve(y_te, cnn.predict(x_te))

plt.plot(fpr_cnn,tpr_cnn)
plt.plot([0, 1], [0, 1],'r--')
plt.xlim([0, 1])
plt.ylim([0, 1])
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
plt.title('ROC Curve for CNN Model')
# plt.show()
plt.savefig('CNN_ROC.png')

from sklearn.metrics import auc
roc_auc = auc(fpr_cnn, tpr_cnn)

print(roc_auc)

0.8601756871159988

Decision Tree¶

from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

tree_model = DecisionTreeClassifier(max_depth=10)

x_tr.shape

(30000, 106, 106, 3)

X_train.shape

(24000, 106, 106, 3)

y_train.shape

(24000,)

# Flatten every image into a single feature vector for the decision tree.
# The row count is taken from the array itself rather than hard-coded as 24000,
# so this stays valid if the size of the training split changes.
x_tr_flat = X_train.reshape((X_train.shape[0], -1))

tree_model.fit(x_tr_flat, y_train)

DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=10,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=None,
            splitter='best')

x_te.shape

(20000, 106, 106, 3)

# Flatten the test images the same way; the row count comes from the array
# itself rather than being hard-coded as 20000.
x_te_flat = x_te.reshape((x_te.shape[0], -1))

# Hard class labels and per-class probabilities for every test image.
y_predict = tree_model.predict(x_te_flat)

y_predict_proba = tree_model.predict_proba(x_te_flat)

y_predict_proba

array([[0.91032609, 0.08967391],
       [0.03174603, 0.96825397],
       [0.10344828, 0.89655172],
       ...,
       [0.06465517, 0.93534483],
       [1.        , 0.        ],
       [0.015625  , 0.984375  ]])

accuracy_score(y_te, y_predict)

0.6887

from sklearn.metrics import confusion_matrix

confusion_matrix(y_te, y_predict)

array([[5899, 2846],
       [3380, 7875]])

plt.figure()
plot_confusion_matrix(confusion_matrix(y_te, y_predict), classes=['Elliptical', 'Spiral'])
plt.title('Confusion Matrix for Decision Tree')
plt.savefig('DecisionTree_ConfusionMatrix.png')

Confusion matrix, without normalization
[[5899 2846]
 [3380 7875]]

y_te[:10]

array([0., 1., 1., 1., 0., 1., 0., 1., 1., 0.], dtype=float32)

# np.unique(y_predict_proba)[:5]
y_predict_proba[:10]

array([[0.91032609, 0.08967391],
       [0.03174603, 0.96825397],
       [0.10344828, 0.89655172],
       [0.        , 1.        ],
       [0.07480315, 0.92519685],
       [0.07480315, 0.92519685],
       [0.10576923, 0.89423077],
       [0.        , 1.        ],
       [0.88823529, 0.11176471],
       [0.16923077, 0.83076923]])

[x[1] for x in y_predict_proba][:10]

[0.08967391304347826,
 0.9682539682539683,
 0.896551724137931,
 1.0,
 0.9251968503937008,
 0.9251968503937008,
 0.8942307692307693,
 1.0,
 0.11176470588235295,
 0.8307692307692308]

from sklearn.metrics import auc, roc_curve

# Score each test image with the second predict_proba column (index 1), sliced
# directly from the array instead of being rebuilt row by row as a Python list.
fpr_decision, tpr_decision, thresholds = roc_curve(y_te, y_predict_proba[:, 1])

plt.plot(fpr_decision, tpr_decision)
plt.plot([0, 1], [0, 1], 'r--')  # diagonal reference line
plt.xlim([0, 1])
plt.ylim([0, 1])
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
plt.title('ROC Curve for Decision Tree')
# plt.show()
plt.savefig('DecisionTree_ROC.png')

# Area under the ROC curve.
roc_auc = auc(fpr_decision, tpr_decision)

# fpr_decision

# tpr_decision

print(roc_auc)

0.6984602739294575

Inception¶

#we can use any of the inception stuff even if they are trained on anything
from keras.applications import inception_v3
from keras.layers import Dense,GlobalAveragePooling2D
from sklearn.metrics import confusion_matrix, f1_score
from keras.models import Model

imagenet=inception_v3.InceptionV3(weights='imagenet',include_top=False)
imagenet_new=imagenet.output
new_model = models.Sequential()
new_model.add(imagenet)
new_model.add(GlobalAveragePooling2D())
new_model.add(Dense(1024,activation='relu'))
new_model.add(Dense(1024,activation='relu')) #dense layer 2
new_model.add(Dense(512,activation='relu')) #dense layer 3
new_model.add(Dense(1,activation='sigmoid')) #final layer

for i,layer in enumerate(imagenet.layers):
    print(i,layer.name, layer.trainable)

0 input_1 False
1 conv2d_4 True
2 batch_normalization_4 True
3 activation_4 True
4 conv2d_5 True
5 batch_normalization_5 True
6 activation_5 True
7 conv2d_6 True
8 batch_normalization_6 True
9 activation_6 True
10 max_pooling2d_3 True
11 conv2d_7 True
12 batch_normalization_7 True
13 activation_7 True
14 conv2d_8 True
15 batch_normalization_8 True
16 activation_8 True
17 max_pooling2d_4 True
18 conv2d_12 True
19 batch_normalization_12 True
20 activation_12 True
21 conv2d_10 True
22 conv2d_13 True
23 batch_normalization_10 True
24 batch_normalization_13 True
25 activation_10 True
26 activation_13 True
27 average_pooling2d_1 True
28 conv2d_9 True
29 conv2d_11 True
30 conv2d_14 True
31 conv2d_15 True
32 batch_normalization_9 True
33 batch_normalization_11 True
34 batch_normalization_14 True
35 batch_normalization_15 True
36 activation_9 True
37 activation_11 True
38 activation_14 True
39 activation_15 True
40 mixed0 True
41 conv2d_19 True
42 batch_normalization_19 True
43 activation_19 True
44 conv2d_17 True
45 conv2d_20 True
46 batch_normalization_17 True
47 batch_normalization_20 True
48 activation_17 True
49 activation_20 True
50 average_pooling2d_2 True
51 conv2d_16 True
52 conv2d_18 True
53 conv2d_21 True
54 conv2d_22 True
55 batch_normalization_16 True
56 batch_normalization_18 True
57 batch_normalization_21 True
58 batch_normalization_22 True
59 activation_16 True
60 activation_18 True
61 activation_21 True
62 activation_22 True
63 mixed1 True
64 conv2d_26 True
65 batch_normalization_26 True
66 activation_26 True
67 conv2d_24 True
68 conv2d_27 True
69 batch_normalization_24 True
70 batch_normalization_27 True
71 activation_24 True
72 activation_27 True
73 average_pooling2d_3 True
74 conv2d_23 True
75 conv2d_25 True
76 conv2d_28 True
77 conv2d_29 True
78 batch_normalization_23 True
79 batch_normalization_25 True
80 batch_normalization_28 True
81 batch_normalization_29 True
82 activation_23 True
83 activation_25 True
84 activation_28 True
85 activation_29 True
86 mixed2 True
87 conv2d_31 True
88 batch_normalization_31 True
89 activation_31 True
90 conv2d_32 True
91 batch_normalization_32 True
92 activation_32 True
93 conv2d_30 True
94 conv2d_33 True
95 batch_normalization_30 True
96 batch_normalization_33 True
97 activation_30 True
98 activation_33 True
99 max_pooling2d_5 True
100 mixed3 True
101 conv2d_38 True
102 batch_normalization_38 True
103 activation_38 True
104 conv2d_39 True
105 batch_normalization_39 True
106 activation_39 True
107 conv2d_35 True
108 conv2d_40 True
109 batch_normalization_35 True
110 batch_normalization_40 True
111 activation_35 True
112 activation_40 True
113 conv2d_36 True
114 conv2d_41 True
115 batch_normalization_36 True
116 batch_normalization_41 True
117 activation_36 True
118 activation_41 True
119 average_pooling2d_4 True
120 conv2d_34 True
121 conv2d_37 True
122 conv2d_42 True
123 conv2d_43 True
124 batch_normalization_34 True
125 batch_normalization_37 True
126 batch_normalization_42 True
127 batch_normalization_43 True
128 activation_34 True
129 activation_37 True
130 activation_42 True
131 activation_43 True
132 mixed4 True
133 conv2d_48 True
134 batch_normalization_48 True
135 activation_48 True
136 conv2d_49 True
137 batch_normalization_49 True
138 activation_49 True
139 conv2d_45 True
140 conv2d_50 True
141 batch_normalization_45 True
142 batch_normalization_50 True
143 activation_45 True
144 activation_50 True
145 conv2d_46 True
146 conv2d_51 True
147 batch_normalization_46 True
148 batch_normalization_51 True
149 activation_46 True
150 activation_51 True
151 average_pooling2d_5 True
152 conv2d_44 True
153 conv2d_47 True
154 conv2d_52 True
155 conv2d_53 True
156 batch_normalization_44 True
157 batch_normalization_47 True
158 batch_normalization_52 True
159 batch_normalization_53 True
160 activation_44 True
161 activation_47 True
162 activation_52 True
163 activation_53 True
164 mixed5 True
165 conv2d_58 True
166 batch_normalization_58 True
167 activation_58 True
168 conv2d_59 True
169 batch_normalization_59 True
170 activation_59 True
171 conv2d_55 True
172 conv2d_60 True
173 batch_normalization_55 True
174 batch_normalization_60 True
175 activation_55 True
176 activation_60 True
177 conv2d_56 True
178 conv2d_61 True
179 batch_normalization_56 True
180 batch_normalization_61 True
181 activation_56 True
182 activation_61 True
183 average_pooling2d_6 True
184 conv2d_54 True
185 conv2d_57 True
186 conv2d_62 True
187 conv2d_63 True
188 batch_normalization_54 True
189 batch_normalization_57 True
190 batch_normalization_62 True
191 batch_normalization_63 True
192 activation_54 True
193 activation_57 True
194 activation_62 True
195 activation_63 True
196 mixed6 True
197 conv2d_68 True
198 batch_normalization_68 True
199 activation_68 True
200 conv2d_69 True
201 batch_normalization_69 True
202 activation_69 True
203 conv2d_65 True
204 conv2d_70 True
205 batch_normalization_65 True
206 batch_normalization_70 True
207 activation_65 True
208 activation_70 True
209 conv2d_66 True
210 conv2d_71 True
211 batch_normalization_66 True
212 batch_normalization_71 True
213 activation_66 True
214 activation_71 True
215 average_pooling2d_7 True
216 conv2d_64 True
217 conv2d_67 True
218 conv2d_72 True
219 conv2d_73 True
220 batch_normalization_64 True
221 batch_normalization_67 True
222 batch_normalization_72 True
223 batch_normalization_73 True
224 activation_64 True
225 activation_67 True
226 activation_72 True
227 activation_73 True
228 mixed7 True
229 conv2d_76 True
230 batch_normalization_76 True
231 activation_76 True
232 conv2d_77 True
233 batch_normalization_77 True
234 activation_77 True
235 conv2d_74 True
236 conv2d_78 True
237 batch_normalization_74 True
238 batch_normalization_78 True
239 activation_74 True
240 activation_78 True
241 conv2d_75 True
242 conv2d_79 True
243 batch_normalization_75 True
244 batch_normalization_79 True
245 activation_75 True
246 activation_79 True
247 max_pooling2d_6 True
248 mixed8 True
249 conv2d_84 True
250 batch_normalization_84 True
251 activation_84 True
252 conv2d_81 True
253 conv2d_85 True
254 batch_normalization_81 True
255 batch_normalization_85 True
256 activation_81 True
257 activation_85 True
258 conv2d_82 True
259 conv2d_83 True
260 conv2d_86 True
261 conv2d_87 True
262 average_pooling2d_8 True
263 conv2d_80 True
264 batch_normalization_82 True
265 batch_normalization_83 True
266 batch_normalization_86 True
267 batch_normalization_87 True
268 conv2d_88 True
269 batch_normalization_80 True
270 activation_82 True
271 activation_83 True
272 activation_86 True
273 activation_87 True
274 batch_normalization_88 True
275 activation_80 True
276 mixed9_0 True
277 concatenate_1 True
278 activation_88 True
279 mixed9 True
280 conv2d_93 True
281 batch_normalization_93 True
282 activation_93 True
283 conv2d_90 True
284 conv2d_94 True
285 batch_normalization_90 True
286 batch_normalization_94 True
287 activation_90 True
288 activation_94 True
289 conv2d_91 True
290 conv2d_92 True
291 conv2d_95 True
292 conv2d_96 True
293 average_pooling2d_9 True
294 conv2d_89 True
295 batch_normalization_91 True
296 batch_normalization_92 True
297 batch_normalization_95 True
298 batch_normalization_96 True
299 conv2d_97 True
300 batch_normalization_89 True
301 activation_91 True
302 activation_92 True
303 activation_95 True
304 activation_96 True
305 batch_normalization_97 True
306 activation_89 True
307 mixed9_1 True
308 concatenate_2 True
309 activation_97 True
310 mixed10 True

# Freeze the first layer of new_model, i.e. the pretrained InceptionV3 network,
# so it is not trained (training it as well would take too long).
inception_base = new_model.layers[0]
inception_base.trainable = False

new_model.compile(loss='binary_crossentropy', optimizer='Adam', metrics=['accuracy'])

# step_size_train=train_generator.n//train_generator.batch_size
new_model.fit(X_train,
          y_train,
          epochs=10,
          batch_size=50,
          validation_data=(X_val, y_val))

WARNING:tensorflow:From /anaconda3/lib/python3.7/site-packages/tensorflow/python/ops/math_ops.py:3066: to_int32 (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.cast instead.
Train on 24000 samples, validate on 6000 samples
Epoch 1/10
24000/24000 [==============================] - 320s 13ms/step - loss: 0.6184 - acc: 0.6614 - val_loss: 0.8695 - val_acc: 0.5390
Epoch 2/10
24000/24000 [==============================] - 297s 12ms/step - loss: 0.5916 - acc: 0.6817 - val_loss: 0.7445 - val_acc: 0.5502
Epoch 3/10
24000/24000 [==============================] - 258s 11ms/step - loss: 0.5806 - acc: 0.6937 - val_loss: 0.6791 - val_acc: 0.6050
Epoch 4/10
24000/24000 [==============================] - 277s 12ms/step - loss: 0.5765 - acc: 0.6966 - val_loss: 0.7362 - val_acc: 0.5488
Epoch 5/10
24000/24000 [==============================] - 275s 11ms/step - loss: 0.5692 - acc: 0.7007 - val_loss: 1.1220 - val_acc: 0.5000
Epoch 6/10
24000/24000 [==============================] - 279s 12ms/step - loss: 0.5728 - acc: 0.6979 - val_loss: 0.8048 - val_acc: 0.5378
Epoch 7/10
24000/24000 [==============================] - 263s 11ms/step - loss: 0.5681 - acc: 0.7027 - val_loss: 0.9025 - val_acc: 0.5497
Epoch 8/10
24000/24000 [==============================] - 274s 11ms/step - loss: 0.5647 - acc: 0.7075 - val_loss: 0.7129 - val_acc: 0.5532
Epoch 9/10
24000/24000 [==============================] - 260s 11ms/step - loss: 0.5600 - acc: 0.7097 - val_loss: 0.6915 - val_acc: 0.5875
Epoch 10/10
24000/24000 [==============================] - 261s 11ms/step - loss: 0.5581 - acc: 0.7097 - val_loss: 0.7527 - val_acc: 0.5527

<keras.callbacks.History at 0xb22cce828>

new_model.evaluate(x_te, y_te)

20000/20000 [==============================] - 256s 13ms/step

[0.7556947304725647, 0.5354]

predictions_transfer = new_model.predict(x_te)
predictions_transfer = np.around(predictions_transfer)

f1_score(y_te, predictions_transfer, average='macro')

0.5048216209418035

plt.figure()
plot_confusion_matrix(confusion_matrix(y_te, predictions_transfer), classes=['Elliptical', 'Spiral'])
plt.title('Confusion Matrix for Inception')
plt.savefig('Inception_ConfusionMatrix.png')

Confusion matrix, without normalization
[[7839  906]
 [8386 2869]]

fpr_inception, tpr_inception, thresholds = roc_curve(y_te, new_model.predict(x_te))

plt.plot(fpr_inception,tpr_inception)
plt.plot([0, 1], [0, 1],'r--')
plt.xlim([0, 1])
plt.ylim([0, 1])
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
plt.title('ROC Curve for Inception')
plt.savefig('Inception_ROC.png')
# plt.show()

Simple Neural Network Baseline Model¶

from keras.models import Sequential, Input
from keras.layers import Dense

# Baseline multilayer perceptron: flatten each 106x106x3 input into a
# 33,708-element vector, pass it through one 128-unit ReLU hidden layer, and
# finish with a single sigmoid unit for the binary decision.
model = Sequential()

# Sequential.add() returns None, so its result is deliberately not bound to a
# name (the former layer_1 / layer_2 variables only ever held None).
model.add(layers.Flatten(input_shape=(106, 106,  3)))
model.add(Dense(units=128, activation='relu'))
model.add(Dense(units=1, activation='sigmoid'))

# Binary cross-entropy pairs with the single sigmoid output; plain SGD is the
# optimizer and accuracy is tracked under the name 'acc'.
model.compile(loss='binary_crossentropy',
              optimizer='sgd',
              metrics=['acc'])

# Train for 50 epochs in mini-batches of 500, scoring on (X_val, y_val) after
# each epoch. `history` keeps the per-epoch metrics for the plots further down.
history = model.fit(X_train, y_train,
                epochs=50,
                validation_data=(X_val, y_val),
                batch_size=500)

Train on 24000 samples, validate on 6000 samples
Epoch 1/50
24000/24000 [==============================] - 7s 274us/step - loss: 0.6586 - acc: 0.6196 - val_loss: 0.6371 - val_acc: 0.6652
Epoch 2/50
24000/24000 [==============================] - 5s 193us/step - loss: 0.6281 - acc: 0.6750 - val_loss: 0.6176 - val_acc: 0.6927
Epoch 3/50
24000/24000 [==============================] - 5s 195us/step - loss: 0.6134 - acc: 0.6923 - val_loss: 0.6077 - val_acc: 0.6958
Epoch 4/50
24000/24000 [==============================] - 5s 196us/step - loss: 0.6057 - acc: 0.7022 - val_loss: 0.6008 - val_acc: 0.7102
Epoch 5/50
24000/24000 [==============================] - 5s 191us/step - loss: 0.5996 - acc: 0.7071 - val_loss: 0.5966 - val_acc: 0.7205
Epoch 6/50
24000/24000 [==============================] - 5s 190us/step - loss: 0.5956 - acc: 0.7101 - val_loss: 0.5919 - val_acc: 0.7177
Epoch 7/50
24000/24000 [==============================] - 5s 188us/step - loss: 0.5913 - acc: 0.7162 - val_loss: 0.5901 - val_acc: 0.7278
Epoch 8/50
24000/24000 [==============================] - 5s 189us/step - loss: 0.5885 - acc: 0.7179 - val_loss: 0.5851 - val_acc: 0.7257
Epoch 9/50
24000/24000 [==============================] - 5s 190us/step - loss: 0.5850 - acc: 0.7211 - val_loss: 0.5828 - val_acc: 0.7257
Epoch 10/50
24000/24000 [==============================] - 5s 189us/step - loss: 0.5817 - acc: 0.7236 - val_loss: 0.5792 - val_acc: 0.7340
Epoch 11/50
24000/24000 [==============================] - 5s 190us/step - loss: 0.5785 - acc: 0.7280 - val_loss: 0.5764 - val_acc: 0.7362
Epoch 12/50
24000/24000 [==============================] - 5s 189us/step - loss: 0.5754 - acc: 0.7315 - val_loss: 0.5738 - val_acc: 0.7375
Epoch 13/50
24000/24000 [==============================] - 5s 197us/step - loss: 0.5723 - acc: 0.7337 - val_loss: 0.5709 - val_acc: 0.7422
Epoch 14/50
24000/24000 [==============================] - 5s 193us/step - loss: 0.5698 - acc: 0.7365 - val_loss: 0.5682 - val_acc: 0.7422
Epoch 15/50
24000/24000 [==============================] - 5s 189us/step - loss: 0.5664 - acc: 0.7383 - val_loss: 0.5656 - val_acc: 0.7450
Epoch 16/50
24000/24000 [==============================] - 5s 189us/step - loss: 0.5637 - acc: 0.7390 - val_loss: 0.5631 - val_acc: 0.7508
Epoch 17/50
24000/24000 [==============================] - 5s 194us/step - loss: 0.5612 - acc: 0.7445 - val_loss: 0.5609 - val_acc: 0.7503
Epoch 18/50
24000/24000 [==============================] - 5s 190us/step - loss: 0.5584 - acc: 0.7447 - val_loss: 0.5584 - val_acc: 0.7523
Epoch 19/50
24000/24000 [==============================] - 5s 191us/step - loss: 0.5564 - acc: 0.7506 - val_loss: 0.5562 - val_acc: 0.7528
Epoch 20/50
24000/24000 [==============================] - 5s 192us/step - loss: 0.5532 - acc: 0.7509 - val_loss: 0.5555 - val_acc: 0.7515
Epoch 21/50
24000/24000 [==============================] - 5s 190us/step - loss: 0.5510 - acc: 0.7524 - val_loss: 0.5531 - val_acc: 0.7623
Epoch 22/50
24000/24000 [==============================] - 5s 190us/step - loss: 0.5485 - acc: 0.7555 - val_loss: 0.5501 - val_acc: 0.7598
Epoch 23/50
24000/24000 [==============================] - 5s 191us/step - loss: 0.5465 - acc: 0.7547 - val_loss: 0.5494 - val_acc: 0.7682
Epoch 24/50
24000/24000 [==============================] - 5s 189us/step - loss: 0.5443 - acc: 0.7573 - val_loss: 0.5469 - val_acc: 0.7625
Epoch 25/50
24000/24000 [==============================] - 5s 189us/step - loss: 0.5421 - acc: 0.7602 - val_loss: 0.5460 - val_acc: 0.7583
Epoch 26/50
24000/24000 [==============================] - 5s 188us/step - loss: 0.5402 - acc: 0.7598 - val_loss: 0.5430 - val_acc: 0.7688
Epoch 27/50
24000/24000 [==============================] - 5s 188us/step - loss: 0.5381 - acc: 0.7629 - val_loss: 0.5419 - val_acc: 0.7665
Epoch 28/50
24000/24000 [==============================] - 5s 189us/step - loss: 0.5363 - acc: 0.7629 - val_loss: 0.5399 - val_acc: 0.7743
Epoch 29/50
24000/24000 [==============================] - 5s 190us/step - loss: 0.5344 - acc: 0.7651 - val_loss: 0.5387 - val_acc: 0.7728
Epoch 30/50
24000/24000 [==============================] - 5s 190us/step - loss: 0.5330 - acc: 0.7655 - val_loss: 0.5375 - val_acc: 0.7710
Epoch 31/50
24000/24000 [==============================] - 5s 188us/step - loss: 0.5313 - acc: 0.7658 - val_loss: 0.5360 - val_acc: 0.7763
Epoch 32/50
24000/24000 [==============================] - 4s 187us/step - loss: 0.5295 - acc: 0.7661 - val_loss: 0.5365 - val_acc: 0.7670
Epoch 33/50
24000/24000 [==============================] - 4s 187us/step - loss: 0.5281 - acc: 0.7685 - val_loss: 0.5337 - val_acc: 0.7747
Epoch 34/50
24000/24000 [==============================] - 4s 187us/step - loss: 0.5265 - acc: 0.7694 - val_loss: 0.5328 - val_acc: 0.7748
Epoch 35/50
24000/24000 [==============================] - 4s 187us/step - loss: 0.5255 - acc: 0.7698 - val_loss: 0.5314 - val_acc: 0.7773
Epoch 36/50
24000/24000 [==============================] - 4s 187us/step - loss: 0.5236 - acc: 0.7701 - val_loss: 0.5319 - val_acc: 0.7765
Epoch 37/50
24000/24000 [==============================] - 4s 187us/step - loss: 0.5226 - acc: 0.7710 - val_loss: 0.5294 - val_acc: 0.7775
Epoch 38/50
24000/24000 [==============================] - 5s 190us/step - loss: 0.5217 - acc: 0.7707 - val_loss: 0.5285 - val_acc: 0.7787
Epoch 39/50
24000/24000 [==============================] - 5s 195us/step - loss: 0.5200 - acc: 0.7727 - val_loss: 0.5276 - val_acc: 0.7792
Epoch 40/50
24000/24000 [==============================] - 5s 193us/step - loss: 0.5183 - acc: 0.7729 - val_loss: 0.5282 - val_acc: 0.7792
Epoch 41/50
24000/24000 [==============================] - 5s 190us/step - loss: 0.5179 - acc: 0.7718 - val_loss: 0.5258 - val_acc: 0.7802
Epoch 42/50
24000/24000 [==============================] - 5s 191us/step - loss: 0.5167 - acc: 0.7744 - val_loss: 0.5250 - val_acc: 0.7807
Epoch 43/50
24000/24000 [==============================] - 5s 188us/step - loss: 0.5149 - acc: 0.7748 - val_loss: 0.5242 - val_acc: 0.7807
Epoch 44/50
24000/24000 [==============================] - 5s 193us/step - loss: 0.5143 - acc: 0.7765 - val_loss: 0.5235 - val_acc: 0.7810
Epoch 45/50
24000/24000 [==============================] - 5s 188us/step - loss: 0.5133 - acc: 0.7744 - val_loss: 0.5231 - val_acc: 0.7813
Epoch 46/50
24000/24000 [==============================] - 4s 187us/step - loss: 0.5120 - acc: 0.7749 - val_loss: 0.5236 - val_acc: 0.7800
Epoch 47/50
24000/24000 [==============================] - 5s 188us/step - loss: 0.5109 - acc: 0.7761 - val_loss: 0.5217 - val_acc: 0.7823
Epoch 48/50
24000/24000 [==============================] - 5s 188us/step - loss: 0.5096 - acc: 0.7777 - val_loss: 0.5209 - val_acc: 0.7820
Epoch 49/50
24000/24000 [==============================] - 5s 188us/step - loss: 0.5093 - acc: 0.7786 - val_loss: 0.5221 - val_acc: 0.7798
Epoch 50/50
24000/24000 [==============================] - 5s 192us/step - loss: 0.5080 - acc: 0.7780 - val_loss: 0.5198 - val_acc: 0.7822

# history.history['val_acc']

# Round the baseline model's sigmoid outputs on x_te to the nearest integer
# to obtain hard class labels.
predictions_mlp = np.around(model.predict(x_te))

# Confusion matrix of the rounded baseline predictions against y_te,
# drawn on a fresh figure and written to disk.
plt.figure()
mlp_cm = confusion_matrix(y_te, predictions_mlp)
plot_confusion_matrix(mlp_cm, classes=['Elliptical', 'Spiral'])
plt.title('Confusion Matrix for Neural Network')
plt.savefig('Baseline_MLP_ConfusionMatrix.png')

Confusion matrix, without normalization
[[5882 2863]
 [1612 9643]]

# ROC curve for the baseline network: the un-rounded sigmoid outputs on x_te
# are used as scores.
mlp_scores = model.predict(x_te)
fpr_neural, tpr_neural, thresholds = roc_curve(y_te, mlp_scores)

plt.plot(fpr_neural, tpr_neural)
# Dashed red diagonal as the chance-level reference line.
plt.plot([0, 1], [0, 1], 'r--')
plt.xlim(0, 1)
plt.ylim(0, 1)
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
plt.title('ROC Curve for Neural Network')
plt.savefig('BaselineMLP_ROC.png')

# NOTE(review): `fpr` is not assigned in the nearby cells (the cell above
# produces `fpr_neural`); presumably it is left over from an earlier cell --
# confirm which model's curve this is echoing.
fpr

array([0.        , 0.        , 0.        , ..., 0.99736993, 0.99736993,
       1.        ])

# NOTE(review): `tpr` is not assigned in the nearby cells (the ROC cell above
# produces `tpr_neural`); presumably it is left over from an earlier cell --
# confirm which model's curve this is echoing.
tpr

array([0.00000000e+00, 8.88494003e-05, 1.77698801e-04, ...,
       9.99911151e-01, 1.00000000e+00, 1.00000000e+00])

# Validation accuracy per epoch, taken from the Keras History returned by fit().
plt.figure()
val_acc_per_epoch = history.history['val_acc']
plt.plot(range(len(val_acc_per_epoch)), val_acc_per_epoch)
plt.xlabel('Epoch')
plt.ylabel('Validation Accuracy')

Text(0, 0.5, 'Validation Accuracy')

# Score the baseline model on x_te / y_te; the result is a list ordered like
# model.metrics_names (loss first, then 'acc').
loss_and_metrics = model.evaluate(x_te, y_te)

20000/20000 [==============================] - 3s 129us/step

# Echo the evaluate() result computed above.
loss_and_metrics

[0.526056180858612, 0.77625]

# Labels for the entries of loss_and_metrics, in the same order.
model.metrics_names

['loss', 'acc']

# Persist the trained baseline model to an HDF5 file in the working directory.
model.save('simple_nn_baseline.h5')

# Combined ROC plot for all models.
# fpr_cnn / tpr_cnn and fpr_inception / tpr_inception are expected to exist
# already from earlier cells; only the decision-tree and MLP curves are
# computed here.

# Second column of each y_predict_proba row is used as the decision-tree score.
decision_scores = [row[1] for row in y_predict_proba]
fpr_decision, tpr_decision, thresholds = roc_curve(y_te, decision_scores)
fpr_mlp, tpr_mlp, thresholds = roc_curve(y_te, model.predict(x_te))  # simple MLP

# One line per model, drawn in a fixed order so the legend stays stable.
for _fpr, _tpr, _label in (
    (fpr_cnn, tpr_cnn, 'CNN'),
    (fpr_inception, tpr_inception, 'Inception'),
    (fpr_decision, tpr_decision, "Decision Tree"),
    (fpr_mlp, tpr_mlp, "Baseline MLP"),
):
    plt.plot(_fpr, _tpr, label=_label)

# Dashed black diagonal as the chance-level reference line.
plt.plot([0, 1], [0, 1], linestyle='--', c='black')
plt.xlim(0, 1)
plt.ylim(0, 1)
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
plt.title('ROC Curves')
plt.legend()
plt.savefig('all_models_roc.png')

	GalaxyID	Class1.1	Class1.2	Class1.3
0	100008	0.383147	0.616853	0.000000
1	100023	0.327001	0.663777	0.009222
2	100053	0.765717	0.177352	0.056931
3	100078	0.693377	0.238564	0.068059
4	100090	0.933839	0.000000	0.066161
5	100122	0.738832	0.238159	0.023009
6	100123	0.462492	0.456033	0.081475
7	100128	0.687783	0.288344	0.023873
8	100134	0.021834	0.976952	0.001214
9	100143	0.269843	0.730157	0.000000

	GalaxyID	Elliptical	Spiral	Irregular
0	100008	0.383147	0.616853	0.000000
1	100023	0.327001	0.663777	0.009222
2	100053	0.765717	0.177352	0.056931
3	100078	0.693377	0.238564	0.068059
4	100090	0.933839	0.000000	0.066161

	GalaxyID	Elliptical	Spiral	Irregular
1791	126783	0.178000	0.044000	0.778000
2376	135453	0.360342	0.098619	0.541039
6957	202788	0.242413	0.290022	0.467565
7434	209411	0.273749	0.202910	0.523342
7557	211113	0.289000	0.133000	0.578000

	GalaxyID	Elliptical	Spiral
0	100008	0.383147	0.616853
1	100023	0.327001	0.663777
2	100053	0.765717	0.177352
3	100078	0.693377	0.238564
4	100090	0.933839	0.000000