Commit fc43603f authored by Yifan Wang

update training process

parent b059bb8b
Showing with 798 additions and 168 deletions
%% Cell type:code id: tags:
``` python
import csv
import numpy as np
import pandas as pd
from tqdm import tqdm
import utils.samplefiles
```
%% Cell type:code id: tags:
``` python
import h5py

# sample counts for the train/test split
train_wnum = 50
train_nnum = 50
test_wnum = 50
test_nnum = 50
```
%% Cell type:code id: tags:
``` python
data = utils.samplefiles.SampleFile()
data.read_hdf('./output/train.hdf')
```
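%% Cell type:markdown id: tags:
`SampleFile` wraps an HDF5 file, so the raw structure can also be inspected directly with `h5py`. A minimal sketch (the group names depend on how the sample generator laid out the file):
%% Cell type:code id: tags:
``` python
# sketch: list the top-level groups of the training file
with h5py.File('./output/train.hdf', 'r') as f:
    print(list(f.keys()))
```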
%% Cell type:code id: tags:
``` python
wave, noise = data.as_dataframe(injection_parameters=True,
                                static_arguments=False,
                                command_line_arguments=False,
                                split_injections_noise=True)
```
%% Cell type:code id: tags:
``` python
wave['h1_strain'][0].size
```
%% Output
4096
%% Cell type:markdown id: tags:
Turn the strain series into multi-dimensional NumPy arrays
%% Cell type:code id: tags:
``` python
h1w = wave['h1_strain'].tolist()
h1n = noise['h1_strain'].tolist()
wary = np.array(h1w)  # injection (wave) strains, shape (n_samples, 4096)
nary = np.array(h1n)  # pure-noise strains, shape (n_samples, 4096)
```
%% Cell type:code id: tags:
``` python
h1w[0].size
```
%% Output
4096
%% Cell type:markdown id: tags:
Split into train and test sets
%% Cell type:code id: tags:
``` python
train_wnum = 50  # waveform samples for training
train_nnum = 50  # noise samples for training
test_wnum = 50   # waveform samples for testing
test_nnum = 50   # noise samples for testing
wtrain = wary[:train_wnum, :]
ntrain = nary[:train_nnum, :]
wtest = wary[train_wnum:, :]
ntest = nary[train_nnum:, :]
```
%% Cell type:code id: tags:
``` python
wtrain.shape
```
%% Output
(50, 4096)
%% Cell type:markdown id: tags:
Insert the class labels as column 0 (1 = wave, 0 = noise)
%% Cell type:code id: tags:
``` python
# prepend the class label as column 0
wtrain = np.insert(wtrain, 0, values=1, axis=1)
ntrain = np.insert(ntrain, 0, values=0, axis=1)
wtest = np.insert(wtest, 0, values=1, axis=1)
ntest = np.insert(ntest, 0, values=0, axis=1)
```
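%% Cell type:markdown id: tags:
A quick sanity check (not in the original run): after the insert, column 0 of every row should hold the class label.
%% Cell type:code id: tags:
``` python
# first entry of each row is now the label: 1 = wave, 0 = noise
wtrain[0, 0], ntrain[0, 0]  # expected: (1.0, 0.0)
```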
%% Cell type:markdown id: tags:
# Training set
Build the training CSV column names: `label` followed by one column per sample point.
%% Cell type:code id: tags:
``` python
train_name = []
num = wtrain.shape[1] - 1  # 4096 sample points (column 0 is the label)
train_name.append('label')
for i in tqdm(range(0, num)):
    train_name.append('point{s1}'.format(s1=i))
```
%% Output
100%|██████████| 4096/4096 [00:00<00:00, 774600.71it/s]
%% Cell type:code id: tags:
``` python
with open("output/train.csv", "w") as csvfile:
    writer = csv.writer(csvfile)
    # header row with the column names
    writer.writerow(train_name)
    # interleave wave and noise rows
    for i in tqdm(range(0, train_wnum)):
        writer.writerow(wtrain[i])
        writer.writerow(ntrain[i])
```
%% Output
100%|██████████| 50/50 [00:00<00:00, 63.12it/s]
100%|██████████| 50/50 [00:00<00:00, 120.83it/s]
%% Cell type:code id: tags:
``` python
train_set = pd.read_csv("./output/train.csv", dtype=np.float32)
```
%% Cell type:code id: tags:
``` python
train_set.shape
```
%% Output
(100, 4097)
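%% Cell type:markdown id: tags:
Writing the rows one by one with `csv.writer` works; an equivalent sketch using pandas stacks the arrays and writes the file in one call. The row order differs (blocks instead of interleaved), which does not matter here since the loaders shuffle; the file name `train_alt.csv` is only illustrative.
%% Cell type:code id: tags:
``` python
# sketch: same CSV content via a DataFrame
rows = np.vstack([wtrain[:train_wnum], ntrain[:train_nnum]])
pd.DataFrame(rows, columns=train_name).to_csv("output/train_alt.csv", index=False)
```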
%% Cell type:markdown id: tags:
# Testing set
Build the testing CSV with the same column layout.
%% Cell type:code id: tags:
``` python
test_name = []
num = wtest.shape[1] - 1  # 4096 sample points (column 0 is the label)
test_name.append('label')
for i in tqdm(range(0, num)):
    test_name.append('point{s1}'.format(s1=i))
```
%% Output
100%|██████████| 4096/4096 [00:00<00:00, 457568.56it/s]
%% Cell type:code id: tags:
``` python
with open("output/test.csv", "w") as csvfile:
    writer = csv.writer(csvfile)
    # header row with the column names
    writer.writerow(test_name)
    # interleave wave and noise rows
    for i in tqdm(range(0, test_wnum)):
        writer.writerow(wtest[i])
        writer.writerow(ntest[i])
```
%% Output
100%|██████████| 50/50 [00:00<00:00, 60.24it/s]
100%|██████████| 50/50 [00:00<00:00, 120.86it/s]
%% Cell type:code id: tags:
``` python
test_set = pd.read_csv("./output/test.csv", dtype=np.float32)
```
%% Cell type:code id: tags:
``` python
test_set.shape
```
%% Output
(100, 4097)
%% Cell type:code id: tags:
``` python
```
......
%% Cell type:code id: tags:
``` python
import torch
import numpy as np
import pandas as pd
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
import torch.nn.functional as F
from torchsummary import summary
from torch.optim import lr_scheduler
from torch.utils.data import DataLoader
from sklearn.model_selection import train_test_split
from models import *
%matplotlib inline
```
%% Cell type:code id: tags:
``` python
# hyper-parameters
# how many samples per batch to load
batch_size = 10
# fraction of the training set to use for validation
valid_size = 0.1
# number of epochs to train the model
n_epochs = 30
# track the best (lowest) validation loss seen so far
valid_loss_min = np.inf
# the two classes
classes = ['noise', 'wave']
# GPU if available (device index 3 is specific to the author's machine)
DEVICE = torch.device('cuda:3' if torch.cuda.is_available() else 'cpu')
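%% Cell type:markdown id: tags:
Not part of the original setup, but if reproducible shuffling and splitting are wanted, the RNGs could be seeded here; a minimal sketch:
%% Cell type:code id: tags:
``` python
# sketch: seed NumPy and PyTorch for (more) reproducible runs
seed = 2
np.random.seed(seed)
torch.manual_seed(seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(seed)
```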
%% Cell type:code id: tags:
``` python
# load the training dataset
train_set = pd.read_csv("./output/train.csv", dtype=np.float32)
# Separate the features and labels
total_train_label = train_set.label.values
total_train_data = train_set.loc[:, train_set.columns != 'label'].values
total_train_data = total_train_data.reshape(-1, 1, 4096)
# Split into training and validation sets
data_train, data_valid, label_train, label_valid = train_test_split(
    total_train_data, total_train_label, test_size=valid_size, random_state=2)
```
%% Cell type:code id: tags:
``` python
# create feature and label tensors for the train set
dataTrain = torch.from_numpy(data_train)
labelTrain = torch.from_numpy(label_train).type(torch.LongTensor)  # labels must be integer type
# create feature and label tensors for the valid set
dataValid = torch.from_numpy(data_valid)
labelValid = torch.from_numpy(label_valid).type(torch.LongTensor)  # labels must be integer type
```
%% Cell type:code id: tags:
``` python
# PyTorch train and valid datasets
train = torch.utils.data.TensorDataset(dataTrain, labelTrain)
valid = torch.utils.data.TensorDataset(dataValid, labelValid)
# data loaders
train_loader = torch.utils.data.DataLoader(train, batch_size=batch_size, shuffle=True)
valid_loader = torch.utils.data.DataLoader(valid, batch_size=batch_size, shuffle=True)
```
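%% Cell type:markdown id: tags:
The training loop below reshapes each target batch to `(-1, 1)`; if a fixed batch shape is preferred instead, the loader can drop a final incomplete batch. This is an optional variant, not what the original code does:
%% Cell type:code id: tags:
``` python
# sketch: guarantee every batch holds exactly batch_size samples
train_loader = torch.utils.data.DataLoader(train, batch_size=batch_size,
                                           shuffle=True, drop_last=True)
```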
%% Cell type:code id: tags:
``` python
# instantiate the model
model = ConvNet4().to(DEVICE)
# specify the optimizer
optimizer = optim.Adam(model.parameters(), lr=5e-5)
# learning-rate schedule: decay by 10x every 20 epochs
lr_sched = lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.1)
# summary
#summary(model, input_size=(1,4096))
```
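%% Cell type:markdown id: tags:
For reference, `F.binary_cross_entropy` averages $-\left[y \log p + (1 - y) \log(1 - p)\right]$ over the batch, where $p$ is the model output (assumed to be sigmoid-activated inside `ConvNet4`, since it is compared against a 0.5 threshold below) and $y \in \{0, 1\}$ is the label.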
%% Cell type:code id: tags:
``` python
# curves for plotting
train_Loss_list = []
valid_Loss_list = []
accuracy_list = []
valid_len = 1  # validate after every batch
```
%% Cell type:code id: tags:
``` python
# train
for epoch in range(0, n_epochs):
    # track training and validation loss
    train_loss = 0.0
    valid_loss = 0.0
    # train the model
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        # reshape(-1, 1) also handles a final partial batch
        data, target = data.to(DEVICE), target.to(DEVICE).float().reshape(-1, 1)
        optimizer.zero_grad()
        output = model(data)
        loss = F.binary_cross_entropy(output, target)
        loss.backward()
        optimizer.step()
        train_loss += loss.item() * data.size(0)
        # validate every valid_len batches
        if (batch_idx + 1) % valid_len == 0:
            class_correct = list(0. for i in range(2))
            class_total = list(0. for i in range(2))
            # validate the model
            model.eval()
            for data, target in valid_loader:
                data, target = data.to(DEVICE), target.to(DEVICE).float().reshape(-1, 1)
                output = model(data)
                loss = F.binary_cross_entropy(output, target)
                valid_loss += loss.item() * data.size(0)
                # lowering the 0.5 threshold (e.g. to 0.1) raises the recall
                pred = (output >= 0.5).long()
                # compare predictions to the true labels
                correct_tensor = pred.eq(target.data.view_as(pred).long())
                correct = np.squeeze(correct_tensor.cpu().numpy())
                # accumulate per-class counts
                for i in range(target.size(0)):
                    label = target.data[i].int()
                    class_correct[label] += correct[i].item()
                    class_total[label] += 1
            model.train()  # back to training mode
            # calculate accuracy
            accuracy = 100. * np.sum(class_correct) / np.sum(class_total)
            # calculate average losses
            train_loss = train_loss / (valid_len * batch_size)
            valid_loss = valid_loss / len(valid_loader.dataset)
            # curve data
            train_Loss_list.append(train_loss)
            valid_Loss_list.append(valid_loss)
            accuracy_list.append(accuracy)
            # print training/validation statistics every 5th validation round
            if (epoch * 10 + (batch_idx + 1) / valid_len) % 5 == 0:
                print('iteration: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.format(
                    epoch * 10 + (batch_idx + 1) / valid_len, train_loss, valid_loss))
            # save the model if the validation loss has decreased
            if valid_loss <= valid_loss_min:
                print('Validation loss decreased ({:.6f} --> {:.6f}). Saving model ...'.format(
                    valid_loss_min, valid_loss))
                torch.save(model.state_dict(), './param/exp1_data1.2_convnet4.pt')
                valid_loss_min = valid_loss
            train_loss = 0.0
            valid_loss = 0.0
    # step the learning-rate schedule once per epoch
    lr_sched.step()
```
%% Output
Validation loss decreased (0.638117 --> 0.638116). Saving model ...
Validation loss decreased (0.638116 --> 0.638115). Saving model ...
Validation loss decreased (0.638115 --> 0.638113). Saving model ...
Validation loss decreased (0.638113 --> 0.638113). Saving model ...
iteration: 5.0 Training Loss: 0.241428 Validation Loss: 0.638112
Validation loss decreased (0.638113 --> 0.638112). Saving model ...
Validation loss decreased (0.638112 --> 0.638110). Saving model ...
Validation loss decreased (0.638110 --> 0.638110). Saving model ...
iteration: 15.0 Training Loss: 0.278747 Validation Loss: 0.638117
iteration: 25.0 Training Loss: 0.217533 Validation Loss: 0.638129
iteration: 35.0 Training Loss: 0.268120 Validation Loss: 0.638133
iteration: 45.0 Training Loss: 0.242722 Validation Loss: 0.638136
iteration: 55.0 Training Loss: 0.210212 Validation Loss: 0.638121
iteration: 65.0 Training Loss: 0.222743 Validation Loss: 0.638116
iteration: 75.0 Training Loss: 0.180599 Validation Loss: 0.638120
iteration: 85.0 Training Loss: 0.219321 Validation Loss: 0.638117
iteration: 95.0 Training Loss: 0.218023 Validation Loss: 0.638128
iteration: 105.0 Training Loss: 0.212116 Validation Loss: 0.638129
iteration: 115.0 Training Loss: 0.226289 Validation Loss: 0.638129
iteration: 125.0 Training Loss: 0.266237 Validation Loss: 0.638129
iteration: 135.0 Training Loss: 0.228767 Validation Loss: 0.638129
iteration: 145.0 Training Loss: 0.249504 Validation Loss: 0.638129
iteration: 155.0 Training Loss: 0.242986 Validation Loss: 0.638129
iteration: 165.0 Training Loss: 0.222421 Validation Loss: 0.638129
iteration: 175.0 Training Loss: 0.195560 Validation Loss: 0.638129
iteration: 185.0 Training Loss: 0.228540 Validation Loss: 0.638129
iteration: 195.0 Training Loss: 0.217326 Validation Loss: 0.638129
iteration: 205.0 Training Loss: 0.258029 Validation Loss: 0.638129
iteration: 215.0 Training Loss: 0.254950 Validation Loss: 0.638129
iteration: 225.0 Training Loss: 0.185815 Validation Loss: 0.638129
iteration: 235.0 Training Loss: 0.224218 Validation Loss: 0.638129
iteration: 245.0 Training Loss: 0.215922 Validation Loss: 0.638129
iteration: 255.0 Training Loss: 0.245718 Validation Loss: 0.638129
iteration: 265.0 Training Loss: 0.268707 Validation Loss: 0.638129
iteration: 275.0 Training Loss: 0.195637 Validation Loss: 0.638129
iteration: 285.0 Training Loss: 0.205642 Validation Loss: 0.638129
iteration: 295.0 Training Loss: 0.205471 Validation Loss: 0.638129
%% Cell type:code id: tags:
``` python
length = len(train_Loss_list)
x = range(0, length)
plt.plot(x, train_Loss_list, label='train loss')
plt.plot(x, valid_Loss_list, label='valid loss')
plt.title('Convolutional Neural Network')
plt.xlabel('iteration')
plt.ylabel('Loss')
plt.legend()
#plt.savefig("cnn_loss.jpg")
plt.show()
```
%% Output
%% Cell type:code id: tags:
``` python
length = len(accuracy_list)
x = range(0, length)
plt.plot(x, accuracy_list)
plt.title('Convolutional Neural Network')
plt.xlabel('iteration')
plt.ylabel('accuracy')
#plt.savefig("cnn_accuracy.jpg")
plt.show()
```
%% Output
%% Cell type:code id: tags:
``` python
#np.savetxt('./cache/new/data_1.2_convnet1/accracy.txt', accuracy_list, fmt="%.6f", delimiter="\n")
#np.savetxt('./cache/new/data_1.2_convnet1/train_Loss_list.txt', train_Loss_list, fmt="%.6f", delimiter="\n")
#np.savetxt('./cache/new/data_1.2_convnet1/valid_Loss_list.txt', valid_Loss_list, fmt="%.6f", delimiter="\n")
```
%% Cell type:code id: tags:
``` python
model.load_state_dict(torch.load('./param/exp1_data1.2_convnet4.pt'))
```
%% Output
<All keys matched successfully>
%% Cell type:code id: tags:
``` python
# load the testing dataset
test_set = pd.read_csv("./output/test.csv", dtype=np.float32)
# Separate the features and labels
label_test = test_set.label.values
data_test = test_set.loc[:, test_set.columns != 'label'].values
data_test = data_test.reshape(-1, 1, 4096)
# create feature and label tensors for the test set
dataTest = torch.from_numpy(data_test)
labelTest = torch.from_numpy(label_test).type(torch.LongTensor)  # labels must be integer type
test = torch.utils.data.TensorDataset(dataTest, labelTest)
test_loader = torch.utils.data.DataLoader(test, batch_size=batch_size, shuffle=True)
```
%% Cell type:code id: tags:
``` python
# track test loss
test_loss = 0.0
class_correct = list(0. for i in range(2))
class_total = list(0. for i in range(2))
model.eval()
# iterate over the test data
for data, target in test_loader:
    data, target = data.to(DEVICE), target.to(DEVICE).float().reshape(-1, 1)
    output = model(data)
    loss = F.binary_cross_entropy(output, target)
    test_loss += loss.item() * data.size(0)
    # lowering the 0.5 threshold (e.g. to 0.1) raises the recall
    pred = (output >= 0.5).long()
    # compare predictions to the true labels
    correct_tensor = pred.eq(target.data.view_as(pred).long())
    correct = np.squeeze(correct_tensor.cpu().numpy())
    # accumulate per-class counts
    for i in range(target.size(0)):
        label = target.data[i].int()
        class_correct[label] += correct[i].item()
        class_total[label] += 1
# average test loss
test_loss = test_loss / len(test_loader.dataset)
print('Test Loss: {:.6f}\n'.format(test_loss))
for i in range(2):
    if class_total[i] > 0:
        print('Test Accuracy of %5s: %2d%% (%2d/%2d)' % (
            classes[i], 100 * class_correct[i] / class_total[i],
            class_correct[i], class_total[i]))
    else:
        print('Test Accuracy of %5s: N/A (no test examples)' % (classes[i]))
print('\nTest Accuracy (Overall): %2d%% (%2d/%2d)' % (
    100. * np.sum(class_correct) / np.sum(class_total),
    np.sum(class_correct), np.sum(class_total)))
```
%% Output
Test Loss: 0.268362
Test Accuracy of noise: 98% (49/50)
Test Accuracy of wave: 96% (48/50)
Test Accuracy (Overall): 97% (97/100)
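%% Cell type:markdown id: tags:
The loop's comment notes that lowering the 0.5 decision threshold raises the recall. A sketch (not part of the original run) of measuring that trade-off by collecting the test probabilities once and sweeping a few thresholds:
%% Cell type:code id: tags:
``` python
# sketch: precision/recall at several decision thresholds
all_probs, all_labels = [], []
model.eval()
with torch.no_grad():
    for data, target in test_loader:
        all_probs.append(model(data.to(DEVICE)).squeeze(1).cpu())
        all_labels.append(target)
all_probs, all_labels = torch.cat(all_probs), torch.cat(all_labels)
for thr in (0.1, 0.3, 0.5):
    pred = (all_probs >= thr).long()
    tp = ((pred == 1) & (all_labels == 1)).sum().item()
    fp = ((pred == 1) & (all_labels == 0)).sum().item()
    fn = ((pred == 0) & (all_labels == 1)).sum().item()
    print('threshold {:.1f}: recall {:.2f}, precision {:.2f}'.format(
        thr, tp / max(tp + fn, 1), tp / max(tp + fp, 1)))
```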
%% Cell type:code id: tags:
``` python
```
%% Cell type:code id: tags:
``` python
import torch
import numpy as np
import pandas as pd
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
import torch.nn.functional as F
from torchsummary import summary
from torch.optim import lr_scheduler
from torch.utils.data import DataLoader
from sklearn.model_selection import train_test_split
from models import *
%matplotlib inline
```
%% Cell type:code id: tags:
``` python
# hyper-parameters
# how many samples per batch to load
batch_size = 25
# fraction of the training set to use for validation
valid_size = 0.1
# number of epochs to train the model
n_epochs = 1  # reduced from 30 for this run
# track the best (lowest) validation loss seen so far
valid_loss_min = np.inf
# the two classes
classes = ['noise', 'wave']
# GPU if available (device index 3 is specific to the author's machine)
DEVICE = torch.device('cuda:3' if torch.cuda.is_available() else 'cpu')
```
%% Cell type:code id: tags:
``` python
# load the training dataset
train_set = pd.read_csv("./output/train.csv", dtype=np.float32)
```
%% Cell type:code id: tags:
``` python
# Separate the features and labels
total_train_label = train_set.label.values
total_train_data = train_set.loc[:, train_set.columns != 'label'].values
total_train_data = total_train_data.reshape(-1, 1, 4096)
```
%% Cell type:code id: tags:
``` python
total_train_data, total_train_data.shape
total_train_label.shape
```
%% Output
(array([[-62.03712 , -7.5803556, 15.291977 , ..., 7.230299 ,
-69.84998 , 18.02432 ],
[-32.492123 , 4.4595265, -0.5901758, ..., 14.5061865,
-23.653366 , 37.64089 ],
[-26.649189 , 31.127876 , 56.877144 , ..., 51.428246 ,
49.74805 , -14.098241 ],
...,
[-22.56183 , 33.652225 , 15.230522 , ..., -56.98435 ,
19.305769 , 18.769468 ],
[-15.172637 , -36.630943 , 43.394974 , ..., -5.847082 ,
-59.176975 , 19.188839 ],
[-22.888329 , 41.428745 , 5.050339 , ..., 2.9315968,
-15.47938 , -23.167208 ]], dtype=float32),
(100, 50))
(100,)
%% Cell type:code id: tags:
``` python
total_train_data = total_train_data.reshape(-1, 1, 4096)  # one channel of 4096 sample points
total_train_data.shape
```
%% Output
(100, 1, 4096)
%% Cell type:code id: tags:
``` python
# Split into training and validation sets
data_train, data_valid, label_train, label_valid = \
train_test_split(total_train_data, total_train_label, test_size=0.1, random_state=2)
#total_train_data = total_train_data.reshape(-1, 1, 16384)
#total_train_label = total_train_label.reshape(-1, 1, 1)
```
%% Output
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-7-98b7e345d0c3> in <module>
      1 # Split into training and test set
      2 data_train, data_valid, label_train, label_valid = \
----> 3     train_test_split(total_train_data, total_train_label, test_size=0.1, random_state=2)
~/opt/miniconda3/lib/python3.9/site-packages/sklearn/model_selection/_split.py in train_test_split(test_size, train_size, random_state, shuffle, stratify, *arrays)
   2170         raise ValueError("At least one array required as input")
   2171
-> 2172     arrays = indexable(*arrays)
   2173
   2174     n_samples = _num_samples(arrays[0])
~/opt/miniconda3/lib/python3.9/site-packages/sklearn/utils/validation.py in indexable(*iterables)
    354     """
    355     result = [_make_indexable(X) for X in iterables]
--> 356     check_consistent_length(*result)
    357     return result
    358
~/opt/miniconda3/lib/python3.9/site-packages/sklearn/utils/validation.py in check_consistent_length(*arrays)
    317     uniques = np.unique(lengths)
    318     if len(uniques) > 1:
--> 319         raise ValueError("Found input variables with inconsistent numbers of"
    320                          " samples: %r" % [int(l) for l in lengths])
    321
ValueError: Found input variables with inconsistent numbers of samples: [50, 100]
%% Cell type:code id: tags:
``` python
total_train_data.shape
```
%% Output
(100, 1, 4096)
%% Cell type:code id: tags:
``` python
total_train_label.shape
```
%% Output
(100,)
%% Cell type:code id: tags:
``` python
# Split into training and validation sets
data_train, data_valid, label_train, label_valid = \
train_test_split(total_train_data, total_train_label, test_size=valid_size, random_state=2)
```
%% Cell type:code id: tags:
``` python
# create feature and label tensors for the train set
dataTrain = torch.from_numpy(data_train)
labelTrain = torch.from_numpy(label_train).type(torch.LongTensor)  # labels must be integer type
# create feature and label tensors for the valid set
dataValid = torch.from_numpy(data_valid)
labelValid = torch.from_numpy(label_valid).type(torch.LongTensor)  # labels must be integer type
```
%% Cell type:code id: tags:
``` python
# PyTorch train and valid datasets
train = torch.utils.data.TensorDataset(dataTrain, labelTrain)
valid = torch.utils.data.TensorDataset(dataValid, labelValid)
# data loaders
train_loader = torch.utils.data.DataLoader(train, batch_size=batch_size, shuffle=True)
valid_loader = torch.utils.data.DataLoader(valid, batch_size=batch_size, shuffle=True)
```
%% Cell type:code id: tags:
``` python
for batch_idx, (data, target) in enumerate(train_loader):
    print(batch_idx, (data, target))
```
%% Output
0 (tensor([[[ -3.6588, -8.7501, -13.2873, ..., 50.3284, 73.6249, -7.4861]],
[[ 58.8892, -34.4240, -7.0151, ..., 19.6284, 5.8056, -10.1896]],
[[-25.7599, 38.4685, -5.8641, ..., -1.0486, -15.9929, -13.8097]],
...,
[[ -8.6096, 11.9229, -7.7777, ..., -33.6976, 31.7323, 22.5505]],
[[ 4.9527, -4.7260, -51.1175, ..., -23.3081, -23.6608, 9.2065]],
[[ 41.5923, 16.7344, -9.3329, ..., -4.5990, 40.6475, 17.2668]]]), tensor([1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1,
0]))
1 (tensor([[[ 31.8002, -35.3708, -3.3680, ..., -37.3070, -20.4746, -25.6443]],
[[ -8.9197, -14.1783, -7.0737, ..., -6.1846, -9.7816, 34.2528]],
[[-20.0077, 76.9484, 6.7912, ..., 30.4908, 10.9669, 18.0433]],
...,
[[ 61.0710, -1.1720, -5.4269, ..., -15.4596, -49.9105, 7.8992]],
[[ -2.5201, -16.3237, 30.1252, ..., -0.4326, 36.2127, 19.0306]],
[[-47.8746, 14.1169, 28.6327, ..., -34.9201, -8.5922, 19.4526]]]), tensor([0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1,
0]))
2 (tensor([[[ 24.0152, 46.1441, 10.9930, ..., -3.9326, 12.6095, -33.1261]],
[[ 5.0609, -19.4981, -45.3404, ..., 28.1799, -84.9188, -9.8493]],
[[ 31.7707, -8.6773, -11.7029, ..., -28.5239, 47.4721, -6.9119]],
...,
[[ 33.2307, 11.9690, 48.2033, ..., 3.5819, 5.1547, 3.5468]],
[[ 55.1532, -6.3257, 12.0293, ..., -58.0621, -10.6701, -30.3893]],
[[-39.4150, 25.1565, -14.5661, ..., 43.6791, -66.4943, 74.3911]]]), tensor([1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1,
0]))
3 (tensor([[[ 21.6147, -20.1361, -72.1704, ..., -26.1830, 3.1268, -2.8540]],
[[-15.4415, 32.3177, -19.1063, ..., -61.5150, 18.5274, -44.2418]],
[[ -2.6239, 20.6541, 29.9448, ..., 5.2102, 0.8877, 30.8585]],
...,
[[ 10.4988, 16.8289, -18.8190, ..., -90.1444, 29.9813, 53.8949]],
[[ 31.0470, 18.0455, -4.1932, ..., -11.7091, -41.2828, 17.5074]],
[[ 17.8683, -21.5610, -1.2673, ..., 1.7552, 25.0248, 14.9674]]]), tensor([1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0]))
%% Cell type:code id: tags:
``` python
train.tensors[0].shape
```
%% Output
torch.Size([90, 1, 4096])
%% Cell type:code id: tags:
``` python
# instantiate the model (ConvNet1 replaces the earlier ConvNet4)
model = ConvNet1().to(DEVICE)
# specify the optimizer
optimizer = optim.Adam(model.parameters(), lr=5e-5)
# learning-rate schedule: decay by 10x every 20 epochs
lr_sched = lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.1)
# summary
#summary(model, input_size=(1,4096))
```
%% Cell type:code id: tags:
``` python
model
```
%% Output
ConvNet1(
(conv1): Conv1d(1, 16, kernel_size=(16,), stride=(1,))
(max_pool1): MaxPool1d(kernel_size=4, stride=4, padding=0, dilation=1, ceil_mode=False)
(conv2): Conv1d(16, 32, kernel_size=(8,), stride=(1,))
(max_pool2): MaxPool1d(kernel_size=4, stride=4, padding=0, dilation=1, ceil_mode=False)
(conv3): Conv1d(32, 64, kernel_size=(8,), stride=(1,))
(max_pool3): MaxPool1d(kernel_size=4, stride=4, padding=0, dilation=1, ceil_mode=False)
(fc1): Linear(in_features=3904, out_features=64, bias=True)
(fc2): Linear(in_features=64, out_features=1, bias=True)
)
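%% Cell type:markdown id: tags:
The `in_features=3904` of `fc1` follows from the layer parameters printed above: each valid convolution shortens the sequence by `kernel_size - 1` and each max-pool floor-divides the length by 4. A quick arithmetic check:
%% Cell type:code id: tags:
``` python
# recompute the flattened feature size fed into fc1
length = 4096
for kernel in (16, 8, 8):                  # conv1, conv2, conv3 (stride 1, no padding)
    length = (length - kernel + 1) // 4    # valid conv, then max-pool /4
print(64 * length)  # 64 channels * 61 -> 3904
```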
%% Cell type:code id: tags:
``` python
# curves for plotting
train_Loss_list = []
valid_Loss_list = []
accuracy_list = []
valid_len = 20  # validate every 20 batches
```
%% Cell type:code id: tags:
``` python
# train
for epoch in range(0, n_epochs):
    # track training and validation loss
    train_loss = 0.0
    valid_loss = 0.0
    # train the model
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(DEVICE), target.to(DEVICE).float().reshape(batch_size, 1)
        optimizer.zero_grad()
        output = model(data)
        loss = F.binary_cross_entropy(output, target)
        loss.backward()
        optimizer.step()
        train_loss += loss.item() * data.size(0)
        # validate every valid_len batches
        if (batch_idx + 1) % valid_len == 0:
            class_correct = list(0. for i in range(2))
            class_total = list(0. for i in range(2))
            # validate the model
            model.eval()
            for data, target in valid_loader:
                data, target = data.to(DEVICE), target.to(DEVICE).float().reshape(batch_size, 1)
                output = model(data)
                loss = F.binary_cross_entropy(output, target)
                valid_loss += loss.item() * data.size(0)
                # lowering the 0.5 threshold (e.g. to 0.1) raises the recall
                pred = (output >= 0.5).long()
                # compare predictions to the true labels
                correct_tensor = pred.eq(target.data.view_as(pred).long())
                correct = np.squeeze(correct_tensor.cpu().numpy())
                # accumulate per-class counts
                for i in range(batch_size):
                    label = target.data[i].int()
                    class_correct[label] += correct[i].item()
                    class_total[label] += 1
            model.train()  # back to training mode
            # calculate accuracy
            accuracy = 100. * np.sum(class_correct) / np.sum(class_total)
            # calculate average losses
            train_loss = train_loss / (valid_len * batch_size)
            valid_loss = valid_loss / len(valid_loader.dataset)
            # curve data
            train_Loss_list.append(train_loss)
            valid_Loss_list.append(valid_loss)
            accuracy_list.append(accuracy)
            # print training/validation statistics every 5th validation round
            if (epoch * 10 + (batch_idx + 1) / valid_len) % 5 == 0:
                print('iteration: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.format(
                    epoch * 10 + (batch_idx + 1) / valid_len, train_loss, valid_loss))
            # save the model if the validation loss has decreased
            if valid_loss <= valid_loss_min:
                print('Validation loss decreased ({:.6f} --> {:.6f}). Saving model ...'.format(
                    valid_loss_min, valid_loss))
                torch.save(model.state_dict(), './param/exp1_data1.2_convnet4.pt')
                valid_loss_min = valid_loss
            train_loss = 0.0
            valid_loss = 0.0
    # step the learning-rate schedule once per epoch
    lr_sched.step()
```
%% Output
Validation loss decreased (inf --> 0.687406). Saving model ...
Validation loss decreased (0.687406 --> 0.681616). Saving model ...
Validation loss decreased (0.681616 --> 0.676216). Saving model ...
iteration: 5.0 Training Loss: 0.670587 Validation Loss: 0.667222
Validation loss decreased (0.676216 --> 0.667222). Saving model ...
Validation loss decreased (0.667222 --> 0.648601). Saving model ...
Validation loss decreased (0.648601 --> 0.642810). Saving model ...
Validation loss decreased (0.642810 --> 0.621476). Saving model ...
Validation loss decreased (0.621476 --> 0.600710). Saving model ...
Validation loss decreased (0.600710 --> 0.559009). Saving model ...
Validation loss decreased (0.559009 --> 0.469705). Saving model ...
Validation loss decreased (0.469705 --> 0.414217). Saving model ...
iteration: 15.0 Training Loss: 0.427971 Validation Loss: 0.370859
Validation loss decreased (0.414217 --> 0.370859). Saving model ...
Validation loss decreased (0.370859 --> 0.369948). Saving model ...
Validation loss decreased (0.369948 --> 0.327391). Saving model ...
Validation loss decreased (0.327391 --> 0.312415). Saving model ...
Validation loss decreased (0.312415 --> 0.271771). Saving model ...
Validation loss decreased (0.271771 --> 0.265176). Saving model ...
Validation loss decreased (0.265176 --> 0.261692). Saving model ...
Validation loss decreased (0.261692 --> 0.244539). Saving model ...
iteration: 25.0 Training Loss: 0.241963 Validation Loss: 0.246531
Validation loss decreased (0.244539 --> 0.233539). Saving model ...
Validation loss decreased (0.233539 --> 0.230913). Saving model ...
Validation loss decreased (0.230913 --> 0.227057). Saving model ...
Validation loss decreased (0.227057 --> 0.225386). Saving model ...
Validation loss decreased (0.225386 --> 0.219995). Saving model ...
iteration: 35.0 Training Loss: 0.266930 Validation Loss: 0.240498
Validation loss decreased (0.219995 --> 0.216780). Saving model ...
Validation loss decreased (0.216780 --> 0.214125). Saving model ...
Validation loss decreased (0.214125 --> 0.212111). Saving model ...
Validation loss decreased (0.212111 --> 0.211900). Saving model ...
iteration: 45.0 Training Loss: 0.179977 Validation Loss: 0.259835
Validation loss decreased (0.211900 --> 0.204226). Saving model ...
Validation loss decreased (0.204226 --> 0.199974). Saving model ...
iteration: 55.0 Training Loss: 0.147942 Validation Loss: 0.205356
Validation loss decreased (0.199974 --> 0.195755). Saving model ...
Validation loss decreased (0.195755 --> 0.194970). Saving model ...
Validation loss decreased (0.194970 --> 0.194251). Saving model ...
Validation loss decreased (0.194251 --> 0.194100). Saving model ...
Validation loss decreased (0.194100 --> 0.192323). Saving model ...
iteration: 65.0 Training Loss: 0.170526 Validation Loss: 0.192809
Validation loss decreased (0.192323 --> 0.188182). Saving model ...
Validation loss decreased (0.188182 --> 0.187559). Saving model ...
Validation loss decreased (0.187559 --> 0.186908). Saving model ...
Validation loss decreased (0.186908 --> 0.185635). Saving model ...
iteration: 75.0 Training Loss: 0.112142 Validation Loss: 0.187386
Validation loss decreased (0.185635 --> 0.181862). Saving model ...
iteration: 85.0 Training Loss: 0.119231 Validation Loss: 0.182259
Validation loss decreased (0.181862 --> 0.181267). Saving model ...
Validation loss decreased (0.181267 --> 0.180979). Saving model ...
Validation loss decreased (0.180979 --> 0.180108). Saving model ...
Validation loss decreased (0.180108 --> 0.178496). Saving model ...
iteration: 95.0 Training Loss: 0.149047 Validation Loss: 0.179502
iteration: 105.0 Training Loss: 0.085169 Validation Loss: 0.179523
Validation loss decreased (0.178496 --> 0.178160). Saving model ...
iteration: 115.0 Training Loss: 0.084234 Validation Loss: 0.195391
iteration: 125.0 Training Loss: 0.092414 Validation Loss: 0.181713
iteration: 135.0 Training Loss: 0.062861 Validation Loss: 0.188943
iteration: 145.0 Training Loss: 0.055033 Validation Loss: 0.187805
iteration: 155.0 Training Loss: 0.047300 Validation Loss: 0.191877
iteration: 165.0 Training Loss: 0.035127 Validation Loss: 0.205924
iteration: 175.0 Training Loss: 0.045897 Validation Loss: 0.199864
iteration: 185.0 Training Loss: 0.026902 Validation Loss: 0.222597
iteration: 195.0 Training Loss: 0.025435 Validation Loss: 0.218615
iteration: 205.0 Training Loss: 0.018787 Validation Loss: 0.221879
iteration: 215.0 Training Loss: 0.016591 Validation Loss: 0.223594
iteration: 225.0 Training Loss: 0.017554 Validation Loss: 0.224143
iteration: 235.0 Training Loss: 0.013084 Validation Loss: 0.225407
iteration: 245.0 Training Loss: 0.015281 Validation Loss: 0.223412
iteration: 255.0 Training Loss: 0.011665 Validation Loss: 0.227159
iteration: 265.0 Training Loss: 0.010941 Validation Loss: 0.227533
iteration: 275.0 Training Loss: 0.012160 Validation Loss: 0.234278
iteration: 285.0 Training Loss: 0.009149 Validation Loss: 0.232661
iteration: 295.0 Training Loss: 0.013521 Validation Loss: 0.231244
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-39-747daa443a1d> in <module>
9 model.train()
10 for batch_idx, (data, target) in enumerate(train_loader):
---> 11 data, target = data.to(DEVICE), target.to(DEVICE).float().reshape(batch_size, 1)
12 optimizer.zero_grad()
13 output = model(data)
RuntimeError: shape '[25, 1]' is invalid for input of size 15
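%% Cell type:markdown id: tags:
The `RuntimeError` above comes from the hard-coded `reshape(batch_size, 1)`: with 90 training samples and `batch_size = 25`, the final batch holds only 15 targets, which cannot be viewed as `[25, 1]`. Reshaping with `reshape(-1, 1)` or constructing the loader with `drop_last=True` (as sketched earlier) avoids the crash.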
%% Cell type:code id: tags:
``` python
length = len(train_Loss_list)
x = range(0, length)
plt.plot(x, train_Loss_list, label='train loss')
plt.plot(x, valid_Loss_list, label='valid loss')
plt.title('Convolutional Neural Network')
plt.xlabel('iteration')
plt.ylabel('Loss')
plt.legend()
#plt.savefig("cnn_loss.jpg")
plt.show()
```
%% Output
%% Cell type:code id: tags:
``` python
length = len(accuracy_list)
x = range(0, length)
plt.plot(x, accuracy_list)
plt.title('Convolutional Neural Network')
plt.xlabel('iteration')
plt.ylabel('accuracy')
#plt.savefig("cnn_accuracy.jpg")
plt.show()
```
%% Output
%% Cell type:code id: tags:
``` python
#np.savetxt('./cache/new/data_1.2_convnet1/accracy.txt', accuracy_list, fmt="%.6f", delimiter="\n")
#np.savetxt('./cache/new/data_1.2_convnet1/train_Loss_list.txt', train_Loss_list, fmt="%.6f", delimiter="\n")
#np.savetxt('./cache/new/data_1.2_convnet1/valid_Loss_list.txt', valid_Loss_list, fmt="%.6f", delimiter="\n")
```
%% Cell type:code id: tags:
``` python
model.load_state_dict(torch.load('./param/exp1_data1.2_convnet4.pt'))
```
%% Output
<All keys matched successfully>
%% Cell type:code id: tags:
``` python
test_set = pd.read_csv("./data/mass/exp1/data1.2/test.csv", dtype=np.float32)
# Separate the features and labels
label_test = test_set.label.values
data_test = test_set.loc[:, test_set.columns != 'label'].values
data_test = data_test.reshape(-1, 1, 4096)
# create feature and label tensors for the test set
dataTest = torch.from_numpy(data_test)
labelTest = torch.from_numpy(label_test).type(torch.LongTensor)  # labels must be integer type
test = torch.utils.data.TensorDataset(dataTest, labelTest)
test_loader = torch.utils.data.DataLoader(test, batch_size=batch_size, shuffle=True)
```
%% Cell type:code id: tags:
``` python
# track test loss
test_loss = 0.0
class_correct = list(0. for i in range(2))
class_total = list(0. for i in range(2))
model.eval()
# iterate over the test data
for data, target in test_loader:
    data, target = data.to(DEVICE), target.to(DEVICE).float().reshape(-1, 1)
    output = model(data)
    loss = F.binary_cross_entropy(output, target)
    test_loss += loss.item() * data.size(0)
    # lowering the 0.5 threshold (e.g. to 0.1) raises the recall
    pred = (output >= 0.5).long()
    # compare predictions to the true labels
    correct_tensor = pred.eq(target.data.view_as(pred).long())
    correct = np.squeeze(correct_tensor.cpu().numpy())
    # accumulate per-class counts
    for i in range(target.size(0)):
        label = target.data[i].int()
        class_correct[label] += correct[i].item()
        class_total[label] += 1
# average test loss
test_loss = test_loss / len(test_loader.dataset)
print('Test Loss: {:.6f}\n'.format(test_loss))
for i in range(2):
    if class_total[i] > 0:
        print('Test Accuracy of %5s: %2d%% (%2d/%2d)' % (
            classes[i], 100 * class_correct[i] / class_total[i],
            class_correct[i], class_total[i]))
    else:
        print('Test Accuracy of %5s: N/A (no test examples)' % (classes[i]))
print('\nTest Accuracy (Overall): %2d%% (%2d/%2d)' % (
    100. * np.sum(class_correct) / np.sum(class_total),
    np.sum(class_correct), np.sum(class_total)))
```
%% Output
Test Loss: 0.180455
Test Accuracy of noise: 93% (234/250)
Test Accuracy of wave: 93% (233/250)
Test Accuracy (Overall): 93% (467/500)
%% Cell type:code id: tags:
``` python
```
......
......
@@ -8,5 +8,5 @@
     "n_injection_samples": 100,
     "n_noise_samples": 100,
     "n_processes": 4,
-    "output_file_name": "train.hdf"
+    "output_file_name": "test.hdf"
 }
......
@@ -13,7 +13,7 @@ injection_snr =
 [static_args]
 approximant = SEOBNRv4
 domain = time
-f_lower = 18
+f_lower = 10
 distance = 100
 waveform_length = 128
......
@@ -31,8 +31,8 @@ target_sampling_rate = 2048
 ; Define parameters for the whitening procedure. See documentation of the
 ; pycbc.types.TimeSeries.whiten() method for an explanation of what these
 ; values exactly mean.
-whitening_segment_duration = 4
-whitening_max_filter_duration = 4
+whitening_segment_duration = 1
+whitening_max_filter_duration = 1
 ; Define the lower and upper bound for the bandpass filter (in Hertz)
 bandpass_lower = 20
......
@@ -41,8 +41,8 @@ bandpass_upper = 2048
 ; Define how to align the sample around the event time. By convention, the
 ; event time is the H1 time!
 ; The sum of these values will be the sample_length!
-seconds_before_event = 5.5
-seconds_after_event = 2.5
+seconds_before_event = 1.5
+seconds_after_event = 0.5
 ; alpha for the Tukey window that is used to "fade on" the waveforms
 ; It represents the fraction of the window inside the cosine tapered region.
......
File added
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644