Commit fc43603f authored by Yifan Wang's avatar Yifan Wang
Browse files

update training process

parent b059bb8b
%% Cell type:code id: tags:
``` python
import csv
import numpy as np
import pandas as pd
from tqdm import tqdm
import utils.samplefiles
```
%% Cell type:code id: tags:
``` python
# Row counts for each split; the "w" counts are applied to the wave array
# and the "n" counts to the noise array when the data is sliced later on.
train_wnum, train_nnum = 50, 50
test_wnum, test_nnum = 50, 50
import h5py
```
%% Cell type:code id: tags:
``` python
# Create an empty SampleFile container and populate it from the HDF file
# under ./output (path is relative to the notebook's working directory).
data = utils.samplefiles.SampleFile()
data.read_hdf('./output/train.hdf')
```
%% Cell type:code id: tags:
``` python
# Options forwarded to SampleFile.as_dataframe. With
# split_injections_noise=True the call returns two objects, which are
# unpacked below as `wave` and `noise`.
frame_options = {
    'injection_parameters': True,
    'static_arguments': False,
    'command_line_arguments': False,
    'split_injections_noise': True,
}
wave, noise = data.as_dataframe(**frame_options)
```
%% Cell type:code id: tags:
``` python
# Sanity check: number of samples in the first `h1_strain` entry of `wave`
# (the recorded output is 4096).
wave['h1_strain'][0].size
```
%% Output
4096
%% Cell type:markdown id: tags:
Turn the strain series into multi-dimensional arrays
%% Cell type:code id: tags:
``` python
# Pull the `h1_strain` column out of each frame as a plain Python list
# (one entry per row), then stack each list into a single NumPy array.
h1w, h1n = (frame['h1_strain'].tolist() for frame in (wave, noise))
wary, nary = np.array(h1w), np.array(h1n)
```
%% Cell type:code id: tags:
``` python
# Sanity check: length of the first strain series in the list form
# (the recorded output is 4096).
h1w[0].size
```
%% Output
4096
%% Cell type:markdown id: tags:
Split train and test set
%% Cell type:code id: tags:
``` python
# Number of wave ("w") and noise ("n") rows assigned to each split.
train_wnum = 50
train_nnum = 50
test_wnum = 50
test_nnum = 50

# The leading rows of each array form the training split.
wtrain = wary[:train_wnum, :]
ntrain = nary[:train_nnum, :]

# The rows immediately after the training rows form the test split. The
# slices are bounded by test_wnum / test_nnum so the size of the test split
# is actually controlled by those constants instead of silently taking
# every remaining row (slicing past the end simply yields fewer rows).
wtest = wary[train_wnum:train_wnum + test_wnum, :]
ntest = nary[train_nnum:train_nnum + test_nnum, :]
```
%% Cell type:code id: tags:
``` python
# Sanity check: (rows, samples per row) of the wave training split
# (the recorded output is (50, 4096)).
wtrain.shape
```
%% Output
(50, 4096)
%% Cell type:markdown id: tags:
Insert label
%% Cell type:code id: tags:
``` python
# Prepend a label column (index 0) to every split:
# 1 for the wave arrays, 0 for the noise arrays.
wtrain, wtest = (np.insert(block, 0, 1, axis=1) for block in (wtrain, wtest))
ntrain, ntest = (np.insert(block, 0, 0, axis=1) for block in (ntrain, ntest))
```
%% Cell type:markdown id: tags:
Training set name
# Training set
%% Cell type:code id: tags:
``` python
# Build the CSV header for the training set: a 'label' column followed by
# one 'point<i>' column per strain sample.
# The label column has already been inserted into wtrain, hence the -1.
num = wtrain.shape[1] - 1  # 4096
train_name = ['label'] + [
    'point{s1}'.format(s1=i) for i in tqdm(range(num))
]
```
%% Output
100%|██████████| 50/50 [00:00<00:00, 299593.14it/s]
100%|██████████| 4096/4096 [00:00<00:00, 774600.71it/s]
%% Cell type:code id: tags:
``` python
# Write the training set to CSV: a header row, then alternating wave and
# noise rows. newline='' is what the csv module requires for files handed
# to csv.writer; without it, platforms that translate '\n' (Windows) emit
# '\r\r\n' and every data row is followed by a blank line.
with open("output/train.csv", "w", newline="") as csvfile:
    writer = csv.writer(csvfile)
    # column names
    writer.writerow(train_name)
    # one wave row followed by one noise row per iteration
    for i in tqdm(range(train_wnum)):
        writer.writerow(wtrain[i])
        writer.writerow(ntrain[i])
```
%% Output
100%|██████████| 50/50 [00:00<00:00, 63.12it/s]
100%|██████████| 50/50 [00:00<00:00, 120.83it/s]
%% Cell type:code id: tags:
``` python
# Load output/train.csv back into a DataFrame, requesting float32 for every
# column (the label column included).
train_set = pd.read_csv("./output/train.csv", dtype=np.float32)
```
%% Cell type:code id: tags:
``` python
# Sanity check: (rows, columns) of the loaded training frame
# (the recorded output is (100, 4097)).
train_set.shape
```
%% Output
(100, 4097)
%% Cell type:markdown id: tags:
Testing set name
# Testing set
%% Cell type:code id: tags:
``` python
# Build the CSV header for the testing set: a 'label' column followed by
# one 'point<i>' column per strain sample.
# The width is taken from the test array itself; the label column has
# already been inserted into wtest, hence the -1.
num = wtest.shape[1] - 1  # 4096
test_name = ['label'] + [
    'point{s1}'.format(s1=i) for i in tqdm(range(num))
]
```
%% Output
100%|██████████| 50/50 [00:00<00:00, 394201.50it/s]
100%|██████████| 4096/4096 [00:00<00:00, 457568.56it/s]
%% Cell type:code id: tags:
``` python
# Write the testing set to CSV: a header row, then alternating wave and
# noise rows. newline='' is what the csv module requires for files handed
# to csv.writer; without it, platforms that translate '\n' (Windows) emit
# '\r\r\n' and every data row is followed by a blank line.
with open("output/test.csv", "w", newline="") as csvfile:
    writer = csv.writer(csvfile)
    # column names
    writer.writerow(test_name)
    # one wave row followed by one noise row per iteration
    for i in tqdm(range(test_wnum)):
        writer.writerow(wtest[i])
        writer.writerow(ntest[i])
```
%% Output
100%|██████████| 50/50 [00:00<00:00, 60.24it/s]
100%|██████████| 50/50 [00:00<00:00, 120.86it/s]
%% Cell type:code id: tags:
``` python
# Load output/test.csv back into a DataFrame, requesting float32 for every
# column (the label column included).
test_set = pd.read_csv("./output/test.csv", dtype=np.float32)
```
%% Cell type:code id: tags:
``` python
# Sanity check: (rows, columns) of the loaded testing frame
# (the recorded output is (100, 4097)).
test_set.shape
```
%% Output
(100, 4097)
%% Cell type:code id: tags:
``` python
```
......