Commit fc43603f authored by Yifan Wang
Browse files

update training process

parent b059bb8b
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
import csv import csv
import numpy as np import numpy as np
import pandas as pd import pandas as pd
from tqdm import tqdm from tqdm import tqdm
import utils.samplefiles import utils.samplefiles
``` import h5py
%% Cell type:code id: tags:
``` python
train_wnum = 50
train_nnum = 50
test_wnum = 50
test_nnum = 50
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
data = utils.samplefiles.SampleFile() data = utils.samplefiles.SampleFile()
data.read_hdf('./output/train.hdf') data.read_hdf('./output/train.hdf')
```
%% Cell type:code id: tags:
``` python
wave, noise = data.as_dataframe(injection_parameters=True, wave, noise = data.as_dataframe(injection_parameters=True,
static_arguments=False, static_arguments=False,
command_line_arguments=False, command_line_arguments=False,
split_injections_noise=True) split_injections_noise=True)
``` ```
%% Cell type:code id: tags:
``` python
wave['h1_strain'][0].size
```
%% Output
4096
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
Turn strain into a multi-dimensional array Turn strain into a multi-dimensional array
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
h1w = wave['h1_strain'].tolist() h1w = wave['h1_strain'].tolist()
h1n = noise['h1_strain'].tolist() h1n = noise['h1_strain'].tolist()
wary = np.array(h1w) wary = np.array(h1w)
nary = np.array(h1n) nary = np.array(h1n)
``` ```
%% Cell type:code id: tags:
``` python
h1w[0].size
```
%% Output
4096
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
Split into training and test sets Split into training and test sets
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
train_wnum = 50
train_nnum = 50
test_wnum = 50
test_nnum = 50
wtrain = wary[:train_wnum,:] wtrain = wary[:train_wnum,:]
ntrain = nary[:train_nnum,:] ntrain = nary[:train_nnum,:]
wtest = wary[train_wnum:,:] wtest = wary[train_wnum:,:]
ntest = nary[train_nnum:,:] ntest = nary[train_nnum:,:]
``` ```
%% Cell type:code id: tags:
``` python
wtrain.shape
```
%% Output
(50, 4096)
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
Insert label Insert label
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
wtrain = np.insert(wtrain, 0, values=1, axis=1) wtrain = np.insert(wtrain, 0, values=1, axis=1)
ntrain = np.insert(ntrain, 0, values=0, axis=1) ntrain = np.insert(ntrain, 0, values=0, axis=1)
wtest = np.insert(wtest, 0, values=1, axis=1) wtest = np.insert(wtest, 0, values=1, axis=1)
ntest = np.insert(ntest, 0, values=0, axis=1) ntest = np.insert(ntest, 0, values=0, axis=1)
``` ```
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
Training set name # Training set
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
train_name = [] train_name = []
num = 50 num = wtrain.shape[1]-1 # 4096
train_name.append('label') train_name.append('label')
for i in tqdm(range(0,num)): for i in tqdm(range(0,num)):
train_name.append('point{s1}'.format(s1=i)) train_name.append('point{s1}'.format(s1=i))
``` ```
%% Output %% Output
100%|██████████| 50/50 [00:00<00:00, 299593.14it/s] 100%|██████████| 4096/4096 [00:00<00:00, 774600.71it/s]
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
with open("output/train.csv","w") as csvfile: with open("output/train.csv","w") as csvfile:
writer = csv.writer(csvfile) writer = csv.writer(csvfile)
#columns_name #columns_name
writer.writerow(train_name) writer.writerow(train_name)
#use writerows to write lines #use writerows to write lines
for i in tqdm(range(0,train_wnum)): for i in tqdm(range(0,train_wnum)):
writer.writerow(wtrain[i]) writer.writerow(wtrain[i])
writer.writerow(ntrain[i]) writer.writerow(ntrain[i])
``` ```
%% Output %% Output
100%|██████████| 50/50 [00:00<00:00, 63.12it/s] 100%|██████████| 50/50 [00:00<00:00, 120.83it/s]
%% Cell type:code id: tags:
``` python
train_set = pd.read_csv("./output/train.csv", dtype=np.float32)
```
%% Cell type:code id: tags:
``` python
train_set.shape
```
%% Output
(100, 4097)
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
testing set name # Testing set
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
test_name = [] test_name = []
num = 50 num = wtrain.shape[1]-1 # 4096
test_name.append('label') test_name.append('label')
for i in tqdm(range(0,num)): for i in tqdm(range(0,num)):
test_name.append('point{s1}'.format(s1=i)) test_name.append('point{s1}'.format(s1=i))
``` ```
%% Output %% Output
100%|██████████| 50/50 [00:00<00:00, 394201.50it/s] 100%|██████████| 4096/4096 [00:00<00:00, 457568.56it/s]
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
with open("output/test.csv","w") as csvfile: with open("output/test.csv","w") as csvfile:
writer = csv.writer(csvfile) writer = csv.writer(csvfile)
#columns_name #columns_name
writer.writerow(test_name) writer.writerow(test_name)
#use writerows to write lines #use writerows to write lines
for i in tqdm(range(0,test_wnum)): for i in tqdm(range(0,test_wnum)):
writer.writerow(wtest[i]) writer.writerow(wtest[i])
writer.writerow(ntest[i]) writer.writerow(ntest[i])
``` ```
%% Output %% Output
100%|██████████| 50/50 [00:00<00:00, 60.24it/s] 100%|██████████| 50/50 [00:00<00:00, 120.86it/s]
%% Cell type:code id: tags:
``` python
test_set = pd.read_csv("./output/test.csv", dtype=np.float32)
```
%% Cell type:code id: tags:
``` python
test_set.shape
```
%% Output
(100, 4097)
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
``` ```
......
This diff is collapsed.
...@@ -8,5 +8,5 @@ ...@@ -8,5 +8,5 @@
"n_injection_samples": 100, "n_injection_samples": 100,
"n_noise_samples": 100, "n_noise_samples": 100,
"n_processes": 4, "n_processes": 4,
"output_file_name": "train.hdf" "output_file_name": "test.hdf"
} }
...@@ -13,7 +13,7 @@ injection_snr = ...@@ -13,7 +13,7 @@ injection_snr =
[static_args] [static_args]
approximant = SEOBNRv4 approximant = SEOBNRv4
domain = time domain = time
f_lower = 18 f_lower = 10
distance = 100 distance = 100