update training process

fc43603f · Yifan Wang · b059bb8b · fc43603f · fc43603f · fc43603f
Commit fc43603f authored 3 years ago by Yifan Wang
--- a/workspace/2-data_csv.ipynb
+++ b/workspace/2-data_csv.ipynb
@@ -2,7 +2,7 @@
 "cells": [
  {
   "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -10,41 +10,43 @@
    "import numpy as np\n",
    "import pandas as pd\n",
    "from tqdm import tqdm\n",
-    "import utils.samplefiles"
+    "import utils.samplefiles\n",
+    "import h5py"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
-    "train_wnum = 50\n",
-    "train_nnum = 50\n",
-    "test_wnum = 50\n",
-    "test_nnum = 50"
+    "data = utils.samplefiles.SampleFile()\n",
+    "data.read_hdf('./output/train.hdf')\n",
+    "\n",
+    "wave, noise = data.as_dataframe(injection_parameters=True, \n",
+    "                  static_arguments=False, \n",
+    "                  command_line_arguments=False, \n",
+    "                  split_injections_noise=True)"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 23,
   "metadata": {},
-   "outputs": [],
-   "source": [
-    "data = utils.samplefiles.SampleFile()\n",
-    "data.read_hdf('./output/train.hdf')"
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "4096"
      ]
     },
-  {
-   "cell_type": "code",
-   "execution_count": 14,
+     "execution_count": 23,
     "metadata": {},
-   "outputs": [],
+     "output_type": "execute_result"
+    }
+   ],
   "source": [
-    "wave, noise = data.as_dataframe(injection_parameters=True, \n",
-    "                  static_arguments=False, \n",
-    "                  command_line_arguments=False, \n",
-    "                  split_injections_noise=True)"
+    "wave['h1_strain'][0].size"
   ]
  },
  {
@@ -56,7 +58,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -66,6 +68,26 @@
    "nary = np.array(h1n)"
   ]
  },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "4096"
+      ]
+     },
+     "execution_count": 25,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "h1w[0].size"
+   ]
+  },
  {
   "cell_type": "markdown",
   "metadata": {},
@@ -75,16 +97,41 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
+    "train_wnum = 50\n",
+    "train_nnum = 50\n",
+    "test_wnum = 50\n",
+    "test_nnum = 50\n",
+    "\n",
    "wtrain = wary[:train_wnum,:]\n",
    "ntrain = nary[:train_nnum,:]\n",
    "wtest = wary[train_wnum:,:]\n",
    "ntest = nary[train_nnum:,:]"
   ]
  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(50, 4096)"
+      ]
+     },
+     "execution_count": 27,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "wtrain.shape"
+   ]
+  },
  {
   "cell_type": "markdown",
   "metadata": {},
@@ -94,7 +141,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -108,25 +155,25 @@
   "cell_type": "markdown",
   "metadata": {},
   "source": [
-    "Training set name"
+    "# Training set"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
-      "100%|██████████| 50/50 [00:00<00:00, 299593.14it/s]\n"
+      "100%|██████████| 4096/4096 [00:00<00:00, 774600.71it/s]\n"
     ]
    }
   ],
   "source": [
    "train_name = []\n",
-    "num = 50\n",
+    "num = wtrain.shape[1]-1 # 4096\n",
    "train_name.append('label')\n",
    "for i in tqdm(range(0,num)):\n",
    "    train_name.append('point{s1}'.format(s1=i))"
@@ -134,14 +181,14 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 19,
+   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
-      "100%|██████████| 50/50 [00:00<00:00, 63.12it/s]\n"
+      "100%|██████████| 50/50 [00:00<00:00, 120.83it/s]\n"
     ]
    }
   ],
@@ -156,29 +203,58 @@
    "        writer.writerow(ntrain[i])"
   ]
  },
+  {
+   "cell_type": "code",
+   "execution_count": 32,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "train_set = pd.read_csv(\"./output/train.csv\", dtype=np.float32)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 33,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(100, 4097)"
+      ]
+     },
+     "execution_count": 33,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "train_set.shape"
+   ]
+  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
-    "testing set name"
+    "# Testing set"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 20,
+   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
-      "100%|██████████| 50/50 [00:00<00:00, 394201.50it/s]\n"
+      "100%|██████████| 4096/4096 [00:00<00:00, 457568.56it/s]\n"
     ]
    }
   ],
   "source": [
    "test_name = []\n",
-    "num = 50\n",
+    "num = wtrain.shape[1]-1 # 4096\n",
    "test_name.append('label')\n",
    "for i in tqdm(range(0,num)):\n",
    "    test_name.append('point{s1}'.format(s1=i))"
@@ -186,14 +262,14 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 21,
+   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
-      "100%|██████████| 50/50 [00:00<00:00, 60.24it/s]\n"
+      "100%|██████████| 50/50 [00:00<00:00, 120.86it/s]\n"
     ]
    }
   ],
@@ -208,6 +284,35 @@
    "        writer.writerow(ntest[i])"
   ]
  },
+  {
+   "cell_type": "code",
+   "execution_count": 36,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "test_set = pd.read_csv(\"./output/test.csv\", dtype=np.float32)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 38,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(100, 4097)"
+      ]
+     },
+     "execution_count": 38,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "test_set.shape"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
@@ -232,7 +337,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.9.1"
+   "version": "3.8.10"
  }
 },
 "nbformat": 4,

 %% Cell type:code id: tags:

 ``` python
 import csv
 import numpy as np
 import pandas as pd
 from tqdm import tqdm
 import utils.samplefiles
-```
-
-%% Cell type:code id: tags:
-
-``` python
-train_wnum = 50
-train_nnum = 50
-test_wnum = 50
-test_nnum = 50
+import h5py
 ```

 %% Cell type:code id: tags:

 ``` python
 data = utils.samplefiles.SampleFile()
 data.read_hdf('./output/train.hdf')
-```
-
-%% Cell type:code id: tags:

-``` python
 wave, noise = data.as_dataframe(injection_parameters=True,
                  static_arguments=False,
                  command_line_arguments=False,
                  split_injections_noise=True)
 ```

+%% Cell type:code id: tags:
+
+``` python
+wave['h1_strain'][0].size
+```
+
+%% Output
+
+    4096
+
 %% Cell type:markdown id: tags:

 Turn strain into a multi-dimensional array

 %% Cell type:code id: tags:

 ``` python
 h1w = wave['h1_strain'].tolist()
 h1n = noise['h1_strain'].tolist()
 wary = np.array(h1w)
 nary = np.array(h1n)
 ```

+%% Cell type:code id: tags:
+
+``` python
+h1w[0].size
+```
+
+%% Output
+
+    4096
+
 %% Cell type:markdown id: tags:

 Split train and test set

 %% Cell type:code id: tags:

 ``` python
+train_wnum = 50
+train_nnum = 50
+test_wnum = 50
+test_nnum = 50
+
 wtrain = wary[:train_wnum,:]
 ntrain = nary[:train_nnum,:]
 wtest = wary[train_wnum:,:]
 ntest = nary[train_nnum:,:]
 ```

+%% Cell type:code id: tags:
+
+``` python
+wtrain.shape
+```
+
+%% Output
+
+    (50, 4096)
+
 %% Cell type:markdown id: tags:

 Insert label

 %% Cell type:code id: tags:

 ``` python
 wtrain = np.insert(wtrain, 0, values=1, axis=1)
 ntrain = np.insert(ntrain, 0, values=0, axis=1)
 wtest = np.insert(wtest, 0, values=1, axis=1)
 ntest = np.insert(ntest, 0, values=0, axis=1)
 ```

 %% Cell type:markdown id: tags:

-Training set name
+# Training set

 %% Cell type:code id: tags:

 ``` python
 train_name = []
-num = 50
+num = wtrain.shape[1]-1 # 4096
 train_name.append('label')
 for i in tqdm(range(0,num)):
    train_name.append('point{s1}'.format(s1=i))
 ```

 %% Output

-    100%|██████████| 50/50 [00:00<00:00, 299593.14it/s]
+    100%|██████████| 4096/4096 [00:00<00:00, 774600.71it/s]

 %% Cell type:code id: tags:

 ``` python
 with open("output/train.csv","w") as csvfile:
    writer = csv.writer(csvfile)
    #columns_name
    writer.writerow(train_name)
    #use writerows to write lines
    for i in tqdm(range(0,train_wnum)):
        writer.writerow(wtrain[i])
        writer.writerow(ntrain[i])
 ```

 %% Output

-    100%|██████████| 50/50 [00:00<00:00, 63.12it/s]
+    100%|██████████| 50/50 [00:00<00:00, 120.83it/s]
+
+%% Cell type:code id: tags:
+
+``` python
+train_set = pd.read_csv("./output/train.csv", dtype=np.float32)
+```
+
+%% Cell type:code id: tags:
+
+``` python
+train_set.shape
+```
+
+%% Output
+
+    (100, 4097)

 %% Cell type:markdown id: tags:

-testing set name
+# Testing set

 %% Cell type:code id: tags:

 ``` python
 test_name = []
-num = 50
+num = wtrain.shape[1]-1 # 4096
 test_name.append('label')
 for i in tqdm(range(0,num)):
    test_name.append('point{s1}'.format(s1=i))
 ```

 %% Output

-    100%|██████████| 50/50 [00:00<00:00, 394201.50it/s]
+    100%|██████████| 4096/4096 [00:00<00:00, 457568.56it/s]

 %% Cell type:code id: tags:

 ``` python
 with open("output/test.csv","w") as csvfile:
    writer = csv.writer(csvfile)
    #columns_name
    writer.writerow(test_name)
    #use writerows to write lines
    for i in tqdm(range(0,test_wnum)):
        writer.writerow(wtest[i])
        writer.writerow(ntest[i])
 ```

 %% Output

-    100%|██████████| 50/50 [00:00<00:00, 60.24it/s]
+    100%|██████████| 50/50 [00:00<00:00, 120.86it/s]
+
+%% Cell type:code id: tags:
+
+``` python
+test_set = pd.read_csv("./output/test.csv", dtype=np.float32)
+```
+
+%% Cell type:code id: tags:
+
+``` python
+test_set.shape
+```
+
+%% Output
+
+    (100, 4097)

 %% Cell type:code id: tags:

 ``` python
 ```

--- a/workspace/3-training.ipynb
+++ b/workspace/3-training.ipynb
--- a/workspace/3-machine_learning.ipynb
+++ b/workspace/3-machine_learning.ipynb
--- a/workspace/config_files/default.json
+++ b/workspace/config_files/default.json
@@ -8,5 +8,5 @@
  "n_injection_samples": 100,
  "n_noise_samples": 100,
  "n_processes": 4,
-  "output_file_name": "train.hdf"
+  "output_file_name": "test.hdf"
 }
--- a/workspace/config_files/waveform_params.ini
+++ b/workspace/config_files/waveform_params.ini
@@ -13,7 +13,7 @@ injection_snr =
 [static_args]
 approximant = SEOBNRv4
 domain = time
-f_lower = 18
+f_lower = 10
 distance = 100
 waveform_length = 128

@@ -31,8 +31,8 @@ target_sampling_rate = 2048
 ; Define parameters for the whitening procedure. See documentation of the
 ; pycbc.types.TimeSeries.whiten() method for an explanation of what these
 ; values exactly mean.
-whitening_segment_duration = 4
-whitening_max_filter_duration = 4
+whitening_segment_duration = 1
+whitening_max_filter_duration = 1

 ; Define the lower and upper bound for the bandpass filter (in Hertz)
 bandpass_lower = 20
@@ -41,8 +41,8 @@ bandpass_upper = 2048
 ; Define how to align the sample around the event time. By convention, the
 ; event time is the H1 time!
 ; The sum of these values will be the sample_length!
-seconds_before_event = 5.5
-seconds_after_event = 2.5
+seconds_before_event = 1.5
+seconds_after_event = 0.5

 ; alpha for the Tukey window that is used to "fade on" the waveforms
 ; It represents the fraction of the window inside the cosine tapered region.

--- a/workspace/param/exp1_data1.2_convnet4.pt
+++ b/workspace/param/exp1_data1.2_convnet4.pt
--- a/workspace/plot_sample.py
+++ b/workspace/plot_sample.py
--- a/workspace/utils/__init__.py
+++ b/workspace/utils/__init__.py
--- a/workspace/utils/configfiles.py
+++ b/workspace/utils/configfiles.py
--- a/workspace/utils/hdffiles.py
+++ b/workspace/utils/hdffiles.py
--- a/workspace/utils/progressbar.py
+++ b/workspace/utils/progressbar.py
--- a/workspace/utils/samplefiles.py
+++ b/workspace/utils/samplefiles.py
--- a/workspace/utils/samplegeneration.py
+++ b/workspace/utils/samplegeneration.py
--- a/workspace/utils/staticargs.py
+++ b/workspace/utils/staticargs.py
--- a/workspace/utils/waveforms.py
+++ b/workspace/utils/waveforms.py