{
"cells": [
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"import csv\n",
"import numpy as np\n",
"import pandas as pd\n",
"from tqdm import tqdm\n",
"import utils.samplefiles\n",
"import h5py"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"# Load the sample file stored at ./output/train.hdf.\n",
"data = utils.samplefiles.SampleFile()\n",
"data.read_hdf('./output/train.hdf')\n",
"\n",
"# The result is unpacked into two objects, `wave` and `noise` (presumably the\n",
"# injection / pure-noise halves selected by split_injections_noise=True --\n",
"# confirm against utils.samplefiles). Injection parameters are requested;\n",
"# static and command-line arguments are not.\n",
"wave, noise = data.as_dataframe(\n",
"    injection_parameters=True,\n",
"    static_arguments=False,\n",
"    command_line_arguments=False,\n",
"    split_injections_noise=True,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"4096"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"wave['h1_strain'][0].size"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Turn the strain series into multi-dimensional arrays"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
"# Extract the 'h1_strain' column of each frame as a plain Python list ...\n",
"h1w, h1n = (frame['h1_strain'].tolist() for frame in (wave, noise))\n",
"# ... then build one NumPy array per list (assumes all entries in a list have\n",
"# the same length, so each array comes out two-dimensional).\n",
"wary, nary = np.array(h1w), np.array(h1n)"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"4096"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"h1w[0].size"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Split into training and test sets"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [],
"source": [
"# Number of wave / noise rows assigned to each split.\n",
"train_wnum = 50\n",
"train_nnum = 50\n",
"test_wnum = 50\n",
"test_nnum = 50\n",
"\n",
"# Fail early if the arrays cannot supply both splits, instead of hitting an\n",
"# IndexError later when the rows are accessed by index.\n",
"assert wary.shape[0] >= train_wnum + test_wnum, 'not enough wave samples for the requested split'\n",
"assert nary.shape[0] >= train_nnum + test_nnum, 'not enough noise samples for the requested split'\n",
"\n",
"# The first train_* rows form the training split and the next test_* rows form\n",
"# the test split. Bounding the test slices makes test_wnum / test_nnum actually\n",
"# control the test size instead of silently taking every remaining row.\n",
"wtrain = wary[:train_wnum, :]\n",
"ntrain = nary[:train_nnum, :]\n",
"wtest = wary[train_wnum:train_wnum + test_wnum, :]\n",
"ntest = nary[train_nnum:train_nnum + test_nnum, :]"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(50, 4096)"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"wtrain.shape"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Insert the label as the first column (1 for wave rows, 0 for noise rows)"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [],
"source": [
"# Prepend a label column at index 0: 1 for the wave-derived arrays and 0 for\n",
"# the noise-derived arrays.\n",
"# NOTE: this rebinds the same names, so re-running the cell prepends yet\n",
"# another label column; re-run the split cell first.\n",
"wtrain, wtest = (np.insert(block, 0, values=1, axis=1) for block in (wtrain, wtest))\n",
"ntrain, ntest = (np.insert(block, 0, values=0, axis=1) for block in (ntrain, ntest))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Training set"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 4096/4096 [00:00<00:00, 774600.71it/s]\n"
]
}
],
"source": [
"# Header row for the training CSV: 'label' followed by one 'point<k>' name per\n",
"# sample column. The -1 discounts the label column already prepended to wtrain.\n",
"num = wtrain.shape[1] - 1  # 4096\n",
"train_name = ['label'] + ['point{s1}'.format(s1=i) for i in tqdm(range(num))]"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 50/50 [00:00<00:00, 120.83it/s]\n"
]
}
],
"source": [
"# newline='' is what the csv module requires for files handed to csv.writer;\n",
"# without it, platforms that write CRLF line endings emit an extra carriage\n",
"# return per row.\n",
"with open(\"output/train.csv\", \"w\", newline='') as csvfile:\n",
"    writer = csv.writer(csvfile)\n",
"    # Header row with the column names.\n",
"    writer.writerow(train_name)\n",
"    # Interleave the rows: one wave row followed by one noise row per step.\n",
"    # NOTE: the loop bound is train_wnum, so ntrain must hold at least that\n",
"    # many rows; any noise rows beyond train_wnum are not written.\n",
"    for i in tqdm(range(0, train_wnum)):\n",
"        writer.writerow(wtrain[i])\n",
"        writer.writerow(ntrain[i])"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [],
"source": [
"# Read the training CSV back, parsing every column (label included) as float32.\n",
"train_set = pd.read_csv(\n",
"    \"./output/train.csv\",\n",
"    dtype=np.float32,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(100, 4097)"
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"train_set.shape"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Testing set"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 4096/4096 [00:00<00:00, 457568.56it/s]\n"
]
}
],
"source": [
"# Header row for the test CSV: 'label' followed by one 'point<k>' name per\n",
"# sample column. The width is taken from wtest (the array written to this\n",
"# file) rather than from wtrain; the -1 discounts the label column already\n",
"# prepended to it.\n",
"num = wtest.shape[1] - 1  # 4096\n",
"test_name = ['label']\n",
"for i in tqdm(range(num)):\n",
"    test_name.append('point{s1}'.format(s1=i))"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 50/50 [00:00<00:00, 120.86it/s]\n"
]
}
],
"source": [
"# newline='' is what the csv module requires for files handed to csv.writer;\n",
"# without it, platforms that write CRLF line endings emit an extra carriage\n",
"# return per row.\n",
"with open(\"output/test.csv\", \"w\", newline='') as csvfile:\n",
"    writer = csv.writer(csvfile)\n",
"    # Header row with the column names.\n",
"    writer.writerow(test_name)\n",
"    # Interleave the rows: one wave row followed by one noise row per step.\n",
"    # NOTE: the loop bound is test_wnum, so both wtest and ntest must hold at\n",
"    # least that many rows; rows beyond test_wnum are not written.\n",
"    for i in tqdm(range(0, test_wnum)):\n",
"        writer.writerow(wtest[i])\n",
"        writer.writerow(ntest[i])"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [],
"source": [
"# Read the test CSV back, parsing every column (label included) as float32.\n",
"test_set = pd.read_csv(\n",
"    \"./output/test.csv\",\n",
"    dtype=np.float32,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(100, 4097)"
]
},
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"test_set.shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.10"
}
},
"nbformat": 4,
"nbformat_minor": 2
}