{"metadata":{"kernelspec":{"language":"python","display_name":"Python 3","name":"python3"},"language_info":{"name":"python","version":"3.7.6","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"markdown","source":"# Flood prediction Model","metadata":{}},{"cell_type":"code","source":"#Import some basic libraries\nimport numpy as np\nimport pandas as pd","metadata":{"execution":{"iopub.status.busy":"2021-08-08T04:28:48.059515Z","iopub.execute_input":"2021-08-08T04:28:48.059825Z","iopub.status.idle":"2021-08-08T04:28:48.064116Z","shell.execute_reply.started":"2021-08-08T04:28:48.059795Z","shell.execute_reply":"2021-08-08T04:28:48.063092Z"},"trusted":true},"execution_count":52,"outputs":[]},{"cell_type":"markdown","source":"# Data Insight","metadata":{}},{"cell_type":"code","source":"#Read the data present in dataset\n##data = pd.read_csv('../input/kerela-flood/kerala.csv')\n\ndata = pd.read_csv('../input/my-district-dataset/FLOOD PREDICTION MALAYSIA.csv')\n#data = pd.read_csv('../input/simpleflooddata/Flood_Rain_Simple_Data.csv')\n\n#Using data.head() we can see the top 5 rows of the dataset\ndata.head()","metadata":{"execution":{"iopub.status.busy":"2021-08-08T04:28:48.072735Z","iopub.execute_input":"2021-08-08T04:28:48.073102Z","iopub.status.idle":"2021-08-08T04:28:48.111350Z","shell.execute_reply.started":"2021-08-08T04:28:48.073065Z","shell.execute_reply":"2021-08-08T04:28:48.110345Z"},"trusted":true},"execution_count":53,"outputs":[{"execution_count":53,"output_type":"execute_result","data":{"text/plain":" STATE DISTRICT YEAR JAN FEB MAR APR MAY JUN \\\n0 108 108001 2000 158.83 162.37 210.68 192.51 214.73 157.55 \n1 108 108001 2001 159.10 41.71 174.50 220.56 177.65 105.61 \n2 108 108001 2002 61.25 50.34 88.15 207.13 115.01 96.08 \n3 108 108001 2003 82.88 118.04 193.40 100.36 101.07 166.81 \n4 108 108001 
2004 119.30 71.16 120.80 138.74 120.27 146.03 \n\n JUL AUG SEP OCT 0V DEC ANNUAL RAINFALL FLOOD \n0 98.80 165.63 289.14 388.77 313.59 213.60 2566.19 0 \n1 166.59 193.88 206.40 298.14 232.54 150.82 2127.50 1 \n2 115.78 111.12 285.96 206.94 261.33 264.61 1863.70 1 \n3 167.61 270.87 238.84 682.07 251.46 182.35 2555.77 1 \n4 145.35 172.92 222.61 360.21 187.22 168.79 1973.39 1 ","text/html":"
\n\n
\n \n
\n
\n
STATE
\n
DISTRICT
\n
YEAR
\n
JAN
\n
FEB
\n
MAR
\n
APR
\n
MAY
\n
JUN
\n
JUL
\n
AUG
\n
SEP
\n
OCT
\n
0V
\n
DEC
\n
ANNUAL RAINFALL
\n
FLOOD
\n
\n \n \n
\n
0
\n
108
\n
108001
\n
2000
\n
158.83
\n
162.37
\n
210.68
\n
192.51
\n
214.73
\n
157.55
\n
98.80
\n
165.63
\n
289.14
\n
388.77
\n
313.59
\n
213.60
\n
2566.19
\n
0
\n
\n
\n
1
\n
108
\n
108001
\n
2001
\n
159.10
\n
41.71
\n
174.50
\n
220.56
\n
177.65
\n
105.61
\n
166.59
\n
193.88
\n
206.40
\n
298.14
\n
232.54
\n
150.82
\n
2127.50
\n
1
\n
\n
\n
2
\n
108
\n
108001
\n
2002
\n
61.25
\n
50.34
\n
88.15
\n
207.13
\n
115.01
\n
96.08
\n
115.78
\n
111.12
\n
285.96
\n
206.94
\n
261.33
\n
264.61
\n
1863.70
\n
1
\n
\n
\n
3
\n
108
\n
108001
\n
2003
\n
82.88
\n
118.04
\n
193.40
\n
100.36
\n
101.07
\n
166.81
\n
167.61
\n
270.87
\n
238.84
\n
682.07
\n
251.46
\n
182.35
\n
2555.77
\n
1
\n
\n
\n
4
\n
108
\n
108001
\n
2004
\n
119.30
\n
71.16
\n
120.80
\n
138.74
\n
120.27
\n
146.03
\n
145.35
\n
172.92
\n
222.61
\n
360.21
\n
187.22
\n
168.79
\n
1973.39
\n
1
\n
\n \n
\n
"},"metadata":{}}]},{"cell_type":"code","source":"#Now we will check whether any column has missing values\ndata.apply(lambda x:sum(x.isnull()), axis=0)","metadata":{"execution":{"iopub.status.busy":"2021-08-08T04:28:48.114067Z","iopub.execute_input":"2021-08-08T04:28:48.114562Z","iopub.status.idle":"2021-08-08T04:28:48.127952Z","shell.execute_reply.started":"2021-08-08T04:28:48.114502Z","shell.execute_reply":"2021-08-08T04:28:48.126873Z"},"trusted":true},"execution_count":54,"outputs":[{"execution_count":54,"output_type":"execute_result","data":{"text/plain":"STATE 0\nDISTRICT 0\nYEAR 0\nJAN 0\nFEB 0\nMAR 0\nAPR 0\nMAY 0\nJUN 0\nJUL 0\nAUG 0\nSEP 0\nOCT 0\n0V 0\nDEC 0\nANNUAL RAINFALL 0\nFLOOD 0\ndtype: int64"},"metadata":{}}]},{"cell_type":"code","source":"#We want the data in numbers, therefore we will replace the yes/no in the flood column by 1/0\n#data['FLOOD'].replace(['YES','NO'],[1,0],inplace=True)\n#data['FLOODS'].replace(['Yes','No'],[1,0],inplace=True)\n#print('done')","metadata":{"execution":{"iopub.status.busy":"2021-08-08T04:28:48.130075Z","iopub.execute_input":"2021-08-08T04:28:48.130474Z","iopub.status.idle":"2021-08-08T04:28:48.138603Z","shell.execute_reply.started":"2021-08-08T04:28:48.130434Z","shell.execute_reply":"2021-08-08T04:28:48.137546Z"},"trusted":true},"execution_count":55,"outputs":[]},{"cell_type":"markdown","source":"The new dataset already stores the flood label as 0/1, so no conversion is needed.\n","metadata":{}},{"cell_type":"code","source":"#Let's see what our data looks like now\n#data.head()","metadata":{"execution":{"iopub.status.busy":"2021-08-08T04:28:48.140992Z","iopub.execute_input":"2021-08-08T04:28:48.141637Z","iopub.status.idle":"2021-08-08T04:28:48.148294Z","shell.execute_reply.started":"2021-08-08T04:28:48.141586Z","shell.execute_reply":"2021-08-08T04:28:48.147413Z"},"trusted":true},"execution_count":56,"outputs":[]},{"cell_type":"code","source":"#Now let's separate the data which we are going to use for prediction\n\nx = 
data.iloc[:,0:16]\nx.head()","metadata":{"execution":{"iopub.status.busy":"2021-08-08T04:28:48.151564Z","iopub.execute_input":"2021-08-08T04:28:48.152373Z","iopub.status.idle":"2021-08-08T04:28:48.177567Z","shell.execute_reply.started":"2021-08-08T04:28:48.152304Z","shell.execute_reply":"2021-08-08T04:28:48.176516Z"},"trusted":true},"execution_count":57,"outputs":[{"execution_count":57,"output_type":"execute_result","data":{"text/plain":" STATE DISTRICT YEAR JAN FEB MAR APR MAY JUN \\\n0 108 108001 2000 158.83 162.37 210.68 192.51 214.73 157.55 \n1 108 108001 2001 159.10 41.71 174.50 220.56 177.65 105.61 \n2 108 108001 2002 61.25 50.34 88.15 207.13 115.01 96.08 \n3 108 108001 2003 82.88 118.04 193.40 100.36 101.07 166.81 \n4 108 108001 2004 119.30 71.16 120.80 138.74 120.27 146.03 \n\n JUL AUG SEP OCT 0V DEC ANNUAL RAINFALL \n0 98.80 165.63 289.14 388.77 313.59 213.60 2566.19 \n1 166.59 193.88 206.40 298.14 232.54 150.82 2127.50 \n2 115.78 111.12 285.96 206.94 261.33 264.61 1863.70 \n3 167.61 270.87 238.84 682.07 251.46 182.35 2555.77 \n4 145.35 172.92 222.61 360.21 187.22 168.79 1973.39 ","text/html":"
\n\n
\n \n
\n
\n
STATE
\n
DISTRICT
\n
YEAR
\n
JAN
\n
FEB
\n
MAR
\n
APR
\n
MAY
\n
JUN
\n
JUL
\n
AUG
\n
SEP
\n
OCT
\n
0V
\n
DEC
\n
ANNUAL RAINFALL
\n
\n \n \n
\n
0
\n
108
\n
108001
\n
2000
\n
158.83
\n
162.37
\n
210.68
\n
192.51
\n
214.73
\n
157.55
\n
98.80
\n
165.63
\n
289.14
\n
388.77
\n
313.59
\n
213.60
\n
2566.19
\n
\n
\n
1
\n
108
\n
108001
\n
2001
\n
159.10
\n
41.71
\n
174.50
\n
220.56
\n
177.65
\n
105.61
\n
166.59
\n
193.88
\n
206.40
\n
298.14
\n
232.54
\n
150.82
\n
2127.50
\n
\n
\n
2
\n
108
\n
108001
\n
2002
\n
61.25
\n
50.34
\n
88.15
\n
207.13
\n
115.01
\n
96.08
\n
115.78
\n
111.12
\n
285.96
\n
206.94
\n
261.33
\n
264.61
\n
1863.70
\n
\n
\n
3
\n
108
\n
108001
\n
2003
\n
82.88
\n
118.04
\n
193.40
\n
100.36
\n
101.07
\n
166.81
\n
167.61
\n
270.87
\n
238.84
\n
682.07
\n
251.46
\n
182.35
\n
2555.77
\n
\n
\n
4
\n
108
\n
108001
\n
2004
\n
119.30
\n
71.16
\n
120.80
\n
138.74
\n
120.27
\n
146.03
\n
145.35
\n
172.92
\n
222.61
\n
360.21
\n
187.22
\n
168.79
\n
1973.39
\n
\n \n
\n
"},"metadata":{}}]},{"cell_type":"code","source":"#Now separate the flood label (the last column) from the dataset\n##Changed to separate to no. of occurrences\ny = data.iloc[:, -1]\ny.head()","metadata":{"execution":{"iopub.status.busy":"2021-08-08T04:28:48.179610Z","iopub.execute_input":"2021-08-08T04:28:48.180277Z","iopub.status.idle":"2021-08-08T04:28:48.190936Z","shell.execute_reply.started":"2021-08-08T04:28:48.180206Z","shell.execute_reply":"2021-08-08T04:28:48.189614Z"},"trusted":true},"execution_count":58,"outputs":[{"execution_count":58,"output_type":"execute_result","data":{"text/plain":"0 0\n1 1\n2 1\n3 1\n4 1\nName: FLOOD , dtype: int64"},"metadata":{}}]},{"cell_type":"code","source":"#Let's see how the rainfall index varies during the rainy season\n\nimport matplotlib.pyplot as plt\n%matplotlib inline\nc = data[['JUN','JUL','AUG','SEP']]\nc.hist()\nplt.show()","metadata":{"execution":{"iopub.status.busy":"2021-08-08T04:28:48.193439Z","iopub.execute_input":"2021-08-08T04:28:48.193993Z","iopub.status.idle":"2021-08-08T04:28:48.694451Z","shell.execute_reply.started":"2021-08-08T04:28:48.193908Z","shell.execute_reply":"2021-08-08T04:28:48.693266Z"},"trusted":true},"execution_count":59,"outputs":[{"output_type":"display_data","data":{"text/plain":"
","image/png":"\n"},"metadata":{"needs_background":"light"}}]},{"cell_type":"code","source":"#Data might be widely distributed so let's scale it between 0 and 1\n#NOTE: the scaled array is only displayed here; it is not assigned back, so x itself stays unscaled\nfrom sklearn import preprocessing\nminmax = preprocessing.MinMaxScaler(feature_range=(0,1))\nminmax.fit(x).transform(x)","metadata":{"execution":{"iopub.status.busy":"2021-08-08T04:28:48.696214Z","iopub.execute_input":"2021-08-08T04:28:48.696625Z","iopub.status.idle":"2021-08-08T04:28:48.713123Z","shell.execute_reply.started":"2021-08-08T04:28:48.696584Z","shell.execute_reply":"2021-08-08T04:28:48.711595Z"},"trusted":true},"execution_count":60,"outputs":[{"execution_count":60,"output_type":"execute_result","data":{"text/plain":"array([[0.58333333, 0.58275058, 0. , ..., 0.24197396, 0.21689401,\n 0.27370822],\n [0.58333333, 0.58275058, 0.1 , ..., 0.15328811, 0.13151274,\n 0.13721348],\n [0.58333333, 0.58275058, 0.2 , ..., 0.18479046, 0.286268 ,\n 0.0551343 ],\n ...,\n [1. , 1. , 0.8 , ..., 0.33582449, 0.66894695,\n 0.68216256],\n [1. , 1. , 0.9 , ..., 0.31348069, 0.43433203,\n 0.58096062],\n [1. , 1. , 1. 
, ..., 0.22622825, 0.37333569,\n 0.61803626]])"},"metadata":{}}]},{"cell_type":"code","source":"#Let's divide the dataset into 2 sets: train and test, in a 4:1 ratio\n#NOTE: no random_state is passed, so the split changes on every run\nfrom sklearn import model_selection,neighbors\nfrom sklearn.model_selection import train_test_split\n\nx_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2)\nprint('done')","metadata":{"execution":{"iopub.status.busy":"2021-08-08T04:28:48.715009Z","iopub.execute_input":"2021-08-08T04:28:48.715489Z","iopub.status.idle":"2021-08-08T04:28:48.725481Z","shell.execute_reply.started":"2021-08-08T04:28:48.715441Z","shell.execute_reply":"2021-08-08T04:28:48.724349Z"},"trusted":true},"execution_count":61,"outputs":[{"name":"stdout","text":"done\n","output_type":"stream"}]},{"cell_type":"code","source":"#Let's see what our train set looks like\nx_train.head()","metadata":{"execution":{"iopub.status.busy":"2021-08-08T04:28:48.730421Z","iopub.execute_input":"2021-08-08T04:28:48.731108Z","iopub.status.idle":"2021-08-08T04:28:48.757578Z","shell.execute_reply.started":"2021-08-08T04:28:48.731057Z","shell.execute_reply":"2021-08-08T04:28:48.756212Z"},"trusted":true},"execution_count":62,"outputs":[{"execution_count":62,"output_type":"execute_result","data":{"text/plain":" STATE DISTRICT YEAR JAN FEB MAR APR MAY JUN \\\n285 104 104002 2010 132.35 147.80 189.22 282.07 204.16 281.77 \n75 109 109001 2009 86.13 35.56 237.37 183.95 239.64 54.51 \n755 113 113007 2007 665.20 476.31 229.65 290.45 171.51 287.15 \n617 112 112010 2001 248.31 166.11 304.94 213.58 119.12 334.54 \n313 101 101002 2005 51.45 93.53 163.38 171.01 218.36 68.27 \n\n JUL AUG SEP OCT 0V DEC ANNUAL RAINFALL \n285 184.29 205.93 183.01 121.62 259.54 220.18 2411.96 \n75 177.39 294.59 205.61 215.24 474.45 100.80 2305.25 \n755 354.60 266.08 358.46 257.24 371.74 526.44 4254.84 \n617 130.77 152.89 390.77 667.85 275.17 278.29 3282.32 \n313 234.98 307.62 132.12 268.09 247.55 205.03 2161.37 ","text/html":"