{"metadata":{"kernelspec":{"language":"python","display_name":"Python 3","name":"python3"},"language_info":{"name":"python","version":"3.7.6","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"markdown","source":"# Flood prediction Model","metadata":{}},{"cell_type":"code","source":"#Import some basic libraries\nimport numpy as np\nimport pandas as pd","metadata":{"execution":{"iopub.status.busy":"2021-08-08T04:28:48.059515Z","iopub.execute_input":"2021-08-08T04:28:48.059825Z","iopub.status.idle":"2021-08-08T04:28:48.064116Z","shell.execute_reply.started":"2021-08-08T04:28:48.059795Z","shell.execute_reply":"2021-08-08T04:28:48.063092Z"},"trusted":true},"execution_count":52,"outputs":[]},{"cell_type":"markdown","source":"# Data Insight","metadata":{}},{"cell_type":"code","source":"#Read the data present in dataset\n##data = pd.read_csv('../input/kerela-flood/kerala.csv')\n\ndata = pd.read_csv('../input/my-district-dataset/FLOOD PREDICTION MALAYSIA.csv')\n#data = pd.read_csv('../input/simpleflooddata/Flood_Rain_Simple_Data.csv')\n\n#Using data.head() we can see the top 5 rows of the dataset\ndata.head()","metadata":{"execution":{"iopub.status.busy":"2021-08-08T04:28:48.072735Z","iopub.execute_input":"2021-08-08T04:28:48.073102Z","iopub.status.idle":"2021-08-08T04:28:48.111350Z","shell.execute_reply.started":"2021-08-08T04:28:48.073065Z","shell.execute_reply":"2021-08-08T04:28:48.110345Z"},"trusted":true},"execution_count":53,"outputs":[{"execution_count":53,"output_type":"execute_result","data":{"text/plain":" STATE DISTRICT YEAR JAN FEB MAR APR MAY JUN \\\n0 108 108001 2000 158.83 162.37 210.68 192.51 214.73 157.55 \n1 108 108001 2001 159.10 41.71 174.50 220.56 177.65 105.61 \n2 108 108001 2002 61.25 50.34 88.15 207.13 115.01 96.08 \n3 108 108001 2003 82.88 118.04 193.40 100.36 101.07 166.81 \n4 108 108001 2004 119.30 71.16 120.80 138.74 120.27 146.03 \n\n JUL AUG SEP OCT 0V DEC ANNUAL RAINFALL FLOOD \n0 98.80 165.63 289.14 388.77 313.59 213.60 2566.19 0 \n1 166.59 193.88 206.40 298.14 232.54 150.82 2127.50 1 \n2 115.78 111.12 285.96 206.94 261.33 264.61 1863.70 1 \n3 167.61 270.87 238.84 682.07 251.46 182.35 2555.77 1 \n4 145.35 172.92 222.61 360.21 187.22 168.79 1973.39 1 ","text/html":"
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
STATEDISTRICTYEARJANFEBMARAPRMAYJUNJULAUGSEPOCT0VDECANNUAL RAINFALLFLOOD
01081080012000158.83162.37210.68192.51214.73157.5598.80165.63289.14388.77313.59213.602566.190
11081080012001159.1041.71174.50220.56177.65105.61166.59193.88206.40298.14232.54150.822127.501
2108108001200261.2550.3488.15207.13115.0196.08115.78111.12285.96206.94261.33264.611863.701
3108108001200382.88118.04193.40100.36101.07166.81167.61270.87238.84682.07251.46182.352555.771
41081080012004119.3071.16120.80138.74120.27146.03145.35172.92222.61360.21187.22168.791973.391
\n
"},"metadata":{}}]},{"cell_type":"code","source":"#Now we will cheak if any colomns is left empty\ndata.apply(lambda x:sum(x.isnull()), axis=0)","metadata":{"execution":{"iopub.status.busy":"2021-08-08T04:28:48.114067Z","iopub.execute_input":"2021-08-08T04:28:48.114562Z","iopub.status.idle":"2021-08-08T04:28:48.127952Z","shell.execute_reply.started":"2021-08-08T04:28:48.114502Z","shell.execute_reply":"2021-08-08T04:28:48.126873Z"},"trusted":true},"execution_count":54,"outputs":[{"execution_count":54,"output_type":"execute_result","data":{"text/plain":"STATE 0\nDISTRICT 0\nYEAR 0\nJAN 0\nFEB 0\nMAR 0\nAPR 0\nMAY 0\nJUN 0\nJUL 0\nAUG 0\nSEP 0\nOCT 0\n0V 0\nDEC 0\nANNUAL RAINFALL 0\nFLOOD 0\ndtype: int64"},"metadata":{}}]},{"cell_type":"code","source":"#We want the data in numbers, therefore we will replace the yes/no in floods coloumn by 1/0\n#data['FLOOD'].replace(['YES','NO'],[1,0],inplace=True)\n#data['FLOODS'].replace(['Yes','No'],[1,0],inplace=True)\n#print('done')","metadata":{"execution":{"iopub.status.busy":"2021-08-08T04:28:48.130075Z","iopub.execute_input":"2021-08-08T04:28:48.130474Z","iopub.status.idle":"2021-08-08T04:28:48.138603Z","shell.execute_reply.started":"2021-08-08T04:28:48.130434Z","shell.execute_reply":"2021-08-08T04:28:48.137546Z"},"trusted":true},"execution_count":55,"outputs":[]},{"cell_type":"markdown","source":"New data no need to convert\n","metadata":{}},{"cell_type":"code","source":"#Let's see how are data looks like now\n#data.head()","metadata":{"execution":{"iopub.status.busy":"2021-08-08T04:28:48.140992Z","iopub.execute_input":"2021-08-08T04:28:48.141637Z","iopub.status.idle":"2021-08-08T04:28:48.148294Z","shell.execute_reply.started":"2021-08-08T04:28:48.141586Z","shell.execute_reply":"2021-08-08T04:28:48.147413Z"},"trusted":true},"execution_count":56,"outputs":[]},{"cell_type":"code","source":"#Now let's seperate the data which we are gonna use for prediction\n\nx = data.iloc[:,0:16]\nx.head()","metadata":{"execution":{"iopub.status.busy":"2021-08-08T04:28:48.151564Z","iopub.execute_input":"2021-08-08T04:28:48.152373Z","iopub.status.idle":"2021-08-08T04:28:48.177567Z","shell.execute_reply.started":"2021-08-08T04:28:48.152304Z","shell.execute_reply":"2021-08-08T04:28:48.176516Z"},"trusted":true},"execution_count":57,"outputs":[{"execution_count":57,"output_type":"execute_result","data":{"text/plain":" STATE DISTRICT YEAR JAN FEB MAR APR MAY JUN \\\n0 108 108001 2000 158.83 162.37 210.68 192.51 214.73 157.55 \n1 108 108001 2001 159.10 41.71 174.50 220.56 177.65 105.61 \n2 108 108001 2002 61.25 50.34 88.15 207.13 115.01 96.08 \n3 108 108001 2003 82.88 118.04 193.40 100.36 101.07 166.81 \n4 108 108001 2004 119.30 71.16 120.80 138.74 120.27 146.03 \n\n JUL AUG SEP OCT 0V DEC ANNUAL RAINFALL \n0 98.80 165.63 289.14 388.77 313.59 213.60 2566.19 \n1 166.59 193.88 206.40 298.14 232.54 150.82 2127.50 \n2 115.78 111.12 285.96 206.94 261.33 264.61 1863.70 \n3 167.61 270.87 238.84 682.07 251.46 182.35 2555.77 \n4 145.35 172.92 222.61 360.21 187.22 168.79 1973.39 ","text/html":"
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
STATEDISTRICTYEARJANFEBMARAPRMAYJUNJULAUGSEPOCT0VDECANNUAL RAINFALL
01081080012000158.83162.37210.68192.51214.73157.5598.80165.63289.14388.77313.59213.602566.19
11081080012001159.1041.71174.50220.56177.65105.61166.59193.88206.40298.14232.54150.822127.50
2108108001200261.2550.3488.15207.13115.0196.08115.78111.12285.96206.94261.33264.611863.70
3108108001200382.88118.04193.40100.36101.07166.81167.61270.87238.84682.07251.46182.352555.77
41081080012004119.3071.16120.80138.74120.27146.03145.35172.92222.61360.21187.22168.791973.39
\n
"},"metadata":{}}]},{"cell_type":"code","source":"#Now seperate the flood label from the dataset\n##Changed to seperate to no of occurance\ny = data.iloc[:, -1]\ny.head()","metadata":{"execution":{"iopub.status.busy":"2021-08-08T04:28:48.179610Z","iopub.execute_input":"2021-08-08T04:28:48.180277Z","iopub.status.idle":"2021-08-08T04:28:48.190936Z","shell.execute_reply.started":"2021-08-08T04:28:48.180206Z","shell.execute_reply":"2021-08-08T04:28:48.189614Z"},"trusted":true},"execution_count":58,"outputs":[{"execution_count":58,"output_type":"execute_result","data":{"text/plain":"0 0\n1 1\n2 1\n3 1\n4 1\nName: FLOOD , dtype: int64"},"metadata":{}}]},{"cell_type":"code","source":"#Let's see how the rainfall index vary during rainy season\n\nimport matplotlib.pyplot as plt\n%matplotlib inline\nc = data[['JUN','JUL','AUG','SEP']]\nc.hist()\nplt.show()","metadata":{"execution":{"iopub.status.busy":"2021-08-08T04:28:48.193439Z","iopub.execute_input":"2021-08-08T04:28:48.193993Z","iopub.status.idle":"2021-08-08T04:28:48.694451Z","shell.execute_reply.started":"2021-08-08T04:28:48.193908Z","shell.execute_reply":"2021-08-08T04:28:48.693266Z"},"trusted":true},"execution_count":59,"outputs":[{"output_type":"display_data","data":{"text/plain":"
","image/png":"\n"},"metadata":{"needs_background":"light"}}]},{"cell_type":"code","source":"#Data might be widely distributed so let's scale it between 0 and 1\nfrom sklearn import preprocessing\nminmax = preprocessing.MinMaxScaler(feature_range=(0,1))\nminmax.fit(x).transform(x)","metadata":{"execution":{"iopub.status.busy":"2021-08-08T04:28:48.696214Z","iopub.execute_input":"2021-08-08T04:28:48.696625Z","iopub.status.idle":"2021-08-08T04:28:48.713123Z","shell.execute_reply.started":"2021-08-08T04:28:48.696584Z","shell.execute_reply":"2021-08-08T04:28:48.711595Z"},"trusted":true},"execution_count":60,"outputs":[{"execution_count":60,"output_type":"execute_result","data":{"text/plain":"array([[0.58333333, 0.58275058, 0. , ..., 0.24197396, 0.21689401,\n 0.27370822],\n [0.58333333, 0.58275058, 0.1 , ..., 0.15328811, 0.13151274,\n 0.13721348],\n [0.58333333, 0.58275058, 0.2 , ..., 0.18479046, 0.286268 ,\n 0.0551343 ],\n ...,\n [1. , 1. , 0.8 , ..., 0.33582449, 0.66894695,\n 0.68216256],\n [1. , 1. , 0.9 , ..., 0.31348069, 0.43433203,\n 0.58096062],\n [1. , 1. , 1. , ..., 0.22622825, 0.37333569,\n 0.61803626]])"},"metadata":{}}]},{"cell_type":"code","source":"#Let's divide the dataset into 2 sets:train and test in ratio (4:1)\nfrom sklearn import model_selection,neighbors\nfrom sklearn.model_selection import train_test_split\n\nx_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2)\nprint('done')","metadata":{"execution":{"iopub.status.busy":"2021-08-08T04:28:48.715009Z","iopub.execute_input":"2021-08-08T04:28:48.715489Z","iopub.status.idle":"2021-08-08T04:28:48.725481Z","shell.execute_reply.started":"2021-08-08T04:28:48.715441Z","shell.execute_reply":"2021-08-08T04:28:48.724349Z"},"trusted":true},"execution_count":61,"outputs":[{"name":"stdout","text":"done\n","output_type":"stream"}]},{"cell_type":"code","source":"#Let's see how our train set looks like\nx_train.head()","metadata":{"execution":{"iopub.status.busy":"2021-08-08T04:28:48.730421Z","iopub.execute_input":"2021-08-08T04:28:48.731108Z","iopub.status.idle":"2021-08-08T04:28:48.757578Z","shell.execute_reply.started":"2021-08-08T04:28:48.731057Z","shell.execute_reply":"2021-08-08T04:28:48.756212Z"},"trusted":true},"execution_count":62,"outputs":[{"execution_count":62,"output_type":"execute_result","data":{"text/plain":" STATE DISTRICT YEAR JAN FEB MAR APR MAY JUN \\\n285 104 104002 2010 132.35 147.80 189.22 282.07 204.16 281.77 \n75 109 109001 2009 86.13 35.56 237.37 183.95 239.64 54.51 \n755 113 113007 2007 665.20 476.31 229.65 290.45 171.51 287.15 \n617 112 112010 2001 248.31 166.11 304.94 213.58 119.12 334.54 \n313 101 101002 2005 51.45 93.53 163.38 171.01 218.36 68.27 \n\n JUL AUG SEP OCT 0V DEC ANNUAL RAINFALL \n285 184.29 205.93 183.01 121.62 259.54 220.18 2411.96 \n75 177.39 294.59 205.61 215.24 474.45 100.80 2305.25 \n755 354.60 266.08 358.46 257.24 371.74 526.44 4254.84 \n617 130.77 152.89 390.77 667.85 275.17 278.29 3282.32 \n313 234.98 307.62 132.12 268.09 247.55 205.03 2161.37 ","text/html":"
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
STATEDISTRICTYEARJANFEBMARAPRMAYJUNJULAUGSEPOCT0VDECANNUAL RAINFALL
2851041040022010132.35147.80189.22282.07204.16281.77184.29205.93183.01121.62259.54220.182411.96
75109109001200986.1335.56237.37183.95239.6454.51177.39294.59205.61215.24474.45100.802305.25
7551131130072007665.20476.31229.65290.45171.51287.15354.60266.08358.46257.24371.74526.444254.84
6171121120102001248.31166.11304.94213.58119.12334.54130.77152.89390.77667.85275.17278.293282.32
313101101002200551.4593.53163.38171.01218.3668.27234.98307.62132.12268.09247.55205.032161.37
\n
"},"metadata":{}}]},{"cell_type":"code","source":"y_train.head()","metadata":{"execution":{"iopub.status.busy":"2021-08-08T04:28:48.761292Z","iopub.execute_input":"2021-08-08T04:28:48.761921Z","iopub.status.idle":"2021-08-08T04:28:48.771513Z","shell.execute_reply.started":"2021-08-08T04:28:48.761879Z","shell.execute_reply":"2021-08-08T04:28:48.770204Z"},"trusted":true},"execution_count":63,"outputs":[{"execution_count":63,"output_type":"execute_result","data":{"text/plain":"285 1\n75 1\n755 0\n617 0\n313 0\nName: FLOOD , dtype: int64"},"metadata":{}}]},{"cell_type":"markdown","source":"# Prediction Algorithms:","metadata":{}},{"cell_type":"markdown","source":"# 1. KNN Classifier","metadata":{}},{"cell_type":"code","source":"clf = neighbors.KNeighborsClassifier()\nknn_clf = clf.fit(x_train,y_train)","metadata":{"execution":{"iopub.status.busy":"2021-08-08T04:28:48.773293Z","iopub.execute_input":"2021-08-08T04:28:48.774002Z","iopub.status.idle":"2021-08-08T04:28:48.784412Z","shell.execute_reply.started":"2021-08-08T04:28:48.773908Z","shell.execute_reply":"2021-08-08T04:28:48.783427Z"},"trusted":true},"execution_count":64,"outputs":[]},{"cell_type":"code","source":"#Let's predict chances of flood\ny_predict = knn_clf.predict(x_test)\nprint('predicted chances of flood')\nprint(y_predict)","metadata":{"tags":[],"execution":{"iopub.status.busy":"2021-08-08T04:28:48.786186Z","iopub.execute_input":"2021-08-08T04:28:48.786818Z","iopub.status.idle":"2021-08-08T04:28:48.806813Z","shell.execute_reply.started":"2021-08-08T04:28:48.786775Z","shell.execute_reply":"2021-08-08T04:28:48.805888Z"},"trusted":true},"execution_count":65,"outputs":[{"name":"stdout","text":"predicted chances of flood\n[1 1 0 0 0 1 0 1 1 1 0 1 0 0 0 0 0 0 1 1 0 1 1 0 0 0 1 0 1 1 0 0 1 1 0 1 0\n 0 0 1 0 1 0 1 0 1 0 1 1 0 0 0 1 0 0 0 0 0 1 0 0 1 1 0 1 0 1 1 0 0 1 1 0 1\n 0 1 1 1 0 0 0 0 0 0 0 0 0 0 1 1 0 0 1 1 0 1 0 1 1 0 1 1 0 0 0 0 1 0 0 1 0\n 1 1 0 1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 1 1 0 1 1 0 1 0 0 0 1 0 1 0 0 1 1 0 1\n 0 0 1 0 0 0 0 1 0 0 1 0 0 1 1 0 0]\n","output_type":"stream"}]},{"cell_type":"code","source":"#Actual chances of flood\nprint(\"actual values of floods:\")\nprint(y_test)","metadata":{"tags":[],"execution":{"iopub.status.busy":"2021-08-08T04:28:48.808889Z","iopub.execute_input":"2021-08-08T04:28:48.809336Z","iopub.status.idle":"2021-08-08T04:28:48.817944Z","shell.execute_reply.started":"2021-08-08T04:28:48.809293Z","shell.execute_reply":"2021-08-08T04:28:48.816712Z"},"trusted":true},"execution_count":66,"outputs":[{"name":"stdout","text":"actual values of floods:\n549 1\n392 0\n646 0\n750 0\n411 0\n ..\n391 0\n474 0\n680 0\n794 0\n595 0\nName: FLOOD , Length: 165, dtype: int64\n","output_type":"stream"}]},{"cell_type":"code","source":"from sklearn.model_selection import cross_val_score","metadata":{"execution":{"iopub.status.busy":"2021-08-08T04:28:48.819882Z","iopub.execute_input":"2021-08-08T04:28:48.820691Z","iopub.status.idle":"2021-08-08T04:28:48.827537Z","shell.execute_reply.started":"2021-08-08T04:28:48.820641Z","shell.execute_reply":"2021-08-08T04:28:48.826084Z"},"trusted":true},"execution_count":67,"outputs":[]},{"cell_type":"code","source":"knn_accuracy = cross_val_score(knn_clf,x_test,y_test,cv=3,scoring='accuracy',n_jobs=-1)","metadata":{"execution":{"iopub.status.busy":"2021-08-08T04:28:48.829521Z","iopub.execute_input":"2021-08-08T04:28:48.830279Z","iopub.status.idle":"2021-08-08T04:28:48.898302Z","shell.execute_reply.started":"2021-08-08T04:28:48.830232Z","shell.execute_reply":"2021-08-08T04:28:48.897070Z"},"trusted":true},"execution_count":68,"outputs":[]},{"cell_type":"code","source":"knn_accuracy.mean()","metadata":{"execution":{"iopub.status.busy":"2021-08-08T04:28:48.899614Z","iopub.execute_input":"2021-08-08T04:28:48.899954Z","iopub.status.idle":"2021-08-08T04:28:48.909697Z","shell.execute_reply.started":"2021-08-08T04:28:48.899908Z","shell.execute_reply":"2021-08-08T04:28:48.908551Z"},"trusted":true},"execution_count":69,"outputs":[{"execution_count":69,"output_type":"execute_result","data":{"text/plain":"0.6121212121212122"},"metadata":{}}]},{"cell_type":"markdown","source":"# 2. Logistic Regression","metadata":{}},{"cell_type":"code","source":"x_train_std = minmax.fit_transform(x_train)\nx_test_std = minmax.transform(x_test)","metadata":{"execution":{"iopub.status.busy":"2021-08-08T04:28:48.911728Z","iopub.execute_input":"2021-08-08T04:28:48.912508Z","iopub.status.idle":"2021-08-08T04:28:48.936894Z","shell.execute_reply.started":"2021-08-08T04:28:48.912463Z","shell.execute_reply":"2021-08-08T04:28:48.932861Z"},"trusted":true},"execution_count":70,"outputs":[]},{"cell_type":"code","source":"from sklearn.model_selection import cross_val_score\nfrom sklearn.linear_model import LogisticRegression\n\nlr = LogisticRegression()\nlr_clf = lr.fit(x_train_std,y_train)\n\nlr_accuracy = cross_val_score(lr_clf,x_test_std,y_test,cv=3,scoring='accuracy',n_jobs=-1)","metadata":{"execution":{"iopub.status.busy":"2021-08-08T04:28:48.938322Z","iopub.execute_input":"2021-08-08T04:28:48.938702Z","iopub.status.idle":"2021-08-08T04:28:49.017547Z","shell.execute_reply.started":"2021-08-08T04:28:48.938663Z","shell.execute_reply":"2021-08-08T04:28:49.015999Z"},"trusted":true},"execution_count":71,"outputs":[]},{"cell_type":"code","source":"lr_accuracy.mean()","metadata":{"execution":{"iopub.status.busy":"2021-08-08T04:28:49.023615Z","iopub.execute_input":"2021-08-08T04:28:49.025486Z","iopub.status.idle":"2021-08-08T04:28:49.047779Z","shell.execute_reply.started":"2021-08-08T04:28:49.025349Z","shell.execute_reply":"2021-08-08T04:28:49.046293Z"},"trusted":true},"execution_count":72,"outputs":[{"execution_count":72,"output_type":"execute_result","data":{"text/plain":"0.6424242424242425"},"metadata":{}}]},{"cell_type":"code","source":"y_predict = lr_clf.predict(x_test_std)\nprint('Predicted chances of flood')\n##print('Predicted No. Occurence of flood')\nprint(y_predict)","metadata":{"tags":[],"execution":{"iopub.status.busy":"2021-08-08T04:28:49.049601Z","iopub.execute_input":"2021-08-08T04:28:49.053176Z","iopub.status.idle":"2021-08-08T04:28:49.065531Z","shell.execute_reply.started":"2021-08-08T04:28:49.053119Z","shell.execute_reply":"2021-08-08T04:28:49.063865Z"},"trusted":true},"execution_count":73,"outputs":[{"name":"stdout","text":"Predicted chances of flood\n[1 1 0 0 0 1 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 1 0 0 0 1 1 0 0 0\n 0 0 1 0 1 1 1 0 0 0 0 0 1 0 0 0 1 1 0 0 0 0 0 0 0 1 0 0 1 0 1 1 0 0 0 0 1\n 1 0 0 0 0 1 1 1 0 0 0 0 0 0 1 1 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 1 0 1 1 0 0\n 1 0 0 1 0 0 0 0 0 0 1 0 1 1 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1\n 0 1 1 0 0 0 0 1 1 0 0 1 0 0 1 0 0]\n","output_type":"stream"}]},{"cell_type":"code","source":"print('Actual chances of flood')\n##print('Actual No. Occurence of flood')\nprint(y_test.values)","metadata":{"tags":[],"execution":{"iopub.status.busy":"2021-08-08T04:28:49.067321Z","iopub.execute_input":"2021-08-08T04:28:49.067829Z","iopub.status.idle":"2021-08-08T04:28:49.082677Z","shell.execute_reply.started":"2021-08-08T04:28:49.067786Z","shell.execute_reply":"2021-08-08T04:28:49.078134Z"},"trusted":true},"execution_count":74,"outputs":[{"name":"stdout","text":"Actual chances of flood\n[1 0 0 0 0 1 0 1 0 0 0 0 0 1 1 0 0 0 1 1 0 0 1 1 0 0 1 0 1 1 0 1 0 0 0 0 0\n 0 0 1 1 0 0 1 0 1 0 1 1 0 0 0 1 1 0 1 0 1 1 0 0 1 0 0 0 1 0 1 1 0 1 1 0 1\n 1 1 1 1 0 1 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 1 1 0 1 1 1 1 0 0 1 1 1 1 1 0\n 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 1 1 0 1 1 0 0 1 1 0 0 1 0 1 1 1\n 0 0 0 0 1 0 0 1 1 1 1 0 0 0 0 0 0]\n","output_type":"stream"}]},{"cell_type":"code","source":"from sklearn.metrics import accuracy_score,recall_score,roc_auc_score,confusion_matrix\nprint(\"\\naccuracy score: %f\"%(accuracy_score(y_test,y_predict)*100))\nprint(\"recall score: %f\"%(recall_score(y_test,y_predict)*100))\nprint(\"roc score: %f\"%(roc_auc_score(y_test,y_predict)*100))","metadata":{"tags":[],"execution":{"iopub.status.busy":"2021-08-08T04:28:49.084135Z","iopub.execute_input":"2021-08-08T04:28:49.084889Z","iopub.status.idle":"2021-08-08T04:28:49.102485Z","shell.execute_reply.started":"2021-08-08T04:28:49.084823Z","shell.execute_reply":"2021-08-08T04:28:49.098603Z"},"trusted":true},"execution_count":75,"outputs":[{"name":"stdout","text":"\naccuracy score: 60.000000\nrecall score: 35.820896\nroc score: 56.175754\n","output_type":"stream"}]},{"cell_type":"markdown","source":"# 3. Decision tree classification","metadata":{}},{"cell_type":"code","source":"from sklearn.tree import DecisionTreeClassifier\ndtc_clf = DecisionTreeClassifier()\ndtc_clf.fit(x_train,y_train)\ndtc_clf_acc = cross_val_score(dtc_clf,x_train_std,y_train,cv=3,scoring=\"accuracy\",n_jobs=-1)\ndtc_clf_acc","metadata":{"execution":{"iopub.status.busy":"2021-08-08T04:28:49.103986Z","iopub.execute_input":"2021-08-08T04:28:49.104547Z","iopub.status.idle":"2021-08-08T04:28:49.176679Z","shell.execute_reply.started":"2021-08-08T04:28:49.104505Z","shell.execute_reply":"2021-08-08T04:28:49.175690Z"},"trusted":true},"execution_count":76,"outputs":[{"execution_count":76,"output_type":"execute_result","data":{"text/plain":"array([0.64545455, 0.69545455, 0.67272727])"},"metadata":{}}]},{"cell_type":"code","source":"#Predicted flood chances\ny_pred = dtc_clf.predict(x_test)\nprint(y_pred)","metadata":{"tags":[],"execution":{"iopub.status.busy":"2021-08-08T04:28:49.178973Z","iopub.execute_input":"2021-08-08T04:28:49.181291Z","iopub.status.idle":"2021-08-08T04:28:49.198705Z","shell.execute_reply.started":"2021-08-08T04:28:49.181250Z","shell.execute_reply":"2021-08-08T04:28:49.197623Z"},"trusted":true},"execution_count":77,"outputs":[{"name":"stdout","text":"[1 1 0 0 1 1 0 1 1 1 0 1 0 1 1 0 0 0 1 1 0 0 1 1 0 0 1 1 1 1 0 1 1 1 0 0 0\n 0 0 1 0 1 0 1 0 1 0 1 1 0 0 0 0 1 0 1 0 1 1 0 0 1 1 0 0 1 0 1 1 0 1 1 1 1\n 0 1 1 1 0 1 1 1 1 0 0 0 0 1 1 1 1 0 0 1 1 1 0 1 0 1 1 1 0 0 0 0 0 1 0 1 0\n 0 0 0 1 0 0 0 0 0 0 1 0 1 1 0 1 0 0 0 1 0 0 1 0 1 0 0 0 1 0 0 0 1 0 0 0 0\n 0 0 1 0 1 0 1 1 1 1 0 1 0 1 1 0 0]\n","output_type":"stream"}]},{"cell_type":"code","source":"#Actual flood chances\nprint(\"actual values:\")\nprint(y_test.values)","metadata":{"tags":[],"execution":{"iopub.status.busy":"2021-08-08T04:28:49.202198Z","iopub.execute_input":"2021-08-08T04:28:49.203854Z","iopub.status.idle":"2021-08-08T04:28:49.215933Z","shell.execute_reply.started":"2021-08-08T04:28:49.203808Z","shell.execute_reply":"2021-08-08T04:28:49.214100Z"},"trusted":true},"execution_count":78,"outputs":[{"name":"stdout","text":"actual values:\n[1 0 0 0 0 1 0 1 0 0 0 0 0 1 1 0 0 0 1 1 0 0 1 1 0 0 1 0 1 1 0 1 0 0 0 0 0\n 0 0 1 1 0 0 1 0 1 0 1 1 0 0 0 1 1 0 1 0 1 1 0 0 1 0 0 0 1 0 1 1 0 1 1 0 1\n 1 1 1 1 0 1 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 1 1 0 1 1 1 1 0 0 1 1 1 1 1 0\n 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 1 1 0 1 1 0 0 1 1 0 0 1 0 1 1 1\n 0 0 0 0 1 0 0 1 1 1 1 0 0 0 0 0 0]\n","output_type":"stream"}]},{"cell_type":"code","source":"from sklearn.metrics import accuracy_score,recall_score,roc_auc_score,confusion_matrix\nprint(\"\\naccuracy score:%f\"%(accuracy_score(y_test,y_pred)*100))\nprint(\"recall score:%f\"%(recall_score(y_test,y_pred)*100))\nprint(\"roc score:%f\"%(roc_auc_score(y_test,y_pred)*100))","metadata":{"tags":[],"execution":{"iopub.status.busy":"2021-08-08T04:28:49.218671Z","iopub.execute_input":"2021-08-08T04:28:49.220772Z","iopub.status.idle":"2021-08-08T04:28:49.245168Z","shell.execute_reply.started":"2021-08-08T04:28:49.220724Z","shell.execute_reply":"2021-08-08T04:28:49.244093Z"},"trusted":true},"execution_count":79,"outputs":[{"name":"stdout","text":"\naccuracy score:73.333333\nrecall score:76.119403\nroc score:73.773987\n","output_type":"stream"}]},{"cell_type":"markdown","source":"# 4. Random Forest Classification","metadata":{}},{"cell_type":"code","source":"from sklearn.ensemble import RandomForestClassifier\nrmf = RandomForestClassifier(max_depth=3,random_state=0)\nrmf_clf = rmf.fit(x_train,y_train)\nrmf_clf","metadata":{"execution":{"iopub.status.busy":"2021-08-08T04:28:49.248913Z","iopub.execute_input":"2021-08-08T04:28:49.252665Z","iopub.status.idle":"2021-08-08T04:28:49.607831Z","shell.execute_reply.started":"2021-08-08T04:28:49.252607Z","shell.execute_reply":"2021-08-08T04:28:49.607123Z"},"trusted":true},"execution_count":80,"outputs":[{"execution_count":80,"output_type":"execute_result","data":{"text/plain":"RandomForestClassifier(max_depth=3, random_state=0)"},"metadata":{}}]},{"cell_type":"code","source":"rmf_clf_acc = cross_val_score(rmf_clf,x_train_std,y_train,cv=3,scoring=\"accuracy\",n_jobs=-1)\n#rmf_proba = cross_val_predict(rmf_clf,x_train_std,y_train,cv=3,method='predict_proba')","metadata":{"execution":{"iopub.status.busy":"2021-08-08T04:28:49.609816Z","iopub.execute_input":"2021-08-08T04:28:49.610391Z","iopub.status.idle":"2021-08-08T04:28:50.154903Z","shell.execute_reply.started":"2021-08-08T04:28:49.610351Z","shell.execute_reply":"2021-08-08T04:28:50.154158Z"},"trusted":true},"execution_count":81,"outputs":[]},{"cell_type":"code","source":"rmf_clf_acc","metadata":{"execution":{"iopub.status.busy":"2021-08-08T04:28:50.156073Z","iopub.execute_input":"2021-08-08T04:28:50.156437Z","iopub.status.idle":"2021-08-08T04:28:50.167293Z","shell.execute_reply.started":"2021-08-08T04:28:50.156399Z","shell.execute_reply":"2021-08-08T04:28:50.165869Z"},"trusted":true},"execution_count":82,"outputs":[{"execution_count":82,"output_type":"execute_result","data":{"text/plain":"array([0.61363636, 0.64090909, 0.62272727])"},"metadata":{}}]},{"cell_type":"code","source":"y_pred = rmf_clf.predict(x_test)","metadata":{"execution":{"iopub.status.busy":"2021-08-08T04:28:50.168926Z","iopub.execute_input":"2021-08-08T04:28:50.169471Z","iopub.status.idle":"2021-08-08T04:28:50.191332Z","shell.execute_reply.started":"2021-08-08T04:28:50.169428Z","shell.execute_reply":"2021-08-08T04:28:50.190340Z"},"trusted":true},"execution_count":83,"outputs":[]},{"cell_type":"code","source":"from sklearn.metrics import accuracy_score,recall_score,roc_auc_score,confusion_matrix\nprint(\"\\naccuracy score:%f\"%(accuracy_score(y_test,y_pred)*100))\nprint(\"recall score:%f\"%(recall_score(y_test,y_pred)*100))\nprint(\"roc score:%f\"%(roc_auc_score(y_test,y_pred)*100))","metadata":{"tags":[],"execution":{"iopub.status.busy":"2021-08-08T04:28:50.192802Z","iopub.execute_input":"2021-08-08T04:28:50.193229Z","iopub.status.idle":"2021-08-08T04:28:50.207143Z","shell.execute_reply.started":"2021-08-08T04:28:50.193186Z","shell.execute_reply":"2021-08-08T04:28:50.205880Z"},"trusted":true},"execution_count":84,"outputs":[{"name":"stdout","text":"\naccuracy score:67.878788\nrecall score:62.686567\nroc score:67.057569\n","output_type":"stream"}]},{"cell_type":"markdown","source":"# 5. Enseble Learning","metadata":{}},{"cell_type":"code","source":"from sklearn.ensemble import VotingClassifier\nfrom sklearn.ensemble import RandomForestClassifier\nfrom sklearn.linear_model import LogisticRegression\nfrom sklearn.neighbors import KNeighborsClassifier\n\nlog_clf = LogisticRegression(solver=\"liblinear\", random_state=42)\nrnd_clf = RandomForestClassifier(n_estimators=10, random_state=42)\nknn_clf = KNeighborsClassifier()\n\nvoting = VotingClassifier(\n estimators=[('lr', log_clf), ('rf', rnd_clf), ('knn', knn_clf)],\n voting='hard')","metadata":{"execution":{"iopub.status.busy":"2021-08-08T04:28:50.208419Z","iopub.execute_input":"2021-08-08T04:28:50.208664Z","iopub.status.idle":"2021-08-08T04:28:50.217406Z","shell.execute_reply.started":"2021-08-08T04:28:50.208639Z","shell.execute_reply":"2021-08-08T04:28:50.216658Z"},"trusted":true},"execution_count":85,"outputs":[]},{"cell_type":"code","source":"voting_clf = voting.fit(x_train, y_train)","metadata":{"execution":{"iopub.status.busy":"2021-08-08T04:28:50.221943Z","iopub.execute_input":"2021-08-08T04:28:50.222212Z","iopub.status.idle":"2021-08-08T04:28:50.270054Z","shell.execute_reply.started":"2021-08-08T04:28:50.222187Z","shell.execute_reply":"2021-08-08T04:28:50.269245Z"},"trusted":true},"execution_count":86,"outputs":[]},{"cell_type":"code","source":"from sklearn.metrics import accuracy_score\n\nfor clf in (log_clf, rnd_clf, knn_clf, voting_clf):\n clf.fit(x_train, y_train)\n y_pred = clf.predict(x_test)\n print(clf.__class__.__name__, accuracy_score(y_test, y_pred))","metadata":{"tags":[],"execution":{"iopub.status.busy":"2021-08-08T04:28:50.272055Z","iopub.execute_input":"2021-08-08T04:28:50.272444Z","iopub.status.idle":"2021-08-08T04:28:50.400585Z","shell.execute_reply.started":"2021-08-08T04:28:50.272403Z","shell.execute_reply":"2021-08-08T04:28:50.399734Z"},"trusted":true},"execution_count":87,"outputs":[{"name":"stdout","text":"LogisticRegression 0.5454545454545454\nRandomForestClassifier 0.703030303030303\nKNeighborsClassifier 0.6606060606060606\nVotingClassifier 0.6545454545454545\n","output_type":"stream"}]},{"cell_type":"markdown","source":"# Comparing all the prediction models","metadata":{}},{"cell_type":"code","source":"models = []\nfrom sklearn.neighbors import KNeighborsClassifier\nfrom sklearn.linear_model import LogisticRegression\nfrom sklearn.tree import DecisionTreeClassifier\nfrom sklearn.svm import SVC\nfrom sklearn.ensemble import RandomForestClassifier\nfrom sklearn.ensemble import VotingClassifier\nmodels.append(('KNN', KNeighborsClassifier()))\nmodels.append(('LR', LogisticRegression()))\nmodels.append(('DT', DecisionTreeClassifier()))\nmodels.append(('RF', RandomForestClassifier()))\nmodels.append(('EL', VotingClassifier(\n estimators=[('lr', log_clf), ('rf', rnd_clf), ('knn', knn_clf)],\n voting='hard')))\n\n\nnames = []\nscores = []\nfor name, model in models:\n model.fit(x_train, y_train)\n y_pred = model.predict(x_test)\n scores.append(accuracy_score(y_test, y_pred))\n names.append(name)\ntr_split = pd.DataFrame({'Name': names, 'Score': scores})\nprint(tr_split)","metadata":{"tags":[],"execution":{"iopub.status.busy":"2021-08-08T04:28:50.402512Z","iopub.execute_input":"2021-08-08T04:28:50.402935Z","iopub.status.idle":"2021-08-08T04:28:50.855164Z","shell.execute_reply.started":"2021-08-08T04:28:50.402897Z","shell.execute_reply":"2021-08-08T04:28:50.854106Z"},"trusted":true},"execution_count":88,"outputs":[{"name":"stdout","text":" Name Score\n0 KNN 0.660606\n1 LR 0.539394\n2 DT 0.727273\n3 RF 0.745455\n4 EL 0.654545\n","output_type":"stream"}]},{"cell_type":"code","source":"import seaborn as sns\naxis = sns.barplot(x = 'Name', y = 'Score', data =tr_split )\naxis.set(xlabel='Classifier', ylabel='Accuracy')\nfor p in axis.patches:\n height = p.get_height()\n axis.text(p.get_x() + p.get_width()/2, height + 0.005, '{:1.4f}'.format(height), ha=\"center\") \n \nplt.show()","metadata":{"execution":{"iopub.status.busy":"2021-08-08T04:28:50.856801Z","iopub.execute_input":"2021-08-08T04:28:50.857230Z","iopub.status.idle":"2021-08-08T04:28:51.003379Z","shell.execute_reply.started":"2021-08-08T04:28:50.857190Z","shell.execute_reply":"2021-08-08T04:28:51.002209Z"},"trusted":true},"execution_count":89,"outputs":[{"output_type":"display_data","data":{"text/plain":"
","image/png":"\n"},"metadata":{"needs_background":"light"}}]},{"cell_type":"markdown","source":"# As we can observe and Compare different Models and Choose the best One.","metadata":{}},{"cell_type":"markdown","source":"# Give an Upvote if you like this Notebook.","metadata":{}}]}