from google.colab import drive
# Mount Google Drive at /content/drive; the CSV files used in this notebook
# are read from paths under that mount point.
drive.mount('/content/drive')
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
import pandas as pd
# Read the train and test tables from the mounted Drive and discard the
# `id` column from each, leaving only the sensor columns (plus `target`
# in the training table).
train = pd.read_csv(
    '/content/drive/MyDrive/Dacon/hand_gesture_data/train.csv'
).drop(columns=['id'])
test = pd.read_csv(
    '/content/drive/MyDrive/Dacon/hand_gesture_data/test.csv'
).drop(columns=['id'])

# Preview the first rows of the training table.
train.head()
sensor_1 | sensor_2 | sensor_3 | sensor_4 | sensor_5 | sensor_6 | sensor_7 | sensor_8 | sensor_9 | sensor_10 | ... | sensor_24 | sensor_25 | sensor_26 | sensor_27 | sensor_28 | sensor_29 | sensor_30 | sensor_31 | sensor_32 | target | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | -6.149463 | -0.929714 | 9.058368 | -7.017854 | -2.958471 | 0.179233 | -0.956591 | -0.972401 | 5.956213 | 4.145636 | ... | -7.026436 | -6.006282 | -6.005836 | 7.043084 | 21.884650 | -3.064152 | -5.247552 | -6.026107 | -11.990822 | 1 |
1 | -2.238836 | -1.003511 | 5.098079 | -10.880357 | -0.804562 | -2.992123 | 26.972724 | -8.900861 | -5.968298 | -4.060134 | ... | -1.996714 | -7.933806 | -3.136773 | 8.774211 | 10.944759 | 9.858186 | -0.969241 | -3.935553 | -15.892421 | 1 |
2 | 19.087934 | -2.092514 | 0.946750 | -21.831788 | 9.119235 | 17.853587 | -21.069954 | -15.933212 | -9.016039 | -5.975194 | ... | -6.889685 | 54.052330 | -6.109238 | 12.154595 | 6.095989 | -40.195088 | -3.958124 | -8.079537 | -5.160090 | 0 |
3 | -2.211629 | -1.930904 | 21.888406 | -3.067560 | -0.240634 | 2.985056 | -29.073369 | 0.200774 | -1.043742 | 2.099845 | ... | -2.126170 | -1.035526 | 2.178769 | 10.032723 | -1.010897 | -3.912848 | -2.980338 | -12.983597 | -3.001077 | 1 |
4 | 3.953852 | 2.964892 | -36.044802 | 0.899838 | 26.930210 | 11.004409 | -21.962423 | -11.950189 | -20.933785 | -4.000506 | ... | -2.051761 | 10.917567 | 1.905335 | -13.004707 | 17.169552 | 2.105194 | 3.967986 | 11.861657 | -27.088846 | 2 |
5 rows × 33 columns
# Correlation of every column with `target` (pandas' default method is
# Pearson). NOTE(review): `target` looks like a class label, so a linear
# correlation is only a rough signal here -- confirm before relying on it.
train.corr().loc[:, 'target']
sensor_1 -0.031899 sensor_2 -0.029971 sensor_3 -0.059246 sensor_4 -0.014708 sensor_5 0.013416 sensor_6 -0.015927 sensor_7 0.023569 sensor_8 -0.004660 sensor_9 -0.005506 sensor_10 0.006381 sensor_11 0.013004 sensor_12 0.007906 sensor_13 0.019591 sensor_14 0.013936 sensor_15 -0.019540 sensor_16 -0.008993 sensor_17 -0.016523 sensor_18 -0.019564 sensor_19 0.001252 sensor_20 -0.019440 sensor_21 0.013673 sensor_22 0.004371 sensor_23 0.004909 sensor_24 0.003862 sensor_25 0.017889 sensor_26 -0.012798 sensor_27 -0.008405 sensor_28 -0.013656 sensor_29 -0.000169 sensor_30 -0.009233 sensor_31 -0.003516 sensor_32 0.041874 target 1.000000 Name: target, dtype: float64
# Separate the label column from the feature columns.
df_label = train['target']
df_data = train.drop(columns=['target'])

# Sanity check: both must have the same number of rows.
print(df_data.shape, df_label.shape)
(2335, 32) (2335,)
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
# Hold out 25% of the labelled rows for evaluation; the fixed seed keeps
# the split identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    df_data,
    df_label,
    test_size=0.25,
    random_state=1,
)

# Size of the training portion.
X_train.shape
(1751, 32)
import matplotlib.pyplot as plt
# Sweep the number of neighbours from 1 to 100 and record, for each k, the
# score of a KNN classifier on the held-out split.
# NOTE(review): k is being chosen by its score on X_test, so the held-out
# score of the selected k is optimistic -- consider cross-validation.
k_list = range(1, 101)
accuracy = []
for k in k_list:
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(X_train, y_train)
    accuracy.append(knn.score(X_test, y_test))

# Plot held-out score against k to pick a value by eye.
plt.plot(k_list, accuracy)
plt.xlabel('n_neighbors (k)')
plt.ylabel('accuracy')
plt.show()
# Chosen k = 3 (used as n_neighbors for the KNN model below).
# Fit a default random forest and a 3-nearest-neighbour classifier on the
# training split (`fit` returns the estimator itself, so it can be chained).
rf = RandomForestClassifier().fit(X_train, y_train)
knn = KNeighborsClassifier(n_neighbors=3).fit(X_train, y_train)

# Report held-out scores: KNN first, then the random forest.
for model in (knn, rf):
    print(model.score(X_test, y_test))
0.565068493150685 0.791095890410959
# Re-train a fresh random forest on ALL labelled rows (no hold-out) so the
# model used for the final predictions sees every available example.
rf = RandomForestClassifier()
rf.fit(X=df_data, y=df_label)
RandomForestClassifier()
# Predict a label for every row of the test table with the forest trained
# on the full labelled data.
pre = rf.predict(test)

# Fill the predictions into the sample submission file and save it.
sub = pd.read_csv('/content/drive/MyDrive/Dacon/hand_gesture_data/sample_submission.csv')
# Bracket assignment always writes a real column; attribute assignment
# (`sub.target = ...`) would only set a Python attribute if the column
# were ever missing from the sample file, and the CSV would be unchanged.
sub['target'] = pre
sub.to_csv('/content/drive/MyDrive/Dacon/hand_gesture_data/output1.csv', index=False)

# Preview the first rows of the submission.
sub.head()
id | target | |
---|---|---|
0 | 1 | 0 |
1 | 2 | 0 |
2 | 3 | 1 |
3 | 4 | 3 |
4 | 5 | 2 |