sklearn.naive_bayes.BernoulliNB
- When the features are binary (present/absent, yes/no) and there are very many of them: BernoulliNB (a small sketch of its binarize option follows this list)
- When the data has ordinary continuous-valued features rather than binary ones: GaussianNB
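Even when the raw inputs are not strictly 0/1, BernoulliNB can still be applied, because its binarize parameter (default 0.0) thresholds each feature before fitting. A minimal sketch, with made-up feature values chosen only for illustration:

import numpy as np
from sklearn.naive_bayes import BernoulliNB

# Continuous scores; binarize=0.5 maps values greater than 0.5 to 1, the rest to 0.
X_cont = np.array([[0.9, 0.1, 0.7],
                   [0.2, 0.8, 0.3],
                   [0.95, 0.05, 0.6],
                   [0.1, 0.9, 0.2]])
y_cont = np.array([1, 0, 1, 0])

model = BernoulliNB(binarize=0.5)
model.fit(X_cont, y_cont)
print(model.predict(X_cont))  # expected: [1 0 1 0] on this separable toy data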
import numpy as np
from sklearn.naive_bayes import BernoulliNB

# 6 samples, each with 100 random binary (0/1) features
X = np.random.randint(2, size=(6, 100))
# y = np.array([1, 2, 3, 4, 4, 5])
y = np.array([1, 0, 0, 1, 1, 0])

model = BernoulliNB()
model.fit(X, y)
pred_y = model.predict(X)  # predict on the training data itself
print(pred_y)
# [1 0 0 1 1 0]
X
'''
array([[0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1,
        0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0,
        1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0,
        0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0,
        1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1],
       ...])  # remaining 5 rows of random 0/1 values omitted
'''
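To see what the fitted BernoulliNB actually learned from this toy data, its standard attributes can be inspected. A minimal sketch using class_log_prior_, feature_log_prob_, and predict_proba:

# Log prior of each class, estimated from the class frequencies in y
print(model.class_log_prior_)         # shape (2,)
# Log P(feature = 1 | class), one row per class, one column per feature
print(model.feature_log_prob_.shape)  # (2, 100)
# Class membership probabilities for the first training sample
print(model.predict_proba(X[:1]))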
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_breast_cancer
from sklearn.naive_bayes import BernoulliNB, GaussianNB, MultinomialNB

cancer = load_breast_cancer()
# No random_state is set, so the split (and the scores below) will vary from run to run.
X_train, X_test, y_train, y_test = train_test_split(cancer.data, cancer.target)

# model = BernoulliNB()
model = GaussianNB()
# model = MultinomialNB()
model.fit(X_train, y_train)
train_score = model.score(X_train, y_train)
test_score = model.score(X_test, y_test)
display(train_score, test_score)  # display() is available in Jupyter; use print() in a plain script
# 0.9436619718309859
# 0.9090909090909091
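Instead of switching the commented-out model lines by hand, the three variants can be compared in a short loop on the same split. A sketch under the setup above (the exact numbers will differ because the split is random):

for nb in (BernoulliNB(), MultinomialNB(), GaussianNB()):
    nb.fit(X_train, y_train)
    print(type(nb).__name__,
          nb.score(X_train, y_train),
          nb.score(X_test, y_test))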
import mglearn

# Keep only the first two features (mean radius, mean texture) so the decision
# boundary can be drawn in 2D.
X_train, X_test, y_train, y_test = train_test_split(cancer.data[:, [0, 1]], cancer.target)
model = GaussianNB()
model.fit(X_train, y_train)
print(model.score(X_test, y_test))
mglearn.plots.plot_2d_classification(model, X_train)
mglearn.discrete_scatter(X_train[:, 0], X_train[:, 1], y_train)
# plt.show()  # only needed outside Jupyter (after import matplotlib.pyplot as plt)
from sklearn.linear_model import LogisticRegression

# Same two-feature setup, this time with a linear classifier for comparison.
X_train, X_test, y_train, y_test = train_test_split(cancer.data[:, [0, 1]], cancer.target)
model = LogisticRegression()  # pass max_iter=1000 if a ConvergenceWarning appears
model.fit(X_train, y_train)
print(model.score(X_test, y_test))
mglearn.plots.plot_2d_classification(model, X_train)
mglearn.discrete_scatter(X_train[:, 0], X_train[:, 1], y_train)
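To make the difference between the two decision boundaries easier to see, both models can be drawn side by side. This is a rough sketch with plain matplotlib instead of mglearn; it refits both classifiers on the full two-feature data, and max_iter=1000 is only there to avoid possible convergence warnings:

import numpy as np
import matplotlib.pyplot as plt
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression

X2 = cancer.data[:, [0, 1]]  # mean radius, mean texture
y2 = cancer.target

# Grid covering the feature range, used to evaluate each model's predictions
xx, yy = np.meshgrid(np.linspace(X2[:, 0].min() - 1, X2[:, 0].max() + 1, 200),
                     np.linspace(X2[:, 1].min() - 1, X2[:, 1].max() + 1, 200))
grid = np.c_[xx.ravel(), yy.ravel()]

fig, axes = plt.subplots(1, 2, figsize=(10, 4))
for ax, clf in zip(axes, [GaussianNB(), LogisticRegression(max_iter=1000)]):
    clf.fit(X2, y2)
    Z = clf.predict(grid).reshape(xx.shape)
    ax.contourf(xx, yy, Z, alpha=0.3)           # decision regions: curved for GaussianNB, linear for LogisticRegression
    ax.scatter(X2[:, 0], X2[:, 1], c=y2, s=10)  # data points colored by class
    ax.set_title(type(clf).__name__)
    ax.set_xlabel('mean radius')
    ax.set_ylabel('mean texture')
plt.show()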