[AI]/python.sklearn
sklearn.textdata.BernoulliNB적용
givemebro
2020. 4. 28. 09:37
반응형
import numpy as np
# Load the train/test splits from the data file.
# The loaded elements expose .data/.target attributes, so the archive
# presumably stores pickled Python objects; NumPy >= 1.16.3 refuses to read
# such files unless allow_pickle=True is passed explicitly.
# NOTE(security): unpickling can execute arbitrary code -- only load an
# imdb.npy that comes from a trusted source.
imdb_tarin, imdb_test = np.load('imdb.npy', allow_pickle=True)
# Decode each raw bytes review to str and strip the HTML line-break tag.
# The exact same tag ('<br />') must be removed from both splits so the
# train and test documents are preprocessed identically.
text_train = [s.decode().replace('<br />', '') for s in imdb_tarin.data]
text_test = [s.decode().replace('<br />', '') for s in imdb_test.data]
# Targets that accompany the training and test documents.
y_train = imdb_tarin.target
y_test = imdb_test.target
from sklearn.feature_extraction.text import CountVectorizer
# Learn the bag-of-words vocabulary from the training documents only.
# y_train is handed to fit() exactly as in a regular fit(X, y) call.
vect = CountVectorizer()
vect.fit(text_train, y_train)
# Encode both splits against that vocabulary (each result is a sparse matrix).
X_train, X_test = (vect.transform(docs) for docs in (text_train, text_test))
from sklearn.naive_bayes import BernoulliNB
from sklearn.model_selection import cross_val_score
# Cross-validate a fresh BernoulliNB on the training split using the
# default CV settings, then show the mean of the per-fold scores.
scores = cross_val_score(estimator=BernoulliNB(), X=X_train, y=y_train)
display(scores.mean())
0.8490804269023817
# Train the final classifier on the whole training split; fit() returns the
# estimator itself, so construction and fitting are chained.
model = BernoulliNB().fit(X_train, y_train)
# Score on the held-out test split (left as the last expression so the
# notebook echoes the value).
model.score(X_test, y_test)
0.82912
반응형