[AI]/python.sklearn
sklearn.feature_extraction.text.CountVectorizer.ngram.LogisticRegression.2단어들만 출력
givemebro
2020. 4. 28. 16:24
반응형
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score

# Bag-of-words counts over unigrams and bigrams (ngram_range=(1, 2)).
# NOTE(review): text_train and y_train are not defined in the visible part of
# this script -- they are expected to exist before this point.
vect = CountVectorizer(ngram_range=(1, 2))
X_train = vect.fit_transform(text_train)

# Fit a logistic-regression classifier with default settings on the counts;
# fit() returns the estimator itself, so the call can be chained.
model = LogisticRegression().fit(X_train, y_train)
# Extract the features made up of two words (bigrams) and select the 20 most
# negative and the 20 most positive coefficients among them.
import numpy as np  # no numpy import is visible elsewhere in this script

# get_feature_names() was removed in scikit-learn 1.2; use its replacement
# when available and fall back to the old method on older versions.
if hasattr(vect, 'get_feature_names_out'):
    fn = np.asarray(vect.get_feature_names_out())
else:
    fn = np.array(vect.get_feature_names())

# An n-gram feature name joins its tokens with a space, so a space in the
# name marks a multi-word feature (a bigram under ngram_range=(1, 2)).
mask = np.array([' ' in s for s in fn], dtype=bool)

# Restrict both the coefficients and the names to the bigram features so that
# positions in `w` and `bigram_names` refer to the same feature.
w = model.coef_[0][mask]
bigram_names = fn[mask]

# argsort is ascending: the first entries are the most negative weights, the
# last entries the most positive ones.
# NOTE: with fewer than 40 bigrams the two slices overlap (or are shorter
# than 20), so the combined index may contain duplicates.
index_sorted_w = np.argsort(w)
index_small = index_sorted_w[:20]
index_big = index_sorted_w[-20:]
index_small_big = np.r_[index_small, index_big]

# The indices are positions in the masked array, so the names must be looked
# up in the masked name list -- indexing the full `fn` here would label the
# bars with unrelated features.
small_big_name = bigram_names[index_small_big]
# Visualization: bar chart of the selected bigram coefficients (smallest
# first, largest last) with the feature names as x tick labels.
import matplotlib.pyplot as plt

# Derive the bar count from the selection instead of hard-coding 40, so the
# plot still works when fewer than 20 bigram features exist.
n_bars = len(index_small_big)
bar_positions = range(n_bars)

plt.figure(figsize=[10, 10])
plt.title('ngram_range=(1,2)')
plt.bar(bar_positions, w[index_small_big])
plt.xticks(bar_positions, small_big_name, rotation=90)
# Presumably here so a notebook cell does not echo the return value of
# plt.xticks -- it has no other effect.
pass
반응형