(자연어처리) 기초 키워드 분석

!set -x \
&& pip install konlpy \
&& curl -s https://raw.githubusercontent.com/konlpy/konlpy/master/scripts/mecab.sh | bash -x.


import urllib.request

# Download ratings.txt from the e9t/nsmc repository. The context manager
# closes the HTTP response as soon as every line has been read.
with urllib.request.urlopen(
    "https://raw.githubusercontent.com/e9t/nsmc/master/ratings.txt"
) as response:
    raw = response.readlines()
print(raw[:5])

# Drop the first line (presumably a header row) and decode the remaining
# byte lines to text.
raw = [line.decode("utf-8") for line in raw[1:]]

# Keep the second tab-separated field of every line as the review text.
reviews = [line.split("\t")[1] for line in raw]

print(reviews[:5])



from konlpy.tag import Mecab

# Mecab tagger; its nouns() method is applied to every review below.
tagger = Mecab()

# Flat list of all nouns returned for the reviews, in review order.
nouns = []
for review in reviews:
    nouns.extend(tagger.nouns(review))

nouns[:10]


# Nouns to leave out of the keyword counts, written as one space-separated
# string and split into a list.
stop_words = "영화 전 난 일 걸 뭐 줄 만 건 분 개 끝 잼 이거 번 중 듯 때 게 내 말 나 수 거 점 것".split(" ")
print(stop_words)


# Rebuild the noun list, this time leaving out the stop words.
# The stop words are copied into a set once so that each membership test is a
# hash lookup instead of a scan over the list for every noun.
excluded = set(stop_words)

nouns = []
for review in reviews:
    nouns.extend(noun for noun in tagger.nouns(review) if noun not in excluded)

nouns[:10]

from collections import Counter

# Tally how many times each noun occurs.
nouns_counter = Counter(nouns)

# Keep the 50 most common nouns as a noun -> count mapping, in the order
# most_common() returns them.
top_nouns = {noun: count for noun, count in nouns_counter.most_common(50)}
top_nouns

# pyplot is used below as `plt`; without this import the cell fails with a
# NameError.
import matplotlib.pyplot as plt
import numpy as np

# Horizontal bar chart: one bar per top noun, bar length = occurrence count.
# NOTE(review): the tick labels are Korean nouns; confirm that the active
# matplotlib font contains Hangul glyphs, otherwise labels may render as boxes.
labels = list(top_nouns.keys())
counts = list(top_nouns.values())  # concrete lists rather than dict views
y_pos = np.arange(len(labels))

plt.figure(figsize=(12, 12))
plt.barh(y_pos, counts)
plt.title('Word count')
plt.yticks(y_pos, labels)
plt.show()

# pyplot is used below as `plt`; imported here so this cell runs on its own.
import matplotlib.pyplot as plt
from wordcloud import WordCloud

# Build the word cloud from the noun -> count mapping. font_path points at a
# NanumBarunGothic font file expected under ./font/.
wc = WordCloud(background_color="white", font_path='./font/NanumBarunGothic.ttf')
wc.generate_from_frequencies(top_nouns)

# Show the cloud on a single axes with the axis frame and ticks hidden.
figure = plt.figure(figsize=(12, 12))
ax = figure.add_subplot(1, 1, 1)
ax.axis('off')
ax.imshow(wc)
plt.show()

# `mpl` is used below for the colour normalisation and colormap; without this
# import the cell fails with a NameError. Importing pyplot also guarantees
# that the matplotlib.colors and matplotlib.cm submodules are loaded.
import matplotlib as mpl
import matplotlib.pyplot as plt
import squarify

# Map counts linearly onto [0, 1]: smallest count -> 0, largest count -> 1.
norm = mpl.colors.Normalize(vmin=min(top_nouns.values()),
                            vmax=max(top_nouns.values()))

# One colour per noun, taken from the Blues colormap at the normalised count.
colors = [mpl.cm.Blues(norm(value)) for value in top_nouns.values()]

# Treemap: tile area follows the noun's count, tile label is the noun.
squarify.plot(label=list(top_nouns.keys()),
              sizes=list(top_nouns.values()),
              color=colors,
              alpha=.7)
# Render the figure explicitly, as the other plotting cells do.
plt.show()

저작자표시

'인공지능 공부 > 딥러닝 논문읽기' 카테고리의 다른 글

(자연어처리) NLP 스팸메일 분류 CNN (0)	2021.11.02
(자연어처리 기초) 군집분석 - word2vec, 계층적 군집화, 비계층적 군집화 (0)	2021.11.02
(딥러닝) Deep Learning age and gender detection 나이를 추측해보자 (0)	2021.10.29
(코로나 바이러스 예측)Analysis of COVID-19 data using Python (0)	2021.09.23
파이썬 딥러닝 텐서플로 복잡한모델 생성 (0)	2021.09.06

'인공지능 공부 > 딥러닝 논문읽기' 카테고리의 다른 글

티스토리툴바