[projects] EDA_wordcloud
Load review text crawled from Naver blogs and render it as word-cloud images.
Prerequisites
import re
import konlpy
import pandas as pd
# Open file: read the crawled review text into a list with one entry per
# line of the file (each entry keeps its trailing newline).
with open(r'./naver_blog_review_1', encoding='utf-8') as f:
    text = f.readlines()
Preprocess text
# Strip surrounding whitespace from every line and drop lines that are empty
# after stripping (this also removes whitespace-only lines, not just "\n").
text_strip = [line.strip() for line in text if line.strip()]
# Flatten all reviews into one space-separated string.
text_join = ' '.join(text_strip)
# Replace every character that is not a digit, whitespace, or word character
# with a space.
filtered_content = re.sub(r'[^\d\s\w]', ' ', text_join)
Tokenize Korean vocabulary
# Komoran morphological analyzer from KoNLPy; reused by tokenizer() below.
komoran = konlpy.tag.Komoran()
# Part-of-speech tagging of the whole cleaned text.
komoran_pos = komoran.pos(filtered_content)
# NOTE(review): the result of this call is not stored -- presumably it was
# only inspected interactively; confirm before removing.
komoran.morphs(filtered_content)
# Nouns only, without any further filtering.
komoran_nouns = komoran.nouns(filtered_content)
# Nouns that tokenizer() should discard; empty here, so no noun is excluded
# by this list.
stop_words = []
def tokenizer(text):
    """Extract the nouns of *text* with the module-level Komoran tagger.

    Nouns contained in the module-level ``stop_words`` and nouns that are
    only one character long are left out.

    Returns:
        A list of the remaining nouns, in the order Komoran produced them.
    """
    kept = []
    for candidate in komoran.nouns(text):
        if candidate in stop_words:
            continue
        if len(candidate) > 1:
            kept.append(candidate)
    return kept
# Nouns of the whole cleaned text, minus stop words and one-character nouns.
filtered_double = tokenizer(filtered_content)
Count the most frequent words
# Tally how often each remaining noun occurs.
from collections import Counter
c = Counter(filtered_double)
# The ten most frequent nouns as (noun, count) pairs, most frequent first.
frequent = c.most_common(10)
Wordcloud
# Wordcloud settings
from wordcloud import WordCloud
import matplotlib.pyplot as plt
# NOTE(review): `path` is not referenced by any code visible in this section.
from os import path
# Font file handed to WordCloud as font_path below. The location is
# Windows-specific; adjust it on other systems.
FONT_PATH = "C:/windows/fonts/malgun.ttf"
Wordcloud -> “Hangang”, “fine dust”, “park”
# Wordcloud -> "Hangang", "fine dust", "park"
# Word cloud that takes both its mask and its word colors from blossom.jpg.
from wordcloud import ImageColorGenerator

img = plt.imread("blossom.jpg")
# Color callback built from the same image array that is used as the mask.
image_colors = ImageColorGenerator(img)
wordcloud1 = WordCloud(
    mask=img,
    color_func=image_colors,
    random_state=1,
    background_color="black",
    font_path=FONT_PATH,
)
# Lay the words out from the noun counts in `c`.
wordcloud1.generate_from_frequencies(c)
# Bare expression: the rendered image is only shown when an interactive
# shell or notebook echoes the value.
wordcloud1.to_image()
Wordcloud -> “fine dust”
# Wordcloud -> "fine dust"
from wordcloud import ImageColorGenerator
import numpy as np
# List of hex color strings.
# NOTE(review): `col` is not referenced by any code visible in this section.
col=['#6B4E24','#EECA98','#EBAA4F','#6B5B45','#B8853E','#AB891A','#6B5610','#EBBC23','#F7B50F']
def color_func(word, font_size, position, orientation, random_state=None, **kwargs):
    """Return an HSL color string with fixed hue/saturation and random lightness.

    Hue (40) and saturation (70%) are constant; lightness is an integer drawn
    uniformly from 45..54. The function is meant to be passed as a
    ``color_func`` callback to WordCloud, so the word-specific arguments are
    accepted but do not influence the result.

    Args:
        word: Unused.
        font_size: Unused.
        position: Unused.
        orientation: Unused.
        random_state: Optional ``random.Random``-like object exposing
            ``randint(a, b)`` with inclusive bounds. When given, it supplies
            the random draw so that seeded callers get reproducible colors.
            When ``None``, NumPy's global generator is used.
        **kwargs: Ignored; accepted so extra keyword arguments do not fail.

    Returns:
        A string such as ``"hsl(40, 70%, 50%)"``.
    """
    if random_state is None:
        # np.random.randint excludes the upper bound -> 45..54.
        lightness = np.random.randint(45, 55)
    else:
        # random.Random.randint includes the upper bound -> 45..54.
        lightness = random_state.randint(45, 54)
    return "hsl(40, 70%%, %d%%)" % lightness
# Word cloud masked by blossom.jpg and recolored with the custom color_func.
img = plt.imread("blossom.jpg")
wordcloud1 = WordCloud(
    mask=img,
    random_state=1,
    background_color="black",
    font_path=FONT_PATH,
)
# Lay the words out from the noun counts in `c`.
wordcloud1.generate_from_frequencies(c)
# Re-assign the word colors through color_func; the layout is unchanged.
wordcloud1.recolor(color_func=color_func)
# Bare expression: the rendered image is only shown when an interactive
# shell or notebook echoes the value.
wordcloud1.to_image()
댓글남기기