# Twitter analyzer interface
library(stringr)
library(rJava)
# Start the JVM, then register the three jars used below on the rJava class
# path in a single call.
.jinit()
.jaddClassPath(c(
  "C:/Program Files/R/R-3.6.1/library/KoNLP/java/korean-text-3.0.jar",
  "C:/Program Files/R/R-3.6.1/library/KoNLP/java/scala-library-2.11.8.jar",
  "C:/Program Files/R/R-3.6.1/library/KoNLP/java/twitter-text-1.11.1.jar"
))

# Instantiate the processor builder and call its `build` method; the result
# is the object the tokenizing function calls `tokenize` on.
twitterObj <- .jrcall(
  .jnew("com.twitter.penguin.korean.TwitterKoreanProcessorJava$Builder"),
  "build"
)

# Load the CSV and pull out the column to analyse.
job <- read.csv("C:/data/kdn_jobplanet.csv")
st <- job$기업장점

# st1: every entry pasted together into one string with no separator.
st1 <- paste0(st, collapse = "")
# st2: the same entries as a data frame.
st2 <- as.data.frame(st)
# Twitter morpheme (noun) extraction function ----

# Extract the Hangul nouns from a piece of text.
#
# Calls the `tokenize` method of the global `twitterObj` (which must already
# exist when this function is called), takes the string form of the result,
# and pulls out every run of Hangul syllables that is immediately followed by
# "(Noun" in that string.
#
# Args:
#   strings: the text to tokenize; coerced with as.character(), so a factor
#            value is passed to Java as its label rather than its integer code.
#
# Returns:
#   A character vector of the matched nouns that are at least two characters
#   long; character(0) when there is nothing to tokenize or nothing matches.
twi.extractNoun <- function(strings) {
  text <- as.character(strings)

  # Empty, all-NA or all-"" input has no nouns; return early instead of
  # handing a missing/empty value to the Java tokenizer.
  if (length(text) == 0L || all(is.na(text) | !nzchar(text))) {
    return(character(0))
  }

  tokens <- .jstrVal(.jrcall(twitterObj, "tokenize", text))

  # Column 2 of the match matrix is the capture group, i.e. the word itself
  # without the trailing "(Noun" tag.
  nouns <- str_match_all(tokens, "([가-힣]+)\\(Noun")[[1]][, 2]

  # Drop single-character nouns.
  nouns[nchar(nouns) >= 2L]
}
# Extract the nouns of every entry in the first column of st2.
# lapply() always returns a list with one element per entry. apply() over the
# data frame would instead simplify to a matrix/vector whenever every entry
# happened to yield the same number of nouns (always the case for a single
# row), and the join below would then run per word rather than per entry.
word1 <- lapply(as.character(st2[[1L]]), twi.extractNoun)

# Join each entry's nouns into one space-separated string. vapply() pins the
# result to a character vector, including when there are no entries at all.
twitter_sentence <- vapply(
  word1,
  function(x) paste0(as.character(unlist(x)), collapse = " "),
  character(1)
)