군집분석방법

Study Info

군집분석방법

HR분석 0 46 2019.12.12 06:06


# Load the HR data set and drop its first column.
hr <- read.csv("N:/data/hrdata.csv")
hr <- hr[, -1]

# Give columns 1-6, 8 and 9 short working names in one assignment.
# Columns 7 and 10 keep whatever names they had in the CSV.
names(hr)[c(1:6, 8, 9)] <- c(
  "satisfaction", "evaluation", "project", "hours",
  "years", "accident", "promotion", "department"
)

# Alternative kept from the original author (disabled): rename all ten
# columns at once. Note it would call column 5 'spend' rather than 'years'.
#names(hr) <- c("satisfaction","evaluation",'project','hours','spend','accident','left','promotion','department','salary' )

# Discard rows containing missing values, then print per-column summaries.
hr <- na.omit(hr)
summary(hr)



library(dplyr)

# Keep the first eight columns of `hr` as the input for clustering.
s <- select(hr, 1:8)

# Preview the first five columns. The call is wrapped in head() so only the
# first few rows are shown; the bare subset() call auto-printed every row of
# the data frame to the console and its result was never used.
head(subset(hr, select = 1:5))

#--------------------------- Prepare the training / test data

# Seed the random number generator BEFORE sampling so that the split below
# is reproducible from run to run.
set.seed(123)

# Draw 70% of the row indices (rounded down) without replacement.
# seq_len() is used instead of 1:nrow(s) so an empty data frame yields an
# empty index vector rather than c(1, 0).
inTrain <- sample(seq_len(nrow(s)), floor(nrow(s) * 0.7))

# Rows drawn above form the training set; the remaining rows the test set.
training <- s[inTrain, ]
testing <- s[-inTrain, ]




#------------------ Standardise the data with scale()

# Centre each training column and divide it by its standard deviation.
training.data <- scale(training, center = TRUE, scale = TRUE)
summary(training.data)

# Pairwise Euclidean distances between the standardised training rows.
d <- dist(training.data, method = "euclidean")

# Use a single-panel plotting layout.
par(mfrow = c(1, 1))

# Average-linkage hierarchical clustering, followed by its dendrogram.
fit.average <- hclust(d, method = "average")
plot(fit.average, hang = -4, cex = 0.1)



# Fix the random seed so the k-means based steps below are reproducible.
set.seed(123)

#------- Choosing an appropriate number of clusters K

# ("NbClust") -- presumably the remains of an install.packages("NbClust")
# call; install the package once if it is not available.
library(NbClust)

# Run NbClust's indices with k-means for every cluster count from 2 to 15
# on the standardised training data.
nc <- NbClust(training.data, min.nc = 2, max.nc = 15, method = "kmeans")

par(mfrow = c(1, 1))

# Tally how many indices voted for each cluster count. The component is
# spelled out in full (`Best.nc`) rather than relying on `$` partial
# matching of `Best.n`; its first row holds the proposed cluster counts.
barplot(table(nc$Best.nc[1, ]),
        xlab = "Number of Clusters", ylab = "Number of Criteria",
        main = "Number of Clusters Chosen")




# Standardise the full data set before clustering. k-means uses Euclidean
# distance, so on the raw data the columns with the largest numeric range
# would dominate the result. Standardising also keeps this step consistent
# with the standardised data used above to choose the number of clusters.
s.scaled <- scale(s)

# Fit k-means for 2 to 5 clusters. nstart = 25 tries 25 random initial
# configurations per fit and keeps the best one, making the solution far
# less sensitive to an unlucky starting point.
k2 <- kmeans(s.scaled, centers = 2, nstart = 25)
k3 <- kmeans(s.scaled, centers = 3, nstart = 25)
k4 <- kmeans(s.scaled, centers = 4, nstart = 25)
k5 <- kmeans(s.scaled, centers = 5, nstart = 25)

# Cluster centres of each solution (in standardised units, i.e. standard
# deviations from each column's mean).
k2$centers
k3$centers
k4$centers
k5$centers



# Attach the cluster labels of every k-means solution to the observations.
cluster <- cbind(
  s,
  clus2 = k2$cluster,
  clus3 = k3$cluster,
  clus4 = k4$cluster,
  clus5 = k5$cluster
)
head(cluster)

# Jittered satisfaction-vs-evaluation scatter plots, coloured by cluster
# membership, for the 2-, 3- and 4-cluster solutions in turn.
for (membership in list(k2$cluster, k3$cluster, k4$cluster)) {
  plot(jitter(cluster$satisfaction), jitter(cluster$evaluation),
       col = membership, pch = 20)
}


[이 게시물은 HR분석님에 의해 2020-01-26 18:51:02 교육자료에서 복사 됨]

Comments

Study Info

상담 문의


010.9417.2025 hrd04@naver.com