library(rvest)
library(XML)
library(stringr)
#' Scrape one page of Naver news search results.
#'
#' Downloads the search-result page whose `start=` query parameter equals
#' `page`, then extracts one row per result entry (each `<dl>` under
#' `ul.type01 > li`).
#'
#' Fields are read per entry rather than with four independent document-wide
#' queries, so a result that lacks one element (e.g. no press `<span>`) yields
#' `NA` in that cell instead of shifting or recycling the other columns.
#'
#' @param page Value appended to the `start=` query parameter
#'   (presumably the 1-based index of the first result -- confirm with caller).
#' @return A data frame with character columns `title`, `press`, `date` and
#'   `desc`; zero rows when the page contains no matching entries. `date` is
#'   the full text of the first `<dd>`, which also contains the press name.
navernews <- function(page){
  # The query string is fixed: a percent-encoded UTF-8 search term and a
  # 2017.12.01 - 2019.11.30 date window. Only `start` varies.
  url <- paste0(
    "https://search.naver.com/search.naver?&where=news",
    "&query=%ED%95%9C%EC%A0%84KDN%EA%B5%90%EC%9C%A1",
    "&sm=tab_pge&sort=0&photo=0&field=0&reporter_article=",
    "&pd=3&ds=2017.12.01&de=2019.11.30&docid=",
    "&nso=so:r,p:from20171201to20191130,a:all",
    "&mynews=0&cluster_rank=55&start=",
    as.character(page)
  )
  doc <- read_html(url)
  parsed <- htmlParse(doc)

  # One node per search result; all fields below are resolved relative to it.
  records <- getNodeSet(parsed, '//ul[@class="type01"]/li/dl')

  # Text of the first node matching `path` under `node`, or NA when absent.
  first_text <- function(node, path) {
    hits <- xpathSApply(node, path, xmlValue)
    if (length(hits) == 0) {
      NA_character_
    } else {
      as.character(hits[[1]])
    }
  }

  # vapply guarantees a character vector even when `records` is empty.
  field <- function(path) {
    vapply(records, first_text, character(1), path = path)
  }

  data.frame(
    title = field("./dt/a"),
    press = field("./dd[1]/span[1]"),
    date = field("./dd[1]"),
    desc = field("./dd[2]"),
    stringsAsFactors = FALSE
  )
}
# Scrape every result page and stack the rows into a single data frame, DATA.
# `x` holds the values passed to navernews(): 1, 11, 21, ..., 2461.
x <- seq(1, 2465, 10)

# Fetch each page into a list first; binding once at the end avoids re-copying
# the accumulated data frame on every iteration.
pages <- lapply(x, function(start) {
  tryCatch(
    navernews(start),
    # A single failed request should not discard the pages already fetched:
    # report it and carry on with the remaining offsets.
    error = function(e) {
      warning(
        "navernews(", start, ") failed: ", conditionMessage(e),
        call. = FALSE
      )
      NULL
    }
  )
})

# Drop failed pages, then bind; DATA is NULL if no page could be fetched.
DATA <- do.call(rbind, Filter(Negate(is.null), pages))