2016-01-12 3 views

ответ

1

Вы имеете в виду раздел в документах? Если да, то это можно сделать так:

# Strip a leading "Reference Section <n>: " header from every document
# in a tm corpus.
library(tm)

txt <- c("Reference Section 1: Foo", "Reference Section 2: Bar")

# Build a VCorpus explicitly: the generic Corpus() alias may hand back a
# SimpleCorpus for a VectorSource (depending on the tm version), and
# tm_map() with a custom transformation is less reliable on that class.
corp <- VCorpus(VectorSource(txt))

# content_transformer() wraps a character-level function so that it can be
# applied to a document: the content is rewritten, the metadata is kept.
# The pattern is anchored with "^", so only a header at the very start of
# the text is removed.
removeRefSec <- content_transformer(function(x) {
  sub("^Reference Section \\d+: ", "", x)
})

# Before: the first document still carries the 21-character header.
corp[[1]]
# <<PlainTextDocument>>
# Metadata: 7
# Content: chars: 24

# The transformer can be applied to a single document directly ...
removeRefSec(corp[[1]])
# <<PlainTextDocument>>
# Metadata: 7
# Content: chars: 3

# ... or mapped over the whole corpus with tm_map().
corp <- tm_map(corp, removeRefSec)
corp[[2]]
# <<PlainTextDocument>>
# Metadata: 7
# Content: chars: 3