2012-03-12 5 views
2

Я делаю данные по финансированию кампании и ищу сотрудников компании x, которые пожертвовали х сумму денег в период с 1989 по 2010 год. Используя sqldf, я смог разобрать эту информацию. Данные> 4gb, где мой баран < 2gb, поэтому я разбил данные на таблицы размером около 100 МБ и у меня 40, которые загружаются, чтобы получить эту информацию. У некоторых есть участники от корпорации x, некоторые - нет.Как я могу связать с отсутствующими фреймами данных?

Кто-нибудь знает, как сделать «ловушки» для файлов? Этот «ловушки-митты» идентифицируют эти соответствующие файлы и связывают строки.

Я прикрепляю свой код на тот случай, если кто-то также хотел бы сделать общие улучшения?

Спасибо за вашу помощь

#Individual Donors for Retail Sales Companies 1990-2010 

#load file 

    indiv00.1<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv00.1.csv") 
    indiv00.2<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv00.2.csv") 
    indiv00.3<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv00.3.csv") 
    indiv00.4<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv00.4.csv") 
    indiv00.5<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv02.1.csv") 

#check 

#library 

    library(sqldf) 

#search for name 

    name1<-sqldf("select * from 'indiv00.1' where Orgname like '%Wal-Mart Stores% '") 
    name2<-sqldf("select * from 'indiv00.2' where Orgname like '%Wal-Mart Stores% '") 
    name3<-sqldf("select * from 'indiv00.3' where Orgname like '%Wal-Mart Stores% '") 
    name4<-sqldf("select * from 'indiv00.4' where Orgname like '%Wal-Mart Stores% '") 
    name5<-sqldf("select * from 'indiv00.5' where Orgname like '%Wal-Mart Stores% '") 

#check 

#merge data 

    na1<-sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID, 
    Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip, 
    RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name1 group by 
    ContribID,Cycle") 

    na2<-sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID, 
    Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip, 
    RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name2 group by 
    ContribID,Cycle") 

    na3<-sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID, 
    Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip, 
    RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name3 group by 
    ContribID,Cycle") 

    na4<-sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID, 
    Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip, 
    RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name4 group by 
    ContribID,Cycle") 

    na5<-sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID, 
    Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip, 
    RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name5 group by 
    ContribID,Cycle") 

#check 

#save data 

    setwd("C:\\Users\\tbrown11\\Desktop\\Data") 

    if(!exists("na1", mode="global.env"))save(na1,file="na1.Rdata") 
    if(!exists("na2", mode="global.env"))save(na2,file="na2.Rdata") 
    if(!exists("na3", mode="global.env"))save(na3,file="na3.Rdata") 
    if(!exists("na4", mode="global.env"))save(na4,file="na4.Rdata") 
    if(!exists("na5", mode="global.env"))save(na5,file="na5.Rdata") 

#check 

#clean house 

    rm(list = ls(), envir = globalenv()) 

#check 

#load file 

    indiv00.6<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv02.2.csv") 
    indiv00.7<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv02.3.csv") 
    indiv00.8<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv04.1.csv") 
    indiv00.9<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv04.2.csv") 
    indiv00.10<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv04.3.csv") 

#check 

#library 

    library(sqldf) 

#search for name  

    name6<-sqldf("select * from 'indiv00.6' where Orgname like '%Wal-Mart Stores% '") 
    name7<-sqldf("select * from 'indiv00.7' where Orgname like '%Wal-Mart Stores% '") 
    name8<-sqldf("select * from 'indiv00.8' where Orgname like '%Wal-Mart Stores% '") 
    name9<-sqldf("select * from 'indiv00.9' where Orgname like '%Wal-Mart Stores% '") 
    name10<-sqldf("select * from 'indiv00.10' where Orgname like '%Wal-Mart Stores% '") 

#check 

#merge data 

    na6<-sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID, 
    Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip, 
    RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name6 group by 
    ContribID,Cycle") 

    na7<-sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID, 
    Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip, 
    RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name7 group by 
    ContribID,Cycle") 

    na8<-sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID, 
    Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip, 
    RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name8 group by 
    ContribID,Cycle") 

    na9<-sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID, 
    Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip, 
    RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name9 group by 
    ContribID,Cycle") 

    na10<-sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID, 
    Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip, 
    RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name10 group by 
    ContribID,Cycle") 

#check 

#save data 

    setwd("C:\\Users\\tbrown11\\Desktop\\Data") 

    if(!exists("na6", mode="global.env"))save(na6,file="na6.Rdata") 
    if(!exists("na7", mode="global.env"))save(na7,file="na7.Rdata") 
    if(!exists("na8", mode="global.env"))save(na8,file="na8.Rdata") 
    if(!exists("na9", mode="global.env"))save(na9,file="na9.Rdata") 
    if(!exists("na10", mode="global.env"))save(na10,file="na10.Rdata") 

#check 

#clean house 

    rm(list = ls(), envir = globalenv()) 

#check 

#load file 

    indiv00.11<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv04.4.csv") 
    indiv00.12<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv04.5.csv") 
    indiv00.13<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv04.6.csv") 
    indiv00.14<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv06.1.csv") 
    indiv00.15<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv06.2.csv") 

#check 

#library 

    library(sqldf) 

#search for name 

    name11<-sqldf("select * from 'indiv00.11' where Orgname like '%Wal-Mart Stores% '") 
    name12<-sqldf("select * from 'indiv00.12' where Orgname like '%Wal-Mart Stores% '") 
    name13<-sqldf("select * from 'indiv00.13' where Orgname like '%Wal-Mart Stores% '") 
    name14<-sqldf("select * from 'indiv00.14' where Orgname like '%Wal-Mart Stores% '") 
    name15<-sqldf("select * from 'indiv00.15' where Orgname like '%Wal-Mart Stores% '") 

#merge 

    na11<-sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID, 
    Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip, 
    RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name11 group by 
    ContribID,Cycle") 

    na12<-sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID, 
    Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip, 
    RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name12 group by 
    ContribID,Cycle") 

    na13<-sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID, 
    Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip, 
    RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name13 group by 
    ContribID,Cycle") 

    na14<-sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID, 
    Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip, 
    RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name14 group by 
    ContribID,Cycle") 

    na15<-sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID, 
    Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip, 
    RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name15 group by 
    ContribID,Cycle") 

#check 

#save data 

    setwd("C:\\Users\\tbrown11\\Desktop\\Data") 

    if(!exists("na11", mode="global.env"))save(na11,file="na11.Rdata") 
    if(!exists("na12", mode="global.env"))save(na12,file="na12.Rdata") 
    if(!exists("na13", mode="global.env"))save(na13,file="na13.Rdata") 
    if(!exists("na14", mode="global.env"))save(na14,file="na14.Rdata") 
    if(!exists("na15", mode="global.env"))save(na15,file="na15.Rdata") 

#check 

#clean house 

    rm(list = ls(), envir = globalenv()) 

#check 

#load file 

    indiv00.16<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv06.3.csv") 
    indiv00.17<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv06.4.csv") 
    indiv00.18<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv06.5.csv") 
    indiv00.19<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv08.1.csv") 
    indiv00.20<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv08.2.csv") 

#check 

#library 

    library(sqldf) 

#search for name 

    name16<-sqldf("select * from 'indiv00.16' where Orgname like '%Wal-Mart Stores% '") 
    name17<-sqldf("select * from 'indiv00.17' where Orgname like '%Wal-Mart Stores% '") 
    name18<-sqldf("select * from 'indiv00.18' where Orgname like '%Wal-Mart Stores% '") 
    name19<-sqldf("select * from 'indiv00.19' where Orgname like '%Wal-Mart Stores% '") 
    name20<-sqldf("select * from 'indiv00.20' where Orgname like '%Wal-Mart Stores% '") 

#check 

#merge 

    na16<-sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID, 
    Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip, 
    RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name16 group by 
    ContribID,Cycle") 

    na17<-sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID, 
    Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip, 
    RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name17 group by 
    ContribID,Cycle") 

    na18<-sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID, 
    Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip, 
    RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name18 group by 
    ContribID,Cycle") 

    na19<-sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID, 
    Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip, 
    RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name19 group by 
    ContribID,Cycle") 

    na20<-sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID, 
    Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip, 
    RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name20 group by 
    ContribID,Cycle") 

#check 

#save data 

    setwd("C:\\Users\\tbrown11\\Desktop\\Data") 

    if(!exists("na16", mode="global.env"))save(na16,file="na16.Rdata") 
    if(!exists("na17", mode="global.env"))save(na17,file="na17.Rdata") 
    if(!exists("na18", mode="global.env"))save(na18,file="na18.Rdata") 
    if(!exists("na19", mode="global.env"))save(na19,file="na19.Rdata") 
    if(!exists("na20", mode="global.env"))save(na20,file="na20.Rdata") 

#check 

#clean house 

    rm(list = ls(), envir = globalenv()) 

#check 

#load file 

    indiv00.21<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv08.3.csv") 
    indiv00.22<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv08.4.csv") 
    indiv00.23<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv08.5.csv") 
    indiv00.24<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv08.6.csv") 
    indiv00.25<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv08.7.csv") 

#check 

#library 

    library(sqldf) 

#search for name 

    name21<-sqldf("select * from 'indiv00.21' where Orgname like '%Wal-Mart Stores% '") 
    name22<-sqldf("select * from 'indiv00.22' where Orgname like '%Wal-Mart Stores% '") 
    name23<-sqldf("select * from 'indiv00.23' where Orgname like '%Wal-Mart Stores% '") 
    name24<-sqldf("select * from 'indiv00.24' where Orgname like '%Wal-Mart Stores% '") 
    name25<-sqldf("select * from 'indiv00.25' where Orgname like '%Wal-Mart Stores% '") 

#check 

#merge 

    na21<-sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID, 
    Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip, 
    RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name21 group by 
    ContribID,Cycle") 

    na22<-sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID, 
    Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip, 
    RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name22 group by 
    ContribID,Cycle") 

    na23<-sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID, 
    Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip, 
    RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name23 group by 
    ContribID,Cycle") 

    na24<-sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID, 
    Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip, 
    RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name24 group by 
    ContribID,Cycle") 

    na25<-sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID, 
    Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip, 
    RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name25 group by 
    ContribID,Cycle") 

#check 

#save data 

    setwd("C:\\Users\\tbrown11\\Desktop\\Data") 

    if(!exists("na21", mode="global.env"))save(na21,file="na21.Rdata") 
    if(!exists("na22", mode="global.env"))save(na22,file="na22.Rdata") 
    if(!exists("na23", mode="global.env"))save(na23,file="na23.Rdata") 
    if(!exists("na24", mode="global.env"))save(na24,file="na24.Rdata") 
    if(!exists("na25", mode="global.env"))save(na25,file="na25.Rdata") 

#check 

#clean house 

    rm(list = ls(), envir = globalenv()) 

#check 

#Load file 

    indiv00.26<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv08.8.csv") 
    indiv00.27<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv08.9.csv") 
    indiv00.28<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv10.1.csv") 
    indiv00.29<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv10.2.csv") 
    indiv00.30<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv10.3.csv") 

#check 

#library 

    library(sqldf) 

#search for name 

    name26<-sqldf("select * from 'indiv00.26' where Orgname like '%Wal-Mart Stores% '") 
    name27<-sqldf("select * from 'indiv00.27' where Orgname like '%Wal-Mart Stores% '") 
    name28<-sqldf("select * from 'indiv00.28' where Orgname like '%Wal-Mart Stores% '") 
    name29<-sqldf("select * from 'indiv00.29' where Orgname like '%Wal-Mart Stores% '") 
    name30<-sqldf("select * from 'indiv00.30' where Orgname like '%Wal-Mart Stores% '") 

#check 

#merge 

    na26<-sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID, 
    Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip, 
    RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name26 group by 
    ContribID,Cycle") 

    na27<-sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID, 
    Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip, 
    RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name27 group by 
    ContribID,Cycle") 

    na28<-sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID, 
    Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip, 
    RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name28 group by 
    ContribID,Cycle") 

    na29<-sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID, 
    Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip, 
    RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name29 group by 
    ContribID,Cycle") 

    na30<-sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID, 
    Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip, 
    RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name30 group by 
    ContribID,Cycle") 

#check 

#save data 

    setwd("C:\\Users\\tbrown11\\Desktop\\Data") 

    if(!exists("na26", mode="global.env"))save(na26,file="na26.Rdata") 
    if(!exists("na27", mode="global.env"))save(na27,file="na27.Rdata") 
    if(!exists("na28", mode="global.env"))save(na28,file="na28.Rdata") 
    if(!exists("na29", mode="global.env"))save(na29,file="na29.Rdata") 
    if(!exists("na30", mode="global.env"))save(na30,file="na30.Rdata") 

#check 

#clean house 

    rm(list = ls(), envir = globalenv()) 

#check 

#load file 

    indiv00.31<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv10.4.csv") 
    indiv00.32<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv10.5.csv") 
    indiv00.33<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv96.csv") 
    indiv00.34<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv96.2.csv") 
    indiv00.35<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv98.csv") 

#check 

#library 

    library(sqldf) 

#search for name 

    name31<-sqldf("select * from 'indiv00.31' where Orgname like '%Wal-Mart Stores% '") 
    name32<-sqldf("select * from 'indiv00.32' where Orgname like '%Wal-Mart Stores% '") 
    name33<-sqldf("select * from 'indiv00.33' where Orgname like '%Wal-Mart Stores% '") 
    name34<-sqldf("select * from 'indiv00.34' where Orgname like '%Wal-Mart Stores% '") 
    name35<-sqldf("select * from 'indiv00.35' where Orgname like '%Wal-Mart Stores% '") 

#check 

#merge 

    na31<-sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID, 
    Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip, 
    RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name31 group by 
    ContribID,Cycle") 

    na32<-sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID, 
    Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip, 
    RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name32 group by 
    ContribID,Cycle") 

    na33<-sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID, 
    Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip, 
    RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name33 group by 
    ContribID,Cycle") 

    na34<-sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID, 
    Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip, 
    RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name34 group by 
    ContribID,Cycle") 

    na35<-sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID, 
    Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip, 
    RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name35 group by 
    ContribID,Cycle") 

#check 

#save data 

    setwd("C:\\Users\\tbrown11\\Desktop\\Data") 

    if(!exists("na31", mode="global.env"))save(na31,file="na31.Rdata") 
    if(!exists("na32", mode="global.env"))save(na32,file="na32.Rdata") 
    if(!exists("na33", mode="global.env"))save(na33,file="na33.Rdata") 
    if(!exists("na34", mode="global.env"))save(na34,file="na34.Rdata") 
    if(!exists("na35", mode="global.env"))save(na35,file="na35.Rdata") 

#check 

#clean house 

    rm(list = ls(), envir = globalenv()) 

#check 

#load file 

    indiv00.36<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv98.2.csv") 
    indiv00.37<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indiv98.3.csv") 
    indiv00.38<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indivs90.csv") 
    indiv00.39<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indivs92.csv") 
    indiv00.40<-read.csv("C:\\Users\\tbrown11\\Desktop\\indivs\\indivs94.csv") 

#check 

#library 

    library(sqldf) 

#search for name 

#check 
    name36<-sqldf("select * from 'indiv00.36' where Orgname like '%Wal-Mart Stores% '") 
    name37<-sqldf("select * from 'indiv00.37' where Orgname like '%Wal-Mart Stores% '") 
    name38<-sqldf("select * from 'indiv00.38' where Orgname like '%Wal-Mart Stores% '") 
    name39<-sqldf("select * from 'indiv00.39' where Orgname like '%Wal-Mart Stores% '") 
    name40<-sqldf("select * from 'indiv00.40' where Orgname like '%Wal-Mart Stores% '") 

#check 

#merge 

    na36<-sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID, 
    Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip, 
    RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name36 group by 
    ContribID,Cycle") 

    na37<-sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID, 
    Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip, 
    RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name37 group by 
    ContribID,Cycle") 

    na38<-sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID, 
    Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip, 
    RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name38 group by 
    ContribID,Cycle") 

    na39<-sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID, 
    Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip, 
    RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name39 group by 
    ContribID,Cycle") 

    na40<-sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID, 
    Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip, 
    RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name40 group by 
    ContribID,Cycle") 

#check 

#save data 

    setwd("C:\\Users\\tbrown11\\Desktop\\Data") 

    if(!exists("na36", mode="global.env"))save(na36,file="na36.Rdata") 
    if(!exists("na37", mode="global.env"))save(na37,file="na37.Rdata") 
    if(!exists("na38", mode="global.env"))save(na38,file="na38.Rdata") 
    if(!exists("na39", mode="global.env"))save(na39,file="na39.Rdata") 
    if(!exists("na40", mode="global.env"))save(na40,file="na40.Rdata") 

#check 

#clean house 

    rm(list = ls(), envir = globalenv()) 

#check 

#load file 

    a1<-load("C:\\Users\\tbrown11\\Desktop\\Data\\na1.Rdata") 
    a2<-load("C:\\Users\\tbrown11\\Desktop\\Data\\na2.Rdata") 
    a3<-load("C:\\Users\\tbrown11\\Desktop\\Data\\na3.Rdata") 
    a4<-load("C:\\Users\\tbrown11\\Desktop\\Data\\na4.Rdata") 
    a5<-load("C:\\Users\\tbrown11\\Desktop\\Data\\na5.Rdata") 
    a6<-load("C:\\Users\\tbrown11\\Desktop\\Data\\na6.Rdata") 
    a7<-load("C:\\Users\\tbrown11\\Desktop\\Data\\na7.Rdata") 
    a8<-load("C:\\Users\\tbrown11\\Desktop\\Data\\na8.Rdata") 
    a9<-load("C:\\Users\\tbrown11\\Desktop\\Data\\na9.Rdata") 
    a10<-load("C:\\Users\\tbrown11\\Desktop\\Data\\na10.Rdata") 
    a11<-load("C:\\Users\\tbrown11\\Desktop\\Data\\na11.Rdata") 
    a12<-load("C:\\Users\\tbrown11\\Desktop\\Data\\na12.Rdata") 
    a13<-load("C:\\Users\\tbrown11\\Desktop\\Data\\na13.Rdata") 
    a14<-load("C:\\Users\\tbrown11\\Desktop\\Data\\na14.Rdata") 
    a15<-load("C:\\Users\\tbrown11\\Desktop\\Data\\na15.Rdata") 
    a16<-load("C:\\Users\\tbrown11\\Desktop\\Data\\na16.Rdata") 
    a17<-load("C:\\Users\\tbrown11\\Desktop\\Data\\na17.Rdata") 
    a18<-load("C:\\Users\\tbrown11\\Desktop\\Data\\na18.Rdata") 
    a19<-load("C:\\Users\\tbrown11\\Desktop\\Data\\na19.Rdata") 
    a20<-load("C:\\Users\\tbrown11\\Desktop\\Data\\na20.Rdata") 
    a21<-load("C:\\Users\\tbrown11\\Desktop\\Data\\na21.Rdata") 
    a22<-load("C:\\Users\\tbrown11\\Desktop\\Data\\na22.Rdata") 
    a23<-load("C:\\Users\\tbrown11\\Desktop\\Data\\na23.Rdata") 
    a24<-load("C:\\Users\\tbrown11\\Desktop\\Data\\na24.Rdata") 
    a25<-load("C:\\Users\\tbrown11\\Desktop\\Data\\na25.Rdata") 
    a26<-load("C:\\Users\\tbrown11\\Desktop\\Data\\na26.Rdata") 
    a27<-load("C:\\Users\\tbrown11\\Desktop\\Data\\na27.Rdata") 
    a28<-load("C:\\Users\\tbrown11\\Desktop\\Data\\na28.Rdata") 
    a29<-load("C:\\Users\\tbrown11\\Desktop\\Data\\na29.Rdata") 
    a30<-load("C:\\Users\\tbrown11\\Desktop\\Data\\na30.Rdata") 
    a31<-load("C:\\Users\\tbrown11\\Desktop\\Data\\na31.Rdata") 
    a32<-load("C:\\Users\\tbrown11\\Desktop\\Data\\na32.Rdata") 
    a33<-load("C:\\Users\\tbrown11\\Desktop\\Data\\na33.Rdata") 
    a34<-load("C:\\Users\\tbrown11\\Desktop\\Data\\na34.Rdata") 
    a35<-load("C:\\Users\\tbrown11\\Desktop\\Data\\na35.Rdata") 
    a36<-load("C:\\Users\\tbrown11\\Desktop\\Data\\na36.Rdata") 
    a37<-load("C:\\Users\\tbrown11\\Desktop\\Data\\na37.Rdata") 
    a38<-load("C:\\Users\\tbrown11\\Desktop\\Data\\na38.Rdata") 
    a39<-load("C:\\Users\\tbrown11\\Desktop\\Data\\na39.Rdata") 
    a40<-load("C:\\Users\\tbrown11\\Desktop\\Data\\na40.Rdata") 

#global environment 

    b<-c(ls()) 
    B<-paste("n",b) 
    paste(B,",")  

#clean in text editor 

#check  #check  #check  #check  #check 
#check  #check  #check  #check  #check 
#check  #check  #check  #check  #check 

#Row bind 


    name<-rbind(

















    ) 


#write table 
#library 
    library(XML) 
    setwd("C:\\Users\\tbrown11\\Desktop\\Data\\Retail") 
#writing table to csv 
    out_file <- file("Walmart.csv", open="a") 
    write.table(name, file=out_file, sep=",", dec=".", quote=FALSE, 
    col.names=NA, row.names=TRUE) 
    close(out_file) 
#check 

#clean house 

    rm(list = ls(), envir = globalenv()) 

#check 
+0

Обратите внимание на 'read.csv.sql' в sqldf, который читает файл и применяет оператор sql всего за один вызов. –

ответ

2

Поскольку вы делаете то же самое снова и снова, вам следует подумать об использовании какой-либо петли. неявные циклы в R, как lapply

список всех соответствующих файлов:

myFunction = function(f) 
{ 
    require(sqldf) 
    ##Better to use require, since if the library is already loaded it 
    ##won't try to load it again 
    indiv <- read.csv(f) 
    name <- sqldf("select * from 'indiv' where Orgname like '%Wal-Mart Stores% '") 
    na <- sqldf("select Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID, 
       Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip, 
       RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp from name group by 
       ContribID,Cycle") 

    if(!exists("na", mode="global.env"))save(na,file="na.Rdata") 
    rm(list = ls(), envir = globalenv()) 
} 

setwd("~/relevant_directories") 
files = list.files() 
pre.results = lapply(files, myFunction) 
results = do.call(rbind, pre.results) 

##Do whatever you want with your results then! 

Вы должны настроить MYFUNCTION код не сохранить за тот же код na.Rdata каждый раз, когда он вызывается, но это не должно быть слишком сложно. Возможно, передайте счетчик функции и вставьте счетчик в имя файла

Если ваш файл данных слишком велик для загрузки в память, там могут быть пакеты R, которые могут помочь с этим. Взгляните на пакет ff, который имеет множество функций для доступа к файлам данных на жестком диске без необходимости их загрузки в память.

2

Вы, кажется, есть кучки повторения read.csv, sql('select * from ...') и т.д. - Я думаю, вы можете превратить это в петлю. На каждой итерации цикла вы загружаете файл csv, выполняете запрос и получаете rbind.

Кроме того, ваш запрос в настоящее время находится в двух шагах - SELECT * .. where Orgname like '%Wal-Mart..', а затем вы выбираете из него различные столбцы. Вы можете объединить их в один запрос (я думаю!):

library(sqldf) 

# make a vector of all your files: 
# indiv00.{1--4}, invid02.{1,2,3}, indiv04.{1--6}, indiv06.{1--5}, 
# indiv08.{1--9}, indiv10.{1--5}, indiv96, indiv96.2, indiv98, indiv98.{2,3}, 
# indivs{90,92,94} 
fnames <- file.path('C:','Users','tbrown11','Desktop','indivs', 
        c(sprintf('indiv00.%i.csv', 1:4), 
         sprintf('indiv02.%i.csv', 1:3), 
         sprintf('indiv04.%i.csv', 1:6), 
         sprintf('indiv06.%i.csv', 1:5), 
         sprintf('indiv08.%i.csv', 1:9), 
         sprintf('indiv10.%i.csv', 1:5), 
         sprintf('indiv%i.csv',c(96,98)), 
         'indiv96.2.csv', 
         sprintf('indiv98.%i.csv', 2:3), 
         sprintf('indivs%i.csv',c(90,92,94))) 
        ) 
# make a data frame to hold results: 
results <- NULL 

# LOOP THROUGH FILES: 
for (csvfile in fnames) { 
    # read.csv: 
    tbl <- read.csv(csvfile) 

    # do SELECT * from 'table' where Orgname like ... 
    # Combine into one query:  
    dat <- sqldf("SELECT Cycle,FECTransID,ContribID,Contrib,ConFirst,RecipID, 
    Orgname,UltOrg,RealCode,Date,Amount,Street,City,State,Zip, 
    RecipCode,Type,CmteID,OtherID,Gender,FecOccEmp 
    FROM tbl 
    WHERE Orgname like '%Wal-Mart Stores% ' 
    GROUP BY ContribID, Cycle") 

    # store these results: 
    results <- rbind(results, dat)   
} 

# finished. now just write your XML table: 
library(XML) 
out_file <- file(file.path('C:','Users','tbrown11','Desktop','Data','Retail', 
          'Walmart.csv'), open='a') 
write.table(results, file=out_file, sep=',', dec='.', quote=FALSE, 
      col.names=NA, row.names=T) 
close(out_file) 

Смотрите, как все ваши повторения кода были сжаты в один для цикла? Это идея.

Вы, возможно, потребуется настроить, чтобы убедиться:

  1. (комбинированный) запрос работает
  2. вы действительно хотите write.table без каких-либо имен столбцов? Как другой человек, глядя на ваш файл CSV, знает, что столбцы на самом деле являются «Цикл», «FECTransID», «ContribID», ... и т. Д.?