• Importing data in R 1


    Importing data in R 学习笔记1

    flat files:CSV

    # Import swimming_pools.csv correctly: pools
    pools<-read.csv("swimming_pools.csv",stringsAsFactors=FALSE)
    

    txt文件

    read.delim("name.txt",header=TRUE)
    
    用 read.table() 读入为数据框
    # Path to the hotdogs.txt file: path
    > path <- file.path("data", "hotdogs.txt")
    > 
    > # Import the hotdogs.txt file: hotdogs
    > hotdogs <- read.table(path, 
                            sep = "	", 
                            col.names = c("type", "calories", "sodium"))
    > 
    > # Call head() on hotdogs
    > head(hotdogs)
      type calories sodium
    1 Beef      186    495
    2 Beef      181    477
    3 Beef      176    425
    4 Beef      149    322
    5 Beef      184    482
    6 Beef      190    587
    

    tibble:简单数据框
    read_*()(readr)对比 read.*()(base R)
    前者产生一个简单的数据框,并且会展示每一列的数据类型

    packages:readr

    read_csv()

    读入csv格式
    read_csv and read_tsv are special cases of the general read_delim. They're useful for reading the most common types of flat file data, comma separated values and tab separated values, respectively. read_csv2 uses ; for separators, instead of ,. This is common in European countries which use , as the decimal separator.

    read_tsv

    读入以制表符(tab)分隔的文本文件,如 .txt / .tsv

    > # readr is already loaded
    > 
    > # Column names
    > properties <- c("area", "temp", "size", "storage", "method",
                      "texture", "flavor", "moistness")
    > 
    > # Import potatoes.txt: potatoes
    读入数据并指定列名(col_names 指定的是列名,不是行名)
    > potatoes<-read_tsv("potatoes.txt",col_names=properties)
    Parsed with column specification:
    cols(
      area = col_integer(),
      temp = col_integer(),
      size = col_integer(),
      storage = col_integer(),
      method = col_integer(),
      texture = col_double(),
      flavor = col_double(),
      moistness = col_double()
    )
    > col_names=properties
    > 
    > # Call head() on potatoes
    > head(potatoes)
    # A tibble: 6 x 8
       area  temp  size storage method texture flavor moistness
      <int> <int> <int>   <int>  <int>   <dbl>  <dbl>     <dbl>
    1     1     1     1       1      1     2.9    3.2       3  
    2     1     1     1       1      2     2.3    2.5       2.6
    3     1     1     1       1      3     2.5    2.8       2.8
    4     1     1     1       1      4     2.1    2.9       2.4
    5     1     1     1       1      5     1.9    2.8       2.2
    6     1     1     1       2      1     1.8    3         1.7
    

    read_delim()

    # Column names
    > properties <- c("area", "temp", "size", "storage", "method",
                      "texture", "flavor", "moistness")
    > 
    > # Import potatoes.txt using read_delim(): potatoes
    > potatoes <- read_delim("potatoes.txt", delim = "	", col_names = properties)
    Parsed with column specification:
    cols(
      area = col_integer(),
      temp = col_integer(),
      size = col_integer(),
      storage = col_integer(),
      method = col_integer(),
      texture = col_double(),
      flavor = col_double(),
      moistness = col_double()
    )
    > 
    > # Print out potatoes
    > potatoes
    # A tibble: 160 x 8
        area  temp  size storage method texture flavor moistness
       <int> <int> <int>   <int>  <int>   <dbl>  <dbl>     <dbl>
     1     1     1     1       1      1     2.9    3.2       3  
     2     1     1     1       1      2     2.3    2.5       2.6
     3     1     1     1       1      3     2.5    2.8       2.8
     4     1     1     1       1      4     2.1    2.9       2.4
     5     1     1     1       1      5     1.9    2.8       2.2
     6     1     1     1       2      1     1.8    3         1.7
     7     1     1     1       2      2     2.6    3.1       2.4
     8     1     1     1       2      3     3      3         2.9
     9     1     1     1       2      4     2.2    3.2       2.5
    10     1     1     1       2      5     2      2.8       1.9
    # ... with 150 more rows
    

    data.table()

    fread

    fread() can make up column names itself when the file has none
    more convenient to use

     # Import columns 6 and 8 of potatoes.csv: potatoes
    > potatoes<-fread("potatoes.csv",select=c(6,8))
    > 
    > # Plot texture (x) and moistness (y) of potatoes
    > plot(potatoes$texture,potatoes$moistness)
    

    readxl

    excel_sheets()

    library(readxl)
    # Print the names of all worksheets
    excel_sheets("urbanpop.xlsx")
    
    # Read all Excel sheets with lapply(): pop_list
    pop_list<- lapply(excel_sheets("urbanpop.xlsx"),
                          read_excel,
                          path = "urbanpop.xlsx")
    
    # Display the structure of pop_list
    str(pop_list)
    

    read_excel()

    # Import the second sheet of urbanpop.xlsx, skipping the first 21 rows: urbanpop_sel
    urbanpop_sel <- read_excel("urbanpop.xlsx", sheet = 2, col_names = FALSE, skip = 21)
    
    # Print out the first observation from urbanpop_sel
    urbanpop_sel[1,]
    

    gdata

    read.xls()

    读入xls格式的数据

    # Column names for urban_pop
    > columns <- c("country", paste0("year_", 1967:1974))
    > 
    > # Finish the read.xls call
    > urban_pop <- read.xls("urbanpop.xls", sheet = 2,
                            skip = 50, header = FALSE, stringsAsFactors = FALSE,
                            col.names = columns)
    > 
    > # Print first 10 observation of urban_pop
    > head(urban_pop,n=10)
                  country   year_1967   year_1968   year_1969   year_1970
    1              Cyprus   231929.74   237831.38   243983.34   250164.52
    2      Czech Republic  6204409.91  6266304.50  6326368.97  6348794.89
    3             Denmark  3777552.62  3826785.08  3874313.99  3930042.97
    4            Djibouti    77788.04    84694.35    92045.77    99845.22
    5            Dominica    27550.36    29527.32    31475.62    33328.25
    6  Dominican Republic  1535485.43  1625455.76  1718315.40  1814060.00
    7             Ecuador  2059355.12  2151395.14  2246890.79  2345864.41
    8               Egypt 13798171.00 14248342.19 14703858.22 15162858.52
    9         El Salvador  1345528.98  1387218.33  1429378.98  1472181.26
    10  Equatorial Guinea    75364.50    77295.03    78445.74    78411.07
         year_1971   year_1972   year_1973   year_1974
    1    261213.21   272407.99   283774.90   295379.83
    2   6437055.17  6572632.32  6718465.53  6873458.18
    3   3981360.12  4028247.92  4076867.28  4120201.43
    4    107799.69   116098.23   125391.58   136606.25
    5     34761.52    36049.99    37260.05    38501.47
    6   1915590.38  2020157.01  2127714.45  2238203.87
    7   2453817.78  2565644.81  2681525.25  2801692.62
    8  15603661.36 16047814.69 16498633.27 16960827.93
    9   1527985.34  1584758.18  1642098.95  1699470.87
    10    77055.29    74596.06    71438.96    68179.26
    

    getSheets()

    查看一个 Excel 文件有多少个 sheet,并输出每个 sheet 的名字(getSheets() 属于下面的 XLConnect 包,需要先用 loadWorkbook() 加载工作簿)

    XLConnect

    loadWorkbook()

    主要是加载excel文件
    When working with XLConnect, the first step will be to load a workbook in your R session with loadWorkbook(); this function will build a "bridge" between your Excel file and your R session.

    library("XLConnect")
    > 
    > # Build connection to urbanpop.xlsx: my_book
    > my_book<-loadWorkbook("urbanpop.xlsx")
    > 
    > # Print out the class of my_book
    > class(my_book)
    [1] "workbook"
    attr(,"package")
    [1] "XLConnect"
    

    readWorksheet()

    读取excel文件
    所以顺序肯定是先加载再读取啊。

    # Import columns 3, 4, and 5 from second sheet in my_book: urbanpop_sel
    urbanpop_sel <- readWorksheet(my_book, sheet = 2,startCol=3,endCol=5)
    
    # Import first column from second sheet in my_book: countries
    countries<-readWorksheet(my_book, sheet = 2,startCol=1,endCol=1)
    
    # cbind() urbanpop_sel and countries together: selection
    selection<-cbind(countries,urbanpop_sel)
    

    createSheet()

    在已经有的excel中创建一个sheet,创建一个空的sheet

    # Build connection to urbanpop.xlsx
    > my_book <- loadWorkbook("urbanpop.xlsx")
    > 
    > # Add a worksheet to my_book, named "data_summary"
    > createSheet(my_book,"data_summary")
    > 
    > # Use getSheets() on my_book
    > getSheets(my_book)
    [1] "1960-1966"    "1967-1974"    "1975-2011"    "data_summary"
    

    writeWorksheet()

    Writes data to worksheets of a workbook.

    saveWorkbook

    保存工作表,就是存到磁盘上

    # Build connection to urbanpop.xlsx
    my_book <- loadWorkbook("urbanpop.xlsx")
    
    # Add a worksheet to my_book, named "data_summary"
    createSheet(my_book, "data_summary")
    
    # Create data frame: summ
    sheets <- getSheets(my_book)[1:3]
    dims <- sapply(sheets, function(x) dim(readWorksheet(my_book, sheet = x)), USE.NAMES = FALSE)
    summ <- data.frame(sheets = sheets,
                       nrows = dims[1, ],
                       ncols = dims[2, ])
    
    # Add data in summ to "data_summary" sheet
    writeWorksheet(my_book,summ,"data_summary")
    
    # Save workbook as summary.xlsx
     saveWorkbook(my_book,"summary.xlsx")
    

    renameSheet()

    给sheet表重命名

    # Rename "data_summary" sheet to "summary"
    renameSheet(my_book, "data_summary", "summary")
    
    # Print out sheets of my_book
    getSheets(my_book)
    
    # Save workbook to "renamed.xlsx"
    saveWorkbook(my_book, file = "renamed.xlsx")
    

    我发现我自己真的很容易丢参数哦,然后死活调不出来。。。===。。。苦恼的人儿

    removeSheet()

    删除指定sheet

    library(XLConnect)
    # Build connection to renamed.xlsx: my_book
    my_book<-loadWorkbook("renamed.xlsx")
    
    
    # Remove the fourth sheet
    removeSheet(my_book,sheet="summary")
    
    # Save workbook to "clean.xlsx"
    saveWorkbook(my_book,"clean.xlsx")
    
  • 相关阅读:
    【笔记】机器学习
    【笔记】机器学习
    【NLP】暑假课作业3
    【NLP】暑假课作业1
    【笔记】机器学习
    【转】谈谈我对攻读计算机研究生的看法
    Android学习笔记-事件处理
    精读《C++ primer》学习笔记(第四至六章)
    那些年,当我还是个小白
    精读《C++ primer》学习笔记(第一至三章)
  • 原文地址:https://www.cnblogs.com/gaowenxingxing/p/12036831.html
Copyright © 2020-2023  润新知