• 一个CSV文件解析类


    import java.io.BufferedReader;
    import java.io.FileReader;
    import java.util.ArrayList;
    import java.util.List;
    import java.util.regex.Matcher;
    import java.util.regex.Pattern;
    
    /**
     * Regular-expression based reader for simple CSV files.
     *
     * <p>The approach is adapted from a solution published online; thanks to its
     * original author. First written 2013-12-10.
     *
     * <p>Each physical line of the file is treated as one record. A field may be
     * bare or wrapped in double quotes; inside a quoted field the separator is
     * allowed and a doubled quote stands for one literal quote. Quoted fields that
     * span several lines are not supported because the file is consumed with
     * {@link BufferedReader#readLine()}.
     *
     * <p>Fields are trimmed, and fields that are empty after trimming are left out
     * of the row, so rows of the same file can differ in length.
     */
    public class CsvParser {
        /** Separator used when the caller does not supply one. */
        private static final String DEFAULT_SPACE_MARK = ",";

        /**
         * One double-quoted field: runs of non-quote characters, optionally
         * interleaved with doubled quotes. Written in "unrolled" form so that a line
         * with an unbalanced quote fails quickly instead of backtracking heavily.
         */
        private static final String QUOTED_FIELD = "\"[^\"]*(?:\"\"[^\"]*)*\"";

        /** Optional padding spaces around a separator or at the end of a field. */
        private static final String SPACES = "[ ]*";

        // Path name of the CSV file to read
        private final String inputCsvFile;

        // Field separator such as , ; :
        private final String spaceMark;

        /**
         * Creates a parser for the given file and field separator.
         *
         * @param inputCsvFile path of the CSV file to read
         * @param spaceMark field separator; intended to be a single character such
         *        as {@code ","}, {@code ";"} or {@code ":"}
         * @throws IllegalArgumentException if {@code spaceMark} is null or empty
         */
        public CsvParser(String inputCsvFile, String spaceMark) {
            if (spaceMark == null || spaceMark.length() == 0) {
                throw new IllegalArgumentException("spaceMark must be a non-empty string");
            }
            this.inputCsvFile = inputCsvFile;
            this.spaceMark = spaceMark;
        }

        /**
         * Creates a parser for the given file using a comma as field separator.
         *
         * @param inputCsvFile path of the CSV file to read
         */
        public CsvParser(String inputCsvFile) {
            this(inputCsvFile, DEFAULT_SPACE_MARK);
        }

        /**
         * Reads the whole file and splits every line into its fields.
         *
         * <p>The file is decoded by {@link FileReader} with its default charset.
         *
         * @return one element per line of the file, in file order; every element is
         *         a {@code List<String>} holding the non-empty fields of that line
         * @throws Exception if the file cannot be opened or read
         */
        public Object[] getParsedArray() throws Exception {
            List<List<String>> rows = new ArrayList<List<String>>();

            // The expression depends only on the separator, so compile it once.
            Pattern fieldPattern = Pattern.compile(getRegExp());

            BufferedReader in = new BufferedReader(new FileReader(this.inputCsvFile));
            try {
                String line;
                while ((line = in.readLine()) != null) {
                    rows.add(parseLine(fieldPattern, line));
                }
            } finally {
                // Release the file handle even when reading or matching fails.
                in.close();
            }

            return rows.toArray();
        }

        /**
         * Splits a single line into trimmed, unquoted fields.
         *
         * @param fieldPattern compiled expression produced by {@link #getRegExp()}
         * @param line one line of the CSV file, without its line terminator
         * @return the non-empty fields of the line, in order
         */
        private List<String> parseLine(Pattern fieldPattern, String line) {
            List<String> fields = new ArrayList<String>();
            Matcher matcher = fieldPattern.matcher(line);

            while (matcher.find()) {
                String field = matcher.group().trim();

                // Every match except the last one on a line carries its trailing separator.
                if (field.endsWith(spaceMark)) {
                    field = field.substring(0, field.length() - spaceMark.length()).trim();
                }

                // Strip the surrounding quotes and collapse doubled quotes.
                if (field.length() >= 2 && field.startsWith("\"") && field.endsWith("\"")) {
                    field = field.substring(1, field.length() - 1).replace("\"\"", "\"");
                }

                // The expression also matches the empty string (for example at the end
                // of a line); such matches, like genuinely empty fields, are skipped.
                if (!"".equals(field)) {
                    fields.add(field);
                }
            }
            return fields;
        }

        /**
         * Builds the regular expression that matches one field at a time.
         *
         * <p>The alternatives are tried in this order: quoted field followed by a
         * separator, bare field followed by a separator, quoted field at the end of
         * the line, bare field at the end of the line. The separator is quoted with
         * {@link Pattern#quote(String)} so that characters such as {@code |} or
         * {@code .} are taken literally.
         *
         * @return the regular expression source
         */
        private String getRegExp() {
            String separator = Pattern.quote(spaceMark);
            // A bare field may contain anything but a quote, the separator or a newline.
            String bareField = "[^\"" + separator + "\\n]*";

            StringBuilder regExp = new StringBuilder();
            regExp.append(QUOTED_FIELD).append(SPACES).append(separator).append(SPACES);
            regExp.append('|');
            regExp.append(bareField).append(SPACES).append(separator).append(SPACES);
            regExp.append('|');
            regExp.append(QUOTED_FIELD).append(SPACES);
            regExp.append('|');
            regExp.append(bareField).append(SPACES);
            return regExp.toString();
        }

        /**
         * Command-line demo: parses a CSV file and prints every row.
         *
         * <p>Usage: {@code java CsvParser [file [separator]]}. Without arguments the
         * sample file path below is used with a comma separator.
         *
         * @param args optional file path and optional separator
         * @throws Exception if the file cannot be read
         */
        public static void main(String[] args) throws Exception {
            String path = args.length > 0
                    ? args[0]
                    : "C:\\Users\\IBM_ADMIN\\Desktop\\Test CSV Files\\dummydata_not quoted_1.csv";
            CsvParser parser = args.length > 1 ? new CsvParser(path, args[1]) : new CsvParser(path);

            Object[] arr = parser.getParsedArray();

            for (Object obj : arr) {
                System.out.print("[");

                // Safe: getParsedArray() only ever stores List<String> rows.
                @SuppressWarnings("unchecked")
                List<String> ls = (List<String>) obj;

                for (String item : ls) {
                    System.out.println(item + ",");
                }

                System.out.println("],");
            }
        }
    }

    待解析的CSV文件内容如下:

    Column1,Column2,Column3,Column4,Column5
    Roderick Manuel,2013-02-21,59 E Brillhart Ave,67526,branch damage other impulse
    Lashonda Frank,2012-11-21,418 US Hwy 100,22609,fowl house
    Holly Silva,2013-09-15,16345 Old Jacksboro Hwy,13140,art sand colour tray boiling
    Martin Forbes,2013-04-28,69 Spr 33,91656,bag point necessary country
    Josefa Berry,2008-08-21,125 Hwy' 84 E,12604,Sort industry married safe shirt
    Allyson Green,2004-11-11,425 Ranch Rd 587,35059,mother political
    Tim Hopkins,2005-03-17,9559 I- 820,83598,crush. surprise station' distance from
    Olen Abbott,2009-02-17,2249 Westwood Dr N,15575,acid
    Ana Fowler,2011-02-19,22 State Loop 426,13409,memory poor farm adjustment
    Minerva House,2009-02-18,57151 Collett Rd,45782,dry summer

    程序解析后打印出的数组内容如下:

    [Column1,
    Column2,
    Column3,
    Column4,
    Column5,
    ],
    [Roderick Manuel,
    2013-02-21,
    59 E Brillhart Ave,
    67526,
    branch damage other impulse,
    ],
    [Lashonda Frank,
    2012-11-21,
    418 US Hwy 100,
    22609,
    fowl house,
    ],
    [Holly Silva,
    2013-09-15,
    16345 Old Jacksboro Hwy,
    13140,
    art sand colour tray boiling,
    ],
    [Martin Forbes,
    2013-04-28,
    69 Spr 33,
    91656,
    bag point necessary country,
    ],
    [Josefa Berry,
    2008-08-21,
    125 Hwy' 84 E,
    12604,
    Sort industry married safe shirt,
    ],
    [Allyson Green,
    2004-11-11,
    425 Ranch Rd 587,
    35059,
    mother political,
    ],
    [Tim Hopkins,
    2005-03-17,
    9559 I- 820,
    83598,
    crush. surprise station' distance from,
    ],
    [Olen Abbott,
    2009-02-17,
    2249 Westwood Dr N,
    15575,
    acid,
    ],
    [Ana Fowler,
    2011-02-19,
    22 State Loop 426,
    13409,
    memory poor farm adjustment,
    ],
    [Minerva House,
    2009-02-18,
    57151 Collett Rd,
    45782,
    dry summer,
    ],

  • 相关阅读:
    剑指offer JZ-1
    侯捷《C++面向对象开发》--String类的实现
    侯捷《C++面向对象开发》--复数类的实现
    辛普森悖论
    马尔可夫链的平稳分布
    熵和基尼指数的一些性质
    UVA 11624 Fire!(广度优先搜索)
    HDU 4578 Transformation (线段树区间多种更新)
    HDU 1540 Tunnel Warfare(线段树+区间合并)
    多重背包
  • 原文地址:https://www.cnblogs.com/doudouxiaoye/p/5772886.html
Copyright © 2020-2023  润新知