• CXX解析CSV文件


    1、头文件

     

    #ifndef _CSV_HELPER_H_

    #define _CSV_HELPER_H_

    #include <string>

    #include <list>

    #include <vector>

     

    using namespace std;

     

    // One parsed CSV record. The parser fills fields_ with individually
    // allocated, NUL-terminated strings; CsvParser_destroy_row releases them.
    struct CsvRow {
        char **fields_;      // array of field strings
        int numOfFields_;    // number of entries stored in fields_
    };

     

    typedef struct CsvParser {

        char *filePath_;

        char delimiter_;

        int firstLineIsHeader_;

        char *errMsg_;

        CsvRow *header_;

        FILE *fileHandler_;

        int fromString_;

        char *csvString_;

        int csvStringIter_;

        int iSkipLine;

        vector<string> vtSkippedLine;

    } CsvParser;

     

     

    // Creates a parser that reads the file at filePath. A NULL delimiter selects ',';
    // a delimiter rejected by _CsvParser_delimiterIsAccepted makes later row reads fail
    // with an error message. iSkipLine leading lines are skipped when the file is first opened.
    CsvParser *CsvParser_new(const char *filePath, const char *delimiter, int firstLineIsHeader, int iSkipLine = 0);

    // Creates a parser that reads from a private copy of csvString instead of a file.
    CsvParser *CsvParser_new_from_string(const char *csvString, const char *delimiter, int firstLineIsHeader);

    // Releases the parser, its cached header row, its strings, and closes its file if open.
    // Passing NULL is a no-op.
    void CsvParser_destroy(CsvParser *csvParser);

    // Releases a row and every field string it holds.
    void CsvParser_destroy_row(CsvRow *csvRow);

     

    // Returns the header row (read on first use and cached inside the parser, which frees it
    // in CsvParser_destroy). Returns NULL if the parser was created without a header or the
    // read failed; see CsvParser_getErrorMessage.
    const CsvRow *CsvParser_getHeader(CsvParser *csvParser);

    // Returns the next data row, reading the header first when one is expected and not yet
    // cached. The returned row is not kept by the parser; release it with
    // CsvParser_destroy_row. Returns NULL when no row could be read.
    CsvRow *CsvParser_getRow(CsvParser *csvParser);

    // Number of fields stored in csvRow.
    int CsvParser_getNumFields(const CsvRow *csvRow);

    // Read-only view of the field strings stored in csvRow.
    const char **CsvParser_getFields(const CsvRow *csvRow);

    // Last error message recorded on the parser, or NULL if none has been set.
    const char* CsvParser_getErrorMessage(CsvParser *csvParser);

     

    // Internal: consumes characters from p until its terminator character or EOF and
    // returns the resulting ftell() position.
    int _CsvParser_getNextLinePos(FILE *p);

    // Internal: advances the parser's file past one line; always returns true.
    bool _CsvParser_skipLine(CsvParser *csvParser);

    // Internal: parses and returns one row from the current input position, or NULL
    // (with an error message recorded) on failure or end of input.
    CsvRow *_CsvParser_getRow(CsvParser *csvParser);

    // Internal: returns 1 if the first character of delimiter may be used as a separator, else 0.
    int _CsvParser_delimiterIsAccepted(const char *delimiter);

    // Internal: replaces the parser's stored error message with a copy of errorMessage.
    void _CsvParser_setErrorMessage(CsvParser *csvParser, const char *errorMessage);

     

     

    // Forward declarations: both classes are named in friend declarations
    // before their own definitions appear.
    class CCsvHelper;

    class CCsvResultSet;

    // One CSV cell: first is the column name, second is the cell text.
    using CCsvField = std::pair<string, string>;

     

    class CCsvRow

    {

    friend class CCsvResultSet;

    public:

        CCsvRow(){

            m_vtField.clear();

        }

        ~CCsvRow(){}

        string GetFieldByName(string sName);

        string GetFieldByIndex(int index);

        void Print();

    private:

        void SetData(vector<string> vtHead, vector<string> vtData);

    public:

        vector<CCsvField> m_vtField;

    };

     

    class CCsvResultSet

    {

        friend class CCsvHelper;

    public:

        CCsvResultSet(){

            m_vtHead.clear();

            m_vtRow.clear();

            m_iPos = 0;

        }

        ~CCsvResultSet(){

            m_vtHead.clear();

            m_vtRow.clear();

        }

        void Print();

        bool Next();

        int GetRowCount();

        CCsvRow GetRowByIndex(int index);

        CCsvRow FetchRow();

     

    private:

        void SetHeader(vector<string> vtHead);

        void SetRow(vector<string> vtRow);

     

    private:

        int m_iPos;

        vector<string> m_vtHead;//表头

        vector<vector<string>> m_vtRow;//行数据

    };

     

    class CCsvHelper

    {

    public:

        CCsvHelper(){

            

        }

        ~CCsvHelper(){

            

        }

     

    public:

        /*

        sFileName 文件路径,path/filename.csv

        iSkipLine 略过起始行数,略过的行将不被解析,略过文件头部连续的n行

        bFirstLineHead 读取的首行是否是表头,不是表头将直接当作数据

        */

        bool LoadFrom(string sFileName, int iSkipLine = 0, bool bFirstLineHead = true);

     

        /*

        获取当前文件结果集

        */

        CCsvResultSet GetResultSet();

     

    private:

        string m_sFileName;

        bool m_bHeader;//存在表头

     

        

        CCsvResultSet m_rsData;

    };

     

    #endif

     

    2、实现文件

     

    #include <stdlib.h>

    #include <string.h>

    #include <stdio.h>

    #include <errno.h>

    #include <iostream>

    #include <iomanip>

     

    #include "CsvHelper.h"

     

    using namespace std;

     

     

    CsvParser *CsvParser_new(const char *filePath, const char *delimiter, int firstLineIsHeader, int iSkipLine) {

    CsvParser *csvParser = (CsvParser*)malloc(sizeof(CsvParser));

    if (filePath == NULL) {

    csvParser->filePath_ = NULL;

    }

    else {

    int filePathLen = strlen(filePath);

    csvParser->filePath_ = (char*)malloc((filePathLen + 1));

    strcpy(csvParser->filePath_, filePath);

    }

    csvParser->firstLineIsHeader_ = firstLineIsHeader;

    csvParser->errMsg_ = NULL;

    if (delimiter == NULL) {

    csvParser->delimiter_ = ',';

    }

    else if (_CsvParser_delimiterIsAccepted(delimiter)) {

    csvParser->delimiter_ = *delimiter;

    }

    else {

    csvParser->delimiter_ = '';

    }

    csvParser->header_ = NULL;

    csvParser->fileHandler_ = NULL;

    csvParser->fromString_ = 0;

    csvParser->csvString_ = NULL;

    csvParser->csvStringIter_ = 0;

    csvParser->iSkipLine = iSkipLine;

     

    return csvParser;

    }

     

    CsvParser *CsvParser_new_from_string(const char *csvString, const char *delimiter, int firstLineIsHeader) {

    CsvParser *csvParser = CsvParser_new(NULL, delimiter, firstLineIsHeader);

    csvParser->fromString_ = 1;

    if (csvString != NULL) {

    int csvStringLen = strlen(csvString);

    csvParser->csvString_ = (char*)malloc(csvStringLen + 1);

    strcpy(csvParser->csvString_, csvString);

    }

    return csvParser;

    }

     

    void CsvParser_destroy(CsvParser *csvParser) {

    if (csvParser == NULL) {

    return;

    }

    if (csvParser->filePath_ != NULL) {

    free(csvParser->filePath_);

    }

    if (csvParser->errMsg_ != NULL) {

    free(csvParser->errMsg_);

    }

    if (csvParser->fileHandler_ != NULL) {

    fclose(csvParser->fileHandler_);

    }

    if (csvParser->header_ != NULL) {

    CsvParser_destroy_row(csvParser->header_);

    }

    if (csvParser->csvString_ != NULL) {

    free(csvParser->csvString_);

    }

    free(csvParser);

    }

     

    // Releases a row: every field string, the field array, and the row itself.
    // Passing NULL is a no-op, matching CsvParser_destroy.
    void CsvParser_destroy_row(CsvRow *csvRow) {
        if (csvRow == NULL) {
            return;
        }
        if (csvRow->fields_ != NULL) {
            for (int i = 0; i < csvRow->numOfFields_; i++) {
                free(csvRow->fields_[i]);
            }
            free(csvRow->fields_);
        }
        free(csvRow);
    }

     

     

    // Returns the header row, reading it on first use and caching it in the parser.
    // When the parser was created without a header, records an error and returns NULL.
    const CsvRow *CsvParser_getHeader(CsvParser *csvParser) {
        if (csvParser->firstLineIsHeader_) {
            // Lazily read the header the first time it is requested.
            if (csvParser->header_ == NULL) {
                csvParser->header_ = _CsvParser_getRow(csvParser);
            }
            return csvParser->header_;
        }
        _CsvParser_setErrorMessage(csvParser, "Cannot supply header, as current CsvParser object does not support header");
        return NULL;
    }

     

    // Returns the next data row. When a header is expected but not cached yet,
    // the first row is consumed into header_ before the data row is read.
    CsvRow *CsvParser_getRow(CsvParser *csvParser) {
        const bool headerPending = csvParser->firstLineIsHeader_ && csvParser->header_ == NULL;
        if (headerPending) {
            csvParser->header_ = _CsvParser_getRow(csvParser);
        }
        CsvRow *nextRow = _CsvParser_getRow(csvParser);
        return nextRow;
    }

     

    // Number of fields stored in the row.
    int CsvParser_getNumFields(const CsvRow *csvRow) {
        const int fieldCount = (*csvRow).numOfFields_;
        return fieldCount;
    }

     

    // Read-only view of the row's field strings.
    const char **CsvParser_getFields(const CsvRow *csvRow) {
        const char **readOnlyFields = const_cast<const char **>(csvRow->fields_);
        return readOnlyFields;
    }

    // Consumes the rest of the current line from p (up to and including the
    // newline, or up to EOF) and returns the stream position that follows it.
    // The terminator is '\n'; comparing against a space would stop at the first
    // blank inside the line instead of at its end.
    int _CsvParser_getNextLinePos(FILE *p)
    {
        int ch;
        while ((ch = fgetc(p)) != EOF)
        {
            if (ch == '\n')
            {
                break;
            }
        }
        return static_cast<int>(ftell(p));
    }

    // Moves the parser's file to the position reported by _CsvParser_getNextLinePos.
    // Always returns true.
    bool _CsvParser_skipLine(CsvParser *csvParser) {
        FILE *stream = csvParser->fileHandler_;
        const int nextLineOffset = _CsvParser_getNextLinePos(stream);
        fseek(stream, nextLineOffset, SEEK_SET);
        return true;
    }

     

    // Parses one row from the parser's input (file or string) and returns it.
    //
    // On the first file read the file is opened and iSkipLine leading lines are
    // skipped. Fields are separated by delimiter_; a field that starts with '"'
    // may contain delimiters and newlines, and a doubled quote inside it yields
    // one literal quote. '\r' characters are dropped. A row ends at '\n' or at
    // end of input.
    //
    // Returns NULL, with an error message recorded on the parser, when the
    // input is missing, the delimiter is unsupported, the file cannot be
    // opened, or the end of input is reached with nothing left to return.
    // The caller releases the returned row with CsvParser_destroy_row.
    // Allocation failures of the row buffers are not handled here.
    CsvRow *_CsvParser_getRow(CsvParser *csvParser) {
        const int acceptedFields = 64;     // fields_ grows in chunks of this many slots
        int acceptedCharsInField = 64;     // current capacity of the field buffer
        int numRowRealloc = 0;

        if (csvParser->filePath_ == NULL && (!csvParser->fromString_)) {
            _CsvParser_setErrorMessage(csvParser, "Supplied CSV file path is NULL");
            return NULL;
        }
        if (csvParser->csvString_ == NULL && csvParser->fromString_) {
            _CsvParser_setErrorMessage(csvParser, "Supplied CSV string is NULL");
            return NULL;
        }
        if (csvParser->delimiter_ == '\0') {
            _CsvParser_setErrorMessage(csvParser, "Supplied delimiter is not supported");
            return NULL;
        }

        if (!csvParser->fromString_ && csvParser->fileHandler_ == NULL) {
            csvParser->fileHandler_ = fopen(csvParser->filePath_, "r");
            if (csvParser->fileHandler_ == NULL) {
                const char *errStr = strerror(errno);
                // Built with std::string so an arbitrarily long path cannot overflow a fixed buffer.
                std::string errMsg("Error opening CSV file for reading: ");
                errMsg += csvParser->filePath_;
                errMsg += " : ";
                errMsg += errStr;
                _CsvParser_setErrorMessage(csvParser, errMsg.c_str());
                return NULL;
            }
            // Leading lines are skipped once, right after the file is opened.
            for (int iCnt = csvParser->iSkipLine; iCnt > 0; iCnt--) {
                _CsvParser_skipLine(csvParser);
            }
        }

        CsvRow *csvRow = (CsvRow*)malloc(sizeof(CsvRow));
        csvRow->fields_ = (char**)malloc(acceptedFields * sizeof(char*));
        csvRow->numOfFields_ = 0;
        int fieldIter = 0;
        char *currField = (char*)malloc(acceptedCharsInField);
        int inside_complex_field = 0;
        int currFieldCharIter = 0;
        int seriesOfQuotesLength = 0;
        int lastCharIsQuote = 0;
        int isEndOfFile = 0;

        while (1) {
            int rawChar;
            int endOfFileIndicator;
            if (csvParser->fromString_) {
                rawChar = (unsigned char)csvParser->csvString_[csvParser->csvStringIter_];
                endOfFileIndicator = (rawChar == '\0');
                // Never step past the terminator, so a later call re-reads it
                // instead of reading beyond the end of the buffer.
                if (!endOfFileIndicator) {
                    csvParser->csvStringIter_++;
                }
            }
            else {
                rawChar = fgetc(csvParser->fileHandler_);
                // EOF covers both end of file and a read error, so a failing
                // stream cannot keep this loop spinning.
                endOfFileIndicator = (rawChar == EOF);
            }
            char currChar = (char)rawChar;

            if (endOfFileIndicator) {
                if (currFieldCharIter == 0 && fieldIter == 0) {
                    _CsvParser_setErrorMessage(csvParser, "Reached EOF");
                    free(currField);
                    CsvParser_destroy_row(csvRow);
                    return NULL;
                }
                // Treat end of input as the end of the pending row.
                currChar = '\n';
                isEndOfFile = 1;
            }
            if (currChar == '\r') {
                continue;
            }
            if (currFieldCharIter == 0 && !lastCharIsQuote) {
                if (currChar == '"') {
                    // Opening quote of a quoted field.
                    inside_complex_field = 1;
                    lastCharIsQuote = 1;
                    continue;
                }
            }
            else if (currChar == '"') {
                seriesOfQuotesLength++;
                inside_complex_field = (seriesOfQuotesLength % 2 == 0);
                if (inside_complex_field) {
                    // Second quote of a doubled pair: overwrite the first one.
                    currFieldCharIter--;
                }
            }
            else {
                seriesOfQuotesLength = 0;
            }

            if (isEndOfFile || ((currChar == csvParser->delimiter_ || currChar == '\n') && !inside_complex_field)) {
                // Drop the closing quote, but never index below zero (an
                // unterminated opening quote right before end of input).
                int terminatorPos = currFieldCharIter;
                if (lastCharIsQuote && currFieldCharIter > 0) {
                    terminatorPos = currFieldCharIter - 1;
                }
                currField[terminatorPos] = '\0';
                csvRow->fields_[fieldIter] = (char*)malloc(currFieldCharIter + 1);
                strcpy(csvRow->fields_[fieldIter], currField);
                free(currField);
                csvRow->numOfFields_++;
                if (currChar == '\n') {
                    return csvRow;
                }
                if (csvRow->numOfFields_ != 0 && csvRow->numOfFields_ % acceptedFields == 0) {
                    csvRow->fields_ = (char**)realloc(csvRow->fields_, ((numRowRealloc + 2) * acceptedFields) * sizeof(char*));
                    numRowRealloc++;
                }
                acceptedCharsInField = 64;
                currField = (char*)malloc(acceptedCharsInField);
                currFieldCharIter = 0;
                fieldIter++;
                inside_complex_field = 0;
            }
            else {
                currField[currFieldCharIter] = currChar;
                currFieldCharIter++;
                if (currFieldCharIter == acceptedCharsInField - 1) {
                    acceptedCharsInField *= 2;
                    currField = (char*)realloc(currField, acceptedCharsInField);
                }
            }
            lastCharIsQuote = (currChar == '"') ? 1 : 0;
        }
    }

     

    // Returns 1 when the first character of delimiter can serve as a field
    // separator, 0 otherwise. Newline, carriage return, NUL (an empty string)
    // and the double quote are rejected because the row parser gives them a
    // meaning of their own. A NULL pointer is rejected as well.
    int _CsvParser_delimiterIsAccepted(const char *delimiter) {
        if (delimiter == NULL) {
            return 0;
        }
        switch (*delimiter) {
        case '\n':
        case '\r':
        case '\0':
        case '"':
            return 0;
        default:
            return 1;
        }
    }

     

    // Replaces the parser's stored error message with a private copy of
    // errorMessage. The previous message is always released. If errorMessage is
    // NULL or the copy cannot be allocated, the stored message becomes NULL.
    void _CsvParser_setErrorMessage(CsvParser *csvParser, const char *errorMessage) {
        free(csvParser->errMsg_);   // free(NULL) is a no-op
        csvParser->errMsg_ = NULL;
        if (errorMessage == NULL) {
            return;
        }
        const size_t errMsgLen = strlen(errorMessage);
        char *copy = (char*)malloc(errMsgLen + 1);
        if (copy == NULL) {
            return;
        }
        memcpy(copy, errorMessage, errMsgLen + 1);
        csvParser->errMsg_ = copy;
    }

     

    // Last error message recorded on the parser (owned by the parser), or NULL.
    const char *CsvParser_getErrorMessage(CsvParser *csvParser) {
        const char *lastError = (*csvParser).errMsg_;
        return lastError;
    }

     

     

    ////////////////////////////////////////////////////////////////////////

    void CCsvRow::Print()

    {

    cout << left;

    if (m_vtField.size() > 0)

    {

    for (auto it : m_vtField)

    {

    cout << setw((it.first.length()>=it.second.length()? it.first.length(): it.second.length()) + 10) << it.first;

    // cout << it.first << " ";

    }

    cout << endl;

    cout << left;

    for (auto it : m_vtField)

    {

    cout << setw((it.first.length()>=it.second.length() ? it.first.length() : it.second.length()) + 10) << it.second;

    // cout << it.second << " ";

    }

    cout << endl;

    }

    cout << endl;

    }

    // Appends one (column name, value) field per column, pairing vtHead[i] with
    // vtData[i]. Throws a const char* message when the two vectors differ in size.
    void CCsvRow::SetData(vector<string> vtHead, vector<string> vtData)
    {
        if (vtHead.size() != vtData.size())
        {
            throw ("col name size is not equal to row col size!!!");
        }

        std::vector<std::string>::const_iterator nameIt = vtHead.begin();
        std::vector<std::string>::const_iterator valueIt = vtData.begin();
        for (; nameIt != vtHead.end(); ++nameIt, ++valueIt)
        {
            m_vtField.push_back(CCsvField(*nameIt, *valueIt));
        }
    }

    string CCsvRow::GetFieldByName(string sName)

    {

    for (auto it: m_vtField)

    {

    if (it.first == sName)

    {

    return it.second;

    }

    }

    return string();

    }

    // Value of the field at position index (0-based).
    // Throws a const char* message when index is negative or not less than the
    // number of fields. The upper bound uses >= so that index == size() is
    // rejected instead of reading one element past the end.
    string CCsvRow::GetFieldByIndex(int index)
    {
        if (index < 0 || index >= (int)m_vtField.size())
        {
            throw ("invalid index ");
        }
        return m_vtField[index].second;
    }

     

     

    // Writes the header names on one line, then each data row on its own line,
    // to std::cout. Every cell is left-aligned in a 20-character column.
    void CCsvResultSet::Print()
    {
        std::cout << std::left;
        if (!m_vtHead.empty())
        {
            for (const std::string &title : m_vtHead)
            {
                std::cout << std::setw(20) << title;
            }
        }
        std::cout << std::endl;

        for (const std::vector<std::string> &record : m_vtRow)
        {
            for (const std::string &cell : record)
            {
                std::cout << std::setw(20) << cell;
            }
            std::cout << std::endl;
        }
    }

    // True while the cursor points at an existing row. The cursor is not moved.
    bool CCsvResultSet::Next()
    {
        const int rowCount = (int)m_vtRow.size();
        return m_iPos >= 0 && m_iPos < rowCount;
    }

    // Returns the row at the cursor, paired with the header names, and advances
    // the cursor. Returns an empty CCsvRow when the cursor is past the last row.
    CCsvRow CCsvResultSet::FetchRow()
    {
        CCsvRow current;
        if (!Next())
        {
            return current;
        }
        current.SetData(m_vtHead, m_vtRow[m_iPos]);
        ++m_iPos;
        return current;
    }

     

    // Replaces the stored header names with vtHead.
    void CCsvResultSet::SetHeader(vector<string> vtHead)
    {
        // vtHead is this function's own copy, so its contents can simply be swapped in.
        m_vtHead.swap(vtHead);
    }

    // Appends vtRow as the last data row.
    void CCsvResultSet::SetRow(vector<string> vtRow)
    {
        m_vtRow.insert(m_vtRow.end(), vtRow);
    }

    // Number of data rows currently stored.
    int CCsvResultSet::GetRowCount()
    {
        const int rowCount = (int)m_vtRow.size();
        return rowCount;
    }

    // Returns the data row at position index (0-based), paired with the header names.
    // Throws a const char* message when index is out of range. The message now
    // says "invalid"; it previously read "valid index!!!", the opposite of the
    // condition being reported.
    CCsvRow CCsvResultSet::GetRowByIndex(int index)
    {
        if (index < 0 || index >= (int)m_vtRow.size())
        {
            throw ("invalid index!!!");
        }
        CCsvRow row;
        row.SetData(m_vtHead, m_vtRow[index]);
        return row;
    }

     

     

    bool CCsvHelper::LoadFrom(string sFileName, int iSkipLine, bool bFirstLineHead)

    {

    m_bHeader = bFirstLineHead;

    int i = 0;

    CsvParser *csvparser = CsvParser_new(sFileName.c_str(), ",", bFirstLineHead, iSkipLine);

    CsvRow *row = nullptr;

     

    if (bFirstLineHead) {//读取表头

    const CsvRow *header = CsvParser_getHeader(csvparser);

     

    if (header == NULL) {

    printf("%s ", CsvParser_getErrorMessage(csvparser));

    return false;

    }

    vector<string> vtHead;

    const char **headerFields = CsvParser_getFields(header);

    for (i = 0; i < CsvParser_getNumFields(header); i++) {

    // printf("TITLE: %s ", headerFields[i]);

    vtHead.push_back(headerFields[i]);

    }

    m_rsData.SetHeader(vtHead);

    }

     

    while ((row = CsvParser_getRow(csvparser))) {

    vector<string> vtRow;

    const char **rowFields = CsvParser_getFields(row);

    for (i = 0; i < CsvParser_getNumFields(row); i++) {

    // printf("FIELD: %s ", rowFields[i]);

    vtRow.push_back(rowFields[i]);

    }

    m_rsData.SetRow(vtRow);

    CsvParser_destroy_row(row);

     

    }

    CsvParser_destroy(csvparser);

    return true;

    }

     

     

     

    // Returns a copy of the result set built by LoadFrom.
    CCsvResultSet CCsvHelper::GetResultSet()
    {
        CCsvResultSet snapshot(m_rsData);
        return snapshot;
    }

     

     

     

     

     

     

     

     

     

     

     

     

     

     

     

  • 相关阅读:
    C#与独孤九剑
    C#系列视频教程字符和字符串操作
    【设计模式】迪米特法则
    【设计模式】考题 模板方法模式
    C#字符和字符串
    【热门技术】解决Win7 下面很多软件安装不兼容的问题
    C#使电脑发出嗡鸣声
    C#视频教程下载(第一章)
    【设计模式】牛市股票还会亏钱 外观模式
    【设计模式】好菜每回味不同 建造者模式
  • 原文地址:https://www.cnblogs.com/skiing886/p/9165900.html
Copyright © 2020-2023  润新知