1、头文件
#ifndef _CSV_HELPER_H_
#define _CSV_HELPER_H_
#include <stdio.h>
#include <list>
#include <new>
#include <string>
#include <utility>
#include <vector>
using namespace std;
// One parsed CSV record: an array of heap-allocated C strings and its length.
// Rows are released with CsvParser_destroy_row().
struct CsvRow {
    char **fields_;    // array holding numOfFields_ field strings
    int numOfFields_;  // number of valid entries in fields_
};
// State of one CSV parsing session, created by CsvParser_new() or
// CsvParser_new_from_string() and released by CsvParser_destroy().
struct CsvParser {
    char *filePath_;          // heap copy of the input path, NULL when parsing a string
    char delimiter_;          // field separator character
    int firstLineIsHeader_;   // non-zero when the first record is a header
    char *errMsg_;            // last error message (heap copy) or NULL
    CsvRow *header_;          // cached header record, NULL until it has been read
    FILE *fileHandler_;       // input stream, opened on the first row read
    int fromString_;          // non-zero when the input is csvString_ instead of a file
    char *csvString_;         // heap copy of the in-memory CSV text, or NULL
    int csvStringIter_;       // read cursor advanced by the row reader
    int iSkipLine;            // number of leading lines skipped right after opening the file
    std::vector<std::string> vtSkippedLine;  // not referenced by the code in this file
};
// Creates a parser that reads from the file at filePath (the file itself is
// opened on the first row read). A NULL delimiter selects ','; a delimiter
// refused by _CsvParser_delimiterIsAccepted() makes later row reads fail with
// an error message. iSkipLine is the number of leading lines to skip.
CsvParser *CsvParser_new(const char *filePath, const char *delimiter, int firstLineIsHeader, int iSkipLine = 0);
// Creates a parser that reads from a private copy of csvString.
CsvParser *CsvParser_new_from_string(const char *csvString, const char *delimiter, int firstLineIsHeader);
// Releases the parser, its cached header row, its strings and its open file.
// A NULL argument is ignored.
void CsvParser_destroy(CsvParser *csvParser);
// Frees a row returned by CsvParser_getRow(), including every field string.
void CsvParser_destroy_row(CsvRow *csvRow);
// Returns the header row (reading it on first use); it stays owned by the
// parser. Returns NULL and sets the error message when the parser was created
// with firstLineIsHeader == 0.
const CsvRow *CsvParser_getHeader(CsvParser *csvParser);
// Returns the next data row, or NULL when none could be read (see
// CsvParser_getErrorMessage()). The header, if any, is consumed first.
CsvRow *CsvParser_getRow(CsvParser *csvParser);
// Number of fields stored in csvRow.
int CsvParser_getNumFields(const CsvRow *csvRow);
// Read-only view of the field strings stored in csvRow.
const char **CsvParser_getFields(const CsvRow *csvRow);
// Last error message recorded on the parser, or NULL if there is none.
const char* CsvParser_getErrorMessage(CsvParser *csvParser);
// Internal: reads forward from the current position of p until its stop
// character or EOF and returns the resulting ftell() offset.
int _CsvParser_getNextLinePos(FILE *p);
// Internal: moves the parser's file position to the offset reported by
// _CsvParser_getNextLinePos(); always returns true.
bool _CsvParser_skipLine(CsvParser *csvParser);
// Internal: reads one record from the file or string without any header handling.
CsvRow *_CsvParser_getRow(CsvParser *csvParser);
// Internal: returns 1 when the first character of delimiter may be used as a
// field separator, 0 otherwise.
int _CsvParser_delimiterIsAccepted(const char *delimiter);
// Internal: replaces the parser's error message with a copy of errorMessage.
void _CsvParser_setErrorMessage(CsvParser *csvParser, const char *errorMessage);
// Forward declarations: CCsvRow and CCsvResultSet name these classes as friends.
class CCsvHelper;
class CCsvResultSet;
// One cell of a row: first = column name, second = cell value.
using CCsvField = std::pair<string, string>;
// A single row of a result set, stored as (column name, value) pairs in
// column order. Rows are filled in by CCsvResultSet through SetData().
class CCsvRow
{
    friend class CCsvResultSet;

public:
    // The field list starts out empty.
    CCsvRow() {}
    ~CCsvRow() {}

    // Value of the first field whose column name equals sName.
    string GetFieldByName(string sName);
    // Value of the field at position index.
    string GetFieldByIndex(int index);
    // Writes the column names and the values to standard output.
    void Print();

    vector<CCsvField> m_vtField;

private:
    // Appends one (name, value) pair per column.
    void SetData(vector<string> vtHead, vector<string> vtData);
};
// All rows loaded from one CSV source plus the column names, with a forward
// cursor (m_iPos) used by Next() / FetchRow(). Filled in by CCsvHelper.
class CCsvResultSet
{
    friend class CCsvHelper;

public:
    // Starts empty with the cursor on the first row.
    CCsvResultSet() : m_iPos(0) {}
    ~CCsvResultSet() {}

    // Writes the header and every row to standard output.
    void Print();
    // True while the cursor still points at a row; does not move the cursor.
    bool Next();
    // Number of data rows.
    int GetRowCount();
    // Row at the given position, independent of the cursor.
    CCsvRow GetRowByIndex(int index);
    // Row under the cursor; the cursor then moves to the following row.
    CCsvRow FetchRow();

private:
    // Replaces the column names.
    void SetHeader(vector<string> vtHead);
    // Appends one data row.
    void SetRow(vector<string> vtRow);

private:
    int m_iPos;                      // cursor: index of the next row to fetch
    vector<string> m_vtHead;         // column names (header)
    vector<vector<string>> m_vtRow;  // row data
};
// Loads a CSV file into a CCsvResultSet.
class CCsvHelper
{
public:
    // m_bHeader is given a defined value so that copying or inspecting a
    // helper before the first LoadFrom() never reads an indeterminate bool.
    CCsvHelper() : m_bHeader(false) {
    }
    ~CCsvHelper(){
    }
public:
    /*
    sFileName       file path, e.g. path/filename.csv
    iSkipLine       number of consecutive lines at the start of the file to skip;
                    skipped lines are not parsed
    bFirstLineHead  whether the first line read is the header; if it is not,
                    that line is treated as ordinary data
    */
    bool LoadFrom(string sFileName, int iSkipLine = 0, bool bFirstLineHead = true);
    /*
    Returns the result set of the current file.
    */
    CCsvResultSet GetResultSet();
private:
    string m_sFileName;
    bool m_bHeader; // a header row is present
    CCsvResultSet m_rsData;
};
#endif
2、实现文件
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <errno.h>
#include <iostream>
#include <iomanip>
#include "CsvHelper.h"
using namespace std;
/*
 * Allocates and initializes a file-backed parser.
 *
 * filePath           path of the CSV file, copied; may be NULL (used by
 *                    CsvParser_new_from_string)
 * delimiter          first character is the field separator; NULL selects ','
 * firstLineIsHeader  non-zero when the first record is a header
 * iSkipLine          number of leading lines to skip after opening the file
 *
 * Returns NULL when memory cannot be allocated. The file itself is not
 * opened here.
 */
CsvParser *CsvParser_new(const char *filePath, const char *delimiter, int firstLineIsHeader, int iSkipLine) {
    CsvParser *csvParser = (CsvParser*)malloc(sizeof(CsvParser));
    if (csvParser == NULL) {
        return NULL;
    }

    char *pathCopy = NULL;
    if (filePath != NULL) {
        pathCopy = (char*)malloc(strlen(filePath) + 1);
        if (pathCopy == NULL) {
            free(csvParser);
            return NULL;
        }
        strcpy(pathCopy, filePath);
    }

    // malloc() does not run constructors, so the vector member has to be
    // constructed explicitly before the struct can be used safely. It is
    // never filled by this file, so an empty vector owns no heap memory.
    new (&csvParser->vtSkippedLine) std::vector<std::string>();

    csvParser->filePath_ = pathCopy;
    csvParser->firstLineIsHeader_ = firstLineIsHeader;
    csvParser->errMsg_ = NULL;
    if (delimiter == NULL) {
        csvParser->delimiter_ = ',';
    }
    else if (_CsvParser_delimiterIsAccepted(delimiter)) {
        csvParser->delimiter_ = *delimiter;
    }
    else {
        // ' ' marks an unsupported delimiter; _CsvParser_getRow() refuses to
        // parse when it finds this value.
        csvParser->delimiter_ = ' ';
    }
    csvParser->header_ = NULL;
    csvParser->fileHandler_ = NULL;
    csvParser->fromString_ = 0;
    csvParser->csvString_ = NULL;
    csvParser->csvStringIter_ = 0;
    csvParser->iSkipLine = iSkipLine;
    return csvParser;
}
/*
 * Creates a parser that reads CSV text from memory instead of a file.
 * csvString is copied; when it is NULL the parser is still returned and row
 * reads later fail with an error message. Returns NULL when memory cannot be
 * allocated.
 */
CsvParser *CsvParser_new_from_string(const char *csvString, const char *delimiter, int firstLineIsHeader) {
    CsvParser *csvParser = CsvParser_new(NULL, delimiter, firstLineIsHeader);
    if (csvParser == NULL) {
        return NULL;
    }
    csvParser->fromString_ = 1;
    if (csvString != NULL) {
        const size_t csvStringLen = strlen(csvString);
        char *copy = (char*)malloc(csvStringLen + 1);
        if (copy == NULL) {
            // Do not hand out a half-initialized parser.
            CsvParser_destroy(csvParser);
            return NULL;
        }
        memcpy(copy, csvString, csvStringLen + 1);
        csvParser->csvString_ = copy;
    }
    return csvParser;
}
/*
 * Releases everything owned by the parser and then the parser itself.
 * Passing NULL is allowed and does nothing.
 */
void CsvParser_destroy(CsvParser *csvParser) {
    if (!csvParser) {
        return;
    }
    // free(NULL) is a no-op, so the string members need no individual checks.
    free(csvParser->filePath_);
    free(csvParser->errMsg_);
    if (csvParser->fileHandler_) {
        fclose(csvParser->fileHandler_);
    }
    if (csvParser->header_) {
        CsvParser_destroy_row(csvParser->header_);
    }
    free(csvParser->csvString_);
    free(csvParser);
}
/*
 * Frees a row: every field string, the field array and the row itself.
 * Passing NULL is allowed and does nothing, matching CsvParser_destroy().
 */
void CsvParser_destroy_row(CsvRow *csvRow) {
    if (csvRow == NULL) {
        return;
    }
    if (csvRow->fields_ != NULL) {
        for (int i = 0; i < csvRow->numOfFields_; i++) {
            free(csvRow->fields_[i]);
        }
        free(csvRow->fields_);
    }
    free(csvRow);
}
/*
 * Returns the header record, reading it from the input the first time it is
 * requested. The row remains owned by the parser. When the parser was created
 * without a header line, an error message is recorded and NULL is returned.
 */
const CsvRow *CsvParser_getHeader(CsvParser *csvParser) {
    if (csvParser->firstLineIsHeader_) {
        if (!csvParser->header_) {
            csvParser->header_ = _CsvParser_getRow(csvParser);
        }
        return csvParser->header_;
    }
    _CsvParser_setErrorMessage(csvParser, "Cannot supply header, as current CsvParser object does not support header");
    return NULL;
}
/*
 * Returns the next data record. If a header is expected and has not been
 * read yet, it is read and cached first so that it is never handed out as data.
 */
CsvRow *CsvParser_getRow(CsvParser *csvParser) {
    const bool headerPending = csvParser->firstLineIsHeader_ != 0 && csvParser->header_ == NULL;
    if (headerPending) {
        csvParser->header_ = _CsvParser_getRow(csvParser);
    }
    CsvRow *nextRow = _CsvParser_getRow(csvParser);
    return nextRow;
}
/* Reports how many fields the given row holds. */
int CsvParser_getNumFields(const CsvRow *csvRow) {
    const int fieldCount = (*csvRow).numOfFields_;
    return fieldCount;
}
/* Exposes the row's field strings as a read-only array. */
const char **CsvParser_getFields(const CsvRow *csvRow) {
    char **rawFields = csvRow->fields_;
    return const_cast<const char**>(rawFields);
}
/*
 * Consumes the rest of the current line of p, including its '\n', and
 * returns the file offset of the following line as reported by ftell().
 * At end of file the current offset is returned. Returns -1 when p is NULL.
 */
int _CsvParser_getNextLinePos(FILE *p)
{
    if (p == NULL) {
        return -1;
    }
    int ch;
    while ((ch = fgetc(p)) != EOF) {
        // A line ends at the newline character, not at the first blank.
        if (ch == '\n') {
            break;
        }
    }
    return (int)ftell(p);
}
/*
 * Skips ahead in the parser's file: asks _CsvParser_getNextLinePos() for the
 * target offset and seeks there. Always reports success.
 */
bool _CsvParser_skipLine(CsvParser *csvParser) {
    FILE *stream = csvParser->fileHandler_;
    const int nextLineStart = _CsvParser_getNextLinePos(stream);
    fseek(stream, nextLineStart, SEEK_SET);
    return true;
}
/* Records errorMessage, releases the partially built row and yields NULL. */
static CsvRow *CsvParser_abortRow_(CsvParser *csvParser, CsvRow *csvRow, char *currField, const char *errorMessage) {
    _CsvParser_setErrorMessage(csvParser, errorMessage);
    free(currField);
    CsvParser_destroy_row(csvRow);
    return NULL;
}

/*
 * Reads one record from the parser's input (file or in-memory string).
 *
 * On the first call in file mode the file is opened and iSkipLine leading
 * lines are skipped. Fields are split on the delimiter; a field that starts
 * with '"' may contain delimiters and newlines, and a doubled quote inside it
 * stands for one literal quote. '\r' characters are dropped.
 *
 * Returns a heap-allocated row to be released with CsvParser_destroy_row(),
 * or NULL with an error message set when the input is exhausted ("Reached
 * EOF"), misconfigured, cannot be opened, or memory runs out.
 */
CsvRow *_CsvParser_getRow(CsvParser *csvParser) {
    int numRowRealloc = 0;
    const int acceptedFields = 64;
    int acceptedCharsInField = 64;

    if (csvParser->filePath_ == NULL && (!csvParser->fromString_)) {
        _CsvParser_setErrorMessage(csvParser, "Supplied CSV file path is NULL");
        return NULL;
    }
    if (csvParser->csvString_ == NULL && csvParser->fromString_) {
        _CsvParser_setErrorMessage(csvParser, "Supplied CSV string is NULL");
        return NULL;
    }
    // ' ' is the value CsvParser_new() stores for a rejected delimiter; '\0'
    // is refused as well because it can never separate fields in C strings.
    if (csvParser->delimiter_ == ' ' || csvParser->delimiter_ == '\0') {
        _CsvParser_setErrorMessage(csvParser, "Supplied delimiter is not supported");
        return NULL;
    }

    if (!csvParser->fromString_ && csvParser->fileHandler_ == NULL) {
        csvParser->fileHandler_ = fopen(csvParser->filePath_, "r");
        if (csvParser->fileHandler_ == NULL) {
            const int errorNum = errno;  // capture before any other library call
            const char *errStr = strerror(errorNum);
            static const char openErrFmt[] = "Error opening CSV file for reading: %s : %s";
            // Sized from the actual path length so long paths cannot overflow.
            const size_t errMsgCap = sizeof(openErrFmt) + strlen(csvParser->filePath_) + strlen(errStr);
            char *errMsg = (char*)malloc(errMsgCap);
            if (errMsg == NULL) {
                _CsvParser_setErrorMessage(csvParser, "Error opening CSV file for reading");
                return NULL;
            }
            snprintf(errMsg, errMsgCap, openErrFmt, csvParser->filePath_, errStr);
            _CsvParser_setErrorMessage(csvParser, errMsg);
            free(errMsg);
            return NULL;
        }
        for (int remaining = csvParser->iSkipLine; remaining > 0; remaining--) {
            _CsvParser_skipLine(csvParser);
        }
    }

    CsvRow *csvRow = (CsvRow*)malloc(sizeof(CsvRow));
    if (csvRow == NULL) {
        _CsvParser_setErrorMessage(csvParser, "Out of memory");
        return NULL;
    }
    csvRow->numOfFields_ = 0;
    csvRow->fields_ = (char**)malloc(acceptedFields * sizeof(char*));
    char *currField = (char*)malloc(acceptedCharsInField);
    if (csvRow->fields_ == NULL || currField == NULL) {
        return CsvParser_abortRow_(csvParser, csvRow, currField, "Out of memory");
    }

    int fieldIter = 0;
    int inside_complex_field = 0;
    int currFieldCharIter = 0;
    int seriesOfQuotesLength = 0;
    int lastCharIsQuote = 0;
    int isEndOfFile = 0;

    while (1) {
        char currChar;
        int endOfFileIndicator;
        if (csvParser->fromString_) {
            currChar = csvParser->csvString_[csvParser->csvStringIter_];
            endOfFileIndicator = (currChar == '\0');
            // Stay on the terminator so later calls never read past the buffer.
            if (!endOfFileIndicator) {
                csvParser->csvStringIter_++;
            }
        }
        else {
            // Keep the int result: EOF also covers read errors, which feof()
            // alone would miss and which would otherwise loop forever.
            const int ch = fgetc(csvParser->fileHandler_);
            endOfFileIndicator = (ch == EOF);
            currChar = (char)ch;
        }

        if (endOfFileIndicator) {
            if (currFieldCharIter == 0 && fieldIter == 0) {
                return CsvParser_abortRow_(csvParser, csvRow, currField, "Reached EOF");
            }
            // Treat end of input as the end of the last line.
            currChar = '\n';
            isEndOfFile = 1;
        }
        if (currChar == '\r') {
            continue;
        }

        if (currFieldCharIter == 0 && !lastCharIsQuote) {
            if (currChar == '"') {
                // Opening quote of a quoted field; it is not part of the value.
                inside_complex_field = 1;
                lastCharIsQuote = 1;
                continue;
            }
        }
        else if (currChar == '"') {
            seriesOfQuotesLength++;
            inside_complex_field = (seriesOfQuotesLength % 2 == 0);
            if (inside_complex_field) {
                // Second quote of a doubled pair: overwrite the first one.
                currFieldCharIter--;
            }
        }
        else {
            seriesOfQuotesLength = 0;
        }

        if (isEndOfFile || ((currChar == csvParser->delimiter_ || currChar == '\n') && !inside_complex_field)) {
            // A trailing quote is the closing quote of the field: drop it.
            // The > 0 guard keeps the index inside the buffer when the field
            // consists of nothing but an opening quote.
            const int fieldLen = (lastCharIsQuote && currFieldCharIter > 0) ? currFieldCharIter - 1 : currFieldCharIter;
            currField[fieldLen] = '\0';
            char *fieldCopy = (char*)malloc(fieldLen + 1);
            if (fieldCopy == NULL) {
                return CsvParser_abortRow_(csvParser, csvRow, currField, "Out of memory");
            }
            strcpy(fieldCopy, currField);
            csvRow->fields_[fieldIter] = fieldCopy;
            free(currField);
            currField = NULL;
            csvRow->numOfFields_++;
            if (currChar == '\n') {
                return csvRow;
            }
            if (csvRow->numOfFields_ % acceptedFields == 0) {
                // The field array is full: grow it by another acceptedFields slots.
                const size_t newCount = (size_t)(numRowRealloc + 2) * (size_t)acceptedFields;
                char **grown = (char**)realloc(csvRow->fields_, newCount * sizeof(char*));
                if (grown == NULL) {
                    return CsvParser_abortRow_(csvParser, csvRow, NULL, "Out of memory");
                }
                csvRow->fields_ = grown;
                numRowRealloc++;
            }
            acceptedCharsInField = 64;
            currField = (char*)malloc(acceptedCharsInField);
            if (currField == NULL) {
                return CsvParser_abortRow_(csvParser, csvRow, NULL, "Out of memory");
            }
            currFieldCharIter = 0;
            fieldIter++;
            inside_complex_field = 0;
        }
        else {
            currField[currFieldCharIter] = currChar;
            currFieldCharIter++;
            if (currFieldCharIter == acceptedCharsInField - 1) {
                // Always keep room for one more character plus the terminator.
                char *grownField = (char*)realloc(currField, (size_t)acceptedCharsInField * 2);
                if (grownField == NULL) {
                    return CsvParser_abortRow_(csvParser, csvRow, currField, "Out of memory");
                }
                currField = grownField;
                acceptedCharsInField *= 2;
            }
        }
        lastCharIsQuote = (currChar == '"') ? 1 : 0;
    }
}
/*
 * Tells whether the first character of delimiter can serve as the field
 * separator. Returns 1 when it can, 0 otherwise.
 *
 * Rejected: NULL or empty input, line terminators ('\n', '\r'), the quote
 * character, and the space character. Space is rejected because the parser
 * stores ' ' in delimiter_ to mark an unsupported delimiter.
 */
int _CsvParser_delimiterIsAccepted(const char *delimiter) {
    if (delimiter == NULL) {
        return 0;
    }
    switch (*delimiter) {
    case '\0':
    case '\n':
    case '\r':
    case '"':
    case ' ':
        return 0;
    default:
        return 1;
    }
}
/*
 * Replaces the parser's error message with a private copy of errorMessage.
 * A NULL errorMessage, or a failed allocation, leaves the parser with no
 * message (errMsg_ == NULL).
 */
void _CsvParser_setErrorMessage(CsvParser *csvParser, const char *errorMessage) {
    char *copy = NULL;
    if (errorMessage != NULL) {
        const size_t errMsgLen = strlen(errorMessage);
        copy = (char*)malloc(errMsgLen + 1);
        if (copy != NULL) {
            memcpy(copy, errorMessage, errMsgLen + 1);
        }
    }
    // Free the old text only after copying: errorMessage may point into it.
    free(csvParser->errMsg_);
    csvParser->errMsg_ = copy;
}
/* Gives read-only access to the most recent error text; NULL when none was recorded. */
const char *CsvParser_getErrorMessage(CsvParser *csvParser) {
    const char *lastError = csvParser->errMsg_;
    return lastError;
}
////////////////////////////////////////////////////////////////////////
void CCsvRow::Print()
{
cout << left;
if (m_vtField.size() > 0)
{
for (auto it : m_vtField)
{
cout << setw((it.first.length()>=it.second.length()? it.first.length(): it.second.length()) + 10) << it.first;
// cout << it.first << " ";
}
cout << endl;
cout << left;
for (auto it : m_vtField)
{
cout << setw((it.first.length()>=it.second.length() ? it.first.length() : it.second.length()) + 10) << it.second;
// cout << it.second << " ";
}
cout << endl;
}
cout << endl;
}
// Appends one (column name, value) pair per entry of vtData.
//
// vtHead  column names; may be empty when the source has no header line, in
//         which case every field gets an empty name and stays reachable
//         through GetFieldByIndex()
// vtData  the row's values
//
// Throws a const char* message when a non-empty header and the row have
// different sizes.
void CCsvRow::SetData(vector<string> vtHead, vector<string> vtData)
{
    const bool bNoHeader = vtHead.empty();
    if (!bNoHeader && vtHead.size() != vtData.size())
    {
        throw ("col name size is not equal to row col size!!!");
    }
    m_vtField.reserve(m_vtField.size() + vtData.size());
    for (size_t i = 0; i < vtData.size(); ++i)
    {
        m_vtField.push_back(CCsvField(bNoHeader ? string() : vtHead[i], vtData[i]));
    }
}
// Looks up a value by column name. The first matching column wins; an empty
// string is returned when no column carries that name.
string CCsvRow::GetFieldByName(string sName)
{
    for (const auto &field : m_vtField)
    {
        if (field.first != sName)
        {
            continue;
        }
        return field.second;
    }
    return string();
}
// Returns the value of the field at position index (0-based).
// Throws a const char* message when index is negative or not smaller than
// the number of fields.
string CCsvRow::GetFieldByIndex(int index)
{
    // index == size() is already out of range, hence >= rather than >.
    if (index < 0 || index >= (int)m_vtField.size())
    {
        throw ("invalid index ");
    }
    return m_vtField[index].second;
}
// Prints the header line followed by one line per row; every cell is
// left-aligned in a 20-character column.
void CCsvResultSet::Print()
{
    cout << left;
    for (const auto &columnName : m_vtHead)
    {
        cout << setw(20) << columnName;
    }
    cout << endl;
    for (const auto &cells : m_vtRow)
    {
        for (const auto &cell : cells)
        {
            cout << setw(20) << cell;
        }
        cout << endl;
    }
}
// Reports whether the cursor still refers to an existing row.
// The cursor itself is not moved; FetchRow() does that.
bool CCsvResultSet::Next()
{
    const int rowCount = (int)m_vtRow.size();
    return m_iPos >= 0 && m_iPos < rowCount;
}
// Returns the row under the cursor and advances the cursor by one.
// When no row is left, an empty CCsvRow is returned and the cursor stays put.
CCsvRow CCsvResultSet::FetchRow()
{
    CCsvRow current;
    if (!Next())
    {
        return current;
    }
    current.SetData(m_vtHead, m_vtRow[m_iPos]);
    ++m_iPos;
    return current;
}
// Replaces the stored column names with vtHead.
void CCsvResultSet::SetHeader(vector<string> vtHead)
{
    // vtHead is a by-value copy, so its contents can simply be swapped in.
    m_vtHead.swap(vtHead);
}
// Appends vtRow as the new last data row.
void CCsvResultSet::SetRow(vector<string> vtRow)
{
    m_vtRow.insert(m_vtRow.end(), vtRow);
}
// Number of data rows currently stored (the header is not counted).
int CCsvResultSet::GetRowCount()
{
    const int rowCount = static_cast<int>(m_vtRow.size());
    return rowCount;
}
// Returns the row at position index (0-based) without touching the cursor.
// Throws a const char* message when index is outside [0, GetRowCount()).
CCsvRow CCsvResultSet::GetRowByIndex(int index)
{
    if (index < 0 || index >= (int)m_vtRow.size())
    {
        // The message used to read "valid index", the opposite of the error.
        throw ("invalid index!!!");
    }
    CCsvRow row;
    row.SetData(m_vtHead, m_vtRow[index]);
    return row;
}
bool CCsvHelper::LoadFrom(string sFileName, int iSkipLine, bool bFirstLineHead)
{
m_bHeader = bFirstLineHead;
int i = 0;
CsvParser *csvparser = CsvParser_new(sFileName.c_str(), ",", bFirstLineHead, iSkipLine);
CsvRow *row = nullptr;
if (bFirstLineHead) {//读取表头
const CsvRow *header = CsvParser_getHeader(csvparser);
if (header == NULL) {
printf("%s ", CsvParser_getErrorMessage(csvparser));
return false;
}
vector<string> vtHead;
const char **headerFields = CsvParser_getFields(header);
for (i = 0; i < CsvParser_getNumFields(header); i++) {
// printf("TITLE: %s ", headerFields[i]);
vtHead.push_back(headerFields[i]);
}
m_rsData.SetHeader(vtHead);
}
while ((row = CsvParser_getRow(csvparser))) {
vector<string> vtRow;
const char **rowFields = CsvParser_getFields(row);
for (i = 0; i < CsvParser_getNumFields(row); i++) {
// printf("FIELD: %s ", rowFields[i]);
vtRow.push_back(rowFields[i]);
}
m_rsData.SetRow(vtRow);
CsvParser_destroy_row(row);
}
CsvParser_destroy(csvparser);
return true;
}
// Hands out a copy of the result set built by the most recent LoadFrom().
CCsvResultSet CCsvHelper::GetResultSet()
{
    CCsvResultSet snapshot(m_rsData);
    return snapshot;
}