Sunday, March 27, 2005

CSV To DataTable

This piece of code written in C#, takes a csv file and translates it's contents into a datatable given that the csv file follows the correct format.

The heart of this code is the regex that parses the csv file. It took me 3 months to figure out the damn regex and get the csv file to parse correctly even if there are whitespaces in it where fields should be. I'm not really an expert on regular expressions, it's a whole other field of study.

Pardon the indentations, I'm too lazy to indent them correctly but the code should be understandable enough.

public DataTable ParseCSV (string FPath, out int returnValue, out string errorMsg)
{
returnValue = 0;
errorMsg = "";
string[] rows = null;
DataTable dt = new DataTable();

dt.Clear();

try
{
StreamReader sr = new StreamReader(FPath);
string inputString = sr.ReadToEnd();

//remove delimeters from file
if ( inputString.Length > 0 )
{
char carriageReturn = "\r"[0];
char lineFeed = "\n"[0];
string trimmed = inputString.TrimEnd( carriageReturn, lineFeed );
string cleaned = trimmed.Replace( "\n", "" );

//number of records in file
rows = cleaned.Split( carriageReturn );
}

//the miraculous regular expression
Regex rex = new Regex(",(?=(?:[^\"]*\"[^\"]*\")*(?![^\"]*\"))");

if (rows.Length > 0 )
{
for (int i=0; i < rows.Length; i++)
{
//split records into matching regex
string[] values = rex.Split(rows[i]);
if (i==0)
{
//create columns if from first line/record of file
for (int j=0; j < values.Length; j++)
{
DataColumn dc = new DataColumn();
dt.Columns.Add(dc);
}
}
DataRow dr = dt.NewRow();

//add rows, based only on number of columns from first line
for (int k=0; k < dt.Columns.Count; k++)
{

//check if values are more or less the number of columns
if (k < values.Length) { dr[k] = values[k];
}
else
{
//break the loop if length of values is less than the number of columns
//values exceeding the number of columns will not be considered
break;
}
}
dt.Rows.Add(dr);
}
//end for
}
//end if
}
catch (System.Exception e)
{
//throw new Exception("Cannot process CSV file. " + e.Message.ToString());
returnValue = 1;
errorMsg = e.Message.ToString();
}
if (dt.Rows.Count == 0)
{ returnValue = 1;
errorMsg = "CSV File is empty.";
}
return dt;
}

0 Comments:

Post a Comment

<< Home