Open CSV into Excel without corrupting data types

A CSV is a comma separated values file, which allows data to be saved in a table structured format. Microsoft Excel has the ability to open a CSV file and then save it back into same format. The problem comes when there are multiple data types in CSV. Excel is smart enough to automatically map data types into Excel data types, but sometime it is not able to detect the intended data type, e.g. if a column is an id which looks like number but is actually a text then Excel messes it up. In my case there were 3 such issues which needed to be fixed.

  1. The id column was supposed to be text type was converted to number and which doing do it was replacing the leading 0’s. An id 001234 was being converted to 1234 and when saved back as CSV.
  2. The date column was being mapped to date properly, but the date format was being automatically changed.
  3. The double type column was having the issue that all zeros after the decimal were being trimmed. A double number 0.0023400 was being converted to 0.00234

The file was later saved as CSV and imported into a web application for processing. This did not work properly, and was creating exceptions.

To resolve this issue, I decided to write a tool that would read the CSV and create an excel, which could later be saved into CSV, without losing any formatting. Here is the sample CSV file format and the code that would do the job.

 

Name,Id,DateOfJoining,DateOfJoining,PerformanceFactor1,PerformanceFactor2,PerformanceFactor3,BreakInterval,Permanent

John Travolta,011336,2011-09-01,0.2500000,0.2500000,0.2500000,30,60,N

Michael Jackson,01134,2012-08-01,0.2500000,0.0250000,0.2300000,30,60,Y

Bill Gates,011371,2011-09-01,0.9900000,0.9900000,0.9900000,30,60,Y

Table 1 – Sample CSV File

 

using System;using System.Collections.Generic;using System.Linq;

using System.Text;

using System.IO;

 

using NPOI.XSSF.Model;

using NPOI.XSSF.UserModel;

//using NPOI.HSSF.Model;

//using NPOI.HSSF.UserModel;

using NPOI.SS.UserModel;

 

namespace WriteExcel

{

class Program

{

static XSSFWorkbook wb;

static XSSFSheet sh;

static List<string> headerNames = new List<string>();

 

static List<List<string>> contentRows = new List<List<string>>();

static string outFileName = “”;

 

static void CreateHeader(List<string> headers)

{

if (!File.Exists(outFileName))

{

File.Delete(outFileName);

}

if (!File.Exists(outFileName))

{

//Create Excel workbook

wb =  new XSSFWorkbook();

//Now create sheet

sh = (XSSFSheet)wb.CreateSheet(“Sheet1″);

for (int a = 0; a < 9; a++)

{

if( a==0)

sh.SetColumnWidth(a, 16 * 256);

else

sh.SetColumnWidth(a, 14 * 256);

}

var r = sh.CreateRow(0);

 

int i = 0;

foreach (string header in headers)

{

ICell cell = r.CreateCell(i);

cell.SetCellType(CellType.String);

cell.SetCellValue(header);

i++;

}

}

}

 

static void CreateData(List<List<string>> dataRows)

{

int startRow = 1;

int numDataRows = dataRows.Count;

 

IDataFormat dataFormatCustom = wb.CreateDataFormat();

ICellStyle cellStyle = wb.CreateCellStyle();

cellStyle.DataFormat = dataFormatCustom.GetFormat(“yyyy-MM-dd”);

 

IDataFormat dataFormat = wb.CreateDataFormat();

short iDataFormat = dataFormat.GetFormat(“0.#000000″);

ICellStyle cellStyleDouble = wb.CreateCellStyle();

cellStyleDouble.DataFormat = iDataFormat;

 

IDataFormat dataFormat2 = wb.CreateDataFormat();

short iDataFormat2 = dataFormat2.GetFormat(“0.0000000″);

ICellStyle cellStyleDouble2 = wb.CreateCellStyle();

cellStyleDouble2.DataFormat = iDataFormat2;

 

for (int index = 0; index < numDataRows; index++)

{

var r = sh.CreateRow(index+startRow);

List<string> rowData = dataRows[index];

int i = 0;

foreach (string data in rowData)

{

ICell cell = r.CreateCell(i);

if (data == null || data.Length == 0)

{

i++;

continue;

}

if( i==2)

{

DateTime myDate = DateTime.ParseExact(data, “yyyy-MM-dd”, System.Globalization.CultureInfo.InvariantCulture);

cell.CellStyle = cellStyle;

cell.SetCellValue(myDate);

}

else if (i > 2 && i <= 7)

{

cell.SetCellType(CellType.Numeric);

if (i > 2 && i <= 5)

{

if (data == “0.0000000”)

{

cell.CellStyle = cellStyleDouble2;

}

else

{

cell.CellStyle = cellStyleDouble;

}

cell.SetCellValue(Convert.ToDouble(data));

}

else

cell.SetCellValue(Convert.ToInt32(data));

}

else

{

cell.SetCellType(CellType.String);

cell.SetCellValue(data);

}

 

i++;

 

}

if (index % 1000 == 0)

Console.WriteLine(“Rows written: ” + index.ToString());

 

}

 

Console.WriteLine(“\n\nWriting Xls file to disk. It may take some time.”);

Console.WriteLine(“”);

 

using (var fs = new FileStream(outFileName, FileMode.Create, FileAccess.Write))

{

wb.Write(fs);

}

 

Console.WriteLine(“Open the file created using Microsoft Excel. File name: ” + outFileName);

 

}

static void ReadCSV(string csvPath)

{

System.IO.StreamReader file = new System.IO.StreamReader(csvPath);

string headerLine = file.ReadLine();

string[] headers = headerLine.Split(‘,’);

foreach (string header in headers)

{

headerNames.Add(header);

}

 

string contentLine = “”;

while ((contentLine = file.ReadLine()) != null)

{

string[] rowContent = contentLine.Split(‘,’);

List<string> row = new List<string>();

foreach (string rowStr in rowContent)

{

row.Add(rowStr);

}

contentRows.Add(row);

}

}

 

static void Main(string[] args)

{

if (args == null || args.Length == 0)

{

Console.WriteLine(“Please enter the path to CSV file. See example below:\n”);

Console.WriteLine(“WriteExcel c:\\MyFolder\\myfile.csv\n\n”);

return;

}

if (!File.Exists(args[0]))

{

Console.WriteLine(“The file path is not correct. Please check the path and try again”);

return;

}

string dirName = Path.GetDirectoryName(args[0]);

string tempFileName = Path.GetFileNameWithoutExtension(args[0]);

outFileName = Path.Combine(dirName, tempFileName + DateTime.Now.Ticks + “.csv”);

 

//Read the CSV file

ReadCSV(args[0]);

//Create the header of Excel File

CreateHeader(headerNames);

//Insert data into Excel

CreateData(contentRows);

 

System.Diagnostics.Debug.WriteLine(“DONE”);

}

}

}

 

 

Table 2 – C# .Net code to convert the CSV into Excel

0 replies

Leave a Reply

Want to join the discussion?
Feel free to contribute!

Leave a Reply

Your email address will not be published. Required fields are marked *

You may use these HTML tags and attributes: <a href="" title=""> <abbr title=""> <acronym title=""> <b> <blockquote cite=""> <cite> <code> <del datetime=""> <em> <i> <q cite=""> <s> <strike> <strong>