Import from CSV with commas allowed - a solution

If you have need to read data from csv files, then sooner or later you will find that some of the data has commas in it.

This code will work in that situation. Eg the data would be similiar to:

empid, emp name, position title

100, Bobby Brown, Singer

200, Mary Sugar, “Singer, dancer”

300, Fred Bloggs, CEO

I chose to not use the Excel functions as they are a lot slower.

Here is the code, which is based around the function named GetTextAtPositionForCSV.

It probably needs a bit of a cleanup but it works at the moment.

’ Open the file,

’ get each row

’ process each row

’ close the file

dim intDataFile as integer

intDataFile= Freefile

dim strRowOfData as string

Open "c:\temp.csv" For Input As intDataFile

intProcessed = 0 



Do While Not Eof( intDataFile)

	

   ' Read each line of the file.

Line Input #intDataFile, strRowOfData

	

'	Print strRowOfData

	strEmpId = GetTextAtPositionForCSV( strRowOfData, 1, "," )

	strDesc = GetTextAtPositionForCSV( strRowOfData, 3, "," )

Loop

’ end of do while loop

’ close the file

Close intDataFile

Print “Finished”

Function GetTextAtPositionForCSV( strBigString, intPosToFind As Integer, strDelimeter As String ) As String

' Finds text at a certain position given the delimeter



' atk modified for Csv files.. ie checks for ,"

' that is  comma double quote or double quote at the start

'atk modified 12/04/2005. Added + lenstrDelim-1





If strBigString = "" Then

	GetTextAtPositionForCSV =  ""

	Exit Function

End If



Dim RightSide As String		

Dim pos As Integer

Dim lastPos As Integer

Dim count As Integer

Dim NumberOfRightMostChars As Integer



'==========================================================

' Setup an array of any double quotes as these will affect our count of commas

' they will exist becuase a comma is inside them

'==========================================================	

Dim strStartQuotes As String, strEndQuotes As String

Dim strStartQuotesArr As Variant, strEndQuotesArr As Variant

Dim posDoubleQuote As Integer

posDoubleQuote = 0

posDoubleQuote = Instr(1, strBigString, {"}) ' instr starts at 1, not 0

Do While posDoubleQuote > 0 

	

	' The start quote

	strStartQuotes =  strStartQuotes + Cstr(posDoubleQuote)+ ","

	

	' The end quote

	posDoubleQuote = Instr(posDoubleQuote+1, strBigString, {"})

	If posDoubleQuote > 0 Then

		strEndQuotes =  strEndQuotes + Cstr(posDoubleQuote)+ ","

	End If

	

	posDoubleQuote = Instr(posDoubleQuote+1, strBigString, {"})

	

Loop

' remove the last comma

strStartQuotes = RemoveLastNChars(strStartQuotes, 1)

strEndQuotes = RemoveLastNChars(strEndQuotes, 1)



' put into an array

If strStartQuotes <> "" Then

	strStartQuotesArr = Split( strStartQuotes, ",")

	strEndQuotesArr = Split( strEndQuotes, ",")

End If



'==============================================================

' loop through the text again, this time looking for the commas

'==============================================================	

lastPos = 0

pos = 1

Dim idxQuote As Integer ' the quote index

Dim blIgnoreThisComma As Boolean

blIgnoreThisComma = False



Dim v1 As Variant, v2 As Variant



'========================================================

' Get the first valid delimeter

'========================================================	

pos =  Instr ( pos, strBigString , strDelimeter )

' check if we have a valid one, or invalid, that is if it is in a quoted string

idxQuote = 0 

If Isarray(strStartQuotesArr) Then

	Forall strStartDQuote In strStartQuotesArr			

		v1 = Cint(strStartDQuote)

		v2 = Cint(strEndQuotesArr(idxQuote))

		If pos > Cint(strStartDQuote) And pos < Cint(strEndQuotesArr(idxQuote)) Then

			blIgnoreThisComma = True

		End If ' pos

		idxQuote = idxQuote + 1

	End Forall

End If ' is array



Do While blIgnoreThisComma = True And pos > 0 

	blIgnoreThisComma = False

	pos =  Instr ( pos+1, strBigString , strDelimeter )

	idxQuote = 0 

	If Isarray(strStartQuotesArr) Then

		Forall strStartDQuote In strStartQuotesArr			

			v1 = Cint(strStartDQuote)

			v2 = Cint(strEndQuotesArr(idxQuote))

			If pos > Cint(strStartDQuote) And pos < Cint(strEndQuotesArr(idxQuote)) Then

				blIgnoreThisComma = True

			End If ' pos

			idxQuote = idxQuote + 1

		End Forall

	End If ' is array

Loop





' Get the start pos. Store in variable lastpos

' get the end pos . Store in variable po

Do While pos > 0 And count < intPosToFind-1

	

	lastPos = pos

	pos =  Instr ( pos + 1, strBigString , strDelimeter )		

				' check if we are in a quoted section

	idxQuote = 0 

	If Isarray(strStartQuotesArr) Then

		Forall strStartDQuote In strStartQuotesArr			

			v1 = Cint(strStartDQuote)

			v2 = Cint(strEndQuotesArr(idxQuote))

			If pos > Cint(strStartDQuote) And pos < Cint(strEndQuotesArr(idxQuote)) Then

				blIgnoreThisComma = True

			End If ' pos

			idxQuote = idxQuote + 1

		End Forall	

	End If ' is array

	If blIgnoreThisComma = True Then

		

		Do While blIgnoreThisComma = True And pos > 0 

			blIgnoreThisComma = False

			pos =  Instr ( pos + 1, strBigString , strDelimeter )		

				' check if we are in a quoted section

			idxQuote = 0 

			If Isarray(strStartQuotesArr) Then

				Forall strStartDQuote In strStartQuotesArr			

					v1 = Cint(strStartDQuote)

					v2 = Cint(strEndQuotesArr(idxQuote))

					If pos > Cint(strStartDQuote) And pos < Cint(strEndQuotesArr(idxQuote)) Then

						blIgnoreThisComma = True

					End If ' pos

					idxQuote = idxQuote + 1

				End Forall

			End If ' is array

		Loop

		

	End If

	

	If pos > 0 Then

		' valid, so update count

		count = count + 1		

	End If

	

Loop



' If we found at least one of the substring then

' lastPos will be the start of the text we want,

' and pos will be the end

If lastPos > 0 Then

	

	NumberOfRightMostChars = Len( strBigString ) - ( lastPos + Len(strDelimeter)-1 ) ' atk modified12/04/2005. Added + lenstrDelim-1

	RightSide = Right( strBigString, NumberOfRightMostChars  )

	

	If pos > 0 Then 

		

		GetTextAtPositionForCSV = Left( RightSide, pos-lastPos-1 )

		

	Else 

		

		GetTextAtPositionForCSV = RightSide

		

	End If		

	

Elseif lastPos = 0 And pos > 0 Then

	

	' Must have been the first item in the string

	GetTextAtPositionForCSV = Left( strBigString, pos -1 )

	

Elseif lastPos = 0 And pos = 0 And (intPosToFind = 0 Or intPosToFind = 1)  Then

	

	' must be the first item in the string, and no delimeters

	GetTextAtPositionForCSV = strBigString

	

Else

	

	GetTextAtPositionForCSV = ""

	

End If



' And remove any double quotes

GetTextAtPositionForCSV = Replace(GetTextAtPositionForCSV, {"}, "")

End Function ’ GetTextAtPositionForCSV

Subject: The only thing I would add…

…is to have your function pass back an array of ALL the values extracted from a data line.

That way, if you are required to pull alot of information from each line, you need not go thru the entire parsing process each time.

Instead, you can call your function once, then subsequently refer to the appropriate array element for each value you require.

Subject: RE: The only thing I would add…

Thanks for the feedback Terry, sounds like a good idea!