SOLUTION - Remove Duplicate Documents

I have had cause to run a process to remove duplicate documents. The solutions is below:

’ Note that this solution works as an agent in the db you have the duplicates

Lotus Notes Database Synopsis - Generated at 11:14:31 on 04/06/2008

Agent Information

Name: ATK\Simple Find and Remove Duplicates Modules Version

Last Modification: 30/05/2008 16:15:54

Comment: sets duplicates to have the extra code ****

Shared Agent: Yes

Type: LotusScript

State: Enabled

Trigger: Manually From Actions Menu

Acts On: None

LotusScript Code:

Option Public

Option Declare

%INCLUDE “lsconst.lss”

’ finds the % samenesss between 2 documents

’ 100% means all fields match

’ 0% means no fields match.

’ you can ignore blank fields

Sub Initialize

' #######################################################

' loop through view and move duplicates to folder

' Checks based on what is in column 1	

' Identify duplicates by all fields, but ignore certain ones eg $Conflict

' #######################################################

Dim strViewToUse As String

Dim s As New notessession

Dim db As notesdatabase	

Dim vwDocumentsToCheck As NotesView

Dim doc1 As NotesDocument, doc2 As NotesDocument

Dim blDocsAreSame As Boolean

Dim intDocsProcessed As Integer, intDocsTotal As Integer, intDuplicatesFound As Integer

Dim doc3 As notesDocument

Dim v1 As Variant, v2 As Variant

Set db =s.currentdatabase

' #######################################################	

' Ask use which view to use	

' #######################################################	

strViewToUse = PromptForViewName()	

Set vwDocumentsToCheck = db.getview( strViewToUse )

' #######################################################

' Clear out the duplicates folder

' #######################################################	

Dim fldrDuplicatesATK As NotesView

Set fldrDuplicatesATK = db.GetView("DuplicatesATK")

If Not fldrDuplicatesATK Is Nothing Then

	Call fldrDuplicatesATK.AllEntries.RemoveAllFromFolder("DuplicatesATK")

End If

Set doc1 = vwDocumentsToCheck.getfirstdocument

intDocsTotal = vwDocumentsToCheck.AllEntries.Count

' #######################################################

' Loop through all docs in view, compare 1st with 2nd etc. Assume they are in alphabetical order.

' #######################################################	

intDocsProcessed = 0

Set doc2=vwDocumentsToCheck.getnextdocument(doc1)

Do While Not (doc1 Is Nothing) And (Not (doc2 Is Nothing))

	If doc2 Is Nothing Then

		'no more documents, end of view

		Exit Do

	End If

	Set doc3 = vwDocumentsToCheck.GetNextDocument(doc2)

	' if the doc has different fields, then put it into a folder for checking with notes		

	blDocsAreSame = AreDocumentsSame( doc1, doc2 )

CheckAndNextDoc:

	If blDocsAreSame = True Then

		intDuplicatesFound = intDuplicatesFound + 1												

		doc1.PutInFolder("DuplicatesATK")

	End If

NextDoc:

	intDocsProcessed = intDocsProcessed + 1

	If intDocsProcessed Mod 50 = 0 Then

		Print "Duplicates " + Cstr(intDuplicatesFound)  + ". Processed " + Cstr(intDocsProcessed ) " / " + Cstr(intDocsTotal)

	End If

	Set doc1=doc2

	Set doc2=doc3

Loop 

Dim twoLiner As String

twoLiner = "Duplicates moved to folder. Have a look in the folder named duplicates aTk. Cut them to a back updb" 	

Messagebox twoLiner, MB_OK, "Demo"

Dim ws As New NotesUIWorkspace

Dim uidb As NotesUIDatabase

Set uidb = ws.CurrentDatabase

Call uidb.OpenView("DuplicatesATK")

Print "Completed. Duplicates " + Cstr(intDuplicatesFound)  + ". Processed " + Cstr(intDocsProcessed ) " / " + Cstr(intDocsTotal)

End Sub

Function AreDocumentsSame( doc1 As NotesDocument, doc2 As NotesDocument ) As Boolean

Dim strFieldsToIgnoreArr(0 To 1) As String ' hardcoded to use 3 fields to ignore

Dim v As Variant

Dim intNumItemsOnDoc As Integer, intItemIndex As Integer

Dim itemOnDoc1 As NotesItem

Dim itemOnDoc2 As NotesItem

Dim blDocsAreSame As Boolean	

strFieldsToIgnoreArr(0) = "$revisions"

strFieldsToIgnoreArr(1) = "$conflictaction"

intNumItemsOnDoc = Ubound(doc1.Items)

intItemIndex = 0	

blDocsAreSame = True ' assume true, set to false if it fails anywhere	

While intItemIndex <= intNumItemsOnDoc And blDocsAreSame = True

	Set itemOnDoc1 = doc1.Items(intItemIndex)	

	'===========================================		

	' Set itemOnDoc to be the first field from doc1, excluding fields we are ignoreing

	While (intItemIndex < intNumItemsOnDoc And (Lcase(itemOnDoc1.Name) = strFieldsToIgnoreArr(0) Or _

	Lcase(itemOnDoc1.Name) = strFieldsToIgnoreArr(1) ))				

		intItemIndex = intItemIndex + 1

		Set itemOnDoc1 = doc1.Items(intItemIndex)

	Wend

	If intItemIndex = intNumItemsOnDoc And ((Lcase(itemOnDoc1.Name) = strFieldsToIgnoreArr(0) Or _

	Lcase(itemOnDoc1.Name) = strFieldsToIgnoreArr(1)) ) Then

		Goto ExitFunction

	End If				

	'===========================================

	' Check if the item is on doc2, if diff, set to false 

’ Print itemOnDoc1.Name + " " itemOnDoc1.ValueLength

	If Not doc2.HasItem( itemOnDoc1.Name ) Then

		blDocsAreSame = False

	Else

		Set itemOnDoc2 = doc2.GetFirstItem(itemOnDoc1.Name)

		If itemOnDoc1.Text <> itemOnDoc2.Text Then

			blDocsAreSame = False

			'Print "Note about Doc1. " +_

			'"Docs have diff values for " + itemOnDoc1.Name 

			'Print "Item on Doc1 is " + itemOnDoc1.Text

			'Print "Item on Doc2 is " + itemOnDoc2.Text

		End If

	End If

	intItemIndex = intItemIndex + 1

Wend ' all items on doc

ExitFunction:

AreDocumentsSame = blDocsAreSame

End Function

Function PromptForViewName( ) As String

Dim session As New NotesSession

Dim db As  NotesDatabase	

Dim viewsArray As Variant

Dim intSize As Integer

Dim i As Integer	

Dim strViewName As String

Dim strDefaultName As String

Dim ws As New NotesUIWorkspace()

Set db = session.CurrentDatabase

viewsArray = db.Views		

intSize = Ubound(viewsArray)	

Redim strViewNamesArr(intSize) As String	

Forall v In viewsArray

	strViewNamesArr(i) = v.Name

	If v.Name = "Contacts" Then

		strDefaultName = "Contacts" ' if there is a contacts view, we will default to that

	End If

	i = i + 1

End Forall		

'############################################

' Prompt for view name... default to contact if we have it

'############################################

If strDefaultName = "" Then

	strDefaultName = strViewNamesArr(0)

End If

strViewName = ws.Prompt (PROMPT_OKCANCELLIST, _

"Select a View", _

"Select a view to use - (should have first column sorted.)", _

strDefaultName, strViewNamesArr)

If Isempty (strViewName) Then

	strViewName = ""

End If

PromptForViewName = strViewName

End Function

Subject: SOLUTION - Remove Duplicate Documents

Why not filtered by $conflictaction?What is the difference between in $conflict and $conflictaction?

Subject: SOLUTION - Remove Duplicate Documents

Hi,

I tested your solution in one of my databases and doesn´t find all duplicate lines.

Subject: SOLUTION - Remove Duplicate Documents

NSF Duplicate Remover Tool to remove duplicate items from Lotus Note Email Client application. NSF Duplicate Remover allows to remove duplicate copies of emails, messages, notes, tasks, attachments, calendars, journals and etc. and makes user easy to access Lotus Note emails without any hassle. To view this application and for its more detail visit: https://gallery.technet.microsoft.com/NSF-Duplicate-Remover-49dfbf94?redir=0