#native_company# #native_desc#
#native_cta#

mysql duplicate record finder

By Tom Smith — html, php, mysql, linux
on May 27, 2001

Version: 00.999

Type: Full Script

Category: Databases

License: GNU General Public License

Description: Simple loop to find duplicate email addresses. The original problem was to clean the data from a 8000 record database of users. The users had been id’d only by email address and this had not been enforced as unique, so a lot of duplicate records existed. This script loops though and finds teh duplicates.

<?php
###########################################################
 /*
 This script checks for duplicate records in table. 
 This is ungodly slow. Takes like 15 minutes to run through 
 8700 records. Optimize it!
 Script does a select, gets the number of total rows. Then
 it runs through each one and compares it to all the other 
 records. If the count of records is more than 1, it imcrem-
 ents a counter, echos some stuff and goes on about it's bus-
 iness. It could be easily modified to delete the records 
 instead of just printing the ids.
 */
###########################################################

$db = mysql_pconnect("localhost", "user", "pass");
mysql_select_db(mydb, $db);
$table = "contact";

$query = "select * from $table "; //limit 8700, 100
echo "<font color=blue>Query: $query</font><br>";

$res = mysql_query($query, $db)or die("query failed");
$count = mysql_numrows($res);
echo $count."<hr>";
$i = 0;

while($row = mysql_fetch_array($res)) {
	//in my table the 5th column is email, so row[4].
	$query2 = "select id from $table where email = '$row[4]'";
	//echo $query2."<br>";
	
	$res2 = mysql_query($query2, $db)or die("broken on query2");
	//echo mysql_errno().": ".mysql_error()."<BR>";

	$num_email = mysql_numrows($res2);
	//echo $num_email." $count <br>";
	if ($num_email > 1){
		echo "id: $row[0], $row[4], count of entries: $num_email<br>n";	
		$i++;
	}
	
}
echo "Number of duplicates: $i";
?>
<hr>