Version: 00.999
Type: Full Script
Category: Databases
License: GNU General Public License
Description: Simple loop to find duplicate email addresses. The original problem was to clean the data from a 8000 record database of users. The users had been id’d only by email address and this had not been enforced as unique, so a lot of duplicate records existed. This script loops though and finds teh duplicates.
<?php ########################################################### /* This script checks for duplicate records in table. This is ungodly slow. Takes like 15 minutes to run through 8700 records. Optimize it! Script does a select, gets the number of total rows. Then it runs through each one and compares it to all the other records. If the count of records is more than 1, it imcrem- ents a counter, echos some stuff and goes on about it's bus- iness. It could be easily modified to delete the records instead of just printing the ids. */ ########################################################### $db = mysql_pconnect("localhost", "user", "pass"); mysql_select_db(mydb, $db); $table = "contact"; $query = "select * from $table "; //limit 8700, 100 echo "<font color=blue>Query: $query</font><br>"; $res = mysql_query($query, $db)or die("query failed"); $count = mysql_numrows($res); echo $count."<hr>"; $i = 0; while($row = mysql_fetch_array($res)) { //in my table the 5th column is email, so row[4]. $query2 = "select id from $table where email = '$row[4]'"; //echo $query2."<br>"; $res2 = mysql_query($query2, $db)or die("broken on query2"); //echo mysql_errno().": ".mysql_error()."<BR>"; $num_email = mysql_numrows($res2); //echo $num_email." $count <br>"; if ($num_email > 1){ echo "id: $row[0], $row[4], count of entries: $num_email<br>n"; $i++; } } echo "Number of duplicates: $i"; ?> <hr>