57 lines
2.2 KiB
Python
Executable File
57 lines
2.2 KiB
Python
Executable File
#!/usr/bin/env python
|
|
#
|
|
# Copyright (C) 2020 Apple Inc. All rights reserved.
|
|
#
|
|
# Redistribution and use in source and binary forms, with or without
|
|
# modification, are permitted provided that the following conditions
|
|
# are met:
|
|
# 1. Redistributions of source code must retain the above copyright
|
|
# notice, this list of conditions and the following disclaimer.
|
|
# 2. Redistributions in binary form must reproduce the above copyright
|
|
# notice, this list of conditions and the following disclaimer in the
|
|
# documentation and/or other materials provided with the distribution.
|
|
#
|
|
# THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' AND ANY
|
|
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
|
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
# DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY
|
|
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
|
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
|
# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
import argparse, hashlib, os, sys
|
|
from collections import defaultdict
|
|
|
|
# Number of bytes read from a file per chunk while hashing.
BLOCK_SIZE = 65536


def _digest_of_file(path, block_size=BLOCK_SIZE):
    """Return the raw MD5 digest of the file at `path`, reading it in chunks.

    MD5 serves only as a content fingerprint for grouping files here.
    Raises IOError/OSError if the file cannot be opened or read.
    """
    digest = hashlib.md5()
    with open(path, "rb") as handle:
        # iter() with a sentinel keeps calling read() until it returns b"" (EOF),
        # so the whole file is never held in memory at once.
        for chunk in iter(lambda: handle.read(block_size), b""):
            digest.update(chunk)
    return digest.digest()


def find_duplicate_files(root):
    """Return groups of files under `root` whose contents hash identically.

    Walks `root` recursively and groups files by the MD5 digest of their
    contents. Each returned group is a sorted list of paths relative to
    `root` that contains at least two entries; the groups themselves are
    returned in sorted order. Files that cannot be read (for example dangling
    symlinks or files without read permission) are reported on stderr and
    skipped instead of aborting the whole scan.
    """
    files_by_digest = defaultdict(list)

    for subroot, _directories, files in os.walk(root):
        # relpath() is used instead of slicing off len(root) + 1 characters:
        # slicing drops one character too many when root already ends with a
        # separator (e.g. when root is "/").
        relative_dir = os.path.relpath(subroot, root)
        if relative_dir == os.curdir:
            # Files directly inside root are reported without any prefix.
            relative_dir = ""

        for name in files:
            relative_path = os.path.join(relative_dir, name)
            try:
                digest = _digest_of_file(os.path.join(subroot, name))
            except (IOError, OSError) as error:
                sys.stderr.write("Skipping {}: {}\n".format(relative_path, error))
                continue
            files_by_digest[digest].append(relative_path)

    return sorted(sorted(group) for group in files_by_digest.values() if len(group) > 1)


def main():
    """Print each group of identical files found under the current directory."""
    parser = argparse.ArgumentParser(description='Find any files that have the same contents.')
    # No options are defined; parsing still provides --help and rejects stray arguments.
    parser.parse_args()

    for group in find_duplicate_files(os.getcwd()):
        print(group)


if __name__ == "__main__":
    main()
|