代码思路:以文件内容作为字典的 key;value 是内容相同的所有文件的完整路径列表,每个完整路径由"目录 + "/" + 文件名"拼接而成。
import re
class Solution(object):
    """Finds groups of files that share the same content."""

    def findDuplicate(self, paths):
        """Return groups of full file paths whose contents are identical.

        Each entry of ``paths`` is a whitespace-separated string made of a
        directory followed by zero or more ``name(content)`` tokens, e.g.
        ``"root/a 1.txt(abcd) 2.txt(efgh)"``.

        A token's content is the text between its last ``(`` and its
        trailing ``)``; everything before that ``(`` is the file name, so a
        name that itself contains parentheses (``"a(1).txt(abc)"``) is kept
        intact. Blank entries and tokens that do not end in ``(...)`` are
        skipped rather than being truncated into bogus names.

        Only contents shared by at least two files are reported.

        :type paths: List[str]
        :rtype: List[List[str]]
        """
        # Maps file content -> list of "directory/name" paths with that content.
        groups = {}
        for entry in paths:
            parts = entry.split()
            if not parts:
                # Empty or whitespace-only entry: there is no directory to read.
                continue
            directory = parts[0]
            for token in parts[1:]:
                open_idx = token.rfind("(")
                if open_idx == -1 or not token.endswith(")"):
                    # Not of the form name(content); nothing reliable to group.
                    continue
                name = token[:open_idx]
                content = token[open_idx + 1:-1]
                groups.setdefault(content, []).append(directory + "/" + name)
        return [group for group in groups.values() if len(group) > 1]