Commit 669f2690 authored by CZ1004's avatar CZ1004

优化

parent 4b9dcdaf
import Foundation import Foundation
import Photos import Photos
import CommonCrypto
enum SourceType { enum SourceType {
case video case video
...@@ -16,6 +17,7 @@ struct AssetModel { ...@@ -16,6 +17,7 @@ struct AssetModel {
var isCloud: Bool var isCloud: Bool
var width: Int var width: Int
var height: Int var height: Int
var hashValue: String
} }
struct ResourceAllModel { struct ResourceAllModel {
...@@ -38,15 +40,53 @@ class PhotoVideoManager { ...@@ -38,15 +40,53 @@ class PhotoVideoManager {
processingQueue.async { processingQueue.async {
let allAssets = self.findAllResource() let allAssets = self.findAllResource()
var resourceModel = self.createEmptyModel() var resourceModel = self.createEmptyModel()
let totalAssets = allAssets.count
let batchSize = 10 // 根据性能调整批次大小
for (index, asset) in allAssets.enumerated() { // 使用串行队列保证处理顺序
let model = self.getPropertyFromAsset(asset: asset) let serialQueue = DispatchQueue(label: "com.serial.processing")
self.classifyAsset(model, into: &resourceModel) let group = DispatchGroup()
let currentProgress = Double(index + 1) / Double(allAssets.count) * 0.4
self.safeUpdateProgress(currentProgress, model: resourceModel, progress: progress) var currentIndex = 0
var hashDict = [String: [AssetModel]]()
func processNextBatch() {
let endIndex = min(currentIndex + batchSize, totalAssets)
let batch = Array(allAssets[currentIndex..<endIndex])
currentIndex = endIndex
var batchModels = [AssetModel]()
// 并行处理批次内的资源
let innerGroup = DispatchGroup()
for asset in batch {
innerGroup.enter()
self.getAssetModelWithHash(asset: asset) { model in
serialQueue.async {
batchModels.append(model)
innerGroup.leave()
}
}
}
innerGroup.notify(queue: self.processingQueue) {
// 分类并处理重复/相似
self.processBatch(batchModels, &resourceModel, &hashDict)
// 更新进度
let currentProgress = Double(endIndex) / Double(totalAssets) * 0.7
self.safeUpdateProgress(currentProgress, model: resourceModel, progress: progress)
if endIndex < totalAssets {
processNextBatch()
} else {
// 最终处理
self.processAdvancedFeatures(model: &resourceModel, progress: progress)
}
}
} }
self.processAdvancedFeatures(model: &resourceModel, progress: progress) processNextBatch()
} }
} }
...@@ -102,6 +142,48 @@ class PhotoVideoManager { ...@@ -102,6 +142,48 @@ class PhotoVideoManager {
) )
} }
/// Groups assets that were created close together in time and look alike.
///
/// The assets are sorted by `createTime` and walked in order. Each asset is compared with the
/// last member of the group currently being built; it joins that group when the creation-time
/// gap is within the time limit, the pixel aspect ratios are identical, and the size ratio lies
/// strictly inside `1 ± sizeDiff`. Otherwise the open group is closed and a new one starts.
///
/// - Parameter assets: Assets to examine. The limits are chosen from the first element's
///   `resourceType`, so the input is presumably expected to hold a single resource type.
/// - Returns: Every group that ended up with at least two members, in chronological order.
private func findSimilar(in assets: [AssetModel]) -> [[AssetModel]] {
    guard let firstAsset = assets.first else { return [] }

    // Per-type limits: maximum creation-time gap in seconds and maximum relative size difference.
    let limits: (time: TimeInterval, sizeDiff: Double)
    switch firstAsset.resourceType {
    case .photo:
        limits = (time: 300, sizeDiff: 0.2)   // 5 minutes, 20% size difference
    case .shotScreen:
        limits = (time: 60, sizeDiff: 0.1)    // 1 minute, 10% size difference
    case .video:
        limits = (time: 600, sizeDiff: 0.3)   // 10 minutes, 30% size difference
    default:
        limits = (time: 300, sizeDiff: 0.2)   // fallback for any other type
    }
    let lowerRatio = 1 - limits.sizeDiff
    let upperRatio = 1 + limits.sizeDiff

    var result: [[AssetModel]] = []
    var run: [AssetModel] = []

    // Closes the run being built; runs with a single member are not reported.
    func flushRun() {
        if run.count > 1 {
            result.append(run)
        }
    }

    let chronological = assets.sorted { $0.createTime < $1.createTime }
    for candidate in chronological {
        guard let previous = run.last else {
            run = [candidate]
            continue
        }

        let elapsed = candidate.createTime.timeIntervalSince(previous.createTime)
        let ratio = candidate.assetSize / previous.assetSize
        let sizeWithinTolerance = lowerRatio < ratio && ratio < upperRatio
        // Cross-multiplication compares aspect ratios without floating-point division.
        let sameAspectRatio = candidate.width * previous.height == candidate.height * previous.width

        guard elapsed <= limits.time, sameAspectRatio, sizeWithinTolerance else {
            flushRun()
            run = [candidate]
            continue
        }
        run.append(candidate)
    }
    // The run still open after the last asset has not been closed by the loop.
    flushRun()

    return result
}
// 新增辅助方法:获取所有重复项的标识符 // 新增辅助方法:获取所有重复项的标识符
private func getAllDuplicateIdentifiers(dupGroups: [[AssetModel]]) -> Set<String> { private func getAllDuplicateIdentifiers(dupGroups: [[AssetModel]]) -> Set<String> {
var identifiers = Set<String>() var identifiers = Set<String>()
...@@ -115,61 +197,143 @@ class PhotoVideoManager { ...@@ -115,61 +197,143 @@ class PhotoVideoManager {
// MARK: - 核心算法 // MARK: - 核心算法
private func findDuplicates(in assets: [AssetModel]) -> [[AssetModel]] { private func findDuplicates(in assets: [AssetModel]) -> [[AssetModel]] {
var groupingDict = [String: [AssetModel]]() var dict = [String: [AssetModel]]()
assets.forEach { dict[$0.hashValue, default: []].append($0) }
return dict.values.filter { $0.count > 1 }
}
private func getAssetModelWithHash(asset: PHAsset, completion: @escaping (AssetModel) -> Void) {
let baseModel = self.getPropertyFromAsset(asset: asset)
for asset in assets { switch baseModel.resourceType {
// 使用更精确的组合键(时间戳+尺寸+宽高) case .photo, .shotScreen:
let timeStamp = String(format: "%.0f", asset.createTime.timeIntervalSince1970) self.calculateImageHash(asset: asset) { hash in
let key = "\(timeStamp)_\(asset.assetSize)_\(asset.width)_\(asset.height)" var model = baseModel
groupingDict[key, default: []].append(asset) model.hashValue = hash
completion(model)
}
case .video:
self.calculateVideoHash(asset: asset) { hash in
var model = baseModel
model.hashValue = hash
completion(model)
}
default:
completion(baseModel)
} }
return groupingDict.values.filter { $0.count > 1 }
} }
private func processBatch(_ batch: [AssetModel],
private func findSimilar(in assets: [AssetModel]) -> [[AssetModel]] { _ model: inout ResourceAllModel,
guard !assets.isEmpty else { return [] } _ hashDict: inout [String: [AssetModel]]) {
// 分类资源
// 根据资源类型设置不同阈值 batch.forEach { self.classifyAsset($0, into: &model) }
let typeThresholds: [SourceType: (time: TimeInterval, sizeDiff: Double)] = [
.photo: (300, 0.2), // 5分钟,20%大小差异
.shotScreen: (60, 0.1), // 1分钟,10%差异
.video: (600, 0.3) // 10分钟,30%差异
]
let threshold = typeThresholds[assets[0].resourceType] ?? (300, 0.2) // 实时查重
let sortedAssets = assets.sorted { $0.createTime < $1.createTime } batch.forEach { asset in
var groups = [[AssetModel]]() guard !asset.hashValue.isEmpty else { return }
var currentGroup: [AssetModel] = []
if var group = hashDict[asset.hashValue] {
for asset in sortedAssets { group.append(asset)
if let last = currentGroup.last { hashDict[asset.hashValue] = group
let timeDiff = asset.createTime.timeIntervalSince(last.createTime)
let sizeRatio = asset.assetSize / last.assetSize
let sizeValid = sizeRatio > (1 - threshold.sizeDiff) && sizeRatio < (1 + threshold.sizeDiff)
let sameAspect = asset.width * last.height == asset.height * last.width
if timeDiff <= threshold.time && sameAspect && sizeValid { // 发现重复时立即更新
currentGroup.append(asset) if group.count == 2 {
} else { model.dupPhotos.append(group)
if currentGroup.count > 1 { } else if group.count > 2 {
groups.append(currentGroup) if let index = model.dupPhotos.firstIndex(where: { $0.contains { $0.hashValue == asset.hashValue } }) {
model.dupPhotos[index] = group
} }
currentGroup = [asset]
} }
} else { } else {
currentGroup.append(asset) hashDict[asset.hashValue] = [asset]
} }
} }
if currentGroup.count > 1 { // 实时相似性检查(示例处理截图)
groups.append(currentGroup) self.processSimilarity(for: .shotScreen, in: &model)
}
/// Recomputes the "similar" groups for one resource type from the assets already stored in `model`.
///
/// The assets of the requested type are sorted by `createTime` and walked in order. An asset
/// joins the group being built when, compared with that group's last member, the creation-time
/// gap is within the time limit, the pixel aspect ratios are identical, and the size ratio lies
/// within `1 ± sizeDiff` (inclusive). Groups with fewer than two members are discarded.
///
/// - Parameters:
///   - type: The resource type to process. `.photo`, `.shotScreen` and `.video` are handled;
///     for any other value the function returns without touching `model`.
///   - model: The aggregate model. Its `photos` / `screenShots` / `videos` array is read and the
///     matching `similarPhotos` / `similarScreenShots` / `similarVideos` array is overwritten.
private func processSimilarity(for type: SourceType, in model: inout ResourceAllModel) {
    // Maximum creation-time gap (seconds) and maximum relative size difference per type.
    let threshold: (time: TimeInterval, sizeDiff: Double) = {
        switch type {
        case .photo: return (300, 0.2)
        case .shotScreen: return (60, 0.1)
        case .video: return (600, 0.3)
        default: return (300, 0.2)
        }
    }()

    let targetArray: [AssetModel]
    switch type {
    case .photo: targetArray = model.photos
    case .shotScreen: targetArray = model.screenShots
    case .video: targetArray = model.videos
    default: return
    }

    var groups = [[AssetModel]]()
    var currentGroup = [AssetModel]()

    for asset in targetArray.sorted(by: { $0.createTime < $1.createTime }) {
        guard let last = currentGroup.last else {
            currentGroup.append(asset)
            continue
        }

        let timeDiff = asset.createTime.timeIntervalSince(last.createTime)
        let sizeRatio = asset.assetSize / last.assetSize
        // Cross-multiplication compares aspect ratios without floating-point division.
        let sameAspect = asset.width * last.height == asset.height * last.width
        let sizeValid = (1 - threshold.sizeDiff)...(1 + threshold.sizeDiff) ~= sizeRatio

        if timeDiff <= threshold.time && sameAspect && sizeValid {
            currentGroup.append(asset)
        } else {
            if currentGroup.count > 1 {
                groups.append(currentGroup)
            }
            currentGroup = [asset]
        }
    }

    // The loop only closes a group when a non-matching asset follows it, so the group that is
    // still open after the last asset must be flushed here; otherwise it would be silently lost.
    if currentGroup.count > 1 {
        groups.append(currentGroup)
    }

    // Replace the similar groups that correspond to the processed type.
    switch type {
    case .photo: model.similarPhotos = groups
    case .shotScreen: model.similarScreenShots = groups
    case .video: model.similarVideos = groups
    default: break
    }
}
// MARK: - 辅助方法 // MARK: - 辅助方法
/// Requests the asset's image data and reports its SHA-256 digest as a hex string.
///
/// Network access is disabled on the request options, presumably so that cloud-only assets are
/// not downloaded just for hashing (confirm against the intended behaviour).
///
/// - Parameters:
///   - asset: The photo-library asset whose image data is hashed.
///   - completion: Called with the hex digest, or with an empty string when the request
///     delivers no data. It runs on whatever queue the image manager invokes its handler on.
private func calculateImageHash(asset: PHAsset, completion: @escaping (String) -> Void) {
    let requestOptions: PHImageRequestOptions = {
        let configured = PHImageRequestOptions()
        configured.deliveryMode = .highQualityFormat
        configured.isNetworkAccessAllowed = false
        return configured
    }()

    PHImageManager.default().requestImageDataAndOrientation(for: asset, options: requestOptions) { imageData, _, _, _ in
        guard let imageData = imageData else {
            // No data delivered: report the "no hash" marker.
            completion("")
            return
        }
        completion(imageData.sha256())
    }
}
/// Resolves the asset's video file and reports the SHA-256 digest of its contents as a hex string.
///
/// Only assets that resolve to an `AVURLAsset` can be hashed, because the digest is computed
/// from the file at that asset's URL. The file is read on a global utility-QoS queue.
///
/// - Parameters:
///   - asset: The photo-library video asset to hash.
///   - completion: Called with the hex digest, or with an empty string when the asset does not
///     resolve to an `AVURLAsset` or the file cannot be read.
private func calculateVideoHash(asset: PHAsset, completion: @escaping (String) -> Void) {
    let requestOptions = PHVideoRequestOptions()
    requestOptions.deliveryMode = .highQualityFormat
    requestOptions.isNetworkAccessAllowed = false

    PHImageManager.default().requestAVAsset(forVideo: asset, options: requestOptions) { resolvedAsset, _, _ in
        if let fileBacked = resolvedAsset as? AVURLAsset {
            DispatchQueue.global(qos: .utility).async {
                // `.mappedIfSafe` lets the system map the file instead of loading it eagerly.
                let contents = try? Data(contentsOf: fileBacked.url, options: .mappedIfSafe)
                completion(contents?.sha256() ?? "")
            }
        } else {
            completion("")
        }
    }
}
private func safeUpdateProgress(_ value: Double, model: ResourceAllModel, progress: @escaping (Double, ResourceAllModel) -> Void) { private func safeUpdateProgress(_ value: Double, model: ResourceAllModel, progress: @escaping (Double, ResourceAllModel) -> Void) {
DispatchQueue.main.async { DispatchQueue.main.async {
progress(min(max(value, 0.0), 1.0), model) progress(min(max(value, 0.0), 1.0), model)
...@@ -226,7 +390,8 @@ extension PhotoVideoManager { ...@@ -226,7 +390,8 @@ extension PhotoVideoManager {
assetSize: getAssetSize(asset: asset), assetSize: getAssetSize(asset: asset),
isCloud: isCloud, isCloud: isCloud,
width: asset.pixelWidth, // 新增宽度 width: asset.pixelWidth, // 新增宽度
height: asset.pixelHeight // 新增高度 height: asset.pixelHeight, // 新增高度
hashValue : ""
) )
} }
...@@ -254,3 +419,13 @@ extension PhotoVideoManager { ...@@ -254,3 +419,13 @@ extension PhotoVideoManager {
} }
} }
} }
extension Data {
    /// Returns the SHA-256 digest of the receiver as a lowercase hexadecimal string.
    ///
    /// The bytes are fed to CommonCrypto in bounded chunks. `CC_LONG` is a 32-bit length, so
    /// passing `count` in a single call would trap on the integer conversion for data larger
    /// than `UInt32.max` bytes (for example a large memory-mapped video file).
    ///
    /// - Returns: A 64-character hex string; for empty data this is the digest of zero bytes.
    func sha256() -> String {
        // Chunk length comfortably below the 32-bit limit of `CC_LONG`.
        let chunkLength = 1 << 22

        var context = CC_SHA256_CTX()
        _ = CC_SHA256_Init(&context)

        withUnsafeBytes { (buffer: UnsafeRawBufferPointer) in
            // Empty data may expose a nil base address; there is nothing to feed in that case.
            guard let base = buffer.baseAddress else { return }
            var offset = 0
            while offset < buffer.count {
                // `Swift.min` is spelled out because `Data` has its own `min()` member.
                let length = Swift.min(chunkLength, buffer.count - offset)
                _ = CC_SHA256_Update(&context, base + offset, CC_LONG(length))
                offset += length
            }
        }

        var digest = [UInt8](repeating: 0, count: Int(CC_SHA256_DIGEST_LENGTH))
        _ = CC_SHA256_Final(&digest, &context)
        return digest.map { String(format: "%02hhx", $0) }.joined()
    }
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment