diff --git a/2025/02/cracking_code_interview_17_26_sparse_similarity/src/main.cpp b/2025/02/cracking_code_interview_17_26_sparse_similarity/src/main.cpp
index 5c12535..228b090 100644
--- a/2025/02/cracking_code_interview_17_26_sparse_similarity/src/main.cpp
+++ b/2025/02/cracking_code_interview_17_26_sparse_similarity/src/main.cpp
@@ -11,21 +11,34 @@ const int MAX_BUFFER = 0x1000;
 using namespace std;
 
 void process_data(map> &documents) {
+    fprintf(stderr, "Processing data\n");
     for (auto it1 = documents.begin(); it1 != documents.end(); it1++) {
         for (auto it2 = documents.begin(); it2 != documents.end(); it2++) {
-            if (it1->first == it2->first) {
+            if (it1->first >= it2->first) {
                 continue;
             }
 
-            set intersection;
-            set_intersection(
-                it1->second.begin(), it1->second.end(),
-                it2->second.begin(), it2->second.end(),
-                inserter(intersection, next(intersection.begin())));
+            set union_set;
+            set intersection_set;
 
-            if (intersection.size()) {
-                fprintf(stderr, "Documents %d and %d have %d common elements\n",
-                        it1->first, it2->first, intersection.size());
+            for (auto el = it1->second.begin(); el != it1->second.end(); el++) {
+                union_set.insert(*el);
+                if (auto search = it2->second.find(*el); search != it2->second.end()) {
+                    intersection_set.insert(*el);
+                }
+            }
+
+            for (auto el = it2->second.begin(); el != it2->second.end(); el++) {
+                union_set.insert(*el);
+            }
+
+            auto common_count = intersection_set.size();
+            auto union_count = union_set.size();
+
+            if (common_count) {
+
+                fprintf(stderr, "Documents %d and %d have %.2f sparse\n",
+                        it1->first, it2->first, (double)common_count / (double)union_count);
             }
         }
     }
@@ -81,6 +94,7 @@ int main() {
         process_input(documents, buffer);
     }
 
+    process_data(documents);
     fprintf(stderr, "Execution finished\n");
 
     return 0;