Algorithms to Compute the Dot Product of Two Sparse Vectors

  • 时间:2020-10-09 18:35:39
  • 分类:网络文摘
  • 阅读:77 次

Given two sparse vectors, compute their dot product. Implement class SparseVector:

  • SparseVector(nums) Initializes the object with the vector nums
  • dotProduct(vec) Compute the dot product between the instance of SparseVector and vec

A sparse vector is a vector that has mostly zero values, you should store the sparse vector efficiently and compute the dot product between two SparseVector.

Follow up: What if only one of the vectors is sparse?

Example 1:
Input: nums1 = [1,0,0,2,3], nums2 = [0,3,0,4,0]
Output: 8
Explanation: v1 = SparseVector(nums1) , v2 = SparseVector(nums2)
v1.dotProduct(v2) = 1*0 + 0*3 + 0*0 + 2*4 + 3*0 = 8

Example 2:
Input: nums1 = [0,1,0,0,0], nums2 = [0,0,0,0,2]
Output: 0
Explanation: v1 = SparseVector(nums1) , v2 = SparseVector(nums2)
v1.dotProduct(v2) = 0*0 + 1*0 + 0*0 + 0*0 + 0*2 = 0

Example 3:
Input: nums1 = [0,1,0,0,2,0,0], nums2 = [1,0,0,0,3,0,4]
Output: 6

Constraints:
n == nums1.length == nums2.length
1 <= n <= 10^5
0 <= nums1[i], nums2[i] <= 100

Hints:
Because the vector is sparse, use a data structure that stores the index and value where the element is nonzero.

Your SparseVector object will be instantiated and called as such:

1
2
3
SparseVector v1(nums1);
SparseVector v2(nums2);
int ans = v1.dotProduct(v2);
SparseVector v1(nums1);
SparseVector v2(nums2);
int ans = v1.dotProduct(v2);

Straightforward – unoptimised implementation to compute the dot product

We can just take the array (vector of numbers) as it is and a single loop to sum up the product of numbers between two arrays.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
class SparseVector {
public:
    SparseVector(vector<int> &nums) {
        data = nums;
    }
    
    // Return the dotProduct of two sparse vectors
    int dotProduct(SparseVector& vec) {
        auto x = vec.getData();
        int s = 0;
        for (int i = 0; i < x.size(); ++ i) {
            s += x[i] * data[i];
        }
        return s;
    }
    
    vector<int> getData() {
        return data;
    }
private:
    vector<int> data;
    
};
class SparseVector {
public:
    SparseVector(vector<int> &nums) {
        data = nums;
    }
    
    // Return the dotProduct of two sparse vectors
    int dotProduct(SparseVector& vec) {
        auto x = vec.getData();
        int s = 0;
        for (int i = 0; i < x.size(); ++ i) {
            s += x[i] * data[i];
        }
        return s;
    }
    
    vector<int> getData() {
        return data;
    }
private:
    vector<int> data;
    
};

As the dotProduct interface take the SparseVector as a parameter – which means that we need to expose the API to return the data.

The implementation is not storage efficient as we are storing the zeros (could be massive).

The time and space complexity is both O(N) where N is the number of elements in a sparse vector.

Using Hash Map to Store the Sparse Vector and Compute the Dot Product

We could easily come up with a solution to store the Sparse vector more efficiently. We can use hash map – to store only the non-zero elements in the vector. And we can expose an API to return the number at a index.

The space complexity is O(M) where M is the non-zero elements (which could be much less than N). However, the time complexity is O(N).

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
class SparseVector {
public:    
    SparseVector(vector<int> &nums) {        
        sz = nums.size();
        for (int i = 0; i < nums.size(); ++ i) {
            if (nums[i] != 0) { // store only non-zero elements
                data[i] = nums[i];
            }
        }
    }
    
    // Return the dotProduct of two sparse vectors
    int dotProduct(SparseVector& vec) {
        int s = 0;
        for (int i = 0; i < sz; ++ i) {
            s += data[i] * vec.getValue(i);
        }
        return s;
    }
    
private:
    int getValue(int idx) {
        return data[idx];
    }
    
private:
    unordered_map<int, int> data;
    int sz;
};
class SparseVector {
public:    
    SparseVector(vector<int> &nums) {        
        sz = nums.size();
        for (int i = 0; i < nums.size(); ++ i) {
            if (nums[i] != 0) { // store only non-zero elements
                data[i] = nums[i];
            }
        }
    }
    
    // Return the dotProduct of two sparse vectors
    int dotProduct(SparseVector& vec) {
        int s = 0;
        for (int i = 0; i < sz; ++ i) {
            s += data[i] * vec.getValue(i);
        }
        return s;
    }
    
private:
    int getValue(int idx) {
        return data[idx];
    }
    
private:
    unordered_map<int, int> data;
    int sz;
};

Using a Linked List Data Structure to Store Sparse Vectors and Compute the Dot Product

The optimised implementation would be to use a linked-list data structure to store the Sparse vector. Then we can use iterators to advance to next elements one at a time.

We also need to store the index of the non-zero elements so that we can compare the indices – only sum up the product if both indices are equal. And at each iteration we only advance the iterator with smaller index – until we reach one of the end.

The time and space complexity is both O(M). In C++, the STL::list is a linked-list data structure – which is perfect in this case. However, we can still replace it with vector which still works.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
class SparseVector {
public:    
    SparseVector(vector<int> &nums) {  
        for (int i = 0; i < nums.size(); ++ i) {
            if (nums[i] != 0) {
                data.push_back(make_pair(i, nums[i]));
            }
        }
    }
    
    // Return the dotProduct of two sparse vectors
    int dotProduct(SparseVector& vec) {
        auto xx = vec.getData();
        auto x = xx.begin();
        auto y = data.begin();
        int s = 0;
        while ((x != end(xx)) && (y != end(data))) {
            if (x->first == y->first) {
                s += x->second * y->second;
                x ++;
                y ++;
            } else if (x->first < y->first) {
                x ++;
            }  else {
                y ++;
            }
        }
        return s;
    }
    
    list<pair<int, int>> getData() {
        return data;
    }    
private:
    list<pair<int, int>> data;
};
class SparseVector {
public:    
    SparseVector(vector<int> &nums) {  
        for (int i = 0; i < nums.size(); ++ i) {
            if (nums[i] != 0) {
                data.push_back(make_pair(i, nums[i]));
            }
        }
    }
    
    // Return the dotProduct of two sparse vectors
    int dotProduct(SparseVector& vec) {
        auto xx = vec.getData();
        auto x = xx.begin();
        auto y = data.begin();
        int s = 0;
        while ((x != end(xx)) && (y != end(data))) {
            if (x->first == y->first) {
                s += x->second * y->second;
                x ++;
                y ++;
            } else if (x->first < y->first) {
                x ++;
            }  else {
                y ++;
            }
        }
        return s;
    }
    
    list<pair<int, int>> getData() {
        return data;
    }    
private:
    list<pair<int, int>> data;
};

–EOF (The Ultimate Computing & Technology Blog) —

推荐阅读:
超实用的14种微博推广方法  新浪微博营销推广新策略分享  微博推广的核心思路及实用的14种方法  老员工分享:微博营销的8大技巧  企业在营销中为何要把握精准营销的策略  企业做微博营销的8个技巧  你知道微博营销中的秘密吗  幼年早慧的高斯  猜数的数学游戏  欧几里得言传身教 
评论列表
添加评论