//************************************************************************************
// Problem statement: This C++ program serves as a simple Web search engine
//   by computing the similarity of a query against three Web pages (documents)
//   and outputting the number of the most relevant Web page.
//
// Main algorithm: do for each of the three documents
//   1. multiply the query terms with the document terms
//   2. multiply the document terms with the document terms
//   3. multiply the query terms with the query terms
//   4. sum the numbers in 1 (compute the numerator)
//   5. sum the numbers in 2 (start to compute left term in denominator)
//   6. sum the numbers in 3 (start to compute right term in denominator)
//   7. multiply the square roots of the numbers in 5 and 6 (compute denominator)
//   8. compute the similarity measure (divide the number in 4 by the number in 7)
//   then find and output the number of the most relevant document
//
// Major variables: d1 - 1D array for the 1st document
//                  d2 - 1D array for the 2nd document
//                  d3 - 1D array for the 3rd document
//                  q  - 1D array for the query
//                  simd1q - the similarity between d1 and q
//                  simd2q - the similarity between d2 and q
//                  simd3q - the similarity between d3 and q
//
// Program limitations: hardcoded for 3 documents, 1 query, and 7 terms (keywords);
//   does not rank documents (only outputs most relevant document).
//
//*********************************************************************************
#include <cmath>
#include <iomanip>
#include <iostream>

using namespace std;

// Number of terms (keywords) in every document vector and in the query vector.
const int NUM_TERMS = 7;

// Computes the similarity between one document vector and the query vector:
//
//     sum(d[i] * q[i]) / ( sqrt(sum(d[i] * d[i])) * sqrt(sum(q[i] * q[i])) )
//
// Parameters:
//   d        - term weights of the document (numTerms elements)
//   q        - term weights of the query (numTerms elements)
//   numTerms - number of elements to read from both arrays
// Returns:
//   the similarity measure, or 0.0 when the denominator is zero (i.e. when
//   either vector contains only zeros), so the division can never produce
//   NaN or infinity.
static double similarity(const double d[], const double q[], int numTerms)
{
    double numerator = 0.0;   // sum of the document-term * query-term products
    double document = 0.0;    // sum of the squared document terms
    double query = 0.0;       // sum of the squared query terms

    for (int i = 0; i < numTerms; i++)
    {
        numerator += d[i] * q[i];
        document += d[i] * d[i];
        query += q[i] * q[i];
    }

    // multiply the square roots of the two sums of squares
    const double denominator = sqrt(document) * sqrt(query);

    // an all-zero vector has nothing in common with anything: report 0
    if (denominator == 0.0)
    {
        return 0.0;
    }
    return numerator / denominator;
}

// Computes the similarity of the query against each of the three hardcoded
// documents, prints the three similarity values, and prints the number of the
// most relevant (highest-similarity) document.  Always returns 0.
int main()
{
    const double d1[NUM_TERMS] = {0, 0, 1, 0.4, 0, 0.2, 0.4};       // document 1
    const double d2[NUM_TERMS] = {1, 0.25, 0.25, 0, 0, 0, 0};       // document 2
    const double d3[NUM_TERMS] = {0, 0.43, 0, 0, 0.57, 0.29, 1};    // document 3
    const double q[NUM_TERMS]  = {0, 0.2, 0.6, 0, 0.2, 0.3, 0};     // the query

    // every accumulator lives inside similarity(), so each document starts
    // from freshly zeroed sums and nothing has to be re-initialized here
    const double simd1q = similarity(d1, q, NUM_TERMS);
    const double simd2q = similarity(d2, q, NUM_TERMS);
    const double simd3q = similarity(d3, q, NUM_TERMS);

    cout << fixed << setprecision(4);
    cout << "Similarity between document 1 and the query: " << simd1q << '\n';
    cout << "Similarity between document 2 and the query: " << simd2q << '\n';
    cout << "Similarity between document 3 and the query: " << simd3q << '\n';

    // find the most relevant document; strict '>' means that on a tie the
    // lowest-numbered document is reported
    int mostRelevant = 1;
    double bestSimilarity = simd1q;
    if (simd2q > bestSimilarity)
    {
        mostRelevant = 2;
        bestSimilarity = simd2q;
    }
    if (simd3q > bestSimilarity)
    {
        mostRelevant = 3;
        bestSimilarity = simd3q;
    }

    cout << "The most relevant document is document " << mostRelevant << '\n';

    return 0;
}