• 2014年亚马逊在线笔试题目及解法_9_24(字符串编辑距离升级版)


    Amazon Campus(2013-Sep-24)Question 2 / 2 (Amazon Campus(17):Find the differences of items in amazon)

    Amazon has millions of different items in different categories right now, so when sellers want to sell items on our website, they want to find the right categories their items belong to.  Suppose we want to build a system to help sellers find the items with the minimal difference and then find the right category. The difference index is the number of single-word edits (insertion, deletion, substitution) required to change one phrase into the other:
    For example, we get two lines from standard input:
    Hadoop in practice
    Hadoop operations
    The difference index of ‘Hadoop in practice’ and ‘Hadoop operations’ is 2, because we can remove ‘practice’ and substitute ‘in’ with ‘operations’; then ‘Hadoop in practice’ is converted to ‘Hadoop operations’.

    For example, we get two lines from standard input
    Hadoop cookbook
    Hadoop operations
    The difference index of ‘Hadoop cookbook’ and ‘Hadoop operations’ is 1, because we can substitute ‘cookbook’ with ‘operations’; then ‘Hadoop cookbook’ is converted to ‘Hadoop operations’.

    For example, we get two lines from standard input:
    Ruby in action
    Hadoop operations
    The difference index of ‘Ruby in action’ and ‘Hadoop operations’ is 3. Because we can substitute ‘Ruby’ with ‘Hadoop’, ‘in’ with ‘operations’ and remove ‘action’ then 'Ruby in action' can convert to 'Hadoop operations'

    // 其实这道题目考查的是两个字符串的编辑距离问题(这里以单词而不是字符作为编辑单位),详细请看另一篇文章《详解字符串编辑距离求解》,此处不再赘述,代码如下:

      1 #include <iostream>
      2 #include <string>
      3 #include <vector>
      4 
      5 using namespace std;
      6 
      /**
       * Splits a phrase into its space-separated words and appends them to vec.
       *
       * Only the space character (' ') is a delimiter. Runs of consecutive
       * spaces, as well as leading and trailing spaces, produce no empty words.
       * The input string is not modified.
       *
       * @param str  NUL-terminated phrase to split; a null pointer is treated as
       *             an empty phrase.
       * @param vec  Receives the words in order of appearance. Existing elements
       *             are kept; new words are appended after them.
       */
      void CutStr2Vec(const char *str, std::vector<std::string> &vec)
      {
          if (!str)
          {
              return;
          }

          // Work on a std::string copy: no manual buffer to allocate or free,
          // and no dependency on the non-portable strcpy_s / strtok_s.
          const std::string text(str);

          std::string::size_type wordStart = text.find_first_not_of(' ');
          while (wordStart != std::string::npos)
          {
              const std::string::size_type wordEnd = text.find(' ', wordStart);
              if (wordEnd == std::string::npos)
              {
                  // Last word runs to the end of the phrase.
                  vec.push_back(text.substr(wordStart));
                  break;
              }
              vec.push_back(text.substr(wordStart, wordEnd - wordStart));
              // Skip the whole run of spaces that follows this word.
              wordStart = text.find_first_not_of(' ', wordEnd);
          }
      }
     26 
     /**
      * Returns the smallest of three integers.
      *
      * @param a  first candidate
      * @param b  second candidate
      * @param c  third candidate
      * @return   the minimum of a, b and c
      */
     int Min3Values(int a, int b, int c)
     {
         int smallest = a;
         if (b < smallest)
         {
             smallest = b;
         }
         if (c < smallest)
         {
             smallest = c;
         }
         return smallest;
     }
     32 
     33 
     34 int nDiffOf2Strings(char*strA,char *strB) 
     35 {
     36     // 分别将两个字符串进行分解,存起来成为两个字符串数组
     37     vector<string> VecStrA;
     38     vector<string> VecStrB;
     39     CutStr2Vec(strA,VecStrA);
     40     CutStr2Vec(strB,VecStrB);
     41     // 获得两个字符串容器的大小
     42     int nLenA = VecStrA.size();
     43     int nLenB = VecStrB.size();
     44     // 动态规划中代价数组的计算
     45     int **matrix = new int *[nLenA + 1];
     46     int i,j;
     47     for (i = 0; i != nLenA +1; i++)
     48     {
     49         matrix[i] = new int[nLenB + 1];
     50     }
     51 
     52 
     53     matrix[0][0] = 0;
     54     for (i = 0; i != nLenA+1; i++)
     55     {
     56         matrix[i][0] = i;
     57     }
     58     for (i=0; i!= nLenB+1; i++)
     59     {
     60         matrix[0][i] = i;
     61     }
     62     //
     63     for (i = 1; i != nLenA + 1; i++)
     64     {
     65         for (j = 1; j != nLenB + 1; j++)
     66         {
     67             int Fij = 0;
     68             // 如果两个字符串不一致,则距离+1 ,否则默认为0
     69             if (VecStrA[i-1].compare(VecStrB[j-1]) != 0)
     70             {
     71                 Fij = 1;
     72             }
     73             matrix[i][j] = Min3Values(matrix[i][j-1] +1, matrix[i-1][j] +1, matrix[i-1][j-1] +Fij);
     74         }
     75     }
     76 
     77 
     78     int nDis = matrix[nLenA][nLenB];
     79     for ( i = 0; i != nLenA+1; i++)
     80     {
     81         delete[] matrix[i];
     82     }
     83     delete[] matrix;
     84 
     85     return nDis;
     86 }
     87 
     88 
     89 
     90 int main()
     91 {
     92     int n1 = nDiffOf2Strings("Ruby in action","Hadoop operations");
     93     int n2 = nDiffOf2Strings("Hadoop in practice","Hadoop operations");
     94     int n3 = nDiffOf2Strings("Hadoop cookbook","Hadoop operations");
     95     int n4 = nDiffOf2Strings("Kindle Fire HD Tablet","Kindle Fire HD 8.9" 4G LTE Wireless Tablet");
     96 
     97     cout<<n1<<endl<<n2<<endl<<n3<<endl<<n4<<endl;
     98 
     99     return 0;
    100 }

    结果如下所示:

     如果有什么建议或者问题,希望能够给予帮助,欢迎讨论!谢谢~

  • 相关阅读:
    ZigBee学习二 LED点对点通信
    ZigBee学习一 任务处理函数_ProcessEvent
    关于count(分组字段)的问题
    hive命令行 显示字段名配置
    Linux 查看当前目录下的文件大小
    apache 端口号与 CDH端口号对比
    dbeaver驱动问题解决方案
    【数学】递推算法之平面分割问题总结
    【HDOJ】(1426)Sudoku Killer (dfs)
    【牛客】牛客小白月赛1(数学)
  • 原文地址:https://www.cnblogs.com/jiabei521/p/3352935.html
Copyright © 2020-2023  润新知