• 用C++后缀自动机实现最长公共子串算法,并封装成Python库


    后缀自动机的C++代码转自https://e-maxx.ru/algo/suffix_automata,其余封装为自写。

    在C++文件同级目录建立setup.py文件,代码如下:

    #!/usr/bin/env python
    """Build script for the ``sam`` extension module (compiles sam_lcs.cpp)."""
    try:
        # distutils was removed from the standard library in Python 3.12
        # (PEP 632), so prefer setuptools when it is available.
        from setuptools import setup, Extension
    except ImportError:
        # Older interpreters that ship distutils but have no setuptools installed.
        from distutils.core import setup, Extension

    mod = "sam"
    setup(name=mod, ext_modules=[Extension(mod, sources=['sam_lcs.cpp'])])

    封装完后缀自动机的源码后,在命令行编译、安装,安装后即可在Python里import调用:

    python setup.py build
    python setup.py install

    注意:setup.py 并没有 uninstall 命令;如需卸载,需要手动删除安装到 site-packages 里的 sam 扩展模块文件(安装时加上 --record files.txt 可以记录下所有已安装的文件)。

    包装模块的C++函数编写如下:

    #include <Python.h> // must precede the standard headers (see the Python C-API docs)

    #include <cstddef>
    #include <exception>
    #include <map>
    #include <new>
    #include <string>
    #include <vector>
    
    using namespace std;
    
    // One state of the suffix automaton (construction follows the e-maxx
    // reference implementation).
    struct state
    {
        int len;                  // length recorded for this state: st[last].len + 1 on creation
        int link;                 // suffix link (index into st); -1 marks the root
        std::map<char, int> next; // transitions: next character -> state index
    };

    // Kept only so that existing references to MAXLEN still compile: the state
    // table below grows on demand and is no longer limited to 2 * MAXLEN entries.
    const int MAXLEN = 100000;
    std::vector<state> st; // state table; st[0] is the root once sa_init() has run
    int sz, last;          // sz: number of states in use; last: state created by the latest sa_extend()

    // Appends one zero-initialised state to the table and returns its index.
    static int sa_new_state()
    {
        st.push_back(state());
        sz = static_cast<int>(st.size());
        return sz - 1;
    }

    // Resets the automaton to a single root state (len 0, no suffix link).
    // Only states that were actually created are destroyed, so the cost is
    // proportional to the previous input rather than to a fixed table size.
    void sa_init()
    {
        st.clear(); // drops all old states; the vector keeps its capacity
        st.push_back(state());
        st[0].len = 0;
        st[0].link = -1;
        sz = 1;
        last = 0;
    }

    // Extends the automaton by one character c appended to the current string.
    // Creates one new state, plus one clone when an existing state must be split.
    // The table is always accessed by index, never through a cached reference,
    // because sa_new_state() may reallocate it.
    void sa_extend(char c)
    {
        const int cur = sa_new_state();
        st[cur].len = st[last].len + 1;

        // Walk the suffix links, adding the missing transition on c.
        int p = last;
        while (p != -1 && st[p].next.find(c) == st[p].next.end())
        {
            st[p].next[c] = cur;
            p = st[p].link;
        }

        if (p == -1)
        {
            // Ran past the root: c never followed any suffix before.
            st[cur].link = 0;
        }
        else
        {
            const int q = st[p].next[c];
            if (st[p].len + 1 == st[q].len)
            {
                st[cur].link = q;
            }
            else
            {
                // q has to be split: clone it with the shorter length and
                // redirect the transitions that pointed to q.
                const int clone = sa_new_state();
                st[clone].len = st[p].len + 1;
                st[clone].next = st[q].next;
                st[clone].link = st[q].link;
                for (; p != -1 && st[p].next[c] == q; p = st[p].link)
                    st[p].next[c] = clone;
                st[q].link = st[cur].link = clone;
            }
        }
        last = cur;
    }

    // Returns a longest common substring of s and t, compared char by char
    // (i.e. byte-wise). When several substrings share the maximum length, the
    // one that ends first in t is returned. The result is empty when the
    // strings have no character in common or when either of them is empty.
    //
    // Rebuilds the global automaton from s on every call.
    std::string lcs(const std::string &s, const std::string &t)
    {
        sa_init();
        // sa_extend() adds at most two states per character, so this avoids
        // any reallocation while the automaton is built.
        st.reserve(2 * s.size() + 1);
        for (std::size_t i = 0; i < s.size(); ++i)
            sa_extend(s[i]);

        int v = 0; // current automaton state
        int l = 0; // length of the current match
        int best = 0;
        std::size_t bestpos = 0; // index in t of the last character of the best match
        for (std::size_t i = 0; i < t.size(); ++i)
        {
            const char c = t[i];
            // Shorten the current match until it can be extended by c
            // (or until we are back at the root).
            while (v != 0 && st[v].next.find(c) == st[v].next.end())
            {
                v = st[v].link;
                l = st[v].len;
            }
            const std::map<char, int>::const_iterator it = st[v].next.find(c);
            if (it != st[v].next.end())
            {
                v = it->second;
                ++l;
            }
            if (l > best)
            {
                best = l;
                bestpos = i;
            }
        }

        // No match at all: return early. The general formula below would ask
        // for t.substr(1, 0), which throws std::out_of_range when t is empty.
        if (best == 0)
            return std::string();
        const std::size_t n = static_cast<std::size_t>(best);
        return t.substr(bestpos + 1 - n, n);
    }
    
    // Python entry point: sam.lcs(s, t) -> str.
    //
    // Parses two str arguments, runs lcs() on them and returns the result as a
    // new str. Returns NULL with a Python exception set on failure.
    //
    // NOTE(review): the "s" format hands the arguments over as UTF-8 and lcs()
    // compares single chars, so for non-ASCII input the match can start or end
    // inside a multi-byte character; PyUnicode_FromString() then fails with a
    // decoding error instead of returning a string.
    static PyObject *sam_lcs(PyObject *self, PyObject *args)
    {
        (void)self; // module-level function: self is not used

        const char *stmp, *ttmp;
        if (!PyArg_ParseTuple(args, "ss", &stmp, &ttmp))
            return NULL;

        // A C++ exception must never unwind into the interpreter's C frames,
        // so everything that can throw (std::string construction, lcs itself)
        // runs inside this try block and is translated to a Python exception.
        try
        {
            const std::string result = lcs(stmp, ttmp);
            return PyUnicode_FromString(result.c_str());
        }
        catch (const std::bad_alloc &)
        {
            return PyErr_NoMemory();
        }
        catch (const std::exception &e)
        {
            PyErr_SetString(PyExc_RuntimeError, e.what());
            return NULL;
        }
        catch (...)
        {
            PyErr_SetString(PyExc_RuntimeError, "sam.lcs: unknown C++ exception");
            return NULL;
        }
    }
    
    // Method table of the module: one entry per Python-visible function,
    // terminated by an all-zero sentinel entry.
    static PyMethodDef sam_lcs_Methods[] = {
        {
            "lcs",                                                // name as seen from Python
            sam_lcs,                                              // C implementation
            METH_VARARGS,                                         // called with (self, args)
            "Get a longest common string of two strings with SAM" // docstring
        },
        {NULL, NULL, 0, NULL} // sentinel
    };
    
    static struct PyModuleDef sam = {
        PyModuleDef_HEAD_INIT,
        "sam",
        "SAM",
        -1,
        sam_lcs_Methods};
    
    // Module initialisation function: creates the module object from the
    // `sam` definition and returns whatever PyModule_Create() produced.
    PyMODINIT_FUNC PyInit_sam(void)
    {
        PyObject *mod = PyModule_Create(&sam);
        return mod;
    }

    编译安装完成后,就可以在Python里调用了

  • 相关阅读:
    框架代码 2
    框架代码 2
    个人资料  代码
    个人资料  代码
    XHTML表单
    框架代码 1
    计算机科学与技术学习反思录(转载)
    写在Blog点击数超过50000之后...
    小笨霖英语笔记本(6)水电
    SUN服务器及Solaris Serial Console常见设置问题
  • 原文地址:https://www.cnblogs.com/shld/p/10456367.html
Copyright © 2020-2023  润新知