• 笔划输入法查找算法示例(Lua实现)


    公司同事最近利用谷歌拼音输入法的源代码实现了自己的拼音输入法。了解之后发现,其中最核心的就是一个trie(字典树)的构造和检索(这里就不多介绍trie树了,google一搜一大把),于是今天就想实现一个笔划输入法。大概的想法是:

    1. 找一个所有汉字或者一、二级汉字的笔顺数据库
    2. 用Lua将数据库读进来,构造一个trie树
      1. 每一个节点存一个笔划
      2. 每一个节点带一个子节点集合
      3. 每一个节点带一个汉字集合,表示到这一级时所有笔划组成的完整汉字
    3. 检索时根据用户输入的笔划,检索到一个节点,然后按笔划顺序遍历子树
      1. 遍历子树可以给出以这些笔划开头的所有汉字,但是总不能一下子全部显示出来吧,所以需要一个迭代器,每调用一次给出一个可能的值。这个迭代器用C实现比较复杂,但是用Lua实现简直就是小意思:直接将遍历子树的函数封装到一个coroutine中,每找到一个汉字就 yield(汉字) 即可

    笔顺数据库

    CSDN上可以下载到 http://download.csdn.net/detail/yyjlan/3766691

    下载下来的是mdb格式,我不太喜欢,Lua也不太喜欢。由于luasql支持odbc,所以可以先将mdb文件加入到odbc数据源,读入后再转成sqlite3的格式,方便以后使用。转换代码如下

     -- One-off migration script: copies every row of the Access table `hzbs`
     -- (reached through the ODBC data source named "hzbs") into a new SQLite3
     -- database file, hzbs.sqlite3.db.
     require "luasql.odbc"
     require "luasql.sqlite3"

     --- Render a value as an SQL string literal.
     -- Embedded single quotes are doubled so the value cannot end the literal
     -- early; nil (an SQL NULL in the source row) is written as NULL.
     local function sql_text(value)
         if value == nil then
             return "NULL"
         end
         return "'" .. (tostring(value):gsub("'", "''")) .. "'"
     end

     --- Render a value as an SQL numeric literal, or NULL when it is missing
     -- or not convertible to a number.
     local function sql_number(value)
         local number = tonumber(value)
         if number == nil then
             return "NULL"
         end
         return tostring(number)
     end

     -- LuaSQL reports failures as (nil, message); assert turns them into errors
     -- that carry the driver's message instead of a later "index a nil value".
     local odbc_env = assert(luasql.odbc())

     -- Before running, register the Access file as a user DSN named "hzbs" under
     -- Control Panel -> Administrative Tools -> Data Sources.
     local odbc_conn = assert(odbc_env:connect("hzbs"))
     local odbc_cur = assert(odbc_conn:execute("SELECT * FROM hzbs;"))

     local sqlite_env = assert(luasql.sqlite3())
     local sqlite_conn = assert(sqlite_env:connect("hzbs.sqlite3.db"))
     assert(sqlite_conn:execute("CREATE TABLE hzbs (id INTEGER primary key, hanzi TEXT, stroke_number INTEGER, stroke_order TEXT, unicode TEXT, gbk TEXT);"))
     sqlite_conn:setautocommit(false) -- start transaction

     local insert_template = "INSERT INTO hzbs(id, hanzi, stroke_number, stroke_order, unicode, gbk) VALUES(%s,%s,%s,%s,%s,%s);"

     -- Columns are read by position: id, hanzi, stroke_number, stroke_order,
     -- unicode, gbk.
     local record = {}
     while odbc_cur:fetch(record, "n") do
         local statement = insert_template:format(
             sql_number(record[1]),
             sql_text(record[2]),
             sql_number(record[3]),
             sql_text(record[4]),
             sql_text(record[5]),
             sql_text(record[6]))
         assert(sqlite_conn:execute(statement))
     end

     sqlite_conn:commit() -- commit the transaction
     sqlite_conn:close()
     sqlite_env:close()

     odbc_cur:close()
     odbc_conn:close()
     odbc_env:close()

    构造子树与检索

    多的不说,直接看代码吧。代码写得有点乱,不过凑合看是没什么问题的。要运行代码必须要先安装 LuaForWindows

      1 require "luasql.sqlite3"
      2 require "wx"
      3 
      4 
      --- Marker wrapped around the UI string literals in this file.
      -- @param s the string to mark
      -- @return s, unchanged
      function _T(s) return s end
      8 
      -- Stroke codes (the stroke_t "enum"). insert_hanzi keys trie children by
      -- the digits of a stroke_order string, and the GUI uses these same codes
      -- both as lookup keys and as the window ids of the stroke buttons.
      local stroke_root = 0 -- for trie root, not a valid stroke
      local stroke_heng = 1
      local stroke_shu = 2
      local stroke_pie = 3
      local stroke_na = 4
      local stroke_zhe = 5
      local stroke_max = 5  -- highest valid stroke code

      -- Glyph shown for each stroke on its button and in the input box, keyed by
      -- stroke code. Four of the five entries had been reduced to empty strings,
      -- which left the buttons and the input echo blank.
      -- NOTE(review): glyphs restored from the heng/shu/pie/na/zhe order; confirm
      -- they match the ones originally intended.
      local stroke_text = {
          [stroke_heng] = _T"一",
          [stroke_shu] = _T"丨",
          [stroke_pie] = _T"丿",
          [stroke_na] = _T"丶",
          [stroke_zhe] = _T"乛",
      }
     19 
     --- Build one trie node.
     -- @param stroke stroke code stored in the node
     -- @return a table with three fields: stroke (the code given), subnodes
     --         (child nodes, initially empty) and hanzis (characters stored at
     --         this node, initially empty; several hanzi may share one stroke
     --         order)
     function new_node(stroke)
         local node = {}
         node.stroke = stroke
         node.subnodes = {}
         node.hanzis = {}
         return node
     end
     26 
     --- Create an empty trie.
     -- @return the root node, a node built by new_node with stroke_root
     function new_trie()
         local root = new_node(stroke_root)
         return root
     end
     30 
     --- Insert one hanzi into the trie, creating missing nodes along the way.
     -- Each character of stroke_order is converted to a number and used as the
     -- key of the child to descend into; the hanzi is appended to the node
     -- reached after the last stroke. An empty stroke_order stores the hanzi on
     -- the node passed in.
     -- @param node trie node to start from (normally the root)
     -- @param stroke_order string of decimal digits, one per stroke
     -- @param hanzi the character to store
     -- @return true when the hanzi was stored; false when stroke_order is not a
     --         string or contains a non-digit character, in which case the trie
     --         is left untouched
     function insert_hanzi(node, stroke_order, hanzi)
         -- Validate before touching the trie: a non-digit would make tonumber
         -- return nil, and using nil as a table key raises an error after part
         -- of the path has already been created.
         if type(stroke_order) ~= "string" or stroke_order:find("%D") then
             return false
         end

         for i = 1, #stroke_order do
             local stroke = tonumber(stroke_order:sub(i, i))
             local child = node.subnodes[stroke]
             if child == nil then
                 child = new_node(stroke)
                 node.subnodes[stroke] = child
             end
             node = child
         end

         node.hanzis[#node.hanzis + 1] = hanzi
         return true
     end
     52 
     --- Follow a sequence of strokes down the trie.
     -- @param root trie node to start from
     -- @param strokes array of stroke codes
     -- @return the node reached after consuming every stroke, or nil when the
     --         sequence is empty or some stroke has no matching child
     function find_node(root, strokes)
         if #strokes == 0 then
             return nil
         end

         local current = root
         for _, stroke in ipairs(strokes) do
             current = current.subnodes[stroke]
             if not current then
                 return nil
             end
         end
         return current
     end
     70 
     --- Load the stroke database into a new trie.
     -- Reads the hanzi and stroke_order columns of every row of table hzbs and
     -- passes each pair to insert_hanzi.
     -- @param db_name path of the SQLite3 database file
     -- @return the populated trie (root node)
     -- Raises an error carrying the driver's message when the database cannot
     -- be opened or the query fails.
     function db_to_trie(db_name)
         local env = assert(luasql.sqlite3())

         -- LuaSQL signals failure with (nil, message); check each step so the
         -- problem is reported here rather than as "index a nil value" later.
         local conn, conn_err = env:connect(db_name)
         if not conn then
             env:close()
             error("cannot open " .. tostring(db_name) .. ": " .. tostring(conn_err))
         end

         local cur, query_err = conn:execute("SELECT hanzi,stroke_order FROM hzbs;")
         if not cur then
             conn:close()
             env:close()
             error("cannot read table hzbs from " .. tostring(db_name) .. ": " .. tostring(query_err))
         end

         local trie = new_trie()

         -- Row buffer reused for every fetch; local so it does not leak into
         -- the global environment.
         local record = {}
         while cur:fetch(record, "a") do
             insert_hanzi(trie, record.stroke_order, record.hanzi)
         end

         cur:close()
         conn:close()
         env:close()

         return trie
     end
     88 
     --- Create a pull-style enumerator over the hanzi stored under a node.
     -- The subtree is walked inside a coroutine: a node's own hanzis are yielded
     -- first, then its children are visited in ascending stroke code
     -- (1..stroke_max).
     -- @param root node whose subtree is enumerated
     -- @return a function that returns the next hanzi on each call, and nil once
     --         the walk has finished (every later call also returns nil)
     function get_hanzi_enumerator(root)
         local function walk(node)
             local here = node.hanzis
             for index = 1, #here do
                 coroutine.yield(here[index])
             end

             for stroke = 1, stroke_max do
                 local child = node.subnodes[stroke]
                 if child then
                     walk(child)
                 end
             end
         end

         local producer = coroutine.create(function () walk(root) end)

         return function ()
             -- resume reports false once the coroutine is dead, and true with
             -- no value on the final resume that lets the walk run to its end.
             local alive, hanzi = coroutine.resume(producer)
             if alive and hanzi ~= nil then
                 return hanzi
             end
             return nil
         end
     end
    116 
    ---------------------------------------------------------------
    -- GUI

    --- Hand out window ids.
    -- Each call returns the next integer above wx.wxID_HIGHEST, so consecutive
    -- calls produce consecutive ids.
    local new_id
    do
        local last_id = wx.wxID_HIGHEST
        new_id = function ()
            last_id = last_id + 1
            return last_id
        end
    end
    126 
    -- Top-level dialog, the panel hosting every control, and the vertical sizer
    -- that stacks the rows of the form.
    dialog = wx.wxDialog(wx.NULL, new_id(), _T"Lua笔划输入法演示",
        wx.wxDefaultPosition, wx.wxDefaultSize)
    panel = wx.wxPanel(dialog, wx.wxID_ANY)
    local main_sizer = wx.wxBoxSizer(wx.wxVERTICAL)

    -- Stroke row: one button per stroke (heng, shu, pie, na, zhe). Each button
    -- uses its stroke code as its window id.
    local stroke_label = wx.wxStaticText(panel, new_id(), _T"可选笔划")
    local heng_button = wx.wxButton(panel, stroke_heng, stroke_text[stroke_heng])
    local shu_button = wx.wxButton(panel, stroke_shu, stroke_text[stroke_shu])
    local pie_button = wx.wxButton(panel, stroke_pie, stroke_text[stroke_pie])
    local na_button = wx.wxButton(panel, stroke_na, stroke_text[stroke_na])
    local zhe_button = wx.wxButton(panel, stroke_zhe, stroke_text[stroke_zhe])

    local button_sizer = wx.wxBoxSizer(wx.wxHORIZONTAL)
    button_sizer:Add(stroke_label, 0, wx.wxALIGN_LEFT+wx.wxALL, 5)
    for _, stroke_button in ipairs{heng_button, shu_button, pie_button, na_button, zhe_button} do
        button_sizer:Add(stroke_button, 0, wx.wxALIGN_LEFT+wx.wxEXPAND+wx.wxALL, 5)
    end

    main_sizer:Add(button_sizer, 0, wx.wxALIGN_LEFT+wx.wxEXPAND+wx.wxALL, 5)
    149 
    -- Input row: a read-only box echoing the strokes entered so far, plus the
    -- backspace and clear buttons.
    local input_label = wx.wxStaticText(panel, new_id(), _T"输入笔划")
    local input_textctrl = wx.wxTextCtrl(panel, new_id(), "",
        wx.wxDefaultPosition, wx.wxDefaultSize, wx.wxTE_READONLY)
    local input_backspace_button = wx.wxButton(panel, new_id(), _T"退格")
    local input_clear_button = wx.wxButton(panel, wx.wxID_CANCEL, _T"清除")

    local input_sizer = wx.wxBoxSizer(wx.wxHORIZONTAL)
    -- Each entry is {window, proportion, flags}; every cell gets a border of 5.
    for _, cell in ipairs{
        {input_label, 0, wx.wxALIGN_LEFT+wx.wxALL},
        {input_textctrl, 1, wx.wxALIGN_LEFT+wx.wxEXPAND+wx.wxALL},
        {input_backspace_button, 0, wx.wxALL},
        {input_clear_button, 0, wx.wxALL},
    } do
        input_sizer:Add(cell[1], cell[2], cell[3], 5)
    end
    main_sizer:Add(input_sizer, 1, wx.wxALIGN_LEFT+wx.wxEXPAND+wx.wxALL, 5)
    163 
    -- Candidate row: a label followed by candidate_number buttons (the buttons
    -- themselves are created by create_candidate_btn).
    local candidate_number = 5
    local candidate_sizer = wx.wxBoxSizer(wx.wxHORIZONTAL)
    local candidate_label = wx.wxStaticText(panel, new_id(), _T"备选汉字")
    candidate_sizer:Add(candidate_label, 0, wx.wxALIGN_LEFT+wx.wxALL, 5)
    --- Create the candidate buttons and add them to candidate_sizer.
    -- @param num number of buttons to create
    -- @return an array of the buttons, with two extra fields: start_id (window
    --         id of the first button) and end_id (start_id + num - 1). The ids
    --         come from consecutive new_id() calls, so the click handler can
    --         map an id back to an index with id - start_id + 1.
    function create_candidate_btn(num)
        local textctrls = {}
        for i = 1, num do
            textctrls[i] = wx.wxButton(panel, new_id(), "")
            candidate_sizer:Add(textctrls[i], 1, wx.wxALIGN_LEFT+wx.wxALL+wx.wxEXPAND, 5)
        end

        if num >= 1 then
            textctrls.start_id = textctrls[1]:GetId()
            -- Derive the range from the buttons actually created (num), not
            -- from the outer candidate_number, so the two cannot disagree.
            textctrls.end_id = textctrls.start_id + num - 1
        else
            -- No buttons: an empty id range that no window id can fall into.
            textctrls.start_id = 0
            textctrls.end_id = -1
        end
        return textctrls
    end
    local candidate_textctrls = create_candidate_btn(candidate_number)
    main_sizer:Add(candidate_sizer, 1, wx.wxALIGN_LEFT+wx.wxALL+wx.wxEXPAND, 5)

    -- Output row: multi-line box that collects the hanzi the user has picked.
    local output_sizer = wx.wxBoxSizer(wx.wxHORIZONTAL)
    local output_textctrl = wx.wxTextCtrl(
        panel,
        new_id(),
        "",
        wx.wxDefaultPosition,
        wx.wxSize(0, 100),
        wx.wxTE_MULTILINE)
    output_sizer:Add(output_textctrl, 1, wx.wxALIGN_LEFT+wx.wxEXPAND+wx.wxALL, 5)
    main_sizer:Add(output_sizer, 0, wx.wxALIGN_LEFT+wx.wxEXPAND+wx.wxALL, 0)

    -- Apply the sizer's size hints to the dialog, then install the sizer.
    main_sizer:SetSizeHints(dialog)
    dialog:SetSizer(main_sizer)
    192 
    -- Required: without this handler the program does not exit properly when
    -- the window is closed.
    local function on_close(event)
        dialog:Destroy()
        event:Skip()
    end
    dialog:Connect(wx.wxEVT_CLOSE_WINDOW, on_close)
    199 
    -- Load the stroke database into the trie.
    local trie = db_to_trie("hzbs.sqlite3.db")

    -- Input state: input_strokes is the array of stroke codes entered so far;
    -- get_next_candidate is the enumerator for the current prefix, or nil when
    -- there is none.
    input_strokes, get_next_candidate = {}, nil
    206 
    --- Refresh the candidate buttons.
    -- Each button, in order, takes the next hanzi from get_next_candidate; a
    -- button is blanked when there is no enumerator or it has run out.
    function update_candidate()
        for _, button in ipairs(candidate_textctrls) do
            local label = ""
            if get_next_candidate ~= nil then
                label = get_next_candidate() or ""
            end
            button:SetLabel(label)
        end
    end
    223 
    --- Show the strokes entered so far in the input box, as their glyphs
    -- separated by single spaces.
    function update_input()
        local glyphs = {}
        for _, stroke in ipairs(input_strokes) do
            glyphs[#glyphs + 1] = stroke_text[stroke]
        end

        local shown = table.concat(glyphs, " ")
        input_textctrl:SetValue(shown)
    end
    232 
    --- Append a stroke to the current input.
    -- If the extended sequence leads to a trie node, the candidate enumerator is
    -- restarted from that node and both displays are refreshed; otherwise the
    -- stroke is dropped again and nothing else changes.
    -- @param stroke stroke code to append
    function insert_stroke(stroke)
        input_strokes[#input_strokes + 1] = stroke

        local node = find_node(trie, input_strokes)
        if node ~= nil then
            get_next_candidate = get_hanzi_enumerator(node)
            update_input()
            update_candidate()
            return
        end

        -- Invalid input: take the stroke back out. (A beep could go here.)
        table.remove(input_strokes)
    end
    245 
    --- Remove the last stroke from the input (backspace).
    -- The candidate enumerator is rebuilt for the shortened sequence, or cleared
    -- when find_node has no node for it; both displays are then refreshed.
    function remove_stroke()
        table.remove(input_strokes)

        local node = find_node(trie, input_strokes)
        get_next_candidate = nil
        if node ~= nil then
            get_next_candidate = get_hanzi_enumerator(node)
        end

        update_input()
        update_candidate()
    end
    258 
    --- Reset the input: start a fresh stroke list, drop the candidate
    -- enumerator, and refresh both displays.
    function clear_stroke()
        input_strokes, get_next_candidate = {}, nil
        update_input()
        update_candidate()
    end
    265 
    -- Single click handler for every button, dispatching on the window id:
    --   stroke_heng..stroke_max          stroke buttons (id == stroke code)
    --   start_id..end_id of candidates   candidate buttons
    --   backspace / clear button ids     editing commands
    dialog:Connect(wx.wxID_ANY, wx.wxEVT_COMMAND_BUTTON_CLICKED,
        function(event)
            local id = event:GetId()
            -- Check both ends of the stroke range: with only an upper bound,
            -- any id below stroke_heng would be treated as a stroke.
            if id >= stroke_heng and id <= stroke_max then
                insert_stroke(id)
            elseif id >= candidate_textctrls.start_id and id <= candidate_textctrls.end_id then
                -- Candidate ids are consecutive, so the offset from start_id
                -- gives the button's index.
                output_textctrl:AppendText(candidate_textctrls[id-candidate_textctrls.start_id+1]:GetLabel())
                clear_stroke()
            elseif id == input_backspace_button:GetId() then
                remove_stroke()
            elseif id == input_clear_button:GetId() then
                clear_stroke()
            end
        end)
    280 
    -- Keyboard shortcuts:
    --   numpad 7 / 8 / 9 / 4 / 5   enter heng / shu / pie / na / zhe
    --   Backspace                  remove the last stroke
    --   '1' .. '5'                 pick the 1st .. 5th candidate
    do
        -- Built once here instead of on every key press.
        local key_callbacks = {}
        key_callbacks[wx.WXK_NUMPAD7] = function ()
            insert_stroke(stroke_heng)
        end
        key_callbacks[wx.WXK_NUMPAD8] = function ()
            insert_stroke(stroke_shu)
        end
        key_callbacks[wx.WXK_NUMPAD9] = function ()
            insert_stroke(stroke_pie)
        end
        key_callbacks[wx.WXK_NUMPAD4] = function ()
            insert_stroke(stroke_na)
        end
        key_callbacks[wx.WXK_NUMPAD5] = function ()
            insert_stroke(stroke_zhe)
        end
        key_callbacks[wx.WXK_BACK] = function ()
            remove_stroke()
        end
        for i = 1, candidate_number do
            key_callbacks[i - 1 + string.byte("1")] = function ()
                output_textctrl:AppendText(candidate_textctrls[i]:GetLabel())
                clear_stroke()
            end
        end

        dialog:Connect(wx.wxID_ANY, wx.wxEVT_KEY_DOWN, function (event)
            local callback = key_callbacks[event:GetKeyCode()]
            if callback then
                callback()
            else
                -- Not one of our shortcuts: let wxWidgets continue with its
                -- default processing. A key-down handler that does not call
                -- Skip() suppresses the following char event, which would
                -- block ordinary typing in the controls.
                event:Skip()
            end
        end)
    end
    313 
    --- Chain every descendant window of parent to an event handler.
    -- Per the original author's note, wxWidgets does not deliver a child
    -- window's key presses to the main window, so each child (recursively) gets
    -- processer set as its next event handler.
    -- @param parent window whose descendants are visited
    -- @param processer handler to set as each descendant's next handler
    function process_children_keydown_event(parent, processer)
        local children = parent:GetChildren()
        local last_index = children:GetCount() - 1

        for index = 0, last_index do
            local child = children:Item(index):GetData():DynamicCast("wxWindow")
            child:SetNextHandler(processer)
            process_children_keydown_event(child, processer)
        end
    end
    325 
    -- Route key presses from every control in the dialog to the dialog itself.
    process_children_keydown_event(dialog, dialog)

    -- Centre the dialog and show it.
    dialog:Centre()
    dialog:Show(true)
    -- Original author's note: setting the focus at this point produces no beep.
    input_textctrl:SetFocus()

    -- Enter the wxWidgets event loop.
    local app = wx.wxGetApp()
    app:MainLoop()

    打包下载

    源代码包和sqlite3数据库可以在这里下载

  • 相关阅读:
    makefile学习之路
    DiagramDesigner的学习心得一
    MvvmLight的Message使用
    MvvmLight学习心得三
    Nancy学习心得一
    WPF的类似WinForm中的托盘
    py学习记录#10
    PY期末习题全解析
    py学习记录#11
    PY学习记录#9
  • 原文地址:https://www.cnblogs.com/windtail/p/2623167.html
Copyright © 2020-2023  润新知