• python解析FreeMind和XMind思维导图


             记录瞬间

    在实际工作中,通常需要使用思维导图进行一些分析和设计,但是,在设计好之后,想要把思维导图的内容转化成文字进行输出怎么做呢?

    使用 python(当然也可以使用其他语言)可以很好地解决这个问题。

    代码如下:

    # coding:utf-8
    import os
    from html.parser import HTMLParser
    
    
    def analyse_mm_file(mm_file):
        """Scan a FreeMind 1.0.1 ``.mm`` file line by line and print what it finds.

        The scan is line oriented: each line is classified by the tag it starts
        with (``<node ``, ``</node>``, ``<icon `` ...) instead of being parsed as
        XML. Rich-text node/note bodies are buffered into ``temp.html`` in the
        current directory, parsed with :func:`analyse_html`, printed and then
        removed again.

        NOTE(review): the block nesting below was reconstructed from a
        whitespace-mangled listing; confirm it against the author's original.

        :param mm_file: path of the FreeMind file to read.
        :raises SystemExit: when ``mm_file`` is not an existing file.
        """
        if not os.path.isfile(mm_file):
            print("系统中没有找到没有FreeMind文件。{}".format(mm_file))
            # Same effect as the builtin exit(), without depending on ``site``.
            raise SystemExit

        temp_html = "temp.html"
        # Tags that are simply echoed and counted.
        echoed_tags = ('<icon ', '<arrowlink ', '<hook ', '<text>',
                       '</hook>', '<cloud/>', '<font ', '<edge ')

        num = 1         # line counter
        point = 0       # current nesting depth; 1 is the root node
        mark_node = 0   # rich-text node state: 0 idle, 1 collecting, 2 collected
        mark_note = 0   # note state: 0 idle, 1 collecting, 2 collected
        flow = ""       # "_"-joined depth trail, e.g. "1_2_3"

        with open(mm_file, encoding="utf-8") as mm:
            lines = mm.readlines()

        for line in lines:
            line = line.rstrip('\n')

            if mark_node == 1 or mark_note == 1:
                # Inside a rich-text body: copy the HTML into the scratch file
                # until the closing </node> is reached.
                with open(temp_html, mode="a", encoding="UTF-8") as buf:
                    if ('<richcontent TYPE="NODE"><html>' in line
                            or '<richcontent TYPE="NOTE"><html>' in line):
                        buf.write("<html>\n")
                    elif '</html>' in line:
                        buf.write("</html>\n")
                    elif '</richcontent>' in line:
                        pass
                    elif '</node>' in line:
                        point -= 1
                        if mark_node == 1:
                            mark_node = 2
                        if mark_note == 1:
                            mark_note = 2
                    else:
                        buf.write(line + "\n")
                num += 1
                continue

            if mark_node == 2 or mark_note == 2:
                # The buffered HTML is complete: parse, report and clean up.
                data = analyse_html(temp_html)
                print("data = ", data)
                os.remove(temp_html)
                if mark_node == 2:
                    # rich-text node content
                    for item in data:
                        print(item.replace('\n', ''))
                if mark_note == 2:
                    # note content
                    for item in data:
                        print(item.replace('\n', ''))
                mark_node = 0
                mark_note = 0

            if line.rfind('<map version="1.0.1">') == 0 and num == 1:
                num += 1

            if line.rfind('</map>') == 0:
                print("解析文件完成!共解析 {} 行。".format(num))
            elif line.rfind('</node>') == 0:
                point -= 1
                num += 1
            elif line.rfind('<node ') == 0:
                point += 1
                text_at = line.rfind('" TEXT="')
                if text_at != -1 and line[-2:] == '">':
                    # Node with children: TEXT runs up to the closing '">'.
                    start_num = text_at + 8
                    print("start num = ", start_num)
                    print(get_chinese(line[start_num:len(line) - 2]))
                elif text_at != -1 and line[-2:] == '/>':
                    # Self-closing leaf: TEXT runs up to '"/>' and the depth
                    # is restored immediately.
                    start_num = text_at + 8
                    print("start num = ", start_num)
                    print(get_chinese(line[start_num:len(line) - 3]))
                    point -= 1
                if text_at == -1:
                    mark_node = 1   # node text is stored as embedded HTML
                num += 1
                if len(flow) == 0:
                    flow = "{}".format(point)
                else:
                    last = int(flow.split("_")[-1])
                    if point < last:
                        flow = flow.split(str(point))[0] + str(point)
                    elif point > last:
                        flow = "{}_{}".format(flow, point)
                print("总体的线性流程:", flow)
            elif line.rfind('<richcontent TYPE="NOTE"><html>') == 0:
                with open(temp_html, mode="a", encoding="UTF-8") as buf:
                    buf.write('<html>\n')
                mark_note = 1       # a note body follows
            elif any(line.rfind(tag) == 0 for tag in echoed_tags):
                # icons, arrow links (cross references), hooks, clouds, fonts, edges
                print(line)
                num += 1
            else:
                num += 1


    def analyse_html(file_path):
        """Parse the HTML file at ``file_path`` and return the strings ``HP`` collected."""
        with open(file=file_path, mode="r", encoding="UTF-8") as f:
            page = f.read()
        html_parser = HP()
        html_parser.feed(page)
        html_parser.close()
        return html_parser.data


    def get_chinese(line):
        """Decode the character references in a FreeMind ``TEXT`` value.

        FreeMind stores non-ASCII text as ``&#x4e2d;`` style references, line
        breaks as ``&#xa;`` and ampersands as ``&amp;``. ``html.unescape``
        handles all of them in a single pass, including references that are
        not exactly four hex digits long.

        :param line: raw attribute text.
        :return: the decoded text.
        """
        import html  # local import: the file only imports ``html.parser``

        return html.unescape(line)


    class HP(HTMLParser):
        """Collect the text of ``<p>``/``<li>`` elements and ``<img>`` sources."""

        def __init__(self):
            # Character references in text are decoded by the parser itself.
            super().__init__(convert_charrefs=True)
            self.tag_text = False   # True while inside <p> or <li>
            self.data = []          # collected strings, in document order

        def handle_starttag(self, tag, attr):
            if tag in ('p', 'li'):
                self.tag_text = True
            if tag == 'img':
                src = self._attr(attr, 'src')
                if src:
                    self.data.append("img:{}".format(src))

        def handle_endtag(self, tag):
            if tag in ('p', 'li'):
                self.tag_text = False

        def handle_data(self, data):
            if self.tag_text:
                # Already decoded by the parser; decoding again would corrupt
                # text that legitimately contains "&amp;"-like sequences.
                self.data.append(data)

        def _attr(self, attr_list, attr_name):
            """Return the value of ``attr_name`` from ``(name, value)`` pairs, or None."""
            for name, value in attr_list:
                if name == attr_name:
                    return value
            return None


    if __name__ == "__main__":
        analyse_mm_file("./mm/思维导图.mm")

    XMind 可以把思维导图导出为 FreeMind 格式,但导出的是 0.8.1 版本的 FreeMind 文件,而且所有内容都会挤在同一行中。

    所以需要对xmind导出的结果进行简要的修改

    import uuid
    
    def reformat_xmind_export(text):
        """Put one tag per line in a FreeMind 0.8.1 file exported by XMind.

        XMind writes the whole export on a single line; line breaks are
        inserted between adjacent tags so the result can be processed line by
        line. Only the first line is inspected to decide what to do.

        :param text: full content of the exported ``.mm`` file.
        :return: the reformatted text for a ``0.8.1`` map; ``""`` for a
            ``1.0.1`` map (nothing to rewrite), for empty input and for any
            other first line.
        """
        first_line = text.partition('\n')[0]
        if '<map version="0.8.1">' not in first_line:
            return ""
        # Work on the text itself rather than on str(list_of_lines), which
        # breaks as soon as a line contains a quote character.
        for old, new in (
            ('"><', '">\n<'),
            ('></', '>\n</'),
            ('</node><node', '</node>\n<node'),
            ('"/><node', '"/>\n<node'),
        ):
            text = text.replace(old, new)
        return text


    if __name__ == '__main__':
        file_path = r'/path/to/mm/file.mm'
        with open(file=file_path, mode='r', encoding='utf-8') as f:
            get_str = reformat_xmind_export(f.read())

        # Write the result next to the script under a unique name.
        file_name = r'./temp-' + str(uuid.uuid1()).replace('-', '') + '.txt'
        with open(file=file_name, mode='a', encoding='UTF-8') as f:
            print(get_str, file=f, flush=True)

        print(get_str)

    生活和学习都是层层递进的:在经历了痛苦的逐行解析之后,才发现可以直接用解析 XML 文件的方式得到最终结果。

    少啰嗦,上代码:

    import zipfile
    import os
    import io
    import sys
    import hashlib
    import xml.etree.ElementTree as ET
    
    
    class AnalyseMindMap:
        """Render an XMind or FreeMind mind map as a wiki-style text outline.

        The root topic becomes an ``h1.`` heading, first-level topics become
        ``h3.`` headings and deeper topics become list items whose prefix is
        ``mark`` repeated once per extra level. Notes are wrapped in ``<pre>``.
        The rendered text is accumulated in ``self.context`` and also printed.
        """

        def __init__(self, file_path, mark):
            """
            :param file_path: path of the ``.xmind`` or ``.mm`` file.
            :param mark: list prefix such as ``"#"`` or ``"*"``; an empty value
                selects four spaces. Any other non-empty string is used as is.
            """
            self.file_path = file_path
            self.context = ""
            self.mark = mark if mark else "    "

        @staticmethod
        def _text(element):
            """Return ``element.text``, or ``""`` when the element or its text is missing."""
            if element is None or element.text is None:
                return ""
            return element.text

        @staticmethod
        def _compact(text):
            """Drop every space and line break from ``text``."""
            return text.replace(' ', '').replace('\n', '')

        @staticmethod
        def _load_root(xml_path):
            """Parse ``xml_path`` and return its root element; exit on failure."""
            try:
                return ET.parse(xml_path).getroot()
            except (OSError, ET.ParseError) as exc:
                print("parse {} fail! {}".format(xml_path, exc))
                sys.exit(1)

        # Parse an XMind file.
        def analyse_xmind(self):
            """Unpack the ``.xmind`` archive and render its ``content.xml``.

            The archive is extracted into a directory named after the MD5 of
            the file, next to the file itself.

            :return: the rendered text, or ``"<name> 不存在"`` when the file is
                missing.
            """
            file_name = os.path.basename(self.file_path)
            if not os.path.isfile(self.file_path):
                return "{} 不存在".format(file_name)

            # A bare file name has an empty dirname; fall back to the cwd.
            base_dir = os.path.dirname(self.file_path) or os.curdir
            digest = hashlib.md5()
            with open(self.file_path, 'rb') as source:
                for chunk in iter(lambda: source.read(1024), b''):
                    digest.update(chunk)
            md5value = digest.hexdigest()

            extract_dir = os.path.join(base_dir, md5value)
            if os.path.exists(extract_dir):
                print('已经存在了该文件', md5value)
            with zipfile.ZipFile(self.file_path, 'r') as archive:
                archive.extractall(extract_dir)
            return self.analyse_xml(os.path.join(extract_dir, 'content.xml'))

        # Parse the XMind content.xml file.
        def analyse_xml(self, xml_file):
            """Render the first sheet's root topic of an XMind ``content.xml``.

            ``self.context`` is reset before rendering.

            :param xml_file: path of the extracted ``content.xml``.
            :return: the rendered text.
            :raises SystemExit: when the file cannot be read or parsed.
            """
            root = self._load_root(xml_file)
            # ElementTree spells namespaced tags as "{uri}name".
            tag = root.tag
            pre_tag = tag[:tag.index('}') + 1] if tag.startswith('{') else ''
            topic_path = pre_tag + 'sheet/' + pre_tag + 'topic/'

            title = self._text(root.find(topic_path + pre_tag + 'title'))
            print("h1.", title)
            self.context = "\nh1. " + title + "\n"

            plain = root.find(topic_path + pre_tag + 'notes/' + pre_tag + 'plain')
            if plain is not None:
                note = self._text(plain)
                print("<pre>备注:" + note + "</pre>")
                self.context += "<pre>备注:" + note + "</pre>" + "\n\n"

            num = 1        # nesting level of the topics rendered next
            for first_topic in root.findall(topic_path + pre_tag + 'children'):
                self.recursive_xml(first_topic, pre_tag, num)

            return self.context

        # Walk the XMind topic tree recursively.
        def recursive_xml(self, root, pre_tag, num):
            """Render every topic below a ``children`` element.

            :param root: a ``children`` element.
            :param pre_tag: namespace prefix in ``{uri}`` form (may be empty).
            :param num: nesting level; 1 renders ``h3.`` headings.
            """
            # One <children> element may hold several <topics> groups.
            for topics in root.findall(pre_tag + 'topics'):
                for topic in topics.findall(pre_tag + 'topic'):
                    title = self._text(topic.find(pre_tag + 'title'))
                    if num > 1:
                        print(self.mark * (num - 1), title)
                        self.context += self.mark * (num - 1) + " " + title + "\n"
                    else:
                        print("h3.", title)
                        self.context += "\nh3. " + title + '\n\n'

                    plain = topic.find(pre_tag + 'notes/' + pre_tag + 'plain')
                    if plain is not None:
                        note = self._text(plain)
                        print("<pre>备注:" + note + "</pre>")
                        self.context += "<pre>备注:" + note + "</pre>" + "\n\n"

                    label = topic.find(pre_tag + 'labels/' + pre_tag + 'label')
                    if label is not None:
                        label_text = self._text(label)
                        print("-->标签:", label_text + "<--")
                        self.context += "-->标签:" + label_text + "<--" + "\n\n"

                    for new_topic in topic.findall(pre_tag + 'children'):
                        self.recursive_xml(new_topic, pre_tag, num + 1)

        # Parse a FreeMind XML file.
        def analyse_mm_xml(self):
            """Render a FreeMind ``.mm`` file.

            :return: the rendered text, or ``"<name> 不存在"`` when the file is
                missing (the same convention as :meth:`analyse_xmind`).
            :raises SystemExit: when the file cannot be read or parsed.
            """
            if not os.path.isfile(self.file_path):
                return "{} 不存在".format(os.path.basename(self.file_path))

            root = self._load_root(self.file_path)
            num = 1
            for node in root.findall("node"):
                title = node.attrib.get('TEXT', '')
                print("h1.", title)
                self.context += "\nh1. " + title + "\n"
                if node.find('richcontent') is not None:
                    # Only the first paragraph of the root note is rendered.
                    note = self._compact(self._text(node.find('richcontent/html/body/p')))
                    print('<pre>备注:', note, '</pre>')
                    self.context += '<pre>备注:' + note + '</pre>' + "\n\n"
                self.recursive_node(node, num)
            return self.context

        # Walk the FreeMind node tree recursively.
        def recursive_node(self, root, num):
            """Render every ``node`` child of ``root``.

            :param root: a FreeMind ``node`` element.
            :param num: nesting level; 1 renders ``h3.`` headings.
            """
            for node in root.findall('node'):
                if 'TEXT' in node.attrib:
                    if num > 1:
                        print(self.mark * (num - 1), node.attrib['TEXT'])
                        self.context += self.mark * (num - 1) + ' ' + node.attrib['TEXT'] + "\n"
                    else:
                        print("h3.", node.attrib['TEXT'])
                        self.context += "\nh3. " + node.attrib['TEXT'] + "\n\n"
                if node.find('richcontent') is not None:
                    # One output line per paragraph; empty paragraphs have no text.
                    context_out = ''.join(
                        self._compact(self._text(p)) + '\n'
                        for p in node.findall('richcontent/html/body/p')
                    )
                    print('<pre>备注:', context_out, '</pre>')
                    self.context += '<pre>备注:' + context_out + '</pre>' + "\n\n"
                if node.find('node') is not None:
                    self.recursive_node(node, num + 1)
    
    
    if __name__ == '__main__':
        # Demo: render one XMind file and one FreeMind file with "#" list marks.
        # The raw strings keep the Windows backslashes literal.
        file_path = r'C:\path\to\file.xmind'
        amm = AnalyseMindMap(file_path, "#")
        amm.analyse_xmind()
        file_path = r'C:\path\to\file.mm'
        amm = AnalyseMindMap(file_path, "#")
        amm.analyse_mm_xml()

    ================我是底线================

  • 相关阅读:
    数据库优化
    List,map,Set区别
    ID选择器
    最简单的添加删除行操作
    JQ2
    最简单的JQ实现
    20180416
    一行细分的HTML写法
    out参数的使用
    结构的使用
  • 原文地址:https://www.cnblogs.com/wozijisun/p/10555647.html
Copyright © 2020-2023  润新知