• 即时标记


    handle文件(注意:其中的 lines、blocks 两个函数在 markup 文件中是通过 `from util import *` 导入的,需另存为 util.py):
    # -*- coding: utf-8 -*-
    import re
    def lines(file):
        """Yield every line of *file*, then one extra whitespace-only line.

        The trailing sentinel lets a consumer that splits on blank lines
        (see ``blocks``) flush the final block even when the input does not
        end with an empty line.

        :param file: any iterable of text lines (an open file, a list, ...).
        :returns: a generator over the original lines plus the sentinel.
        """
        for line in file:
            yield line
        # A single space strips to '', so it is treated as a blank line.
        yield ' '
    def blocks(file):
        """Split *file* into blocks of text separated by blank lines.

        Consecutive non-blank lines are collected; when a blank
        (whitespace-only) line is reached, the collected lines are joined,
        stripped of surrounding whitespace and yielded as one string.
        Runs of several blank lines produce no empty blocks.

        :param file: any iterable of text lines.
        :returns: a generator of non-empty block strings.
        """
        block = []
        for line in lines(file):
            if line.strip():
                block.append(line)
            elif block:
                # Blank line after content: emit the finished block.
                yield ''.join(block).strip()
                block = []
    class Handler:
        """Dispatch parser events to optional, dynamically named methods.

        ``start(name)`` and ``end(name)`` call ``start_<name>`` and
        ``end_<name>`` when such a method exists on the instance and do
        nothing otherwise.  ``sub(name)`` returns a function suitable as the
        replacement argument of ``re.sub``; it delegates to ``sub_<name>``.
        """

        def callback(self, prefix, name, *args):
            """Call ``self.<prefix><name>(*args)`` if it exists and is callable.

            :returns: the method's return value, or ``None`` when there is
                no such callable attribute.
            """
            method = getattr(self, prefix + name, None)
            if callable(method):
                return method(*args)
            return None

        def start(self, name):
            """Signal the beginning of a ``name`` element."""
            self.callback('start_', name)

        def end(self, name):
            """Signal the end of a ``name`` element."""
            self.callback('end_', name)

        def sub(self, name):
            """Return a ``re.sub`` replacement function for filter ``name``.

            The returned function yields whatever ``sub_<name>(match)``
            returns.  When no such method exists (or it returns ``None``)
            the matched text is returned unchanged, so unknown filters
            leave the input intact.
            """
            def substitution(match):
                result = self.callback('sub_', name, match)
                if result is None:
                    # The original evaluated match.group(0) but discarded it,
                    # handing None back to re.sub.
                    return match.group(0)
                return result
            return substitution
    class HTMLRenderer(Handler):
        """Concrete handler that writes HTML markup to standard output.

        The methods here are reached through the ``start()``, ``end()`` and
        ``sub()`` methods of the ``Handler`` superclass.  They implement the
        basic tags used for an HTML document.

        Output uses ``print(...)`` with a single string argument, which
        produces the same text under the Python 2 print statement and the
        Python 3 print function.
        """

        def start_document(self):
            print('<html><head><title>...</title></head><body>')

        def end_document(self):
            print('</body></html>')

        def start_paragraph(self):
            print('<p>')

        def end_paragraph(self):
            print('</p>')

        def start_heading(self):
            print('<h2>')

        def end_heading(self):
            print('</h2>')

        def start_list(self):
            print('<ul>')

        def end_list(self):
            print('</ul>')

        def start_listitem(self):
            print('<li>')

        def end_listitem(self):
            print('</li>')

        def start_title(self):
            print('<h1>')

        def end_title(self):
            print('</h1>')

        def sub_emphasis(self, match):
            """Wrap the first captured group in ``<em>`` tags."""
            return '<em>%s</em>' % match.group(1)

        def sub_url(self, match):
            """Turn the captured URL into a link whose text is the URL."""
            return '<a href="%s">%s</a>' % (match.group(1), match.group(1))

        def sub_mail(self, match):
            """Turn the captured address into a ``mailto:`` link."""
            return '<a href="mailto:%s">%s</a>' % (match.group(1), match.group(1))

        def feed(self, data):
            """Write a block of (already filtered) text as-is."""
            print(data)
    rules文件:
    # -*- coding:utf-8 -*-
    class Rule:
        """Base class for all rules.

        Subclasses are expected to provide a ``type`` attribute (the element
        name passed to the handler) and a ``condition(block)`` method.
        """

        def action(self, block, handler):
            """Emit ``block`` wrapped in start/end events for ``self.type``.

            :param block: the text block to output.
            :param handler: object providing ``start``, ``feed`` and ``end``.
            :returns: ``True``, signalling that the block has been handled.
            """
            handler.start(self.type)
            handler.feed(block)
            handler.end(self.type)
            return True
    class HeadingRule(Rule):
        """A heading is a single line of at most 70 characters that does
        not end with a colon.
        """
        type = 'heading'

        def condition(self, block):
            """Return True when ``block`` looks like a heading."""
            # The original tested for the two-character string '/n', which
            # never detects a line break; a real newline is intended.
            return ('\n' not in block
                    and len(block) <= 70
                    and not block.endswith(':'))
    class TitleRule(HeadingRule):
        """The title is the first block of the document, provided that it
        also qualifies as a heading.
        """
        type = 'title'
        # True until the first block has been examined by this instance.
        first = True

        def condition(self, block):
            """Return True only for the first block, and only if it is a heading."""
            if not self.first:
                return False
            # Any block seen from now on is no longer "the first", whether
            # or not this one turns out to be a heading.
            self.first = False
            return HeadingRule.condition(self, block)
    class ListItemRule(Rule):
        """A list item is a paragraph that begins with a hyphen.

        The hyphen is removed as part of the formatting.
        """
        type = 'listitem'

        def condition(self, block):
            """Return True when ``block`` starts with ``'-'``."""
            # startswith also copes with an empty string, unlike block[0].
            return block.startswith('-')

        def action(self, block, handler):
            """Emit the block without its leading hyphen as a list item.

            :returns: ``True``, signalling that the block has been handled.
            """
            handler.start(self.type)
            handler.feed(block[1:].strip())
            handler.end(self.type)
            return True
    class ListRule(ListItemRule):
        """Open and close the list element around runs of list items.

        A list starts at the first list item that follows a non-item block
        and ends after the last consecutive list item.  This rule matches
        every block and never consumes it: ``action`` always returns
        ``False`` so that the remaining rules still process the block.
        """
        type = 'list'
        # Whether a list is currently open.
        inside = False

        def condition(self, block):
            """Always True: every block must be inspected to track list state."""
            return True

        def action(self, block, handler):
            """Emit list start/end events on transitions into or out of a list.

            NOTE: the end event is only sent when a later non-item block is
            seen, so a list that is the very last block is not closed here.

            :returns: ``False``, so rule processing continues for this block.
            """
            is_item = ListItemRule.condition(self, block)
            if not self.inside and is_item:
                handler.start(self.type)
                self.inside = True
            elif self.inside and not is_item:
                handler.end(self.type)
                self.inside = False
            return False
    class ParagraphRule(Rule):
        """A paragraph is simply a block not covered by any other rule."""
        type = 'paragraph'

        def condition(self, block):
            """Always True: this is the fallback rule."""
            return True
    markup文件:
    # -*- coding:utf-8 -*-
    import sys,re
    from handle import *
    from util import *
    from rules import *
    class Parser:
        """Read a text file, apply rules and filters, and drive a handler.

        Filters rewrite inline markup inside each block; rules decide which
        element a block becomes and emit it through the handler.
        """

        def __init__(self, handler):
            self.handler = handler
            self.rules = []
            self.filters = []

        def addRule(self, rule):
            """Append ``rule``; rules are tried in the order they were added."""
            self.rules.append(rule)

        def addFilter(self, pattern, name):
            """Register a regex filter.

            Every match of ``pattern`` in a block is replaced by the result
            of the handler's ``sub(name)`` replacement function.
            """
            def apply_filter(block, handler):
                return re.sub(pattern, handler.sub(name), block)
            self.filters.append(apply_filter)

        def parse(self, file):
            """Parse ``file`` block by block and emit events to the handler."""
            self.handler.start('document')
            for block in blocks(file):
                for text_filter in self.filters:
                    block = text_filter(block, self.handler)
                for rule in self.rules:
                    if rule.condition(block):
                        # A truthy result means the block is fully handled.
                        last = rule.action(block, self.handler)
                        if last:
                            break
            self.handler.end('document')
    class BasicTextParser(Parser):
        """Concrete parser that registers its rules and filters in the
        constructor.
        """

        def __init__(self, handler):
            Parser.__init__(self, handler)
            # Order matters: the first rule whose action returns a truthy
            # value ends processing of the block.
            self.addRule(ListRule())
            self.addRule(ListItemRule())
            self.addRule(TitleRule())
            self.addRule(HeadingRule())
            self.addRule(ParagraphRule())
            # The asterisks must be escaped; a bare leading '*' is an
            # invalid regular expression ("nothing to repeat").
            self.addFilter(r'\*(.+?)\*', 'emphasis')
            self.addFilter(r'(http://[.a-zA-Z/]+)', 'url')
            # 'A-Z', not 'A-z': the latter also admits '[', '\\', ']', '^',
            # '_' and '`' into the domain part.
            self.addFilter(r'([.a-zA-Z]+@[.a-zA-Z]+[a-zA-Z]+)', 'mail')
    # Script entry: render the text read from standard input as HTML on
    # standard output.
    handler=HTMLRenderer()
    parser=BasicTextParser(handler)
    # Runs at module level, so the whole input is consumed as soon as this
    # file is executed.
    parser.parse(sys.stdin)

    目标文件:
    Welcome to world Wide Spam,Inc.


    These are the corporate web pages of *World Wide Spam*, Inc. We hope
    you find your stay enjoyable, and that you will sample many of our
    products.

    A short history of the company
    World Wide Spam was started in the summer of 2000. The business
    concept was to ride the dot-com wave and make money both through
    bulk email and by selling canned meat online.

    After receiving several complaints from customers who weren't
    satisfied by their bulk email. World Wide Spam altered their profile,
    and focused 100% on canned goods. Today, they rank as the world's
    13,892nd online supplier of SPAM.

    Destinations
    From this page you may visit several of our interesting web pages:

    - What is SPAM?(http://wwspam.fu/whatisspam)

    - How do they make it?(http://wwspam.fu/howtomakeit)

    - Why should I eat it?(http://wwspam.fu/whyeatit)

    How to gei in touch with us

    You can get in touch with us in *many* ways: By phone(555-1234), by
    email (wwspam@wwspam.fu) or by visiting our customer feedback page
    (http://wwspam.fu/feedback)
    生成文件:
    <html><head><title>...</title></head><body>
    <h1>
    Welcome to world Wide Spam,Inc.
    </h1>
    <p>
    These are the corporate web pages of <em>World Wide Spam</em>, Inc. We hope
    you find your stay enjoyable, and that you will sample many of our
    products.
    </p>
    <p>
    A short history of the company
    World Wide Spam was started in the summer of 2000. The business
    concept was to ride the dot-com wave and make money both through
    bulk email and by selling canned meat online.
    </p>
    <p>
    After receiving several complaints from customers who weren't
    satisfied by their bulk email. World Wide Spam altered their profile,
    and focused 100% on canned goods. Today, they rank as the world's
    13,892nd online supplier of SPAM.
    </p>
    <p>
    Destinations
    From this page you may visit several of our interesting web pages:
    </p>
    <ul>
    <li>
    What is SPAM?(<a href="http://wwspam.fu/whatisspam">http://wwspam.fu/whatisspam</a>)
    </li>
    <li>
    How do they make it?(<a href="http://wwspam.fu/howtomakeit">http://wwspam.fu/howtomakeit</a>)
    </li>
    <li>
    Why should I eat it?(<a href="http://wwspam.fu/whyeatit">http://wwspam.fu/whyeatit</a>)
    </li>
    </ul>
    <h2>
    How to gei in touch with us
    </h2>
    <p>
    You can get in touch with us in <em>many</em> ways: By phone(555-1234), by
    email (<a href="mailto:wwspam@wwspam.fu">wwspam@wwspam.fu</a>) or by visiting our customer feedback page
    (<a href="http://wwspam.fu/feedback">http://wwspam.fu/feedback</a>)
    </p>
    </body></html>

  • 相关阅读:
    144.二叉树的前序遍历
    103.二叉树的锯齿形层次遍历
    shiro系列二、身份验证和授权
    shiro系列一、认识shiro
    发送短信——java
    redis系列二: linux下安装redis
    redis系列一: windows下安装redis
    BootstrapValidator 表单验证超详细教程
    Linux ps 命令详解
    Vmvare 虚拟机固定IP
  • 原文地址:https://www.cnblogs.com/dltts/p/6039587.html
Copyright © 2020-2023  润新知