• 下载优化


    '提取试卷优化
    Private Declare Function URLDownloadToFile Lib "urlmon" Alias "URLDownloadToFileA" (ByVal pCaller As Long, ByVal szURL As String, ByVal szFileName As String, ByVal dwReserved As Long, ByVal lpfnCB As Long) As Long
    Private Declare Function DeleteUrlCacheEntry Lib "wininet" Alias "DeleteUrlCacheEntryA" (ByVal lpszUrlName As String) As Long
    Sub DownloadImageName(ByVal ImageURL As String, ByVal ImagePath As String)
        Dim lngRetVal As Long
        lngRetVal = URLDownloadToFile(0, ImageURL, ImagePath, 0, 0)
        If lngRetVal = 0 Then
            DeleteUrlCacheEntry ImageURL  '清除缓存
            'MsgBox "成功"
        Else
            'MsgBox "失败"
        End If
    End Sub
    
    Sub LoopDownloadExam()
        Dim Wb As Workbook
        Dim Sht As Worksheet
        Set Wb = Application.ThisWorkbook
        Set Sht = Wb.ActiveSheet
        With Sht
            EndRow = .Cells(.Cells.Rows.Count, 2).End(xlUp).Row
            For i = 2 To EndRow
                If .Cells(i, 2).Text Like "http*" Then
                    NewGetEaxmContent .Cells(i, 2).Text
                End If
            Next i
        End With
        Set Wb = Nothing
        Set Sht = Nothing
        
    End Sub
    
    Sub DownloadExam()
        Dim Rng As Range
        Set Rng = Application.ActiveCell
        If Rng.Text Like "http*" Then
            NewGetEaxmContent Rng.Text
        End If
        Set Rng = Nothing
    End Sub
    
    
    
    
    Sub NewGetEaxmContent(ByVal Url As String)
        Dim ContentCode As String
        Dim dPos As Object
        Set dPos = CreateObject("Scripting.Dictionary")
      
        'send request
        With CreateObject("MSXML2.XMLHTTP")
            .Open "GET", Url, False
            .Send
            WebText = .responsetext
            'Debug.Print WebText
            ' Stop
        End With
        
        
        With CreateObject("htmlfile")
            .write WebText
            Set examdiv = .getElementById("sina_keyword_ad_area2")
            
            Title = Replace(.getElementsByTagName("title")(0).innerText, "新浪博客", "")
            docPath = ThisWorkbook.Path & "" & Title & ".doc"
            If Dir(docPath) <> "" Then
                MsgBox "该份试卷已经存在!"
                GoTo ErrorExit
            End If
            'Debug.Print Title
            ContentCode = Split(WebText, "sina_keyword_ad_area2")(1)
            ContentCode = Split(ContentCode, "正文结束")(0)
            ContentCode = Replace(ContentCode, Title, "")
            ContentCode = Replace(ContentCode, "宋体", "")
            ContentCode = Replace(ContentCode, "楷体", "")
            'Debug.Print ContentCode
            'http://s15.sinaimg.cn/mw690/001Eip7Fzy7iGylGjfg3e&690
            'http://s15.sinaimg.cn/mw690/001Eip7Fzy7iGylGjfg3e&690
            Open ThisWorkbook.Path & "html.txt" For Output As #1     '生成CSV文件
            Print #1, ContentCode   '写入CSV的内容
            Close #1    '关闭文件句柄
            
            
            '获取试卷文本内容
            ExamText = examdiv.innerText
            
            'For Each oneP In examdiv.getElementsByTagName("p")
            'Debug.Print oneP.innerText
            'Next oneP
            
            imgIndex = 0
            For Each oneimg In examdiv.getElementsByTagName("img")
                imgIndex = imgIndex + 1
                imgUrl = oneimg.real_src
                imgPath = ThisWorkbook.Path & "" & imgIndex & ".jpg"
                DownloadImageName imgUrl, imgPath
                sp = Split(imgUrl, "&")(0)
                Debug.Print sp
                Debug.Print InStr(ContentCode, sp)
                
                cnt = Split(ContentCode, sp)(1)
                spos = RegGet(cnt, "([u4e00-u9fa5]{5,})")
                dPos(spos) = imgPath
                Debug.Print spos
            Next oneimg
            
            
            
            
            '输出题目内容到Word文档
            Dim wdApp As Object
            Dim Doc As Object
            
            On Error Resume Next
            Set wdApp = GetObject(, "Word.Application")
            On Error GoTo 0
            If Not wdApp Is Nothing Then
                wdApp.Visible = True
                On Error Resume Next
                Set Doc = wdApp.Documents(docName)
                On Error GoTo 0
                If Doc Is Nothing Then
                    Set Doc = wdApp.Documents.Add()
                    Doc.SaveAs docPath
                End If
            Else
                Set wdApp = CreateObject("Word.Application")
                wdApp.Visible = True
                If Dir(docPath) <> "" Then
                    Set Doc = wdApp.Documents.Open(docPath)
                Else
                    Set Doc = wdApp.Documents.Add()
                    Doc.SaveAs docPath
                End If
            End If
            
            Doc.Activate
            
            wdApp.Selection.homekey 6
            For Each oneP In examdiv.getElementsByTagName("p")
                pText = oneP.innerText
                
                For Each oneimg In dPos.keys
                    If InStr(pText, oneimg) > 0 Then
                        ImagePath = dPos(oneimg)
                        '插入图片
                        wdApp.Selection.InlineShapes.AddPicture Filename:=ImagePath, LinkToFile:=False, SaveWithDocument:=True
                        wdApp.Selection.TypeParagraph
                        On Error Resume Next
                        Kill ImagePath
                        On Error GoTo 0
                        
                        
                        
                        Exit For
                    End If
                Next oneimg
                
                wdApp.Selection.Typetext pText
                wdApp.Selection.TypeParagraph
                'Debug.Print oneP.innerText
            Next oneP
            Doc.Save
            Doc.Close True
            wdApp.Quit
        End With
        
    ErrorExit:
        
        Set dPos = Nothing
        Set wdApp = Nothing
        Set Doc = Nothing
    End Sub
    Private Function RegTest(ByVal OrgText As String, ByVal Pattern As String) As Boolean
    '传递参数 :原字符串, 匹配模式
        Dim Regex As Object
        Set Regex = CreateObject("VBScript.RegExp")
        With Regex
            .Global = True
            .Pattern = Pattern
        End With
        RegTest = Regex.test(OrgText)
        Set Regex = Nothing
    End Function
    Private Function RegGet(ByVal OrgText As String, ByVal Pattern As String) As String
    '传递参数 :原字符串, 匹配模式
        Dim Regex As Object
        Dim Mh As Object
        Set Regex = CreateObject("VBScript.RegExp")
        With Regex
            .Global = True
            .Pattern = Pattern
        End With
        If Regex.test(OrgText) Then
            Set Mh = Regex.Execute(OrgText)
            RegGet = Mh.Item(0).submatches(0)
        Else
            RegGet = ""
        End If
        Set Regex = Nothing
    End Function
    Private Function RegReplace(ByVal OrgText As String, ByVal Pattern As String, Optional RepStr As String = "") As String
    '传递参数 :原字符串, 匹配模式 ,替换字符
        Dim Regex As Object
        Dim newText As String
        Set Regex = CreateObject("VBScript.RegExp")
        With Regex
            .Global = True
            .Pattern = Pattern
        End With
        newText = Regex.Replace(OrgText, RepStr)
        RegReplace = newText
        Set Regex = Nothing
    End Function
    Private Function RegGetArray(ByVal OrgText As String, ByVal Pattern As String) As String()
        Dim Reg As Object, Mh As Object, OneMh As Object
        Dim Arr() As String, Index As Long
        Dim Elm As String
        Set Reg = CreateObject("Vbscript.Regexp")
        With Reg
            .MultiLine = True
            .Global = True
            .Ignorecase = False
            .Pattern = Pattern
            Set Mh = .Execute(OrgText)
            
            Index = 0
            ReDim Arr(1 To 1)
            For Each OneMh In Mh
                Index = Index + 1
                ReDim Preserve Arr(1 To Index)
                'If OneMh.submatches(0) <> "" Then Elm = OneMh.submatches(0)
                Arr(Index) = OneMh.submatches(0)
                'Debug.Print OneMh.submatches(0)
            Next OneMh
        End With
        RegGetArray = Arr
        Set Reg = Nothing
        Set Mh = Nothing
    End Function
    
    Private Function RegGetLast(ByVal OrgText As String, ByVal Pattern As String) As String
    '传递参数 :原字符串, 匹配模式
        Dim Regex As Object
        Dim Mh As Object
        Set Regex = CreateObject("VBScript.RegExp")
        With Regex
            .Global = True
            .Pattern = Pattern
        End With
        If Regex.test(OrgText) Then
            Set Mh = Regex.Execute(OrgText)
            'RegGetLast = Mh.Item(0).submatches(0)
            For Each OneMh In Mh
                RegGetLast = OneMh.submatches(0)
            Next OneMh
        Else
            RegGetLast = ""
        End If
        Set Regex = Nothing
    End Function
    

      

  • 相关阅读:
    【Beta阶段】第一次Scrum Meeting
    【Beta阶段】第二次Scrum Meeting
    【Beta阶段】第三次Scrum Meeting
    [BUAA软工]Alpha阶段事后分析
    [BUAA软工]Alpha阶段测试报告
    [北航软工]团队贡献分规则
    Windows Server 2008 R2之二从介质安装 AD DS
    Windows Server 2008 R2之一活动目录服务部署
    DC84问
    获取命令行指定参数
  • 原文地址:https://www.cnblogs.com/nextseven/p/8547336.html
Copyright © 2020-2023  润新知