• GetTextAndImageCreateExamPaper


    Private Declare Function URLDownloadToFile Lib "urlmon" Alias "URLDownloadToFileA" (ByVal pCaller As Long, ByVal szURL As String, ByVal szFileName As String, ByVal dwReserved As Long, ByVal lpfnCB As Long) As Long
    Private Declare Function DeleteUrlCacheEntry Lib "wininet" Alias "DeleteUrlCacheEntryA" (ByVal lpszUrlName As String) As Long
    '下载网络图片
    Sub DownloadImageName(ByVal ImageURL As String, ByVal ImagePath As String)
        Dim lngRetVal As Long
        lngRetVal = URLDownloadToFile(0, ImageURL, ImagePath, 0, 0)
        If lngRetVal = 0 Then
            DeleteUrlCacheEntry ImageURL  '清除缓存
            'MsgBox "成功"
        Else
            'MsgBox "失败"
        End If
    End Sub
    
    Sub OneKeyCreateExam()
        Dim ImgNames As Variant
        Dim strText As String
        Dim i As Long, n As Long, m As Long
        Dim OneTagP As Object
        Dim OneTagA As Object
        Dim TagP As Object
        Dim PosText As String
        Dim Arr() As String
        ReDim Arr(1 To 1) As String
        Dim Brr() As String
        ReDim Brr(1 To 1)
        Dim ImageURL As String
        Dim FilePath As String
        Dim FileName As String
        
        Dim dContent As Object
        Set dContent = CreateObject("Scripting.Dictionary")
        Dim dImageName As Object
        Set dImageName = CreateObject("Scripting.Dictionary")
        
        Dim StartTime As Variant    '开始时间
        Dim UsedTime As Variant    '使用时间
        StartTime = VBA.Timer    '记录开始时间
        
        AppSettings
        On Error GoTo ErrHandler
        
        '设置URL,访问网页获取网页源码
        URL = ActiveSheet.Range("A2").Text   
        With CreateObject("MSXML2.XMLHTTP")
            .Open "GET", URL, False
            .Send
            strText = .responsetext
        End With
        
        '创建网页文件
        With CreateObject("htmlfile")
            .write strText
            '获取标题
            FileName = .getElementsByTagName("h2")(0).innerhtml
            Debug.Print FileName
    
            
            Application.StatusBar = ">>>>>>正在下载图片>>>>>>"
            
            i = 0    '初始化序号
            
            For Each OneTagA In .getElementsByTagName("a")    '循环所有A标签
                If OneTagA.HasChildNodes Then
                    If OneTagA.href Like "http://photo.blog.sina.com.cn/showpic.html*" Then
                        
                        '获取之前的一个段落
                        Set TagP = OneTagA.PreviousSibling
                        Do While TagP.tagName <> "P"
                            Set TagP = TagP.PreviousSibling
                        Loop
                        
                        i = i + 1
                        
                        '文字内容提取
                        PosText = TagP.innerhtml
                        PosText = RegReplace(PosText, "<.*?>")
                        PosText = Replace(PosText, " ", "")
                        
                        '获取图片URL
                        ImageURL = OneTagA.FirstChild.getAttribute("real_src")
                        ImageName = "Image" & i & ".jpg"
                        ImagePath = ThisWorkbook.Path & Application.PathSeparator & ImageName
                        DownloadImageName ImageURL, ImagePath    '下载图片
                        
                        '获取图片
                        If dImageName.Exists(PosText) = False Then
                            dImageName(PosText) = ImageName
                        Else
                            dImageName(PosText) = dImageName(PosText) & "|" & ImageName
                        End If
                        
                    End If
                End If
            Next
            
            Application.StatusBar = ">>>>>>正在获取文本>>>>>>"
            
            
            i = 0    '初始化序号
            n = 0    '初始化序号
            For Each OneTagP In .getElementsByTagName("p")
                '文字内容提取
                PosText = OneTagP.innerhtml
                PosText = RegReplace(PosText, "<.*?>")
                PosText = Replace(PosText, " ", "")
                
                i = i + 1
                
                If PosText = "喜欢" Then Exit For    '提前结束循环
                If i > 20 Then    '开始记录试卷内容
                If Len(PosText) > 0 Then    '保留非空数组
                n = n + 1
                ReDim Preserve Arr(1 To n)
                Arr(n) = PosText    '存入数组
                'Debug.Print n; "               "; PosText
                'dContent(PosText) = n
            End If
        End If
    Next
    End With
    
    
    
    Application.StatusBar = ">>>>>>正在创建Word文档>>>>>>"
    
    FilePath = ThisWorkbook.Path & "" & FileName & ".doc"
    On Error Resume Next
    Kill FilePath
    On Error GoTo 0
    
    Dim wdApp As Object
    Dim Doc As Object
    Set wdApp = CreateObject("Word.Application")
    Set Doc = wdApp.documents.Add()
    
    Doc.Activate
    
    For i = 1 To UBound(Arr)
        
        PosText = Arr(i)
        
        wdApp.Selection.TypeText Text:=PosText
        wdApp.Selection.TypeParagraph
        
        
        If dImageName.Exists(PosText) Then    '如果含有图片
        If InStr(dImageName(PosText), "|") = 0 Then    '如果只含有一张图片
        ImageName = dImageName(PosText)
        ImagePath = ThisWorkbook.Path & Application.PathSeparator & ImageName
        wdApp.Selection.InlineShapes.AddPicture FileName:=ImagePath, LinkToFile:=False, SaveWithDocument:=True
        wdApp.Selection.TypeParagraph
    Else
        ImgNames = Split(dImageName(PosText), "|")
        For n = LBound(ImgNames) To UBound(ImgNames) Step 1
            ImageName = ImgNames(n)
            ImagePath = ThisWorkbook.Path & Application.PathSeparator & ImageName
            wdApp.Selection.InlineShapes.AddPicture FileName:=ImagePath, LinkToFile:=False, SaveWithDocument:=True
            wdApp.Selection.TypeParagraph
        Next n
    End If
    End If
    
    Next i
    
    Doc.SaveAs FilePath
    Doc.Close
    wdApp.Quit
    
    
    Application.StatusBar = ">>>>>>正在删除Image图片>>>>>>"
    
    For Each Key In dImageName.keys
        If InStr(dImageName(Key), "|") = 0 Then
            ImageName = dImageName(Key)
            ImagePath = ThisWorkbook.Path & Application.PathSeparator & ImageName
            Kill ImagePath
        Else
            ImgNames = Split(dImageName(Key), "|")
            For n = LBound(ImgNames) To UBound(ImgNames) Step 1
                ImageName = ImgNames(n)
                ImagePath = ThisWorkbook.Path & Application.PathSeparator & ImageName
                Kill ImagePath
            Next n
        End If
    Next Key
    
    
    UsedTime = VBA.Timer - StartTime
    MsgBox "本次运行耗时:" & Format(UsedTime, "#0.0000秒")
    
    ErrorExit:
    Set wdApp = Nothing
    Set Doc = Nothing
    
    AppSettings False
    Exit Sub
    ErrHandler:
    If Err.Number <> 0 Then
        MsgBox Err.Description & "!", vbCritical, "QQ 84857038"
        Debug.Print Err.Description
        Err.Clear
        Resume ErrorExit
    End If
    
    End Sub
    Public Function RegReplace(ByVal OrgText As String, ByVal Pattern As String, Optional RepStr As String = "") As String
    '传递参数 :原字符串, 匹配模式 ,替换字符
        Dim Regex As Object
        Dim newText As String
        Set Regex = CreateObject("VBScript.RegExp")
        With Regex
            .Global = True
            .Pattern = Pattern
        End With
        newText = Regex.Replace(OrgText, RepStr)
        RegReplace = newText
        Set Regex = Nothing
    End Function
    Public Sub AppSettings(Optional IsStart As Boolean = True)
        If IsStart Then
            Application.ScreenUpdating = False
            Application.DisplayAlerts = False
            Application.Calculation = xlCalculationManual
            Application.StatusBar = ">>>>>>>>Macro Is Running>>>>>>>>"
        Else
            Application.ScreenUpdating = True
            Application.DisplayAlerts = True
            Application.Calculation = xlCalculationAutomatic
            Application.StatusBar = False
        End If
    End Sub
    

      

  • 相关阅读:
    【C语言基础】解决C语言error: expected ‘;‘, ‘,‘ or ‘)‘ before ‘&‘ token
    【ubuntu基础】ubuntu terminal shortcut
    【CV项目调试】darknet源码中CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT问题
    【linux基础】linux 多个会话同时执行命令后history记录不全的解决方案
    【DL基础】激活函数总结
    【shell基础】判断目录是否为空
    【socketCAN错误】write: No buffer space available
    【DL基础】损失函数总结
    【CV源码实现及调试】darknet中opencv的问题
    【开源库STBI基础】STBI图像库的理解
  • 原文地址:https://www.cnblogs.com/nextseven/p/7295628.html
Copyright © 2020-2023  润新知