• Word试卷文档模型化解析存储到数据库


    最近在做一套在线考试系统,不少人反映新增试题比较麻烦(需要逐题录入),于是乎就做了一个试卷批量导入功能。

    poi实现word转html

    模型化解析html

    html转Map数组

    Map数组(数组的操作处理不做说明)

    1.导入jar包(代码依赖 Apache POI 的 HWPF 模块和 commons-lang)。

    2.word试卷导入模板

    链接:http://pan.baidu.com/s/1gfK6g5H

    3.代码实现

      1 package com.web.onlinexam.util;
      2 
      3 import java.io.BufferedWriter;  
      4 import java.io.File;  
      5 import java.io.FileInputStream;  
      6 import java.io.FileNotFoundException;  
      7 import java.io.FileOutputStream;  
      8 import java.io.IOException;  
      9 import java.io.OutputStream;  
     10 import java.io.OutputStreamWriter;  
     11 import java.io.PrintWriter;
     12 import java.util.ArrayList;
     13 import java.util.Date;
     14 import java.util.HashMap;
     15 import java.util.LinkedList;
     16 import java.util.List;
     17 import java.util.Map;
     18 import java.util.regex.Matcher;
     19 import java.util.regex.Pattern;
     20 
     21 import org.apache.commons.lang.StringUtils;
     22 import org.apache.poi.hwpf.HWPFDocument;  
     23 import org.apache.poi.hwpf.model.PicturesTable;  
     24 import org.apache.poi.hwpf.usermodel.CharacterRun;  
     25 import org.apache.poi.hwpf.usermodel.Picture;  
     26 import org.apache.poi.hwpf.usermodel.Range;  
     27 import org.apache.poi.hwpf.usermodel.Paragraph;     
     28 import org.apache.poi.hwpf.usermodel.Table;     
     29 import org.apache.poi.hwpf.usermodel.TableCell;     
     30 import org.apache.poi.hwpf.usermodel.TableIterator;     
     31 import org.apache.poi.hwpf.usermodel.TableRow;  
     32 
     33 import com.common.util.DateFormatUtil;
     34 import com.common.util.FileUploadPathConfig;
     35 
     36 /**
     37  *
     38 
     39  * @Description:Word试卷文档模型化解析
     40 
     41  * @author <a href="mailto:thoslbt@163.com">Thos</a>
     42  * @ClassName: WordToHtml
     44  * @version V1.0
     45  *
     46  */
     47 public class WordToHtml {
     48 
     49     /**
     50      * 回车符ASCII码
     51      */
     52     private static final short ENTER_ASCII = 13;
     53 
     54     /**
     55      * 空格符ASCII码
     56      */
     57     private static final short SPACE_ASCII = 32;
     58 
     59     /**
     60      * 水平制表符ASCII码
     61      */
     62     private static final short TABULATION_ASCII = 9;
     63 
     64     public static String htmlText = "";
     65     public static String htmlTextTbl = "";
     66     public static int counter=0;
     67     public static int beginPosi=0;
     68     public static int endPosi=0;
     69     public static int beginArray[];
     70     public static int endArray[];
     71     public static String htmlTextArray[];
     72     public static boolean tblExist=false;
     73 
     74     public static final String inputFile="C:\Users\java\Downloads\111222.doc";
     75     public static final String htmlFile="E:/abc.html";
     76 
     77     public static void main(String argv[])
     78     {        
     79         try {
     80             getWordAndStyle(inputFile);
     81         } catch (Exception e) {
     82             e.printStackTrace();
     83         }
     84     }
     85 
     86     /**
     87      * word文档图片存储路径
     88      * @return
     89      */
     90     public static String wordImageFilePath(){
     91 
     92         return  FileUploadPathConfig.FILE_UPLOAD_BASE+"upload/wordImage/"+ DateFormatUtil.formatDate(new Date());
     93     }
     94 
     95     /**
     96      *  word文档图片Web访问路径
     97      * @return
     98      */
     99     public static String wordImgeWebPath(){
    100 
    101         return  "D:/var/e_learning/upload/wordImage/"+ DateFormatUtil.formatDate(new Date())+"/";
    102     }
    103 
    104     /**
    105      * 读取每个文字样式
    106      * 
    107      * @param fileName
    108      * @throws Exception
    109      */
    110 
    111 
    112     public static void getWordAndStyle(String fileName) throws Exception {
    113         FileInputStream in = new FileInputStream(new File(fileName));
    114         HWPFDocument doc = new HWPFDocument(in);
    115 
    116         Range rangetbl = doc.getRange();//得到文档的读取范围   
    117         TableIterator it = new TableIterator(rangetbl); 
    118         int num=100;         
    119 
    120         beginArray=new int[num];
    121         endArray=new int[num];
    122         htmlTextArray=new String[num];
    123 
    124         // 取得文档中字符的总数
    125         int length = doc.characterLength();
    126         // 创建图片容器
    127         PicturesTable pTable = doc.getPicturesTable();
    128 
    129         htmlText = "<html><head><title>" + doc.getSummaryInformation().getTitle() + "</title></head><body>";
    130         // 创建临时字符串,好加以判断一串字符是否存在相同格式
    131 
    132         if(it.hasNext())
    133         {
    134             readTable(it,rangetbl);
    135         }
    136 
    137         int cur=0;
    138 
    139         String tempString = "";
    140         for (int i = 0; i < length - 1; i++) {
    141             // 整篇文章的字符通过一个个字符的来判断,range为得到文档的范围
    142             Range range = new Range(i, i + 1, doc);
    143 
    144             CharacterRun cr = range.getCharacterRun(0); 
    145             
    146             if(tblExist)
    147             {
    148                 if(i==beginArray[cur])
    149                 {         
    150                     htmlText+=tempString+htmlTextArray[cur];
    151                     tempString="";
    152                     i=endArray[cur]-1;
    153                     cur++;
    154                     continue;
    155                 }
    156             }
    157             if (pTable.hasPicture(cr)) {
    158                 htmlText +=  tempString ;                
    159                 // 读写图片                
    160                 readPicture(pTable, cr);
    161                 tempString = "";                
    162             } 
    163             else {
    164 
    165                 Range range2 = new Range(i + 1, i + 2, doc);
    166                 // 第二个字符
    167                 CharacterRun cr2 = range2.getCharacterRun(0);
    168                 char c = cr.text().charAt(0);
    169 
    170                 // 判断是否为空格符
    171                 if (c == SPACE_ASCII)
    172                     tempString += "&nbsp;";
    173                 // 判断是否为水平制表符
    174                 else if (c == TABULATION_ASCII)
    175                     tempString += "&nbsp;&nbsp;&nbsp;&nbsp;";
    176                 // 比较前后2个字符是否具有相同的格式
    177                 boolean flag = compareCharStyle(cr, cr2);
    178                 if (flag&&c !=ENTER_ASCII)
    179                     tempString += cr.text();
    180                 else {
    181                     String fontStyle = "<span style='font-family:" + cr.getFontName() + ";font-size:" + cr.getFontSize() / 2
    182                     + "pt;color:"+getHexColor(cr.getIco24())+";";
    183 
    184                     if (cr.isBold())
    185                         fontStyle += "font-weight:bold;";
    186                     if (cr.isItalic())
    187                         fontStyle += "font-style:italic;";
    188 
    189                     htmlText += fontStyle + "' >" + tempString + cr.text();
    190                     htmlText +="</span>";
    191                     tempString = "";
    192                 }
    193                 // 判断是否为回车符
    194                 if (c == ENTER_ASCII)
    195                     htmlText += "<br/>";
    196 
    197             }
    198         }
    199 
    200         htmlText += tempString+"</body></html>";
    201         //生成html文件
    202         writeFile(htmlText);
    203         System.out.println("------------WordToHtml转换成功----------------");
    204         //word试卷数据模型化
    205         analysisHtmlString(htmlText);
    206         System.out.println("------------WordToHtml模型化成功----------------");
    207     }
    208 
    209     /**
    210      * 读写文档中的表格
    211      * 
    212      * @param pTable
    213      * @param cr
    214      * @throws Exception
    215      */
    216     public static void readTable(TableIterator it, Range rangetbl) throws Exception {
    217 
    218         htmlTextTbl="";
    219         //迭代文档中的表格  
    220 
    221         counter=-1;
    222         while (it.hasNext()) 
    223         { 
    224             tblExist=true;
    225             htmlTextTbl="";
    226             Table tb = (Table) it.next();    
    227             beginPosi=tb.getStartOffset() ;
    228             endPosi=tb.getEndOffset();
    229 
    230             //System.out.println("............"+beginPosi+"...."+endPosi);
    231             counter=counter+1;
    232             //迭代行,默认从0开始
    233             beginArray[counter]=beginPosi;
    234             endArray[counter]=endPosi;
    235 
    236             htmlTextTbl+="<table border>";
    237             for (int i = 0; i < tb.numRows(); i++) {      
    238                 TableRow tr = tb.getRow(i);   
    239 
    240                 htmlTextTbl+="<tr>";
    241                 //迭代列,默认从0开始   
    242                 for (int j = 0; j < tr.numCells(); j++) {      
    243                     TableCell td = tr.getCell(j);//取得单元格
    244                     int cellWidth=td.getWidth();
    245 
    246                     //取得单元格的内容   
    247                     for(int k=0;k<td.numParagraphs();k++){      
    248                         Paragraph para =td.getParagraph(k);      
    249                         String s = para.text().toString().trim();   
    250                         if(s=="")
    251                         {
    252                             s=" ";
    253                         }
    254                         htmlTextTbl += "<td width="+cellWidth+ ">"+s+"</td>";
    255                     }       
    256                 }      
    257             }   
    258             htmlTextTbl+="</table>" ;    
    259             htmlTextArray[counter]=htmlTextTbl;
    260 
    261         } //end while 
    262     }    
    263 
    264     /**
    265      * 读写文档中的图片
    266      * 
    267      * @param pTable
    268      * @param cr
    269      * @throws Exception
    270      */
    271     public static void readPicture(PicturesTable pTable, CharacterRun cr) throws Exception {
    272         // 提取图片
    273         Picture pic = pTable.extractPicture(cr, false);
    274         // 返回POI建议的图片文件名
    275         String afileName = pic.suggestFullFileName();
    276 
    277         File file = new File(wordImageFilePath());
    278         System.out.println(file.mkdirs());
    279         OutputStream out = new FileOutputStream(new File( wordImageFilePath()+ File.separator + afileName));
    280         pic.writeImageContent(out);
    281         htmlText += "<img src='"+wordImgeWebPath()+ afileName
    282         + "' mce_src='"+wordImgeWebPath()+ afileName + "' />";
    283     }
    284 
    285 
    286     public static boolean compareCharStyle(CharacterRun cr1, CharacterRun cr2) 
    287     {
    288         boolean flag = false;
    289         if (cr1.isBold() == cr2.isBold() && cr1.isItalic() == cr2.isItalic() && cr1.getFontName().equals(cr2.getFontName()) 
    290                 && cr1.getFontSize() == cr2.getFontSize()&& cr1.getColor() == cr2.getColor()) 
    291         {
    292             flag = true;
    293         }
    294         return flag;
    295     }
    296 
    297     /*** 字体颜色模块start ********/
    298     public static int red(int c) {  
    299         return c & 0XFF;  
    300     }  
    301 
    302     public static int green(int c) {  
    303         return (c >> 8) & 0XFF;  
    304     }  
    305 
    306     public static int blue(int c) {  
    307         return (c >> 16) & 0XFF;  
    308     }  
    309 
    310     public static int rgb(int c) {  
    311         return (red(c) << 16) | (green(c) << 8) | blue(c);  
    312     }  
    313 
    314     public static String rgbToSix(String rgb) {  
    315         int length = 6 - rgb.length();  
    316         String str = "";  
    317         while (length > 0) {  
    318             str += "0";  
    319             length--;  
    320         }  
    321         return str + rgb;  
    322     }  
    323 
    324 
    325     public static String getHexColor(int color) {  
    326         color = color == -1 ? 0 : color;  
    327         int rgb = rgb(color);  
    328         return "#" + rgbToSix(Integer.toHexString(rgb));  
    329     }  
    330     /** 字体颜色模块end ******/
    331 
    332     /**
    333      * 写文件
    334      * 
    335      * @param s
    336      */
    337     public static void writeFile(String s) {
    338         FileOutputStream fos = null;
    339         BufferedWriter bw = null;
    340         PrintWriter writer = null;
    341         try {
    342             File file = new File(htmlFile);
    343             fos = new FileOutputStream(file);
    344             bw = new BufferedWriter(new OutputStreamWriter(fos));
    345             bw.write(s);
    346             bw.close();
    347             fos.close();
    348             //编码转换
    349             writer = new PrintWriter(file, "GB2312");
    350             writer.write(s);
    351             writer.flush();
    352             writer.close();
    353         } catch (FileNotFoundException fnfe) {
    354             fnfe.printStackTrace();
    355         } catch (IOException ioe) {
    356             ioe.printStackTrace();
    357         }
    358 
    359     }
    360 
    361     /**
    362      * 分析html
    363      * @param s
    364      */
    365     public static void analysisHtmlString(String s){
    366 
    367         String q[] = s.split("<br/>");
    368 
    369         LinkedList<String> list = new LinkedList<String>();
    370 
    371         //清除空字符
    372         for (int i = 0; i < q.length; i++) {
    373             if(StringUtils.isNotBlank(q[i].toString().replaceAll("</?[^>]+>","").trim())){
    374 
    375                 list.add(q[i].toString().trim());
    376             }
    377         }
    378         String[] result = {};
    379         String ws[]=list.toArray(result);
    380         int singleScore = 0;
    381         int multipleScore = 0;
    382         int fillingScore = 0;
    383         int judgeScore = 0;
    384         int askScore = 0;
    385         int singleNum = 0;
    386         int multipleNum = 0;
    387         int fillingNum = 0;
    388         int judgeNum = 0;
    389         int askNum = 0;
    390         /***********试卷基础数据赋值*********************/
    391         for (int i = 0; i < ws.length; i++) {
    392             String delHtml=ws[i].toString().replaceAll("</?[^>]+>","").trim();//去除html
    393             if(delHtml.contains("、单选题")){
    394                 String numScore=numScore(delHtml);
    395                 singleNum= Integer.parseInt(numScore.split(",")[0]) ;
    396                 singleScore=Integer.parseInt(numScore.split(",")[1]) ;
    397             }else if(delHtml.contains("、多择题")){
    398                 String numScore=numScore(delHtml);
    399                 multipleNum= Integer.parseInt(numScore.split(",")[0]) ;
    400                 multipleScore=Integer.parseInt(numScore.split(",")[1]) ;
    401             }else if(delHtml.contains("、填空题")){
    402                 String numScore=numScore(delHtml);
    403                 fillingNum= Integer.parseInt(numScore.split(",")[0]) ;
    404                 fillingScore=Integer.parseInt(numScore.split(",")[1]) ;
    405             }else if(delHtml.contains("、判断题")){
    406                 String numScore=numScore(delHtml);
    407                 judgeNum= Integer.parseInt(numScore.split(",")[0]) ;
    408                 judgeScore=Integer.parseInt(numScore.split(",")[1]) ;
    409             }else if(delHtml.contains("、问答题")){
    410                 String numScore=numScore(delHtml);
    411                 askNum= Integer.parseInt(numScore.split(",")[0]) ;
    412                 askScore=Integer.parseInt(numScore.split(",")[1]) ;
    413             }
    414 
    415         }
    416         /**************word试卷数据模型化****************/
    417         List<Map<String, Object>> bigTiMaps = new ArrayList<Map<String,Object>>();
    418         List<Map<String, Object>> smalMaps = new ArrayList<Map<String,Object>>();
    419         List<Map<String, Object>> sleMaps = new ArrayList<Map<String,Object>>();
    420         String htmlText="";
    421         int smalScore=0;
    422         for (int j = ws.length-1; j>=0; j--) {
    423             String html= ws[j].toString().trim();//html格式
    424             String delHtml=ws[j].toString().replaceAll("</?[^>]+>","").trim();//去除html
    425             if(!isSelecteTitele(delHtml)&&!isTitele(delHtml)&&!isBigTilete(delHtml)){//
    426                 if(isTitele(delHtml)){
    427                     smalScore=itemNum(delHtml);
    428                 }
    429                 htmlText=html+htmlText;
    430             }else if(isSelecteTitele(delHtml)){//选择题选择项
    431                 Map<String, Object> sleMap = new HashMap<String, Object>();//选择题选择项
    432                 sleMap.put("seleteItem", delHtml.substring(0, 1));
    433                 sleMap.put("seleteQuest", html+htmlText);
    434                 sleMaps.add(sleMap);
    435             }else if(isTitele(delHtml)){//小标题
    436                 Map<String, Object> smalMap = new HashMap<String, Object>();//小标题
    437                 smalMap.put("smalTilete", html+htmlText);
    438                 smalMap.put("smalScore", smalScore>0?smalScore+"":itemNum(delHtml)+"");
    439                 smalMap.put("sleMaps", sleMaps);
    440                 smalMaps.add(smalMap);
    441             }else if(isBigTilete(delHtml)){//大标题
    442                 Map<String, Object> bigTiMap = new HashMap<String, Object>();//大标题
    443                 bigTiMap.put("bigTilete", delHtml.substring(2, 5));
    444                 bigTiMap.put("smalMaps", smalMaps);
    445                 bigTiMaps.add(bigTiMap);
    446             }    
    447 
    448         }
    449         //System.out.println(bigTiMaps.toString());
    450     }
    451 
    452     //获取大题-题目数量以及题目总计分数
    453     public static String numScore(String delHtml){
    454 
    455         String regEx="[^0-9+,|,+^0-9]";   
    456         Pattern p = Pattern.compile(regEx);   
    457         Matcher m = p.matcher(delHtml);
    458         String s=m.replaceAll("").trim();
    459         if(StringUtils.isNotBlank(s)){
    460             if(s.contains(",")){
    461                 return s;
    462             }else if(s.contains(",")){
    463                 return s.replace(",", ",");
    464             }else{
    465                 return "0,0";
    466             }
    467         }else{
    468             return "0,0";
    469         }
    470 
    471     }
    472     //获取每小题分数
    473     public static int itemNum(String delHtml){
    474         Pattern pattern = Pattern.compile("((.*?))"); //中文括号 
    475         Matcher matcher = pattern.matcher(delHtml);
    476         if (matcher.find()&&isNumeric(matcher.group(1))){
    477             return Integer.parseInt(matcher.group(1));
    478         }else {
    479             return 0;
    480         }
    481     }
    482     //判断Str是否是 数字
    483     public static boolean isNumeric(String str){ 
    484         Pattern pattern = Pattern.compile("[0-9]*"); 
    485         return pattern.matcher(str).matches();    
    486     } 
    487     //判断Str是否存在小标题号
    488     public static boolean isTitele(String str){
    489         Pattern pattern = Pattern.compile("^([\d]+[-\、].*)"); 
    490         return pattern.matcher(str).matches();
    491     }
    492     //判断Str是否是选择题选择项
    493     public static boolean isSelecteTitele(String str){
    494         Pattern pattern = Pattern.compile("^([a-zA-Z]+[-\:].*)"); 
    495         return pattern.matcher(str).matches();
    496     }
    497     //判断Str是否是大标题
    498     public static boolean isBigTilete(String str){
    499         boolean iso= false ;
    500         if(str.contains("一、")){
    501             iso=true;
    502         }else if(str.contains("二、")){
    503             iso=true;
    504         }else if(str.contains("三、")){
    505             iso=true;
    506         }else if(str.contains("四、")){
    507             iso=true;
    508         }else if(str.contains("五、")){
    509             iso=true;
    510         }else if(str.contains("六、")){
    511             iso=true;
    512         }else if(str.contains("七、")){
    513             iso=true;
    514         }else if(str.contains("八、")){
    515             iso=true;
    516         }
    517         return iso;
    518     }
    519 }
    so 我们已经完成所有步骤。
    文章出自:http://www.cnblogs.com/libaoting/p/wordToMap.html
    可自由引用,但请注明来源,谢谢。 
  • 相关阅读:
    字符串排序
    java正则表达式教程
    Java Map
    统计字符串中出现次数最多的字母的次数,如果有多个重复的,都求出
    斐波那契数列非递归算法(fibonacci)
    CUDA还未产出,又要出北洋多元统计习题集
    R的GPUTOOLS不能再windows上用
    泪奔,配好了bioconductor环境
    CUDA笔记13
    CUDA笔记12
  • 原文地址:https://www.cnblogs.com/libaoting/p/wordToMap.html
Copyright © 2020-2023  润新知