• js 判断上传文件是否为utf8编码格式


    首先利用fileReader.readAsText(filePart) 默认通过utf8格式读取文件,如果文件中有非utf8字符会替换成�, 如果有�说明有非utf8字符。
    Windows 下还有 UTF-8 BOM 格式的编码,这里通过判断文件开头的 3 个字节(EF BB BF)来判断文件是否是 UTF-8 BOM 编码。最后,如果文件太大,达到 GB 级别,一次性加载到内存中可能直接让浏览器卡死,
    下面的代码对大文件(50MB 及以上)按固定间隔抽取若干个 1MB 的样本,逐个样本进行判断。

    /**
     * Splits a file into the parts that should be checked for encoding errors.
     *
     * Files smaller than 50 MiB are returned whole. Larger files are sampled:
     * 100 slices of up to 1 MiB each, taken at evenly spaced offsets
     * (stride = floor(size / 100)), so the whole file is never held in memory.
     *
     * @param {Blob} file - File or Blob exposing `size` and `slice(start, end)`.
     * @returns {Blob[]} The file itself, or the list of sampled slices.
     */
    const getSamples = (file) => {
      const WHOLE_FILE_LIMIT = 50 * 1024 * 1024
      const SAMPLE_COUNT = 100
      const SAMPLE_SIZE = 1024 * 1024

      const filesize = file.size
      if (filesize < WHOLE_FILE_LIMIT) {
        return [file]
      }

      const stride = Math.floor(filesize / SAMPLE_COUNT)
      const parts = []
      // Iterate over all SAMPLE_COUNT strides; the previous `while (total > 1)`
      // loop stopped one short, so the final stride of the file was never sampled.
      for (let index = 0; index < SAMPLE_COUNT; index++) {
        const start = index * stride
        // slice() clamps an end offset that runs past the end of the file.
        parts.push(file.slice(start, start + SAMPLE_SIZE))
      }
      return parts
    }
    
    /**
     * Checks that a file part decodes as UTF-8 without replacement characters.
     *
     * The part is read with FileReader.readAsText, which decodes as UTF-8 by
     * default and substitutes U+FFFD for every byte sequence it cannot decode.
     * The first 4 characters and the last character are excluded from the check,
     * because a part cut out of a larger file may begin or end in the middle of
     * a multi-byte sequence and produce U+FFFD there even for valid files.
     * A file that legitimately contains U+FFFD is reported as invalid.
     *
     * @param {Blob} filePart - File or slice of a file to check.
     * @returns {Promise<void>} Resolves when no replacement character is found;
     *   rejects with an Error when one is found or when the read fails.
     */
    const isUtf8 = (filePart) => {
      // Written as an escape so the check survives re-encoding of this source file.
      const REPLACEMENT_CHAR = "\uFFFD"
      const LEADING_SKIP = 4
      const TRAILING_SKIP = 1

      return new Promise((resolve, reject) => {
        const fileReader = new FileReader()
        fileReader.onload = () => {
          const str = fileReader.result
          // Scan everything except the edges; previously only the first half
          // was inspected, so invalid bytes in the second half went unnoticed.
          const scanEnd = Math.max(LEADING_SKIP, str.length - TRAILING_SKIP)
          const scanned = str.slice(LEADING_SKIP, scanEnd)
          if (scanned.includes(REPLACEMENT_CHAR)) {
            reject(new Error("encoding format error, please upload UTF-8 format file"))
          } else {
            resolve(void 0)
          }
        }
        fileReader.onerror = () => {
          reject(new Error("Failed to read the content of the file, please check whether the file is damaged"))
        }
        // Start reading only after both handlers are attached.
        fileReader.readAsText(filePart)
      })
    }
    
    /**
     * Checks whether a file starts with the UTF-8 byte order mark (EF BB BF).
     *
     * Only the first three bytes are read, so the check costs the same for a
     * multi-gigabyte file as for a tiny one.
     *
     * @param {Blob} file - File or Blob to inspect.
     * @returns {Promise<void>} Resolves when the file has no BOM; rejects with
     *   an Error when a BOM is present or when the read fails.
     */
    const isBOM = (file) => {
      const BOM_LENGTH = 3
      return new Promise((resolve, reject) => {
        const reader = new FileReader()
        reader.onload = () => {
          const header = new Uint8Array(reader.result)
          // 0xEF 0xBB 0xBF is the UTF-8 encoding of U+FEFF, the byte order mark.
          if (header[0] === 0xef && header[1] === 0xbb && header[2] === 0xbf) {
            console.log('File has byte order mark (BOM)')
            reject(new Error("File has byte order mark (BOM)"))
          } else {
            resolve(void 0)
          }
        }
        reader.onerror = () => {
          reject(new Error("Failed to read the content of the file, please check whether the file is damaged"))
        }
        // Read just the header instead of loading the whole file into memory.
        reader.readAsArrayBuffer(file.slice(0, BOM_LENGTH))
      })
    }
    
    /**
     * Reports whether an uploaded file is plain UTF-8 without a byte order mark.
     *
     * The BOM check runs first; then every part returned by getSamples is
     * checked in order, stopping at the first part that fails.
     * Failures of either check (including read errors) are logged and reported
     * as `false` rather than thrown.
     *
     * @param {Blob} file - File or Blob to validate.
     * @returns {Promise<boolean>} `true` when all checks pass, otherwise `false`.
     */
    export default async function (file) {
      try {
        await isBOM(file)
      } catch (e) {
        console.log(e)
        return false
      }

      // Parts are checked one after another so only one is being read at a time.
      for (const filePart of getSamples(file)) {
        try {
          await isUtf8(filePart)
        } catch (error) {
          console.log("error: ", error)
          return false
        }
      }
      return true
    }
    

    参考链接

    1. JsChardet
    2. Use js to determine whether the file is utf-8 encoding
    3. Detecting if a file has a byte order mark (BOM) using JavaScript
  • 相关阅读:
    python之切片
    python之递归函数
    python之函数的参数
    python之定义函数
    python函数之调用函数
    小地图
    利用脚本对游戏对象进行操作
    Unity中www的基本应用
    U3D脚本开发基础
    U3D游戏开发基础
  • 原文地址:https://www.cnblogs.com/linyihai/p/16240450.html
Copyright © 2020-2023  润新知