• 分片上传场景中的进阶JS异步实践


    在封装开发分片上传库的过程中,难得在前端遇到了并发问题。下面分析下怎么遇到的问题,以及怎么解决的。


    目录

    topic涉及的内容

    • 事件循环:Promise、async await、setTimeout执行顺序
    • 并发问题:竞态、原子性
    • 解决并发:乐观锁、悲观锁、消除共享资源

    大文件分片上传遇到的问题

    功能点

    • 分片上传
    • 并行上传
    • 断点续传
    • 引入Web Worker提高性能

    整体方案

    • 指定每片大小:sizePerChunk,指定并行数量:parallelNumber
    • 将大文件按指定分片大小、并行数量进行切割分组
    • 每组间并行上传,同组内串行
    • 上传完的存Localstorage,上传前判断已经存了就跳过上传
    • Web Worker不支持读写Localstorage,需借助主线程读写
    • 因太多异步方法,充分利用async await 来保证代码逻辑清晰

    分片上传库的伪代码

    为了简化问题便于大家理解,此处代码把与问题不相干的逻辑省略。

    // worker
    /**
     * Worker-side sketch of the chunked-upload library.
     * Logic unrelated to the problem under discussion is omitted; `fileSize`,
     * `sizePerChunk` and `fileUid` are presumably set by that omitted code.
     */
    function UploadWorker() {
        self.onmessage = e => {
            // Omitted:
            // - receive the "upload this file" command
            // - compute file attributes (fileUid, chunk layout, ...)
            // - call uploadParallelly to start the parallel upload
        };

        /**
         * Upload the whole file with up to `parallelNumber` loops running concurrently.
         * @param {number} parallelNumber - maximum number of concurrent upload loops
         * @returns {Promise<void>} resolves once every loop has finished
         */
        const uploadParallelly = async parallelNumber => {
            // Total number of chunks for the configured chunk size
            const chunkCount = Math.ceil(fileSize / sizePerChunk);
            // Number of consecutive chunks handled by one loop
            const loopLen = Math.ceil(chunkCount / parallelNumber);
            const loopTaskList = []; // index ranges, e.g. [[0,1,2,3], [4,5,6,7], [8,9]]
            // One uploadLoop per allowed parallel slot
            for (let i = 0; i < chunkCount; i += loopLen) {
                loopTaskList.push(uploadLoop(i, Math.min(i + loopLen, chunkCount)));
            }
            await Promise.all(loopTaskList);
        };

        /**
         * Upload the chunks in [start, end) one after another.
         * @param {number} start - first chunk index (inclusive)
         * @param {number} end - last chunk index (exclusive)
         */
        const uploadLoop = async (start, end) => {
            for (let i = start; i < end; i++) {
                // Omitted: slicing the file for chunk i
                await doUploadChunk(i);
            }
        };

        /**
         * Upload a single chunk unless it is already recorded as uploaded
         * (this skip is what makes the upload resumable).
         * @param {number} chunkIndex
         */
        const doUploadChunk = async chunkIndex => {
            if (await isUploaded(chunkIndex)) return;
            // Omitted: await the actual upload of the chunk content
            await markUploaded(chunkIndex);
        };

        /**
         * Record a chunk as uploaded.
         * Read-modify-write over an asynchronous cache: concurrent calls can
         * interleave between the read and the write.
         * @param {number} chunkIndex
         */
        const markUploaded = async chunkIndex => {
            const chunkList = await getCache(fileUid);
            const nextList = [...chunkList, chunkIndex];
            await setCache(fileUid, nextList);
        };

        /**
         * @param {number} chunkIndex
         * @returns {Promise<boolean>} whether the chunk is already recorded as uploaded
         */
        const isUploaded = async chunkIndex => {
            const chunkList = await getCache(fileUid);
            return chunkList.includes(chunkIndex);
        };

        const getCache = async key => {
            // Omitted: ask the main thread to read localStorage
        };

        const setCache = async (key, value) => {
            // Omitted: ask the main thread to write localStorage
        };
    }
    
    // 主线程
    /**
     * Main-thread facade of the upload library (sketch; the implementation is omitted).
     */
    class Uploader {
        /**
         * Start an upload. The omitted body is meant to:
         *  - instantiate UploadWorker and send it the upload command;
         *  - serve the worker's localStorage read/write requests.
         */
        upload() {}
    }
    
    • 流程图

    问题

    Localstorage记录的某一个fileUid 已上传分片数组,偶尔会少几片,例如:

    key value 现象
    upload_task_xxxx [0, 1, 2, 3, 6, 7, 8, 9] 发现4,5两片不见了(并发数为3)

    原因分析

    我们看下markUploaded方法的实现,异步先读 - 再改 - 异步再写,加上并发调用,典型地容易触发竞态条件。接下来详细分析这个问题。

    // Mark a chunk as uploaded.
    // Read-modify-write over an asynchronous cache: each `await` yields to the
    // event loop, so another call can run between the read and the write.
    markUploaded = async chunkIndex => {
    	const chunkList = await getCache(fileUid);    // 1. read
    	const nextList = [...chunkList, chunkIndex];  // 2. modify
    	await setCache(fileUid, nextList);                 // 3. write
    }
    
    抽离出问题原型
    • worker的逻辑:异步的getCache、异步的setCache和异步的getNPush 用来往 一个数组里push,分析在并发场景下,getNPush的稳定性。
    • 主线程的逻辑:配合web worker提供cache data的逻辑
    // Web Worker
    // Worker-side half of the minimal race-condition reproduction.
    // getCache/setCache are asynchronous postMessage round trips to the main
    // thread, and getNPush is a read-modify-write built on top of them.
    function UploadWorker() {
    	// Every incoming message is dispatched as an event on the local pool;
    	// a 'run' message additionally starts the test scenario.
    	self.onmessage = e => {
    		const {type, data, uid} = e.data;
    		if(type === 'run') run(data);
    		// Hand the main thread's cache read/write result to the waiting listener
    		trigger(type, data, uid);
    	}
    	// Ask the main thread for the value stored under `key`.
    	// Resolves with the payload of the 'getCache' reply whose uid matches this request.
    	getCache = async key => {
    		const uid = Math.random();
    		return new Promise(resolve => {
    			let cb = (result, retUid) => {
    				if(retUid !== uid) return; // reply belongs to a different request
    				resolve(result);
    				off('getCache', cb);
    			};
    			on('getCache', cb);
    			self.postMessage({type: 'getCache', data: {key, uid}})
    		});
    	}
    	
    	// Ask the main thread to store `value` under `key`.
    	// Resolves with the payload of the matching 'setCache' reply.
    	setCache = async (key, value) => {
    		const uid = Math.random();
    		return new Promise(resolve => {
    			let cb = (result, retUid) => {
    				if(retUid !== uid) return; // reply belongs to a different request
    				resolve(result);
    				off('setCache', cb);
    			};
    			on('setCache', cb);
    			self.postMessage({type: 'setCache', data: {key, value, uid}})
    		});
    	}
    	
    	// Append `value` to the array stored under `key` (read, modify, write).
    	// Both awaits yield to the event loop, so concurrent calls can read the
    	// same `src` and overwrite each other's write.
    	getNPush = async (key, value) => {
    		const src=  await getCache(key) || [];
    		const next = [...src, value];
    		console.log('next', JSON.stringify(next))
    		await setCache(key, next);
    	}
    	
    	// Minimal custom event emitter: pool maps an event type to its listeners
    	pool = {};
    	// Register `cb` for `type`; builds a new listener array on every call
    	on = (type, cb) => {
    		pool[type] = pool.hasOwnProperty(type) ? [...pool[type], cb]: [cb];
    	}
    
    	// Unregister `cb` from `type` by splicing the listener array in place.
    	// NOTE(review): if cb is not registered, indexOf returns -1 and
    	// splice(-1, 1) removes the last listener instead.
    	off = (type, cb) => {
    		if(!pool[type]) return;
    		pool[type].splice(pool[type].indexOf(cb), 1);
    	}
    
    	// Call every listener registered for `type` with `args`
    	trigger = (type, ...args) => {
    		if(!pool[type]) return;
    		pool[type].forEach(fn => fn.apply(null, args))
    	}
    
    	// Simulate concurrent callers
    	run = async type => {
    		KEY = 'key';
    		// Call fn(...args) after `sec` and resolve with its result.
    		// `sec` is passed straight to setTimeout, i.e. it is in milliseconds.
    		delay = (sec, fn, ...args) => new Promise(resolve => {
    			setTimeout(() => resolve(fn(...args)), sec)
    		});
    		
    		// Push start..end-1 one at a time, each after a random delay below 100 ms
    		loop1 = async (start, end) => {
    			for(let i = start; i < end; i++) {
    				await delay(Math.random()*100, getNPush, KEY, i);
    			}
    		}
    		
    		switch(type) {
    			case 'loop1':
    				// Reset the list, run three loops concurrently, then print what was stored
    				await setCache(KEY, []);
    				await Promise.all([loop1(0,10), loop1(10,20), loop1(20,30)]);
    				console.log('===loop1===', await getCache(KEY));
    				break;
    		}
    	}
    }
    
    // 主线程
    // Spawn the worker from UploadWorker's own source text via a Blob URL.
    w = new Worker(URL.createObjectURL(new Blob([`(${UploadWorker.toString()})()`])));

    // In-memory stand-in for localStorage, which makes debugging easier.
    data = {};
    doSet = (key, value) => {
        data[key] = value;
        return null;
    };
    doGet = key => (key ? data[key] : data);

    // Serve the worker's cache requests and echo the extra fields (the uid) back.
    w.onmessage = e => {
        const {type, data: payload} = e.data;
        const {key, value, ...extra} = payload;
        let result;
        if (type === 'setCache') {
            result = doSet(key, value);
        } else if (type === 'getCache') {
            result = doGet(key);
        }
        console.log('main on type', type, result, value, extra)
        w.postMessage({type, data: result, ...extra})
    };

    // Tell the worker to start the concurrency test.
    w.postMessage({type: 'run', data: 'loop1'});
    
    
    // 以上 并不是每次执行完 数组长度都是30.
    

    解决方案

    方案1

    getNPush不是原子的。
    原子性:不可中断,而getCache是异步的,会发生上下文切换

    	// Not atomic: both awaits yield to the event loop, so another getNPush
    	// call can read the same `src` before this one has written `next`.
    	getNPush = async (key, value) => {
    		const src=  await getCache(key) || [];   // read (falls back to [] on a falsy result)
    		const next = [...src, value];            // modify
    		console.log('next', JSON.stringify(next))
    		await setCache(key, next);               // write
    	}
    

    思路:
    把getNPush改为原子的。
    怎么改?
    去掉await get 和await set,那么getNPush就是纯同步的代码,对于单线程的JS执行,一定是原子的。所以就可以把getNPush放到主线程实现。

    /**
     * Worker script for solution 1: the read-modify-write is delegated to the
     * main thread as a single 'getNPush' request, so the worker never holds a
     * stale copy of the list between a read and a write.
     */
    function UploadWorker() {
        // --- minimal event emitter: listeners keyed by message type ---
        const pool = Object.create(null);
        const on = (type, cb) => {
            pool[type] = pool[type] ? [...pool[type], cb] : [cb];
        };
        // Rebuilds the list instead of splicing it: an unknown cb removes nothing,
        // and a listener removing itself during trigger() cannot skip its neighbour.
        const off = (type, cb) => {
            if (!pool[type]) return;
            pool[type] = pool[type].filter(fn => fn !== cb);
        };
        const trigger = (type, ...args) => {
            (pool[type] || []).forEach(fn => fn(...args));
        };

        // --- request/response over postMessage ---
        let lastUid = 0;
        /**
         * Post `{type, data: {...payload, uid}}` to the main thread and resolve
         * with the payload of the reply of the same type carrying the same uid.
         */
        const request = (type, payload) => new Promise(resolve => {
            const uid = ++lastUid;
            const cb = (result, retUid) => {
                if (retUid !== uid) return; // reply belongs to a different request
                off(type, cb);
                resolve(result);
            };
            on(type, cb);
            self.postMessage({type, data: {...payload, uid}});
        });

        const getCache = key => request('getCache', {key});
        const setCache = (key, value) => request('setCache', {key, value});
        // Modified for solution 1: one round trip, the main thread does get + push + set
        const getNPush = (key, value) => request('getNPush', {key, value});

        // --- simulated concurrent callers ---
        const KEY = 'key';
        // Call fn(...args) after `ms` milliseconds and resolve with its result
        const delay = (ms, fn, ...args) => new Promise(resolve => {
            setTimeout(() => resolve(fn(...args)), ms);
        });
        const loop1 = async (start, end) => {
            for (let i = start; i < end; i++) {
                await delay(Math.random() * 100, getNPush, KEY, i);
            }
        };
        const run = async type => {
            switch (type) {
                case 'loop1':
                    await setCache(KEY, []);
                    await Promise.all([loop1(0, 10), loop1(10, 20), loop1(20, 30)]);
                    console.log('======loop1', await getCache(KEY));
                    break;
            }
        };

        self.onmessage = e => {
            const {type, data, uid} = e.data;
            if (type === 'run') {
                run(data).catch(err => console.error(err));
                return;
            }
            // Hand the main thread's reply to the request waiting for it
            trigger(type, data, uid);
        };
    }
    
    
    // 主线程
    // Main thread for solution 1: spawn the worker from UploadWorker's source
    // text and serve its cache requests.
    w = new Worker(URL.createObjectURL(new Blob([`(${UploadWorker.toString()})()`])))
    w.onmessage = (e) => {
        const {type, data: {key, value, ...extra}} = e.data;
        let result;
        switch (type) {
            case 'setCache':
              result = doSet(key, value);
              break
            case 'getCache':
              result = doGet(key);
              break;
            // >>>>>>>> added for solution 1
            // get + push + set run in one synchronous handler, so no other
            // message can be processed in between.
            case 'getNPush': {
              const current = doGet(key);
              // Start from an empty list when nothing (or a non-array) is stored,
              // as the worker-side getNPush did with `|| []`.
              const src = Array.isArray(current) ? current : [];
              result = doSet(key, [...src, value]);
              break;
            }
            // <<<<<<<<< added for solution 1
        }
        console.log('main on type', type, result, value, extra)
        // Echo the extra fields (the uid) so the worker can match the reply
        w.postMessage({type, data: result, ...extra})
    }


    // In-memory stand-in for localStorage, which makes analysis easier
    data = {}
    doSet = (key, value) => {
      data[key] = value;
      return null;
    }
    doGet = key => {
      return key ? data[key] : data;
    }

    // Tell the worker to start the concurrency test
    w.postMessage({type: 'run', data: 'loop1'})
    
    // 每次执行都是稳定的30个了
    

    方案2

    乐观锁的思路:写入时CAS+自旋

    /**
     * Worker script for solution 2 (optimistic locking): every write is a
     * compare-and-set `{src, next}` that the main thread applies only when the
     * stored value still equals `src`; getNPush retries until its write is accepted.
     */
    function UploadWorker() {
        // --- minimal event emitter: listeners keyed by message type ---
        const pool = Object.create(null);
        const on = (type, cb) => {
            pool[type] = pool[type] ? [...pool[type], cb] : [cb];
        };
        // Rebuilds the list instead of splicing it: an unknown cb removes nothing,
        // and a listener removing itself during trigger() cannot skip its neighbour.
        const off = (type, cb) => {
            if (!pool[type]) return;
            pool[type] = pool[type].filter(fn => fn !== cb);
        };
        const trigger = (type, ...args) => {
            (pool[type] || []).forEach(fn => fn(...args));
        };

        // --- request/response over postMessage ---
        let lastUid = 0;
        /**
         * Post `{type, data: {...payload, uid}}` to the main thread and resolve
         * with the payload of the reply of the same type carrying the same uid.
         */
        const request = (type, payload) => new Promise(resolve => {
            const uid = ++lastUid;
            const cb = (result, retUid) => {
                if (retUid !== uid) return; // reply belongs to a different request
                off(type, cb);
                resolve(result);
            };
            on(type, cb);
            self.postMessage({type, data: {...payload, uid}});
        });

        const getCache = key => request('getCache', {key});
        // `value` is `{src, next}`; resolves with the reply payload
        // (the main thread answers whether the write was applied).
        const setCache = (key, value) => request('setCache', {key, value});

        /**
         * Append `value` to the list under `key`, retrying until the
         * compare-and-set is accepted.
         */
        const getNPush = async (key, value) => {
            // A loop instead of recursion, so heavy contention cannot grow the call chain.
            while (true) {
                const src = await getCache(key);
                const next = [...(src || []), value];
                console.log('next', JSON.stringify(next));
                // Send the value exactly as read: substituting [] for "nothing
                // stored" would never match what the main thread holds, and the
                // retry would spin forever.
                if (await setCache(key, {next, src})) return;
            }
        };

        // --- simulated concurrent callers ---
        const KEY = 'key';
        // Call fn(...args) after `ms` milliseconds and resolve with its result
        const delay = (ms, fn, ...args) => new Promise(resolve => {
            setTimeout(() => resolve(fn(...args)), ms);
        });
        const loop1 = async (start, end) => {
            for (let i = start; i < end; i++) {
                await delay(Math.random() * 100, getNPush, KEY, i);
            }
        };
        const run = async type => {
            switch (type) {
                case 'loop1':
                    // Reset the list: `src` is whatever is stored right now
                    await setCache(KEY, {src: await getCache(KEY), next: []});
                    await Promise.all([loop1(0, 10), loop1(10, 20), loop1(20, 30)]);
                    console.log('===loop1===', await getCache(KEY));
                    break;
            }
        };

        self.onmessage = e => {
            const {type, data, uid} = e.data;
            if (type === 'run') {
                run(data).catch(err => console.error(err));
                return;
            }
            // Hand the main thread's reply to the request waiting for it
            trigger(type, data, uid);
        };
    }
    // 主线程
    // Main thread for solution 2: spawn the worker from UploadWorker's source text.
    w = new Worker(URL.createObjectURL(new Blob([`(${UploadWorker.toString()})()`])));

    // In-memory stand-in for localStorage, which makes debugging easier.
    data = {};
    doSet = (key, value) => {
        data[key] = value;
        return null;
    };
    doGet = key => (key ? data[key] : data);

    w.onmessage = e => {
        const {type, data: {key, value, ...extra}} = e.data;
        let result;
        if (type === 'setCache') {
            // Compare-and-set: apply `next` only while the stored value still
            // serialises to the same JSON as the `src` the worker read.
            const {src, next} = value;
            const unchanged = JSON.stringify(doGet(key)) === JSON.stringify(src);
            if (unchanged) {
                doSet(key, next);
            }
            result = unchanged;
        } else if (type === 'getCache') {
            result = doGet(key);
        }
        console.log('main on type', type, result, value, extra)
        // Echo the extra fields (the uid) so the worker can match the reply
        w.postMessage({type, data: result, ...extra})
    };

    // Tell the worker to start the concurrency test.
    w.postMessage({type: 'run', data: 'loop1'});
    

    方案3

    悲观锁:worker执行setCache要抢锁,因为JS本身没有多线程的概念,需要强行引入“虚拟线程id”,再根据线程id实现tryLock和unLock。比较复杂。

    方案4

    消除共享资源的竞争

    1. 把chunkIndex作为cache的key
    2. 有多少个parallel,就有多少个key

    方案5

    引入队列,串行消费
    因为JS不支持消息队列消费,用setInterval来定时消费。

    /**
     * Worker script for solution 5: writers enqueue their push requests and a
     * setInterval-driven consumer executes them strictly one at a time.
     */
    function UploadWorker() {
        // --- minimal event emitter: listeners keyed by message type ---
        const pool = Object.create(null);
        const on = (type, cb) => {
            pool[type] = pool[type] ? [...pool[type], cb] : [cb];
        };
        // Rebuilds the list instead of splicing it: an unknown cb removes nothing,
        // and a listener removing itself during trigger() cannot skip its neighbour.
        const off = (type, cb) => {
            if (!pool[type]) return;
            pool[type] = pool[type].filter(fn => fn !== cb);
        };
        const trigger = (type, ...args) => {
            (pool[type] || []).forEach(fn => fn(...args));
        };

        // --- request/response over postMessage ---
        let lastUid = 0;
        /**
         * Post `{type, data: {...payload, uid}}` to the main thread and resolve
         * with the payload of the reply of the same type carrying the same uid.
         */
        const request = (type, payload) => new Promise(resolve => {
            const uid = ++lastUid;
            const cb = (result, retUid) => {
                if (retUid !== uid) return; // reply belongs to a different request
                off(type, cb);
                resolve(result);
            };
            on(type, cb);
            self.postMessage({type, data: {...payload, uid}});
        });

        const getCache = key => request('getCache', {key});
        const setCache = (key, value) => request('setCache', {key, value});

        // Plain read-modify-write; safe only because the consumer below never
        // runs two of these at the same time.
        const getNPush = async (key, value) => {
            const src = await getCache(key) || [];
            const next = [...src, value];
            console.log('next', JSON.stringify(next));
            await setCache(key, next);
        };

        // >>>>>>>> added for solution 5
        const getNPushMq = [];          // FIFO queue of pending push requests
        let cacheConsumerHandle = null; // interval id while the consumer is running
        let consuming = false;          // true while one getNPush is in flight

        // One consumer tick: process at most one queued request.
        const consumeOne = async () => {
            // setInterval does not wait for an async callback, so without this
            // guard a tick could start a second getNPush while the first is
            // still between its read and its write.
            if (consuming) return;
            const msg = getNPushMq.shift();
            if (!msg) {
                // Nothing left: stop polling until the next request is enqueued
                clearInterval(cacheConsumerHandle);
                cacheConsumerHandle = null;
                return;
            }
            consuming = true;
            try {
                msg.resolve(await getNPush(msg.key, msg.value));
            } catch (err) {
                msg.reject(err);
            } finally {
                consuming = false;
            }
        };

        /**
         * Enqueue a push request; the returned promise settles once the consumer
         * has executed it. Starts the polling consumer if it is not running.
         */
        const writeGetNPushMq = (key, value) => new Promise((resolve, reject) => {
            getNPushMq.push({key, value, resolve, reject});
            if (cacheConsumerHandle === null) {
                cacheConsumerHandle = setInterval(consumeOne, 10);
            }
        });
        // <<<<<<<< added for solution 5

        // --- simulated concurrent callers ---
        const KEY = 'key';
        // Call fn(...args) after `ms` milliseconds and resolve with its result
        const delay = (ms, fn, ...args) => new Promise(resolve => {
            setTimeout(() => resolve(fn(...args)), ms);
        });
        const loop1 = async (start, end) => {
            for (let i = start; i < end; i++) {
                await delay(Math.random() * 100, writeGetNPushMq, KEY, i);
            }
        };
        const run = async type => {
            switch (type) {
                case 'loop1':
                    await setCache(KEY, []);
                    await Promise.all([loop1(0, 10), loop1(10, 20), loop1(20, 30)]);
                    console.log('===loop1===', await getCache(KEY));
                    break;
            }
        };

        self.onmessage = e => {
            const {type, data, uid} = e.data;
            if (type === 'run') {
                run(data).catch(err => console.error(err));
                return;
            }
            // Hand the main thread's reply to the request waiting for it
            trigger(type, data, uid);
        };
    }
    
    
    // 主线程
    // Main thread for solution 5: spawn the worker from UploadWorker's source text.
    w = new Worker(URL.createObjectURL(new Blob([`(${UploadWorker.toString()})()`])));

    // In-memory stand-in for localStorage, which makes debugging easier.
    data = {};
    doSet = (key, value) => {
        data[key] = value;
        return null;
    };
    doGet = key => (key ? data[key] : data);

    w.onmessage = e => {
        const {type, data: {key, value, ...extra}} = e.data;
        // Request type -> handler; unknown types leave `result` undefined.
        const handlers = new Map([
            ['setCache', () => doSet(key, value)],
            ['getCache', () => doGet(key)],
        ]);
        const handler = handlers.get(type);
        const result = handler ? handler() : undefined;
        console.log('main on type', type, result, value, extra)
        // Echo the extra fields (the uid) so the worker can match the reply
        w.postMessage({type, data: result, ...extra})
    };

    // Tell the worker to start the concurrency test.
    w.postMessage({type: 'run', data: 'loop1'});
    
    

    引申问题

    引申问题1

    把delay去掉

    引申问题2

    全是主线程

    // Minimal event emitter: `pool` maps an event type to its listener array.
    const pool = {};
    const hasListeners = type => Object.prototype.hasOwnProperty.call(pool, type);

    /** Register `cb` for `type`. Builds a new array on every call. */
    const on = (type, cb) => {
      pool[type] = hasListeners(type) ? [...pool[type], cb] : [cb];
    };

    /**
     * Unregister the first occurrence of `cb` for `type`.
     * An unknown `cb` removes nothing (splice(-1, 1) would drop the last
     * listener). The array is replaced rather than mutated, so a listener
     * removing itself during trigger() cannot make forEach skip the next one.
     */
    const off = (type, cb) => {
      if (!hasListeners(type)) return;
      const listeners = pool[type];
      const idx = listeners.indexOf(cb);
      if (idx === -1) return;
      pool[type] = [...listeners.slice(0, idx), ...listeners.slice(idx + 1)];
    };

    /** Call every listener registered for `type` with `args`. */
    const trigger = (type, ...args) => {
      if (!hasListeners(type)) return;
      pool[type].forEach(fn => fn.apply(null, args));
    };
    
    
    // In-memory key/value store used by the all-main-thread variant.
    const data = {};

    /** Store `value` under `key`, log it, and return null. */
    const wSet = (key, value) => {
      data[key] = value;
      console.log('wSet', value);
      return null;
    };

    /** Return the value under `key`, or the whole store when `key` is falsy. */
    const wGet = key => {
      const found = key ? data[key] : data;
      console.log('wGet', found);
      return found;
    };

    /**
     * Synchronous stand-in for the worker round trip: handles a
     * `{type, data: {key, value, uid}}` request and immediately publishes the
     * result as an event of the same type, tagged with the request's uid.
     */
    const noWorker = (e) => {
      const {type, data: {key, value, ...extra}} = e;
      let result;
      switch (type) {
        case 'set':
          result = wSet(key, value);
          break;
        case 'get':
          result = wGet(key);
          break;
      }
      console.log('worker on type', {type, result, ...extra});
      trigger(type, result, extra.uid);
    };
    
    let lastRequestUid = 0;

    /**
     * Send a `{type, data: {...payload, uid}}` request through noWorker and
     * resolve with the result of the event of the same type carrying the same uid.
     */
    const callNoWorker = (type, payload) => new Promise(resolve => {
      const uid = ++lastRequestUid;
      const cb = (result, retUid) => {
        if (retUid !== uid) return; // event belongs to a different request
        off(type, cb);
        resolve(result);
      };
      on(type, cb);
      noWorker({type, data: {...payload, uid}});
    });

    /** Read the value stored under `key`. Returns a promise. */
    const get = key => callNoWorker('get', {key});

    /** Store `value` under `key`. Returns a promise for the 'set' result. */
    const set = (key, value) => callNoWorker('set', {key, value});
    
    
    /**
     * Append `value` to the array stored under `key` (an empty array when the
     * read yields a falsy value) and resolve with the result of `set`.
     * Still a read-modify-write: the `await` yields, so concurrent callers
     * can interleave between the read and the write.
     */
    const getNAppend = async (key, value) => {
      const src = await get(key) || [];
      const next = [...src, value];
      console.log('next', JSON.stringify(next));
      return set(key, next);
    };
    
    
  • 相关阅读:
    题解:luoguP1070 道路游戏(DP)
    题解:luoguP2577【ZJOI2005】午餐(DP)
    题解:bzoj1801: [Ahoi2009]chess 中国象棋
    题解:bzoj1878: [SDOI2009]HH的项链
    SpringBoot静态资源文件 lion
    简要的Log4Net 应用配置
    Web Service 初级教程
    log4Net 动态改变写入文件
    Ornament 类型资源权限
    JQuery 引发两次$(document).ready事件
  • 原文地址:https://www.cnblogs.com/sameen/p/13028366.html
Copyright © 2020-2023  润新知