• 外部二路归并排序的小尝试


    了解了外部排序的入门知识后,打算简单实践一下。(虽然不是什么原理很难的东西,省略写出焦油坑然后调试半天的若干过程……)
    默认元素数据类型int,使用fstream输入输出,测试在本机上跑1e8的数据集
    全部读入主存用时约35.8秒,最高内存占用接近350M
    在1e4大小的模拟主存跑用时约497.3秒,最高内存占用接近11M
    多用约14倍的时间,占用约3%的空间,大概适合在嵌入式设备大吞吐量读写场景应用?
    (虽然经过测试,但仍可能存在小问题,逃)

    1.外部二路排序代码

    #include <bits/stdc++.h>
    #include <cmath>
    // Unsigned 64-bit integer alias.
    typedef unsigned long long ull;
    // Capacity of the simulated main memory, in elements; also the default
    // per-input run limit of way2_merge().
    const int MAXMM = 1e4;
    // mm:    buffer in which init() sorts one chunk of input at a time.
    // ok:    written by way2_merge(); main() keeps running merge passes while it is zero.
    // total: overall element count; main() assigns it from the value returned by init().
    int mm[MAXMM], ok = false, total;
    // Run-generation phase of the external sort.
    //
    // Reads integers from "a1.txt" in chunks of at most MAXMM elements, sorts
    // each chunk inside the mm buffer and writes it out as one sorted run
    // (values separated by a single space). Consecutive runs are written
    // alternately to "b1.txt" and "b2.txt", starting with "b1.txt".
    //
    // Returns the total number of integers read.
    int init()
    {
    	std::ifstream fin("a1.txt");
    	std::ofstream fout1("b1.txt"), fout2("b2.txt");
    	std::ofstream *fout = &fout1;
    	int cnt = 0;
    	while(fin)
    	{
    		// Fill the buffer; the size check comes first so that no value is
    		// extracted from the stream once the buffer is already full.
    		int len = 0, t = 0;
    		while(len < MAXMM && fin >> t)
    		{
    			mm[len++] = t;
    		}
    		cnt += len;
    		std::sort(mm, mm + len);
    		for(int j = 0 ; j < len ; ++j)
    		{
    			(*fout) << mm[j] << ' ';
    		}
    		// Switch to the other output file for the next run.
    		fout = (fout == &fout1) ? &fout2 : &fout1;
    	}
    	return cnt;
    }
    // Merges one sorted run from each of two inputs into a single sorted run.
    //
    // in1, in2: input iterators positioned at the start of the next run of each
    //           input; both are advanced past the elements consumed here.
    // ieof:     end-of-stream sentinel the inputs are compared against.
    // fout:     destination; every value is followed by a single space.
    // lim:      maximum number of elements taken from each input in this call.
    //
    // Returns 0 without writing anything when both inputs are already exhausted,
    // 1 otherwise. Before returning 1 it prints how many elements each input
    // contributed and sets the global `ok` to whether one input alone supplied
    // `total` elements.
    template<typename it>
    int way2_merge(it &in1, it &in2, it &ieof, std::ofstream &fout, int lim = MAXMM)
    {
    	if(in1 == ieof && in2 == ieof) return 0;
    	int p1 = 0, p2 = 0;
    	// Both runs still have elements: emit the smaller head (ties go to in2).
    	while(p1 < lim && in1 != ieof && p2 < lim && in2 != ieof)
    	{
    		if(*in1 < *in2)
    		{
    			fout << *in1 << ' ';
    			++in1;
    			++p1;
    		}
    		else
    		{
    			fout << *in2 << ' ';
    			++in2;
    			++p2;
    		}
    	}
    	// Drain whatever is left of the run that has not reached its limit.
    	while(p1 < lim && in1 != ieof)
    	{
    		fout << *in1 << ' ';
    		++in1;
    		++p1;
    	}
    	while(p2 < lim && in2 != ieof)
    	{
    		fout << *in2 << ' ';
    		++in2;
    		++p2;
    	}
    	ok = (p1 == total) || (p2 == total);
    	printf("p1: %d, p2: %d\n", p1, p2);
    	return 1;
    }
    // Driver of the external two-way merge sort.
    //
    // init() splits the input into sorted runs spread over "b1.txt"/"b2.txt".
    // Each pass then merges pairs of runs from the two input files into runs of
    // twice the length, written alternately to the two output files; afterwards
    // the input and output file names trade places. Passes repeat until
    // way2_merge() sets the global `ok`.
    int main()
    {
    	int cnt = init(); total = cnt;
    	// With no data way2_merge() returns before it ever touches `ok`, so the
    	// pass loop below would never terminate.
    	if(total == 0) return 0;
    	typedef const char *cstring;
    	cstring in1 = "b1.txt", in2 = "b2.txt", out1 = "a1.txt", out2 = "a2.txt";
    	const int intMax = std::numeric_limits<int>::max();
    	for(int ex = 0; !ok; ex++)
    	{
    		// Run length doubles every pass; clamp it instead of letting the
    		// shift overflow int.
    		const int lim = (ex < 31 && MAXMM <= (intMax >> ex)) ? (MAXMM << ex) : intMax;
    		std::ifstream fin1(in1), fin2(in2);
    		std::ofstream fout1(out1), fout2(out2);
    		std::istream_iterator<int> _in1(fin1), _in2(fin2), ieof;
    		int result;
    		do{
    			printf("pass %d:\n", ex);
    			result = way2_merge(_in1, _in2, ieof, fout1, lim);
    			result &= way2_merge(_in1, _in2, ieof, fout2, lim);
    		}
    		while(result);
    		// The streams close at the end of this iteration, so the files just
    		// written can be reopened as the inputs of the next pass.
    		std::swap(in1, out1);
    		std::swap(in2, out2);
    	}
    	return 0;
    }
    
    

    2.全部读入主存代码

    #include <algorithm>
    #include <cstdio>
    #include <cstring>
    #include <fstream>
    #include <iostream>
    // Maximum number of elements the in-memory baseline can hold.
    const int MAXN = static_cast<int>(1e8);
    int arr[MAXN];
    // Baseline: loads up to MAXN integers from "a.txt" into memory, sorts them
    // and writes them to "b.txt", each value followed by a single space.
    int main()
    {
    	std::ifstream fin("a.txt");
    	std::ofstream fout("b.txt");
    	// Count what was actually read so that a short or missing input does not
    	// add unread array slots to the sorted output.
    	int n = 0;
    	while(n < MAXN && fin >> arr[n])
    	{
    		++n;
    	}
    	std::sort(arr, arr + n);
    	for(int i = 0 ; i < n ; ++i)
    	{
    		// The separator keeps adjacent numbers from fusing into one token.
    		fout << arr[i] << ' ';
    	}
    	return 0;
    }
    
    

    3.造数据用的

    #include <algorithm>
    #include <cstdlib>
    #include <cstdio>
    #include <cstring>
    #include <ctime>
    #include <fstream>
    #include <iostream>
    const int MAXN = 1e8;
    // Test-data generator: writes MAXN pseudo-random integers, each followed by
    // a space, to "a1.txt". The generator is seeded from the current time.
    int main()
    {
    	srand(time(0));
    	// Only the first stream receives data; the other three files are merely
    	// opened for output and left without content.
    	std::ofstream data("a1.txt");
    	std::ofstream scratchA2("a2.txt");
    	std::ofstream scratchB1("b1.txt");
    	std::ofstream scratchB2("b2.txt");
    	int written = 0;
    	while(written < MAXN)
    	{
    		data << rand();
    		data << ' ';
    		++written;
    	}
    	return 0;
    }
    
    

    4. 测试正确性用的

    #include <bits/stdc++.h>
    // Default number of elements check() expects to find.
    const int MAXN = 1e8;
    // Checks that `filename` starts with a non-decreasing sequence of exactly
    // `total` integers.
    //
    // Counts the integers read up to the first out-of-order value or the end of
    // readable input, prints that count next to `total`, and returns whether the
    // two are equal. A file that is missing or contains no integer yields a
    // count of 0.
    bool check(const char *filename, int total = MAXN)
    {
    	std::ifstream fin(filename);
    	int cnt = 0;
    	int prev = 0, cur = 0;
    	// Only start counting once a first value has really been read.
    	if(fin >> prev)
    	{
    		cnt = 1;
    		while(fin >> cur && prev <= cur)
    		{
    			++cnt;
    			prev = cur;
    		}
    	}
    	printf("cnt: %d, total: %d\n", cnt, total);
    	return cnt == total;
    }
    // Checks that two files contain the same multiset of integers.
    //
    // Every value read from `sorted` adds one to its tally and every value read
    // from `src` subtracts one; the files match when all tallies end at zero.
    // Values in [0, 32768) are tallied in a flat table, any other value
    // (negative or larger) goes into a map so that it can never index outside
    // the table.
    //
    // Returns true when the tallies all cancel out, false otherwise.
    bool identify(const char *sorted, const char *src)
    {
    	const int SMALL = 32768;
    	std::ifstream fin1(sorted), fin2(src);
    	std::vector<long long> delta(SMALL, 0);
    	std::map<int, long long> overflow;
    	int tmp;
    	while(fin1 >> tmp)
    	{
    		if(tmp >= 0 && tmp < SMALL) ++delta[tmp];
    		else ++overflow[tmp];
    	}
    	while(fin2 >> tmp)
    	{
    		if(tmp >= 0 && tmp < SMALL) --delta[tmp];
    		else --overflow[tmp];
    	}
    	for(int i = 0 ; i < SMALL ; ++i)
    		if(delta[i] != 0) return false;
    	for(std::map<int, long long>::const_iterator p = overflow.begin() ; p != overflow.end() ; ++p)
    		if(p->second != 0) return false;
    	return true;
    }
    int main()
    {
    	printf("cnt: %s
    ", (check("a1.txt") || check("b1.txt")) ? "Success" : "Failure");
    	printf("identification: %s
    ", identify("a1.txt", "a.txt") ? "Success" : "Failure");
    }
    
  • 相关阅读:
    如何使用dom拼接xml字符串(标准方式)
    javascript默认将数字类型的“002,00123”,作为整数,去掉前面的0
    java学习小记
    如何将div排成一行显示(默认垂直显示)
    【转】JDBC调用存储过程之实例讲解
    数组求和算法系列
    《12个球问题》分析
    C#类在什么时候分配内存
    C++请不要问我string s=”a”+”b”分配了几次内存
    算法两道百度笔试题
  • 原文地址:https://www.cnblogs.com/StarOnTheWay/p/15413008.html
Copyright © 2020-2023  润新知