• 64.文件载入内存进行多线程检索以及根据索引文件进行多线程检索


    一.文件载入内存进行多线程检索

    • 获取并定义行数以及定义索引和文本的路径
      1 char path[256] = "kaifangX.txt"; // data file that is indexed by init() and searched by the worker threads
      2 char indexpath[256] = "indexkaifangX.txt"; // binary index file written by init() and read by quick()/runfile()
      3 #define N 20151574 // line count of the data file; also the number of entries in the index
    • 索引的结构体
      1 // In-memory index: one int per line of the data file
      2 struct index  
      3 {
      4     // Base address of the index array (init() stores each line's starting byte offset here)
      5     int *pindex;
      6     // Total number of lines in the file
      7     int length;
      8 }allindex = {0};
    • 初始化索引并将索引写入到文件
      /初始化索引
      void  init(char *path)
      {
          //总长度等于行数
          allindex.length = N;
          //初始化分配
          allindex.pindex = calloc(N, sizeof(int));
      
          //打开文件,以二进制文件方式读取
          FILE*pf = fopen(path, "rb");
          if (pf == NULL)
          {
              puts("file open fail");
          }
          else
          {
              //用于给索引赋值
              int allength = 0;
      
              for (int i = 0; i < N;i++)
              {
                  char str[500] = { 0 };
                  //获取字符串
                  fgets(str, 500, pf);
                  allindex.pindex[i] = allength;
                  int length = strlen(str);
                  //索引位置增加
                  allength += length;
              }
              fclose(pf);
          }
          printf("
      结束生成");
      
          //写入到文件
          FILE *pfw = fopen(indexpath, "wb");
          fwrite(allindex.pindex, sizeof(int), allindex.length, pfw);
          fclose(pfw);
      }
    • 快速载入索引文件到内存
       1 void quick()
       2 {
       3     //设置长度
       4     allindex.length = N;
       5     //初始化分配
       6     allindex.pindex = calloc(N, sizeof(int));
       7 
       8     //读取文件
       9     FILE *pfw = fopen(indexpath, "rb");
      10     //读取到内存中
      11     fread(allindex.pindex, sizeof(int), allindex.length, pfw);
      12     fclose(pfw);
      13 }
    • 创建线程的结构体
      1 // Per-thread work descriptor for the search over the in-memory index
      2 struct info
      3 {
      4     int *pstart;// first index entry of this thread's slice
      5     int length;// number of index entries in the slice
      6     char findstr[20];// string to search for
      7     int id;// thread number
      8 
      9 };
    • 创建多线程函数
       1 //多线程函数
       2 void runmem(void *p)
       3 {
       4     //打开文件
       5     FILE *pf = fopen(path, "rb");
       6     //指针类型转换
       7     struct info *pi = p;
       8     //分别读取每个线程索引的位置
       9     for (int i = 0; i < pi->length;i++)
      10     {
      11         //索引的位置
      12         int tempnum = pi->pstart[i];
      13         //根据偏移读文件
      14         fseek(pf, tempnum, SEEK_SET);
      15         char str[512] = { 0 };
      16         fgets(str, 512, pf);
      17         //查找
      18         char *px = strstr(str, pi->findstr);
      19         if (px!=NULL)
      20         {
      21             printf("
      %d线程找到%s", pi->id, str);//打印找到的数据
      22         }
      23     }
      24     fclose(pf);
      25 }
    • 测试函数
       1     //载入内存
       2     quick();
       3     printf("请输入要查询的");
       4     char str[100] = { 0 };
       5     scanf("%s", str);
       6 
       7 //多线程数组个数
       8 #define nthread 100
       9     struct info pthread[nthread];//创建线程使用信息数组
      10     
      11     //如果能被整除
      12     if (N%nthread==0)
      13     {
      14         for (int i = 0; i < nthread;i++)
      15         {
      16             pthread[i].id = i;
      17             strcpy(pthread[i].findstr, str);//拷贝
      18             pthread[i].length = N / nthread;
      19             pthread[i].pstart = allindex.pindex + i*(N / nthread);//
      20             _beginthread(runmem, 0, &pthread[i]);//传递数据
      21         }
      22 
      23     }
      24     //如果不能被整除
      25     else
      26     {                         
      27         //先分配前n-1个
      28         for (int i = 0; i < nthread-1; i++)
      29         {
      30             pthread[i].id = i;
      31             strcpy(pthread[i].findstr, str);//拷贝
      32             pthread[i].length = N / (nthread-1);
      33             pthread[i].pstart = allindex.pindex + i*(N / (nthread-1));//
      34             _beginthread(runmem, 0, &pthread[i]);//传递数据
      35         }
      36 
      37         //分配最后一个
      38         {
      39             int i = nthread - 1;
      40             pthread[i].id = i;
      41             strcpy(pthread[i].findstr, str);//拷贝
      42             pthread[i].length = N % (nthread - 1);
      43             pthread[i].pstart = allindex.pindex + i * (N / (nthread - 1));//
      44             _beginthread(runmem, 0, &pthread[i]);//传递数据
      45         }
      46     }

    二.根据索引文件进行多线程检索

    • 创建多线程索引结构体
       1 // Per-thread work descriptor for the search driven by the index file
       2 struct finof
       3 {
       4     // First line number to search (inclusive)
       5     int  start;
       6     // Line number at which the search stops (exclusive)
       7     int end;
       8     // Thread id
       9     int id;
      10     // String to search for
      11     char findstr[20];
      12 
      13 };
    • 文件索引多线程函数
       1 void runfile(void *p)
       2 {
       3     //获取传递至参数地址
       4     struct finof *pf = p;
       5     //打开索引文件
       6     FILE *pf1 = fopen(indexpath, "rb");
       7     //打开数据文件
       8     FILE *pf2 = fopen(path, "rb");
       9 
      10     //进行搜索
      11     for (int num = pf->start; num < pf->end; num++)
      12     {
      13         int indexnum;
      14         //移动到相应的行数对应的地址
      15         fseek(pf1, num*sizeof(int), SEEK_SET);
      16         //读取索引
      17         fread(&indexnum, sizeof(int), 1, pf1);
      18 
      19         //移动到索引对应的位置
      20         fseek(pf2, indexnum, SEEK_SET);
      21         char str[256] = { 0 };
      22         //读取数据
      23         fgets(str, 256, pf2);
      24         //寻找
      25         char *px = strstr(str, pf->findstr);
      26         if (px!=NULL)
      27         {
      28             printf("%d线程找到%s", pf->id, str);
      29         }
      30 
      31     }
      32     //关闭文件
      33     fclose(pf1);
      34     fclose(pf2);
      35 }
    • 测试函数
       1 printf("请输入要查询的");
       2     char str[100] = { 0 };
       3     scanf("%s", str);
       4 
       5 #define nthread 100
       6     struct finof pthread[nthread];//数组
       7     //如果能被整除
       8     if (N%nthread==0)
       9     {
      10         for (int i = 0; i < nthread;i++)
      11         {
      12             //每一个结构体对应的开始的行数
      13             pthread[i].start = (N / nthread)*i;
      14             //每一个结构体对应的结束的行数
      15             pthread[i].end = (N / nthread)*(i+1);
      16             //内容拷贝
      17             strcpy(pthread[i].findstr, str);
      18             pthread[i].id = i;
      19             //开始线程
      20             _beginthread(runfile, 0, &pthread[i]);
      21         }
      22     } 
      23     else
      24     {
      25         //分配起始行数和结尾行数
      26         for (int i = 0; i < nthread-1; i++)
      27         {
      28             pthread[i].start = (N / (nthread - 1))*i;
      29             pthread[i].end = (N / (nthread - 1))*(i + 1);
      30             strcpy(pthread[i].findstr, str);
      31             pthread[i].id = i;
      32             _beginthread(runfile, 0, &pthread[i]);
      33 
      34 
      35         }
      36         //最后一个线程
      37         int i = nthread - 1;
      38         pthread[i].start = (N / (nthread - 1))*i;
      39         pthread[i].end = (N / (nthread - 1))*i + N % (nthread - 1);
      40         strcpy(pthread[i].findstr, str);
      41         pthread[i].id = i;
      42         _beginthread(runfile, 0, &pthread[i]);
      43     }
  • 相关阅读:
    array and ram
    char as int
    pointer of 2d array and address
    Install SAP HANA EXPRESS on Google Cloud Platform
    Ubuntu remount hard drive
    Compile OpenSSL with Visual Studio 2019
    Install Jupyter notebook and tensorflow on Ubuntu 18.04
    Build OpenCV text(OCR) module on windows with Visual Studio 2019
    Reinstall VirtualBox 6.0 on Ubuntu 18.04
    Pitfall in std::vector<cv::Mat>
  • 原文地址:https://www.cnblogs.com/xiaochi/p/8439850.html
Copyright © 2020-2023  润新知