• 60.大数据创建索引,并实现大文件的二分查找,迁移实现分层


    • index.h
       #ifndef INDEX_H
       #define INDEX_H

       /* MSVC macro: silences the "unsafe CRT function" warnings for fopen/strcpy/scanf. */
       #define  _CRT_SECURE_NO_WARNINGS
       #include<stdio.h>
       #include<stdlib.h>
       #include<string.h>

       /* Number of records (lines) every stage of the program processes. */
       #define N 10000000

       /* Table of byte offsets into the sorted file, one entry per record. */
       struct index
       {
           int *pindex; /* pindex[i] is the byte offset at which record i starts */
           int length;  /* number of entries in pindex */
       };

       /*
        * Shared state. These are declarations only; the single definition of
        * each object lives in index.c, so the header can be included from any
        * number of translation units without producing duplicate definitions.
        */
       extern char **g_pp;            /* array of pointers to the loaded lines */
       extern char filepath[256];     /* path of the raw input file */
       extern char sortpath[256];     /* path of the sorted output file */
       extern char indexpath[256];    /* path of the binary index file */
       extern struct index allindex;  /* the index table */

       /* Count the lines of "file.txt"; returns -1 if it cannot be opened. */
       int getN(void);
       /* Overwrite every '-' in str with '\0'. */
       void eatg(char *str);
       /* Overwrite every '\r' and '\n' in str with '\0'. */
       void eatN(char *str);

       #endif /* INDEX_H */
    • index.c
       1 #include"index.h"
       2 
       3 char **g_pp = NULL;//保存指针数组
       4 char filepath[256] = { 0 };
       5 char sortpath[256] = { 0 };
       6 char indexpath[256] = { 0 };
       7 struct index allindex = { 0 };//索引
       8 
       /*
        * Count the lines of "file.txt".
        *
        * Returns the number of lines, or -1 when the file cannot be opened.
        * A final line that lacks a trailing newline is still counted, an empty
        * file yields 0, and line length does not influence the result.
        */
       int getN(void)
       {
           FILE *pf = fopen("file.txt", "r");
           if (pf == NULL)
           {
               return -1;
           }

           int lines = 0;
           int last = '\n'; /* so that an empty file is not seen as an unterminated line */
           int ch;
           while ((ch = fgetc(pf)) != EOF)
           {
               if (ch == '\n')
               {
                   lines++;
               }
               last = ch;
           }
           if (last != '\n')
           {
               lines++; /* last line had no terminating newline */
           }

           fclose(pf);
           return lines;
       }
      /*
       * Overwrite every '-' in str with '\0', in place.
       *
       * Read as a C string, the result is therefore the part of the original
       * text that precedes the first '-'. The scan continues up to the original
       * terminator, so later '-' characters are cleared as well.
       * A NULL pointer is ignored.
       */
      void eatg(char *str)
      {
          if (str == NULL)
          {
              return;
          }

          for (; *str != '\0'; str++)
          {
              if (*str == '-')
              {
                  *str = '\0';
              }
          }
      }
      /*
       * Overwrite every carriage return and line feed in str with '\0',
       * in place, which strips the line ending left behind by fgets().
       * A NULL pointer is ignored.
       */
      void eatN(char *str)
      {
          if (str == NULL)
          {
              return;
          }

          for (; *str != '\0'; str++)
          {
              if (*str == '\r' || *str == '\n')
              {
                  *str = '\0';
              }
          }
      }
    • createsort.h
      1 #include "index.h"
      2 
      3 void initmem();
      4 int com(void *p1, void*p2);
      5 void sort();
      6 void show();
      7 void writetofile();
    • createsort.c
       1 #include "createsort.h"
       2 void initmem()
       3 {
       4     g_pp = calloc(N, sizeof(char*));//分配指针数组
       5     FILE *pf = fopen(filepath, "r");
       6     if (pf == NULL)
       7     {
       8         return -1;
       9     }
      10     else
      11     {
      12         for (int i = 0; i < N; i++)
      13         {
      14             char str[50] = { 0 };
      15             fgets(str, 50, pf);//读取
      16             g_pp[i] = calloc(strlen(str) + 1, sizeof(char));//分配
      17             if (g_pp[i]!=NULL)
      18             {
      19                 //sprintf(g_pp[i], str);//打印进去
      20                 strcpy(g_pp[i], str);
      21                 eatN(g_pp[i]);
      22             }
      23             
      24             //printf("%s", g_pp[i]);//显示测试
      25 
      26 
      27         }
      28 
      29 
      30         fclose(pf);
      31 
      32 
      33     }
      34 
      35 
      36 
      37 
      38 
      39 
      40 }
      41 
      /*
       * Comparison callback for sorting an array of char * strings.
       *
       * p1 and p2 each point at an array element (a char *). Returns a value
       * below, equal to, or above zero as strcmp() does. NULL elements, which
       * initmem() can leave behind when an allocation fails, compare equal to
       * each other and sort before every non-NULL string.
       */
      int com(void *p1, void*p2)
      {
          const char *s1 = *(char *const *)p1;
          const char *s2 = *(char *const *)p2;

          if (s1 == NULL || s2 == NULL)
          {
              return (s1 != NULL) - (s2 != NULL);
          }

          return strcmp(s1, s2);
      }
      50 
      51 void sort()
      52 {
      53     qsort(g_pp, N, sizeof(char*), com);
      54 
      55 
      56 }
      57 void show()
      58 {
      59     printf("
      此时状态
      ");
      60     for (int i = 0; i < N; i++)
      61     {
      62         printf("
      %s", g_pp[i]);
      63     }
      64 }
      65 void writetofile()
      66 {
      67     FILE *pf = fopen(sortpath, "w");
      68     for (int i = 0; i < N; i++)
      69     {
      70         char temp[100] = { 0 };
      71     //    printf("
      %s", g_pp[i]);
      72         sprintf(temp, "%s
      ", g_pp[i]);
      73     //    printf("
      %s", temp);
      74         fputs(temp, pf);
      75     }
      76 
      77     fclose(pf);
      78 }
    • createindex.h
      1 #include "index.h"
      2 void init();
      3 void qucik();
    • createindex.c
       1 #include "createindex.h"
       2 
       3 
       4 void init()
       5 {
       6     printf("
      索引数组开始分配");
       7     allindex.length = N;
       8     allindex.pindex = calloc(N, sizeof(int));//分配内存
       9     printf("
      索引数组完成分配");
      10 
      11     printf("
      开始读取");
      12     FILE *pf = fopen(sortpath, "rb");//
      ->
      
      13     if (pf == NULL)
      14     {
      15         return -1;
      16     }
      17     else
      18     {
      19         int alllength = 0;
      20         for (int i = 0; i < N; i++)
      21         {
      22             char str[50] = { 0 };
      23             fgets(str, 50, pf);
      24             allindex.pindex[i] = alllength;//错位从0开始
      25 
      26             int length = strlen(str);
      27             alllength += length;
      28 
      29         }
      30 
      31         fclose(pf);
      32     }
      33     printf("
      结束读取");
      34 
      35     printf("
      开始写入");
      36     FILE *pfw = fopen(indexpath, "wb");//写入索引
      37     fwrite(allindex.pindex, sizeof(int), allindex.length, pfw);
      38     fclose(pfw);//关闭
      39     printf("
      结束写入");
      40 
      41 
      42     free(allindex.pindex);
      43 
      44 }
      45 void qucik()
      46 {
      47     printf("
      索引数组开始分配");
      48     allindex.length = N;
      49     allindex.pindex = calloc(N, sizeof(int));//分配内存
      50     printf("
      索引数组完成分配");
      51 
      52     printf("
      开始读取");
      53     FILE *pfw = fopen("index.txt", "rb");//写入索引
      54     fread(allindex.pindex, sizeof(int), allindex.length, pfw);
      55     fclose(pfw);//关闭
      56     printf("
      结束读取");
      57 }
    • binsearch.h
      1 #include "index.h"
      2 void binsearch(char *searchstr);
    • binsearch.c
       1 #include "binsearch.h"
       2 
       3 void binsearch(char *searchstr)
       4 {
       5     int tou = 0;
       6     int wei = N - 1;
       7     int flag = 0;
       8     while (tou <= wei)
       9     {
      10         int zhong = (tou + wei) / 2;
      11         char zhongstr[256] = { 0 };
      12         {
      13             FILE *pf1 = fopen(indexpath, "rb");
      14             FILE *pf2 = fopen(sortpath, "rb");
      15 
      16 
      17             int indexnum = 0;
      18             fseek(pf1, zhong*sizeof(int), SEEK_SET);
      19             fread(&indexnum, sizeof(int), 1, pf1);//读索引zhong到indexnum
      20 
      21             fseek(pf2, indexnum, SEEK_SET);
      22             fgets(zhongstr, 128, pf2);//读取
      23 
      24             fclose(pf1);
      25             fclose(pf2);
      26         }
      27         eatN(zhongstr);
      28         char pnewzhongstr[256] = { 0 };
      29         sprintf(pnewzhongstr, zhongstr);
      30         eatg(pnewzhongstr);//遇到-终止
      31         int res = strcmp(pnewzhongstr, searchstr);//1 0  -1
      32 
      33 
      34         if (res == 0)
      35         {
      36             flag = 1;
      37             printf("%s", zhongstr);
      38             break;
      39         }
      40         else if (res == 1)
      41         {
      42             wei = zhong - 1;
      43         }
      44         else
      45         {
      46             tou = zhong + 1;
      47         }
      48 
      49 
      50     }
      51 
      52 
      53     if (flag)
      54     {
      55         printf("
      find");
      56     }
      57     else
      58     {
      59         printf("
       not find");
      60     }
      61 
      62 
      63 }
    • main.c
       1 #include "binsearch.h"
       2 void initall()
       3 {
       4     strcpy(filepath, "1E~001OK.txt");
       5     strcpy(sortpath, "1E~001sort.txt");
       6     strcpy(indexpath, "1E~001index.txt");
       7 
       8 }
       9 
      /*
       * Pipeline stages defined in createsort.c and createindex.c. They are
       * declared here because this file only includes binsearch.h.
       */
      void initmem();
      void sort();
      void writetofile();
      void init();

      /*
       * Program entry point: load the data file, sort it, write the sorted
       * file, build the on-disk index, then answer search queries read from
       * stdin, one whitespace-delimited word per query, until input ends.
       */
      int main(void)
      {
          initall();
          /* load the lines into memory */
          initmem();
          /* sort them */
          sort();
          /* write the sorted file */
          writetofile();

          /* build the index */
          init();

          /* binary search */
          for (;;)
          {
              char str[256] = { 0 };
              /* Width limit keeps a long word from overflowing str. */
              if (scanf("%255s", str) != 1)
              {
                  break; /* end of input or read error */
              }
              binsearch(str);
          }

          return 0;
      }
  • 相关阅读:
    遗传基因有多大?
    【Unity3D】【NGUI】怎样动态给EventDelegate加入參数
    怎样通过Java程序提交yarn的mapreduce计算任务
    Cocos2D-X2.2.3学习笔记9(处理重力感应事件,移植到Android加入两次返回退出游戏效果)
    java多线程----拒绝策略
    java多线程-----volatile
    java多线程----JUC集合”01之 框架
    java多线程----Semaphore信号量
    java多线程---CyclicBarrier
    java多线程----CountDownLatch
  • 原文地址:https://www.cnblogs.com/xiaochi/p/8437174.html
Copyright © 2020-2023  润新知