问题引出:
//内存有限的情况下怎么干活
//索引载入内存,多线程
//索引映射为内存
一.载入内存,按行读取进行多线程
- 定义路径以及其他信息
/* Data file that is searched line by line. */
char path[256] = "kaifangX.txt";
/* Binary index file holding int offsets into the data file. */
char indexpath[256] = "indexkf.txt";
/* Number of index entries the program expects to work with. */
#define N 20151574
- 创建索引
/* Index loaded into ordinary heap memory by readmem(). */
struct index
{
    int *pindex; /* array of int offsets read from the index file */
    int length;  /* number of entries stored in pindex */
} allindex = {0};
- 读取文件到内存
1 void readmem() 2 { 3 allindex.length = N; 4 allindex.pindex = calloc(N, sizeof(int)); 5 6 7 FILE *pfw = fopen(indexpath, "rb"); 8 fread(allindex.pindex, sizeof(int),N,pfw); 9 fclose(pfw); 10 }
- 创建检索线程的参数结构体,每个线程负责检索 length 行
/*
 * Per-thread work description for the in-memory index search (see run()).
 * Each thread searches `length` consecutive lines.
 */
struct info
{
    int *pstart;       /* first index entry of this thread's slice */
    int length;        /* number of index entries (lines) in the slice */
    int id;            /* thread number */
    char findstr[100]; /* keyword to search for; sized like the 100-byte input buffer */
};
- 多线程函数
1 void run(void *p) 2 { 3 struct info *pi = p;//参数 4 for (int i = 0; i < pi->length-1; i++)//限定 5 { 6 //起始地址到结束地址之间是一个字符串,可以读取出来 7 char *start = infoall.pfilestart + pi->pstart[i];//起始地址 8 char *end = infoall.pfilestart + pi->pstart[i + 1];//结束地址 9 10 11 12 char *str = calloc(500, 1);//分配内存用于拷贝字符串,文件整块,没有/0,有/n 13 if (str != NULL)//分配成功 14 { 15 strncpy(str, start, pi->pstart[i + 1] - pi->pstart[i ]);//拷贝字符串 16 if (str != NULL) 17 { 18 char *ps = strstr(str, pi->findstr);//检索 19 if (ps != NULL) 20 { 21 printf("%s", str); 22 } 23 } 24 25 } 26 free(str);//释放 27 } 28 }
- 载入到内存进行多线程检索
1 //根据内存进行多线程索引 2 void test1() 3 { 4 printf("please input"); 5 char str[100]; 6 scanf("%s", str); 7 #define nthread 100 8 struct info pthread[nthread] = {0}; 9 HANDLE hd[nthread] = { 0 };//线程参数,线程句柄 10 if (N%nthread==0) 11 { 12 for (int i = 0; i < nthread; i++) 13 { 14 pthread[i].id = i; 15 pthread[i].length = N / nthread; 16 pthread[i].pstart = allindex.pindex + i*(N / nthread); 17 strcpy(pthread[i].findstr,str ); 18 _beginthread(run, 0, &pthread[i]); 19 } 20 } 21 else 22 { 23 for (int i = 0; i < nthread-1; i++) 24 { 25 pthread[i].id = i; 26 pthread[i].length = N / (nthread - 1); 27 pthread[i].pstart = allindex.pindex + i*(N / (nthread - 1)); 28 strcpy(pthread[i].findstr, str); 29 _beginthread(run, 0, &pthread[i]); 30 } 31 int i = nthread - 1; 32 pthread[i].id = i; 33 pthread[i].length = N % (nthread - 1); 34 pthread[i].pstart = allindex.pindex + i*(N / (nthread - 1)); 35 strcpy(pthread[i].findstr, str); 36 _beginthread(run, 0, &pthread[i]); 37 38 } 39 WaitForMultipleObjects(nthread, hd, TRUE, INFINITE);//等待 40 41 42 system("pause"); 43 44 }
二.内存映射进行多线程检索
- 创建文件映射内存的首地址和尾地址
/* Start/end addresses of the two memory-mapped files (filled by map1/map2). */
struct
{
    /* data (text) file: first byte and one past the last byte */
    char *pfilestart;
    char *pfileend;
    /* index file: first byte and one past the last byte */
    char *pindexfilestart;
    char *pindexfileend;
} infoall = {0};
- 映射文本文件到内存,文件以独占方式打开(映射期间其他进程无法再打开该文件)
1 void map1() 2 { 3 //打开一个文件 4 HANDLE hfile = CreateFileA(path, GENERIC_READ | GENERIC_WRITE, 0, NULL, 5 OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); 6 7 if (hfile == INVALID_HANDLE_VALUE) 8 { 9 printf("打开文件失败"); 10 system("pause"); 11 } 12 //获取文件大小 13 printf(" %d", GetFileSize(hfile, NULL)); 14 15 HANDLE hmap = CreateFileMappingA(hfile, 16 NULL, PAGE_READWRITE | SEC_COMMIT,//读写 17 0, 18 GetFileSize(hfile, NULL) + 1,//大小 19 NULL); 20 21 if (hmap == NULL) 22 { 23 printf("映射失败"); 24 CloseHandle(hfile); 25 system("pause"); 26 } 27 28 //创建一个指针,存储映射以后的首地址 29 PVOID pvfile = MapViewOfFile(hmap, FILE_MAP_READ | FILE_MAP_WRITE, 0, 0, 0); 30 31 if (pvfile == NULL) 32 { 33 printf("指针映射失败"); 34 CloseHandle(hfile); 35 CloseHandle(hmap); 36 system("pause"); 37 38 } 39 puts("映射成功"); 40 char *pstart = pvfile;//首地址 41 char *pend = pstart + GetFileSize(hfile, NULL);//结束地址 42 infoall.pfileend = pend;//保存地址 43 infoall.pfilestart = pstart; 44 }
- 映射索引文件
1 void map2() 2 { 3 HANDLE hfile = CreateFileA(indexpath, GENERIC_READ | GENERIC_WRITE, 0, NULL, 4 OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);//打开一个文件 5 6 if (hfile == INVALID_HANDLE_VALUE) 7 { 8 printf("打开文件失败"); 9 system("pause"); 10 } 11 printf(" %d", GetFileSize(hfile, NULL));//获取文件大小 12 HANDLE hmap = CreateFileMappingA(hfile, 13 NULL, PAGE_READWRITE | SEC_COMMIT,//读写 14 0, 15 GetFileSize(hfile, NULL) + 1,//大小 16 NULL); 17 if (hmap == NULL) 18 { 19 printf("映射失败"); 20 CloseHandle(hfile); 21 system("pause"); 22 } 23 PVOID pvfile = MapViewOfFile(hmap, FILE_MAP_READ | FILE_MAP_WRITE, 0, 0, 0); 24 //创建一个指针,存储映射以后的首地址 25 if (pvfile == NULL) 26 { 27 printf("指针映射失败"); 28 CloseHandle(hfile); 29 CloseHandle(hmap); 30 system("pause"); 31 32 } 33 puts("映射成功"); 34 char *pstart = pvfile;//首地址 35 char *pend = pstart + GetFileSize(hfile, NULL);//结束地址 36 infoall.pindexfileend = pend;//保存地址 37 infoall.pindexfilestart = pstart; 38 }
- 内存映射多线程结构体
/*
 * Per-thread work description for the memory-mapped index search
 * (see mrun()).
 */
struct minfo
{
    int *pstart;       /* first index entry of this thread's slice */
    int length;        /* number of index entries (lines) in the slice */
    int id;            /* thread number */
    char findstr[100]; /* keyword to search for; sized like the 100-byte input buffer */
};
- 内存映射多线程函数
1 void mrun(void *p) 2 { 3 struct minfo *pi = p; 4 for (int i = 0; i < pi->length - 1; i++) 5 { 6 char *start = infoall.pfilestart + pi->pstart[i];//起始地址 7 char *end = infoall.pfilestart + pi->pstart[i + 1];//结束地址 8 char *str = calloc(500, 1);//分配内存用于拷贝字符串 9 if (str != NULL)//分配成功 10 { 11 strncpy(str, start, pi->pstart[i + 1] - pi->pstart[i]);//拷贝字符串 12 if (str != NULL) 13 { 14 char *ps = strstr(str, pi->findstr); 15 if (ps != NULL) 16 { 17 printf("%s", str); 18 } 19 } 20 21 } 22 free(str);//释放 23 24 25 } 26 }
- 创建多线程
1 void test2() 2 { 3 printf("please input"); 4 char str[100]; 5 scanf("%s", str); 6 #define nthread 100 7 struct minfo pthread[nthread] = { 0 }; 8 HANDLE hd[nthread] = { 0 };//线程参数,线程句柄 9 if (N%nthread == 0) 10 { 11 for (int i = 0; i < nthread; i++) 12 { 13 pthread[i].id = i; 14 pthread[i].length = N / nthread; 15 pthread[i].pstart = ((int*)infoall.pindexfilestart) + i*(N / nthread); 16 strcpy(pthread[i].findstr, str); 17 _beginthread(mrun, 0, &pthread[i]); 18 } 19 20 21 } 22 else 23 { 24 for (int i = 0; i < nthread - 1; i++) 25 { 26 pthread[i].id = i; 27 pthread[i].length = N / (nthread - 1);
//索引对应的位置信息 28 pthread[i].pstart = ((int*)infoall.pindexfilestart) + i*(N / (nthread - 1)); 29 strcpy(pthread[i].findstr, str); 30 _beginthread(mrun, 0, &pthread[i]); 31 } 32 int i = nthread - 1; 33 pthread[i].id = i; 34 pthread[i].length = N % (nthread - 1); 35 pthread[i].pstart = ((int*)infoall.pindexfilestart) + i*(N / (nthread - 1)); 36 strcpy(pthread[i].findstr, str); 37 _beginthread(mrun, 0, &pthread[i]); 38 39 } 40 WaitForMultipleObjects(nthread, hd, TRUE, INFINITE); 41 42 43 system("pause"); 44 45 }
完整代码:
1 #define _CRT_SECURE_NO_WARNINGS 2 #include<stdio.h> 3 #include<stdlib.h> 4 #include<Windows.h> 5 #include<process.h> 6 //有限的额内存情况怎么干活 7 //索引载入内存,多线程 8 //索引映射为内存 9 10 11 char path[256] = "kaifangX.txt"; 12 char indexpath[256] = "indexkf.txt"; 13 #define N 20151574 14 15 //创建文件映射内存的首地址和尾地址 16 struct 17 { 18 //文本文件首尾地址 19 char *pfilestart; 20 char *pfileend; 21 //索引文件首尾地址 22 char *pindexfilestart; 23 char *pindexfileend; 24 25 }infoall = {0}; 26 27 28 //映射文本文件到内存,文件是独家享有 29 void map1() 30 { 31 //打开一个文件 32 HANDLE hfile = CreateFileA(path, GENERIC_READ | GENERIC_WRITE, 0, NULL, 33 OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); 34 35 if (hfile == INVALID_HANDLE_VALUE) 36 { 37 printf("打开文件失败"); 38 system("pause"); 39 } 40 //获取文件大小 41 printf(" %d", GetFileSize(hfile, NULL)); 42 43 HANDLE hmap = CreateFileMappingA(hfile, 44 NULL, PAGE_READWRITE | SEC_COMMIT,//读写 45 0, 46 GetFileSize(hfile, NULL) + 1,//大小 47 NULL); 48 49 if (hmap == NULL) 50 { 51 printf("映射失败"); 52 CloseHandle(hfile); 53 system("pause"); 54 } 55 56 //创建一个指针,存储映射以后的首地址 57 PVOID pvfile = MapViewOfFile(hmap, FILE_MAP_READ | FILE_MAP_WRITE, 0, 0, 0); 58 59 if (pvfile == NULL) 60 { 61 printf("指针映射失败"); 62 CloseHandle(hfile); 63 CloseHandle(hmap); 64 system("pause"); 65 66 } 67 puts("映射成功"); 68 char *pstart = pvfile;//首地址 69 char *pend = pstart + GetFileSize(hfile, NULL);//结束地址 70 infoall.pfileend = pend;//保存地址 71 infoall.pfilestart = pstart; 72 } 73 74 //映射索引文件到内存 75 void map2() 76 { 77 HANDLE hfile = CreateFileA(indexpath, GENERIC_READ | GENERIC_WRITE, 0, NULL, 78 OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);//打开一个文件 79 80 if (hfile == INVALID_HANDLE_VALUE) 81 { 82 printf("打开文件失败"); 83 system("pause"); 84 } 85 printf(" %d", GetFileSize(hfile, NULL));//获取文件大小 86 HANDLE hmap = CreateFileMappingA(hfile, 87 NULL, PAGE_READWRITE | SEC_COMMIT,//读写 88 0, 89 GetFileSize(hfile, NULL) + 1,//大小 90 NULL); 91 if (hmap == NULL) 92 { 93 printf("映射失败"); 94 CloseHandle(hfile); 95 system("pause"); 96 } 97 PVOID pvfile = 
MapViewOfFile(hmap, FILE_MAP_READ | FILE_MAP_WRITE, 0, 0, 0); 98 //创建一个指针,存储映射以后的首地址 99 if (pvfile == NULL) 100 { 101 printf("指针映射失败"); 102 CloseHandle(hfile); 103 CloseHandle(hmap); 104 system("pause"); 105 106 } 107 puts("映射成功"); 108 char *pstart = pvfile;//首地址 109 char *pend = pstart + GetFileSize(hfile, NULL);//结束地址 110 infoall.pindexfileend = pend;//保存地址 111 infoall.pindexfilestart = pstart; 112 } 113 114 //读取索引到内存 115 struct index 116 { 117 int *pindex;//地址 118 int length; 119 }allindex = {0}; 120 121 //读取文件到内存 122 void readmem() 123 { 124 allindex.length = N; 125 allindex.pindex = calloc(N, sizeof(int)); 126 127 128 FILE *pfw = fopen(indexpath, "rb"); 129 fread(allindex.pindex, sizeof(int),N,pfw); 130 fclose(pfw); 131 } 132 133 //创建索引的线程结构体,每一个线程读取length的长度的行数 134 struct info 135 { 136 int*pstart;//索引的收地址 137 int length;//长度 138 int id;//编号 139 char findstr[20];//查找的字符串 140 }; 141 142 //多线程函数 143 void run(void *p) 144 { 145 struct info *pi = p;//参数 146 for (int i = 0; i < pi->length-1; i++)//限定 147 { 148 //起始地址到结束地址之间是一个字符串,可以读取出来 149 char *start = infoall.pfilestart + pi->pstart[i];//起始地址 150 char *end = infoall.pfilestart + pi->pstart[i + 1];//结束地址 151 152 153 154 char *str = calloc(500, 1);//分配内存用于拷贝字符串,文件整块,没有/0,有/n 155 if (str != NULL)//分配成功 156 { 157 strncpy(str, start, pi->pstart[i + 1] - pi->pstart[i ]);//拷贝字符串 158 if (str != NULL) 159 { 160 char *ps = strstr(str, pi->findstr);//检索 161 if (ps != NULL) 162 { 163 printf("%s", str); 164 } 165 } 166 167 } 168 free(str);//释放 169 } 170 } 171 172 //根据内存进行多线程索引 173 void test1() 174 { 175 printf("please input"); 176 char str[100]; 177 scanf("%s", str); 178 #define nthread 100 179 struct info pthread[nthread] = {0}; 180 HANDLE hd[nthread] = { 0 };//线程参数,线程句柄 181 if (N%nthread==0) 182 { 183 for (int i = 0; i < nthread; i++) 184 { 185 pthread[i].id = i; 186 pthread[i].length = N / nthread; 187 pthread[i].pstart = allindex.pindex + i*(N / nthread); 188 strcpy(pthread[i].findstr,str ); 189 _beginthread(run, 0, 
&pthread[i]); 190 } 191 } 192 else 193 { 194 for (int i = 0; i < nthread-1; i++) 195 { 196 pthread[i].id = i; 197 pthread[i].length = N / (nthread - 1); 198 pthread[i].pstart = allindex.pindex + i*(N / (nthread - 1)); 199 strcpy(pthread[i].findstr, str); 200 _beginthread(run, 0, &pthread[i]); 201 } 202 int i = nthread - 1; 203 pthread[i].id = i; 204 pthread[i].length = N % (nthread - 1); 205 pthread[i].pstart = allindex.pindex + i*(N / (nthread - 1)); 206 strcpy(pthread[i].findstr, str); 207 _beginthread(run, 0, &pthread[i]); 208 209 } 210 WaitForMultipleObjects(nthread, hd, TRUE, INFINITE);//等待 211 212 213 system("pause"); 214 215 } 216 217 218 //内存映射结构体 219 struct minfo 220 { 221 int*pstart; 222 int length;//0 9 10 223 int id; 224 char findstr[20]; 225 }; 226 227 void mrun(void *p) 228 { 229 struct minfo *pi = p; 230 for (int i = 0; i < pi->length - 1; i++) 231 { 232 char *start = infoall.pfilestart + pi->pstart[i];//起始地址 233 char *end = infoall.pfilestart + pi->pstart[i + 1];//结束地址 234 char *str = calloc(500, 1);//分配内存用于拷贝字符串 235 if (str != NULL)//分配成功 236 { 237 strncpy(str, start, pi->pstart[i + 1] - pi->pstart[i]);//拷贝字符串 238 if (str != NULL) 239 { 240 char *ps = strstr(str, pi->findstr); 241 if (ps != NULL) 242 { 243 printf("%s", str); 244 } 245 } 246 247 } 248 free(str);//释放 249 250 251 } 252 } 253 254 //根据内存映射进行多线程索引 255 void test2() 256 { 257 printf("please input"); 258 char str[100]; 259 scanf("%s", str); 260 #define nthread 100 261 struct minfo pthread[nthread] = { 0 }; 262 HANDLE hd[nthread] = { 0 };//线程参数,线程句柄 263 if (N%nthread == 0) 264 { 265 for (int i = 0; i < nthread; i++) 266 { 267 pthread[i].id = i; 268 pthread[i].length = N / nthread; 269 pthread[i].pstart = ((int*)infoall.pindexfilestart) + i*(N / nthread); 270 strcpy(pthread[i].findstr, str); 271 _beginthread(mrun, 0, &pthread[i]); 272 } 273 274 275 } 276 else 277 { 278 for (int i = 0; i < nthread - 1; i++) 279 { 280 pthread[i].id = i; 281 pthread[i].length = N / (nthread - 1); 282 
pthread[i].pstart = ((int*)infoall.pindexfilestart) + i*(N / (nthread - 1)); 283 strcpy(pthread[i].findstr, str); 284 _beginthread(mrun, 0, &pthread[i]); 285 } 286 int i = nthread - 1; 287 pthread[i].id = i; 288 pthread[i].length = N % (nthread - 1); 289 pthread[i].pstart = ((int*)infoall.pindexfilestart) + i*(N / (nthread - 1)); 290 strcpy(pthread[i].findstr, str); 291 _beginthread(mrun, 0, &pthread[i]); 292 293 } 294 WaitForMultipleObjects(nthread, hd, TRUE, INFINITE); 295 296 297 system("pause"); 298 299 } 300 301 void main() 302 { 303 //readmem(); 304 map1();//一旦被映射,文件被锁定 305 map2(); 306 //test1(); 307 test2(); 308 getchar(); 309 }