题目一
将英文文本 article-1e.txt ~ article-82e.txt 共82个文本文件的数据写入一个大文件 article.dat,并对 article.dat 建立索引文件 article.idx,在 article.idx 文件中记录每篇文章在 article.dat 文件中的起始位置和长度(此为子程序1:建立索引程序);
实现文档检索:通过控制台输入文档编号,程序可通过article.idx查询到该文档在article.dat中的位置,读出该文档并输出到控制台(此为子程序2:检索程序)。
参考方法:
index.cpp
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>

#define ARTICLE_COUNT 82 /* sources are article-1e.txt .. article-82e.txt */

/*
 * One index record: where an article starts inside the data file and how
 * many bytes it occupies. Records are written raw with fwrite(), one per
 * article in ascending order, so the reader must use the same struct.
 */
struct idx_t
{
    long pos;
    int len;
} idx;

/*
 * Append the article with the given number to datfp and write its index
 * record to idxfp.
 *
 * Returns 0 on success, -1 on failure (a diagnostic has been printed).
 */
static int append_article(int number, FILE *datfp, FILE *idxfp)
{
    char filename[50]; /* path of the source file to open */
    FILE *srcfp;
    char *content = NULL;
    long len;
    long pos;
    int rc = -1;

    snprintf(filename, sizeof filename, "data\\article-%de.txt", number);

    /* The source is only read, so do not demand write permission. */
    srcfp = fopen(filename, "rb");
    if (srcfp == NULL) {
        perror(filename);
        return -1;
    }

    /* Determine the length of the source file. */
    if (fseek(srcfp, 0L, SEEK_END) != 0) {
        perror(filename);
        goto out;
    }
    len = ftell(srcfp);
    if (len < 0 || fseek(srcfp, 0L, SEEK_SET) != 0) {
        perror(filename);
        goto out;
    }
    if (len > INT_MAX) {
        fprintf(stderr, "%s: too large for the index format\n", filename);
        goto out;
    }

    /* Remember where this article will start inside the data file. */
    pos = ftell(datfp);
    if (pos < 0) {
        perror("data.dat");
        goto out;
    }

    /* The buffer is sized from the file itself, so any article fits. */
    if (len > 0) {
        content = (char *)malloc((size_t)len);
        if (content == NULL) {
            fprintf(stderr, "%s: out of memory\n", filename);
            goto out;
        }
        if (fread(content, 1, (size_t)len, srcfp) != (size_t)len) {
            fprintf(stderr, "%s: read failed\n", filename);
            goto out;
        }
        if (fwrite(content, 1, (size_t)len, datfp) != (size_t)len) {
            perror("data.dat");
            goto out;
        }
    }

    idx.pos = pos;
    idx.len = (int)len;
    if (fwrite(&idx, sizeof idx, 1, idxfp) != 1) {
        perror("data.idx");
        goto out;
    }
    rc = 0;

out:
    free(content);
    fclose(srcfp);
    return rc;
}

/*
 * Build data.dat (all articles concatenated) and data.idx (one idx_t record
 * per article). Returns EXIT_SUCCESS only if every article was copied and
 * both output files were flushed successfully.
 */
int main(void)
{
    FILE *datfp = NULL; /* concatenated article data */
    FILE *idxfp = NULL; /* index records */
    int status = EXIT_FAILURE;
    int i;

    datfp = fopen("data.dat", "wb");
    if (datfp == NULL) {
        perror("data.dat");
        goto done;
    }
    idxfp = fopen("data.idx", "wb");
    if (idxfp == NULL) {
        perror("data.idx");
        goto done;
    }

    for (i = 1; i <= ARTICLE_COUNT; i++) {
        if (append_article(i, datfp, idxfp) != 0) {
            goto done;
        }
    }
    status = EXIT_SUCCESS;

done:
    /* fclose() flushes buffered output, so its result matters here. */
    if (idxfp != NULL && fclose(idxfp) != 0) {
        perror("data.idx");
        status = EXIT_FAILURE;
    }
    if (datfp != NULL && fclose(datfp) != 0) {
        perror("data.dat");
        status = EXIT_FAILURE;
    }
    return status;
}
search.cpp
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>

/*
 * One index record: start offset and byte length of an article inside
 * data.dat. Must match the struct used by the program that wrote data.idx,
 * because records are read back raw with fread().
 */
struct idx_t
{
    long pos;
    int len;
} idx;

/*
 * Ask for a 1-based document number, look up its record in data.idx, read
 * that span from data.dat and print it to the console.
 */
int main(void)
{
    FILE *datfp = NULL;
    FILE *idxfp = NULL;
    char *content = NULL;
    size_t got;
    long offset;
    int num;
    int status = EXIT_FAILURE;

    printf("input file number:");
    if (scanf("%d", &num) != 1 || num < 1) {
        fprintf(stderr, "invalid file number\n");
        goto done;
    }
    /* Guard the offset computation where long is only 32 bits wide. */
    if (num - 1 > LONG_MAX / (long)sizeof(struct idx_t)) {
        fprintf(stderr, "file number %d is out of range\n", num);
        goto done;
    }
    offset = (long)sizeof(struct idx_t) * (num - 1);

    datfp = fopen("data.dat", "rb");
    if (datfp == NULL) {
        perror("data.dat");
        goto done;
    }
    idxfp = fopen("data.idx", "rb");
    if (idxfp == NULL) {
        perror("data.idx");
        goto done;
    }

    /* A failed read here means the number is past the last index record. */
    if (fseek(idxfp, offset, SEEK_SET) != 0 ||
        fread(&idx, sizeof idx, 1, idxfp) != 1) {
        fprintf(stderr, "no index record for file %d\n", num);
        goto done;
    }
    if (idx.pos < 0 || idx.len < 0) {
        fprintf(stderr, "data.idx: corrupt record for file %d\n", num);
        goto done;
    }

    /* Size the buffer from the record; +1 keeps room for the terminator. */
    content = (char *)malloc((size_t)idx.len + 1);
    if (content == NULL) {
        fprintf(stderr, "out of memory\n");
        goto done;
    }
    if (fseek(datfp, idx.pos, SEEK_SET) != 0) {
        perror("data.dat");
        goto done;
    }
    got = fread(content, 1, (size_t)idx.len, datfp);
    content[got] = '\0';
    printf("%s", content);
    if (got != (size_t)idx.len) {
        fprintf(stderr, "data.dat: article %d is truncated\n", num);
        goto done;
    }
    status = EXIT_SUCCESS;

done:
    free(content);
    if (datfp != NULL) {
        fclose(datfp);
    }
    if (idxfp != NULL) {
        fclose(idxfp);
    }
    system("pause");
    return status;
}
myself方法:
myself.cpp
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define ARTICLE_COUNT 82 /* sources are article-1e.txt .. article-82e.txt */

/*
 * Index entry for one article: a short name plus the byte range
 * [start, end) it occupies inside article.dat. Slot 0 is unused so that
 * strindex[n] describes article n. Entries are stored raw in article.idx,
 * article 1 first.
 */
struct index
{
    char filename[20];
    long start;
    long end;
} strindex[ARTICLE_COUNT + 1];

/*
 * Append the whole file at path to datfp and report the byte range it now
 * occupies there through *start and *end.
 *
 * Returns 0 on success, -1 on failure (a diagnostic has been printed).
 */
static int append_article(const char *path, FILE *datfp, long *start, long *end)
{
    char buf[4096];
    size_t n;
    int rc = -1;
    FILE *fromfp = fopen(path, "rb");

    if (fromfp == NULL) {
        perror(path);
        return -1;
    }

    *start = ftell(datfp);
    if (*start < 0) {
        perror("article.dat");
        goto out;
    }

    /* Copy in chunks until end of file; no size query or big buffer needed. */
    while ((n = fread(buf, 1, sizeof buf, fromfp)) > 0) {
        if (fwrite(buf, 1, n, datfp) != n) {
            perror("article.dat");
            goto out;
        }
    }
    if (ferror(fromfp)) {
        perror(path);
        goto out;
    }

    *end = ftell(datfp);
    if (*end < 0) {
        perror("article.dat");
        goto out;
    }
    rc = 0;

out:
    fclose(fromfp);
    return rc;
}

/*
 * Concatenate data\article-1e.txt .. data\article-82e.txt into article.dat
 * and write one struct index record per article to article.idx.
 *
 * Both output files are opened once, in binary mode, so the offsets
 * reported by ftell() are exact byte positions. On any failure a diagnostic
 * is printed and the build stops.
 */
void BuildIndex(void)
{
    FILE *to_datfp;
    FILE *indexfp;
    int i;

    to_datfp = fopen("article.dat", "wb");
    if (to_datfp == NULL) {
        perror("article.dat");
        return;
    }
    indexfp = fopen("article.idx", "wb");
    if (indexfp == NULL) {
        perror("article.idx");
        fclose(to_datfp);
        return;
    }

    for (i = 1; i <= ARTICLE_COUNT; i++) {
        char path[100];
        char digits[3];
        size_t n = 0;

        /* Spell the article number by hand: one or two decimal digits. */
        if (i >= 10) {
            digits[n++] = (char)('0' + i / 10);
        }
        digits[n++] = (char)('0' + i % 10);
        digits[n] = '\0';

        strcpy(path, "data\\article-");
        strcat(path, digits);
        strcat(path, "e.txt");

        strcpy(strindex[i].filename, "article-");
        strcat(strindex[i].filename, digits);

        if (append_article(path, to_datfp,
                           &strindex[i].start, &strindex[i].end) != 0) {
            break;
        }
        if (fwrite(&strindex[i], sizeof(struct index), 1, indexfp) != 1) {
            perror("article.idx");
            break;
        }
    }

    /* fclose() flushes buffered output, so its result matters here. */
    if (fclose(indexfp) != 0) {
        perror("article.idx");
    }
    if (fclose(to_datfp) != 0) {
        perror("article.dat");
    }
}

/*
 * Print article number num (1..82) to the console.
 *
 * The record for article num is the (num-1)-th entry of article.idx; it
 * gives the byte range to read from article.dat.
 */
void search(int num)
{
    FILE *indexfp = NULL;
    FILE *datafp = NULL;
    char *content = NULL;
    struct index entry;
    long length;
    size_t got;

    if (num < 1 || num > ARTICLE_COUNT) {
        printf("文件序号超出范围(1~%d)\n", ARTICLE_COUNT);
        return;
    }

    indexfp = fopen("article.idx", "rb");
    if (indexfp == NULL) {
        perror("article.idx");
        goto out;
    }
    datafp = fopen("article.dat", "rb");
    if (datafp == NULL) {
        perror("article.dat");
        goto out;
    }

    if (fseek(indexfp, (long)(num - 1) * (long)sizeof(struct index), SEEK_SET) != 0 ||
        fread(&entry, sizeof entry, 1, indexfp) != 1) {
        fprintf(stderr, "article.idx: no record for article %d\n", num);
        goto out;
    }
    length = entry.end - entry.start;
    if (entry.start < 0 || length < 0) {
        fprintf(stderr, "article.idx: corrupt record for article %d\n", num);
        goto out;
    }
    strindex[num] = entry; /* keep the in-memory table in step with the file */

    if (fseek(datafp, entry.start, SEEK_SET) != 0) {
        perror("article.dat");
        goto out;
    }

    /* Read exactly `length` bytes; the extra byte is for the terminator. */
    content = (char *)malloc((size_t)length + 1);
    if (content == NULL) {
        fprintf(stderr, "out of memory\n");
        goto out;
    }
    got = fread(content, 1, (size_t)length, datafp);
    content[got] = '\0';
    printf("%s", content);
    if (got != (size_t)length) {
        fprintf(stderr, "article.dat: article %d is truncated\n", num);
    }

out:
    free(content);
    if (datafp != NULL) {
        fclose(datafp);
    }
    if (indexfp != NULL) {
        fclose(indexfp);
    }
}

/*
 * Rebuild the data and index files, then answer lookups until the input is
 * exhausted or is not a number.
 */
int main(void)
{
    int order;

    BuildIndex();
    printf("请输入要检索的文件序号(1~82):");
    /* scanf() returns EOF (non-zero) at end of input, so compare with 1. */
    while (scanf("%d", &order) == 1) {
        search(order);
        printf("请输入要检索的文件序号(1~82):");
    }
    system("pause");
    return 0;
}
myself方法(改进版):
使用sprintf()改进文件路径;
myself-better.cpp
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define ARTICLE_COUNT 82 /* sources are article-1e.txt .. article-82e.txt */

/*
 * Index entry for one article: a short name plus the byte range
 * [start, end) it occupies inside article.dat. Slot 0 is unused so that
 * strindex[n] describes article n. Entries are stored raw in article.idx,
 * article 1 first.
 */
struct index
{
    char filename[20];
    long start;
    long end;
} strindex[ARTICLE_COUNT + 1];

/*
 * Stream the file at path onto the end of datfp, storing the byte range it
 * ends up occupying in *start and *end.
 *
 * Returns 0 on success, -1 on failure (a diagnostic has been printed).
 */
static int copy_into_dat(const char *path, FILE *datfp, long *start, long *end)
{
    char chunk[4096];
    size_t got;
    int rc = -1;
    FILE *fromfp = fopen(path, "rb");

    if (fromfp == NULL) {
        perror(path);
        return -1;
    }

    *start = ftell(datfp);
    if (*start < 0) {
        perror("article.dat");
        goto out;
    }

    /* Chunked copy: works for any article size without a size query. */
    for (;;) {
        got = fread(chunk, 1, sizeof chunk, fromfp);
        if (got == 0) {
            break;
        }
        if (fwrite(chunk, 1, got, datfp) != got) {
            perror("article.dat");
            goto out;
        }
    }
    if (ferror(fromfp)) {
        perror(path);
        goto out;
    }

    *end = ftell(datfp);
    if (*end < 0) {
        perror("article.dat");
        goto out;
    }
    rc = 0;

out:
    fclose(fromfp);
    return rc;
}

/*
 * Concatenate data\article-1e.txt .. data\article-82e.txt into article.dat
 * and write one struct index record per article to article.idx.
 *
 * Paths and names are formatted with snprintf(). Both output files are
 * opened once, in binary mode, so ftell() offsets are exact byte positions.
 * On any failure a diagnostic is printed and the build stops.
 */
void BuildIndex(void)
{
    FILE *to_datfp;
    FILE *indexfp;
    int i;

    to_datfp = fopen("article.dat", "wb");
    if (to_datfp == NULL) {
        perror("article.dat");
        return;
    }
    indexfp = fopen("article.idx", "wb");
    if (indexfp == NULL) {
        perror("article.idx");
        fclose(to_datfp);
        return;
    }

    for (i = 1; i <= ARTICLE_COUNT; i++) {
        char path[100];

        snprintf(path, sizeof path, "data\\article-%de.txt", i);
        snprintf(strindex[i].filename, sizeof strindex[i].filename,
                 "article-%de", i);

        if (copy_into_dat(path, to_datfp,
                          &strindex[i].start, &strindex[i].end) != 0) {
            break;
        }
        if (fwrite(&strindex[i], sizeof(struct index), 1, indexfp) != 1) {
            perror("article.idx");
            break;
        }
    }

    /* fclose() flushes buffered output, so its result matters here. */
    if (fclose(indexfp) != 0) {
        perror("article.idx");
    }
    if (fclose(to_datfp) != 0) {
        perror("article.dat");
    }
}

/*
 * Print article number num (1..82) to the console.
 *
 * The record for article num is the (num-1)-th entry of article.idx; it
 * gives the byte range to read from article.dat.
 */
void search(int num)
{
    FILE *indexfp = NULL;
    FILE *datafp = NULL;
    char *content = NULL;
    struct index entry;
    long length;
    size_t got;

    if (num < 1 || num > ARTICLE_COUNT) {
        printf("文件序号超出范围(1~%d)\n", ARTICLE_COUNT);
        return;
    }

    indexfp = fopen("article.idx", "rb");
    if (indexfp == NULL) {
        perror("article.idx");
        goto out;
    }
    datafp = fopen("article.dat", "rb");
    if (datafp == NULL) {
        perror("article.dat");
        goto out;
    }

    if (fseek(indexfp, (long)(num - 1) * (long)sizeof(struct index), SEEK_SET) != 0 ||
        fread(&entry, sizeof entry, 1, indexfp) != 1) {
        fprintf(stderr, "article.idx: no record for article %d\n", num);
        goto out;
    }
    length = entry.end - entry.start;
    if (entry.start < 0 || length < 0) {
        fprintf(stderr, "article.idx: corrupt record for article %d\n", num);
        goto out;
    }
    strindex[num] = entry; /* keep the in-memory table in step with the file */

    if (fseek(datafp, entry.start, SEEK_SET) != 0) {
        perror("article.dat");
        goto out;
    }

    /* Read exactly `length` bytes; the extra byte is for the terminator. */
    content = (char *)malloc((size_t)length + 1);
    if (content == NULL) {
        fprintf(stderr, "out of memory\n");
        goto out;
    }
    got = fread(content, 1, (size_t)length, datafp);
    content[got] = '\0';
    printf("%s", content);
    if (got != (size_t)length) {
        fprintf(stderr, "article.dat: article %d is truncated\n", num);
    }

out:
    free(content);
    if (datafp != NULL) {
        fclose(datafp);
    }
    if (indexfp != NULL) {
        fclose(indexfp);
    }
}

/*
 * Rebuild the data and index files, then answer lookups until the input is
 * exhausted or is not a number.
 */
int main(void)
{
    int order;

    BuildIndex();
    printf("请输入要检索的文件序号(1~82):");
    /* scanf() returns EOF (non-zero) at end of input, so compare with 1. */
    while (scanf("%d", &order) == 1) {
        search(order);
        printf("请输入要检索的文件序号(1~82):");
    }
    system("pause");
    return 0;
}