⚠ 注意
小文件排序是没有意义的,当然我们这里只是模拟,所以给 100 个数据
🔑 核心思想 🔑
磁盘的读取速度相比内存差距非常大,所以我们不可能像在内存中两两归并。正确的归并方法是大文件平均分割成 N 份,保证每份大小都可以加载到内存,那么就可以把每个小文件加载到内存中,使用快排排序,再写回小文件,这时就达到文件中归并的先决条件
🧿 实现代码 :
void _MergeFile(const char* File1, const char* File2, const char* mFile) { //读文件1 FILE* fout1 = fopen(File1, "r"); if (fout1 == NULL) { printf("打开文件失败\n"); exit(-1); } //读文件2 FILE* fout2 = fopen(File2, "r"); if (fout2 == NULL) { printf("打开文件失败\n"); exit(-1); } //写文件3,把文件1和文件2写到文件3里 FILE* fin = fopen(mFile, "w"); if (fin == NULL) { printf("打开文件失败\n"); exit(-1); } int num1, num2; //对于内存中没有问题,但是磁盘就有问题了。不管num1和num2谁小谁大,只要读了fout1和fout2它们都会往后走 /*while (fscanf(fout1, "%d\n", &num1) != EOF && fscanf(fout2, "%d\n", &num2) != EOF) { if (num1 < num2) fprintf(fin, "%d\n", num1); else fprintf(fin, "%d\n", num2); }*/ int ret1 = fscanf(fout1, "%d\n", &num1); int ret2 = fscanf(fout2, "%d\n", &num2); //下面保证了谁读谁走;fout1和fout2都不为空再比较 while (ret1 != EOF && ret2 != EOF) { if (num1 < num2) { fprintf(fin, "%d\n", num1); ret1 = fscanf(fout1, "%d\n", &num1);//更新字符 } else { fprintf(fin, "%d\n", num2); ret2 = fscanf(fout2, "%d\n", &num2); //更新字符 } } /*注意这样会导致少写一个数据 //fout2完了,写剩下的fout1 while (fscanf(fout1, "%d\n", &num1) != EOF) { fprintf(fin, "%d\n", num1); } //fout1完了,写剩下的fout2 while (fscanf(fout2, "%d\n", &num2) != EOF) { fprintf(fin, "%d\n", num2); }*/ //fout2完了,写剩下的fout1 while (ret1 != EOF) { fprintf(fin, "%d\n", num1); ret1 = fscanf(fout1, "%d\n", &num1);//更新字符 } //fout1完了,写剩下的fout2 while (ret2 != EOF) { fprintf(fin, "%d\n", num2); ret2 = fscanf(fout2, "%d\n", &num2); //更新字符 } //关闭文件 fclose(fout1); fclose(fout2); fclose(fin); } void MergeSortFile(const char* file) { FILE* fout = fopen(file, "r"); if (fout == NULL) { printf("打开文件失败\n"); exit(-1); } int n = 10; int a[10]; int i = 0; int num = 0; char subfile[20]; int filei = 1; memset(a, 0, sizeof(int) * n); //从fout文件流里读,直至EOF while (fscanf(fout, "%d\n", &num) != EOF) { //每次循环读10个数据放在内存中(if里先放9个,else再放最后一个) if (i < n - 1) { a[i++] = num; } else { a[i] = num; //快排10个数据 QuickSort(a, 0, n - 1); //生成文件名sub_sort1/2/3... sprintf(subfile, "%d", filei++); //写文件,subfile里存储生成的文件名 FILE* fin = fopen(subfile, "w"); if (fin == NULL) { printf("打开文件失败\n"); exit(-1); } //写回小文件 for (int i = 0; i < n; i++) { fprintf(fin, "%d\n", a[i]); } //关闭文件 fclose(fin); //重置i i = 0; memset(a, 0, sizeof(int) * n); } } //互相归并到文件,实现整体有序 char mFile[100] = "12"; char File1[100] = "1"; char File2[100] = "2"; for (i = 2; i <= n; i++) { //读取File1和File2,归并出mFile _MergeFile(File1, File2, mFile); //拷贝迭代File1的文件名12/123/1234... strcpy(File1, mFile); //循环迭代File2的文件名3/4/5... sprintf(File2, "%d", i + 1); //循环迭代mFile的文件名123/1234/12345... sprintf(mFile, "%s%d",mFile, i + 1); } //关闭文件 fclose(fout); }