0
点赞
收藏
分享

微信扫一扫

Java比较两个文本文件中的共同单词,报告共同单词数并计算重叠百分比

小桥流水2016 2022-04-14 阅读 63

示例程序中使用到的文件如下:

 

示例程序:

package Array_list_study;

import java.io.*;
import java.util.*;

public class Vocabulary_compare {
    public static void main(String[] args) throws FileNotFoundException{
        Scanner console = new Scanner(System.in);
        giveIntro();

        System.out.print("file #1 name?");  // poem1
        Scanner in1 = new Scanner(new File(console.nextLine()));
        System.out.print("file #2 name?");  // poem2
        Scanner in2 = new Scanner(new File(console.nextLine()));
        System.out.println();

        ArrayList<String> list1 = getWords(in1);
        ArrayList<String> list2 = getWords(in2);
        ArrayList<String> common = getOverlap(list1,list2);

        reportResults(list1,list2,common);
    }

    // 读取单词,转换为小写,返回唯一单词的排序列表
    public static ArrayList<String> getWords(Scanner input){
        // 忽略除字母和撇号以外的所有字符
        input.useDelimiter("[^a-zA-Z']+");   // 参数是一个正则表达式
        // 读入文件中的单词并排序
        ArrayList<String> words = new ArrayList<String>();
        while (input.hasNext()){
            String next = input.next().toLowerCase();  // 将单词统一转换为小写形式
            words.add(next);
        }
        Collections.sort(words);

        // 创建一个没有重复单词的新列表并返回
        ArrayList<String> result = new ArrayList<String>();
        if(words.size() > 0){
            result.add(words.get(0));
            for(int i = 1;i < words.size();i++){
                if(!words.get(i).equals(words.get(i-1))){
                    result.add(words.get(i));
                }
            }
        }
        return result;
    }

    // 前提:list1,list2经过排序且不存在重复的单词
    // 该方法用于返回一个包含两个列表中重叠部分的列表
    public static ArrayList<String> getOverlap(ArrayList<String> lst1,ArrayList<String> lst2){
        ArrayList<String> result = new ArrayList<String>();
        int i1 = 0;
        int i2 = 0;
        while(i1 < lst1.size() && i2 < lst2.size()){
            int num = lst1.get(i1).compareTo(lst2.get(i2));
            if(num == 0){
                result.add(lst1.get(i1));
                i1++;
                i2++;
            }else  if(num<0){
                i1++;
            }else {
                i2++;
            }
        }
        return result;
    }

    // 向用户解释该程序
    public static void giveIntro(){
        System.out.println("这个程序比较两个文本文件,并报告共同的字数和重叠的百分比。");
        System.out.println();
    }

    // 有关列表及其重叠的统计信息
    public static void reportResults(ArrayList<String> lst1,ArrayList<String> lst2,ArrayList<String> common){
        System.out.println("file #1 words = " + lst1.size());
        System.out.println("file #2 words = " + lst2.size());
        System.out.println("commom words = " + common.size());

        double percent1 = 100.0 * common.size()/lst1.size();
        double percent2 = 100.0 * common.size()/lst2.size();
        System.out.println("% of file #1 in overlap = " + percent1);
        System.out.println("% of file #2 in overlap = " + percent2);

    }
}

运行结果:

举报

相关推荐

0 条评论