FileProcessingMap.java

  1. package org.drip.zen.juice;

  2. import java.io.BufferedReader;
  3. import java.io.FileReader;
  4. import java.util.HashMap;
  5. import java.util.HashSet;
  6. import java.util.Set;
  7. import java.util.TreeMap;

  /**
   * Text-mining sample: reads a text file line by line, splits every line into space-separated
   * words, and prints the per-line words, the line/word totals, and the word-frequency tables
   * ordered from the most frequent count to the least frequent one.
   */
  public class FileProcessingMap {

      /** Input file used by {@link #main} when no path is passed on the command line. */
      private static final String DEFAULT_FILE_LOCATION =
          "C:\\DRIP\\CreditAnalytics\\Daemons\\Feeds\\TextMiner\\RomeoAndJuliet.txt";

      /**
       * Counts the lines in a text file.
       *
       * @param fullFileName Path of the file to scan
       *
       * @return Number of lines {@code BufferedReader.readLine} yields for the file (0 if it is empty)
       *
       * @throws Exception Thrown if the file cannot be opened or read
       */
      static int NumberOfLinesInFile (String fullFileName)
          throws Exception
      {
          // Start from zero: every successful readLine call accounts for exactly one line.
          int numberOfLines = 0;

          BufferedReader reader = new BufferedReader (new FileReader (fullFileName));

          try {
              while (reader.readLine() != null) {
                  numberOfLines = numberOfLines + 1;
              }
          } finally {
              // Release the file handle even when readLine fails.
              reader.close();
          }

          return numberOfLines;
      }

      /**
       * Reads the whole file into an array holding one entry per line.
       *
       * The file is scanned twice: once to size the array, once to fill it. Should the file change
       * between the two passes, extra lines are ignored and missing lines are left as {@code null}.
       *
       * @param fullFileName Path of the file to read
       *
       * @return The lines of the file, in file order
       *
       * @throws Exception Thrown if the file cannot be opened or read
       */
      static String[] ReadFile (String fullFileName)
          throws Exception
      {
          String[] fileLines = new String[NumberOfLinesInFile (fullFileName)];

          BufferedReader reader = new BufferedReader (new FileReader (fullFileName));

          try {
              // Bounding the loop by the array length keeps a file that grew from overflowing it.
              for (int lineNumber = 0; lineNumber < fileLines.length; lineNumber = lineNumber + 1) {
                  String line = reader.readLine();

                  if (line == null) {
                      break;
                  }

                  fileLines[lineNumber] = line;
              }
          } finally {
              reader.close();
          }

          return fileLines;
      }

      /**
       * Counts the words in a line. Always equals the length of the array {@link #Words} returns.
       *
       * @param singleLine The line to examine; may be {@code null}
       *
       * @return Number of non-empty, space-separated words; 0 for a {@code null} or empty line
       */
      static int WordCount (String singleLine)
      {
          String[] wordsInLine = Words (singleLine);

          return wordsInLine == null ? 0 : wordsInLine.length;
      }

      /**
       * Splits a line into its space-separated words.
       *
       * Empty tokens produced by leading, trailing, or repeated spaces are dropped, so the result
       * never contains an empty string.
       *
       * @param singleLine The line to split; may be {@code null}
       *
       * @return The words in order of appearance (possibly an empty array), or {@code null} if the
       *  line itself is {@code null}
       */
      static String[] Words (String singleLine)
      {
          if (singleLine == null)
          {
              return null;
          }

          String[] rawTokens = singleLine.split (" ");

          // First pass: size the result; second pass: copy the non-empty tokens across.
          int wordTotal = 0;

          for (String token : rawTokens)
          {
              if (!token.isEmpty())
              {
                  wordTotal = wordTotal + 1;
              }
          }

          String[] wordsInLine = new String[wordTotal];
          int wordIndex = 0;

          for (String token : rawTokens)
          {
              if (!token.isEmpty())
              {
                  wordsInLine[wordIndex] = token;
                  wordIndex = wordIndex + 1;
              }
          }

          return wordsInLine;
      }

      /**
       * Adds the words of a line to a running word-to-occurrence-count map.
       *
       * @param singleLine The line whose words are tallied; a {@code null} line is ignored
       * @param wordCountMap The map to update in place
       */
      static void AddWordToCountMap (String singleLine, HashMap<String, Integer> wordCountMap)
      {
          String[] wordArray = Words (singleLine);

          if (wordArray == null)
          {
              return;
          }

          for (String thisWord : wordArray)
          {
              // A single lookup tells us both whether the word is known and its current tally.
              Integer wordOccurrances = wordCountMap.get (thisWord);

              wordCountMap.put (thisWord, wordOccurrances == null ? 1 : wordOccurrances + 1);
          }
      }

      /**
       * Inverts a word-to-count map into a count-to-word map sorted by count.
       *
       * The result is lossy: it keeps one word per count, namely whichever word with that count is
       * visited last while iterating the input's key set. Use {@link #CountToWordArrayMap} to
       * retain every word.
       *
       * @param wordToCountMap Word-to-occurrence-count map
       *
       * @return Count-to-word map holding a single word for each distinct count
       */
      static TreeMap<Integer, String> CountToWordMap (HashMap<String, Integer> wordToCountMap)
      {
          TreeMap<Integer, String> counterToWordsMap = new TreeMap<Integer, String>();

          for (String word : wordToCountMap.keySet())
          {
              counterToWordsMap.put (wordToCountMap.get (word), word);
          }

          return counterToWordsMap;
      }

      /**
       * Inverts a word-to-count map into a sorted map from each count to all words having that count.
       * (Added 7 May 2016.)
       *
       * @param wordToCountMap Word-to-occurrence-count map
       *
       * @return Map from each distinct count to the set of words occurring that many times
       */
      static TreeMap<Integer, Set<String>> CountToWordArrayMap (HashMap<String, Integer> wordToCountMap)
      {
          TreeMap<Integer, Set<String>> counterToWordArrayMap = new TreeMap<Integer, Set<String>>();

          for (String word : wordToCountMap.keySet())
          {
              int wordCount = wordToCountMap.get (word);

              Set<String> countedWordSet = counterToWordArrayMap.get (wordCount);

              if (countedWordSet == null)
              {
                  // First word seen with this count: register its bucket once.
                  countedWordSet = new HashSet<String>();

                  counterToWordArrayMap.put (wordCount, countedWordSet);
              }

              countedWordSet.add (word);
          }

          return counterToWordArrayMap;
      }

      /**
       * Entry point: dumps the words of every line, then the line and word totals, then the two
       * count-ordered word-frequency tables (most frequent count first).
       *
       * @param input Optional command line; {@code input[0]}, when present, is the path of the file
       *  to analyze, otherwise the built-in default location is used
       *
       * @throws Exception Thrown if the file cannot be opened or read
       */
      public static final void main (String[] input)
          throws Exception
      {
          String fileLocation = (input != null && input.length > 0) ? input[0] : DEFAULT_FILE_LOCATION;

          // Read once; the array length is the line count, so no separate counting pass is needed.
          String[] fileContents = ReadFile (fileLocation);

          int fileLineCount = fileContents.length;

          int totalWords = 0;

          HashMap<String, Integer> wordToCountMap = new HashMap<String, Integer>();

          for (String currentLine : fileContents)
          {
              String[] wordsInCurrentLine = Words (currentLine);

              if (wordsInCurrentLine == null)
              {
                  // Only possible when the file shrank while it was being read.
                  continue;
              }

              totalWords = totalWords + wordsInCurrentLine.length;

              AddWordToCountMap (currentLine, wordToCountMap);

              StringBuilder wordDump = new StringBuilder();

              // Iterate over the actual word array so the index can never run past its end.
              for (String word : wordsInCurrentLine)
              {
                  wordDump.append (word).append (",");
              }

              System.out.println (wordDump.toString());
          }

          System.out.println ("\tNumber of Lines in File: " + fileLineCount);

          System.out.println ("\tNumber of Words in File: " + totalWords);

          /*
           * Begin Added 7 May 2016
           */

          TreeMap<Integer, String> wordCounterMap = CountToWordMap (wordToCountMap);

          for (int wordCount : wordCounterMap.descendingKeySet())
          {
              System.out.println ("\t\t[" + wordCount + "] => '" + wordCounterMap.get (wordCount) + "'");
          }

          TreeMap<Integer, Set<String>> wordCountSetMap = CountToWordArrayMap (wordToCountMap);

          for (int wordSetCount : wordCountSetMap.descendingKeySet())
          {
              StringBuilder counterLineToPrint = new StringBuilder ("\t\t[" + wordSetCount + "] =>");

              for (String countedWord : wordCountSetMap.get (wordSetCount))
              {
                  counterLineToPrint.append (" '").append (countedWord).append ("' |");
              }

              System.out.println (counterLineToPrint.toString());
          }

          /*
           * End Added 7 May 2016
           */
      }
  }