FileProcessingMap.java
- package org.drip.zen.juice;
- import java.io.BufferedReader;
- import java.io.FileReader;
- import java.util.HashMap;
- import java.util.HashSet;
- import java.util.Set;
- import java.util.TreeMap;
- public class FileProcessingMap {
- static int NumberOfLinesInFile (String fullFileName)
- throws Exception
- {
- String line = "";
- int numberOfLines = 1;
- boolean stopReading = false;
- BufferedReader reader = new BufferedReader (new FileReader (fullFileName));
- while (stopReading == false) {
- line = reader.readLine();
- if (line == null) {
- stopReading = true;
- } else {
- numberOfLines = numberOfLines + 1;
- }
- }
- reader.close();
- return numberOfLines;
- }
- static String[] ReadFile (String fullFileName)
- throws Exception
- {
- int numberOfLinesToRead = NumberOfLinesInFile (fullFileName);
- String[] fileLines = new String[numberOfLinesToRead];
- String line = "";
- int lineNumber = 0;
- boolean stopReading = false;
- BufferedReader reader = new BufferedReader (new FileReader (fullFileName));
- while (stopReading == false) {
- line = reader.readLine();
- if (line == null)
- stopReading = true;
- else {
- fileLines[lineNumber] = line;
- lineNumber = lineNumber + 1;
- }
- }
- reader.close();
- return fileLines;
- }
- static int WordCount (String singleLine)
- {
- if (singleLine == null)
- {
- return 0;
- }
- int count = 1;
- for (int letterIndex = 0; letterIndex < singleLine.length(); letterIndex = letterIndex + 1)
- {
- char letter = singleLine.charAt (letterIndex);
- if (letter == ' ')
- {
- count = count + 1;
- }
- }
- return count;
- }
- static String[] Words (String singleLine)
- {
- if (singleLine == null)
- {
- return null;
- }
- String[] wordsInLine = singleLine.split (" ");
- return wordsInLine;
- }
- static void AddWordToCountMap (String singleLine, HashMap<String, Integer> wordCountMap)
- {
- String[] wordArray = Words (singleLine);
- if (wordArray == null)
- {
- return;
- }
- for (int j = 0; j < wordArray.length; j = j + 1)
- {
- String thisWord = wordArray[j];
- if (wordCountMap.containsKey (thisWord)) {
- int wordOccurrances = wordCountMap.get (thisWord);
- wordCountMap.put (thisWord, wordOccurrances + 1);
- } else {
- wordCountMap.put (thisWord, 1);
- }
- }
- }
- static TreeMap<Integer, String> CountToWordMap (HashMap<String, Integer> wordToCountMap)
- {
- TreeMap<Integer, String> counterToWordsMap = new TreeMap<Integer, String>();
- Set<String> wordSet = wordToCountMap.keySet();
- for (String word : wordSet)
- {
- counterToWordsMap.put (wordToCountMap.get (word), word);
- }
- return counterToWordsMap;
- }
- /*
- * Begin Added 7 May 2016
- */
- static TreeMap<Integer, Set<String>> CountToWordArrayMap (HashMap<String, Integer> wordToCountMap)
- {
- TreeMap<Integer, Set<String>> counterToWordArrayMap = new TreeMap<Integer, Set<String>>();
- Set<String> wordSet = wordToCountMap.keySet();
- for (String word : wordSet)
- {
- int wordCount = wordToCountMap.get (word);
- if (counterToWordArrayMap.containsKey (wordCount))
- {
- Set<String> countedWordSet = counterToWordArrayMap.get (wordCount);
- countedWordSet.add (word);
- counterToWordArrayMap.put (wordCount, countedWordSet);
- }
- else
- {
- Set<String> countedWordSet = new HashSet<String>();
- countedWordSet.add (word);
- counterToWordArrayMap.put (wordCount, countedWordSet);
- }
- }
- return counterToWordArrayMap;
- }
- /*
- * End Added 7 May 2016
- */
- public static final void main (String[] input)
- throws Exception
- {
- String fileLocation = "C:\\DRIP\\CreditAnalytics\\Daemons\\Feeds\\TextMiner\\RomeoAndJuliet.txt";
- int fileLineCount = NumberOfLinesInFile (fileLocation);
- String[] fileContents = ReadFile (fileLocation);
- int totalWords = 0;
- HashMap<String, Integer> wordToCountMap = new HashMap<String, Integer>();
- for (int i = 0; i < fileLineCount; i = i + 1)
- {
- String currentLine = fileContents[i];
- int numberOfWordsInCurrentLine = WordCount (currentLine);
- totalWords = totalWords + numberOfWordsInCurrentLine;
- String[] wordsInCurrentLine = Words (currentLine);
- String wordDump = "";
- AddWordToCountMap (currentLine, wordToCountMap);
- for (int j = 0; j < numberOfWordsInCurrentLine; j = j + 1)
- {
- wordDump = wordDump + wordsInCurrentLine[j] + ",";
- }
- System.out.println (wordDump);
- }
- System.out.println ("\tNumber of Lines in File: " + fileLineCount);
- System.out.println ("\tNumber of Words in File: " + totalWords);
- /* Set<String> wordSet = wordToCountMap.keySet();
- for (String word : wordSet)
- {
- System.out.println ("\t\t[" + word + "] => " + wordToCountMap.get (word));
- } */
- /*
- * Begin Added 7 May 2016
- */
- TreeMap<Integer, String> wordCounterMap = CountToWordMap (wordToCountMap);
- Set<Integer> wordCounterSet = wordCounterMap.descendingKeySet();
- for (int wordCount : wordCounterSet)
- {
- System.out.println ("\t\t[" + wordCount + "] => '" + wordCounterMap.get (wordCount) + "'");
- }
- TreeMap<Integer, Set<String>> wordCountSetMap = CountToWordArrayMap (wordToCountMap);
- Set<Integer> wordSetCountSet = wordCountSetMap.descendingKeySet();
- for (int wordSetCount : wordSetCountSet)
- {
- String counterLineToPrint = "\t\t[" + wordSetCount + "] =>";
- Set<String> countedWordSet = wordCountSetMap.get (wordSetCount);
- for (String countedWord : countedWordSet)
- {
- counterLineToPrint = counterLineToPrint + " '" + countedWord + "' |";
- }
- System.out.println (counterLineToPrint);
- }
- /*
- * End Added 7 May 2016
- */
- }
- }