// FileProcessingMap.java
package org.drip.zen.juice;
import java.io.BufferedReader;
import java.io.FileReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.TreeMap;
/**
 * Reads a text file and reports line, word, and word-frequency statistics for it.
 *
 * A "word" is a maximal run of non-whitespace characters; blank lines contain no words. All the
 * helpers are static; {@link #main} wires them together and prints the results to standard output.
 */
public class FileProcessingMap {

    /**
     * Counts the lines in a text file.
     *
     * @param fullFileName Path of the file to read
     *
     * @return Number of lines in the file (0 for an empty file)
     *
     * @throws Exception Thrown if the file cannot be opened or read
     */
    static int NumberOfLinesInFile (String fullFileName)
        throws Exception
    {
        // Start from zero so that the result is the true line count and not one more than it.
        int numberOfLines = 0;
        BufferedReader reader = new BufferedReader (new FileReader (fullFileName));

        try {
            while (reader.readLine() != null) {
                numberOfLines = numberOfLines + 1;
            }
        } finally {
            // Release the file handle even when reading fails part way through.
            reader.close();
        }

        return numberOfLines;
    }

    /**
     * Reads every line of a text file into an array, in file order.
     *
     * @param fullFileName Path of the file to read
     *
     * @return One element per line, without line terminators; never contains null entries
     *
     * @throws Exception Thrown if the file cannot be opened or read
     */
    static String[] ReadFile (String fullFileName)
        throws Exception
    {
        // Collect into a list so the file is read exactly once and the array is sized exactly.
        List<String> fileLines = new ArrayList<String>();
        BufferedReader reader = new BufferedReader (new FileReader (fullFileName));

        try {
            String line = reader.readLine();

            while (line != null) {
                fileLines.add (line);
                line = reader.readLine();
            }
        } finally {
            reader.close();
        }

        return fileLines.toArray (new String[fileLines.size()]);
    }

    /**
     * Counts the words in a single line. Always agrees with the length of {@link #Words}.
     *
     * @param singleLine The line to examine; may be null
     *
     * @return Number of words in the line; 0 for a null or blank line
     */
    static int WordCount (String singleLine)
    {
        String[] wordsInLine = Words (singleLine);
        return wordsInLine == null ? 0 : wordsInLine.length;
    }

    /**
     * Splits a single line into its words.
     *
     * @param singleLine The line to split; may be null
     *
     * @return The words in order of appearance, an empty array for a blank line, or null if the
     *         line itself is null
     */
    static String[] Words (String singleLine)
    {
        if (singleLine == null) {
            return null;
        }

        // Trimming first keeps split() from producing a leading empty token.
        String trimmedLine = singleLine.trim();

        if (trimmedLine.isEmpty()) {
            return new String[0];
        }

        // Splitting on whitespace runs means repeated separators never yield empty "words".
        return trimmedLine.split ("\\s+");
    }

    /**
     * Adds the words of a single line to a running word-to-occurrence-count map.
     *
     * @param singleLine The line whose words are to be counted; a null line is ignored
     * @param wordCountMap Map from word to number of occurrences seen so far; updated in place
     */
    static void AddWordToCountMap (String singleLine, HashMap<String, Integer> wordCountMap)
    {
        String[] wordArray = Words (singleLine);

        if (wordArray == null) {
            return;
        }

        for (String thisWord : wordArray) {
            Integer wordOccurrences = wordCountMap.get (thisWord);
            wordCountMap.put (thisWord, wordOccurrences == null ? 1 : wordOccurrences + 1);
        }
    }

    /**
     * Inverts a word-to-count map into a count-to-word map sorted by count.
     *
     * Only one word is kept per count: when several words share a count, each put replaces the
     * previous one, so the survivor depends on the iteration order of the input map. Use
     * {@link #CountToWordArrayMap} to keep all of them.
     *
     * @param wordToCountMap Map from word to its occurrence count
     *
     * @return Map from occurrence count to a single word having that count
     */
    static TreeMap<Integer, String> CountToWordMap (HashMap<String, Integer> wordToCountMap)
    {
        TreeMap<Integer, String> counterToWordsMap = new TreeMap<Integer, String>();

        for (String word : wordToCountMap.keySet()) {
            counterToWordsMap.put (wordToCountMap.get (word), word);
        }

        return counterToWordsMap;
    }

    /*
     * Begin Added 7 May 2016
     */

    /**
     * Inverts a word-to-count map into a count-to-words map sorted by count, keeping every word.
     *
     * @param wordToCountMap Map from word to its occurrence count
     *
     * @return Map from occurrence count to the set of all words having that count
     */
    static TreeMap<Integer, Set<String>> CountToWordArrayMap (HashMap<String, Integer> wordToCountMap)
    {
        TreeMap<Integer, Set<String>> counterToWordArrayMap = new TreeMap<Integer, Set<String>>();

        for (String word : wordToCountMap.keySet()) {
            int wordCount = wordToCountMap.get (word);
            Set<String> countedWordSet = counterToWordArrayMap.get (wordCount);

            if (countedWordSet == null) {
                // First word seen with this count: register a fresh set for it.
                countedWordSet = new HashSet<String>();
                counterToWordArrayMap.put (wordCount, countedWordSet);
            }

            countedWordSet.add (word);
        }

        return counterToWordArrayMap;
    }

    /*
     * End Added 7 May 2016
     */

    /**
     * Prints the words of each line, the line and word totals, and the word frequencies (highest
     * count first) for a text file.
     *
     * @param input Optional command-line arguments; if present, the first one is the path of the
     *              file to process, otherwise the built-in default location is used
     *
     * @throws Exception Thrown if the file cannot be opened or read
     */
    public static final void main (String[] input)
        throws Exception
    {
        String fileLocation = "C:\\DRIP\\CreditAnalytics\\Daemons\\Feeds\\TextMiner\\RomeoAndJuliet.txt";

        if (input != null && input.length > 0) {
            fileLocation = input[0];
        }

        // Read the file once; the array length is the line count.
        String[] fileContents = ReadFile (fileLocation);
        int fileLineCount = fileContents.length;
        int totalWords = 0;
        HashMap<String, Integer> wordToCountMap = new HashMap<String, Integer>();

        for (String currentLine : fileContents) {
            String[] wordsInCurrentLine = Words (currentLine);
            totalWords = totalWords + wordsInCurrentLine.length;
            AddWordToCountMap (currentLine, wordToCountMap);
            StringBuilder wordDump = new StringBuilder();

            // Iterate over the actual word array so the index can never run past its end.
            for (String word : wordsInCurrentLine) {
                wordDump.append (word).append (",");
            }

            System.out.println (wordDump);
        }

        System.out.println ("\tNumber of Lines in File: " + fileLineCount);
        System.out.println ("\tNumber of Words in File: " + totalWords);

        /*
         * Begin Added 7 May 2016
         */

        TreeMap<Integer, String> wordCounterMap = CountToWordMap (wordToCountMap);

        for (int wordCount : wordCounterMap.descendingKeySet()) {
            System.out.println ("\t\t[" + wordCount + "] => '" + wordCounterMap.get (wordCount) + "'");
        }

        TreeMap<Integer, Set<String>> wordCountSetMap = CountToWordArrayMap (wordToCountMap);

        for (int wordSetCount : wordCountSetMap.descendingKeySet()) {
            StringBuilder counterLineToPrint = new StringBuilder ("\t\t[" + wordSetCount + "] =>");

            for (String countedWord : wordCountSetMap.get (wordSetCount)) {
                counterLineToPrint.append (" '").append (countedWord).append ("' |");
            }

            System.out.println (counterLineToPrint);
        }

        /*
         * End Added 7 May 2016
         */
    }
}