// Page 1 of 5 (document-viewer navigation text; not part of the program)

/*"Mini Search Engine" - Data Structures and Algorithms

Caleb Smith - Due 8/2/2013 */


#include <cctype>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

#include <dirent.h>

#include "hash.h"
using namespace std;
int main(){
/*Main Function-
Builds HashTable class for storing words.
Reads words from documents.
Stores words in HashTable
Accepts queries - displays documents containing specified words.*/
stringstream stream;
HashTable hashT;
char buffer[100];
char key;
string word;
string order;
int select = 0;
int check = 0;
Node* queries[3] = {0, 0, 0};
Node* temp = 0;
Node* wordLoc;
cout << endl;
/*New input stuff.*/
ifstream fin;
string dir, filepath;
int doc;
DIR *dp;
struct dirent *dirp;
/*Reading words from documents begins here.
Opens documents directory, reads for files
one-by-one.
Opens file, reads file line-by-line
(skipping formatting lines beginning with
'<' character),
reads individual words from line.
(As discussed in class, replaced any
mid-word punctuation (except '-') with spaces, to
clean up words.)
Stores words in hash table using
HashTable class' hash() function.*/
dp = opendir("documents");
if(dp == NULL){
cout << endl << "Error - No documents folder; cannot store words
." << endl;
return 0;
}
else{
cout << endl << "Reading from documents." << endl;
}
dir = "documents";
while((dirp = readdir(dp))){
/*Get file # for storage.*/
doc=((dirp->d_name[9] - 48)*1000) + ((dirp->d_name[10] - 48)*100
);
doc=doc + ((dirp->d_name[11] - 48)*10) + (dirp->d_name[12] - 48)
;
/*Subtract 1; needs to correspond to array indexing,
so Doc 1 will be noted as 0, and Doc 50 as 49.*/
doc = doc - 1;
filepath = dir + "/" + dirp->d_name;
fin.open(filepath.c_str() );
while(fin.getline(buffer, 100)){
/*Reads through document line-by-line.*/
if(buffer[0] != '<'){
/*Iteratively "cleaning" the line, removing unneeded
punctuation.*/
for(int i = 0;i < 100;i++){
if(!isalpha(buffer[i])&&buffer[i]!='-'){
buffer[i] = ' ';
}
}
stream << buffer;
/*Reading words individually, storing.*/
while(stream.getline(buffer, 100, ' ')){
word = buffer;
hashT.hash(buffer[0], word, doc);
};
}
/*Resetting stringstream for latter use.*/
stream << "";
stream.clear();
};
fin.close();
}
/*File reading/word storage process ends here.*/
/*User input/actually functional, useful part of program begins here.*/
while(1 == 1){
/*Loop prompts user for command/query.*/
cout << "Input command: > ";
/*Commands first read by line, then split
and read by character using similar method
as the above word storage (without the cleaning).*/
while(cin.getline(buffer, 100)){
///output command, for reading input from a file
///so that the user knows which command query was
///carried out.
cout<<endl<<buffer<<endl;
stream << buffer;
while(stream.getline(buffer, 50, ' ')){
/*Reads input word-by-word.*/
/// cout << buffer << endl;
word = buffer;
/*resets pointer*/
if(temp != 0){
temp = 0;
}
/*Program-ending user command, makes
reading input from a file for
testing purposes easier.*/
if(word == "QUIT"){
return 1;
}
/*Begins checking process for queries. Not included
as separate function for simplicity.*/
if(word == "AND"){
/*If next read word is "AND" query operator.*/
stream << "";
stream.clear();
stream.getline(buffer, 50, ' ');
word = buffer;
/*Calls function to check if word contai
ned
in HashTable. Stores location of the n
ode
containing the word into the query res
ult array.*/
queries[1] = hashT.findWord(word);
/*Makes temporary new node denoting whic
h documents
contain both query in queries[0] AND t
he previous
query, i.e. queries[1]*/
temp = new Node(queries[0], queries[1],
"AND");
/*Stores resultant query/node in queries
[0], as
basis for any succeding parts of the c
urret line
of input.*/
queries[0] = temp;
/*Used for checking if a query was "comp
lex"
i.e. multi-word.*/
check = 1;
}
else if(word == "OR"){
/*If next read word is "OR" query operator.
Functionally similar to the if(word == "AND")
code.*/
stream << "";
stream.clear();
stream.getline(buffer, 50, ' ');
word = buffer;
/*Calls function checking if word contai
ned in
HashTable. Stores location of node con
taining
the wrd in the query result array.*/
queries[1] = hashT.findWord(word);
/*Makes temporary new node denoting whic
h documents
contain query queries[0] OR the previo
us queries[1]*/
temp = new Node(queries[0], queries[1],
"OR");
/*Stores resultant query/node in queries
[0], as
basis for any succeding parts of the c
urrent line
of input.*/
queries[0] = temp;
/*Used for checking if a query was "comp
lex".*/
check = 1;
}
else if(queries[0] == 0){
/*If input is just a word, not a command.*/
/*Calls function searching for word in
HashTable. Stores location of the nod
e
containing word in queries[0].*/
queries[0] = hashT.findWord(word);
}
}
///Outputting results of query.
if(queries[0] != 0){
/*Will only == 0 when the queried word is contained
in NO documents, meaning not stored in the HashTable.*
/
/*Prints all documents matching query queries[0]
's
search criteria.*/
queries[0]->printDocs();
}
/*else*/ if(queries[0] == 0){
/*When word is not stored anywhere in HashTable.*/
cout << "Those words are in no documents." << en
dl;
}
if(check == 1){
/*Only == 1 when a complex query has occured.
when a complex query is carried out, queries[0]
ends up containing a node that is NOT part of the
HashTable. As such, it can be deleted, as a memory-sav
er.*/
delete queries[0];
check = 0;
}
/*"Resets" queries array after query is done.*/
queries[0] = 0;
queries[1] = 0;
queries[2] = 0;
/*When done reading line, clear stream for next use.*/
stream << "";
stream.clear();
}
/*Goes to beginnig of the while loop to accept another query.*/
};
///End of while loop.
///End of program.
return 1;
};

// "You might also like" (document-viewer navigation text; not part of the program)