// Source: SymphonyElectron/js/search/search.js (JavaScript, 598 lines, 19 KiB)
'use strict';
const fs = require('fs');
const randomString = require('randomstring');
2017-07-31 01:19:16 -05:00
const childProcess = require('child_process');
2017-07-27 00:18:11 -05:00
const path = require('path');
const isDevEnv = require('../utils/misc.js').isDevEnv;
const isMac = require('../utils/misc.js').isMac;
const makeBoundTimedCollector = require('./queue');
const searchConfig = require('./searchConfig');
const log = require('../log.js');
const logLevels = require('../enums/logLevels.js');
2017-07-27 00:18:11 -05:00
2017-07-31 08:07:30 -05:00
const libSymphonySearch = require('./searchLibrary');
const Crypto = require('../cryptoLib');
const INDEX_VALIDATOR = searchConfig.LIBRARY_CONSTANTS.INDEX_VALIDATOR;
2017-07-25 10:28:38 -05:00
2017-08-08 00:35:40 -05:00
/**
 * This search class communicates with the SymphonySearchEngine C library
 * (through the `libSymphonySearch` bindings).
 * There should be only one instance of this class in the Electron app.
 */
class Search {

    /**
     * Constructor for the SymphonySearchEngine library wrapper.
     * Stores the folder locations taken from searchConfig, starts the
     * asynchronous decrypt + init sequence and creates the real-time
     * message collector.
     * @param userId (for the index folder name)
     * @param key (handed to the Crypto helper)
     */
    constructor(userId, key) {
        this.isInitialized = false;
        this.userId = userId;
        this.key = key;
        this.indexFolderName = `${searchConfig.FOLDERS_CONSTANTS.PREFIX_NAME_PATH}_${this.userId}_${searchConfig.INDEX_VERSION}`;
        this.dataFolder = searchConfig.FOLDERS_CONSTANTS.INDEX_PATH;
        this.realTimeIndex = searchConfig.FOLDERS_CONSTANTS.TEMP_REAL_TIME_INDEX;
        this.batchIndex = searchConfig.FOLDERS_CONSTANTS.TEMP_BATCH_INDEX_FOLDER;
        this.messageData = [];
        this.isRealTimeIndexing = false;
        this.crypto = new Crypto(userId, key);
        this.decryptAndInit();
        this.collector = makeBoundTimedCollector(this.checkIsRealTimeIndexing.bind(this),
            searchConfig.REAL_TIME_INDEXING_TIME, this.realTimeIndexing.bind(this));
    }

    /**
     * Decrypts the existing user .enc file and then initialises the library.
     * The library is initialised whether or not decryption succeeds.
     * init() is run exactly once; if it throws, the failure is logged instead
     * of being retried or surfacing as an unhandled promise rejection.
     */
    decryptAndInit() {
        const initialise = () => this.init();
        this.crypto.decryption()
            // Two-argument then(): a throwing init() must not re-trigger init().
            .then(initialise, initialise)
            .catch((err) => {
                log.send(logLevels.ERROR, 'Search initialization failed -> ' + err);
            });
    }

    /**
     * Returns whether init() has completed.
     * @returns {boolean}
     */
    isLibInit() {
        return this.isInitialized;
    }

    /**
     * Initialises the SymphonySearchEngine library, makes sure the data
     * folder exists, wipes the temporary real-time and batch index folders,
     * validates the user and real-time indexes and finally deletes messages
     * older than the configured search period
     * (searchConfig.SEARCH_PERIOD_SUBTRACTOR; per the original comment this
     * keeps roughly the last three months).
     */
    init() {
        libSymphonySearch.symSEInit();
        libSymphonySearch.symSEEnsureFolderExists(this.dataFolder);
        Search.deleteIndexFolders(this.realTimeIndex);
        Search.deleteIndexFolders(this.batchIndex);
        Search.indexValidator(this.indexFolderName);
        Search.indexValidator(this.realTimeIndex);
        const indexDateStartFrom = new Date().getTime() - searchConfig.SEARCH_PERIOD_SUBTRACTOR;
        libSymphonySearch.symSEDeleteMessages(this.indexFolderName, null,
            searchConfig.MINIMUM_DATE, indexDateStartFrom.toString());
        this.isInitialized = true;
    }

    /**
     * Indexes a batch of messages into a temporary (partial) index folder.
     * @param {String} messages - JSON string that must parse to an array
     * @returns {Promise} resolves with the native call result; rejects with
     * an Error when the input is missing/invalid, the library is not
     * initialised, the data folder is missing or the native call fails
     */
    indexBatch(messages) {
        return new Promise((resolve, reject) => {
            if (!messages) {
                log.send(logLevels.ERROR, 'Batch Indexing: Messages not provided');
                reject(new Error('Batch Indexing: Messages are required'));
                return;
            }

            try {
                const parsed = JSON.parse(messages);
                if (!Array.isArray(parsed)) {
                    log.send(logLevels.ERROR, 'Batch Indexing: Messages must be an array');
                    reject(new Error('Batch Indexing: Messages must be an array'));
                    return;
                }
            } catch (e) {
                log.send(logLevels.ERROR, 'Batch Indexing: parse error -> ' + e);
                reject(new Error(e));
                return;
            }

            if (!this.isInitialized) {
                log.send(logLevels.ERROR, 'Library not initialized');
                reject(new Error('Library not initialized'));
                return;
            }

            if (!fs.existsSync(this.dataFolder)) {
                log.send(logLevels.ERROR, 'User index folder not found');
                reject(new Error('User index folder not found'));
                return;
            }

            const indexId = randomString.generate(searchConfig.BATCH_RANDOM_INDEX_PATH_LENGTH);
            // The original JSON string (not the parsed value) is handed to the library.
            libSymphonySearch.symSECreatePartialIndexAsync(this.batchIndex, indexId, messages, (err, res) => {
                if (err) {
                    log.send(logLevels.ERROR, 'Batch Indexing: error ->' + err);
                    reject(new Error(err));
                    return;
                }
                resolve(res);
            });
        });
    }

    /**
     * Merges the temporary indexes created by indexBatch() into the user
     * index and removes the temporary batch folder on success.
     * @returns {Promise} resolves with the native call result
     */
    mergeIndexBatches() {
        return new Promise((resolve, reject) => {
            if (!fs.existsSync(this.dataFolder)) {
                log.send(logLevels.ERROR, 'User index folder not found');
                reject(new Error('User index folder not found'));
                return;
            }

            libSymphonySearch.symSEMergePartialIndexAsync(this.indexFolderName, this.batchIndex, (err, res) => {
                if (err) {
                    log.send(logLevels.ERROR, 'Error merging the index ->' + err);
                    reject(new Error(err));
                    return;
                }
                Search.deleteIndexFolders(this.batchIndex);
                resolve(res);
            });
        });
    }

    /**
     * Hands a real-time message to the collector created in the constructor
     * (queue and flush).
     * @param {Object} message
     */
    batchRealTimeIndexing(message) {
        this.collector(message);
    }

    /**
     * Returns the current state of the real-time indexing.
     * @returns {boolean} true while a real-time indexing call is in flight
     */
    checkIsRealTimeIndexing() {
        return this.isRealTimeIndexing;
    }

    /**
     * Indexes an array of messages in real time.
     * @param {String} message - JSON string that must parse to an array
     * @returns {*} the value returned by the native async call, or an Error
     * object when the parsed input is not an array
     * @throws {Error} on JSON parse failure, when the library is not
     * initialised or when the data folder is missing
     */
    realTimeIndexing(message) {
        try {
            const parsed = JSON.parse(message);
            if (!Array.isArray(parsed)) {
                log.send(logLevels.ERROR, 'RealTime Indexing: Messages must be an array real-time indexing');
                // NOTE(review): this path returns the Error instead of throwing,
                // unlike the other validation failures below. Kept as-is so
                // existing callers are not affected.
                return (new Error('RealTime Indexing: Messages must be an array'));
            }
        } catch (e) {
            log.send(logLevels.ERROR, 'RealTime Indexing: parse error -> ' + e);
            throw (new Error(e));
        }

        if (!this.isInitialized) {
            log.send(logLevels.ERROR, 'Library not initialized');
            throw new Error('Library not initialized');
        }

        if (!fs.existsSync(this.dataFolder)) {
            log.send(logLevels.ERROR, 'User index folder not found');
            throw new Error('User index folder not found');
        }

        this.isRealTimeIndexing = true;
        return libSymphonySearch.symSEIndexRealTimeAsync(this.realTimeIndex, message, (err, result) => {
            // Clear the flag first so the collector is never left blocked.
            this.isRealTimeIndexing = false;
            if (err) {
                log.send(logLevels.ERROR, 'RealTime Indexing: error -> ' + err);
                // NOTE(review): thrown from inside the native callback, so the
                // caller of realTimeIndexing() cannot catch it.
                throw new Error(err);
            }
            return result;
        });
    }

    /**
     * Reads every file of a batch folder and parses it as JSON.
     * For the demo search app only.
     * Stops at the first unreadable/invalid file instead of continuing to
     * read the remaining ones.
     * @param {String} batch - name of the folder below `msgsjson`
     * @returns {Promise} resolves with the array of parsed file contents
     */
    readJson(batch) {
        return new Promise((resolve, reject) => {
            const dirPath = path.join(searchConfig.FOLDERS_CONSTANTS.EXEC_PATH, isMac ? '..' : '', 'msgsjson', batch);
            const messageFolderPath = isDevEnv ? path.join('./msgsjson', batch) : dirPath;
            // A throw from the sync fs calls rejects the promise (executor semantics).
            const files = fs.readdirSync(messageFolderPath);
            this.messageData = [];
            for (const file of files) {
                const data = fs.readFileSync(path.join(messageFolderPath, file), "utf8");
                if (!data) {
                    reject(new Error('Error reading batch'));
                    return;
                }
                try {
                    this.messageData.push(JSON.parse(data));
                } catch (err) {
                    reject(new Error(err));
                    return;
                }
            }
            resolve(this.messageData);
        });
    }

    /**
     * Encrypts the index; meant to be called after the batch indexes have
     * been merged into the main user index.
     * @param key - forwarded to the Crypto helper
     * @returns {*} whatever crypto.encryption() returns
     */
    encryptIndex(key) {
        return this.crypto.encryption(key);
    }

    /**
     * Runs a search against the user index and the real-time index.
     * The native call returns a char * which is read, parsed as JSON and
     * always freed afterwards.
     * @param {String} query
     * @param {Array} senderIds
     * @param {Array} threadIds
     * @param {String} fileType
     * @param {String} startDate - epoch milliseconds; clamped to the search period
     * @param {String} endDate - epoch milliseconds; defaults to searchConfig.MAXIMUM_DATE
     * @param {Number} limit - non-zero integer, otherwise 25
     * @param {Number} offset - integer, otherwise 0
     * @param {Number} sortOrder - non-zero integer, otherwise searchConfig.SORT_BY_SCORE
     * @returns {Promise} resolves with the parsed search result
     */
    searchQuery(query, senderIds, threadIds, fileType, startDate,
                endDate, limit, offset, sortOrder) {

        // Anything that is not a non-zero integer falls back to the default.
        const orDefault = (value, fallback) =>
            (Number.isInteger(value) && value !== 0 ? value : fallback);

        return new Promise((resolve, reject) => {
            if (!this.isInitialized) {
                log.send(logLevels.ERROR, 'Library not initialized');
                reject(new Error('Library not initialized'));
                return;
            }

            if (!fs.existsSync(this.indexFolderName) || !fs.existsSync(this.realTimeIndex)) {
                log.send(logLevels.ERROR, 'Index folder does not exist.');
                reject(new Error('Index folder does not exist.'));
                return;
            }

            const q = Search.constructQuery(query, senderIds, threadIds, fileType);
            if (q === undefined) {
                reject(new Error('Search query error'));
                return;
            }

            // Never search further back than the configured search period.
            const searchPeriod = new Date().getTime() - searchConfig.SEARCH_PERIOD_SUBTRACTOR;
            let startDateTime = searchPeriod;
            if (startDate) {
                startDateTime = new Date(parseInt(startDate, 10)).getTime();
                if (!startDateTime || startDateTime < searchPeriod) {
                    startDateTime = searchPeriod;
                }
            }

            let endDateTime = searchConfig.MAXIMUM_DATE;
            if (endDate) {
                const eTime = new Date(parseInt(endDate, 10)).getTime();
                if (eTime) {
                    endDateTime = eTime;
                }
            }

            const _limit = orDefault(limit, 25);
            const _offset = orDefault(offset, 0);
            const _sortOrder = orDefault(sortOrder, searchConfig.SORT_BY_SCORE);

            const returnedResult = libSymphonySearch.symSESearch(this.indexFolderName, this.realTimeIndex, q,
                startDateTime.toString(), endDateTime.toString(), _offset, _limit, _sortOrder);
            try {
                const ret = returnedResult.readCString();
                resolve(JSON.parse(ret));
            } finally {
                // Release the native result even when reading/parsing throws.
                libSymphonySearch.symSEFreeResult(returnedResult);
            }
        });
    }

    /**
     * Returns the latest message timestamp from the indexed data.
     * @returns {Promise} resolves with the string read from the native result
     */
    getLatestMessageTimestamp() {
        return new Promise((resolve, reject) => {
            if (!this.isInitialized) {
                log.send(logLevels.ERROR, 'Library not initialized');
                reject(new Error('Not initialized'));
                return;
            }

            if (!fs.existsSync(this.indexFolderName)) {
                log.send(logLevels.ERROR, 'Index folder does not exist.');
                reject(new Error('Index folder does not exist.'));
                return;
            }

            libSymphonySearch.symSEGetLastMessageTimestampAsync(this.indexFolderName, (err, res) => {
                if (err) {
                    log.send(logLevels.ERROR, 'Error getting the index timestamp ->' + err);
                    reject(new Error(err));
                    // Do not touch (or free) the result of a failed call.
                    return;
                }
                const returnedResult = res;
                try {
                    const ret = returnedResult.readCString();
                    resolve(ret);
                } finally {
                    libSymphonySearch.symSEFreeResult(returnedResult);
                }
            });
        });
    }

    /**
     * Deletes the temporary real-time index folder and runs the index
     * validator on it again.
     */
    deleteRealTimeFolder() {
        Search.deleteIndexFolders(this.realTimeIndex);
        Search.indexValidator(this.realTimeIndex);
    }

    /**
     * This is the query constructor for the search function.
     * A missing or non-string searchQuery is treated as "no text".
     * @param {String} searchQuery
     * @param {Array} senderId
     * @param {Array} threadId
     * @param {String} fileType
     * @returns {string} the query, or undefined when there is nothing to
     * search for (handled in searchQuery())
     */
    static constructQuery(searchQuery, senderId, threadId, fileType) {
        let searchText = "";
        let textQuery = "";
        if (typeof searchQuery === 'string') {
            searchText = searchQuery.trim().toLowerCase(); //to prevent injection of AND and ORs
            textQuery = Search.getTextQuery(searchText);
        }

        const hashTags = Search.getHashTags(searchText);
        let hashCashTagQuery = "";
        if (hashTags.length > 0) {
            // Every tag is quoted and followed by a single space.
            hashCashTagQuery = " OR tags:(" + hashTags.map((tag) => "\"" + tag + "\" ").join("") + ")";
        }

        const hasAttachments = Boolean(fileType);
        let additionalAttachmentQuery = "";
        if (hasAttachments) {
            additionalAttachmentQuery = String(fileType).toLowerCase() === "attachment"
                ? "(hasfiles:true)"
                : "(filetype:(" + fileType + "))";
        }

        let q = "";
        if (searchText.length > 0) {
            q = "((text:(" + textQuery + "))" + hashCashTagQuery;
            if (hasAttachments) {
                q += " OR (filename:(" + searchText + "))";
            }
            q = q + ")";
        }

        q = Search.appendFilterQuery(q, "senderId", senderId);
        q = Search.appendFilterQuery(q, "threadId", threadId);

        if (q === "") {
            //undefined will be handled in the search function
            return hasAttachments ? additionalAttachmentQuery : undefined;
        }
        return hasAttachments ? q + " AND " + additionalAttachmentQuery : q;
    }

    /**
     * Appends a field filter (used for senderId and threadId) to the query.
     * @param {String} searchText - query built so far
     * @param {String} fieldName
     * @param {Array} valueArray
     * @returns {string} searchText unchanged when valueArray is empty or
     * missing, otherwise the filter AND-ed to searchText (or the filter
     * alone when searchText is empty)
     */
    static appendFilterQuery(searchText, fieldName, valueArray) {
        if (!valueArray || valueArray.length === 0) {
            return searchText;
        }
        const values = valueArray.map((item) => "\"" + item + "\" ").join("");
        const filter = "(" + fieldName + ":(" + values + "))";
        return searchText.length > 0 ? searchText + " AND " + filter : filter;
    }

    // hashtags can have any characters(before the latest release it was
    // not like this). So the only regex is splitting the search query based on
    // whitespaces
    /**
     * Returns the hash tags (#...) and cash tags ($...) found in the query.
     * @param {String} searchText
     * @returns {Array} lower-cased tags in order of appearance
     */
    static getHashTags(searchText) {
        return searchText.toLowerCase()
            .trim()
            .replace(/\s\s+/g, ' ')
            .split(' ')
            .filter((token) => token.length !== 0 &&
                (token.startsWith('#') || token.startsWith('$')));
    }

    /**
     * If the search query does not have double quotes (implying phrase search),
     * then create all tuples of the terms in the search query, longest
     * tuples first.
     * @param {String} searchText
     * @returns {String}
     */
    static getTextQuery(searchText) {
        const normalized = searchText.trim().toLowerCase();
        //if contains quotes we assume it will be a phrase search
        if (searchText.includes("\"")) {
            return normalized;
        }
        //else we will create tuples
        const tokens = normalized.replace(/\s{2,}/g, " ").trim().split(" ");
        const tuples = [];
        for (let size = tokens.length; size > 0; size--) { // number of tokens in a tuple
            for (let start = 0; start <= tokens.length - size; start++) { // start from index
                tuples.push(Search.putTokensInRange(tokens, start, size));
            }
        }
        return tuples.join(" ");
    }

    /**
     * Helper function for getTextQuery()
     * Given a list of tokens create a quoted tuple given the start index of
     * the token list and given the number of tokens to use.
     * The requested range is expected to lie inside the token list.
     * @param {Array} tokens
     * @param {Number} start
     * @param {Number} numTokens
     * @returns {String}
     */
    static putTokensInRange(tokens, start, numTokens) {
        return "\"" + tokens.slice(start, start + numTokens).join(" ") + "\"";
    }

    /**
     * Runs the external index validator executable on the given index.
     * @param {String} file - passed as the only argument to the validator
     * @returns {*} the parsed validator output when its status is 'OK',
     * otherwise an Error object (returned, not thrown)
     * @throws {Error} when the validator output cannot be parsed; errors
     * raised by execFileSync itself propagate unchanged
     */
    static indexValidator(file) {
        const result = childProcess.execFileSync(INDEX_VALIDATOR, [file]).toString();
        try {
            const data = JSON.parse(result);
            if (data.status === 'OK') {
                return data;
            }
            log.send(logLevels.ERROR, 'Unable validate index folder');
            return new Error('Unable validate index folder');
        } catch (err) {
            throw new Error(err);
        }
    }

    /**
     * Recursively removes the given folder and all folders and files inside
     * it. Does nothing when the location does not exist.
     * @param {String} location
     */
    static deleteIndexFolders(location) {
        if (!fs.existsSync(location)) {
            return;
        }
        fs.readdirSync(location).forEach((file) => {
            const curPath = path.join(location, file);
            // lstat: a symlink is unlinked itself rather than followed.
            if (fs.lstatSync(curPath).isDirectory()) {
                Search.deleteIndexFolders(curPath);
            } else {
                fs.unlinkSync(curPath);
            }
        });
        fs.rmdirSync(location);
    }

}
/**
 * Deletes the index data folder (searchConfig.FOLDERS_CONSTANTS.INDEX_PATH)
 * together with everything inside it.
 * Meant to be called when the app is closed/signed-out/navigates.
 */
function deleteIndexFolder() {
    Search.deleteIndexFolders(searchConfig.FOLDERS_CONSTANTS.INDEX_PATH);
}
2017-08-08 00:35:40 -05:00
/**
* Exporting the search library
* @type {{Search: Search}}
*/
2017-07-25 10:28:38 -05:00
module.exports = {
Search: Search,
deleteIndexFolder: deleteIndexFolder
};