##################  row_normalization.py  ########################

# A script for performing normalization across each row in the data set.
# A set of continuous columns in the current dataset can be selected
# using the dialog box. For these selected columns, the mean value across each
# row is found. The mean is subtracted
# from each one of the selected columns. You can choose to append the
# mean-subtracted columns or create a child dataset with these columns.
# Each mean-subtracted column is named by adding the prefix "row_mean_norm_"
# to the original column name; the child dataset, when one is created, is
# given the name "row_mean_normalized".
#
# While averaging across the rows, it is assumed that there are no missing values.
#####################################################################

# Including libraries.
from script.algorithm import *
from script.view import *
from script.dataset import *
from script.omega import createComponent, showDialog
from com.strandgenomics.cube.dataset import DatasetUtil
from java.lang import Float
from javax.swing import *
import string, math


# A function for opening a dialog box which has two parts.
# One for choosing the columns for row normalization, and the
# other for the output options.
 
def openDialogBox1(dset):
	"""Show the row-normalization dialog and return the user's choices.

	The dialog groups two components under the title "Row Mean Normalization":
	an output-option selector (shown first) and a column list restricted to
	the continuous columns of dset.

	Returns a 3-tuple:
	  - the value returned by showDialog (the main script treats None as
	    "do nothing" -- presumably a cancelled dialog; confirm against the
	    host API),
	  - the selected columns, coerced with script.coercion.to_py,
	  - the selected output option as a string.
	"""
	# The column chooser is created before the option selector, although the
	# selector is placed first in the group.
	columnChooser = createComponent(
		type="columnlist",
		id="name1",
		description="Choose the columns for Row Normalization",
		columnType="continuous",
		dataset=dset)
	outputChooser = createComponent(
		type="enum",
		id="name",
		description="Output option",
		options=["Append columns to dataset", "Create new child dataset"])
	dialogGroup = createComponent(
		type="group",
		id="alltogether",
		description="Row Mean Normalization",
		components=[outputChooser, columnChooser])

	dialogResult = showDialog(dialogGroup)

	# The component values are read back regardless of how the dialog ended.
	chosenColumns = script.coercion.to_py(columnChooser.getValue())
	chosenOutput = str(outputChooser.getValue())
	return dialogResult, chosenColumns, chosenOutput


# A function for finding mean values across the rows for a given column list.
# "dset" is the dataset, and "colList" is the selected column list from previous dialog box.
# While computing means across rows, it is assumed that there are no missing values.
def rowMean(dset, colList):
	"""Return the mean of the selected columns, taken across each row.

	dset    -- the dataset; dset[key] must yield the column for each key in
	           colList, and columns must support "+" and division by an int.
	colList -- the keys (as returned by the column-selection dialog) of the
	           columns to average.

	The values are summed column by column and divided once by the number of
	columns. No missing-value handling is done; the data is assumed to be
	complete.

	Raises ValueError if colList is empty, since the mean of zero columns is
	undefined.
	"""
	columnCount = len(colList)
	if columnCount == 0:
		raise ValueError("rowMean requires at least one column")

	# Start from a float so that the accumulated sum (and the final division)
	# is always done in floating point.
	rowSum = 0.0
	for icol in colList:
		rowSum = rowSum + dset[icol]

	# Divide once, after all columns have been accumulated.
	return rowSum / columnCount



# Main script starts here.

# Fetch the active dataset node and the dataset it holds.
node = script.project.getActiveDatasetNode()
d = node.getDataset()

# Ask the user which columns to normalize and where the output should go.
result, selectedColumns, outputOption = openDialogBox1(d)

if outputOption == "Append columns to dataset" and len(selectedColumns) > 0 and result != None:
	# Append mode: the row means are added to the current dataset as
	# "mean-column", followed by one mean-subtracted column per selection.
	meanColumn = createFloatColumn("mean-column", rowMean(d, selectedColumns))
	d.addColumn(meanColumn)

	for column in selectedColumns:
		d.addColumn(createFloatColumn("row_mean_norm_" + d[column].getName(), d[column] - meanColumn))

	# Report the number of added columns (the selections plus the mean column).
	addedCount = len(selectedColumns) + 1
	message = " ".join([str(addedCount), "new columns added to the current dataset"])
	messageArea = JTextArea(message)
	# Use a label's background colour for the text area.
	messageArea.setBackground(JLabel().getBackground())
	result = showDialog(createComponent(type="ui", id="box", description="Message", component=messageArea))

elif outputOption == "Create new child dataset" and len(selectedColumns) > 0 and result != None:
	# Child mode: create a child dataset node named "row_mean_normalized"
	# with every row and no columns, then fill it with the mean-subtracted
	# columns.
	rowMeans = rowMean(d, selectedColumns)
	allRows = list(range(d.getRowCount()))
	childNode = node.addChildDatasetNode(name="row_mean_normalized", rowIndices=allRows, columnIndices=[], setActive=1, addMarkedColumns=0)
	childName = childNode.getName()
	script.view.Table().show()
	childDataset = node.getChildNode(childName).getDataset()

	# The mean column is used only for the subtraction here; unlike append
	# mode, it is not added to the child dataset.
	meanColumn = createFloatColumn("mean-column", rowMeans)

	for column in selectedColumns:
		childDataset.addColumn(createFloatColumn("row_mean_norm_" + d[column].getName(), d[column] - meanColumn))




	
