################# trim_name.py################################
# TRIMS THE STRING ELEMENT IN THE DATASET IF THE STRING CONTAINS MORE THAN ONE COMMA SEPARATED SUB STRINGS.

# For selected categorical columns, it trims the string element in each row,
#  if that string has more than one comma separated sub string. In that case,
# it creates a new column by retaining only the first sub string in the name.
# Those columns which are trimmed this way are appended to the data set with
# an extension name "_Trimmed" added to the original name.
# Other columns are left as they are in the data set.
# ------------------------------------------------------------------------

# including the required libraries
from script.algorithm import *
from script.view import *
from script.dataset import *
from script.omega import createComponent, showDialog
from com.strandgenomics.cube.dataset import DatasetUtil
from java.lang import Float
from javax.swing import *
import string


# Trims the given string at the first comma location, and returns the trimmed
# string together with a flag (1 if a comma was found, 0 otherwise).
def nameTrim(str):
	"""Cut the given string at its first comma.

	Returns a pair (prefix, flag). prefix is the text that precedes the
	first comma, or the whole string when it holds no comma. flag is 1
	when a comma was found and 0 otherwise.
	"""
	commaPos = str.find(",")
	if commaPos < 0:
		# No comma anywhere: nothing is cut off.
		return str, 0
	return str[:commaPos], 1




# Dialog box for selecting many columns.
def openDialogBox(dset):
	"""Show a multiple-column chooser for the data set dset.

	The chooser component is created with columnType="categorical".
	Returns the component's value when showDialog gives back a result,
	and None when showDialog returns None.
	"""
	p=createComponent(type="columnlist", id="chosenColumns", columnType="categorical",description="Multiple Column Chooser- Categorical columns", dataset=dset)
	# Identity test against None: it never dispatches to the comparison
	# methods of whatever object showDialog hands back.
	if showDialog(p) is None:
		return None
	return p.getValue()


# Main script starts here.

#get current data set
node=script.project.getActiveDatasetNode()
d=node.getDataset()

# Ask the user which columns should be trimmed.
# openDialogBox returns None when the dialog produced no result; in that case
# there is nothing to trim, so continue with an empty list instead of handing
# None to the coercion and to the loop below.
lis=openDialogBox(d)
if lis is None:
	lis=[]
else:
	lis=script.coercion.to_py(lis)

# For every chosen column, build a list holding each row's value cut at its
# first comma. That list is added to the data set as a new string column named
# "<original name>_Trimmed", but only when at least one row actually contained
# a comma; a column without any comma gets no companion column.
for i in lis:
	column=d[i]   # look the column up once instead of once per row
	trimmedColumn=[]
	trimmedCount=0   # number of rows of this column that contained a comma
	for j in range(d.getRowCount()):
		# nameTrim returns (text before the first comma, 1 if a comma was found else 0)
		trimstr,trimStatus=nameTrim(str(column[j]))
		trimmedCount=trimmedCount+trimStatus
		trimmedColumn.append(trimstr)
	if trimmedCount>0:
		newColumn=createStringColumn(column.getName()+"_Trimmed", trimmedColumn)
		d.addColumn(newColumn)



		
