After a very happy couple of days using Streamlit, I've managed to get a couple of apps running and to package an offline app. Pretty cool, and not bad for a biologist! My first app is here: Streamlit used as a test case for data viewing.
The goal is an offline app that people can run on their own machines. It doesn't like being given a path to local files the way local hosting for nativefier does (not sure why), so I am trying to use the file uploader instead. However, this isn't working as expected; it is likely something I have missed, but the docs suggest that the UploadedFile type is an in-memory version of the CSV file and should pass directly to pandas. And indeed, passing a single file, or each file from a loop, directly to pandas read_csv seems to work, yet I still get errors when the files go through my parser class. Calling a globbed path to files, or the URL of raw CSV files, also works.
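For example, this minimal sketch of a direct read (not my app, just to illustrate what the docs describe) behaves as expected:

```
import pandas as pd
import streamlit as st

uploaded_file = st.file_uploader("Upload CSV", type='csv')
if uploaded_file is not None:
    # UploadedFile is file-like, so pandas can read it directly
    df = pd.read_csv(uploaded_file)
    st.write(df)
```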
What works is a globbed list of local file names (or a path from st.text_input passed through pathlib):

```
arrays = ArayaManager(files)  # 'files' is normally a globbed list of local file names
comp = arrays.concatenate_dataframes()  # access the required files
```

Passing

```
uploaded_files = st.file_uploader("Upload CSV", type='csv', accept_multiple_files=True)
```
does not work: I get the error `concatenate_dataframes() takes 1 positional argument but 2 were given`.
Why is this?
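For what it's worth, a call shaped like the one below reproduces exactly that error, because concatenate_dataframes() takes only self; so perhaps I am effectively handing the uploaded files to the method rather than to the constructor:

```
arrays = ArayaManager(files)
# TypeError: concatenate_dataframes() takes 1 positional argument but 2 were given
comp = arrays.concatenate_dataframes(uploaded_files)
```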
So I tried:

```
uploaded_files = st.file_uploader('Select files', type=['csv'], accept_multiple_files=True)
file_lst = [uploaded_file.getbuffer() for uploaded_file in uploaded_files]
files = st.write(file_lst)
```
and tried passing `files` to ArayaManager, but got the error `TypeError: 'NoneType' object is not iterable`. Of course, st.write() returns None, so I am not passing anything and there is nothing for concatenate_dataframes to work on.
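If the UploadedFile objects really are file-like, I would expect something shaped like this minimal sketch to be closer (this assumes pd.read_csv can take the objects directly; presumably get_run_name() would also need each_file.name rather than the object itself, since it slices the file name string):

```
uploaded_files = st.file_uploader('Select files', type=['csv'], accept_multiple_files=True)
if uploaded_files:  # the list is empty until files are chosen
    arrays = ArayaManager(uploaded_files)  # hand over the UploadedFile objects themselves
    comp = arrays.concatenate_dataframes()
```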
The parser class:

```
import os

import pandas as pd


class WellDataManager:
    """Parent class for importing data from any instrument."""

    def __init__(self,
                 files,
                 run_name_split_loc=1,
                 group_name=""):
        super().__init__()
        # project attributes
        self.file_names = files
        self.group_name = group_name
        self.run_name = ""
        self.split_char_loc = run_name_split_loc
        self.run_df = pd.DataFrame()
        self.group_df = pd.DataFrame()

        # csv read attributes
        self.tables = 1
        self.index_column = [0]
        self.header_row = [1]
        self.row_count = [8]
        self.column_names = ['Row_ID', 'Col_ID', 'Value']

    def concatenate_dataframes(self):
        for each_file in self.file_names:
            self.get_run_name(each_file)
            self.build_dataframes(each_file)
            self.group_df = pd.concat([self.group_df, self.run_df], ignore_index=True)
        # print(self.group_df)
        return self.group_df

    def build_dataframes(self, each_file):
        self.read_file(each_file)
        self.coerce_numeric_values()
        self.run_df['Group_ID'] = self.group_name
        self.run_df['File_root'] = each_file
        self.run_df['Run_ID'] = self.run_name
        # print(self.run_df)

    def coerce_numeric_values(self):
        # may be used to force values to numeric. Not applicable to all instruments
        pass

    def read_file(self, file_name):
        """Reads initial data from a CSV file."""
        df = pd.read_csv(file_name,
                         header=self.header_row[0],
                         nrows=self.row_count[0],
                         index_col=self.index_column[0])
        df = df.stack()
        self.run_df = df.reset_index()
        self.run_df.columns = self.column_names

    def get_run_name(self, file_name):
        """Splits string to get run name from file name."""
        self.run_name = file_name[:self.split_char_loc]
        self.run_file_in = os.path.basename(file_name)


class ArtelVMSManager(WellDataManager):
    """Class that handles Artel VMS well data."""

    def __init__(self,
                 files,
                 run_name_split_loc=1,
                 group_name=""):
        super().__init__(files,
                         run_name_split_loc,
                         group_name)

        # csv read attributes
        self.tables = 1
        self.index_column = [0]
        self.header_row = [18]
        self.row_count = [8]
        self.column_names = ['Row_ID', 'Col_ID', 'Volume']

    def coerce_numeric_values(self):
        """Coerce the 'Volume' data to numeric; otherwise it is a mixture of strings and numeric values."""
        num_col = self.column_names[2]
        self.run_df[num_col] = pd.to_numeric(self.run_df[num_col], errors='coerce')


class ArayaManager(WellDataManager):
    """Class that handles Araya well data."""

    def __init__(self,
                 files,
                 run_name_split_loc=6,
                 group_name="",
                 dyes=None,
                 separate_column=True):
        super().__init__(files,
                         run_name_split_loc,
                         group_name)
        if dyes is None:
            dyes = ['FAM', 'VIC', 'ROX']

        # Araya-specific items
        self.separate_column_per_dye = separate_column
        self.channel_df = pd.DataFrame()
        self.dyes = dyes
        self.channels = ['CH1', 'CH2', 'CH3']

        # csv read attributes
        self.tables = 3
        self.index_column = ["<>", "<>", "<>"]
        self.header_row = [5, 23, 41]
        self.row_count = [16, 16, 16]
        if self.separate_column_per_dye:
            # TODO: generalize for other dye names
            self.column_names = ['Row_ID', 'Col_ID', 'FAM_RFU', 'VIC_RFU', 'ROX_RFU']
        else:
            self.column_names = ['Row_ID', 'Col_ID', 'RFU', 'Channel', 'Dye']

    def read_each_channel(self, file_name, ch):
        """Reads individual channel data from the CSV file."""
        df = pd.read_csv(file_name,
                         header=self.header_row[ch],
                         nrows=self.row_count[ch],
                         na_values="<>")

        # Need to shift to get rid of annoying '<>'. Otherwise won't parse correctly.
        df = df.shift(periods=1, axis='columns')
        # df.drop('<>', axis=1, inplace=True)

        # Stack df for the various dyes and add additional columns
        df = df.stack()
        self.channel_df = df.reset_index()

        # For separate columns for each dye, rename RFU columns. pd.concat() does the rest!
        if self.separate_column_per_dye:
            self.channel_df.columns = self.column_names[0:3]
            self.channel_df.rename(columns={'FAM_RFU': f'{self.dyes[ch]}_RFU'},
                                   inplace=True)
        # Case to stack all dyes into common RFU and Dye columns.
        else:
            self.channel_df['Channel'] = self.channels[ch]
            self.channel_df['Dye'] = self.dyes[ch]
            self.channel_df.columns = self.column_names

    def read_file(self, file_name):
        """Reads each channel's data from the CSV file."""
        # Loops through the 3 channel tables in the csv output files.
        self.run_df = pd.DataFrame()
        for ch in range(self.tables):
            self.read_each_channel(file_name, ch)

            # Case to have separate columns for each dye
            if self.separate_column_per_dye:
                self.channel_df = self.channel_df[self.channel_df.columns.difference(self.run_df.columns)]
                self.run_df = pd.concat([self.run_df, self.channel_df], axis=1)
            # Case to stack all dyes into common RFU and Dye columns.
            else:
                self.run_df = pd.concat([self.run_df, self.channel_df], ignore_index=True)

        # Force columns to correct order. Fixed bug with concat of separate dye columns.
        self.run_df = self.run_df[self.column_names]

    def get_run_name(self, file_name):
        """Splits string to get run name from file name."""
        self.run_name = file_name[-(self.split_char_loc + 4):-4]
```
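For completeness, reading the uploaded files directly with pandas does work; with uploaded_files from st.file_uploader above, a loop like this (a minimal sketch of what I tested) reads every file without error:

```
for uploaded_file in uploaded_files:
    df = pd.read_csv(uploaded_file)  # each UploadedFile reads cleanly on its own
    st.write(df)
```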
Not sure why I cannot pass the UploadedFile-type data through my class when pandas handles it directly: a single file passed straight to pd.read_csv reads fine, and a for loop over each file (as above) does the same. Any help would be appreciated.