save dataframe containing lists as csv file - python

I have created DataFrame as the following
df = pd.DataFrame({'name': imgname, 'pose': pose})
where imgname is a list of string such as ['image1','image2' ...]
the pose is a list of list such as pose = [array([ 55.77614093, 8.45208199, 2.69841043, 2.17110961]),
array([ 66.61236215, 5.87653161, -31.70704038, -21.68979529])]
I use this line to write the Dataframe to csv file
df.to_csv('dataset.csv',index = 'False')
However, the pose column is written as a string containing '\n' and extra spaces.
How can I save the values as numbers in CSV format?

For pose, you probably want a plain list of lists rather than a list of NumPy arrays.
Your code works if you remove the array(...) wrappers:
import pandas as pd

imgname = ['image1', 'image2']
# Plain nested lists (not NumPy arrays): each cell is then rendered as a
# single-line list such as "[55.77614093, 8.45208199, ...]".
pose = [
    [55.77614093, 8.45208199, 2.69841043, 2.17110961],
    [66.61236215, 5.87653161, -31.70704038, -21.68979529],
]
df = pd.DataFrame({'name': imgname, 'pose': pose})
df output -
name pose
0 image1 [55.77614093, 8.45208199, 2.69841043, 2.17110961]
1 image2 [66.61236215, 5.87653161, -31.70704038, -21.68...
after that,
# index must be the boolean False; the string 'False' is truthy, so the row
# index would still be written as an extra first column.
df.to_csv('dataset.csv', index=False)
works just fine.

# Each cell of 'Data' holds a NumPy array object.
df = pd.DataFrame({
    'Data': [np.array([1, 2, 3, 4]), np.array([5, 6, 7, 8])],
    'IMAGE': ['IMAGE1', 'IMAGE2'],
})
print(df)
# index must be the boolean False; the string 'False' is truthy, so the row
# index would still be written as an extra first column.
df.to_csv('dataset.csv', index=False)
and the result is the following :

Related

How do I convert and access the properties of an array of objects saved as a string in Python?

I have the following array structure, which I consume from a .csv file
0,Done,"[{'id': '7-84-1811', 'idType': 'CIP', 'suscriptionId': '89877485'}]"
0,Done,"[{'id': '1-232-42', 'idType': 'IO', 'suscriptionId': '23532r32'}]"
0,Done,"[{'id': '2323p23', 'idType': 'LP', 'suscriptionId': 'e32e23dw'}]"
0,Done,"[{'id': 'AU23242', 'idType': 'LL', 'suscriptionId': 'dede143234'}]"
To be able to handle it with pandas, I created its respective columns, but I only need to access the "id" and "idType" properties.
My code
from pandas.io.json import json_normalize
import pandas as pd
path = 'path_file'
df_fet = pd.read_csv(path, names=['error', 'resul', 'fields'])
df_work = df_fet[['fields'][0]['id', 'idType']]
print(df_work.head())
Returned error:
TypeError: string indices must be integers
desired output
id, idType
0. '7-84-1811', 'CIP'
1. '1-232-42', 'IO'
...
Here's a way to achieve the desired output
import ast

import pandas as pd

path = 'filepath'
df = pd.read_csv(path, names=['error', 'resul', 'fields'])
# The 'fields' column holds the text of a Python list of dicts.
# ast.literal_eval parses such literals without executing code, whereas
# eval() would run anything found in the CSV file.
df["fields"] = df["fields"].fillna("[]").apply(ast.literal_eval)
# Keep the two wanted keys of the first dict in each row. Rows whose list is
# empty (missing values were filled with "[]") are skipped instead of
# failing on row[0].
arr = [[row[0]["id"], row[0]["idType"]] for row in df["fields"] if row]
new = pd.DataFrame(arr, columns=["id", "idType"])
print(new)
Output:
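eval() makes Python evaluate its argument as a Python expression, so the string stored in the CSV is turned back into an actual list. Be aware that eval() executes whatever code the string contains; for files you do not fully trust, ast.literal_eval() parses the same list/dict literals safely.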

compare multiple column with Numpy and panda

Hello, my goal is to find which circuit (Nom_ci) to select.
I can't find the right approach; I'm trying to find the right method.
I did it with a chain of IF / ELIF ... but the run times were enormous.
Can you help me find the best method?
Thanks in advance.
import pandas as pd
import numpy as np
import re
cycling = pd.DataFrame(
{
'Comp_ci': [1, 2, 3, 3, 3, 3, 3, 2, 1, 1],
'Nom_ci': ['RONCQ_A2_OPTI_SRV_S3',
'RONCQ_A3_SRV_S3, RONCQ_A2_OPTI_SRV_S3',
'RONCQ_A2_TEMP_SRV_S3, RONCQ_A3_SRV_S3, RONCQ_A2_OPTI_SRV_S3',
'RONCQ_A2_SRV_PC_S3, RONCQ_A2_TEMP_SRV_S3, RONCQ_A3_SRV_S3',
'RONCQ_A2_PC_SRV_S3, RONCQ_A2_SRV_S3, RONCQ_A2_TEMP_SRV_S3',
'RONCQ_A2_OPTI_SRV_S3, RONCQ_A2_PC_SRV_S3, RONCQ_A2_SRV_S3',
'RONCQ_A3_SRV_S3, RONCQ_A2_OPTI_SRV_S3, RONCQ_A2_PC_SRV_S3',
'RONCQ_A2_TEMP_SRV_S3, RONCQ_A3_SRV_S3',
'RONCQ_A2_SRV_S3',
'RONCQ_A2_PC_SRV_S3'],
'result hope':['autre','RONCQ_A3_VSR_S3','RONCQ_A3_VSR_S3','RONCQ_A3_VSR_S3','RONCQ_A2_VSR_S3','RONCQ_A2_VSR_S3','RONCQ_A3_VSR_S3','RONCQ_A3_VSR_S3','RONCQ_A2_VSR_S3','autre']
}
)
print(cycling)
condition=((cycling['Count RSF Circuit']==1) &
(cycling['Nom ConcatSet'][0].str.contains("_OPTI").eq(False)) &
(cycling['Nom ConcatSet'][0].str.contains("_TEMP").eq(False))&
(cycling['Nom ConcatSet'][0].str.contains("_PC").eq(False)))
cycling['col3'] = np.where(condition, cycling['Nom ConcatSet'], 'autre')
print(cycling)
EDIT :
Ok, I think I have understood what you tried to achieve : is that it ?
# Split each 'Nom_ci' string into a list of names; the regex ', +' matches a
# comma followed by one or more spaces.
temp = cycling['Nom_ci'].str.split(', +')
print(temp)
print('-'*50)
# One row per name; the original row index is repeated for every name.
temp = temp.explode()
print(temp)
print('-'*50)
# Convert the Series to a single-column DataFrame.
temp = temp.to_frame()
print(type(temp))
print('-'*50)
# True for names containing none of the excluded markers. A plain
# alternation is used because capture groups make str.contains emit a
# warning; na=True keeps missing names excluded, as the former
# `== False` comparison did.
temp['match'] = ~temp['Nom_ci'].str.contains('_TEMP|_PC|_OPTI', na=True)
print(temp)
print('-'*50)
# Keep only the rows that satisfy the criteria (indexes are still untouched).
temp = temp[temp['match']]
print(temp)
print('-'*50)
# Rename the column to whatever you want.
temp = temp.rename(columns={'Nom_ci': 'col3'})
print(temp)
print('-'*50)
# Drop the helper "match" column, which is no longer needed.
temp = temp.drop(columns='match')
print(temp)
print('-'*50)
# Join the two frames on their indexes.
cycling = cycling.join(temp)
print(cycling)
print('-'*50)
# Rows without any matching name get 'autre'. The result is assigned back
# rather than filled in place, because in-place fillna on a selected column
# is deprecated and may not update the frame.
cycling['col3'] = cycling['col3'].fillna('autre')
print(cycling)

How can we represent a pandas.series value on Django?

I have the following code, where I am binning a Pandas dataframe into given number of bins:
def contibin(data, target, bins=10):
    """Build a Weight-of-Evidence / Information-Value table per feature.

    Every column of ``data`` except ``target`` is grouped by value. Numeric
    columns (dtype kind in ``'bifc'``) with more than 10 distinct values are
    first cut into quantile bins with ``pd.qcut``.

    Parameters
    ----------
    data : pandas.DataFrame
        Feature columns plus the target column.
    target : str
        Name of the target column. Its per-group sum is reported as
        'No. of Good', so it is presumably a 0/1 indicator -- confirm with
        the caller.
    bins : int, default 10
        Number of quantile bins requested from ``pd.qcut``; duplicate bin
        edges are dropped, so fewer bins may result.

    Returns
    -------
    pandas.DataFrame
        The per-feature tables stacked row-wise, with columns 'Range',
        'Total', 'No. of Good', 'No. of Bad', 'Dist. of Good',
        'Dist. of Bad', 'WoE' and 'IV'. Empty when ``data`` has no feature
        columns.
    """
    tables = []
    cols = data.columns
    for ivars in cols[~cols.isin([target])]:
        column = data[ivars]
        # Bin only numeric columns with many distinct values; everything
        # else is grouped on its raw values.
        if (column.dtype.kind in 'bifc') and (len(np.unique(column)) > 10):
            grouping = pd.qcut(column, bins, duplicates='drop')
        else:
            grouping = column
        d0 = pd.DataFrame({'x': grouping, 'y': data[target]})
        d = d0.groupby("x", as_index=False).agg({"y": ["count", "sum"]})
        d.columns = ['Range', 'Total', 'No. of Good']
        d['No. of Bad'] = d['Total'] - d['No. of Good']
        # Counts are floored at 0.5 so an empty class never yields log(0)
        # or a division by zero in the WoE below.
        d['Dist. of Good'] = np.maximum(d['No. of Good'], 0.5) / d['No. of Good'].sum()
        d['Dist. of Bad'] = np.maximum(d['No. of Bad'], 0.5) / d['No. of Bad'].sum()
        d['WoE'] = np.log(d['Dist. of Good'] / d['Dist. of Bad'])
        d['IV'] = d['WoE'] * (d['Dist. of Good'] - d['Dist. of Bad'])
        tables.append(d)
    # Concatenate once at the end instead of growing a frame per iteration.
    if not tables:
        return pd.DataFrame()
    return pd.concat(tables, axis=0)
The problem I am facing is that when I try to integrate the code into a front end using Django, I am not able to display woeDF['Range'] the way I normally see it. I tried converting the pandas Series to strings, but it still isn't giving me what I want. To illustrate what I want to see in my front end, I am attaching a picture of a sample table which I got by running this code on the Churn Modelling dataset. The image of the table I need
You can turn the DataFrame into an iterable of row objects using DataFrame.itertuples(index=False).
You will then be able to iterate through the rows in Jinja and access the columns by name. See the example below in Python:
import pandas as pd

columns = {"name": ["john", "skip", "abu", "harry", "ben"],
           "age": [10, 20, 30, 40, 50]}
df = pd.DataFrame(columns)
print(df)
# itertuples yields one namedtuple per row, so each column is reachable as
# an attribute named after it.
df_objects = df.itertuples(index=False)
for person in df_objects:
    print("{0}: {1}".format(person.name, person.age))

Saving results of df.show()

I need to capture the contents of a field from a table in order to append it to a filename. I have sorted the renaming process. Is there any way I can save the output of the following in order to append it to the renamed file? I can't use Scala; it has to be in Python.
df = sqlContext.sql("select replace(value,'-','') from dbsmets1mig02_technical_build.tbl_Tech_admin_data where type = 'Week_Starting'")
df.show()
Have you tried using indexing?
df = pd.DataFrame({
'Name':['Peter', 'Peter', 'Peter', 'Peter'],
'Planned_Start':['1/1/2019', '1/2/2019', '1/15/2019', '1/2/2019'],
'Duration':[2, 3, 5, 6],
'Hrs':[0.6, 1, 1.2, 0.3]})
df.iloc[3][1]
The syntax is df.iloc[< index of row containing desired entry >][< position of your entry >]
https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.iloc.html
You could convert the df object into a pandas DataFrame/Series object, then can use other Python commands more easily on this;
pandasdf = df.toPandas()
Once you have this as a pandas data frame - say it looks something like this;
import pandas as pd
pandasdf = pd.DataFrame({"col1" : ["20191122"]})
Then you can pull out the string and use f strings to join it into a filename;
date = pandasdf.iloc[0, 0]
filename = f"my_file_{date}.csv"
Then we have the filename object as 'my_file_20191122.csv'

remove parentheses from complex numbers - pandas

As a follow up to this post python pandas complex number and now that complex works fine with pandas, I want to save the complex numbers but without the parentheses -
when I use the following command the last column (complex number) is printed inside parentheses
EDIT: here is the full code, to read the data file (sample here)
import numpy as np
import pandas as pd

df = pd.read_csv('final.dat', sep=",", header=None)
df.columns = ['X.1', 'X.2', 'X.3', 'X.4', 'X.5', 'X.6', 'X.7', 'X.8']
# The file writes the imaginary unit as 'i'; Python's complex() expects 'j'.
# The builtin complex is used because the np.complex alias was removed in
# NumPy 1.24.
df['X.8'] = df['X.8'].str.replace('i', 'j').apply(complex)
df1 = df.groupby(["X.1", "X.2", "X.5"])["X.8"].mean().reset_index()
# Add the extra columns (the assignment aligns on the row index).
df1['X.3'] = df["X.3"]
df1['X.4'] = df["X.4"]
df1['X.6'] = df["X.6"]
df1['X.7'] = df["X.7"]
# reindex(..., axis=1) replaces DataFrame.reindex_axis, which was removed in
# pandas 1.0.
sorted_data = df1.reindex(sorted(df1.columns), axis=1)
# to_csv is a method and must be called; assigning an argument list to it
# is a syntax error.
sorted_data.to_csv('final_sorted.dat', sep=',', header=False)
All works well, but in the output CSV file the complex numbers are inside parentheses, and I cannot use them this way, so I want to remove them.
Prob could have better support for reading/writing complex, but ATM this will work.
In [25]: df = DataFrame([[1+2j],[2-1j]],columns=list('A'))
In [26]: df
Out[26]:
A
0 (1+2j)
1 (2-1j)
In [27]: df['A'] = df['A'].apply(str).str.replace(r'\(|\)', '', regex=True)  # regex=True is required: pandas >= 2.0 treats the pattern as a literal string by default
In [28]: df
Out[28]:
A
0 1+2j
1 2-1j
In [29]: df.to_csv('test.csv')
In [30]: !cat test.csv
,A
0,1+2j
1,2-1j

Categories

Resources