I'm trying to convert a dictionary of the format:
d = {'A1': ['a', 'a', 'A2 (A3-)', 'a'],
'B1': ['b', 'b', 'B2 (B3-)', 'b'],
'C1': ['c', 'c', 'C2 (C3)-', 'c']}
To a list of the form:
e = [['A1', 'A2', 'A3'], ['B1', 'B2', 'B3'], ['C1', 'C2', 'C3']]
I know I should use regex to get the A2 and A3 data, but I'm having trouble putting this all together...
import re

# Pattern captures the token before the parenthesis and the token that
# precedes the trailing '-' inside it, e.g. 'A2 (A3-)' -> ('A2', 'A3').
# I suppose that you meant (C3-) and not (C3)-
pattern = re.compile(r'(\w+) \((\w+)-.*')

d = {'A1': ['a', 'a', 'A2 (A3-)', 'a'], 'B1': ['b', 'b', 'B2 (B3-)', 'b'], 'C1': ['c', 'c', 'C2 (C3-)', 'c']}

# For each entry, the interesting text sits at position 2 of the value list;
# prepend the key to the two captured groups.
out = [[key, *pattern.match(values[2]).groups()] for key, values in d.items()]
print(out)
# e.g. [['A1', 'A2', 'A3'], ['B1', 'B2', 'B3'], ['C1', 'C2', 'C3']]
Note: on Python 3.7+ dictionaries preserve insertion order, so the rows come out in the same order as the input dict; on older versions the order was arbitrary.
Related
import numpy as np
import math
import pandas as pd
# making an example DataFrame
data = DataFrame({'cust_id': ['c1', 'c1', 'c1', 'c2', 'c2', 'c2', 'c3', 'c3', 'c3',
'c1', 'c1', 'c1', 'c2', 'c2', 'c2', 'c3', 'c3', 'c3'],
'step_seq': ['123', '123', '123', '123', '123', '123', '123', '123', '123',
'456','456','456','456','456','456','456','456','456'],
'grade' : ['A', 'A', 'A', 'A', 'A', 'A', 'B', 'B', 'B',
'C','C','C','C','C','C','C','C','D'],
'pch_amt': [1, 2, 3, 4, 5, 6, 7, 8, 9,
1, 2, 3, 4, 5, 6, 7, 8, 9]})
print(data)
data = pd.pivot_table(data, index='step_seq', columns='pch_amt', values='grade', aggfunc=np.sum)
a = data.iloc[0,:].tolist()
b = set(a)
len(b)
for i in range(len(data.index)):
a = data.iloc[i,:].tolist()
print(a)
b = set(a)
# Qestion1 Related
print(b)
print(len(b))
data.loc[i,'Number of types']=len(b)
data
# Question 2 related
Before asking my questions: thank you for your help, as always.
I have two questions about the code above:
Q1) Why does the second set contain 'nan', and how can I remove it?
Q2) How can I append the 'Number of types' column to the pivot table?
I have a list that I want to split into multiple sublists
acq = ['A1', 'A2', 'D', 'A3', 'A4', 'A5', 'D', 'A6']

# Keep everything except the 'D' markers, preserving order.
ll = [item for item in acq if item != 'D']
print(ll)
The solution above gives a single flattened list, which is not what I am looking for. My desired output is:
['A1', 'A2']
['A3', 'A4', 'A5']
['A6']
Try itertools.groupby:
from itertools import groupby

acq = ["A1", "A2", "D", "A3", "A4", "A5", "D", "A6"]

# groupby cuts the list into alternating runs of separator ('D') and
# non-separator items; the key flags which kind each run is.
for is_separator, run in groupby(acq, lambda item: item == "D"):
    if not is_separator:
        print(list(run))
Prints:
['A1', 'A2']
['A3', 'A4', 'A5']
['A6']
No additional library needed, and it returns a list of lists:
acq = ['A1', 'A2', 'D', 'A3', 'A4', 'A5', 'D', 'A6']

all_list = []
ll = []
for token in acq:
    if token == 'D':
        # 'D' closes the current run; flush it and start a fresh one.
        all_list.append(ll)
        ll = []
    else:
        ll.append(token)
# The final run is not followed by a 'D', so flush it explicitly.
all_list.append(ll)
print(*all_list, sep='\n')
Prints:
['A1', 'A2']
['A3', 'A4', 'A5']
['A6']
acq = ['A1', 'A2', 'D', 'A3', 'A4', 'A5', 'D', 'A6']

ll = []    # list of completed runs
temp = []  # run currently being collected
# Iterate values directly — the enumerate index was never used.
for v in acq:
    if v == 'D':
        ll.append(temp)
        temp = []
        continue
    temp.append(v)
# FIX: was `l1.append(temp)` — a NameError (no `l1` exists); the final
# run must be flushed into `ll`.
ll.append(temp)
print(ll)
I want to convert this csv file Format:
into a hdf5 file with this structure:
I am using Pandas. Is there a simple way to do that?
You can use nested dictionaries via collections.defaultdict for this:
from collections import defaultdict
import pandas as pd

# read csv file
# df = pd.read_csv('input.csv', header=None)
df = pd.DataFrame([['A', 'a', 'a1'],
                   ['A', 'a', 'a2'],
                   ['A', 'b', 'b1'],
                   ['A', 'b', 'b2'],
                   ['A', 'c', 'c1'],
                   ['A', 'c', 'c2']],
                  columns=['col1', 'col2', 'col3'])

# Two-level grouping: col1 -> col2 -> list of col3 values.
d = defaultdict(lambda: defaultdict(list))
for rec in df.itertuples(index=False):
    d[rec.col1][rec.col2].append(rec.col3)
Result
defaultdict(<function __main__.<lambda>>,
{'A': defaultdict(list,
{'a': ['a1', 'a2'],
'b': ['b1', 'b2'],
'c': ['c1', 'c2']})})
Thanks, I will check out defaultdict. My solution is probably more hacky, but in case someone needs something customizable:
import pandas as pd

df = pd.DataFrame([['A', 'a', 'a1'],
                   ['A', 'a', 'a2'],
                   ['A', 'b', 'b1'],
                   ['A', 'b', 'b2'],
                   ['A', 'c', 'c1'],
                   ['A', 'c', 'c2']],
                  columns=['col1', 'col2', 'col3'])

cols = ['col1', 'col2', 'col3']
# children[col] maps a value in `col` to the set of its children (next column);
# parent[col] maps a value in `col` to its single parent (previous column).
children = {c: {} for c in cols}
parent = {c: {} for c in cols}

for _, row in df.iterrows():
    # Walk each adjacent (parent column, child column) pair in the row.
    for parent_col, child_col in zip(cols, cols[1:]):
        p_val = row[parent_col]
        c_val = row[child_col]
        parent[child_col][c_val] = p_val
        children[parent_col].setdefault(p_val, set()).add(c_val)
Result:
parent =
{'col1': {},
'col2': {'a': 'A', 'b': 'A', 'c': 'A'},
'col3': {'a1': 'a', 'a2': 'a', 'b1': 'b', 'b2': 'b', 'c1': 'c', 'c2': 'c'}}
Then you can walk up and down your hierarchy.
I have created a list which holds all of the data from the csv file.
How do I separately access data in its rows and columns?
For instance:
**a, b ,c**
**1** a1 b1 c1
**2** a2 b2 c2
How can I identify a single cell within the list?
Try the code below:
l = ['a', 'b', 'c', '1', 'a1', 'b1', 'c1', '2', 'a2', 'b2', 'c2']
columns = 3

# Skip the header cells, then cut the flat list into fixed-width rows of
# (row id + `columns` data cells).
row_width = columns + 1
result = [tuple(l[start:start + row_width])
          for start in range(columns, len(l), row_width)]
# Index the rows by their leading id.
result2 = {row[0]: row[1:] for row in result}

item_id = '2'
result2[item_id]
output:
('a2', 'b2', 'c2')
Or you could try the code below:
l = ['a', 'b', 'c', '1', 'a1', 'b1', 'c1', '2', 'a2', 'b2', 'c2']
columns = 3
item_id = '2'
index = l.index(item_id)
# FIX: the original sliced l[index:index + columns], which yields
# ['2', 'a2', 'b2'] — it includes the row id itself and drops the last
# cell. Start one element past the id to get the row's data cells,
# matching the claimed output ['a2', 'b2', 'c2'].
row = l[index + 1:index + 1 + columns]
print(row)
output:
['a2', 'b2', 'c2']
I have this
d = \
[('a', {'b': 'c1', 'd': 'f1'}),
('a', {'bb': 'c2', 'dd': 'f2'}),
('a', {'bbb': 'c3', 'ddd': 'f3'})]
I want the output to look like this:
['c1', 'f1', 'f2', 'c2', 'c3', 'f3']
I have tried this
In [51]: [a.values() for k,a in d]
Out[51]: [['c1', 'f1'], ['f2', 'c2'], ['c3', 'f3']]
(Note: this is Python 2 output — in Python 3, `dict.values()` returns view objects, not lists.)
I want to do this in the simplest and shortest possible way.
# Flatten the values of every dict in the (key, dict) pairs with a single
# nested comprehension. (Rewritten from a REPL transcript — the `>>>`
# prompts made the snippet non-runnable as a script.)
d = \
[('a', {'b': 'c1', 'd': 'f1'}),
 ('a', {'bb': 'c2', 'dd': 'f2'}),
 ('a', {'bbb': 'c3', 'ddd': 'f3'})]

# Outer loop over the pairs, inner loop over each dict's values.
result = [y for x in d for y in x[1].values()]
print(result)
# Python 3.7+: ['c1', 'f1', 'c2', 'f2', 'c3', 'f3'] (dict insertion order;
# the original answer showed Python 2's arbitrary hash order)
You can use itertools.chain:
# Same flattening via itertools.chain. (Rewritten from a REPL transcript —
# the `>>>` prompts made the snippet non-runnable as a script.)
from itertools import chain

d = [('a', {'b': 'c1', 'd': 'f1'}),
     ('a', {'bb': 'c2', 'dd': 'f2'}),
     ('a', {'bbb': 'c3', 'ddd': 'f3'})]

# chain.from_iterable lazily concatenates each dict's values view.
result = list(chain.from_iterable(x[1].values() for x in d))
print(result)
# Python 3.7+: ['c1', 'f1', 'c2', 'f2', 'c3', 'f3'] (dict insertion order)
Just an alternative answer using reduce:
import operator
from functools import reduce  # FIX: in Python 3, reduce is no longer a builtin

# `d` as defined earlier in the thread.
d = [('a', {'b': 'c1', 'd': 'f1'}),
     ('a', {'bb': 'c2', 'dd': 'f2'}),
     ('a', {'bbb': 'c3', 'ddd': 'f3'})]

# FIX: in Python 3, dict.values() returns a view object, and views do not
# support `+`; wrap each in list() so operator.add can concatenate them.
flat = reduce(operator.add, (list(a.values()) for _, a in d))

# Maybe not the best idea, but it works. Essentially equivalent to Blender's
# sum(...) version (same list() fix needed there on Python 3):
flat2 = sum((list(a.values()) for _, a in d), [])