I have two columns of data
(sample data) and I want to calculate total users for each week day.
For instance, I'd want my output like this (dict/list anything will do):
Monday: 25,
Tuesday: 30,
Wednesday:45,
Thursday: 50,
Friday:24,
Saturday:22,
Sunday:21
Here's my attempt:
def rider_ship (filename):
with open('./data/Washington-2016-Summary.csv','r') as f_in:
Sdict = []
Cdict = []
reader = csv.DictReader(f_in)
for row in reader:
if row['user_type']=="Subscriber":
if row['day_of_week'] in Sdict:
Sdict[row['day_of_week']]+=1
else:
Sdict [row['day_of_week']] = row['day_of_week']
else:
if row ['day_of_week'] in Cdict:
Cdict[row['day_of_week']] +=1
else:
Cdict[row['day_of_week']] = row['day_of_week']
return Sdict, Cdict
print (Sdict)
print (Cdict)
t= rider_ship ('./data/Washington-2016-Summary.csv')
print (t)
TypeError::list indices must be integers or slices, not str
How about using pandas?
Let's first create a file-like object with io library:
import io
s = u"""day_of_week,user_type
Monday,subscriber
Tuesday,customer
Tuesday,subscriber
Tuesday,subscriber"""
file = io.StringIO(s)
Now to the actual code:
import pandas as pd
df = pd.read_csv(file) # "path/to/file.csv"
Sdict = df[df["user_type"] == "subscriber"]["day_of_week"].value_counts().to_dict()
Cdict = df[df["user_type"] == "customer"]["day_of_week"].value_counts().to_dict()
Now we have:
Sdict = {'Tuesday': 2, 'Monday': 1}
Cdict = {'Tuesday': 1}
Related
How can I simplify this function I am trying to create? I would like to pull data from a csv. Turn it into a Dataframe, randomly select a choice, add that choice to a corresponding dictionary key value pair.
def generate_traits():
import pandas as pd
df_bonds = pd.read_csv('/file/location_1')
df_alignments = pd.read_csv('/file/location_2')
df_faiths = pd.read_csv('/file/location_3')
df_flaws = pd.read_csv('/file/location_4')
df_ideals = pd.read_csv('/file/location_5')
df_lifestyles = pd.read_csv('/file/location_6')
df_organizations = pd.read_csv('/file/location_7')
df_personalities = pd.read_csv('/file/location_8')
df_names = pd.read_csv("/file/location_9")
random_bond = df_bonds.sample(1)
random_alignment = df_alignments.sample(1)
random_faith = df_faiths.sample(1)
random_flaw = df_flaws.sample(1)
random_ideal = df_ideals.sample(1)
random_lifestyle = df_lifestyles.sample(1)
random_organization = df_organizations.sample(1)
random_personaltiy = df_personalities.sample(1)
random_name = df_names.sample(1)
traits_dict={"Name:": random_name.iloc[0,0],
"Alignment:": random_alignment.iloc[0,0],
"Bond:":random_bond.iloc[0,0],
"Religion:":random_faith.iloc[0,0],
"Flaw:":random_flaw.iloc[0,0],
"Ideal:":random_ideal.iloc[0,0],
"Lifestyle:":random_lifestyle.iloc[0,0],
"Organization:":random_organization.iloc[0,0],
"Personality:":random_personaltiy.iloc[0,0]}
return traits_dict
The function does behave as expected however, I know there must be a way to loop through this I just have not found any way to do so.
You can chain your operations:
import pandas as pd
def generate_traits():
return {'Name': pd.read_csv('/file/location_1').sample(1).iloc[0,0],
'Alignment:': pd.read_csv('/file/location_2').sample(1).iloc[0,0],
'Bond': pd.read_csv('/file/location_3').sample(1).iloc[0,0],
'Religion': pd.read_csv('/file/location_4').sample(1).iloc[0,0],
'Flaw': pd.read_csv('/file/location_5').sample(1).iloc[0,0],
'Ideal': pd.read_csv('/file/location_6').sample(1).iloc[0,0],
'Lifestyle': pd.read_csv('/file/location_7').sample(1).iloc[0,0],
'Organization': pd.read_csv('/file/location_8').sample(1).iloc[0,0],
'Personality': pd.read_csv('/file/location_9').sample(1).iloc[0,0]}
def generate_traits():
import pandas as pd
name_location = {'Bond': 'location_1'
'Alignment': 'location_2'
'Religion': 'location_3'
'Flaw': 'location_4'
'ideals': 'location_5'
'Lifestyle': 'location_6'
'Organization': 'location_7'
'Personality': 'location_8'
'Name': 'location_9'}
all_df = {name: pd.read_csv(f'/file/{loc}') for name, loc in name_location.items()}
traits_dict = {name: df.sample(1).iloc[0, 0] for name, df in all_df.items()}
return traits_dict
Im having some performance issues with the code below, mostly because of the apply function that im using on a huge dataframe. I want to update the semi_dict dictionary with some other data that im calculating with the some functions. Is it any way to improve this?
def my_function_1(semi_dict, row):
#do some calculation/other stuff based on the row data and append it to the dictionary
random_dict = dict(data=some_data, more_data=more_data)
semi_dict["data"].append(random_dict)
def my_function_2(semi_dict, row):
#do some calculation/other stuff based on the row data and append it to the dictionary
random_dict = dict(data=some_data, more_data=more_data)
semi_dict["data2"].append(random_dict)
dictionary_list = []
for v in values:
df_1_rows = df_1_rows[(df_1_rows.values == v)]
df_2_rows = df_2_rows[(df_2_rows.values == v)]
semi_dict = dict(value=v, data=[], data2=[])
function = partial(my_function_1, semi_dict)
function_2 = partial(my_function_2, semi_dict)
df_1_rows.apply(lambda row : function(row), axis=1)
df_2_rows.apply(lambda row : function_2(row), axis=1)
dictionary_list.append(semi_dict)
This answer uses dictionary merge from How to merge dictionaries of dictionaries?, but depending on your use case, you might not need it in the end:
import pandas as pd
import random
len_df = 10
row_values = list("ABCD")
extra_col_values = list("12345")
df_1 = pd.DataFrame([[random.choice(row_values), random.choice(extra_col_values)] for _ in range(len_df)], columns=['col1', 'extra1'])
df_2 = pd.DataFrame([[random.choice(row_values), random.choice(extra_col_values)] for _ in range(len_df)], columns=['col2', 'extra2'])
def make_dict(df):
# some calculations on the df
return {
'data': df.head(1).values.tolist(),
}
def make_dict_2(df):
# some calculations on the df
return {
'data_2': df.head(1).values.tolist(),
}
def merge(a, b, path=None):
"merges b into a, taken from https://stackoverflow.com/questions/7204805/how-to-merge-dictionaries-of-dictionaries "
if path is None: path = []
for key in b:
if key in a:
if isinstance(a[key], dict) and isinstance(b[key], dict):
merge(a[key], b[key], path + [str(key)])
elif a[key] == b[key]:
pass # same leaf value
else:
raise Exception('Conflict at %s' % '.'.join(path + [str(key)]))
else:
a[key] = b[key]
return a
dict1 = df_1.groupby('col1').apply(make_dict).to_dict()
dict2 = df_2.groupby('col2').apply(make_dict_2).to_dict()
result = merge(dict1, dict2)
result
Why can't I convert the loop group in groupby as list? Currently, I am working on Django==2.2.1 and when I try this data = [...] below into python console, it is working fine.
from itertools import groupby
from operator import itemgetter
#login_required
def list(request, template_name='cart/list.html'):
# I also try with this dummy data
test_data = [{'total_order':1,'agent_name':'agentbeli','total_pcs':1,'total_kg':5.0},{'total_order':1,'agent_name':'agent123','total_pcs':1,'total_kg':5.0},{'total_order':1,'agent_name':'agent123','total_pcs':1,'total_kg':6.0},{'total_order':1,'agent_name':'agentbeli','total_pcs':1,'total_kg':6.0},{'total_order':1,'agent_name':'agentbeli','total_pcs':1,'total_kg':6.0},{'total_order':1,'agent_name':'agent123','total_pcs':1,'total_kg':7.0}]
print(type(data)) # a list
sorted_totals = sorted(test_data, key=itemgetter('total_order'))
for agent_name, group in groupby(sorted_totals, key=lambda x: x['agent_name']):
print(agent_name, list(group)) # I stopped here when converting the `group` as list.
But, I am getting an error looking like this when I try it at views in Django.
I also tried it with defaultdict
from collections import defaultdict
#login_required
def list(request, template_name='cart/list.html'):
test_data = [{'total_order':1,'agent_name':'agentbeli','total_pcs':1,'total_kg':5.0},{'total_order':1,'agent_name':'agent123','total_pcs':1,'total_kg':5.0},{'total_order':1,'agent_name':'agent123','total_pcs':1,'total_kg':6.0},{'total_order':1,'agent_name':'agentbeli','total_pcs':1,'total_kg':6.0},{'total_order':1,'agent_name':'agentbeli','total_pcs':1,'total_kg':6.0},{'total_order':1,'agent_name':'agent123','total_pcs':1,'total_kg':7.0}]
grouped = defaultdict(list)
for data_total in test_data:
grouped[data_total['agent_name']].append(data_total) # stoped here
grouped_out = []
for agent_name, group in grouped.items():
total_order = 0
total_pcs = 0
total_kg = 0
if isinstance(group, list):
for data_total in group:
total_order += data_total.get('total_order')
total_pcs += data_total.get('total_pcs')
total_kg += data_total.get('total_kg')
grouped_out.append({
'agent_name': agent_name,
'total_order': total_order,
'total_pcs': total_pcs,
'total_kg': total_kg
})
But the error I found stoped by wrapper view. If we following the previous issue, it referenced with this _wrapped_view
Finally, I fixed it manually by using a dict.
test_data = [{'total_order':1,'agent_name':'agentbeli','total_pcs':1,'total_kg':5.0},{'total_order':1,'agent_name':'agent123','total_pcs':1,'total_kg':5.0},{'total_order':1,'agent_name':'agent123','total_pcs':1,'total_kg':6.0},{'total_order':1,'agent_name':'agentbeli','total_pcs':1,'total_kg':6.0},{'total_order':1,'agent_name':'agentbeli','total_pcs':1,'total_kg':6.0},{'total_order':1,'agent_name':'agent123','total_pcs':1,'total_kg':7.0}]
grouped = {}
for data_total in test_data:
agent_name = data_total.get('agent_name')
if agent_name in grouped:
new_data = grouped[agent_name] # dict
new_data['total_order'] += data_total.get('total_order')
new_data['total_pcs'] += data_total.get('total_pcs')
new_data['total_kg'] += data_total.get('total_kg')
grouped[agent_name].update(**new_data)
else:
grouped[agent_name] = data_total
And the result of grouped is look like this:
{'agent123': {'agent_name': 'agent123',
'total_kg': 18.0,
'total_order': 3,
'total_pcs': 3},
'agentbeli': {'agent_name': 'agentbeli',
'total_kg': 17.0,
'total_order': 3,
'total_pcs': 3}}
I have the following CSV Data,
Rule1,Status1,1
Rule1,Status2,1
Rule1,Status3,1
Rule1,Status4,2
Rule2,Status1,2
Rule2,Status2,1
Rule2,Status3,1
Rule2,Status4,3
I have unique rules (first column) stored in a list called Rules. I want my dictionary to look like the following:
DictionaryFull = {
'Rule1' : {1 : [Status1, Status2, Status3], 2 : [Status4]},
'Rule2' : {1 : [Status2, Status3], 2 : [Status1], 3 : [Status4]}
}
Here is what I tried:
openfile = ('data.csv', 'rU')
finalfile = csv.reader(openfile, delimiter=',')
FullDictionary = {}
for row in finalfile:
for j in range (0, 300): #300 number of rules
if Rules[j] not in FullDictionary:
for i in range(1, 71): #These are third column numbers 1 - 71
if i == int(row[2]) and row[0] == Rules[j]:
FullDictionary = {Rules[j] : { i : [].append[row[1]}}
print FullDictionary
But I am getting the following as the result:
{'Rule1': {1 : None}} and so on
Am I doing something wrong? How to accomplish this task of having a dictionary with both another dictionary and a list.
I tried this:
def something():
full_dictionary = {}
with open(DataFilePath) as f:
reader = csv.reader(f)
for row in reader:
rule = row[2], status = row[0], num = int(row[5])
r = full_dictionary.setdefault(rule, {})
r.setdefault(num, []).append(status)
print full_dictionary
The error: ValueError: I/O operation on closed file
Hwo about using collection.defaultdict:
import csv
from collections import defaultdict
full_dictionary = defaultdict(lambda: defaultdict(list))
with open('data.csv') as f:
reader = csv.reader(f)
for rule, status, num in reader:
full_dictionary[rule][num].append(status)
print full_dictionary
output:
defaultdict(<function <lambda> at 0x00000000025A6438>, {
'Rule2': defaultdict(<type 'list'>, {
'1': ['Status2', 'Status3'],
'3': ['Status4'],
'2': ['Status1']
}),
'Rule1': defaultdict(<type 'list'>, {
'1': ['Status1', 'Status2', 'Status3'],
'2': ['Status4']
})
})
If you don't want to use defaultdict, you have to care new key.
For example, using dict.setdefault:
import csv
full_dictionary = {}
with open('data.csv') as f:
reader = csv.reader(f)
for rule, status, num in reader:
r = full_dictionary.setdefault(rule, {})
r.setdefault(num, []).append(status)
print full_dictionary
output:
{'Rule1': {'1': ['Status1', 'Status2', 'Status3'], '2': ['Status4']},
'Rule2': {'1': ['Status2', 'Status3'], '2': ['Status1'], '3': ['Status4']}}
list.append returns None, so your assignment Rules[j] = [].append([row[1]) is setting Rules[j] = None.
Amend that to:
FullDictionary = {Rules[j] : { i : [row[1]}}
or
old_value = Rules[j].get(i, [])
old_value.append(row[1])
depending on what you're wishing to achieve.
I have a code which is able to give me the list like this:
Name id number week number
Piata 4 6
Mali 2 20,5
Goerge 5 4
Gooki 3 24,64,6
Mali 5 45,9
Piata 6 1
Piata 12 2,7,8,27,16 etc..
with the below code:
import csv
from datetime import date
datedict = defaultdict(set)
with open('d:/info.csv', 'r') as csvfile:
filereader = csv.reader(csvfile, 'excel')
#passing the header
read_header = False
start_date=date(year=2009,month=1,day=1)
#print((seen_date - start_date).days)
tdic = {}
for row in filereader:
if not read_header:
read_header = True
continue
# reading the rest rows
name,id,firstseen = row[0],row[1],row[3]
try:
seen_date = datetime.datetime.strptime(firstseen, '%d/%m/%Y').date()
deltadays = (seen_date-start_date).days
deltaweeks = deltadays/7 + 1
key = name,id
currentvalue = tdic.get(key, set())
currentvalue.add(deltaweeks)
tdic[key] = currentvalue
except ValueError:
print('Date value error')
pass
Right now I want to convert my list to a list that give me number of ids for each name and its weeks numbers like the below list:
Name number of ids weeknumbers
Mali 2 20,5,45,9
Piata 3 1,6,2,7,8,27,16
Goerge 1 4
Gooki 1 24,64,6
Can anyone help me with writing the code for this part?
Since it looks like your csv file has headers (which you are currently ignoring) why not use a DictReader instead of the standard reader class? If you don't supply fieldnames the DictReader will assume the first line contains them, which will also save you from having to skip the first line in your loop.
This seems like a great opportunity to use defaultdict and Counter from the collections module.
import csv
from datetime import date
from collections import defaultdict, Counter
datedict = defaultdict(set)
namecounter = Counter()
with open('d:/info.csv', 'r') as csvfile:
filereader = csv.DictReader(csvfile)
start_date=date(year=2009,month=1,day=1)
for row in filereader:
name,id,firstseen = row['name'], row['id'], row['firstseen']
try:
seen_date = datetime.datetime.strptime(firstseen, '%d/%m/%Y').date()
except ValueError:
print('Date value error')
pass
deltadays = (seen_date-start_date).days
deltaweeks = deltadays/7 + 1
datedict[name].add(deltaweeks)
namecounter.update([name]) # Without putting name into a list, update will index each character
This assumes that (name, id) is unique. If this is not the case then you can use anotherdefaultdict for namecounter. I've also moved the try-except statement so it is more explicit in what you are testing.
givent that :
tdict = {('Mali', 5): set([9, 45]), ('Gooki', 3): set([24, 64, 6]), ('Goerge', 5): set([4]), ('Mali', 2): set([20, 5]), ('Piata', 4): set([4]), ('Piata', 6): set([1]), ('Piata', 12): set([8, 16, 2, 27, 7])}
then to output the result above:
names = {}
for ((name, id), more_weeks) in tdict.items():
(ids, weeks) = names.get(name, (0, set()))
ids = ids + 1
weeks = weeks.union(more_weeks)
names[name] = (ids, weeks)
for (name, (id, weeks)) in names.items():
print("%s, %s, %s" % (name, id, weeks)