I want to parse an Excel file, build a dictionary from it, and update the User model instance whose user_id matches the dictionary's user_id.
Excel is
user_id is in cell F1, so I really cannot understand how to build the dictionary.
Now views.py is
#coding:utf-8
from django.shortcuts import render
import xlrd
from .models import User
book = xlrd.open_workbook('../data/excel1.xlsx')
sheet = book.sheet_by_index(1)
def build_employee(employee):
    """Map a spreadsheet job title to its one-letter employee code.

    Returns 'l' for 'leader', 'm' for 'manager' and 'o' for 'others';
    any other value yields None.
    """
    codes = {'leader': 'l', 'manager': 'm', 'others': 'o'}
    # dict.get keeps the None result for unrecognised titles.
    return codes.get(employee)
for row_index in range(sheet.nrows):
rows = sheet.row_values(row_index)
is_man = rows[4] != ""
emp = build_employee(rows[5])
user = User(user_id=rows[1], name_id=rows[2], name=rows[3],
age=rows[4],man=is_man,employee=emp)
user.save()
book2 = xlrd.open_workbook('../data/excel2.xlsx')
sheet2 = book2.sheet_by_index(0)
headers = sheet2.row_values(0)
large_item = None
data_dict = {}
for row_index in range(sheet2.nrows):
rows2 = sheet2.row_values(row_index)
large_item = rows2[1] or large_item
# Create dict with headers and row values
row_data = {}
for idx_col, value in enumerate(rows2):
header_value = headers[idx_col]
# Avoid to add empty column. A column in your example
if header_value:
row_data[headers[idx_col]] = value
# Add row_data to your data_dict with
data_dict[row_index] = row_data
for row_number, row_data in data_dict.items():
user1 = User.objects.filter(user_id = data['user_id']).exists()
if user1:
user1.__dict__.update(**data_dict)
user1.save()
My code can only pick up data that sits in one row (in this case B4~E4), so I cannot see how to write it to achieve my goal. How should I write it?
Ideal dictionary is
{"user_id":1, "name":"Blear","nationality":"America","domitory":"A","group":1}
Your spreadsheet appears to have only one entry. If that is the case, you do not need to iterate over the rows; instead, just extract the cells you need, for example:
import xlrd

book = xlrd.open_workbook('excel1.xlsx')
sheet = book.sheet_by_index(0)

# (key, row index, column index) of every value to extract. Indices are
# zero-based, so row 0 / column 5 is cell F1 and row 3 / columns 1-4 are
# cells B4-E4.
cells = [
    ('user_id', 0, 5),
    ('name', 3, 1),
    ('nationality', 3, 2),
    ('domitory', 3, 3),
    ('group', 3, 4),
]

user1 = {key: sheet.cell_value(rowy, colx) for key, rowy, colx in cells}
# Parenthesised so the line is valid on both Python 2 and Python 3.
print(user1)
Giving you:
{'nationality': u'America', 'user_id': 1.0, 'name': u'Blear', 'group': 1.0, 'domitory': u'A'}
This uses a Python dictionary comprehension to build the user1 dictionary based on cells.
Related
I'm trying to take the First Column (Name) and the fourth column (is active) from a CSV file and do the following:
Create a single entry for the Company Name
If 'Is Active' is Yes, increment that company's yes-count and output the final result.
If 'Is Active' is No, increment that company's no-count instead, so that each company ends up with an 'is active' and an 'is not active' total.
Data1 and Data2 fields are other columns that I don't care about at this time.
csv =
Name,Data1,Data2, Is Active:
Company 1,Data1,Data2,Yes
Company 1,Data1,Data2,Yes
Company 1,Data1,Data2,Yes
Company 2,Data1,Data2,Yes
Company 2,Data1,Data2,No
Company 2,Data1,Data2,Yes
Company 2,Data1,Data2,Yes
Company 3,Data1,Data2,No
Company 3,Data1,Data2,No
Ideal result would be in the format of:
Company name, Yes-count, no-count
I've started with csv.reader to read the columns and I can put them into lists, but I'm unsure how to compare and consolidate names and counts after that.
Any help would be greatly appreciated.
One way to do it:
import csv

# Tally, per company, how many rows are active ("Yes") and how many are not.
consolidated = {}
# newline="" is the form the csv module documents for files given to csv.reader.
with open("your_csv_file", "r", newline="") as file:
    reader = csv.reader(file)
    # Skip the header; the default avoids StopIteration on an empty file.
    next(reader, None)
    for line in reader:
        if not line:
            # csv.reader yields an empty list for a blank line.
            continue
        company_name = line[0]
        # Tolerate stray spaces around the flag (the header itself has one).
        is_active = line[3].strip()
        counts = consolidated.setdefault(
            company_name, {"yes_count": 0, "no_count": 0}
        )
        if is_active == "Yes":
            counts["yes_count"] += 1
        else:
            counts["no_count"] += 1
Sample Output:
>>> print(consolidated)
{
'Company 1': {'yes_count': 3, 'no_count': 0},
'Company 2': {'yes_count': 3, 'no_count': 1},
'Company 3': {'yes_count': 0, 'no_count': 2}
}
I have a text file which contains some data to be mined.
The structure is shown below
name (personA {
field1 : data1
field2 : data2
fieldN : dataN
subfield() {
fieldx1 : datax1
fieldxN : dataxN
}
}
name (personB {
field1 : data11
field2 : data12
fieldN : data1N
}
In some persons' records the subfield is absent, and in that case the output should report the subfield as unknown. Below is the code I use to extract the data:
import re
data = dict()
with open('data.txt', 'r') as fin:
FLAG, FLAGP, FLAGS = False, False, False
for line in fin:
if FLAG:
if re.search('field1', line):
d1 = line.split()[2]
data['field1'] = d1
if re.search('fieldN', line):
dN = line.split()[2]
data['fieldN'] = dN
data['fieldxn'] = 'unknown'
FLAGP = True
if FLAGS:
if re.search('fieldxN', line):
dsN = line.split()[2]
data['fieldxn'] = dsN
if re.search('name\ \(', line):
pn = line.split()[1]
FLAG = True
data['name'] = pn
if re.search('subfield', line):
FLAGS = True
if len(data) == 4:
if FLAGP:
print data
FLAGP = False
FLAG = False
FLAGS = False
The output is shown below
{'field1': 'data1', 'fieldN': 'dataN', 'name': '(personA', 'fieldxn': 'unknown'}
{'field1': 'data11', 'fieldN': 'data1N', 'name': '(personB', 'fieldxn': 'unknown'}
The problem is that I don't know where to print data, so currently I am using the statement below to print it, which is wrong:
if len(data) == 4:
if FLAGP:
print data
FLAGP = False
FLAG = False
FLAGS = False
I would appreciate it if someone could suggest how to retrieve the data correctly.
I would take a different approach to parsing, storing the subfields (and other fields) in a dictionary.
# Read the whole file up front; the with-block closes the handle promptly
# instead of leaving it to the garbage collector.
with open('data.txt', 'rt') as fin:
    data = fin.read()
### Given a string containing lines of "fieldX : valueY"
### return a dictionary of values
def getFields(field_data):
    """Parse lines of "name : value" into a dictionary.

    field_data -- a string of newline-separated "name : value" pairs, or
                  None when the section is absent.

    Returns a dict mapping each stripped name to its stripped value; the
    dict is empty for None and for text that contains no pairs.
    """
    fields = {}
    if field_data is None:
        return fields
    for pair in field_data.splitlines():
        # Split on the first colon only, so values may contain ':' themselves.
        # Lines with no colon at all (blank lines, stray braces) are skipped
        # rather than raising on unpacking.
        name, sep, value = pair.partition(":")
        if sep:
            fields[name.strip()] = value.strip()
    return fields
### Split the data by name; element 0 is whatever precedes the first record
people_data = data.strip().split("name (")[1:]
### Loop through every person record
for person_data in people_data:
    # Split the name from the brace-delimited body
    name, person_data = person_data.split(" {", 1)
    if "subfield() {" in person_data:
        # Plain fields come before the subfield block; the subfield block
        # runs up to its own closing brace.
        field_data, subfield_data = person_data.split("subfield() {", 1)
        subfield_data = subfield_data.split("}")[0]
    else:
        # No subfield: take everything up to the record's closing brace.
        # Without this branch field_data would still hold the previous
        # person's text (or be undefined for the first record).
        field_data = person_data.split("}")[0]
        subfield_data = None
    # Separate the fields into single lines of pairs
    fields = getFields(field_data)
    # and any subfields (empty when the record has none)
    subfields = getFields(subfield_data)
    print("Person: "+str(name))
    print("Fields: "+str(fields))
    print("Sub_Fields:"+str(subfields))
Which gives me:
Person: personA
Fields: {'field1': 'data1', 'field2': 'data2', 'fieldN': 'dataN'}
Sub_Fields:{'fieldx1': 'datax1', 'fieldxN': 'dataxN'}
Person: personB
Fields: {'field1': 'data1', 'field2': 'data2', 'fieldN': 'dataN'}
Sub_Fields:{}
So you could just adjust your output based on whether subfields is empty (for example, report the subfield as unknown when it is). The idea is to get your input data into more flexible structures, rather than "brute-force" parsing as you have done. In the above I use split() a lot to give a more flexible way through, rather than relying on finding exact names. Obviously it depends on your design requirements too.
I want to parse an Excel file and put its data into the model (User). However, right now only the last Excel row is stored in the model, and it is stored 4 times; 4 is the total number of rows in the Excel sheet.
Now db.sqlite3 is
|10|Karen|||
|10|Karen|||
|10|Karen|||
|10|Karen|||
My ideal db.sqlite3 is
1|1|Blear|40|false|l
2|5|Tom|23|true|o
3|9|Rose|52|false|m
|10|Karen|||
I want all of the data to be put in there.
Why does this happen?
views.py is
#coding:utf-8
from django.shortcuts import render
import xlrd
from .models import User
book = xlrd.open_workbook('../data/data.xlsx')
sheet = book.sheet_by_index(1)
for row_index in range(sheet.nrows):
rows = sheet.row_values(row_index)
print(rows)
def build_employee(employee):
    """Map a spreadsheet job title to its one-letter employee code.

    Returns 'l' for 'leader', 'm' for 'manager' and 'o' for 'others';
    any other value yields None.
    """
    codes = {'leader': 'l', 'manager': 'm', 'others': 'o'}
    # dict.get keeps the None result for unrecognised titles.
    return codes.get(employee)
for row in rows:
is_man = rows[4] != ""
emp = build_employee(rows[5])
user = User(user_id=rows[1], name_id=rows[2], name=rows[3],
age=rows[4],man=is_man,employee=emp)
user.save()
When I print rows with print(rows), the result is
Blear
Tom
Rose
Karen
so I think rows holds all of the data in the Excel sheet.
models.py is
class User(models.Model):
    # Allowed values for `employee`: (stored one-letter code, readable label).
    TYPE_CHOICES = (
        ('m', 'manager'),
        ('l', 'leader'),
        ('o', 'others'),
    )

    # Every column except `man` is stored as text.
    user_id = models.CharField(max_length=200)
    name_id = models.CharField(max_length=200)
    name = models.CharField(max_length=200)
    age = models.CharField(max_length=200)
    man = models.BooleanField()
    employee = models.CharField(max_length=1, choices=TYPE_CHOICES)
How can i fix this?
At the end of this block, rows holds only the values of the last row (the row with Karen).
for row_index in range(sheet.nrows):
rows = sheet.row_values(row_index)
print(rows)
After that loop, the block below iterates over the values of that last row only. Also note that you never use row inside the for block; row is a single cell value taken from ['', 10, 'Karen', '', '', ''].
for row in rows:
is_man = rows[4] != ""
emp = build_employee(rows[5])
user = User(user_id=rows[1], name_id=rows[2], name=rows[3],
age=rows[4],man=is_man,employee=emp)
user.save()
You should correct the above block as below..
# Build and save one User per spreadsheet row, inside the same loop that
# reads the row, so every row is stored rather than only the last one.
for row_index in range(sheet.nrows):
    rows = sheet.row_values(row_index)
    user = User(
        user_id=rows[1],
        name_id=rows[2],
        name=rows[3],
        age=rows[4],
        man=rows[4] != "",
        employee=build_employee(rows[5]),
    )
    user.save()
Please note that I've not taken due care about the header row. Please do so at your end if need be.
I want to parse an Excel file, build a dictionary from it, and update the User model instance whose user_id matches the dictionary's user_id.
Now dictionary is
dict_data = {'user_id': 1,'nationarity': America, 'dormitory':'A', 'group': 3}
Models in views.py is
user = User(user_id=rows[1],name_id=rows[2],age=rows[3],employee=rows[4])
If I want to add the dictionary's data to the model, I think I should write something like
for data in dict_data:
User(**data)
but how should I match the dictionary's user_id with the model's? What should I write?
Now I wrote like
#coding:utf-8
from django.shortcuts import render
import xlrd
from app.models import User
book3 = xlrd.open_workbook('./data/XXX.xlsx')
sheet3 = book3.sheet_by_index(0)
headers = sheet3.row_values(0)
large_item = None
dicts = {}
for row_index in range(sheet3.nrows):
rows3 = sheet3.row_values(row_index)
large_item = rows3[1] or large_item
# Create dict with headers and row values
row_data = {}
for idx_col, value in enumerate(rows3):
header_value = headers[idx_col]
# Avoid to add empty column. A column in your example
if header_value:
row_data[headers[idx_col]] = value
# Add row_data to your data_dict with
dicts[row_index] = row_data
for data in dicts:
user1 = User.objects.filer(user_id = data['user_id']).exists()
if user1:
user1.__dict__.update(**dicts)
user1.save()
When I run this code,
AttributeError: 'Manager' object has no attribute 'filer'
user1 = User.objects.filer(user_id = data['user_id']).exists()
How should I fix this?
for data in dict_datas:
user = User.object.filter(user_id = data['user_id']).exists()
if user:
user.__dict__.update(**dict_data)
user.save()
The dict_data you posted is a dict; you shouldn't iterate over it like a list.
I guess your dict_datas is a list of dicts, so:
for data in dict_datas:
user = User.objects.get(user_id=data['user_id'])
user.name_id = data['**']
...
user.save()
First, fetch the user object whose user_id matches the one in your Excel data/dict, then change the values, and save it.
I am trying to populate my table from a CSV file with foreign-key constraints. The problem is that everything is saved as None in the database. After reading the CSV file, I turn each row (a list) into a dictionary (song_params). I don't know where I could have gone wrong, because everything seems to be written the way I want it to work.
header = ['artist', 'album', 'genre', 'song', 'price', 'download_link', 'duration']
for row in csv_file:
song_params = dict(zip(header, row))
song_values = {}
dbsession = DBSession()
for key, value in song_params.iteritems():
if key == 'artist':
martist = dbsession.query(Artist).filter_by(artist = value).first()
song_values['artist'] = martist
else:
song_values[key] = value
if key == 'album':
malbum =dbsession.query(Album).filter_by(album_name = value).first()
song_values['album'] = malbum
else:
song_values[key] = value
if key == 'genre':
mgenre = dbsession.query(Genre).filter_by(genre = value).first()
song_values['genre'] = mgenre
else:
song_values[key] = value
song = Song(song_values)
dbsession.add(song)
Try Song(**song_values), which unpacks the dict into keyword arguments.