Django ORM create rows dynamically with _meta.get_fields() - python

I am writing a function that reads an xlsx file and writes its data to the database with Django. But I have so many fields that I cannot define them statically; I want to write these fields to the database with a certain algorithm. In my design I get the class attributes with _meta.get_fields(), but as seen in this example, the attributes always remain None. In views.py there is an example between the BEGIN and END markers. How can I solve this problem?
views.py
# ... some codes ...
@login_required(login_url='/admin/login/')
def import_process(request):
    if request.method == 'POST':
        form = UploadFileForm(request.POST, request.FILES)
        if form.is_valid():
            file_in_memory = request.FILES['file'].read()
            wb = load_workbook(filename=BytesIO(file_in_memory), data_only=True)
            ws = wb.worksheets[0]
            ws_company_name = str(ws["D9"].value)
            ws_company_address = str(ws["D10"].value)
            # ... some codes ...
            # Company Tower Get ID loop
            company_staff = CompanyStaff.objects.filter(company_id=Company.objects.filter(company_name=ws_company_name)[0].id)[0]
            for col in COLUMN_PARAMETER_LIST:
                column_cell = str(col) + str(COLUMN_PARAMETER_BEGIN)
                ws_tower_type = str(ws[column_cell].value)
                tower_type = TowerType.objects.filter(tower_type=ws_tower_type)[0]
                c_tower_object = CompanyTower.objects.filter(company_staff_id=company_staff, tower_type_id=tower_type)[0]
                tower_data = TowerData.objects.create(company_tower_id=c_tower_object)
                ROW_IDX = int(ROW_PARAMETER_BEGIN - 1)
                # ****************** BEGIN ****************** #
                site_fields = tower_data._meta.get_fields()
                site_fields_names = [f.name for f in site_fields][1:]
                for mfield in site_fields_names:
                    if any(word in str(mfield) for word in TOWER_DATA_EXCLUDE_FIELDS_NEW):
                        continue
                    else:
                        ROW_IDX += 1
                        tower_data_cell = str(col) + str(ROW_IDX)
                        tower_data_cell_value = ws[tower_data_cell].value
                        tower_data.mfield = tower_data_cell_value
                        if str(mfield) == 'ph':  # Example Field
                            print(tower_data.mfield)
                            print(tower_data.ph)
                # ******************* END ******************* #
                tower_data.save()
                print("****************")
    return render(request, template_name='import.html', context={"form": UploadFileForm()})
# ... some codes ...

You're currently setting and replacing an attribute called mfield over and over: tower_data.mfield = tower_data_cell_value.
What you want is setattr(tower_data, mfield, tower_data_cell_value)
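For illustration, a minimal sketch of the dynamic set/get pair, reusing the names from the question's loop ('ph' is just the example field the question prints):

for mfield in site_fields_names:
    value = ws[tower_data_cell].value
    setattr(tower_data, mfield, value)   # equivalent to tower_data.ph = value when mfield == 'ph'
    print(getattr(tower_data, mfield))   # reads the field back dynamically
tower_data.save()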


Python - Flask CSV file upload error handling [duplicate]

This question already has an answer here:
Python: One Try Multiple Except (1 answer)
Closed 4 years ago.
Essentially I am able to do what I want with my current code: upload a CSV file, manipulate it with pandas, and then update a MSQL database. I would like to add error handling somehow. At the moment the upload function only works for one particular file and throws different errors for all others.
Is there a way I can catch multiple errors and return an error message to the user?
Possibly something like a check on the input CSV file's column headers.
@app.route('/upload', methods=['GET', 'POST'])
def csv_input():
    tempfile_path = tempfile.NamedTemporaryFile().name
    if request.method == 'POST':
        file = request.files['file']
        if file:  # and allowed_filename(file.filename):
            file.save(tempfile_path)
            input_csv = pd.read_csv(tempfile_path, sep=",", engine='python')
            #### Data cleansing from uploaded data
            col_titles = ['id', 'title', 'vote_average', 'w_average', 'vote_count',
                          'year', 'runtime', 'budget', 'revenue', 'profit']
            # Only keep data where the original language is English
            input_csv = input_csv[input_csv['original_language'] == 'en']
            # Only keep data with a vote count >= 10
            input_csv = input_csv[input_csv['vote_count'] >= 10]
            # Fill all NA values with 0 - needed to set datatypes
            input_csv = input_csv.fillna(0)
            # Remove all rows with no runtime
            input_csv = input_csv[input_csv['runtime'] != 0]
            # Remove all duplicate rows
            input_csv = input_csv.drop_duplicates()
            input_csv['vote_average'] = input_csv.vote_average.astype(float).round(1)
            input_csv['runtime'] = input_csv.runtime.astype(int)
            input_csv['vote_count'] = input_csv.vote_count.astype(int)
            input_csv['revenue'] = input_csv.revenue.astype('int64')
            input_csv['budget'] = input_csv.budget.astype('int64')
            profit_cal(input_csv, 'revenue', 'budget', 'profit')
            input_csv['profit'] = input_csv.profit.astype('int64')
            input_csv['profit'] = input_csv.profit.replace(0, 'No Data')
            # Year cleaning
            input_csv['year'] = pd.to_datetime(input_csv['release_date'], errors='coerce').apply(
                lambda x: str(x).split('-')[0] if x != np.nan else np.nan)
            #### IMDB data calculation
            V = input_csv['vote_count']
            R = input_csv['vote_average']
            C = input_csv['vote_average'].mean()
            m = input_csv['vote_count'].quantile(0.10)
            input_csv['w_average'] = (V / (V + m) * R) + (m / (m + V) * C)
            input_csv['w_average'] = input_csv.w_average.astype(float).round(1)
            # Reorder the data and output in the correct order
            reorder_data = input_csv[col_titles]
            reorder_data.to_sql(name='title_data', con=engine, if_exists='replace', index=False)
            ##### Genre loads == DataFrame 2
            df = input_csv
            v = df.genres.apply(json.loads)
            df = pd.DataFrame({
                'id': df['id'].values.repeat(v.str.len(), axis=0),
                'genre': np.concatenate(v.tolist())
            })
            df['genre'] = df['genre'].map(lambda x: x.get('name'))
            genre_data = df.genre.str.get_dummies().sum(level=0)
            genre_data = df.loc[(df != 0).any(1)]
            genre_order = ['id', 'genre']
            # DataFrame to SQL
            genre_data[genre_order].to_sql(name='genre_data', con=engine, if_exists='replace', index=False)
            flash('Database has been updated successfully', 'success')
    return render_template('upload.html')
There are several methods:
The Python way: just add try: and except blocks with the relevant exception classes.
from pandas.errors import EmptyDataError, ParserError

try:
    # parsing & processing logic here
    pass
except EmptyDataError as ex:
    # tell the user we don't accept empty data
    pass
except ParserError as ex:
    # tell the user we failed to parse their input
    pass
except Exception as ex:
    # tell the user that something went wrong
    pass
The Flask way: register error handlers with Flask for specific exceptions (this affects the whole Flask application):
@app.errorhandler(pandas.errors.EmptyDataError)
def handle_empty_data(error):
    return 'Failed parsing Input', 200
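As for the header check the question mentions, a minimal sketch of that idea (required_cols here is an assumption based on the columns the view actually uses):

# Sketch: validate the uploaded CSV's headers before processing.
required_cols = {'id', 'title', 'original_language', 'vote_count', 'vote_average',
                 'runtime', 'budget', 'revenue', 'release_date', 'genres'}
input_csv = pd.read_csv(tempfile_path, sep=",", engine='python')
missing = required_cols - set(input_csv.columns)
if missing:
    flash('Upload failed, missing columns: %s' % ', '.join(sorted(missing)), 'danger')
    return render_template('upload.html')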

How should I execute this Python script in PowerShell

I've solved the problem: it was related to my %PATH%.
I have a script which works with an argument. In PowerShell I tried the command you can see below:
.\dsrf2csv.py C:\Python27\a\DSR_testdata.tsv.gz
You can also see part of the script below:
def __init__(self, dsrf2csv_arg):
    self.dsrf_filename = dsrf2csv_arg
    dsrf_path, filename = os.path.split(self.dsrf_filename)
    self.report_outfilename = os.path.join(dsrf_path, filename.replace('DSR', 'Report').replace('tsv', 'csv'))
    self.summary_outfilename = os.path.join(dsrf_path, filename.replace('DSR', 'Summary').replace('tsv.gz', 'csv'))
But when I try to run this script, nothing happens at all. How should I run this script with a file (for example testdata.tsv.gz)?
Note: the script and the file are in the same location.
Full script:
import argparse
import atexit
import collections
import csv
import gzip
import os

SKIP_ROWS = ['HEAD', '#HEAD', '#SY02', '#SY03', '#AS01', '#MW01', '#RU01',
             '#SU03', '#LI01', '#FOOT']
REPORT_HEAD = ['Asset_ID', 'Asset_Title', 'Asset_Artist', 'Asset_ISRC',
               'MW_Asset_ID', 'MW_Title', 'MW_ISWC', 'MW_Custom_ID',
               'MW_Writers', 'Views', 'Owner_name', 'Ownership_Claim',
               'Gross_Revenue', 'Amount_Payable', 'Video_IDs', 'Video_views']
SUMMARY_HEAD = ['SummaryRecordId', 'DistributionChannel',
                'DistributionChannelDPID', 'CommercialModel', 'UseType',
                'Territory', 'ServiceDescription', 'Usages', 'Users',
                'Currency', 'NetRevenue', 'RightsController',
                'RightsControllerPartyId', 'AllocatedUsages', 'AmountPayable',
                'AllocatedNetRevenue']


class DsrfConverter(object):
    """Converts DSRF 3.0 to YouTube CSV."""

    def __init__(self, dsrf2csv_arg):
        """Creating output file names."""
        self.dsrf_filename = dsrf2csv_arg
        dsrf_path, filename = os.path.split(self.dsrf_filename)
        print(self.dsrf_filename)
        input("Press Enter to continue...")
        self.report_outfilename = os.path.join(dsrf_path, filename.replace(
            'DSR', 'Report').replace('tsv', 'csv'))
        self.summary_outfilename = os.path.join(dsrf_path, filename.replace(
            'DSR', 'Summary').replace('tsv.gz', 'csv'))

    def parse_blocks(self, reader):
        """Generator for parsing all the blocks from the file.

        Args:
            reader: the handler of the input file
        Yields:
            block_lines: A full block as a list of rows.
        """
        block_lines = []
        current_block = None
        for line in reader:
            if line[0] in SKIP_ROWS:
                continue
            # Exit condition
            if line[0] == 'FOOT':
                yield block_lines
                return  # PEP 479: 'raise StopIteration()' is invalid inside a generator
            line_block_number = int(line[1])
            if current_block is None:
                # Initialize
                current_block = line_block_number
            if line_block_number > current_block:
                # End of block, yield and build a new one
                yield block_lines
                block_lines = []
                current_block = line_block_number
            block_lines.append(line)
        # Also return the last block
        yield block_lines

    def process_single_block(self, block):
        """Handles a single block in the DSR report.

        Args:
            block: Block as a list of lines.
        Returns:
            (summary_rows, report_row) tuple.
        """
        views = 0
        gross_revenue = 0
        summary_rows = []
        owners_data = {}
        # Create an ordered dictionary with a key for every column.
        report_row_dict = collections.OrderedDict(
            [(column_name.lower(), '') for column_name in REPORT_HEAD])
        for line in block:
            if line[0] == 'SY02':  # Save the financial summary
                summary_rows.append(line[1:])
                continue
            if line[0] == 'AS01':  # Sound recording information
                report_row_dict['asset_id'] = line[3]
                report_row_dict['asset_title'] = line[5]
                report_row_dict['asset_artist'] = line[7]
                report_row_dict['asset_isrc'] = line[4]
            if line[0] == 'MW01':  # Composition information
                report_row_dict['mw_asset_id'] = line[2]
                report_row_dict['mw_title'] = line[4]
                report_row_dict['mw_iswc'] = line[3]
                report_row_dict['mw_writers'] = line[6]
            if line[0] == 'RU01':  # Video level information
                report_row_dict['video_ids'] = line[3]
                report_row_dict['video_views'] = line[4]
            if line[0] == 'SU03':  # Usage data of sound recording asset
                # Summing up views and revenues for each sub-period
                views += int(line[5])
                gross_revenue += float(line[6])
                report_row_dict['views'] = views
                report_row_dict['gross_revenue'] = gross_revenue
            if line[0] == 'LI01':  # Ownership information
                # if we have already parsed a LI01 line with that owner
                if line[3] in owners_data:
                    # keep only the latest ownership
                    owners_data[line[3]]['ownership'] = line[6]
                    owners_data[line[3]]['amount_payable'] += float(line[9])
                else:
                    # need to create the entry for that owner
                    data_dict = {'custom_id': line[5],
                                 'ownership': line[6],
                                 'amount_payable': float(line[9])}
                    owners_data[line[3]] = data_dict
        # Get rid of owners which do not have an ownership or an amount payable.
        owners_to_write = [o for o in owners_data
                           if (owners_data[o]['ownership'] > 0
                               and owners_data[o]['amount_payable'] > 0)]
        report_row_dict['owner_name'] = '|'.join(owners_to_write)
        report_row_dict['mw_custom_id'] = '|'.join(
            [owners_data[o]['custom_id'] for o in owners_to_write])
        report_row_dict['ownership_claim'] = '|'.join(
            [owners_data[o]['ownership'] for o in owners_to_write])
        report_row_dict['amount_payable'] = '|'.join(
            [str(owners_data[o]['amount_payable']) for o in owners_to_write])
        # Sanity check. The number of values must match the number of columns.
        assert len(report_row_dict) == len(REPORT_HEAD), 'Row is wrong size :/'
        return summary_rows, report_row_dict

    def run(self):
        finished = False

        def removeFiles():
            if not finished:
                os.unlink(self.report_outfilename)
                os.unlink(self.summary_outfilename)

        atexit.register(removeFiles)
        with gzip.open(self.dsrf_filename, 'rb') as dsr_file, gzip.open(
                self.report_outfilename, 'wb') as report_file, open(
                self.summary_outfilename, 'wb') as summary_file:
            dsr_reader = csv.reader(dsr_file, delimiter='\t')
            report_writer = csv.writer(report_file)
            summary_writer = csv.writer(summary_file)
            report_writer.writerow(REPORT_HEAD)
            summary_writer.writerow(SUMMARY_HEAD)
            for block in self.parse_blocks(dsr_reader):
                summary_rows, report_row = self.process_single_block(block)
                report_writer.writerow(report_row.values())
                summary_writer.writerows(summary_rows)
        finished = True


if __name__ == '__main__':
    arg_parser = argparse.ArgumentParser(
        description='Converts DDEX DSRF UGC profile reports to Standard CSV.')
    required_args = arg_parser.add_argument_group('Required arguments')
    required_args.add_argument('dsrf2csv_arg', type=str)
    args = arg_parser.parse_args()
    dsrf_converter = DsrfConverter(args.dsrf2csv_arg)
    dsrf_converter.run()
In general, executing a python script in PowerShell like this .\script.py has two requirements:
Add the path to the python binaries to your %PATH%: $env:Path = $env:Path + ";C:\Path\to\python\binaries\"
Add the extension .py to the PATHEXT environment variable: $env:PATHEXT += ";.PY"
The latter will only apply to the current PowerShell session. If you want to add it to all future PowerShell sessions, add this line to your PowerShell profile (e.g. notepad $profile).
In your case there is also an issue with the python script you are trying to execute: def __init__(self) is a constructor for a class, like:
class Foo:
    def __init__(self):
        print "foo"
Did you give us your complete script?

I/O operation on closed file when saving a model

Hi guys, I have a problem in this part of the code: I have multiple files uploaded from a global variable.
file_car = None  # Global variable
and in my views.py:
if request.method == 'POST':
    if file_car is None:
        print request.FILES
        file_car = [request.FILES.get('dzfile[%d]' % i)
                    for i in range(0, len(request.FILES))]
I need to do it like this because I need to use these images in a lot of other views. But the problem is when I want to save:
for f in file_car:
    print f
    myfyle = File(f)
    #myfyle.open()
    aux = Attachment()
    aux.name = f
    aux.car = car
    aux.save()  # here is the error. I tried opening the file, but it said you cannot reopen the file
It throws "I/O operation on closed file". I'm going crazy with this. Sorry for my bad English.
Edit: here is more comprehensive code, if it helps
from django.core.files import File

def createCarView(request):
    global file_car
    if request.method == 'POST':
        if file_car is None:
            form = CarForm(request.POST, request.FILES)  # other values
            print request.FILES  # Print all the files that I get
            file_car = [request.FILES.get('dzfile[%d]' % i)
                        for i in range(0, len(request.FILES))]  # Have all the fileS!
            if form.is_valid():
                # omitted all the other parts of the form!
                car.save()
                for f in file_car:
                    print f
                    myfyle = File(f)
                    #myfyle.open()
                    aux = Attachment()
                    aux.name = myfyle
                    aux.car = car
                    aux.save()  # Error in here
                    print aux.name
                file_car = None  # cleaning the global var
    return HttpResponseRedirect('/car/create')
models.py
class Attachment(models.Model):
    car = models.ForeignKey('Car', on_delete=models.CASCADE)
    name = models.FileField(_('File'), upload_to=upload_to_get)
The code is not clear, but the error says you should open the file, so how about trying something like this?
for f in file_car:
    f.open()  # reopen the underlying file before wrapping it
    myfyle = File(f)
    aux = Attachment()
    aux.name = myfyle
    aux.car = car
    aux.save()
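If reopening still fails, a commonly seen alternative (a sketch, not part of the answer above) is to hand each uploaded file straight to the FileField's save() method during the same request that received it:

# Sketch: FieldFile.save() reads the content and stores it in one step.
for f in file_car:
    aux = Attachment(car=car)
    aux.name.save(f.name, f, save=True)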

Python - Getting Attributes From A File of Constants

I have a file of constant variables that I need to query, and I am not sure how to go about it.
I have a database query which returns user names, and I need to find the matching user name in the file of constant variables.
The file looks like this:
SALES_MANAGER_01 = {"user_name": "BO01", "password": "password", "attend_password": "BO001",
                    "csm_password": "SM001", "employee_num": "BOSM001"}
There is just a bunch of users like the one above.
My function looks like this:
@attr("user_test")
def test_get_user_for_login(self):
    application_code = 'BO'
    user_from_view = self.select_user_for_login(application_code=application_code)
    users = [d['USER'] for d in user_from_view]
    user_with_ent = choice(users)
    user_wo_ent = user_with_ent[-4:]
    password = ""
    global_users = dir(gum)
    for item in global_users:
        if user_wo_ent not in item.__getattr__("user_name"):
            user_with_ent = choice(users)
            user_wo_ent = user_with_ent[-4:]
        else:
            password = item.__getattr__("password")
    print(user_wo_ent, password)
gum (in global_users = dir(gum)) is my file of constants. I know I am doing something wrong, since I am getting AttributeError: 'str' object has no attribute '__getattr__'; I am just not sure how to go about resolving it.
You should reverse your looping, as you want to compare each item to your match condition. Also, you have a dictionary, so use it to do some of the heavy lifting.
You need to add some imports:
import re
from ast import literal_eval
I've changed the dir(gum) bit to be this function.
def get_global_users(filename):
    gusers = {}  # create a global users dict
    p_key = re.compile(ur'\b\w*\b')  # regex to get the first part, e.g. SALES_MANAGER_01
    p_value = re.compile(ur'\{.*\}')  # regex to grab everything in {}
    with open(filename) as f:  # open the file and work through it
        for line in f:  # for each line
            gum_key = p_key.match(line)  # pull out the key
            gum_value = p_value.search(line)  # pull out the value
            # Here is the real action: update the dictionary with the
            # match of gum_key and the match of gum_value.
            gusers[gum_key.group()] = literal_eval(gum_value.group())
    return gusers  # return the dictionary
The bottom of your existing code is replaced with this:
global_users = get_global_users(gum)  # assign the return value to global_users
for key, value in global_users.iteritems():  # walk through all key, value pairs
    if value['user_name'] != user_wo_ent:
        user_with_ent = choice(users)
        user_wo_ent = user_with_ent[-4:]
    else:
        password = value['password']
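Hypothetical usage, assuming the constants live in a file named gum.py next to the test:

# Sketch: load the constants file and look up one entry.
global_users = get_global_users('gum.py')
print(global_users['SALES_MANAGER_01']['user_name'])  # -> 'BO01'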
So a very simple answer was to get the dir() of the constants module and then parse over it, like so:
global_users = dir(gum)
for item in global_users:
    o = gum.__dict__[item]
    if type(o) is not dict:
        continue
    if o.get("user_name") == user_wo_ent:
        print(user_wo_ent, o.get("password"))
    else:
        print("User was not in global_user_mappings")
I was able to find the answer by doing the following:
def get_user_for_login(application_code='BO'):
    user_from_view = BaseServiceTest().select_user_for_login(application_code=application_code)
    users = [d['USER'] for d in user_from_view]
    user_with_ent = choice(users)
    user_wo_ent = user_with_ent[4:]
    global_users = dir(gum)
    user_dict = {'user_name': '', 'password': ''}
    for item in global_users:
        o = gum.__dict__[item]
        if type(o) is not dict:
            continue
        if user_wo_ent == o.get("user_name"):
            user_dict['user_name'] = user_wo_ent
            user_dict['password'] = o.get("password")
    return user_dict
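For reference, the same lookup can also be written with getattr and isinstance instead of reaching into __dict__ (a sketch under the same assumptions about gum):

# Sketch: getattr(gum, name) is equivalent to gum.__dict__[name] here.
for item in dir(gum):
    o = getattr(gum, item)
    if isinstance(o, dict) and o.get("user_name") == user_wo_ent:
        return {'user_name': user_wo_ent, 'password': o.get("password")}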

Aggregating/optimizing object.save()?

I'm working on an import feature which allows the user to create Django database models from a selected csv file.
The models are related to each other with foreign keys and many-to-many fields.
There are a lot of object.save() and Object.objects.get(...) calls in my code which, I suppose, cause it to run so slowly.
When an error (for example an integrity error) occurs, I need all the changes in the database to be rolled back, so I'm using the transaction.atomic decorator on my view, and it works fine.
The problem is that my import is really slow. Parsing a file containing ~2000 lines (which could possibly add about 1000 objects to my database) takes about 3 minutes, which is too long.
Is there a way to make it faster? I've read about the bulk_create function, but "It does not work with many-to-many relationships.".
If this is important, I'm using PostgreSQL.
EDIT:
File structure looks like this:
subject_name
day [A/B] begins_at - ends_at;lecturer_info
Then multiple lines like:
student_uid;student_info
Ok, here's the code.
def csv_import(market, csv_file):
    lines = [line.strip().decode('utf-8') for line in csv_file.readlines()]
    lines = [line for line in lines if line]
    pattern = re.compile(r'[0-9]+;.+')
    week_days = {
        'monday': 0,
        .
        .
        .
    }
    term, subject, lecturer, student = None, None, None, None
    for number, line in enumerate(lines):
        if not ';' in line:
            subject = Subject(subject_id=number, name=line, market=market)
            subject.save()
        elif not pattern.match(line):
            term_info, lecturer_info = line.split(';')  # term_info - 'day begins_at - ends_at', lecturer_info - lecturer
            term_info = term_info.replace(' - ', ' ').split()
            term = Term(term_id=number, subject=subject, day=week_days[term_info[0]],
                        begin_at=term_info[-2], ends_at=term_info[-1])
            if len(term_info) == 4:
                term.week = term_info[1]
            lecturer_info = lecturer_info.rsplit(' ', 1)
            try:
                lecturer = Lecturer.objects.get(first_name=lecturer_info[0], last_name=lecturer_info[1])
            except Lecturer.DoesNotExist:
                lecturer = Lecturer(first_name=lecturer_info[0], last_name=lecturer_info[1])
                lecturer.save()
            term.lecturer = lecturer
            term.save()
        else:
            gradebook_id, student_info = line.split(';')
            student_info = student_info.rsplit(' ', 1)
            try:
                student = TMUser.objects.get(uid=int(gradebook_id))
            except TMUser.DoesNotExist:
                student = TMUser(uid=int(gradebook_id), username='student' + gradebook_id,
                                 first_name=student_info[0], last_name=student_info[1],
                                 password=make_password('passwd'), user_group='user')
                student.save()
            student.terms.add(term)
            student.save()
This is some pseudo code to show you the basic idea of what I meant by caching results:
cache = {}
for number, line in enumerate(lines):
    ...
    elif not pattern.match(line):
        ...
        term = Term(term_id=number, subject=subject, ...)
        lecturer_id = (lecturer_info[0], lecturer_info[1])  # first name and last
        if lecturer_id in cache:
            # retrieve from cache
            lecturer = cache[lecturer_id]
        else:
            try:
                lecturer = Lecturer.objects.get(first_name=lecturer_id[0], last_name=lecturer_id[1])
            except Lecturer.DoesNotExist:
                lecturer = Lecturer(first_name=lecturer_id[0], last_name=lecturer_id[1])
                lecturer.save()
            # add to cache
            cache[lecturer_id] = lecturer
        term.lecturer = lecturer
        term.save()
        # etc.
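Beyond caching, the bulk_create limitation the question quotes can be worked around: bulk_create the plain rows first, then insert the many-to-many links through the auto-generated through model. A sketch, assuming the new TMUser objects and their (student, term) pairs were collected into the hypothetical lists students_to_create and student_term_pairs, and relying on PostgreSQL (which the question uses) returning primary keys from bulk_create:

# Sketch: bulk-create the students, then bulk-create the M2M join rows.
new_students = TMUser.objects.bulk_create(students_to_create)
StudentTerm = TMUser.terms.through  # the auto-generated join model
StudentTerm.objects.bulk_create([
    StudentTerm(tmuser_id=student.pk, term_id=term.pk)
    for student, term in student_term_pairs
])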
