How can I import a csv file into a Django project? - python

I can't import a csv file into my Django project.
I have code that already works which basically takes a csv file and outputs spending on different categories (using pandas). I'm learning Django right now and I'm trying to include that code in my project, but every time I try to run it, it says 'No module named StatementOCTRev'.
I'm not sure if it's a directory thing or a pandas-and-Django thing.
I tried different approaches but none of them worked, which led to ugly code, so I apologize. Anyway, here is my view.py:
from django.shortcuts import render, get_object_or_404
from .models import Post
from .budget import Categorize, Spending
import numpy as np
import pandas as pd
import csv
import mysite.StatementOCTRev
from import_export import resources
#import

# Create your views here.
def home(request):
    posts = Post.objects
    return render(request, 'mysite/home.html', {'posts': posts})

def post(request, post_id):
    ################################
    coffeelist = ["WING", "COSTA", "CAFFEINE", "PASSO", "LA FELICE", "GULLUOGLU", "CARIBOU", "ARK COFFEE"]
    restlist = ["TALABAT", "SOLO", "MELENZANE", "PIZZA", "HARDEES", "HARDEE'S", "MCDONALDS", "GULF ROYAL", "SARAY", "Carriage"]
    gaslist = ["KNPC", "OULA", "ALFA"]

    read = pd.read_csv('StatementOCTRev.csv', encoding="ISO-8859-1")  # Read CSV

    # create a new dataframe
    df = pd.DataFrame(columns=["Date", "Reference", "Debit", "Credit", "Balance"])
    df['Date'] = read.iloc[:, 0]
    df['Reference'] = read.iloc[:, 1]
    df['Debit'] = read.iloc[:, 2]
    df['Credit'] = read.iloc[:, 3]
    df['Balance'] = read.iloc[:, 4]
    df['Category'] = 'Misc.'

    for name in df.Debit:
        if name < 0:
            df.loc[df.Debit == name, 'Debit'] = name * -1

    df.Reference = [str(x).replace(str(x), str(x).upper()) for x in df.Reference]
    df.Reference = [x.strip().replace('POS', '') for x in df.Reference]
    df.Reference = [x.strip().replace('UTAP', '') for x in df.Reference]
    df.Reference = [x.strip().replace('#', '') for x in df.Reference]
    df.Reference = [x.strip().replace('-', '') for x in df.Reference]
    df.Reference = [x.strip().replace('_', '') for x in df.Reference]

    # ------------------------ Coffee
    Categorize(coffeelist, "Coffee")
    SpentOnCoffee = Spending("Coffee")
    # ------------------------ Restaurant
    Categorize(restlist, "Restaurant")
    SpentOnRest = Spending("Restaurant")
    # ------------------------ Gas
    Categorize(gaslist, "Gas")
    SpentOnGas = Spending("Gas")

    Other = 1296 - SpentOnRest - SpentOnCoffee - SpentOnGas

    coffeecategorized = Categorize(coffeelist, "Coffee")
    coffeespending = Spending("Coffee")

    post = get_object_or_404(Post, pk=post_id)
    return render(request, 'mysite/post.html', {'post': post})
Note: as you can see, I'm not telling the post function to return anything related to the file; that's because I actually couldn't get any further. Of course I want to, but first I just want the code to read the csv file.
Also, models.py has nothing related to this csv file (it only has a Post class, as the app is just a blog I'm making to practice Django).
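The error message is the clue here: 'No module named StatementOCTRev' comes from the line import mysite.StatementOCTRev, which tries to import the CSV as if it were a Python module; a CSV is just data, so that import should be removed. A minimal sketch of reading the file with an absolute path instead, assuming the CSV sits in the project root next to manage.py (the location and the helper name are assumptions):

import os

import pandas as pd
from django.conf import settings

def load_statement():
    # settings.BASE_DIR is the project root in a default Django layout;
    # adjust the filename/location to wherever StatementOCTRev.csv really lives.
    csv_path = os.path.join(settings.BASE_DIR, 'StatementOCTRev.csv')
    return pd.read_csv(csv_path, encoding="ISO-8859-1")

Calling load_statement() inside post() (and dropping the mysite.StatementOCTRev import) keeps the read working no matter which directory the development server is started from.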


How to redirect from StreamingHttpResponse in Django

I would like to achieve this flow:
The user's face is detected by the webcam on the homepage
The app takes the attendance of the user and displays a webpage with the attendance details
The attendance details page redirects back to the homepage after a few seconds with the webcam still running
As of now, my app is able to take the attendance of the user but will not render the attendance details page even though I used return render(). It stays on the homepage with the webcam still running. Is there a way I can solve this problem, or am I getting something wrong? I have tried changing the request details manually like this, but it is not working.
request.resolver_match = resolve('/takeAttendance/')
request.path='/takeAttendance/'
request.path_info='/takeAttendance/'
My problem is similar to How to redirect to another url after detect face in django, but none of the answers worked for me.
The involved code is as below:
views.py
from django.shortcuts import render, redirect
from django.contrib import messages
from django.http import HttpResponse, StreamingHttpResponse
from datetime import datetime, date
import cv2
import face_recognition
import numpy as np
import threading

foundFace = False
vs = cv2.VideoCapture(0)
lock = threading.Lock()
frame = None

def videoFeed(request):
    return StreamingHttpResponse(getFace(request), content_type="multipart/x-mixed-replace;boundary=frame")

def getFace(request):
    global vs, outputFrame, lock, foundFace
    known_face_names, known_face_encodings = getFiles()  # get the image files from my project directory
    face_location = []
    face_encoding = []
    while foundFace == False:
        check, frame = vs.read()
        small_frame = cv2.resize(frame, (0, 0), fx=0.5, fy=0.5)
        face_roi = small_frame[:, :, ::-1]
        face_location = face_recognition.face_locations(face_roi)
        face_encoding = face_recognition.face_encodings(face_roi, face_location)
        face_names = []
        names = []
        for encoding in face_encoding:
            matches = face_recognition.compare_faces(known_face_encodings, np.array(encoding), tolerance=0.6)
            distances = face_recognition.face_distance(known_face_encodings, encoding)
            best_match_index = np.argmin(distances)
            if matches[best_match_index]:
                name = known_face_names[best_match_index]
                face_names.append(name)
                if name not in names:
                    names.append(name)
            # process the frame (add text and rectangle, add the name of the identified user to names)
        with lock:
            (flag, encodedImg) = cv2.imencode(".jpg", frame)
        if len(names) != 0:
            foundFace = True
        if foundFace == True:
            takeAttendance(request, names)
            foundFace == False
        yield(b'--frame\r\n' b'Content-Type: image/jpeg\r\n\r\n' +
              bytearray(encodedImg) + b'\r\n')

def takeAttendance(request, names):
    context = {}
    if request.method == 'GET':
        if user_id in names:
            attendance = Attendance(user_ID=str(user_id),
                                    date_in=date.today(),
                                    time_in=datetime.now())
            attendance.save()
            context = {'attendance': attendance}
            messages.success(request, 'Check in successful')
            return render(request, 'Attendance/attendance.html', context)
        else:
            messages.error(request, 'Check in failed')
            return redirect('home')
    else:
        return redirect('home')
urls.py
from django.urls import path
from . import views
urlpatterns=[
path('home/',views.home,name='home'),
path('takeAttendance/',views.takeAttendance,name='takeAttendance'),
path('videoFeed/',views.videoFeed,name='videoFeed'),
]
I'm using Django 3.1 and I'm quite new to it, thank you!
Edit
Actually I would like to redirect to Attendance.html but keep the video stream running, like in a loop, so I can redirect from Attendance.html back to the webcam page with JavaScript and still have the video stream running. Sorry for not making that clear.
Oh... why didn't I notice that...
The problem is:
...
if foundFace == True:
    takeAttendance(request, names)
...
Yes, you execute a function that returns a render output inside getFace. And that's all; getFace doesn't use the return value at all.
The correct code should be something like:
...
if foundFace == True:
    returned_render = takeAttendance(request, names)
    return returned_render
...
or simply:
...
if foundFace == True:
    return takeAttendance(request, names)
    # or maybe you should use yield instead of return?
    # I don't know. Check both
    yield takeAttendance(request, names)
...
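One thing to keep in mind: getFace feeds a multipart/x-mixed-replace image stream, so any render returned or yielded inside it only writes HTML bytes into the <img> stream; the browser itself never navigates. Since the edit above mentions redirecting with JavaScript anyway, here is a minimal sketch of a status endpoint the homepage could poll and then navigate to /takeAttendance/ from (the view name attendance_status and the URL entry are assumptions, not part of the original code):

# views.py (sketch)
from django.http import JsonResponse

def attendance_status(request):
    # foundFace is the module-level flag already set by getFace above.
    return JsonResponse({'checked_in': foundFace})

# urls.py (sketch)
# path('attendanceStatus/', views.attendance_status, name='attendanceStatus'),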

How do I import various DataFrames from a different python file?

I have a python file called 'clean_data.py' which has all the data frames I need, and I want to import them into another python file called 'main.py' to use in creating a dashboard.
Is it possible to create a class in my clean_data.py, and if so, can someone direct me to an article (which I have struggled to find so far) so that I can figure it out?
The aim is to shift from CSV to an API over time, so I wanted to keep the data-wrangling side of things in a different file from the web app components in the main.py file.
Any help would be much appreciated.
The code from the clean_data.py is:
import pandas as pd
import csv
import os # To access my file directory
print(os.getcwd()) # Let's me know the Current Work Directory
fdi_data = pd.read_csv(r'Data/fdi_data.csv')
fdi_meta = pd.read_csv(r'Data/fdi_metadata.csv')
debt_data = pd.read_csv(r'Data/debt_data.csv')
debt_meta = pd.read_csv(r'Data/debt_metadata.csv')
gdp_percap_data = pd.read_csv(r'Data/gdp_percap_data.csv', header=2)
gdp_percap_meta = pd.read_csv(r'Data/gdp_percap_metadata.csv')
gov_exp_data = pd.read_csv(r'Data/gov_exp_data.csv', header=2)
gov_exp_meta = pd.read_csv(r'Data/gov_exp_metadata.csv')
pop_data = pd.read_csv(r'Data/pop_data.csv', header=2)
pop_meta = pd.read_csv(r'Data/pop_metadata.csv')
"""
'wb' stands for World Bank
"""
def wb_merge_data(data, metadata):
    merge = pd.merge(
        data,
        metadata,
        on='Country Code',
        how='inner'
    )
    return merge
fdi_merge = wb_merge_data(fdi_data, fdi_meta)
debt_merge = wb_merge_data(debt_data, debt_meta)
gdp_percap_merge = wb_merge_data(gdp_percap_data, gdp_percap_meta)
gov_exp_merge = wb_merge_data(gov_exp_data, gov_exp_meta)
pop_merge = wb_merge_data(pop_data, pop_meta)
def wb_drop_data(data):
    drop = data.drop(['Country Code','Indicator Name','Indicator Code','TableName','SpecialNotes','Unnamed: 5'], axis=1)
    return drop
fdi_merge = wb_drop_data(fdi_merge)
debt_merge = wb_drop_data(debt_merge)
gdp_percap_merge = wb_drop_data(gdp_percap_merge)
gov_exp_merge = wb_drop_data(gov_exp_merge)
pop_merge = wb_drop_data(pop_merge)
def wb_mr_data(data, value_name):
    data = data.melt(['Country Name','Region','IncomeGroup']).reset_index()
    data = data.rename(columns={'variable': 'Year', 'value': value_name})
    data = data.drop('index', axis=1)
    return data
fdi_merge = wb_mr_data(fdi_merge, 'FDI')
debt_merge = wb_mr_data(debt_merge, 'Debt')
gdp_percap_merge = wb_mr_data(gdp_percap_merge, 'GDP per Cap')
gov_exp_merge = wb_mr_data(gov_exp_merge, 'Gov Expend.')
pop_merge = wb_mr_data(pop_merge, 'Population')
def avg_groupby(data, col_cal, cn=False, ig=False, rg=False):
    if cn == True:
        return data.groupby('Country Name')[col_cal].mean().reset_index()
    elif ig == True:
        return data.groupby('IncomeGroup')[col_cal].mean().reset_index()
    elif rg == True:
        return data.groupby('Region')[col_cal].mean().reset_index()
"""
avg_cn_... For country
avg_ig_... Income Group
avg_rg_... Region
"""
avg_cn_fdi = avg_groupby(fdi_merge, 'FDI', cn=True)
avg_ig_fdi = avg_groupby(fdi_merge, 'FDI', ig=True)
avg_rg_fdi = avg_groupby(fdi_merge, 'FDI', rg=True)
avg_cn_debt = avg_groupby(debt_merge, 'Debt', cn=True)
avg_ig_debt = avg_groupby(debt_merge, 'Debt', ig=True)
avg_rg_debt = avg_groupby(debt_merge, 'Debt', rg=True)
avg_cn_gdp_percap = avg_groupby(gdp_percap_merge, 'GDP per Cap', cn=True)
avg_ig_gdp_percap = avg_groupby(gdp_percap_merge, 'GDP per Cap', ig=True)
avg_rg_gdp_percap = avg_groupby(gdp_percap_merge, 'GDP per Cap', rg=True)
avg_cn_gexp = avg_groupby(gov_exp_merge, 'Gov Expend.', cn=True)
avg_ig_gexp = avg_groupby(gov_exp_merge, 'Gov Expend.', ig=True)
avg_rg_gexp = avg_groupby(gov_exp_merge, 'Gov Expend.', rg=True)
avg_cn_pop = avg_groupby(pop_merge, 'Population', cn=True)
avg_ig_pop = avg_groupby(pop_merge, 'Population', ig=True)
avg_rg_pop = avg_groupby(pop_merge, 'Population', rg=True)
In Python, every file is a module. So if you want to re-use your code, you can simply import this module. For example,
# main.py
import clean_data
print(clean_data.avg_cn_fdi)
Maybe you don't need to create a class for this.
You can import the whole python file like you'd import any other locally created file and have access to the DataFrames in it. Here's an example:
I created a file called temporary.py:
import pandas as pd
data = pd.read_csv("temp.csv")
And then in a separate file I was able to use data like so:
import temporary
print(temporary.data)
Or, you could also do:
from temporary import data
print(data)
All that being said, I don't believe that this would be the best way to handle your data.
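A caveat with module-level loading like the above is that every read_csv and merge in clean_data.py runs the moment main.py executes import clean_data. A common pattern, sketched here under the assumption that the Data/ layout stays the same (the function name load_world_bank_data is made up for illustration), is to wrap the work in a function so main.py decides when the data gets loaded:

# clean_data.py (sketch)
import pandas as pd

def load_world_bank_data():
    # Read and merge one indicator on demand; repeat for debt, GDP per capita,
    # government expenditure and population as needed.
    fdi_data = pd.read_csv(r'Data/fdi_data.csv')
    fdi_meta = pd.read_csv(r'Data/fdi_metadata.csv')
    fdi_merge = pd.merge(fdi_data, fdi_meta, on='Country Code', how='inner')
    return {'fdi': fdi_merge}

# main.py (sketch)
# from clean_data import load_world_bank_data
# frames = load_world_bank_data()
# print(frames['fdi'].head())

Shifting from CSV to an API later then only means changing the inside of that one function, which lines up with the stated aim of keeping the wrangling code separate from the dashboard code.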

DJANGO: Can't list the content of a folder located in the static directory of my Django project

In my project I have a folder called datasets inside the static folder, and it contains 2 other folders. I want to list the content of each of those folders and return it in an HttpResponse. I define a utility function list_dir_content in utils/data.py where I use the glob.glob() function, passing it the paths of those folders, but I receive an empty result: an empty list [] from glob.glob(). How can I fix that issue regardless of the OS (I'm developing my internship project on either Ubuntu or Windows 10)? Thanks!
Here are the structure of my project and the files views.py and models.py.
You can also see the code which calls the utility function list_dir_content.
#in views.py
def server_uts_datasets(request):
    if request.method == 'GET':
        uts_datasets = Dataset.get_uts_datasets()
        uts_datasets_serializer = DatasetSerializer(uts_datasets, many=True)
        print(uts_datasets)
        return JsonResponse(uts_datasets_serializer.data, safe=False)
#in models.py
@classmethod
def get_mts_datasets(cls):
    mts_datasets_files = data.list_dir_content(settings.DATASETS_DIR)
    mts_datasets = []
    for mts_datasets_file in mts_datasets_files:
        dataset_type = 'mts'
        dataset_path = mts_datasets_file
        dataset_name = data.get_dataset_name(mts_datasets_file)
        dataset_nb_instances = data.get_nb_instances(mts_datasets_file)
        mts_dataset = Dataset(dataset_path=dataset_path, dataset_name=dataset_name, dataset_nb_instances=dataset_nb_instances, dataset_type=dataset_type)
        mts_datasets.append(mts_dataset)
    return mts_datasets
#in data.py
import glob
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

def list_dir_content(dir_path, file_extension=''):
    files_pattern = dir_path + '/*.' + file_extension
    print("files_pattern : ", files_pattern)
    #files_results_paths = glob.glob(files_pattern)
    files_results_paths = glob.glob('../tsanalysisapp/static/tsanalysisapp/datasets/uts/*.')
    print("files_results_paths : ", files_results_paths)
    return files_results_paths
#in settings.py
MTS_DATASETS_DIR = os.path.join(BASE_DIR, 'tsanalysisapp/static/tsanalysisapp/datasets/mts')  # paths to datasets on the server
UTS_DATASETS_DIR = os.path.join(BASE_DIR, 'tsanalysisapp/static/tsanalysisapp/datasets/uts')  # paths to datasets on the server
Thanks in advance for your kind help...
structure of my django project
Just replace all the slashes / inside os.path.join() with separate arguments, so that it becomes os.path.join('parent_dir_name', 'subdir_name', ..., 'last_dir_name', ''):
MTS_DATASETS_DIR = os.path.join(BASE_DIR, 'tsanalysisapp', 'static', 'tsanalysisapp', 'datasets', 'mts', '*.')
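Separately, the empty result most likely comes from the pattern itself: with file_extension='', the function builds '.../*.' which only matches names ending in a literal dot. A sketch of a cross-platform version (the '*' fallback is an assumption about the intended behaviour):

import glob
import os

def list_dir_content(dir_path, file_extension=''):
    # os.path.join picks the right separator on Windows and Linux.
    # Match everything when no extension is given, otherwise '*.<ext>'.
    pattern = '*.' + file_extension if file_extension else '*'
    return glob.glob(os.path.join(dir_path, pattern))

# usage sketch: list_dir_content(settings.UTS_DATASETS_DIR, 'csv')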

Running python script from django view with parameters

I want to call a python script with parameters from a django view. The python script is stored in a subfolder (see attached picture).
Basically I have an input form and want to call crawl.py with the input data from that form. The form data should be stored in the variable "production_number" in crawl.py.
view.py
from .forms import CustomerForm, LoginForm
from .models import Orders
from .ERPProgramm.crawl import crawlmain

def newOrders(request):
    if request.method == 'POST':
        form = CustomerForm(request.POST)
        if form.is_valid():
            formdata = form.cleaned_data['product_ID']
            # call crawl.py with parameter formdata
            return HttpResponseRedirect('/customer/newOrders')
crawl.py
import db
import sys
import requests
import json
from datetime import datetime

def query(resource):
    r = requests.get('http://11.111.11.11:8080/webapp/api/v1/' + resource,
                     headers={'AuthenticationToken': '11111-11111-1111-1111-11111'}
                     )
    return r

costumer_id = 1
production_number = formdata
d = query('productionOrder/?productionOrderNumber-eq={}'.format(production_number)).json()
session = db.Session()
costumer = session.query(db.Costumer).get(costumer_id)
if 'result' in d and len(d['result']) > 0:
    r = d['result'][0]
    order = db.Order()
    try:
        order.article_id = r['articleId']
        order.amount = r['targetQuantity']
        order.create_date = datetime.fromtimestamp(r['createdDate'] / 1000)
        order.start_date = datetime.fromtimestamp(r['targetStartDate'] / 1000)
        order.end_date = datetime.fromtimestamp(r['targetEndDate'] / 1000)
    except NameError as e:
        sys.exit('Error {}'.format(e.what()))
    article_number = r['articleNumber']
    d = query('article/?articleNumber-eq={}'.format(article_number)).json()
    if 'result' in d and len(d['result']) > 0:
        r = d['result'][0]
        article_image_id = r['articleImages'][0]['id']
        order.price_offer = r['articlePrices'][0]['price']
        r = query('article/id/{}/downloadArticleImage?articleImageId={}'.format(order.article_id, article_image_id))
        order.article_image = r.content
    else:
        print('No result for article with number', article_number)
    costumer.orders.append(order)
    session.add(costumer)
    session.commit()
else:
    print('No result for production order with article number', article_number)
How can I call crawl.py from django view?
Directory Overview
You can check this:
In your case you need to find the exact file path. To do this, build the path from BASE_DIR in settings.py and import it in views.py.
settings.py
# Build paths inside the project like this: os.path.join(BASE_DIR, ...)
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
FILE_DIR = os.path.abspath(os.path.join(BASE_DIR, 'Customer/ERPProgram'))
views.py
import os
import sys

from django.conf import settings

sys.path.insert(0, os.path.join(settings.FILE_DIR))
import crawl
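Since views.py in the question already imports crawlmain from .ERPProgramm.crawl, another option is to skip the sys.path manipulation entirely and make crawl.py expose that function. This is only a sketch; it assumes ERPProgramm is an importable package (it may need an empty __init__.py) and that the module-level code in crawl.py is moved into the function:

# Customer/ERPProgramm/crawl.py (sketch)
def crawlmain(production_number):
    # Move the code that currently runs at module level in crawl.py into here,
    # replacing the undefined `formdata` with the production_number argument.
    print('Would fetch production order', production_number)

# Customer/views.py (sketch)
# from .ERPProgramm.crawl import crawlmain
#
# if form.is_valid():
#     formdata = form.cleaned_data['product_ID']
#     crawlmain(formdata)

That also removes the production_number = formdata line in crawl.py, which would raise a NameError whenever the script runs on its own.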

Python / Django compare and update model objects

I've only just started Python but have learned a lot over the last few months; now I have hit a wall with updating objects on a model at a good speed.
I have a model called Products which is populated from a csv file. Every day this file gets updated with changes like cost and quantity. I can compare each line of the file with the Products model, but with 120k lines this takes 3-4 hours.
What process can I take to make processing this file faster? I only want to modify the objects if cost and quantity have changed.
Any suggestions on how I tackle this?
Ver 3 of what I have tried:
from django.core.management import BaseCommand
from multiprocessing import Pool
from django.contrib.auth.models import User
from pprint import pprint
from CentralControl.models import Product, Supplier
from CentralControl.management.helpers.map_ingram import *
from CentralControl.management.helpers.helper_generic import *
from tqdm import tqdm
from CentralControl.management.helpers.config import get_ingram
import os, sys, csv, zipfile, CentralControl

# Run Script as 'SYSTEM'
user = User.objects.get(id=1)

# Get Connection config.
SUPPLIER_CODE, FILE_LOCATION, FILE_NAME = get_ingram()

class Command(BaseCommand):
    def handle(self, *args, **options):
        list_in = get_file()
        list_current = get_current_list()
        pool = Pool(6)
        pool.map(compare_lists(list_in, list_current))
        pool.close()

def compare_lists(list_in, list_current):
    for row_current in tqdm(list_current):
        for row_in in list_in:
            if row_in['order_code'] == row_current['order_code']:
                # do more stuff here.
                pass

def get_current_list():
    try:
        supplier = Supplier.objects.get(code='440040')
        current_list = Product.objects.filter(supplier=supplier).values()
        return current_list
    except:
        print('Error no products with supplier')
        exit()

def get_file():
    with zipfile.ZipFile(FILE_LOCATION + 'incoming/' + FILE_NAME, 'r') as zip:
        with zip.open('228688 .csv') as csvfile:
            reader = csv.DictReader(csvfile)
            list_in = (list(reader))
            for row in tqdm(list_in):
                row['order_code'] = row.pop('Ingram Part Number')
                row['order_code'] = (row['order_code']).lstrip("0")
                row['name'] = row.pop('Ingram Part Description')
                row['description'] = row.pop('Material Long Description')
                row['mpn'] = row.pop('Vendor Part Number')
                row['gtin'] = row.pop('EANUPC Code')
                row['nett_cost'] = row.pop('Customer Price')
                row['retail_price'] = row.pop('Retail Price')
                row['qty_at_supplier'] = row.pop('Available Quantity')
                row['backorder_date'] = row.pop('Backlog ETA')
                row['backorder_date'] = (row['backorder_date'])
                row['backorder_qty'] = row.pop('Backlog Information')
        zip.close()
    # commented out for dev process.
    # os.rename(FILE_LOCATION + 'incoming/' + FILE_NAME, FILE_LOCATION + 'processed/' + FILE_NAME)
    return list_in
I once faced a problem of slow data loading, and I can tell you what I did; maybe it can help you somehow. I switched the execution to debug mode and tried to find out which column was causing the slow loading, and every time I saw that a column was causing the problem I added an index on it (in the DBMS, PostgreSQL in my case), and it worked. I hope you are facing the same problem, so my answer can help you.
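In Django terms that usually means indexing the column the comparison filters or joins on. A minimal sketch, assuming the Product model has an order_code field used for the lookups (the field names here are assumptions):

# models.py (sketch)
from django.db import models

class Product(models.Model):
    order_code = models.CharField(max_length=64, db_index=True)  # indexed lookup column
    nett_cost = models.DecimalField(max_digits=10, decimal_places=2)
    qty_at_supplier = models.IntegerField()

After adding db_index=True (or an entry in Meta.indexes), run makemigrations and migrate so the index is actually created in PostgreSQL.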
Here is a rough idea:
1. When reading the csv, use pandas as suggested by #BearBrow, into array_csv.
2. Convert the object data from Django into a numpy array, array_obj.
3. Don't compare them one by one; use numpy subtraction:
compare_index = (array_csv[['cost', 'quantity']] - array_obj[['cost', 'quantity']] == 0)
4. Find the rows whose cost or quantity changed:
obj_need_updated = array_obj[np.logical_or(~compare_index['cost'], ~compare_index['quantity'])]
Then use Django bulk update https://github.com/aykut/django-bulk-update to bulk update them.
Hope this will give you hints to speed up your code.
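A plain-Python variant of the same idea, sketched under the assumptions that order_code is unique per product, that the Product fields mirror the CSV keys built in get_file, and that you are on Django 2.2+ (which has QuerySet.bulk_update built in): build a dict from the CSV once, so each product is matched in O(1) instead of rescanning the 120k-row list, and write the changes in batches.

from decimal import Decimal

from CentralControl.models import Product, Supplier

def update_changed_products(list_in, supplier_code='440040', batch_size=1000):
    # One pass over the CSV rows to build an order_code -> row lookup table.
    csv_rows = {row['order_code']: row for row in list_in}

    supplier = Supplier.objects.get(code=supplier_code)
    to_update = []
    for product in Product.objects.filter(supplier=supplier).iterator():
        row = csv_rows.get(product.order_code)
        if row is None:
            continue
        new_cost = Decimal(row['nett_cost'])
        new_qty = int(row['qty_at_supplier'])
        # Only touch products whose cost or quantity actually changed.
        if product.nett_cost != new_cost or product.qty_at_supplier != new_qty:
            product.nett_cost = new_cost
            product.qty_at_supplier = new_qty
            to_update.append(product)

    # One UPDATE statement per batch instead of one query per product.
    Product.objects.bulk_update(to_update, ['nett_cost', 'qty_at_supplier'], batch_size=batch_size)

With the dict lookup the comparison itself drops from O(n*m) to O(n), and the nested-loop compare_lists (and the multiprocessing pool) become unnecessary.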
