import os, json

SITE_ROOT = os.path.realpath(os.path.dirname(__file__))
json_url = os.path.join(SITE_ROOT, "data", "results.json")
json_data = json.loads(open(json_url).read())
What I want to do is read my CSV file, specifically the question column, and load it in JSON format.
You can use pandas for this, I think:
import pandas as pd

df = pd.read_csv('path/csv_name.csv', delimiter=',')  # or delimiter=';'
print(df.head())  # display your data and check it
csv_to_json = df.to_json(orient='columns')
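If you only need the question column (the column name is taken from the question above), a sketch of the next step might be:

import pandas as pd

df = pd.read_csv('path/csv_name.csv', delimiter=',')
# orient='records' turns the single column into a plain JSON array of values
questions_json = df['question'].to_json(orient='records')
with open('questions.json', 'w') as f:  # output filename is illustrative
    f.write(questions_json)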
So I've been trying to load 1000 lines of a CSV into Elasticsearch as 1000 separate documents. The CSV has 8 headers: release year, title, origin/ethnicity, director, cast, wiki page, plot.
My current code loads the dataset using the bulk command from the helpers module:
import csv
from elasticsearch import helpers, Elasticsearch

es = Elasticsearch("http://localhost:9200")
es.indices.delete(index='movie-plots', ignore=[400, 404])
es.indices.create(index='movie-plots', body=body)  # body (the index mapping) is defined elsewhere

filename = 'wiki_movie_plots_deduped.csv'

def csv_reader(file_name):
    with open(file_name, 'r') as csv_file:
        reader = csv.DictReader(csv_file)
        helpers.bulk(es, reader, index="movie-plots", doc_type="_doc")
I think this loads all 1000 lines into one document, though.
You are on the right track. The code below splits the CSV into 1000 separate items by separating out the headers and turning each line into a dictionary keyed by the appropriate header. Each dictionary is appended to a list, so you upload a list of dictionary items.
import csv, sys
from elasticsearch import helpers, Elasticsearch, RequestsHttpConnection

es = Elasticsearch(
    hosts=[{
        'host': 'localhost',
        'port': '9200'}],
    use_ssl=False,
    verify_certs=True,
    connection_class=RequestsHttpConnection
)

upload_list = []  # list of items for upload

# Load all csv data
with open('my_folder/my_csv_file.csv', newline='') as csvfile:
    data_list = []
    csv_data = csv.reader(csvfile)
    for row in csv_data:
        data_list.append(row)

# separate out the headers from the main data
headers = data_list[0]
# drop headers from data_list
data_list.pop(0)

for item in data_list:  # iterate over each row/item in the csv
    item_dict = {}
    # match a column header to the row data for an item
    i = 0
    for header in headers:
        item_dict[header] = item[i]
        i = i + 1
    # add the transformed item/row to a list of dicts
    upload_list.append(item_dict)

# using helper library's Bulk API to index list of Elasticsearch docs
try:
    resp = helpers.bulk(
        es,
        upload_list,
        index="my-index-name"
    )
    print("helpers.bulk() RESPONSE:", resp)  # print the response returned by Elasticsearch
except Exception as err:
    print("Elasticsearch helpers.bulk() ERROR:", err)
    sys.exit(1)
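As a side note, csv.DictReader already does this header-to-value pairing automatically, so a shorter equivalent sketch (assuming the same file path and a plain local cluster without auth) would be:

import csv
from elasticsearch import helpers, Elasticsearch

es = Elasticsearch("http://localhost:9200")  # assumed local cluster, no auth

with open('my_folder/my_csv_file.csv', newline='') as csvfile:
    reader = csv.DictReader(csvfile)  # each row becomes a dict keyed by the headers
    helpers.bulk(es, reader, index="my-index-name")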
Say there's a DataFrame from pandas like:
                 mediabuy        cpa     mediabuy          cpc
cost  2020-02        0.00  371929.95  15956581.16  16328511.11
      2020-04     1311.92  224747.07  26710431.81  26936490.80
total             1311.92  596677.02  42667012.97  43265001.91
I want to create an Excel file in Django, and I've tried the code below:
# return excel view
df = pd.DataFrame(data, index=index, columns=column)
# save as excel
excel_writer = pd.ExcelWriter(path='temp.xlsx', engine='openpyxl')
df.to_excel(excel_writer)
wb = excel_writer.book
response = HttpResponse(save_virtual_workbook(wb))
response["Content-Type"] = 'application/vnd.ms-excel'
response['Content-Disposition'] = 'attachment; filename={}.xlsx'.format("data")
return response
I'm working with Python 3.6.8, Django 2.2.4, pandas 1.0.3, and openpyxl 3.0.3.
But I always get an error saying "the Excel file cannot be opened because the file format or file extension is not valid".
Why am I getting this error?
Thanks.
Unless there is a problem with the structure of the data in the DataFrame, you should be able to achieve this using:
from io import BytesIO
import pandas as pd
from django.http import HttpResponse

df = pd.DataFrame(data, index=index, columns=column)
stream_file = BytesIO()  # in-memory buffer instead of a file on disk
df.to_excel(stream_file)
stream_file.seek(0)  # rewind so the response reads from the start
response = HttpResponse(stream_file)
response["Content-Type"] = 'application/vnd.ms-excel'
response['Content-Disposition'] = 'attachment; filename={}.xlsx'.format("data")
return response
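For what it's worth, the more accurate content type for .xlsx files is application/vnd.openxmlformats-officedocument.spreadsheetml.sheet; application/vnd.ms-excel is the legacy .xls type, and the mismatch with an .xlsx filename can confuse Excel. If you need control over the engine or sheet name, the same buffer works with an explicit ExcelWriter; a sketch, assuming openpyxl is installed:

from io import BytesIO
import pandas as pd

stream_file = BytesIO()
with pd.ExcelWriter(stream_file, engine='openpyxl') as writer:  # context manager saves on exit
    df.to_excel(writer, sheet_name='data')
stream_file.seek(0)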
I am trying to download some user data into a CSV file. I am able to generate the User fields just fine, but when I try to access the one-to-one relation field I run into issues!
I've tried numerous ways to get there; I just can't seem to figure out how to get the correct related data.
def export_to_csv(modeladmin, request, queryset):
    try:
        from StringIO import StringIO
    except ImportError:
        from io import StringIO
    from wsgiref.util import FileWrapper
    cols = ['username', 'email', 'first_name', 'last_name', 'my_profile.dealer_num']
    # get qs values
    data = list(queryset.values_list(*cols))
    if not data:
        messages.error(request, 'No data to export')
        return HttpResponseRedirect(request.get_full_path())
    # create empty csv
    csv_file = StringIO()
    csv_writer = csv.writer(csv_file, quoting=csv.QUOTE_ALL)
    # add headers
    csv_writer.writerow(cols)
    # add qs values
    for row in data:
        csv_writer.writerow([s.encode('utf-8') for s in row])
    csv_file.flush()
    csv_file.seek(0)
    response = HttpResponse(FileWrapper(csv_file), content_type='text/csv')
    response['Content-Disposition'] = "attachment; filename=user-csv-export.csv"
    return response

export_to_csv.short_description = "Export to CSV"
models.py:
class MyProfile(UserenaBaseProfile):
    user = models.OneToOneField(User,
                                unique=True,
                                verbose_name=_('user'),
                                related_name='my_profile')
    dealer_num = models.CharField(blank=True,
                                  max_length=15,
                                  verbose_name="Dealer Number")
It should return everything, including a 5-digit dealer number, in a CSV.
You are not accessing the field correctly; you need to use double underscores (__), as in a query.
Change cols to:
cols = ['username','email','first_name','last_name','my_profile__dealer_num']
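For context, values_list() resolves the same double-underscore paths that filter() and order_by() do, so the reverse one-to-one hop works through the related_name; a quick illustration (the sample values are made up):

cols = ['username', 'email', 'first_name', 'last_name', 'my_profile__dealer_num']
data = list(queryset.values_list(*cols))
# each row is a plain tuple, e.g. ('jsmith', 'j@example.com', 'John', 'Smith', '12345')

One caveat: users without a related MyProfile row come back with None in that position, so the s.encode('utf-8') loop in the question may need a guard for non-string values.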
I am a Python noob working with the Plaid API to get bank transactions. I would like each transaction to be its own line, and I only want to pull four values per record: date, _account, name, and amount, and populate a CSV file with that data. I have the code below, which populates a single-line CSV (JSON file also attached). After a bit of Googling, I can't seem to find examples of how to do this. Any help is much appreciated.
import csv
# Configuration
from plaid import Client

Client.config({
    'url': 'https://api.plaid.com'
})

# Connect to Plaid
from plaid import errors as plaid_errors
from plaid.utils import json

client = Client(client_id='test_id', secret='test_secret')
account_type = 'suntrust'

try:
    response = client.connect(account_type, {
        'username': 'plaid_test',
        'password': 'plaid_good'
    })
except plaid_errors.PlaidError:
    pass
else:
    connect_data = response.json()

# Get transactions from Plaid
response = client.connect_get()
transactions = response.json()

# Save the transactions JSON response to a csv file in the Python Projects directory
with open('transactions.csv', 'w') as outfile:
    json.dump(transactions, outfile)

csvfile = open('transactions.csv', 'r')
jsonfile = open('transactions.json', 'w')
fieldnames = ("date", "_account", "name", "amount")
reader = csv.DictReader(csvfile, fieldnames)
for row in reader:
    json.dump(row, jsonfile)
    jsonfile.write('\n')
I think you are making this over-complicated and confusing JSON with CSV. Hat tip to @thalesmallo, who beat me to the punch on using the DictWriter class. Try this:
import csv
from plaid import Client

Client.config({
    'url': 'https://api.plaid.com'
})

# Connect to Plaid
from plaid import errors as plaid_errors
from plaid.utils import json

client = Client(client_id='test_id', secret='test_secret')
account_type = 'suntrust'

try:
    response = client.connect(account_type, {
        'username': 'plaid_test',
        'password': 'plaid_good'
    })
except plaid_errors.PlaidError:
    pass
else:
    connect_data = response.json()

response = client.connect_get()
data = response.json()
transactions = data['transactions']  # see https://plaid.com/docs/api/#data-overview

# Save the transactions to a csv file in the Python Projects directory
header = ("date", "_account", "name", "amount")
with open('transactions.csv', 'w') as f:
    writer = csv.DictWriter(f, fieldnames=header, extrasaction='ignore')
    writer.writeheader()
    for x in transactions:
        writer.writerow(x)
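The extrasaction='ignore' part is what makes this work: each Plaid transaction dict carries many more keys than the four columns, and DictWriter would otherwise raise a ValueError on the extras. A quick illustration with a made-up row:

row = {'date': '2016-03-08', '_account': 'test_account', 'name': 'Uber',
       'amount': 6.33, 'category': ['Travel']}  # hypothetical transaction
writer.writerow(row)  # 'category' is silently dropped; only the four header fields are written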
I have to write a function that takes as input the name of a table and conditionals/filters for the query; as output it returns a link which should automatically be downloaded by the client (browser).
How can I implement this task using Python/Django?
E.g. I've written a small piece of code, but I'm not sure that it works correctly, and there is no implementation of query-conditional parsing (I don't know how to implement that):
direct_db.py:
from django.db import connection

class DirectSQL:
    def __init__(self, in_sql):
        self.sql = in_sql
        self.cursor = connection.cursor()
        self.cursor.execute(in_sql)

    def getDescription(self):
        columns = [desc[0] for desc in self.cursor.description]
        return columns

    def getResult(self):
        row = self.cursor.fetchall()
        return row

    def getResultAsDict(self):
        desc = self.cursor.description
        return [dict(zip([col[0].lower() for col in desc], row)) for row in self.cursor.fetchall()]
excel.py:
from ecc.direct_db import DirectSQL
import pandas as ps

class Excel:
    def __init__(self, table_name):
        self.table_name = table_name

    def convert(self, in_args):
        # the table name has to be interpolated into the SQL string;
        # parsing of conditionals (where... order by... like...) is still missing
        q = DirectSQL("select * from " + self.table_name)
        columns = q.getDescription()  # already a list of column names
        data = q.getResult()
        df = ps.DataFrame(list(data), columns=columns)
        writer = ps.ExcelWriter('converted.xlsx')
        df.to_excel(writer, sheet_name='converted')
        writer.save()
I've worked on something like this before. I used xlsxwriter; you can check its docs to find out how to create an xlsx file and how to set data into it. Then you'll need a view:
from django.views.generic import View
from django.http import HttpResponse
from django.utils.cache import add_never_cache_headers

class CreateReport(View):

    def get_data(self):
        # Query your data here, probably using self.request to get the query string
        ...
        return data

    def generate_report(self):
        # Here you will create the xlsx doc and populate it with data
        # according to the docs linked before
        ...
        return workbook

    def get(self, request, *args, **kwargs):
        document = self.generate_report()
        _file = open(document.filename, 'rb')  # binary mode: an xlsx is a zip archive
        response = HttpResponse(_file, content_type='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet')
        # document.filename is a full path, hence the split to get only the filename
        response['Content-Disposition'] = 'attachment; filename=%s' % document.filename.split('/')[-1]
        add_never_cache_headers(response=response)  # to avoid downloading the same file with out-of-date data
        return response
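For the elided generate_report() step, a minimal sketch using xlsxwriter could look like this; the worksheet layout, the temporary path, and the assumption that get_data() returns an iterable of row tuples are illustrative, not part of the original answer:

import xlsxwriter

def generate_report(self):
    # Workbook keeps its filename attribute, which get() reads back later
    workbook = xlsxwriter.Workbook('/tmp/report.xlsx')  # assumed temp path
    worksheet = workbook.add_worksheet('report')
    for row_num, row in enumerate(self.get_data()):  # assumes rows of plain values
        for col_num, value in enumerate(row):
            worksheet.write(row_num, col_num, value)
    workbook.close()  # must be closed before get() reads the file back
    return workbook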
Then you will need a URL:
from django.conf.urls import url
from myapp.views import CreateReport

url(r'^create_report/(?P<some_param_if_needed>[-\w]+)',
    CreateReport.as_view(),
    name='create_report'),
and finally, in the template:
<a href="{% url 'create_report' some_param_if_needed %}">Download Report</a>
EDIT
Here's a more complete example of the get_data() method.
def get_data(self):
    # Let's suppose you have a `MyElements` model
    elements = MyElements.objects.all()
    # And let's suppose you want to filter data with some GET parameter
    filter = self.request.GET.get('filter_name', None)
    if filter is not None:
        elements = elements.filter(filter_field=filter)
    return elements