JSON Single Line Parse to Multi-Line CSV with Python

I am a Python noob, working with the Plaid API to get bank transactions. I would like each transaction to be its own line, and I only want to pull four values per record: date, _account, name and amount, and populate a CSV file with that data. I have the code below, which populates a single-line CSV (the JSON file is also attached). After a bit of Googling I can't seem to find examples of how to do this. Any help is much appreciated.
import csv

# Configuration
from plaid import Client

Client.config({
    'url': 'https://api.plaid.com'
})

# Connect to Plaid
from plaid import Client
from plaid import errors as plaid_errors
from plaid.utils import json

client = Client(client_id='test_id', secret='test_secret')
account_type = 'suntrust'

try:
    response = client.connect(account_type, {
        'username': 'plaid_test',
        'password': 'plaid_good'
    })
except plaid_errors.PlaidError:
    pass
else:
    connect_data = response.json()

# Get transactions from Plaid
response = client.connect_get()
transactions = response.json()

# Save the transactions JSON response to a csv file in the Python Projects directory
with open('transactions.csv', 'w') as outfile:
    json.dump(transactions, outfile)

csvfile = open('transactions.csv', 'r')
jsonfile = open('transactions.json', 'w')
fieldnames = ("date", "_account", "name", "amount")
reader = csv.DictReader(csvfile, fieldnames)

for row in reader:
    json.dump(row, jsonfile)
    jsonfile.write('\n')

I think you are making this over-complicated and confusing JSON with CSV. Hat tip to @thalesmallo, who beat me to the punch on using the DictWriter class. Try this:
import csv

# Configuration
from plaid import Client

Client.config({
    'url': 'https://api.plaid.com'
})

# Connect to Plaid
from plaid import errors as plaid_errors
from plaid.utils import json

client = Client(client_id='test_id', secret='test_secret')
account_type = 'suntrust'

try:
    response = client.connect(account_type, {
        'username': 'plaid_test',
        'password': 'plaid_good'
    })
except plaid_errors.PlaidError:
    pass
else:
    connect_data = response.json()

response = client.connect_get()
data = response.json()
transactions = data['transactions']  # see https://plaid.com/docs/api/#data-overview

# Save the transactions to a csv file in the Python Projects directory
header = ("date", "_account", "name", "amount")
with open('transactions.csv', 'w') as f:
    writer = csv.DictWriter(f, fieldnames=header, extrasaction='ignore')
    writer.writeheader()
    for x in transactions:
        writer.writerow(x)
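The key detail is extrasaction='ignore', which tells DictWriter to silently drop any keys that are not in fieldnames. A minimal, self-contained sketch with made-up transaction dicts (no Plaid connection needed) shows the effect:

import csv

# Hypothetical records shaped like Plaid transactions; the extra
# 'category' key is silently dropped thanks to extrasaction='ignore'.
transactions = [
    {"date": "2016-03-01", "_account": "abc123", "name": "Coffee Shop",
     "amount": 4.50, "category": ["Food and Drink"]},
    {"date": "2016-03-02", "_account": "abc123", "name": "Gas Station",
     "amount": 30.00, "category": ["Travel"]},
]

header = ("date", "_account", "name", "amount")
with open('transactions.csv', 'w', newline='') as f:
    writer = csv.DictWriter(f, fieldnames=header, extrasaction='ignore')
    writer.writeheader()           # one header row
    for tx in transactions:
        writer.writerow(tx)        # one CSV row per transaction

This produces one line per transaction with only the four wanted columns.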

Related

FastAPI: How to post dictionary with String and Bytes object to an endpoint?

I want to post a payload dict to my FastAPI endpoint that holds metadata like a file name and a bytes object.
I managed to send the bytes object stand-alone using FastAPI's File() class, but not inside a dict structure.
For my FastAPI endpoint I tried two approaches.
Approach 1
@app.post("/process_file/", tags=["File processing"])
def process_excel_file(payload_dict: dict):
    file_name = payload_dict["file_name"]
    pyld = payload_dict["payload"]
    data = FlatFileParser(file_name, pyld)
    logger.debug(f"Received: {data}")
    return {"Validator Response": data}
Approach 2
from fastapi import FastAPI, File, Request

@app.post("/process_file/", tags=["File processing"])
def process_excel_file(payload_dict: Request):
    file_name = payload_dict.body()["file_name"]
    payload = payload_dict.body()["payload"]
    data = FlatFileParser(file_name, payload)
    logger.debug(f"Received: {data}")
    data = strip_cols(data)
    data = clean_columns(data)
    data = prep_schema_cols(data)
    data = drop_same_name_columns(data)
    return {"Validator Response": data}
My POST request looks like the following:
import requests

url = "http://localhost:8000/process_file/"
file_path = r'sample_file.xlsx'

with open(file_path, 'rb') as f:
    file = f.read()

payload = {
    "file_name": file_path,
    "payload": file
}

response = requests.post(url, data=payload)
But I receive the following error for Approach 1:
Out[34]: b'{"detail":[{"loc":["body"],"msg":"value is not a valid dict","type":"type_error.dict"}]}'
And for Approach 2 I am unable to parse the body() in a way that retrieves the necessary data.
Any suggestions or advice?
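Not from the original thread, but for reference: the usual FastAPI pattern for mixing a file with metadata is to declare them as separate multipart fields with File() and Form() rather than a single dict. A minimal sketch (endpoint and field names are illustrative):

from fastapi import FastAPI, File, Form, UploadFile

app = FastAPI()

@app.post("/process_file/", tags=["File processing"])
async def process_excel_file(
    file_name: str = Form(...),        # plain-text metadata field
    payload: UploadFile = File(...),   # the uploaded bytes
):
    contents = await payload.read()    # raw file contents
    return {"file_name": file_name, "size": len(contents)}

# Client side: send the metadata via data= and the bytes via files=
# so requests builds a proper multipart/form-data body:
#
# import requests
# with open("sample_file.xlsx", "rb") as f:
#     response = requests.post(
#         "http://localhost:8000/process_file/",
#         data={"file_name": "sample_file.xlsx"},
#         files={"payload": f},
#     )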

how to load 1000 lines of a csv into elasticsearch as 1000 different documents using elasticsearch API

So I've been trying to load 1000 lines of a CSV into Elasticsearch as 1000 different documents. The CSV has 8 headers: release year, title, origin/ethnicity, director, cast, wiki page, plot.
My current code loads the dataset using the bulk command from helpers:
import csv
from elasticsearch import helpers, Elasticsearch

es = Elasticsearch("http://localhost:9200")

es.indices.delete(index='movie-plots', ignore=[400, 404])
es.indices.create(index='movie-plots', body=body)

filename = 'wiki_movie_plots_deduped.csv'

def csv_reader(file_name):
    with open(file_name, 'r') as outfile:
        reader = csv.DictReader(outfile)
        helpers.bulk(es, reader, index="movie-plots", doc_type="_doc")
This, I think, loads the 1000 lines into one document.
You are on the right path. The code below splits the CSV into 1000 different items by separating out the headers and turning each line into a dictionary keyed by the appropriate header. Each dictionary is appended to a list, so you upload a list of dictionary items.
import csv, sys
from elasticsearch import helpers, Elasticsearch, RequestsHttpConnection

es = Elasticsearch(
    hosts=[{
        'host': 'localhost',
        'port': '9200'}],
    use_ssl=False,
    verify_certs=True,
    connection_class=RequestsHttpConnection
)

upload_list = []  # list of items for upload

# Load all csv data
with open('my_folder/my_csv_file.csv', newline='') as csvfile:
    data_list = []
    csv_data = csv.reader(csvfile)
    for row in csv_data:
        data_list.append(row)

    # separate out the headers from the main data
    headers = data_list[0]
    # drop headers from data_list
    data_list.pop(0)

    for item in data_list:  # iterate over each row/item in the csv
        item_dict = {}
        # match a column header to the row data for an item
        i = 0
        for header in headers:
            item_dict[header] = item[i]
            i = i + 1
        # add the transformed item/row to a list of dicts
        upload_list += [item_dict]

# using helper library's Bulk API to index list of Elasticsearch docs
try:
    resp = helpers.bulk(
        es,
        upload_list,
        index="my-index-name"
    )
    msg = "helpers.bulk() RESPONSE: " + str(resp)
    print(msg)  # print the response returned by Elasticsearch
except Exception as err:
    msg = "Elasticsearch helpers.bulk() ERROR: " + str(err)
    print(msg)
    sys.exit(1)
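As a side note, the header-to-dictionary mapping above can also be delegated to csv.DictReader, which pairs each row with the header line for you; helpers.bulk() accepts any iterable of dicts. A shorter sketch, assuming the same file and index name:

import csv
from elasticsearch import helpers, Elasticsearch

es = Elasticsearch("http://localhost:9200")

# DictReader yields one dict per CSV row, so each row is indexed
# as its own Elasticsearch document.
with open('my_folder/my_csv_file.csv', newline='') as csvfile:
    reader = csv.DictReader(csvfile)
    helpers.bulk(es, reader, index="my-index-name")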

Django : How to upload CSV file in unit test case using APIClient from rest_framework

def test_upload_csv_success(self):
    """Test uploading a csv file"""
    with open("innovators.csv", "w") as file:
        writer = csv.writer(file)
        writer.writerow(["SN", "Name", "Contribution"])
        writer.writerow([1, "Linus Torvalds", "Linux Kernel"])
        writer.writerow([2, "Tim Berners-Lee", "World Wide Web"])
        writer.writerow([3, "Guido van Rossum", "Python Programming"])
    with open("innovators.csv", "r") as file:
        res = self.client.post(
            CSV_URL, {"file": file}, content_type="multipart/form-data"
        )
        file.close()
    self.assertEqual(res.status_code, status.HTTP_201_CREATED)
    # self.assertIn('file', res.data)
    # self.assertTrue(os.path.exists(self.csv_model.file.path))
Below is the error I'm getting:
System check identified no issues (0 silenced).
.F.
FAIL: test_upload_csv_success (core.tests.test_csv_api.CsvUploadTests)
Test uploading a csv file
Traceback (most recent call last):
  File "/Users/rounaktadvi/django_rest_api_projects/csv-store-api/core/tests/test_csv_api.py", line 56, in test_upload_csv_success
    self.assertEqual(res.status_code, status.HTTP_201_CREATED)
AssertionError: 400 != 201
I figured it out, here's what I did:
@patch("pandas.read_csv")
@patch("pandas.DataFrame.to_sql")
def test_upload_csv_success(self, mock_to_sql, mock_read_csv) -> None:
    """Test uploading a csv file"""
    # Note: stacked patch decorators inject mocks bottom-up, so the
    # innermost decorator (to_sql) maps to the first mock argument.
    file_name = "test.csv"
    # Open file in write mode (Arrange)
    with open(file_name, "w") as file:
        writer = csv.writer(file)
        # Add some rows in csv file
        writer.writerow(["name", "area", "country_code2", "country_code3"])
        writer.writerow(["Albania", 28748, "AL", "ALB"])
        writer.writerow(["Algeria", 2381741, "DZ", "DZA"])
        writer.writerow(["Andorra", 468, "AD", "AND"])
    # open file in read mode
    data = open(file_name, "rb")
    # Create a simple uploaded file
    data = SimpleUploadedFile(
        content=data.read(), name=data.name, content_type="multipart/form-data"
    )
    # Perform put request (Act)
    res = self.client.put(CSV_URL, {"file_name": data}, format="multipart")
    # Mock read_csv() and to_sql() functions provided by pandas module
    mock_read_csv.return_value = True
    mock_to_sql.return_value = True
    # Assert
    self.assertEqual(res.status_code, status.HTTP_201_CREATED)
    self.assertEqual(res.data, "Data set uploaded")
    # Delete the test csv file
    os.remove(file_name)
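One detail worth knowing about the stacked patch decorators above: they inject mocks bottom-up, which is why the parameter order matters. A tiny standalone illustration (the patched targets here are arbitrary examples):

from unittest.mock import patch

@patch("os.remove")         # outermost decorator -> last mock argument
@patch("os.path.exists")    # innermost decorator -> first mock argument
def check_order(mock_exists, mock_remove):
    import os
    os.path.exists("x")     # recorded on mock_exists, not mock_remove
    mock_exists.assert_called_once_with("x")
    mock_remove.assert_not_called()

check_order()  # mocks are supplied by the decorators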

Save a writable file into database?

I am using Django 1.8 and Python 3.4, trying to create a JSON file, write into it, and then save it to my database, but on save it returns the error '_io.TextIOWrapper' object has no attribute '_committed'. Can anyone please tell me where I am going wrong?
Here is my models.py
class ConvertedFile(models.Model):
    file = models.FileField(upload_to='json/upload', max_length=5000)
    created_on = models.DateTimeField(auto_now_add=True)
My views.py is:
def convert_file(request):
    url = request.GET.get('q', None)
    r = requests.get(url, stream=True)
    with open('file.csv', 'wb') as out_file:
        shutil.copyfileobj(r.raw, out_file)

    csvfile = open("file.csv", "r")
    jsonfile = open("file.json", "w")
    csv_rows = []
    reader = csv.DictReader(csvfile)
    title = reader.fieldnames
    try:
        for row in reader:
            csv_rows.extend([{title[i]: row[title[i]] for i in range(len(title))}])
    except:
        pass
    jsonfile.write(json.dumps(csv_rows, sort_keys=False, indent=4, separators=(',', ': '), ensure_ascii=False))
    os.remove("file.csv")
    jsonfile.close()

    new_json = ConvertedFile.objects.create()
    new_json.file = jsonfile
    new_json.save()
The error is raised on model.save() in the last line, right? The line above it, new_json.file = jsonfile, is the problem: you pass a reference to a closed plain-Python file object to Django's FileField, and it does not know how to deal with it (_committed is missing, for example).
Have a look at Django - how to create a file and save it to a model's FileField?
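A minimal sketch of the usual fix, assuming the ConvertedFile model and csv_rows list from the question: wrap the JSON string in Django's ContentFile so the FileField receives a file-like object it knows how to commit.

import json
from django.core.files.base import ContentFile

# Build the JSON in memory instead of via a plain file handle.
json_payload = json.dumps(csv_rows, sort_keys=False, indent=4,
                          separators=(',', ': '), ensure_ascii=False)

new_json = ConvertedFile()
# FileField.save() stores the content and commits the model row.
new_json.file.save('file.json', ContentFile(json_payload), save=True)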

Want to prompt browser to save csv

I want to prompt the browser to save a CSV using pyramid.response.Response. I searched for clues and found a Django answer, but I can't use it with Pyramid WSGI. My code looks like this:
from pyramid.response import Response

def get_list_names_emails(request):
    session, env = request.db, request.client_env
    response = Response(content_type='text/csv')
    output = StringIO()
    writer = csv.writer(output)
    writer.writerow(['SomeName', 'SomeEmail', 'CompanyName'])
    csv_output = output.getvalue()
    return csv_output
As a cleaner way to do that, you can register a renderer.
In your configuration set-up, add:
config.add_renderer(name='csv',
                    factory='mypackage.renderers.CSVRenderer')
then in mypackage/renderers.py:
class CSVRenderer(object):
    def __init__(self, info):
        pass

    def __call__(self, value, system):
        fout = StringIO.StringIO()
        writer = csv.writer(fout, delimiter=';', quoting=csv.QUOTE_ALL)
        writer.writerow(value['header'])
        writer.writerows(value['rows'])

        resp = system['request'].response
        resp.content_type = 'text/csv'
        resp.content_disposition = 'attachment;filename="report.csv"'
        return fout.getvalue()
After that, you can decorate your view with the renderer:
@view_config(..., renderer='csv')
def myview(self):
    header = ['name', 'surname', 'address']
    rows = [
        (
            row['name'],
            row['surname'],
            row['address'],
        )
        for row in query_rows(.....)
    ]
    return {
        'header': header,
        'rows': rows
    }
The advantage of this approach is better-testable view code (you just check the dictionary values, no need to parse anything), and you can also add an XLS or other renderer to the same view:
@view_config(..., renderer='xls')
@view_config(..., renderer='csv')
def myview(self):
    ...
Try adding Content-Disposition:
response['Content-Disposition'] = 'attachment; filename="report.csv"'
It's better to set the content type as well:
response['Content-type'] = 'text/csv'
response['Content-Disposition'] = 'attachment; filename="report.csv"'
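Note the subscript syntax above is Django-style; with Pyramid's Response (a webob Response) the same headers are usually set as attributes. A minimal sketch of the view from the question, returning the Response itself rather than the raw string so the download headers actually reach the browser:

import csv
from io import StringIO

from pyramid.response import Response

def get_list_names_emails(request):
    output = StringIO()
    writer = csv.writer(output)
    writer.writerow(['SomeName', 'SomeEmail', 'CompanyName'])

    # Attach the CSV body and the headers to one Response object.
    response = Response(body=output.getvalue(), content_type='text/csv')
    response.content_disposition = 'attachment; filename="report.csv"'
    return response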
