Writing data from an API to a CSV - python

Basically I have this working code whose purpose is to download, from an API, an entire series of how many times a stock ticker is mentioned on the wallstreetbets sub.
This is the code:
import requests

tickers = open("ticker_list.txt", "r")
for ticker in tickers:
    ticker = ticker.strip()
    url = "https://XXX SENSITIVE INFO/historical/wallstreetbets/" + ticker
    headers = {'XXX SENSITIVE INFO'}
    r = requests.get(url, headers=headers)
    print(r.content)
Where the .txt file is a simple list of about 8000 stock symbols.
Here are the first lines of the output, as an example:
b'[{"Date": "2018-08-10", "Ticker": "AA", "Mentions": 1}, {"Date": "2018-08-28", "Ticker": "AA", "Mentions": 1}, {"Date": "2018-09-07", "Ticker": "AA", "Mentions": 1}, etc...
b'[{"Date": "2020-12-07", "Ticker": "AACQ", "Mentions": 1}, {"Date": "2020-12-08", "Ticker": "AACQ", "Mentions": 1}, {"Date": "2020-12-22", "Ticker": "AACQ", "Mentions": 1},... etc...
b'[{"Date": "2018-08-08", "Ticker": "AAL", "Mentions": 1}, {"Date": "2018-08-20", "Ticker": "AAL", "Mentions": 1}, {"Date": "2018-09-11", "Ticker": "AAL", "Mentions": 1}, .... etc
What I want to do now is store all the data in a CSV file so that the resulting table is laid out like this:

             AA    AACQ   AAL   ......
1/1/2018     3     3      7     ...
2/1/2018     45    89     3     ....
3/1/2018     21    4      2     ......
....

(where the numbers in the middle represent the mentions per date per ticker; to simplify I put random numbers here, but they need to be the same numbers I got in the output as "Mentions")
Alternatively, if it's easier, I need to create a single CSV file for every ticker, with the date in the first column and the number of mentions in the second column.

The data being returned from the site is in JSON format, so it can be converted into a Python data structure using r.json(). Two things will help you here: a Counter can be used to keep track of all of the Mentions in your JSON data, and a defaultdict can be used to build a per-date entry for each ticker. The set all_tickers keeps track of all the tickers seen in the data and is then used to form the header of your output CSV file.
For example:
from collections import defaultdict, Counter
from datetime import datetime
import requests
import csv

dates = defaultdict(Counter)
all_tickers = set()

with open("ticker_list.txt") as tickers:
    for ticker in tickers:
        ticker = ticker.strip()
        url = f"https://XXX SENSITIVE INFO/historical/wallstreetbets/{ticker}"
        headers = {'XXX SENSITIVE INFO'}
        r = requests.get(url, headers=headers)

        for row in r.json():
            all_tickers.add(row['Ticker'])
            date = datetime.strptime(row['Date'], '%Y-%m-%d')  # convert to datetime format
            dates[date][row['Ticker']] += row['Mentions']

with open('output.csv', 'w', newline='') as f_output:
    csv_output = csv.DictWriter(f_output, fieldnames=['Date', *sorted(all_tickers)])
    csv_output.writeheader()

    for date, values in sorted(dates.items()):
        row = {'Date': date.strftime('%d/%m/%Y')}  # output date format: day/month/year
        row.update(values)
        csv_output.writerow(row)
This should produce the output you need.
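For the alternative layout (one CSV file per ticker, date in the first column and mentions in the second), here is a minimal sketch reusing the dates and all_tickers structures built above; the per-ticker file naming is an assumption:
# Sketch: one CSV per ticker, reusing dates/all_tickers from above.
for ticker in sorted(all_tickers):
    with open(f'{ticker}.csv', 'w', newline='') as f_ticker:  # assumed naming scheme
        ticker_csv = csv.writer(f_ticker)
        ticker_csv.writerow(['Date', 'Mentions'])
        for date, values in sorted(dates.items()):
            if ticker in values:  # skip dates with no mentions for this ticker
                ticker_csv.writerow([date.strftime('%d/%m/%Y'), values[ticker]])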

Related

Python snscrape: How to scrape tweet URL/link using snscrape?

What parameter should we use to include the URL/link of each tweet? I have the date, username, and content here. Another question: how can we convert the date in the dataframe to GMT+8? The timezone is in UTC. Please see the code below for reference:
import snscrape.modules.twitter as sntwitter
import pandas as pd

query = "(from:elonmusk) until:2023-01-28 since:2023-01-27"
tweets = []
limit = 100000

for tweet in sntwitter.TwitterSearchScraper(query).get_items():
    if len(tweets) == limit:
        break
    else:
        tweets.append([tweet.date, tweet.username, tweet.content])

df = pd.DataFrame(tweets, columns=['Date', 'Username', 'Tweet'])

# Save to csv
df.to_csv('tweets.csv')
df
get_items() returns each search result one at a time (as a generator of typed objects), so the tweets have to be counted inside the for loop.
The code below works. 100K tweets is possible, but it takes too much time, so I reduced the limit to 1,000 tweets.
import snscrape.modules.twitter as sntwitter
import pandas as pd

query = 'from:elonmusk since:2022-08-01 until:2023-01-28'
limit = 1000
tweets = sntwitter.TwitterSearchScraper(query).get_items()
index = 0
df = pd.DataFrame(columns=['Date', 'URL', 'Tweet'])

for tweet in tweets:
    if index == limit:
        break
    URL = "https://twitter.com/{0}/status/{1}".format(tweet.user.username, tweet.id)
    df2 = {'Date': tweet.date, 'URL': URL, 'Tweet': tweet.rawContent}
    df = pd.concat([df, pd.DataFrame.from_records([df2])])
    index = index + 1

# Converting the time zone from UTC. Note: 'Etc/GMT+8' is UTC-8 under the
# POSIX sign convention (hence the -08:00 offsets in the result below);
# for GMT+8 use 'Etc/GMT-8' or a zone name such as 'Asia/Manila'.
df['Date'] = df['Date'].dt.tz_convert('Etc/GMT+8')

print(df)
df.to_csv('tweets.csv')
This is a single item of data from get_items(); only the required keys' values need to be extracted:
tweet.date -> Date
https://twitter.com/{tweet.user.username}/status/{tweet.id} -> URL
tweet.rawContent -> Tweet
{
    "_type": "snscrape.modules.twitter.Tweet",
    "url": "https://twitter.com/elonmusk/status/1619164489710178307",
    "date": "2023-01-28T02:44:31+00:00",
    "rawContent": "#tn_daki #ShitpostGate Yup",
    "renderedContent": "#tn_daki #ShitpostGate Yup",
    "id": 1619164489710178307,
    "user": {
        "_type": "snscrape.modules.twitter.User",
        "username": "elonmusk",
        "id": 44196397,
        "displayname": "Mr. Tweet",
        "rawDescription": "",
        "renderedDescription": "",
        "descriptionLinks": null,
        "verified": true,
        "created": "2009-06-02T20:12:29+00:00",
        "followersCount": 127536699,
        "friendsCount": 176,
        "statusesCount": 22411,
        "favouritesCount": 17500,
        "listedCount": 113687,
        "mediaCount": 1367,
        "location": "",
        "protected": false,
        "link": null,
        "profileImageUrl": "https://pbs.twimg.com/profile_images/1590968738358079488/IY9Gx6Ok_normal.jpg",
        "profileBannerUrl": "https://pbs.twimg.com/profile_banners/44196397/1576183471",
        "label": null,
        "url": "https://twitter.com/elonmusk"
    }
    ... cut off
Result
>python get-data.py
Date URL Tweet
0 2023-01-27 15:29:36-08:00 https://twitter.com/elonmusk/status/1619115435... #farzyness No way
0 2023-01-27 15:14:05-08:00 https://twitter.com/elonmusk/status/1619111533... #mtaibbi Please correct your bs #PolitiFact &a...
0 2023-01-27 14:52:55-08:00 https://twitter.com/elonmusk/status/1619106207... #WallStreetSilv A quarter of all taxes just to...
0 2023-01-27 13:28:26-08:00 https://twitter.com/elonmusk/status/1619084945... #nudubabba #mikeduncan Yeah, whole thing
0 2023-01-27 13:12:16-08:00 https://twitter.com/elonmusk/status/1619080876... #TaraBull808 That’s way more monkeys than the ...
.. ... ... ...
0 2022-12-14 11:14:53-08:00 https://twitter.com/elonmusk/status/1603106271... #Jason Advertising revenue next year will be l...
0 2022-12-14 04:08:43-08:00 https://twitter.com/elonmusk/status/1602999020... #Balyx_ He would be welcome
0 2022-12-14 03:42:47-08:00 https://twitter.com/elonmusk/status/1602992493... #NorwayMFA #TwitterSupport #jonasgahrstore #AH...
0 2022-12-14 03:35:14-08:00 https://twitter.com/elonmusk/status/1602990594... #AvidHalaby Wow
0 2022-12-14 03:35:03-08:00 https://twitter.com/elonmusk/status/1602990549... #AvidHalaby Live & learn …
[1000 rows x 3 columns]
References
Converting time zone pandas dataframe
Tweet URL format (detailed information there)
Example:
URL = "https://twitter.com/elonmusk/status/1619111533216403456"
It is saved into the CSV file as:
0,2023-01-27 15:14:05-08:00,https://twitter.com/elonmusk/status/1619111533216403456,#mtaibbi Please correct your bs #PolitiFact & #snopes
which matches the tweet content in the pandas Tweet column.
You can also add columns for followersCount, friendsCount, statusesCount, favouritesCount, listedCount, mediaCount, replyCount, retweetCount, likeCount and viewCount.
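As a hedged sketch of that, the df2 row dict built inside the loop above can simply carry more fields (followersCount and friendsCount appear in the user JSON above; replyCount, retweetCount and likeCount are assumed to be available on the tweet object in the same way):
df2 = {'Date': tweet.date,
       'URL': URL,
       'Tweet': tweet.rawContent,
       'FollowersCount': tweet.user.followersCount,  # from the nested user object
       'ReplyCount': tweet.replyCount,               # assumed tweet attributes
       'RetweetCount': tweet.retweetCount,
       'LikeCount': tweet.likeCount}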

How to parse tab-delimited text file with 4th column as json and remove certain keys?

I have a text file that is 26 GB. The line format is as follows:
/type/edition /books/OL10000135M 4 2010-04-24T17:54:01.503315 {"publishers": ["Bernan Press"], "physical_format": "Hardcover", "subtitle": "9th November - 3rd December, 1992", "key": "/books/OL10000135M", "title": "Parliamentary Debates, House of Lords, Bound Volumes, 1992-93", "identifiers": {"goodreads": ["6850240"]}, "isbn_13": ["9780107805401"], "languages": [{"key": "/languages/eng"}], "number_of_pages": 64, "isbn_10": ["0107805405"], "publish_date": "December 1993", "last_modified": {"type": "/type/datetime", "value": "2010-04-24T17:54:01.503315"}, "authors": [{"key": "/authors/OL2645777A"}], "latest_revision": 4, "works": [{"key": "/works/OL7925046W"}], "type": {"key": "/type/edition"}, "subjects": ["Government - Comparative", "Politics / Current Events"], "revision": 4}
I'm trying to get only the last column, which is JSON, and from that JSON I'm only trying to save the "title", "isbn_13" and "isbn_10".
I was able to save only the last column with this code:
import csv
import sys

csv.field_size_limit(sys.maxsize)

# File names: to read in from and read out to
input_file = '../inputFile/ol_dump_editions_2019-10-31.txt'
output_file = '../outputFile/output.txt'

## ==================== ##
##  Using module 'csv'  ##
## ==================== ##
with open(input_file) as to_read:
    with open(output_file, "w") as tmp_file:
        reader = csv.reader(to_read, delimiter="\t")
        writer = csv.writer(tmp_file)
        desired_column = [4]  # text column
        for row in reader:  # read one row at a time
            myColumn = list(row[i] for i in desired_column)  # build the output row (process)
            writer.writerow(myColumn)  # write it
but this doesn't return a proper JSON object; instead it returns everything with doubled quotation marks next to it. Also, how would I extract certain values from the JSON as a new JSON?
EDIT:
"{""publishers"": [""Bernan Press""], ""physical_format"": ""Hardcover"", ""subtitle"": ""9th November - 3rd December, 1992"", ""key"": ""/books/OL10000135M"", ""title"": ""Parliamentary Debates, House of Lords, Bound Volumes, 1992-93"", ""identifiers"": {""goodreads"": [""6850240""]}, ""isbn_13"": [""9780107805401""], ""languages"": [{""key"": ""/languages/eng""}], ""number_of_pages"": 64, ""isbn_10"": [""0107805405""], ""publish_date"": ""December 1993"", ""last_modified"": {""type"": ""/type/datetime"", ""value"": ""2010-04-24T17:54:01.503315""}, ""authors"": [{""key"": ""/authors/OL2645777A""}], ""latest_revision"": 4, ""works"": [{""key"": ""/works/OL7925046W""}], ""type"": {""key"": ""/type/edition""}, ""subjects"": [""Government - Comparative"", ""Politics / Current Events""], ""revision"": 4}"
EDIT 2:
So I'm trying to read this tab-separated file, which has the following columns:
type - type of record (/type/edition, /type/work etc.)
key - unique key of the record (/books/OL1M etc.)
revision - revision number of the record
last_modified - last modified timestamp
JSON - the complete record in JSON format
I'm trying to read the JSON column, and from that JSON I only want to get the "title", "isbn_13" and "isbn_10" as a JSON and save it to the file as a row, so every row should look like the original but with only those keys and values.
Here's a straightforward way of doing it. You would need to repeat this and extract the desired data from each line of the file as it's being read, line by line (the default way text-file reading is handled in Python).
import json
line = '/type/edition /books/OL10000135M 4 2010-04-24T17:54:01.503315 {"publishers": ["Bernan Press"], "physical_format": "Hardcover", "subtitle": "9th November - 3rd December, 1992", "key": "/books/OL10000135M", "title": "Parliamentary Debates, House of Lords, Bound Volumes, 1992-93", "identifiers": {"goodreads": ["6850240"]}, "isbn_13": ["9780107805401"], "languages": [{"key": "/languages/eng"}], "number_of_pages": 64, "isbn_10": ["0107805405"], "publish_date": "December 1993", "last_modified": {"type": "/type/datetime", "value": "2010-04-24T17:54:01.503315"}, "authors": [{"key": "/authors/OL2645777A"}], "latest_revision": 4, "works": [{"key": "/works/OL7925046W"}], "type": {"key": "/type/edition"}, "subjects": ["Government - Comparative", "Politics / Current Events"], "revision": 4}'
csv_cols = line.split('\t')
json_data = json.loads(csv_cols[4])
#print(json.dumps(json_data, indent=4))
desired = {key: json_data[key] for key in ("title", "isbn_13", "isbn_10")}
result = json.dumps(desired, indent=4)
print(result)
Output from sample line:
{
    "title": "Parliamentary Debates, House of Lords, Bound Volumes, 1992-93",
    "isbn_13": [
        "9780107805401"
    ],
    "isbn_10": [
        "0107805405"
    ]
}
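To apply the same extraction across the whole 26 GB dump without loading it into memory, a minimal line-by-line sketch (the output file name and the .get() fallback for records missing a key are assumptions):
import json

with open('ol_dump_editions_2019-10-31.txt') as infile, \
        open('extracted.jsonl', 'w') as outfile:  # assumed output name
    for line in infile:
        record = json.loads(line.split('\t')[4])  # 5th tab-separated field is the JSON
        # .get() keeps going when a record lacks isbn_13/isbn_10
        desired = {key: record.get(key) for key in ("title", "isbn_13", "isbn_10")}
        outfile.write(json.dumps(desired) + '\n')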
So given that your current code returns the following:
result = '{""publishers"": [""Bernan Press""], ""physical_format"": ""Hardcover"", ""subtitle"": ""9th November - 3rd December, 1992"", ""key"": ""/books/OL10000135M"", ""title"": ""Parliamentary Debates, House of Lords, Bound Volumes, 1992-93"", ""identifiers"": {""goodreads"": [""6850240""]}, ""isbn_13"": [""9780107805401""], ""languages"": [{""key"": ""/languages/eng""}], ""number_of_pages"": 64, ""isbn_10"": [""0107805405""], ""publish_date"": ""December 1993"", ""last_modified"": {""type"": ""/type/datetime"", ""value"": ""2010-04-24T17:54:01.503315""}, ""authors"": [{""key"": ""/authors/OL2645777A""}], ""latest_revision"": 4, ""works"": [{""key"": ""/works/OL7925046W""}], ""type"": {""key"": ""/type/edition""}, ""subjects"": [""Government - Comparative"", ""Politics / Current Events""], ""revision"": 4}'
Looks like what you need to do is: first, replace those doubled double quotes with regular double quotes, otherwise the string is not parsable:
res = result.replace('""','"')
Now res is convertible to a JSON object:
import json
my_json = json.loads(res)
my_json now looks like this:
{'authors': [{'key': '/authors/OL2645777A'}],
 'identifiers': {'goodreads': ['6850240']},
 'isbn_10': ['0107805405'],
 'isbn_13': ['9780107805401'],
 'key': '/books/OL10000135M',
 'languages': [{'key': '/languages/eng'}],
 'last_modified': {'type': '/type/datetime',
                   'value': '2010-04-24T17:54:01.503315'},
 'latest_revision': 4,
 'number_of_pages': 64,
 'physical_format': 'Hardcover',
 'publish_date': 'December 1993',
 'publishers': ['Bernan Press'],
 'revision': 4,
 'subjects': ['Government - Comparative', 'Politics / Current Events'],
 'subtitle': '9th November - 3rd December, 1992',
 'title': 'Parliamentary Debates, House of Lords, Bound Volumes, 1992-93',
 'type': {'key': '/type/edition'},
 'works': [{'key': '/works/OL7925046W'}]}
You can conveniently get any field you want from this object:
my_json['title']
# 'Parliamentary Debates, House of Lords, Bound Volumes, 1992-93'
my_json['isbn_10'][0]
# '0107805405'
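From there, rebuilding the reduced record the question asks for takes one more step (same keys as in the answer above):
# Keep only the wanted keys from the parsed object.
subset = {key: my_json[key] for key in ("title", "isbn_13", "isbn_10")}
print(json.dumps(subset, indent=4))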
Especially because your example is so large, I'd recommend using a specialized library such as pandas, which has a read_csv method, or even dask, which supports out-of-memory operations.
Both of these systems will automatically parse out the quotations for you, and dask will do so in "pieces" direct from disk so you never have to try to load 26GB into RAM.
In both libraries, you can then access the columns you want like this:
from pandas import read_csv   # or: from dask.dataframe import read_csv

data = read_csv(PATH, sep="\t")   # tab-delimited in this case
data["ColumnName"]
You can then parse these rows either using json.loads() (import json) or you can use the pandas/dask json implementations. If you can give some more details of what you're expecting, I can help you draft a more specific code example.
Good luck!
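For instance, a hedged pandas sketch under the column layout described in EDIT 2 (the column names and chunk size are assumptions; dask.dataframe.read_csv takes largely the same arguments):
import json
import pandas as pd

cols = ['type', 'key', 'revision', 'last_modified', 'json']
# quoting=3 (csv.QUOTE_NONE) keeps the JSON field verbatim, and chunksize
# streams the file in pieces instead of loading 26 GB at once.
for chunk in pd.read_csv('ol_dump_editions_2019-10-31.txt', sep='\t',
                         header=None, names=cols, quoting=3,
                         chunksize=100_000):
    for record in chunk['json'].map(json.loads):
        desired = {k: record.get(k) for k in ("title", "isbn_13", "isbn_10")}
        # ... write `desired` wherever it is needed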
I saved your data to a file to see if I could read just the rows; let me know if this works:
import json

# zzread holds the file contents as a single string,
# e.g. zzread = open(input_file).read()
lines = zzread.split('\n')
temp = []
for to_read in lines:
    if len(to_read) == 0:
        break
    new_to_read = '{' + to_read.split('{', 1)[1]  # keep everything from the first '{'
    temp.append(json.loads(new_to_read))

for row in temp:
    print(row['isbn_13'])
If that works, this should create a JSON for you:
lines = zzread.split('\n')
temp = []
for to_read in lines:
    if len(to_read) == 0:
        break
    new_to_read = '{' + to_read.split('{', 1)[1]
    temp.append(json.loads(new_to_read))

new_json = []
for row in temp:
    new_json.append({'title': row['title'], 'isbn_13': row['isbn_13'], 'isbn_10': row['isbn_10']})

Conversion from nested json to csv with pandas

I am trying to convert a nested JSON into a CSV file, but I am struggling with the logic needed for the structure of my file: it's a JSON with two objects, and I would like to convert only one of them into CSV, the one that is a list with nesting.
I found some very helpful JSON "flattening" info in this blog post. I have basically been adapting it to my problem, but it is still not working for me.
My json file looks like this:
{
    "tickets": [
        {
            "Name": "Liam",
            "Location": {
                "City": "Los Angeles",
                "State": "CA"
            },
            "hobbies": [
                "Piano",
                "Sports"
            ],
            "year": 1985,
            "teamId": "ATL",
            "playerId": "barkele01",
            "salary": 870000
        },
        {
            "Name": "John",
            "Location": {
                "City": "Los Angeles",
                "State": "CA"
            },
            "hobbies": [
                "Music",
                "Running"
            ],
            "year": 1985,
            "teamId": "ATL",
            "playerId": "bedrost01",
            "salary": 550000
        }
    ],
    "count": 2
}
My code, so far, looks like this:
import json
from pandas.io.json import json_normalize
import argparse

def flatten_json(y):
    out = {}

    def flatten(x, name=''):
        if type(x) is dict:
            for a in x:
                flatten(x[a], name + a + '_')
        elif type(x) is list:
            i = 0
            for a in x:
                flatten(a, name + str(i) + '_')
                i += 1
        else:
            out[name[:-1]] = x

    flatten(y)
    return out

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Converting json files into csv for Tableau processing')
    parser.add_argument(
        "-j", "--json", dest="json_file", help="PATH/TO/json file to convert", metavar="FILE", required=True)
    args = parser.parse_args()

    with open(args.json_file, "r") as inputFile:  # open json file
        json_data = json.loads(inputFile.read())  # load json content
    flat_json = flatten_json(json_data)

    # normalizing flat json
    final_data = json_normalize(flat_json)

    with open(args.json_file.replace(".json", ".csv"), "w") as outputFile:  # open csv file
        # saving DataFrame to csv
        final_data.to_csv(outputFile, encoding='utf8', index=False)
What I would like to obtain is one line per ticket in the CSV, with headings:
Name,Location_City,Location_State,Hobbies_0,Hobbies_1,Year,TeamId,PlayerId,Salary.
I would really appreciate anything that can make it click!
Thank you!
I actually wrote a package called cherrypicker recently to deal with this exact sort of thing since I had to do it so often!
I think the following code would give you exactly what you're after:
from cherrypicker import CherryPicker
import json
import pandas as pd

with open('file.json') as file:
    data = json.load(file)

picker = CherryPicker(data)
flat = picker['tickets'].flatten().get()
df = pd.DataFrame(flat)
print(df)
This gave me the output:
Location_City Location_State Name hobbies_0 hobbies_1 playerId salary teamId year
0 Los Angeles CA Liam Piano Sports barkele01 870000 ATL 1985
1 Los Angeles CA John Music Running bedrost01 550000 ATL 1985
You can install the package with:
pip install cherrypicker
...and there's more docs and guidance at https://cherrypicker.readthedocs.io.
As you already have a function to flatten a JSON object, you just have to flatten the tickets:
...
with open(args.json_file, "r") as inputFile:  # open json file
    json_data = json.loads(inputFile.read())  # load json content
final_data = pd.DataFrame([flatten_json(elt) for elt in json_data['tickets']])  # requires: import pandas as pd
...
With your sample data, final_data is as expected:
Location_City Location_State Name hobbies_0 hobbies_1 playerId salary teamId year
0 Los Angeles CA Liam Piano Sports barkele01 870000 ATL 1985
1 Los Angeles CA John Music Running bedrost01 550000 ATL 1985
There may be a simpler solution for this. But this should work!
import json
import pandas as pd

with open('file.json') as file:
    data = json.load(file)

df = pd.DataFrame(data['tickets'])

# Pull the nested Location fields out into their own columns, row by row.
df['location_city'] = [item['City'] for item in df['Location']]
df['location_state'] = [item['State'] for item in df['Location']]

# Spread each row's hobbies list across hobbies_0, hobbies_1, ... columns.
n_hobbies = max(len(item) for item in df['hobbies'])
for i in range(n_hobbies):
    df['hobbies_{}'.format(i)] = [item[i] if i < len(item) else None for item in df['hobbies']]

df = df.drop({'Location', 'hobbies'}, axis=1)
print(df)
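Another hedged route, assuming a reasonably recent pandas: pd.json_normalize flattens the nested Location dict directly, and only the hobbies list needs an extra step:
import json
import pandas as pd

with open('file.json') as file:
    data = json.load(file)

# Location -> Location_City, Location_State; hobbies stays as a list column.
df = pd.json_normalize(data['tickets'], sep='_')
hobbies = pd.DataFrame(df.pop('hobbies').tolist()).add_prefix('hobbies_')
df = df.join(hobbies)
print(df)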

JSON Parsing help in Python

I have the data below in JSON format and have started with the code below, which throws a KeyError.
I am not sure how to get all the data listed in the headers section.
I know I am not doing it right in json_obj['offers'][0]['pkg']['Info']:, but I am not sure how to do it correctly.
How can I get to the different nodes like Info, PricingInfo, flt_Info etc.?
{
    "offerInfo": {
        "siteID": "1",
        "language": "en_US",
        "currency": "USD"
    },
    "offers": {
        "pkg": [
            {
                "offerDateRange": {
                    "StartDate": [2015, 11, 8],
                    "EndDate": [2015, 11, 14]
                },
                "Info": {
                    "Id": "111"
                },
                "PricingInfo": {
                    "BaseRate": 1932.6
                },
                "flt_Info": {
                    "Carrier": "AA"
                }
            }
        ]
    }
}
import os
import json
import csv
f = open('api.csv','w')
writer = csv.writer(f,delimiter = '~')
headers = ['Id' , 'StartDate', 'EndDate', 'Id', 'BaseRate', 'Carrier']
default = ''
writer.writerow(headers)
string = open('data.json').read().decode('utf-8')
json_obj = json.loads(string)
for pkg in json_obj['offers'][0]['pkg']['Info']:
    row = []
    row.append(json_obj['id'])  # just to test, but I need the column values listed in the header section
    writer.writerow(row)
It looks like you're accessing the json incorrectly. After you have accessed json_obj['offers'], you accessed [0], but there is no array there. json_obj['offers'] gives you another dictionary.
For example, to get PricingInfo like you asked, access like this:
json_obj['offers']['pkg'][0]['PricingInfo']
or 11 from the StartDate like this:
json_obj['offers']['pkg'][0]['offerDateRange']['StartDate'][1]
And I believe you get the KeyError because you access [0] on the dictionary; since 0 isn't a key, you get the error.
Try substituting this piece of code:
for pkg in json_obj['offers'][0]['pkg']['Info']:
    row = []
    row.append(json_obj['id'])  # just to test, but I need column values listed in header section
    writer.writerow(row)
With this:
for pkg in json_obj['offers']['pkg']:
    row = []
    row.append(pkg['Info']['Id'])
    year = pkg['offerDateRange']['StartDate'][0]
    month = pkg['offerDateRange']['StartDate'][1]
    day = pkg['offerDateRange']['StartDate'][2]
    StartDate = "%d-%d-%d" % (year, month, day)
    print StartDate
    writer.writerow(row)
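A fuller sketch along the same lines, filling every column from the question's headers list (the dash-joined date formatting is an assumption):
# One CSV row per pkg entry, matching the question's headers:
# ['Id', 'StartDate', 'EndDate', 'Id', 'BaseRate', 'Carrier']
for pkg in json_obj['offers']['pkg']:
    start = '-'.join(str(part) for part in pkg['offerDateRange']['StartDate'])
    end = '-'.join(str(part) for part in pkg['offerDateRange']['EndDate'])
    row = [pkg['Info']['Id'], start, end, pkg['Info']['Id'],
           pkg['PricingInfo']['BaseRate'], pkg['flt_Info']['Carrier']]
    writer.writerow(row)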
Try this
import os
import json
import csv

string = open('data.json').read().decode('utf-8')
json_obj = json.loads(string)

pkg = json_obj["offers"]["pkg"][0]
print pkg["Info"]["Id"]
print str(pkg["offerDateRange"]["StartDate"][0]) + '-' + str(pkg["offerDateRange"]["StartDate"][1]) + '-' + str(pkg["offerDateRange"]["StartDate"][2])
print str(pkg["offerDateRange"]["EndDate"][0]) + '-' + str(pkg["offerDateRange"]["EndDate"][1]) + '-' + str(pkg["offerDateRange"]["EndDate"][2])
print pkg["Info"]["Id"]
print pkg["PricingInfo"]["BaseRate"]
print pkg["flt_Info"]["Carrier"]

Python 27 CSV to JSON POST

I'm transferring my movie ratings from IMDB to Trakt. I use a Python script to do so and can't get it to turn my list into serializable JSON.
My script consists of a JSON uploader and a CSV reader; both work fine separately.
I've looked into list vs. tuple, json.dumps options and syntax, and into json.encoder. There is a lot on the topic available online, but no complete CSV-to-JSON example.
The following script includes all steps and a few lines of example data. If you want to test this script, you need the username, pass-SHA1 and API key of your Trakt account.
Current error:
raise TypeError(repr(o) + " is not JSON serializable")
TypeError: set(['["tt1535108", "Elysium", "8", "2013"]']) is not JSON serializable
#===============================================================================
# Used CSV file (imdb_ratings.csv)
#===============================================================================
# position,const,created,modified,description,Title,Title type,Directors,You rated,IMDb Rating,Runtime (mins),Year,Genres,Num. Votes,Release Date (month/day/year),URL
# 1,tt1683526,Sat Feb 1 00:00:00 2014,,,Detachment,Feature Film,Tony Kaye,8,7.7,97,2011,drama,36556,2011-04-25,http://www.imdb.com/title/tt1683526/
# 2,tt1205537,Wed Jan 29 00:00:00 2014,,,Jack Ryan: Shadow Recruit,Feature Film,Kenneth Branagh,6,6.6,105,2014,"action, mystery, thriller",11500,2014-01-15,http://www.imdb.com/title/tt1205537/
# 3,tt1535108,Tue Jan 28 00:00:00 2014,,,Elysium,Feature Film,Neill Blomkamp,8,6.7,109,2013,"action, drama, sci_fi, thriller",176354,2013-08-07,http://www.imdb.com/title/tt1535108/
#===============================================================================
# Imports etc.
#===============================================================================
import csv
import json
import urllib2
ifile = open('imdb_ratings.csv', "rb")
reader = csv.reader(ifile)
included_cols = [1, 5, 8, 11]
#===============================================================================
# CSV to JSON
#===============================================================================
rownum = 0
for row in reader:
    # Save header row.
    if rownum == 0:
        header = row
    else:
        content = list(row[i] for i in included_cols)
        print(content)
    rownum += 1
ifile.close()
#===============================================================================
# POST of JSON
#===============================================================================
data = {
    "username": "<username>",
    "password": "<SHA1>",
    "movies": [
        {
            # Expected format:
            # "imdb_id": "tt0114746",
            # "title": "Twelve Monkeys",
            # "year": 1995,
            # "rating": 9
            json.dumps(content)
        }
    ]
}
req = urllib2.Request('http://api.trakt.tv/rate/movies/<api>')
req.add_header('Content-Type', 'application/json')
response = urllib2.urlopen(req, json.dumps(data))
Construct the dict:
{
    "imdb_id": "tt0114746",
    "title": "Twelve Monkeys",
    "year": 1995,
    "rating": 9
}
instead of calling json.dumps(content), which creates a string.
You could create the list of dicts using a list comprehension and a dict comprehension:
movies = [{field:row[i] for field, i in zip(fields, included_cols)} for row in reader]
import csv
import json
import urllib2

with open('imdb_ratings.csv', "rb") as ifile:
    reader = csv.reader(ifile)
    next(reader)  # skip header row
    included_cols = [1, 5, 8, 11]
    fields = ['imdb_id', 'title', 'rating', 'year']
    movies = [{field: row[i] for field, i in zip(fields, included_cols)}
              for row in reader]

data = {"username": "<username>",
        "password": "<SHA1>",
        "movies": movies}

req = urllib2.Request('http://api.trakt.tv/rate/movies/<api>')
req.add_header('Content-Type', 'application/json')
response = urllib2.urlopen(req, json.dumps(data))
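One detail worth checking: the expected format in the question shows year and rating as numbers, while csv.reader yields strings. If the API is strict about types, a hedged variant of the comprehension casts them (column 8 is "You rated", column 11 is Year):
movies = [{'imdb_id': row[1], 'title': row[5],
           'rating': int(row[8]), 'year': int(row[11])}
          for row in reader]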
