Related
I have a PDF that contains many tables. )I converted this PDF to CSV online, to extract the needed data more easily.)
The CSV rows are composed of many columns, but each table contains only 3 columns, so it is hard to know which column refers to a cell.
I also should mention that one cell can be composed of more than one line and column.
An example of a table.
So is there any solution to extract these cells?
import csv
import re
class PDF_EXTRACTOR:
FILE_NAME=None
Ttableau=None
NUMBER_OF_PAGES=None
def __init__(self,fn):
self.FILE_NAME=fn
self.Ttableau=0
self.NUMBER_OF_PAGES=0
def EXTRACT_CELLULE(self):
csv.register_dialect('mydialect',delimiter =',',skipinitialspace=True)
print(csv.list_dialects())
with open(self.FILE_NAME,'r',encoding='utf8',errors='ignore') as csvFile:
reader = csv.reader(csvFile, dialect='mydialect')
for index, row in enumerate(reader):
print(row)
I expected an output like this:
["Region 1","Region 2", "Region3]
["8,3-9","AUXILIAIRES DE LA MÉTÉOROLOGIE 5.54A 5.54B 5.54C",""]
["70-72","70-90","70-72"]
["RADIONAVIGATION 5.60","FIXE","MARITIME 5.60"]
But instead I got this:
['7 450-8 100', 'FIXE', '', '', '', '']
['', 'MOBILE sauf mobile aéronautique (R)', '', '', '', '']
['', '5.144', '', '', '', '']
['8 100-8 195', 'FIXE', '', '', '', '']
['', 'MOBILE MARITIME', '', '', '', '']
['8 195-8 815', 'MOBILE MARITIME', '5.109', '5.11', '5.132', '5.145']
['', '5.111', '', '', '', '']
['8 815-8 965', 'MOBILE AÉRONAUTIQUE (R)', '', '', '', '']
['8 965-9 040', 'MOBILE AÉRONAUTIQUE (OR)', '', '', '', '']
['9 040-9 305', '9 040-9 400', '', '', '', '9 040-9 305']
['FIXE', 'FIXE', '', '', '', 'FIXE']
['9 305-9 355', '', '', '', '', '9 305-9 355']
['FIXE', '', '', '', '', 'FIXE']
['Radiolocalisation 5.145A', '', '', '', '', 'Radiolocalisation 5.145A']
['5.145B', '', '', '', '', '']
['9 355-9 400', '', '', '', '', '9 355-9 400']
I apologize for any formatting issues or unclear parts, I'm VERY new to Python and programming in general. I want to make a script that pulls a list of research participant records (the code provided here is sample data) and the list contains separate dictionary-like items that have all of the screening questions (including the record id, or subject ID). I want to pull particular items (self-harm reports and suicidal thoughts questions) from this depending on what the script's user inputs as the record id
I want the script to be able to pull from a growing list of dictionaries, so it has to index So far I have tried to return a tuple based on the user input, but it returns the same values
regardless of what I input for subj, it returns the same three values ('1', '2', '1'), the values of ONLY the first dictionary
from redcap import Project, RedcapError
URL = 'https://redcap.lib.umd.edu/api/'
#API KEY for sample data
API_KEY = 'B2E685118B86FA89F57C49A1C9A38BDC'
project = Project(URL, API_KEY)
all_data = project.export_records()
def find(subj, data):
index = 0
j = 0
for i in data:
for k,v in i.items():
if k == 'record_id' and v == subj:
index = j
j+=1
else:
j+=1
return data[index]['record_id'],data[index]['selfharm_18yr'],data[index]['talksaboutkillingself_18yr']
AN EXAMPLE OF DATA RECORD
[{'record_id': '1', 'child_gender': '', 'c_age': '', 'c_dob': '', 't_date': '', 'school_yn': '', 'school_grade': '', 'father_job': '', 'mother_work': '', 'parentgender': '', 'relation_to_child': '', 'other': '', 'no_sports': '', 'sport_a': '', 'average_time_a': '', 'average_skill_a': '', 'sport_b_yes': '', 'sport_b': '', 'average_time_b': '', 'average_skill_b': '', 'sport_c_yes': '', 'sport_c': '', 'average_time_c': '', 'average_skill_c': '', 'hobby_a_yes': '', 'hobby_a': '', 'hobby_a_time': '', 'hobby_a_skill': '', 'hobby_b_yes': '', 'hobby_b': '', 'hobby_b_time': '', 'hobby_b_skill': '', 'hobby_c_yes': '', 'hobby_c': '', 'hobby_c_time': '', 'hobby_c_skill': '', 'clubs': '', 'club1': '', 'activeclub1': '', 'clubs_2': '', 'club2': '', 'activeclub2': '', 'clubs_3': '', 'club3': '', 'activeclub3': '', 'chore_a_yes': '', 'chore_a': '', 'chore_a_skill': '', 'chore_b_yes': '', 'chore_b': '', 'chore_b_skill': '', 'chore_c_yes': '', 'chore_c': '', 'chore_c_skill': '', 'close_friends': '', 'friends': '', 'get_along_siblings': '', 'along_withkids': '', 'behave': '', 'play_work': '', 'attend_school': '', 'school_reason': '', 'performance1': '', 'performance2': '', 'performance3': '', 'performance4': '', 'othersubjects': '', 'other_subjects': '', 'performanceother': '', 'other2': '', 'other_subjects_2': '', 'performanceother_2': '', 'other3': '', 'other_subjects_3': '', 'performanceother_3': '', 'specialeducation': '', 'sp_ed': '', 'repeat_grades': '', 'repeat2': '', 'academic_problems': '', 'describe_problems': '', 'problems_date': '', 'problems_yn': '', 'end_problems': '', 'disabilities': '', 'disability2': '', 'concerns': '', 'best_things': '', 'too_young': '', 'alcohol': '', 'describe_alc18yr': '', 'argues': '', 'fails_finishing_things': '', 'enjoyment': '', 'bm': '', 'bragging': '', 'concentration': '', 'obsessions': '', 'describe_obesessions': '', 'restlessness': '', 'dependence': '', 'lonely': '', 'confusion': '', 'crying': '', 'cruelty_animals': '', 'cruelty': '', 'daydreams': '', 'selfharm_18yr': '2', 'attention': '', 'destruction': '', 'destruction2': '', 'disobedience': '', 'school_disobedience': '', 'eating_well': '', 'getting_along': '', 'guilt_misbehaving': '', 'jealousy': '', 'rule_breaking': '', 'fearful': '', 'describe_fears': '', 'fears_school': '', 'fears_thoughts': '', 'perfection': '', 'loveless': '', 'others_outtoget': '', 'worthlessness': '', 'accident_prone': '', 'fights': '', 'teasing': '', 'trouble_makers': '', 'voices': '', 'describe_voices': '', 'impulsive_acts': '', 'solitary': '', 'lying_cheating': '', 'fingernails': '', 'tense': '', 'movements': '', 'describe_movements': '', 'nightmares': '', 'likeability': '', 'constipation': '', 'fear_anxiety': '', 'dizziness': '', 'guilt': '', 'overeating': '', 'overtired': '', 'overweight': '', 'aches_pains': '', 'headaches': '', 'nausea': '', 'eye_problems': '', 'describe_eyes': '', 'skin_problems': '', 'stomach_aches': '', 'vomiting': '', 'other_conditions': '', 'describe_other': '', 'physical_violence': '', 'picks_skin': '', 'describe_skin': '', 'public': '', 'public2': '', 'school_work': '', 'coordination': '', 'older_kids': '', 'younger_kids': '', 'talking_refusal': '', 'compulsions': '', 'describe_compulsions': '', 'runs_away': '', 'screams': '', 'secretive': '', 'seeing_things': '', 'describe_seeingthings': '', 'self_conscious': '', 'sets_fires': '', 'sexual_problems': '', 'describe_sexualproblems': '', 'clowning': '', 'shy_timid': '', 'sleeps_less': '', 'sleeps_more': '', 'describe_sleeping': '', 'inattentive': '', 'speech_problems': '', 'describe_speechproblems': '', 'stares_blankly': '', 'steals_home': '', 'steals_outside': '', 'stores': '', 'describe_hoarding': '', 'strange_behavior': '', 'describe_strangebehavior': '', 'strange_ideas': '', 'describe_ideas': '', 'stubborn_sullen': '', 'mood_changes': '', 'sulking': '', 'suspicious': '', 'swearing_obscenities': '', 'talksaboutkillingself_18yr': '1', 'sleeptalking_walking': '', 'describe_sleeptalking': '', 'talks_toomuch': '', 'frequent_teasing': '', 'temper_tantrums': '', 'thinks_sex': '', 'threatens_people': '', 'thumb_sucking': '', 'smoking': '', 'sleeping_troubles': '', 'describe_sleepingtroubles': '', 'truancy': '', 'low_energy': '', 'depression': '', 'loud': '', 'uses_drugs': '', 'describe_drugusage': '', 'vandalism': '', 'wets_self': '', 'wets_bed': '', 'whining': '', 'opposite_sex': '', 'withdrawn': '', 'frequent_worries': '', 'additional_problems': '', 'problem_a': '', 'prob_a_true': '', 'problem_b_yes': '', 'problem_b': '', 'prob_b_true': '', 'problem_c_yes': '', 'problem_c': '', 'prob_c_true': ''}, {'record_id': '2', 'child_gender': '', 'c_age': '', 'c_dob': '', 't_date': '', 'school_yn': '', 'school_grade': '', 'father_job': '', 'mother_work': '', 'parentgender': '', 'relation_to_child': '', 'other': '', 'no_sports': '', 'sport_a': '', 'average_time_a': '', 'average_skill_a': '', 'sport_b_yes': '', 'sport_b': '', 'average_time_b': '', 'average_skill_b': '', 'sport_c_yes': '', 'sport_c': '', 'average_time_c': '', 'average_skill_c': '', 'hobby_a_yes': '', 'hobby_a': '', 'hobby_a_time': '', 'hobby_a_skill': '', 'hobby_b_yes': '', 'hobby_b': '', 'hobby_b_time': '', 'hobby_b_skill': '', 'hobby_c_yes': '', 'hobby_c': '', 'hobby_c_time': '', 'hobby_c_skill': '', 'clubs': '', 'club1': '', 'activeclub1': '', 'clubs_2': '', 'club2': '', 'activeclub2': '', 'clubs_3': '', 'club3': '', 'activeclub3': '', 'chore_a_yes': '', 'chore_a': '', 'chore_a_skill': '', 'chore_b_yes': '', 'chore_b': '', 'chore_b_skill': '', 'chore_c_yes': '', 'chore_c': '', 'chore_c_skill': '', 'close_friends': '', 'friends': '', 'get_along_siblings': '', 'along_withkids': '', 'behave': '', 'play_work': '', 'attend_school': '', 'school_reason': '', 'performance1': '', 'performance2': '', 'performance3': '', 'performance4': '', 'othersubjects': '', 'other_subjects': '', 'performanceother': '', 'other2': '', 'other_subjects_2': '', 'performanceother_2': '', 'other3': '', 'other_subjects_3': '', 'performanceother_3': '', 'specialeducation': '', 'sp_ed': '', 'repeat_grades': '', 'repeat2': '', 'academic_problems': '', 'describe_problems': '', 'problems_date': '', 'problems_yn': '', 'end_problems': '', 'disabilities': '', 'disability2': '', 'concerns': '', 'best_things': '', 'too_young': '', 'alcohol': '', 'describe_alc18yr': '', 'argues': '', 'fails_finishing_things': '', 'enjoyment': '', 'bm': '', 'bragging': '', 'concentration': '', 'obsessions': '', 'describe_obesessions': '', 'restlessness': '', 'dependence': '', 'lonely': '', 'confusion': '', 'crying': '', 'cruelty_animals': '', 'cruelty': '', 'daydreams': '', 'selfharm_18yr': '3', 'attention': '', 'destruction': '', 'destruction2': '', 'disobedience': '', 'school_disobedience': '', 'eating_well': '', 'getting_along': '', 'guilt_misbehaving': '', 'jealousy': '', 'rule_breaking': '', 'fearful': '', 'describe_fears': '', 'fears_school': '', 'fears_thoughts': '', 'perfection': '', 'loveless': '', 'others_outtoget': '', 'worthlessness': '', 'accident_prone': '', 'fights': '', 'teasing': '', 'trouble_makers': '', 'voices': '', 'describe_voices': '', 'impulsive_acts': '', 'solitary': '', 'lying_cheating': '', 'fingernails': '', 'tense': '', 'movements': '', 'describe_movements': '', 'nightmares': '', 'likeability': '', 'constipation': '', 'fear_anxiety': '', 'dizziness': '', 'guilt': '', 'overeating': '', 'overtired': '', 'overweight': '', 'aches_pains': '', 'headaches': '', 'nausea': '', 'eye_problems': '', 'describe_eyes': '', 'skin_problems': '', 'stomach_aches': '', 'vomiting': '', 'other_conditions': '', 'describe_other': '', 'physical_violence': '', 'picks_skin': '', 'describe_skin': '', 'public': '', 'public2': '', 'school_work': '', 'coordination': '', 'older_kids': '', 'younger_kids': '', 'talking_refusal': '', 'compulsions': '', 'describe_compulsions': '', 'runs_away': '', 'screams': '', 'secretive': '', 'seeing_things': '', 'describe_seeingthings': '', 'self_conscious': '', 'sets_fires': '', 'sexual_problems': '', 'describe_sexualproblems': '', 'clowning': '', 'shy_timid': '', 'sleeps_less': '', 'sleeps_more': '', 'describe_sleeping': '', 'inattentive': '', 'speech_problems': '', 'describe_speechproblems': '', 'stares_blankly': '', 'steals_home': '', 'steals_outside': '', 'stores': '', 'describe_hoarding': '', 'strange_behavior': '', 'describe_strangebehavior': '', 'strange_ideas': '', 'describe_ideas': '', 'stubborn_sullen': '', 'mood_changes': '', 'sulking': '', 'suspicious': '', 'swearing_obscenities': '', 'talksaboutkillingself_18yr': '2', 'sleeptalking_walking': '', 'describe_sleeptalking': '', 'talks_toomuch': '', 'frequent_teasing': '', 'temper_tantrums': '', 'thinks_sex': '', 'threatens_people': '', 'thumb_sucking': '', 'smoking': '', 'sleeping_troubles': '', 'describe_sleepingtroubles': '', 'truancy': '', 'low_energy': '', 'depression': '', 'loud': '', 'uses_drugs': '', 'describe_drugusage': '', 'vandalism': '', 'wets_self': '', 'wets_bed': '', 'whining': '', 'opposite_sex': '', 'withdrawn': '', 'frequent_worries': '', 'additional_problems': '', 'problem_a': '', 'prob_a_true': '', 'problem_b_yes': '', 'problem_b': '', 'prob_b_true': '', 'problem_c_yes': '', 'problem_c': '', 'prob_c_true': ''}]
I expect it to output a truple of the three keys, depending on what the record id of the corresponding dictionary is, but it instead outputs the same thing regardless of the subject ID
AN EXAMPLE OF THE OUTPUT
find('1', all_data)
('1', '2', '1')
find('2', all_data)
('1', '2', '1')
In the future I also want to be able to send those to an Excel spreadsheet.
So in this case, you're doing a ton of unnecessary iteration. The beauty of python dictionaries is that they're hashed and optimized for lookup operations.
Rather than iterating through keys and values, all you need to do is supply the key as the index and return early if the record exists. (Note, I changed a few names around for clarity, and to ensure that things like find() don't shadow built-in methods from other classes)
def find_item(subj, data):
for subdict in data:
if subdict['record_id']== subj:
return subdict['record_id'],subdict['selfharm_18yr'],subdict['talksaboutkillingself_18yr']
return "No Records Found"
find_item('1',data)
('1', '2', '1')
find_item('2',data)
('2', '3', '2')
find_item('zyzzyx',data)
"No records found"
And, regarding your function, here's where I believe the problem lies:
if k == 'record_id' and v == subj:
index = j
j+=1
else:
j+=1
In the case of the provided list of 2 records, this means you're setting index==0 before you update j, so even if the record is found at i[1], you still return the values from i[0]
I'm running a Python program which fetches a UTF-8-encoded web page, and I extract some text from HTML table using pandas(read_html) and write result to csv file
However, when I write this text to a file,all spaces in it gets written in an unexpected encoding (example \xd0\xb9\xd1\x82\xd0\xb8).
to solve the problem I added a line i = i.split(" ")
after, all spaces in csv file substitutes for characters, the example below:
['0', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '1', '', '', '', '', '', '', '', '', '', '', '', '', '', '2', '', '', '3\n0', '', '', '', '', '', '', '', 'number', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', 'last name', '', 'number', 'plan', 'NaN\n1', '', '', '', '', '', '', '', '', '', 'NaN', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', 'NaN', '', '', 'not', 'NaN\n2', '', '', '', '', '53494580', '', '', '', '', '', '', '', '', '', '+', '(53)494580', '', '', '', '', '', '', '', '', 'NP_551', 'NaN\n3', '', '', '', '', '53494581', '', '', '', '', '', '', '', '', '', '+', '(53)494581', '', '', '', '', '', '', '', '', 'NP_551', 'NaN\n4', '', '', '', '']
I would like to get rid of character ( '', ) Is there a way to fix this?
Any pointers would be much appreciated.
code python:
import pandas as pd
import html5lib
filename="1.csv"
file=open(filename,"w",encoding='UTF-8', newline='\n');
output=csv.writer(file, dialect='excel',delimiter =' ')
r = requests.get('http://10.45.87.12/og?sh=1&CallerName=&Sys=.79.83.86.51&')
pd.set_option('max_rows',10000)
df = pd.read_html(r.content)
for i in df:
i = str(i)
i = i.strip()
i = i.encode('UTF-8').decode('UTF-8')
i = i.split(" ")
output.writerow(i)
file.close()
You can use the filter method to remove of empty values. you can add the below snippet after 'i = i.split(" ")'
A = ['0', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '1', '', '', '', '', '', '', '', '', '', '', '', '', '', '2', '', '', '3\n0', '', '', '', '', '', '', '', 'number', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', 'last name', '', 'number', 'plan', 'NaN\n1', '', '', '', '', '', '', '', '', '', 'NaN', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', 'NaN', '', '', 'not', 'NaN\n2', '', '', '', '', '53494580', '', '', '', '', '', '', '', '', '', '+', '(53)494580', '', '', '', '', '', '', '', '', 'NP_551', 'NaN\n3', '', '', '', '', '53494581', '', '', '', '', '', '', '', '', '', '+', '(53)494581', '', '', '', '', '', '', '', '', 'NP_551', 'NaN\n4', '', '', '', '']
print filter(None, A)
Output:
['0', '1', '2', '3\n0', 'number', 'last name', 'number', 'plan', 'NaN\n1', 'NaN', 'NaN', 'not', 'NaN\n2', '53494580', '+', '(53)494580', 'NP_551', 'NaN\n3', '53494581', '+', '(53)494581', 'NP_551', 'NaN\n4']
I I have the following list of lists:
(['investmentseminar', '300', '', '', 'CNAME', '', 'domain.com.'], 7)
(['#', '300', '', '', '', '', '', '', '', 'CNAME', '', 'domain.com.'], 12)
(['#', '300', '', '', '', '', '', '', '', '', '', '', '', '', '', 'MX', '', '1', '', 'eu-smtp-inbound-1.com.'], 20)
(['#', '3600', '', '', '', '', '', '', '', '', '', '', '', '', '', '', 'TXT', '', 'MS=ms87183849'], 19)
(['#', '3600', '', '', '', '', '', '', '', '', '', '', '', '', '', '', 'TXT', '', 'MS=ms91398333'], 19)
it is from a parsed file with BIND data, i am trying to extract the record type and TTL, where the position of the items in the list are fixed.
this is the code i have so far:
lines = [['#', '', '', 'MX', '', '10', '', 'relay1.netnames.net.'],['#', '', '', 'MX', '', '20', '', 'relay2.netnames.net.'], ['#', '3600', '', '', '', '', '', '', '', '', '', '', '', '', '', '', 'TXT', '', 'MS=ms91398333'], ['#', '300', '', '', '', '', '', '', '', '', '', '', '', '', '', 'MX', '', '1', '', 'eu-smtp-inbound-1.com.'], ['domain.tld.', '3600', '', '', '', '', '', '', '', '', '', '', '', 'TXT', '', 'v=spf1 redirect=spf.domain.tld'],['a.ns.slf', '', '', '', '', '', '', '', '', '', 'A', '', '192.123.54.133'],['adfs', '', '', '', '', '', '', '', '', '', '', '', '', '', 'A', '', '192.123.67.20']]
record_set_list = []
def record_set(record):
resource = {
'Name': record[0],
'TTL': record[1],
'Type': record[4],
'Value': record[-1]
}
record_set_list.append({'RecordSets': resource})
types = ['A', 'AAAA', 'CAA', 'CNAME', 'MX', 'NAPTR', 'PTR', 'SPF', 'SRV', 'TXT', 'ZONE']
for record in csv.reader(lines, delimiter=" "):
any_in = any(i in record for i in types)
if any_in is True:
record_set(record)
how do i match the TTL, Type and in the case of MX record the preference?
any advise is much appreciated
Use the builtin function filter to remove the empty strings, zip the remaining values with the corresponding keys, and make a dict.
def record_set(record):
keys = ['Name', 'TTL', 'Type', 'Value']
values = filter(None, record)
resource = dict(zip(keys, values))
record_set_list.append({'RecordSets': resource})
import csv
import requests
webpage = requests.get('http://www.pjm.com/pub/account/lmpda/20160427-da.csv')
reader=csv.reader(webpage)
for row in reader:
print(row)
Hi, I'm new to Python and I'm trying to open a CSV file from a URL & then display the rows so I can take the data that I need from it. However, the I get an error saying :
Traceback (most recent call last):
File "", line 1, in
for row in reader: Error: iterator should return strings, not bytes (did you open the file in text mode?)
Thank you in advance.
You can try this:
import csv, requests
webpage=requests.get('http://www.pjm.com/pub/account/lmpda/20160427-da.csv')
reader = csv.reader(webpage.content.splitlines())
for row in reader:
print(row)
Hope this will help
Use .text as you are getting bytes returned in python3:
webpage = requests.get('http://www.pjm.com/pub/account/lmpda/20160427-da.csv')
reader = csv.reader([webpage.text])
for row in reader:
print(row)
That gives _csv.Error: new-line character seen in unquoted field so split the lines after decoding, also stream=True will allow you to get the data in chunks not all at once so you can filter by row and write:
import csv
import requests
webpage = requests.get('http://www.pjm.com/pub/account/lmpda/20160427-da.csv', stream=1)
for line in webpage:
print(list(csv.reader((line.decode("utf-8")).splitlines()))[0])
Which gives you:
['Day Ahead Hourly LMP Values for 20160427', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '']
['', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '']
['00', '600', '700', '800', '900', '1000', '1100', '1200', '1300', '1400', '1500', '1600', '1700', '1800', '1900', '2000', '2100', '2200', '2300', '2400', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '']
['', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '']
['1', '25.13', '25.03', '28.66', '25.94', '21.74', '19.47', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '']
['', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '']
['', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '']
['600', '600', '600', '700', '700', '700', '800', '800', '800', '900', '900', '900', '1000', '1000', '1000', '1100', '1100', '1100', '1200', '1200', '1200', '1300', '1300', '1300', '1400', '1400', '1400', '1500', '']
['1500', '1500', '1600', '1600', '1600', '1700', '1700', '1700', '1800', '1800', '1800', '1900', '1900', '1900']
['', '2000', '2000', '2000', '2100', '2100', '2100', '2200', '2200', '2200', '2300', '2300', '2300', '2400', '2400', '2400', '']
['lLMP', 'CongestionPrice', 'MarginalLossPrice', 'TotalLMP', 'CongestionPrice', 'MarginalLossPrice', 'TotalLMP', 'CongestionPrice', 'MarginalLossPrice', 'Tot']
['alLMP', 'CongestionPrice', 'MarginalLossPrice', 'TotalLMP', 'CongestionPrice', 'MarginalLossPrice', 'TotalLMP', 'CongestionPrice', 'MarginalLossPrice', 'To']
['talLMP', 'CongestionPrice', 'MarginalLossPrice', 'TotalLMP', 'CongestionPrice', 'MarginalLossPrice', 'TotalLMP', 'CongestionPrice', 'MarginalLossPrice', 'T']
.......................................
A variation on the answer by Padriac Cunningham uses iter_lines() from Requests and decodes each line using a list comprehension
import csv
import requests
webpage = requests.get('http://www.pjm.com/pub/account/lmpda/20160427-da.csv', stream = True)
webpage_decoded = [line.decode('utf-8') for line in webpage.iter_lines()]
reader = csv.reader(webpage_decoded)
or even simpler, you can have iter_lines() do the decoding
webpage_decoded = webpage.iter_lines(decode_unicode=True)