Unable to write CSV file in Python
Here is my code. I added a try/except for IndexError, but it is still not writing to the CSV file.
import urllib2
import csv
import time
import requests
import os
#a = open(r"C:\Drive F data\Client\Blake\III\2.txt")
a = ['http://www.houzz.com/trk/aHR0cDovL3d3dy5SSUtCLmNvbQ/b020157b98711b4a190eee3331eb0066/ue/MjA5ODg2MQ/1b9b00b9fdc9f270f14688046ef161e2',
'http://www.houzz.com/trk/aHR0cDovL3d3dy5nc2NhYmluZXRyeS5jb20/0323b7db059b9e0357d045685be21a6d/ue/NDY2MjE0/d8815293352eb2a6e40c95060c019697',
'http://www.houzz.com/trk/aHR0cDovL3NpY29yYS5jb20/dc807b3705b95b5da772a7aefe23a803/ue/Njc0NDA4/a73f8bdb38e10abd5899fb5c55ff3548',
'http://www.houzz.com/trk/aHR0cDovL3d3dy5DYXNlRGVzaWduLmNvbQ/d79c6af934e3c815d602c4d79b0d6617/ue/OTY3MDg/ce9a87e31e84871a96bca7538aae9856',
'http://www.houzz.com/trk/aHR0cDovL2phcnJldHRkZXNpZ25sbGMuY29t/9d0009d3544d9c22f6058b20097321b3/ue/MzExNTk1NA/310d49732d317725364368ea3fbfd7c1',
'http://www.houzz.com/trk/aHR0cDovL3d3dy5yb2JlcnRsZWdlcmVkZXNpZ24uY29t/8ac7311be2f794654cefba71474563f7/ue/MTExNTQ4/af201ffdc62de6aba9e2de90f69a770d']
with open("C:\Drive F data\Blake\III/2.csv", "ab")as export:
names = ['source','link']
writer = csv.DictWriter(export, fieldnames=names)
writer.writeheader()
for each in a:
try:
link = urllib2.urlopen(each).geturl()
except IndexError:
pass
print each, link
writer.writerow({'source':each,'link':link})
After removing the try/except block, it works fine.
I think you are missing escape characters in your file path. Use "C:\\Drive F data\\Blake\\III\\2.csv"
Related
Python Append file should refreshed with new data
I am trying to write my output into a file what my code is doing is that its looking for matched file names and storing it into a file similary for unmatched files but the problem is when i use write it overwrites the file and when i use append on every run it keeps on appending the file matched file names. What i need is that it refresh te file whenever the script is run and loads it with current data only. import re import sys import os import glob import pandas as pd import logging try: for file in glob.glob('*.csv'): r = re.search(r'abc_sales_(20[0-9][0-9])-([1-9]|1[0-2]|0[0-9])-([1-9]|1[0-9]|2[0-9]|3[0-1]|0[0-9])-[0-9]{2}_[a-z0-9]{3,5}.csv', file) if r: #matched=file print(f'File matched:{file}') fp=open('bad_lines.txt', 'r+') sys.stdout = fp else: path=f'File not matched:{file}' f=open('filenotmatched.txt','a') f.seek(0) f.truncate() f.write(path+'\n') f.close() except Exception as e: pass
Suggested changes to your code. import re import sys import os import glob import pandas as pd import logging # We create new 'bad_lines.txt' and # 'filenotmatched.txt' for each run with open('bad_lines.txt', 'w') as f_badlines, open('filenotmatched.txt','w') as f_notmatched: try: for file in glob.glob('*.csv'): r = re.search(r'abc_sales_(20[0-9][0-9])-([1-9]|1[0-2]|0[0-9])-([1-9]|1[0-9]|2[0-9]|3[0-1]|0[0-9])-[0-9]{2}_[a-z0-9]{3,5}.csv', file) if r: #matched=file #print(f'File matched:{file}') #fp=open('bad_lines.txt', 'r+') # ** Not clear why you redirected # ** standard out to a file # ** rather than writing to file directly #sys.stdout = fp f_badlines.write(f'File matched:{file}\n') else: path=f'File not matched:{file}' #f=open('filenotmatched.txt','a') #f.seek(0) #f.truncate() #f.write(path+'\n') #f.close() f_notmatched.write(path + '\n') except Exception as e: pass
python write and open temporary csv
Using Python 3 on a windows machine: I have a function to take a list of lists and open it as a csv file using my default application (excel). Despite closing the file after writing, I get a 'locked for editing' message when excel starts. def opencsv(data): """saves a list of lists as a csv and opens""" import tempfile import os import csv handle, fn = tempfile.mkstemp(suffix='.csv') with open(fn,"w", encoding='utf8',errors='surrogateescape',\ newline='') as f: writer=csv.writer(f) for row in data: try: writer.writerow(row) except Exception as e: print ('Error in writing row:',e) f.close() url = 'file://' + fn.replace(os.path.sep, '/') os.startfile(fn) opencsv([['d1','d2'],['d3','d4','d5']]) How can I fix this?
Answer from swstephe's input: The issue is that mkstemp opens the file and associates it with an os handle. In my original code I was not closing this file properly. See below for updated code. def opencsv(data): """saves a list of lists as a csv and opens""" import tempfile import os import csv handle, fn = tempfile.mkstemp(suffix='.csv') with os.fdopen(handle,"w", encoding='utf8',errors='surrogateescape',\ newline='') as f: writer=csv.writer(f) for row in data: try: writer.writerow(row) except Exception as e: print ('Error in writing row:',e) print (fn) os.startfile(fn)
Creating a python program that scrapes file from a website
This is what I have so far import urllib Champions=["Aatrox","Ahri","Akali","Alistar","Amumu","Anivia","Annie","Ashe","Azir","Blitzcrank","Brand","Braum","Caitlyn","Cassiopeia","ChoGath","Corki","Darius","Diana","DrMundo","Draven","Elise","Evelynn","Ezreal","Fiddlesticks","Fiora","Fizz","Galio","Gangplank","Garen","Gnar","Gragas","Graves","Hecarim","Heimerdinger","Irelia","Janna","JarvanIV","Jax","Jayce","Jinx","Kalista","Karma","Karthus","Kassadin","Katarina","Kayle","Kennen","KhaZix","KogMaw","LeBlanc","LeeSin","Leona","Lissandra","Lucian","Lulu","Lux","Malphite","Malzahar","Maokai","MasterYi","MissFortune","Mordekaiser","Morgana","Nami","Nasus","Nautilus","Nidalee","Nocturne","Nunu","Olaf","Orianna","Pantheon","Poppy","Quinn","Rammus","RekSai","Renekton","Rengar","Riven","Rumble","Ryze","Sejuani","Shaco","Shen","Shyvana","Singed","Sion","Sivir","Skarner","Sona","Soraka","Swain","Syndra","Talon","Taric","Teemo","Thresh","Tristana","Trundle","Tryndamere","TwistedFate","Twitch","Udyr","Urgot","Varus","Vayne","Veigar","VelKoz","Vi","Viktor","Vladimir","Volibear","Warwick","Wukong","Xerath","XinZhao","Yasuo","Yorick","Zac","Zed","Ziggs","Zilean","Zyra"] currentCount=0 while currentCount < len(Champions): urllib.urlretrieve("http://www.lolflavor.com/champions/"+Champions[currentCount]+ "/Recommended/"+Champions[currentCount]+"_lane_scrape.json","C:\Users\Jay\Desktop\LolFlavor\ " +Champions[currentCount]+ "\ "+Champions[currentCount]+ "_lane_scrape.json") currentCount+=1 What the program is meant to do is to use the list and the currentCount to get the champion, then it goes to the website e.g for "Aatrox" http://www.lolflavor.com/champions/Aatrox/Recommended/Aatrox_lane_scrape.json, then it downloads and stores the file in the folder LolFlavor/Aatrox/Aatrox_lane_scrape.json in this case. The bit which is Aatrox changes depending on the champion. Can anyone help me to get it to work? 
EDIT: CURRENT CODE WITH VALUE ERROR: import json import os import requests Champions=["Aatrox","Ahri","Akali","Alistar","Amumu","Anivia","Annie","Ashe","Azir","Blitzcrank","Brand","Braum","Caitlyn","Cassiopeia","ChoGath","Corki","Darius","Diana","DrMundo","Draven","Elise","Evelynn","Ezreal","Fiddlesticks","Fiora","Fizz","Galio","Gangplank","Garen","Gnar","Gragas","Graves","Hecarim","Heimerdinger","Irelia","Janna","JarvanIV","Jax","Jayce","Jinx","Kalista","Karma","Karthus","Kassadin","Katarina","Kayle","Kennen","KhaZix","KogMaw","LeBlanc","LeeSin","Leona","Lissandra","Lucian","Lulu","Lux","Malphite","Malzahar","Maokai","MasterYi","MissFortune","Mordekaiser","Morgana","Nami","Nasus","Nautilus","Nidalee","Nocturne","Nunu","Olaf","Orianna","Pantheon","Poppy","Quinn","Rammus","RekSai","Renekton","Rengar","Riven","Rumble","Ryze","Sejuani","Shaco","Shen","Shyvana","Singed","Sion","Sivir","Skarner","Sona","Soraka","Swain","Syndra","Talon","Taric","Teemo","Thresh","Tristana","Trundle","Tryndamere","TwistedFate","Twitch","Udyr","Urgot","Varus","Vayne","Veigar","VelKoz","Vi","Viktor","Vladimir","Volibear","Warwick","Wukong","Xerath","XinZhao","Yasuo","Yorick","Zac","Zed","Ziggs","Zilean","Zyra"] for champ in Champions: os.makedirs("C:\\Users\\Jay\\Desktop\\LolFlavor\\{}\\Recommended".format(champ), exist_ok=True) with open(r"C:\Users\Jay\Desktop\LolFlavor\{}\Recommended\{}_lane_scrape.json".format(champ,champ),"w") as f: r = requests.get("http://www.lolflavor.com/champions/{}/Recommended/{}_lane_scrape.json".format(champ,champ)) json.dump(r.json(),f) with open(r"C:\Users\Jay\Desktop\LolFlavor\{}\Recommended\{}_jungle_scrape.json".format(champ,champ),"w") as f: r = requests.get("http://www.lolflavor.com/champions/{}/Recommended/{}_jungle_scrape.json".format(champ,champ)) json.dump(r.json(),f) with open(r"C:\Users\Jay\Desktop\LolFlavor\{}\Recommended\{}_support_scrape.json".format(champ,champ),"w") as f: r = 
requests.get("http://www.lolflavor.com/champions/{}/Recommended/{}_support_scrape.json".format(champ,champ)) json.dump(r.json(),f) with open(r"C:\Users\Jay\Desktop\LolFlavor\{}\Recommended\{}_aram_scrape.json".format(champ,champ),"w") as f: r = requests.get("http://www.lolflavor.com/champions/{}/Recommended/{}_aram_scrape.json".format(champ,champ)) json.dump(r.json(),f)
import requests Champions=["Aatrox","Ahri","Akali","Alistar","Amumu","Anivia","Annie","Ashe","Azir","Blitzcrank","Brand","Braum","Caitlyn","Cassiopeia","ChoGath","Corki","Darius","Diana","DrMundo","Draven","Elise","Evelynn","Ezreal","Fiddlesticks","Fiora","Fizz","Galio","Gangplank","Garen","Gnar","Gragas","Graves","Hecarim","Heimerdinger","Irelia","Janna","JarvanIV","Jax","Jayce","Jinx","Kalista","Karma","Karthus","Kassadin","Katarina","Kayle","Kennen","KhaZix","KogMaw","LeBlanc","LeeSin","Leona","Lissandra","Lucian","Lulu","Lux","Malphite","Malzahar","Maokai","MasterYi","MissFortune","Mordekaiser","Morgana","Nami","Nasus","Nautilus","Nidalee","Nocturne","Nunu","Olaf","Orianna","Pantheon","Poppy","Quinn","Rammus","RekSai","Renekton","Rengar","Riven","Rumble","Ryze","Sejuani","Shaco","Shen","Shyvana","Singed","Sion","Sivir","Skarner","Sona","Soraka","Swain","Syndra","Talon","Taric","Teemo","Thresh","Tristana","Trundle","Tryndamere","TwistedFate","Twitch","Udyr","Urgot","Varus","Vayne","Veigar","VelKoz","Vi","Viktor","Vladimir","Volibear","Warwick","Wukong","Xerath","XinZhao","Yasuo","Yorick","Zac","Zed","Ziggs","Zilean","Zyra"] for champ in Champions: r = requests.get("http://www.lolflavor.com/champions/{}/Recommended/{}_lane_scrape.json".format(champ,champ)) print(r.json()) If you want to save each to a file. dump the json. import json import simplejson for champ in Champions: with open(r"C:\Users\Jay\Desktop\LolFlavor\{}_lane_scrape.json".format(champ),"w") as f: try: r = requests.get("http://www.lolflavor.com/champions/{}/Recommended/{}_lane_scrape.json".format(champ, champ)) json.dump(r.json(),f) except simplejson.scanner.JSONDecodeError as e: print(e.r.url) The error is from 404 - File or directory not found as one of you calls fails so there is no valid json to decode. The offending url is: u'http://www.lolflavor.com/champions/Wukong/Recommended/Wukong_lane_scrape.json' which if you try in your browser will also give you a 404 error. 
That is caused by the fact that there is no user Wukong, which can be confirmed by opening http://www.lolflavor.com/champions/Wukong/ in your browser. There is no need to index the list using a while loop; simply iterate over the list items directly and use str.format to pass the variables into the url. Also make sure you use a raw string r for the file path when using \'s, as they have a special meaning in python: they are used to escape characters, so \n or \r etc. in your paths would cause problems. You can also use / or escape using \\.
Python not saving files to a different folder than where the python file is on Ubuntu 11.10
So I am pulling jpg's from a url. I am able to save the image files as long as they are being saved to the same folder the python file is in. As soon as I attempt to change the folder(seen here as the outpath) the image files do not get created. I imagine it has something to do with my outpath, but it seems to be fine when I am printing and watching it in the console. Ubuntu 11.10 OS by the way. I'm a newbie with both linux and python, so it could easily be either. :) If I were to print the sequence taken from the CSV file it would look like: [['Champ1', 'Subname1', 'imgurl1'],['Champ2', 'subname2', 'imgurl2'],['Champ3','subname3','imgurl3']...] (It was scraped from a website) import csv from urlparse import urlsplit from urllib2 import urlopen, build_opener from urllib import urlretrieve import webbrowser import os import sys reader = csv.reader(open('champdata.csv', "rb"), delimiter = ",", skipinitialspace=True) champInfo = [] for champs in reader: champInfo.append(champs) size = len(champInfo) def GetImages(x, out_folder="/home/sean/Home/workspace/CP/images"): b=1 size = len(champInfo) print size while b < size: temp_imgurls = x.pop(b) filename = os.path.basename(temp_imgurls[2]) print filename outpath = os.path.join(out_folder, filename) print outpath u = urlopen(temp_imgurls[2]) localFile = open(outpath, 'wb') localFile.write(u.read()) localFile.close() b+=1 GetImages(champInfo) I understand it's quite crude, but it does work, only if I'm not attempting to change the save path.
Try providing the complete image path everywhere E:/../home/sean/Home/workspace/CD/images
def GetImages(x): b=1 size = len(champInfo) print size while b < size: temp_imgurls = x.pop(b) filename = temp_imgurls[2] u = urlopen(temp_imgurls[2]) localFile = open(filename, 'wb') localFile.write(u.read()) localFile.close() And this code will save files in the same directory where the script is.
Updated Answer: I think the answer to your problem is just to add a check for the output directory existence, and create it if needed. ie, add: if not os.path.exists(out_folder): os.makedirs(out_folder) to your existing code. More generally , you could try something more like this: import csv from urllib2 import urlopen import os import sys default_outfolder = "/home/sean/Home/workspace/CD/images" # simple arg passing wihtout error checking out_folder = sys.argv[1] if len(sys.argv) == 2 else default_outfolder if not os.path.exists(out_folder): os.makedirs(out_folder) # creates out_folder, including any required parent ones else: if not os.path.isdir(out_folder): raise RuntimeError('output path must be a directory') reader = csv.reader(open('champdata.csv', "rb"), delimiter = ",", skipinitialspace=True) for champs in reader: img_url = champs[2] filename = os.path.basename(img_url) outpath = os.path.join(out_folder, filename) print 'Downloading %s to %s' % (img_url, outpath) with open(outpath, 'wb') as f: u = urlopen(img_url) f.write(u.read()) The above code works for champdata.csv of the form stuff,more_stuff,http://www.somesite.com.au/path/to/image.png but will need to be adapted if I have not understood the actual format of your incoming data.
With regards to urllib AttributeError: 'module' object has no attribute 'urlopen'
import re import string import shutil import os import os.path import time import datetime import math import urllib from array import array import random filehandle = urllib.urlopen('http://www.google.com/') #open webpage s = filehandle.read() #read print s #display #what i plan to do with it once i get the first part working #results = re.findall('[<td style="font-weight:bold;" nowrap>$][0-9][0-9][0-9][.][0-9][0-9][</td></tr></tfoot></table>]',s) #earnings = '$ ' #for money in results: #earnings = earnings + money[1]+money[2]+money[3]+'.'+money[5]+money[6] #print earnings #raw_input() this is the code that i have so far. now i have looked at all the other forums that give solutions such as the name of the script, which is parse_Money.py, and i have tried doing it with urllib.request.urlopen AND i have tried running it on python 2.5, 2.6, and 2.7. If anybody has any suggestions it would be really welcome, thanks everyone!! --Matt ---EDIT--- I also tried this code and it worked, so im thinking its some kind of syntax error, so if anybody with a sharp eye can point it out, i would be very appreciative. import shutil import os import os.path import time import datetime import math import urllib from array import array import random b = 3 #find URL URL = raw_input('Type the URL you would like to read from[Example: http://www.google.com/] :') while b == 3: #get file name file1 = raw_input('Enter a file name for the downloaded code:') filepath = file1 + '.txt' if os.path.isfile(filepath): print 'File already exists' b = 3 else: print 'Filename accepted' b = 4 file_path = filepath #open file FileWrite = open(file_path, 'a') #acces URL filehandle = urllib.urlopen(URL) #display souce code for lines in filehandle.readlines(): FileWrite.write(lines) print lines print 'The above has been saved in both a text and html file' #close files filehandle.close() FileWrite.close()
It appears that the urlopen method is available in the urllib.request module and not in the urllib module as you're expecting. Rule of thumb: if you're getting an AttributeError, that field/operation is not present in the particular module. EDIT - Thanks to AndiDog for pointing out - this is a solution valid for Py 3.x, and not applicable to Py2.x!
The urlopen function is actually in the urllib2 module. Try import urllib2 and use urllib2.urlopen
I see that you are using Python2 or at least intend to use Python2. urlopen helper function is available in both urllib and urllib2 in Python2. What you need to do this, execute this script against the correct version of your python C:\Python26\python.exe yourscript.py