Here is the error:
[Running] python -u "/Users/thomasmciver/Desktop/chat/ai-chat.py"
File "/Users/thomasmciver/Desktop/chat/ai-chat.py", line 38
with open ('Users/thomasmciver/Desktop/chat/RC_2015-01.txt' buffering=1000) as f:
^
SyntaxError: invalid syntax
[Done] exited with code=1 in 0.051 seconds
Here is my code:
# This Python file uses the following encoding: utf-8
import sqlite3
import json
from datetime import datetime
timeframe = '2015-05'
sql_transaction = []
connection = sqlite3.connect('{}.db'.format(timeframe))
c = connection.cursor()
def create_table():
c.execute("CREATE TABLE IF NOT EXISTS parent_reply(parent_id TEXT PRIMARY KEY, comment_id TEXT UNIQUE, parent TEXT, comment TEXT, subreddit TEXT, unix INT, score INT)")
def format_data(data):
data = data.replace('\n',' newlinechar ').replace('\r',' newlinechar ').replace('"',"'")
return data
def find_parent(pid):
try:
sql = "SELECT comment FROM parent_reply WHERE comment_id = '{}' LIMIT 1".format(pid)
c.execute(sql)
result = c.fetchone()
if result != None:
return result[0]
else: return False
except Exception as e:
#print(str(e))
return False
if __name__ == '__main__':
create_table()
row_counter = 0
paired_rows = 0
with open ('Users/thomasmciver/Desktop/chat/RC_2015-01.txt' buffering=1000) as f:
for row in f:
row_counter += 1
row = json.loads(row)
parent_id = row['parent_id']
body = format_data(row['body'])
created_utc = row['created_utc']
score = row['score']
comment_id = row['name']
subreddit = row['subreddit']
parent_data = find_parent(parent_id)
if __name__ == '__main__':
create_table()
row_counter = 0
paired_rows = 0
# maybe check for a child, if child, is our new score superior? If so, replace. If not...
if score >= 2:
existing_comment_score = find_existing_score(parent_id)
You mean:
with open('Cloud Drive/Desktop/chat/RC_2015-01.txt', buffering=1000) as f:
instead of
with open ('Cloud Drive/Desktop/chat/RC_2015-01.txt' (buffering=1000)) as f:
What you had is not valid syntax.
Related
I have a very simple page spider that crawls for words on a given page and stores the count of the words in a SQLite data base. Although, the code exits with the exit code 0, the database won't update with any entries.
I don't know if I'm just snow blind or there's something inherently wrong with my code.
Here's the structure of the project and the code:
spider.py
input.txt
words.db
utilities (folder):
url_utilities.py
database_utilities.py
spider.py
import argparse
from utilities import url_utilities, database_utilities
def main(database: str, url_list_file: str):
big_word_list = []
urls = url_utilities.load_urls_from_file(url_list_file)
for url in urls:
print(f"Reading {url}")
page_content = url_utilities.load_page(url=url)
words = url_utilities.scrape_page(page_contents=page_content)
big_word_list.extend(words)
# database code
path = "C:\\Users\\baduker\\PycharmProjects\\page_spider\\words.db"
database_utilities.create_database(database_path=path)
database_utilities.save_words_to_database(database_path=path, words_list=big_word_list)
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("-db", "--database", help="SQLite File Name")
parser.add_argument("-i", "--input", help="File with urls")
args = parser.parse_args()
database_file = args.database
input_file = args.input
main(database=database_file, url_list_file=input_file)
url_utilities.py
import re
import string
from urllib.request import urlopen
from bs4 import BeautifulSoup
def load_urls_from_file(file_path: str):
try:
with open("input.txt") as f:
content = f.readlines()
return content
except FileNotFoundError:
print(f"The file {file_path} could not be found.")
exit(2)
def load_page(url: str):
response = urlopen(url)
html = response.read().decode("utf-8")
return html
def scrape_page(page_contents: str):
chicken_noodle = BeautifulSoup(page_contents, "html.parser")
for script in chicken_noodle(["script", "style"]):
script.extract()
text = chicken_noodle.get_text()
lines = (line.strip() for line in text.splitlines())
chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
text = ' '.join(chunk for chunk in chunks if chunk)
plain_text = ''.join(filter(lambda x: x in string.printable, text))
clean_words = []
words = plain_text.split(" ")
for word in words:
clean = True
for punctuation_marks in string.punctuation:
if punctuation_marks in word:
clean = False
if any(char.isdigit() for char in word):
clean = False
# at least two characters but no more than 10
if len(word) < 2 or len(word) > 10:
clean = False
if not re.match(r'^\w+$', word):
clean = False
if clean:
try:
clean_words.append(word.lower())
except UnicodeEncodeError:
print(".")
return clean_words
database_utilities.py
import sqlite3 as lite
def create_database(database_path: str):
conn = lite.connect(database_path)
with conn:
cur = conn.cursor()
cur.execute("drop table if exists words")
ddl = "create table words (word text not null primary key, usage_count int default 1 not null);"
cur.execute(ddl)
ddl = "create unique index words_word_uindex on words (word);"
cur.execute(ddl)
conn.close()
def save_words_to_database(database_path: str, words_list: list):
conn = lite.connect(database_path)
with conn:
cur = conn.cursor()
for word in words_list:
sql = "select count(word) from words where word='" + word + "';"
cur.execute(sql)
count = cur.fetchone()[0]
if count > 0:
sql = "update words set usage_count = usage_count + 1 where word='" + word + "';"
else:
sql = "insert into words(word) values ('" + word + "');"
cur.execute(sql)
conn.commit()
conn.close()
print(f"Database save complete!")
input.txt
https://en.wikipedia.org/wiki/Python_(programming_language)
https://en.wikipedia.org/wiki/Guido_van_Rossum
https://en.wikipedia.org/wiki/Benevolent_dictator_for_life
Your code seems to be working.
I suspect you have permissions issue with the database file.
Make sure this line points to a folder where you have permissions to write:
path = "C:\\Users\\baduker\\PycharmProjects\\page_spider\\words.db"
or just remove the path and see if it works.
path = "words.db"
Your context manager, i.e. the `with` statement:
with con:
You should commit before closing the connection — that is, commit inside the `with` block itself.
You should do that in your database utility file.
I am following an online tutorial for making a chatbot...here is the code for the beginning portion
import sqlite3
import json
from datetime import datetime
timeframe = '2015-01'
sql_transaction = []
connection = sqlite3.connect('{}.db'.format(timeframe))
c = connection.cursor()
def create_table():
c.execute("CREATE TABLE IF NOT EXISTS parent_reply(parent_id TEXT PRIMARY KEY, comment_id TEXT UNIQUE, parent TEXT, comment TEXT, subreddit TEXT, unix INT, score INT)")
def format_data(data):
data = data.replace('\n', ' newlinechar ').replace('\r', ' newlinechar ').replace('"', "'")
return data
def find_parent(pid):
try:
sql = "SELECT comment FROM parent_reply WHERE comment_id = '{}' LIMIT 1".format(pid)
c.execute(sql)
result = c.fetchone()
if result != None:
return result[0]
else: return False
except Exception as e:
#print(str(e))
return False
if __name__ == '__main__':
create_table()
row_counter = 0
paired_rows = 0
with open('C:/Users/oriba/Desktop/Month of Reddit/RC_2015-01'.format(timeframe.split('-')[0], timeframe), encoding='ISO-8859-1', buffering=1000) as f:
for row in f:
print(row)
row_counter += 1
row = json.load(row)
parent_id = row['parent_id']
body = format_data(row['body'])
created_utc = row['created_utc']
score = row['score']
comment_id = row['name']
subreddit = row['subreddit']
parent_data = find_parent(parent_id)
When that runs, I get this error:
Traceback (most recent call last):
File "C:/Users/oriba/Desktop/Month of Reddit/chatbot.py", line 37, in <module>
for row in f:
File "C:\Users\oriba\AppData\Local\Programs\Python\Python36\lib\encodings\cp1252.py", line 23, in decode
return codecs.charmap_decode(input,self.errors,decoding_table)[0]
UnicodeDecodeError: 'charmap' codec can't decode byte 0x90 in position 97: character maps to <undefined>
After searching online, I found that adding "encoding='ISO-8859-1'" to 'with open()' should fix it...then I get this error:
Traceback (most recent call last):
File "C:/Users/oriba/Desktop/Month of Reddit/chatbot.py", line 40, in <module>
row = json.load(row)
File "C:\Users\oriba\AppData\Local\Programs\Python\Python36\lib\json\__init__.py", line 296, in load
BZh91AY&SYÔAÙÕÿî»ÿÿÿúÿÿÿÿÿÿÿÿc*è` 1Ï. ñÕ ¢U±Ã$'¤;\=# ÝX9kl´ÜιKW; É# Ò PQáGF PÝ Û P :è
return loads(fp.read(),
AttributeError: 'str' object has no attribute 'read'
And now I'm at a loss. I know this is a lot, this is complex for me. I appreciate any help :)
json.loads() does the job.
loads() read from a str object while load() read from a File object
Your code is:
for row in f:
...
row here is a str
My file path is
C:/Users/Ratul/Downloads/Machine_Learning_Data/reddit_data/reddit_data/
There are many folders in that directory. I need to look through those directories and open files that starts with 'RC_'
Here's my code:
import sqlite3
import json
import os
from datetime import datetime
timeframe = '2015-05'
sql_transaction = []
connection = sqlite3.connect('{}.db'.format(timeframe))
c = connection.cursor()
def create_table():
c.execute("CREATE TABLE IF NOT EXISTS parent_reply(parent_id TEXT PRIMARY KEY, comment_id TEXT UNIQUE, parent TEXT, comment TEXT, subreddit TEXT, unix INT, score INT)")
def format_data(data):
data = data.replace('\n',' newlinechar ').replace('\r',' newlinechar ').replace('"',"'")
return data
def find_parent(pid):
try:
sql = "SELECT comment FROM parent_reply WHERE comment_id = '{}' LIMIT 1".format(pid)
c.execute(sql)
result = c.fetchone()
if result != None:
return result[0]
else: return False
except Exception as e:
#print(str(e))
return False
if __name__ == '__main__':
create_table()
row_counter = 0
paired_rows = 0
with open('C:/Users/Ratul/Downloads/Machine_Learning_Data/reddit_data/reddit_data/{}/RC_{}'.format(timeframe.split('-')[0],timeframe), buffering=1000) as f:
for row in f:
row_counter += 1
row = json.loads(row)
parent_id = row['parent_id']
body = format_data(row['body'])
created_utc = row['created_utc']
score = row['score']
comment_id = row['name']
subreddit = row['subreddit']
parent_data = find_parent(parent_id)
# maybe check for a child, if child, is our new score superior? If so, replace. If not...
if score >= 2:
existing_comment_score = find_existing_score(parent_id)
But it seems there is some mistake in the path. I get an error
Traceback (most recent call last): File
"C:/Users/Ratul/AppData/Local/Programs/Python/Python37/test02.py",
line 36, in
with open('C:/Users/Ratul/Downloads/Machine_Learning_Data/reddit_data/reddit_data/{}/RC_{}'.format(timeframe.split('-')[0],timeframe), buffering=1000) as f: FileNotFoundError: [Errno 2] No such file or
directory:
'C:/Users/Ratul/Downloads/Machine_Learning_Data/reddit_data/reddit_data/2015/RC_2015-05'
I'm not sure what I did wrong there. Please help.
Use How to debug small programs (#1) and
print('C:/Users/Ratul/Downloads/Machine_Learning_Data/reddit_data/reddit_data/{}/RC_{}'.format(
timeframe.split('-')[0],timeframe))
instead of open. Check if all exists - because for some of your values it does not exist. Hence the error.
If most of your files exist, it is far easier to handle the error itself:
myname = 'C:/Users/Ratul/Downloads/Machine_Learning_Data/reddit_data/reddit_data/{}/RC_{}'.format(timeframe.split('-')[0],timeframe)
try:
with open(myname, buffering=1000) as f:
for row in f:
row_counter += 1
row = json.loads(row)
parent_id = row['parent_id']
body = format_data(row['body'])
created_utc = row['created_utc']
score = row['score']
comment_id = row['name']
subreddit = row['subreddit']
parent_data = find_parent(parent_id)
# maybe check for a child, if child, is our new score superior? If so, replace. If not...
if score >= 2:
existing_comment_score = find_existing_score(parent_id)
except FileNotFoundError as fnfError:
print(myname)
print(fnfError)
The open() command does not care about you using \ or / - if using \ you should escape it or use raw strings (aka: r'C:\some\dir\file.txt') - your syntax is ok as is - open() will use the appropriate directory delimiters under windows even if you give it 'c:/somedir/file.txt'
Readup: About error handling
I must extract data from an IFC file, but when I read the file it seems I make some mistake that I don't understand:
First: I've a key;
Second: I read a file;
Third: I create a string and I put it in a csv like file.
Fourth: the visual components are in Pyside2.
the code:
orderNr = self.getIFC_ProjectDetail(readFile, self.orderNrLineEdit.text())
custNr = self.getIFC_ProjectDetail(readFile, self.custNoLineEdit.text())
if len(custNr) == 0:
custNr = "9999"
projManager = self.getIFC_ProjectDetail(readFile, self.projManagerLineEdit.text())
drawer = self.getIFC_ProjectDetail(readFile, self.drawerLineEdit.text())
ifcFile = open(readFile, 'r')
csvFile = open(csvFileName, 'w')
lineTokens = []
csvFile.write("GUID;Type;UserText1;UserText2;UserText3;UserText4;UserText5;UserText6;UserText7;\n")
for mainLine in ifcFile:
if ("IFCSLAB" in line or "IFCWALLSTANDARDCASE" in line):
if len(uID) > 0:
if uID == oldID:
uID = "ciao"
csvFile.write("{0};{1};{2};{3};{4};{5};{6};{7};{8};\n".format(uID, matType, orderNr, custNr, assPos, partPos, fab, projManager, drawer))
oldID = uID
uID = ""
matType = ""
assPos = ""
partPos = ""
fab = ""
lineTokens = line.split(",")
headerLine = line[0:line.find("'")]
line = line[line.find("(") +1:len(line)]
lineTokens = line.split(",")
uID = lineTokens[0]
uID = uID[1:len(uID)-1]
matType = lineTokens[2]
matType = matType[1:len(matType)-1]
floorName = lineTokens[4]
floorName = floorName[1:len(matType)-1]
if self.assPosLineEdit.text() in line:
assPos = self.getIFC_EntityProperty(line, self.assPosLineEdit.text())
if self.partPosLineEdit.text() in line:
partPos = self.getIFC_EntityProperty(line, self.partPosLineEdit.text())
if self.fabricatorLineEdit.text() in line:
fab = self.getIFC_EntityProperty(line, self.fabricatorLineEdit.text())
if i == progDlg.maximum():
csvFile.write("{0};{1};{2};{3};{4};{5};{6};{7};{8};\n".format(uID, matType, orderNr, custNr, assPos, partPos, fab, projManager, drawer))
ifcFile.close()
csvFile.close()
def getIFC_EntityProperty(self, row, ifcKey):
s = ""
lineTokens = []
if ifcKey in row:
lineTokens = row.split(",")
ifcTag = lineTokens[2]
ifcTag = ifcTag[0:ifcTag.find("(")]
#print(ifcTag)
if len(ifcTag) > 1:
s = row[row.find(ifcTag)+len(ifcTag)+2:row.rfind(',')-2]
return s
def getIFC_ProjectDetail(self, fileName, ifcKey):
s = ""
content = open(fileName, 'r')
lineTokens = []
for line in content:
if ifcKey in line:
lineTokens = line.split(",")
ifcTag = lineTokens[2]
ifcTag = ifcTag[0:ifcTag.find("(")]
if len(ifcTag) > 1:
s = line[line.find(ifcTag)+len(ifcTag)+2:line.rfind(',')-2]
break
content.close()
return s
The problem is that it skips a value: it shifts a row and posts the data on the line below in the CSV-like file, creating the line with the right uID but leaving that line's fields blank.
Can anyone help me?
Given the list stored in the CSV file shown in the example below, is each row stored in the array?
import csv
import os
DIR = "C:/Users/Administrator/Desktop/key_list.csv"
def Customer_List(csv):
customer = open(DIR)
for line in customer:
row = []
(row['MEM_ID'],
row['MEM_SQ'],
row['X_AUTH_USER'],
row['X_AUTH_KEY'],
row['X_STORAGE_URL'])=line.split(",")
if csv == row['MEM_ID']:
customer.close()
return(row)
else:
print ("Not search for ID")
return([])
query = input("Input the your email id: ")
result = Customer_List(query)
This example raises an error — why?
Additionally, here is the updated code and its error:
Input the your email id: sdfsdf#naver.com
Traceback (most recent call last):
File "C:\Users\Administrator\Desktop\PyDev\Pydev\Day4\uCloudStorage.py", line 32, in <module>
result = Customer_List(query)
File "C:\Users\Administrator\Desktop\PyDev\Pydev\Day4\uCloudStorage.py", line 20, in Customer_List
row['X_STORAGE_URL'])=line.split(",")
ValueError: too many values to unpack (expected 5)
To show what's in the CSV, here's some simple code and the result:
DIR = "C:/Users/Administrator/Desktop/key_list.csv"
def Customer_List():
customer = open(DIR)
for line in customer:
print (line)
result:
MEM_ID, MEM_SQ, X_AUTH_USER, X_AUTH_KEY, X_STORAGE_URL
kimdm98#gmail.com, M100009, M100009:M100009, wreQew3u, AUTH_xxxxxx-xxxxx
minsejisuk#paran.com, M100022, M100022:M100022, PEm6tREx, AUTH_xxxxx-xxxxx
sdfsdf#naver.com, M100034, M100034:M100034, 3tAzEf3u, AUTH_xxxx-xxxxx
=============================================================================
I edited this script — is it best practice?
DIR = "C:/Users/Administrator/Desktop/key_list.csv"
DATA = csv.reader(open(DIR,"r"))
ID = input("Input the Customer EMAIL ID: ")
def cList(value):
for row in DATA:
MEM_ID = row[0]
MEM_SQ = row[1]
X_AUTH_USER = row[2]
X_AUTH_KEY = row[3]
X_STORAGE_URL = row[4]
ACCESSKEY = row[5]
ACCESSKEYID1 = row[6]
SECRETKEY1 = row[7]
ACCESSKEYID2 = row[8]
SECRETKEY2 = row[9]
if MEM_ID == value:
print(".EMAIL ID :" + MEM_ID)
print(".MID :" + MEM_SQ)
print(".PASSWORD :" + X_AUTH_KEY)
print(".AUTH_ACCOUNT :" + X_STORAGE_URL)
print(".API KEY :" + ACCESSKEY)
cList(ID)
print ("============================")
print ("1. Upload / Download Error")
print ("2. Permission Error")
print ("3. 4xx Error")
print ("4. etc... Error")
print ("============================")
Result
Input the Customer EMAIL ID: kiyoung.jung#kt.com
.EMAIL ID :kiyoung.jung#kt.com
.MID :xxxxxx
.PASSWORD :xxxxxx
.AUTH_ACCOUNT :xxxxxx-d50a-xxxx-xxxbc05-6267d5ff6712
.API KEY :xxxxxxxx
============================
1. Upload / Download Error
2. Permission Error
3. 4xx Error
4. etc... Error
============================
If your input data is formatted like what you added at the very end of your question, your could get your approach to work like this:
import csv
DIR = "C:/Users/Administrator/Desktop/key_list.csv"
def Customer_List(email_id):
with open(DIR, newline='') as f: # open assuming Python 3.x
csvreader = csv.reader(f, skipinitialspace=True)
for fields in csvreader:
row = {} # initialize to an empty dictionary
(row['MEM_ID'],
row['MEM_SQ'],
row['X_AUTH_USER'],
row['X_AUTH_KEY'],
row['X_STORAGE_URL']) = fields
if row['MEM_ID'] == email_id:
return [row['MEM_ID'],
row['MEM_SQ'],
row['X_AUTH_USER'],
row['X_AUTH_KEY'],
row['X_STORAGE_URL']]
else:
print("ID not found")
return []
match = Customer_List('minsejisuk#paran.com')
if match:
print('found! {}'.format(match))
However you could simplify things slightly by using a csv.DictReader to read the file which will automatically read the header line to obtain the fieldnames and then return a dictionary using them as keys for each row read:
def Customer_List(email_id):
with open(DIR, newline='') as f: # open assuming Python 3.x
csvreader = csv.DictReader(f, skipinitialspace=True)
for row in csvreader:
if row['MEM_ID'] == email_id:
return [row['MEM_ID'],
row['MEM_SQ'],
row['X_AUTH_USER'],
row['X_AUTH_KEY'],
row['X_STORAGE_URL']]
else:
print("ID not found")
return []