blob.noun_phrases fails with ConllExtractor() - python

I'm importing a YAML config file, and calling blob.noun_phrases raises an error. I tried a simple example with an explicit unicode string and it works, but in this case it doesn't. Can somebody help me figure this out? Thanks in advance.
import re
import csv
from textblob import TextBlob
from textblob.np_extractors import ConllExtractor
# YAML binding assumed, e.g. from a PyYAML import

class TatvamNPExtractService:
    text_column_no = -1

    def Read_config(self, config_file, cust_id):
        path = {}
        config_file_obj = open(self.config_file, "r")
        consts = YAML.load(config_file_obj)
        if consts:
            log_out_path = consts['log_file_dir']
            path['log_out_path'] = log_out_path
            self.text_column_no = consts['comment_column_no']  # column contains reviews
        return path

    def NounPhraseExtrctor(self, file):
        word_chars_pattern = re.compile('\W')
        try:
            in_file_path = self.paths['in_dir_path'] + file
            with open(in_file_path, 'rb') as csvfile:
                reader = csv.reader(csvfile, delimiter="|")
                extractor = ConllExtractor()
                for row in reader:
                    if len(row) > 2:
                        mod_text = row[self.text_column_no - 1]
                        print " the text is %s" % ", ".join(mod_text)
                        tBlob = TextBlob(mod_text, np_extractor=extractor)
                        tlist = tBlob.noun_phrases
            return True
The above code gives me the error below:
AttributeError: 'str' object has no attribute 'raw'
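Since the question notes that an explicit unicode string works, a likely cause (a guess, not a confirmed fix) is that csv.reader in Python 2 yields byte strings while the noun-phrase extraction path expects unicode. Decoding each field before building the TextBlob, assuming the CSV is UTF-8 encoded, would look like:

    # Hypothetical fix: decode the csv.reader byte string to unicode first
    # (assumes the input file is UTF-8 encoded).
    mod_text = row[self.text_column_no - 1].decode('utf-8')
    tBlob = TextBlob(mod_text, np_extractor=extractor)
    tlist = tBlob.noun_phrases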
EDIT
Main Program:
import sys, os

import TatvamNPExtractService

def main():
    cmd_args = []
    config_file = ""
    cmd_args.extend(sys.argv)
    if len(cmd_args) < 3:
        print "Expecting Config File and Customer ID!!! Terminating..."
        sys.exit()
    config_file = sys.argv[1]
    cust_id = sys.argv[2]
    if not (os.path.exists(config_file)):
        print "config File not exists, Terminating"
        sys.exit()
    ps = TatvamNPExtractService.TatvamNPExtractService(cust_id, config_file)
    ps.doProcess()


Error while using click module in python - Error: No such option: --addtask

I get the error: Error: No such option: --addtask
I ran the code with: python3 main.py --addtask task
On running this, the addtask function should execute, but it doesn't. I've watched several YouTube videos and still couldn't fix it. Please help! Code below:
import click
import json
from rich.console import Console

console = Console()
filename = './db.json'

@click.group()
def cli():
    pass

def veiw_data():
    with open(filename, 'r') as f:
        temp = json.load(f)
        for entry in temp:
            print(entry)

def get_data(name: str):
    with open(filename, 'r') as f:
        temp = json.load(f)
        for entry in temp:
            if entry['name'] == name:
                return entry

def insert_data(data: dict):
    with open(filename, 'r') as f:
        temp = json.loads(f)
        print(temp)
        temp.append(data)
    with open(filename, 'w') as f:
        json.dump(temp, f, indent=4)

def delete_data(name: str):
    with open(filename, 'r') as f:
        temp = json.loads(f)
    i = 0
    for entry in temp:
        if entry[name] == name:
            temp.pop(i)
        i += 1

@click.command()
@click.option('--addtask', help='Add a task')
def addtask(task):
    data = {
        "name": task
    }
    insert_data(data)
    console.print(f'[bold cyan]Task Added -[/bold cyan] [red]{task}[/red]')

cli.add_command(addtask)

if __name__ == '__main__':
    cli()
Try this: rename click to cli in the command decorator and remove the line cli.add_command(addtask). The option itself still comes from click.option; naming its parameter 'task' makes it match the function signature:
@cli.command()
@click.option('--addtask', 'task', help='Add a task')
def addtask(task):
    data = {
        "name": task
    }
    insert_data(data)
    console.print(f'[bold cyan]Task Added -[/bold cyan] [red]{task}[/red]')
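For reference, a minimal runnable sketch of a click group with one subcommand (my own example, not from the original thread): addtask becomes a subcommand name and the option is a separate flag, so the invocation is python3 main.py addtask --task foo rather than python3 main.py --addtask foo:

    import click

    @click.group()
    def cli():
        pass

    @cli.command()
    @click.option('--task', help='Name of the task to add')
    def addtask(task):
        # Invoked as: python3 main.py addtask --task "buy milk"
        click.echo('Task Added - %s' % task)

    if __name__ == '__main__':
        cli()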

How to import every Nth row of CSV file using python into script

I am using a script in Fusion 360 called importsplinecsv.
I was wondering if it is possible to modify the script so that it imports only every 10th row, as the number of rows being imported is very large and bloating the design.
If I could get some help that would be awesome.
Here is the text:
# Author-Autodesk Inc.
# Description-Import spline from csv file

import adsk.core, adsk.fusion, traceback
import io

def run(context):
    ui = None
    try:
        app = adsk.core.Application.get()
        ui = app.userInterface
        # Get all components in the active design.
        product = app.activeProduct
        design = adsk.fusion.Design.cast(product)
        title = 'Import Spline csv'
        if not design:
            ui.messageBox('No active Fusion design', title)
            return
        dlg = ui.createFileDialog()
        dlg.title = 'Open CSV File'
        dlg.filter = 'Comma Separated Values (*.csv);;All Files (*.*)'
        if dlg.showOpen() != adsk.core.DialogResults.DialogOK:
            return
        filename = dlg.filename
        with io.open(filename, 'r', encoding='utf-8-sig') as f:
            points = adsk.core.ObjectCollection.create()
            line = f.readline()
            data = []
            while line:
                pntStrArr = line.split(',')
                for pntStr in pntStrArr:
                    try:
                        data.append(float(pntStr))
                    except:
                        break
                if len(data) >= 3:
                    point = adsk.core.Point3D.create(data[0], data[1], data[2])
                    points.add(point)
                line = f.readline()
                data.clear()
        if points.count:
            root = design.rootComponent
            sketch = root.sketches.add(root.xYConstructionPlane)
            sketch.sketchCurves.sketchFittedSplines.add(points)
        else:
            ui.messageBox('No valid points', title)
    except:
        if ui:
            ui.messageBox('Failed:\n{}'.format(traceback.format_exc()))
I have not used this library before, but try:

    for i, line in enumerate(f):
        if i % 10 == 0:
            # your per-line import code goes here

f is your file pointer, i is the line number (starting at 0), and line is the line itself. Merged into the original script, the relevant part looks like:
        dlg = ui.createFileDialog()
        dlg.title = 'Open CSV File'
        dlg.filter = 'Comma Separated Values (*.csv);;All Files (*.*)'
        if dlg.showOpen() != adsk.core.DialogResults.DialogOK:
            return
        filename = dlg.filename
        with io.open(filename, 'r', encoding='utf-8-sig') as f:
            points = adsk.core.ObjectCollection.create()
            data = []
            for i, line in enumerate(f):
                if i % 10 == 0:
                    pntStrArr = line.split(',')
                    for pntStr in pntStrArr:
                        try:
                            data.append(float(pntStr))
                        except:
                            break
                    if len(data) >= 3:
                        point = adsk.core.Point3D.create(data[0], data[1], data[2])
                        points.add(point)
                    data.clear()
        if points.count:
            root = design.rootComponent
            sketch = root.sketches.add(root.xYConstructionPlane)
            sketch.sketchCurves.sketchFittedSplines.add(points)
        else:
            ui.messageBox('No valid points', title)
    except:
        if ui:
            ui.messageBox('Failed:\n{}'.format(traceback.format_exc()))
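An equivalent way to take every 10th line, using only the standard library (a suggestion, not part of the original answer), is itertools.islice:

    import itertools

    with io.open(filename, 'r', encoding='utf-8-sig') as f:
        # islice(f, 0, None, 10) yields lines 0, 10, 20, ...
        for line in itertools.islice(f, 0, None, 10):
            pntStrArr = line.split(',')
            # ...parse and add the point exactly as in the original loop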

Python - how to optimize iterator in file parsing

I get files that have NTFS audit permissions and I'm using Python to parse them. The raw CSV files list the path and then which groups have which access, such as this type of pattern:
E:\DIR A, CREATOR OWNER FullControl
E:\DIR A, Sales FullControl
E:\DIR A, HR Full Control
E:\DIR A\SUBDIR, Sales FullControl
E:\DIR A\SUBDIR, HR FullControl
My code parses the file to output this:
File Access for: E:\DIR A
CREATOR OWNER,FullControl
Sales,FullControl
HR,FullControl
File Access For: E:\DIR A\SUBDIR
Sales,FullControl
HR,FullControl
I'm new to generators, but I'd like to use them to optimize my code. Nothing I've tried seems to work, so here is the original code (I know it's ugly). It works, but it's very slow. The only way I could do this was to parse out the paths first, put them in a list, make a set so they're unique, then iterate over that list, match each path against the paths in the second list, and output all of the items it finds. Like I said, it's ugly, but it works.
import os, codecs, sys

reload(sys)
sys.setdefaultencoding('utf8')  # to prevent cp-932 errors on screen

file = "aud.csv"
outfile = "access-2.csv"

filelist = []
accesslist = []

with codecs.open(file, "r", 'utf-8-sig') as infile:
    for line in infile:
        newline = line.split(',')
        folder = newline[0].replace("\"", "")
        user = newline[1].replace("\"", "")
        filelist.append(folder)
        accesslist.append(folder + "," + user)

newfl = sorted(set(filelist))

def makeFile():
    print "Starting, please wait"
    for i in range(1, len(newfl)):
        searchItem = str(newfl[i])
        with codecs.open(outfile, "a", 'utf-8-sig') as output:
            outtext = ("\r\nFile access for: " + searchItem + "\r\n")
            output.write(outtext)
            for item in accesslist:
                searchBreak = item.split(",")
                searchTarg = searchBreak[0]
                if searchItem == searchTarg:
                    searchBreaknew = searchBreak[1].replace("FSA-INC01S\\", "")
                    searchBreaknew = str(searchBreaknew)
                    # print(searchBreaknew)
                    searchBreaknew = searchBreaknew.replace(" ", ",")
                    searchBreaknew = searchBreaknew.replace("CREATOR,OWNER", "CREATOR OWNER")
                    output.write(searchBreaknew)
How should I optimize this?
EDIT:
Here is an edited version. It runs MUCH faster, though I'm sure it can still be improved:
import os, codecs, sys, csv

reload(sys)
sys.setdefaultencoding('utf8')

file = "aud.csv"
outfile = "access-3.csv"

filelist = []
accesslist = []

with codecs.open(file, "r", 'utf-8-sig') as csvinfile:
    auditfile = csv.reader(csvinfile, delimiter=",")
    for line in auditfile:
        folder = line[0]
        user = line[1].replace("FSA-INC01S\\", "")
        filelist.append(folder)
        accesslist.append(folder + "," + user)

newfl = sorted(set(filelist))

def makeFile():
    print "Starting, please wait"
    for i in xrange(1, len(newfl)):
        searchItem = str(newfl[i])
        outtext = ("\r\nFile access for: " + searchItem + "\r\n")
        accessUserlist = ""
        for item in accesslist:
            searchBreak = item.split(",")
            if searchItem == searchBreak[0]:
                searchBreaknew = str(searchBreak[1]).replace(" ", ",")
                searchBreaknew = searchBreaknew.replace("R,O", "R O")
                accessUserlist += searchBreaknew + "\r\n"
        with codecs.open(outfile, "a", 'utf-8-sig') as output:
            output.write(outtext)
            output.write(accessUserlist)
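A further step in the same direction (a sketch in the question's own Python 2 style, not a confirmed rewrite): grouping the entries by folder in one pass with a dict removes the repeated rescan of accesslist entirely, dropping the work from quadratic to linear:

    import codecs, csv
    from collections import defaultdict

    # file and outfile as defined above
    access_by_folder = defaultdict(list)
    with codecs.open(file, "r", 'utf-8-sig') as csvinfile:
        for line in csv.reader(csvinfile, delimiter=","):
            folder = line[0]
            user = line[1].replace("FSA-INC01S\\", "")
            access_by_folder[folder].append(user)

    with codecs.open(outfile, "a", 'utf-8-sig') as output:
        for folder in sorted(access_by_folder):
            output.write("\r\nFile access for: " + folder + "\r\n")
            for user in access_by_folder[folder]:
                output.write(user + "\r\n")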
I was misled by your use of the .csv file extension.
Your expected output isn't valid CSV, since a record can't contain a newline.
Here is a proposal using a generator that yields record by record:
class Audit(object):
    def __init__(self, fieldnames):
        self.fieldnames = fieldnames
        self.__access = {}

    def append(self, row):
        folder = row[self.fieldnames[0]]
        access = row[self.fieldnames[1]].strip(' ')
        access = access.replace("FSA-INC01S\\", "")
        access = access.split(' ')
        if len(access) == 3:
            if access[0] == 'CREATOR':
                access[0] += ' ' + access[1]
                del access[1]
            elif access[1] == 'Full':
                access[1] += ' ' + access[2]
                del access[2]
        if folder not in self.__access:
            self.__access[folder] = []
        self.__access[folder].append(access)

    # Generator for class Audit
    def __iter__(self):
        record = ''
        for folder in sorted(self.__access):
            record = folder + '\n'
            for access in self.__access[folder]:
                record += '%s\n' % (','.join(access))
            yield record + '\n'
How to use it:

    def main():
        import io, csv
        audit = Audit(['Folder', 'Accesslist'])
        # file and outfile as defined in the question
        with io.open(file, "r", encoding='utf-8') as csc_in:
            for row in csv.DictReader(csc_in, delimiter=","):
                audit.append(row)
        with io.open(outfile, 'w', newline='', encoding='utf-8') as txt_out:
            for record in audit:
                txt_out.write(record)
Tested with Python:3.4.2 - csv:1.0
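If the input is already sorted by folder, as the sample data is, a single pass with itertools.groupby avoids building any intermediate structure at all; a sketch under that assumption (in_path and out_path are hypothetical names):

    import csv
    import io
    from itertools import groupby
    from operator import itemgetter

    def write_report(in_path, out_path):
        with io.open(in_path, 'r', encoding='utf-8-sig') as src, \
             io.open(out_path, 'w', encoding='utf-8-sig') as dst:
            rows = csv.reader(src, delimiter=',')
            # groupby bundles consecutive rows sharing the same folder (column 0)
            for folder, group in groupby(rows, key=itemgetter(0)):
                dst.write('\r\nFile access for: %s\r\n' % folder)
                for row in group:
                    dst.write('%s\r\n' % row[1].strip())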

Extract Data From an IFC File in Python

I need to extract data from an IFC file, but when I read the file it seems I make some mistake I don't understand:
First: I have a key;
Second: I read the file;
Third: I create a string and put it in a CSV-like file;
Fourth: the visual components are in PySide2.
The code:
orderNr = self.getIFC_ProjectDetail(readFile, self.orderNrLineEdit.text())
custNr = self.getIFC_ProjectDetail(readFile, self.custNoLineEdit.text())
if len(custNr) == 0:
    custNr = "9999"
projManager = self.getIFC_ProjectDetail(readFile, self.projManagerLineEdit.text())
drawer = self.getIFC_ProjectDetail(readFile, self.drawerLineEdit.text())
ifcFile = open(readFile, 'r')
csvFile = open(csvFileName, 'w')
lineTokens = []
csvFile.write("GUID;Type;UserText1;UserText2;UserText3;UserText4;UserText5;UserText6;UserText7;\n")
for line in ifcFile:
    if ("IFCSLAB" in line or "IFCWALLSTANDARDCASE" in line):
        if len(uID) > 0:
            if uID == oldID:
                uID = "ciao"
            csvFile.write("{0};{1};{2};{3};{4};{5};{6};{7};{8};\n".format(uID, matType, orderNr, custNr, assPos, partPos, fab, projManager, drawer))
            oldID = uID
            uID = ""
            matType = ""
            assPos = ""
            partPos = ""
            fab = ""
        lineTokens = line.split(",")
        headerLine = line[0:line.find("'")]
        line = line[line.find("(") + 1:len(line)]
        lineTokens = line.split(",")
        uID = lineTokens[0]
        uID = uID[1:len(uID) - 1]
        matType = lineTokens[2]
        matType = matType[1:len(matType) - 1]
        floorName = lineTokens[4]
        floorName = floorName[1:len(matType) - 1]
    if self.assPosLineEdit.text() in line:
        assPos = self.getIFC_EntityProperty(line, self.assPosLineEdit.text())
    if self.partPosLineEdit.text() in line:
        partPos = self.getIFC_EntityProperty(line, self.partPosLineEdit.text())
    if self.fabricatorLineEdit.text() in line:
        fab = self.getIFC_EntityProperty(line, self.fabricatorLineEdit.text())
    if i == progDlg.maximum():
        csvFile.write("{0};{1};{2};{3};{4};{5};{6};{7};{8};\n".format(uID, matType, orderNr, custNr, assPos, partPos, fab, projManager, drawer))
ifcFile.close()
csvFile.close()

def getIFC_EntityProperty(self, row, ifcKey):
    s = ""
    lineTokens = []
    if ifcKey in row:
        lineTokens = row.split(",")
        ifcTag = lineTokens[2]
        ifcTag = ifcTag[0:ifcTag.find("(")]
        # print(ifcTag)
        if len(ifcTag) > 1:
            s = row[row.find(ifcTag) + len(ifcTag) + 2:row.rfind(',') - 2]
    return s

def getIFC_ProjectDetail(self, fileName, ifcKey):
    s = ""
    content = open(fileName, 'r')
    lineTokens = []
    for line in content:
        if ifcKey in line:
            lineTokens = line.split(",")
            ifcTag = lineTokens[2]
            ifcTag = ifcTag[0:ifcTag.find("(")]
            if len(ifcTag) > 1:
                s = line[line.find(ifcTag) + len(ifcTag) + 2:line.rfind(',') - 2]
                break
    content.close()
    return s
The problem is that it skips a value: it shifts by a row and writes the data on the line below in the CSV-like file, creating the line with the right uID but leaving the other fields of that line blank.
Can anyone help me?
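No answer is recorded for this one, but one way to make the write-the-previous-record flow easier to debug (my own sketch, not a confirmed fix) is to accumulate one dict per entity while reading and emit all rows with csv.writer at the end, so a property parsed after the entity line can never land on the wrong output row:

    import csv

    def write_entities(entities, csv_file_name):
        # entities: a list of dicts, one per IFCSLAB/IFCWALLSTANDARDCASE,
        # filled in during the read loop instead of written immediately
        fields = ["GUID", "Type", "UserText1", "UserText2", "UserText3",
                  "UserText4", "UserText5", "UserText6", "UserText7"]
        with open(csv_file_name, 'w') as out:
            writer = csv.writer(out, delimiter=';')
            writer.writerow(fields)
            for entity in entities:
                writer.writerow([entity.get(f, "") for f in fields])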

a bytes-like object is required, not 'str' JSON File opened as STR

I've only learnt the basics of Python, so please forgive me, but I was not able to determine the fix from the other posts. I open my JSON files with 'r', and I think I'm writing to them with 'r', but it doesn't like that. Changing it to 'r' doesn't help :(
For the following section:
    if isinstance(to_write, list):
        self.log_file.write(''.join(to_write) + "<r/>")
    else:
        self.log_file.write(str(to_write) + "<r/>")
    self.log_file.flush()
The error I get is: a bytes-like object is required, not 'str'
import math
import time
from random import randint
import json
from instagram.client import InstagramAPI

class Bot:
    def __init__(self, config_file, tags_file):
        # Loading the configuration file, it has the access_token, user_id and others configs
        self.config = json.load(config_file)
        # Loading the tags file, it will be kept up to date while the script is running
        self.tags = json.load(tags_file)
        # Log file to output to html the debugging info about the script
        self.filename = self.config["path"] + self.config["prefix_name"] + time.strftime("%d%m%Y") + ".html"
        self.log_file = open(self.filename, "wb")
        # Initializing the Instagram API with our access token
        self.api = InstagramAPI(access_token=self.config["access_token"], client_secret=self.config['client_secret'])
        # Likes per tag rate
        self.likes_per_tag = math.trunc(min(self.config["follows_per_hour"],
                                            self.config["likes_per_hour"]) / len(self.tags["tags"]))

    def save_tags(self):
        j = json.dumps(self.tags, indent=4)
        f = open('tags.json', 'w')
        print >> f, j
        f.close()

    def insta_write(self, to_write):
        if self.filename != self.config["path"] + self.config["prefix_name"] + time.strftime("%d%m%Y") + ".html":
            self.log_file.close()
            self.filename = self.config["path"] + self.config["prefix_name"] + time.strftime("%d%m%Y") + ".html"
            self.log_file = open(self.filename, "wb")
        if isinstance(to_write, list):
            self.log_file.write(''.join(to_write) + "<r/>")
        else:
            self.log_file.write(str(to_write) + "<r/>")
        self.log_file.flush()

    def going_sleep(self, timer):
        sleep = randint(timer, 2 * timer)
        self.insta_write("SLEEP " + str(sleep))
        time.sleep(sleep)

    def like_and_follow(self, media, likes_for_this_tag):
        try:
            var = self.api.user_relationship(user_id=media.user.id)
            if self.config["my_user_id"] != media.user.id:
                self.insta_write("--------------")
                self.insta_write(var)
                if var.outgoing_status == 'none':
                    self.insta_write("LIKE RESULT:")
                    self.insta_write(self.api.like_media(media_id=media.id))
                    self.insta_write("FOLLOW RESULT:")
                    self.insta_write(self.api.follow_user(user_id=media.user.id))
                    likes_for_this_tag -= 1
                    self.going_sleep(self.config["sleep_timer"])
                else:
                    self.going_sleep(self.config["sleep_timer"] / 2)
        except Exception as e:
            self.insta_write(str(e))
            self.insta_write("GOING SLEEP 30 min")
            time.sleep(1800)
            self.like_and_follow(media, likes_for_this_tag)
        return likes_for_this_tag

    def run(self):
        while True:
            for tag in self.tags["tags"].keys():
                tag = str(tag)
                self.insta_write("--------------------")
                self.insta_write("TAG: " + tag)
                self.insta_write("--------------------")
                self.insta_write("--------------------")
                self.insta_write("DICTIONARY STATUS:")
                for keys, values in self.tags["tags"].items():
                    self.insta_write(keys)
                    if values is not None:
                        self.insta_write(values)
                likes_for_this_tag = self.likes_per_tag
                while likes_for_this_tag > 0 and self.tags["tags"][tag] != 0:
                    if self.tags["tags"][tag] is None:
                        media_tag, self.tags["tags"][tag] = self.api.tag_recent_media(tag_name=tag,
                                                                                      count=likes_for_this_tag)
                    else:
                        media_tag, self.tags["tags"][tag] = self.api.tag_recent_media(tag_name=tag,
                                                                                      count=likes_for_this_tag,
                                                                                      max_tag_id=self.tags["tags"][tag])
                    self.insta_write("API CALL DONE")
                    if len(media_tag) == 0 or self.tags["tags"][tag] is None:
                        self.tags["tags"][tag] = 0
                        likes_for_this_tag = 0
                    else:
                        self.insta_write(self.tags["tags"][tag])
                        self.tags["tags"][tag] = self.tags["tags"][tag].split("&")[-1:][0].split("=")[1]
                        self.save_tags()
                        for m in media_tag:
                            likes_for_this_tag = self.like_and_follow(m, likes_for_this_tag)
            if reduce(lambda r, h: r and h[1] == 0, self.tags["tags"].items(), True):
                self.insta_write("END")
                exit(1)

if __name__ == '__main__':
    bot = Bot(open("config_bot.json", "r"), open("tags.json", "r"))
    bot.run()
You opened the file as binary:

    self.log_file = open(self.filename, "wb")

but are writing str Unicode strings to it. Either open the file in text mode (with an encoding set) or encode each string separately.
Opening the file in text mode is easiest:

    self.log_file = open(self.filename, "w", encoding="utf8")
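Alternatively, if you need to keep the file handle binary, a sketch of the encode-at-the-write-site variant (assuming UTF-8 output) would be:

    if isinstance(to_write, list):
        self.log_file.write((''.join(to_write) + "<r/>").encode("utf8"))
    else:
        self.log_file.write((str(to_write) + "<r/>").encode("utf8"))
    self.log_file.flush()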
In my case, the reason for the error was a conflict between the json.load function and a function with the same name, load, imported from another module. Explicitly specifying which load function to use, i.e. json.load, solved the problem.
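To illustrate that name-shadowing case (a constructed example, not from the original post):

    from json import load
    from pickle import load  # silently shadows json's load

    with open("config.json") as f:
        config = load(f)  # now calls pickle.load, which wants a binary
                          # file and fails on a text-mode handle

    # Importing the module and qualifying the call avoids the collision:
    import json
    with open("config.json") as f:
        config = json.load(f)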
