I'm trying to use the Google Docs API (Python) to create collections and subcollections, and to upload files into a created subcollection.
First question:
Everything works with the code below and the hierarchy is correct (subfolder1 under folder1, the uploaded file under subfolder1); the only issue is that the subfolder and the file are also visible in Home for the end user.
I would like only the top-level collection to appear in Home.
Is there a way to prevent the resources (sub-collections and files) from being displayed in Home?
Note: I have tried the following alternatives but still get the same result:
1) the 'collection=' parameter of create_resource gives the same result
2) ClientLogin and two-legged OAuth give the same result
Second question:
Is it possible to set the description field?
import gdata.data
import gdata.docs.client
import gdata.acl.data
import gdata.docs.data
GAPPS_OAUTH_CONSUMER_KEY = "xxxx"
GAPPS_OAUTH_CONSUMER_SECRET = "xxxxx"
GAPPS_ADMIN_ACCOUNT = "x"
GAPPS_CLIENT_LOGIN_LOGIN='xxxxx'
GAPPS_CLIENT_LOGIN_PWD='xxxxx'
GAPPS_CLIENT_LOGIN_APP='xxxxxx'
filepath = 'C:\\Users\\xxxxx\\Pictures\\'
filename = 'xxxxxx.png'
path = filepath + filename
client = gdata.docs.client.DocsClient()
client.ssl = True
#client.ClientLogin(GAPPS_CLIENT_LOGIN_LOGIN, GAPPS_CLIENT_LOGIN_PWD, GAPPS_CLIENT_LOGIN_APP)
client.auth_token = gdata.gauth.TwoLeggedOAuthHmacToken(GAPPS_OAUTH_CONSUMER_KEY, GAPPS_OAUTH_CONSUMER_SECRET, GAPPS_ADMIN_ACCOUNT)
# create a folder
collection1 = gdata.docs.data.Resource('folder', title = 'Script Folder')
collection1 = client.create_resource(collection1)
# create a sub-folder in collection1
subcollection1 = gdata.docs.data.Resource('folder', title = 'Script Sub Folder')
subcollection1 = client.create_resource(subcollection1)
res = client.move_resource(subcollection1, collection = collection1, keep_in_collections = False)
# Upload the resource in subcollection1
doc = gdata.docs.data.Resource(type = 'file', title = filename)
media = gdata.data.MediaSource()
media.SetFileHandle(path, 'application/octet-stream')
create_uri = gdata.docs.client.RESOURCE_UPLOAD_URI + '?convert=false'
doc = client.CreateResource(doc, create_uri = create_uri, media = media)
print 'Created, and uploaded:', doc.title.text, doc.resource_id.text
client.move_resource(doc, collection = subcollection1, keep_in_collections = False)
Here is the solution:
subcollection1 = gdata.docs.data.Resource('folder', title = 'Script Sub Folder')
subcollection1.AddCategory(gdata.docs.data.LABELS_NS, gdata.docs.data.LABELS_NS + "#" + gdata.docs.data.HIDDEN_LABEL, gdata.docs.data.HIDDEN_LABEL)
subcollection1 = client.create_resource(subcollection1)
There is another, much simpler approach.
subcollection1 = client.create_resource(subcollection1,collection=collection1)
With this approach, the script sub-folder never appears in your root folder.
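Putting the two answers together, here is a sketch of the full flow (untested; it reuses the placeholders and client setup from the question). The sub-folder is created directly inside its parent via collection=, and the file keeps the question's create-then-move approach since create_uri is needed for convert=false:
# Top-level folder: created normally, so it is visible in Home.
folder = client.create_resource(
    gdata.docs.data.Resource('folder', title='Script Folder'))

# Sub-folder: created directly inside the parent via collection=,
# so it never shows up in Home.
subfolder = client.create_resource(
    gdata.docs.data.Resource('folder', title='Script Sub Folder'),
    collection=folder)

# File upload: unchanged from the question (create, then move).
# If the file must never flash up in Home either, the hidden-label
# trick from the first solution can be applied to its Resource too.
media = gdata.data.MediaSource()
media.SetFileHandle(path, 'application/octet-stream')
doc = client.create_resource(
    gdata.docs.data.Resource(type='file', title=filename),
    create_uri=gdata.docs.client.RESOURCE_UPLOAD_URI + '?convert=false',
    media=media)
client.move_resource(doc, collection=subfolder, keep_in_collections=False)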
I am trying the example from the Google repo:
https://github.com/googleapis/python-documentai/blob/HEAD/samples/snippets/quickstart_sample.py
I get this error:
metadata=[('x-goog-request-params', 'name=projects/my_proj_id/locations/us/processors/my_processor_id'), ('x-goog-api-client', 'gl-python/3.8.10 grpc/1.38.1 gax/1.30.0 gapic/1.0.0')]), last exception: 503 DNS resolution failed for service: https://us-documentai.googleapis.com/v1/
My full code:
from google.cloud import documentai_v1 as documentai
import os
# TODO(developer): Uncomment these variables before running the sample.
project_id= '123456789'
location = 'us' # Format is 'us' or 'eu'
processor_id = '1a23345gh823892' # Create processor in Cloud Console
file_path = 'document.jpg'
os.environ['GRPC_DNS_RESOLVER'] = 'native'
def quickstart(project_id: str, location: str, processor_id: str, file_path: str):
    # You must set the api_endpoint if you use a location other than 'us', e.g.:
    opts = {}
    if location == "eu":
        opts = {"api_endpoint": "eu-documentai.googleapis.com"}

    client = documentai.DocumentProcessorServiceClient(client_options=opts)

    # The full resource name of the processor, e.g.:
    # projects/project-id/locations/location/processor/processor-id
    # You must create new processors in the Cloud Console first
    name = f"projects/{project_id}/locations/{location}/processors/{processor_id}:process"

    # Read the file into memory
    with open(file_path, "rb") as image:
        image_content = image.read()

    document = {"content": image_content, "mime_type": "image/jpeg"}

    # Configure the process request
    request = {"name": name, "raw_document": document}

    result = client.process_document(request=request)
    document = result.document
    document_pages = document.pages

    # For a full list of Document object attributes, please reference this page:
    # https://googleapis.dev/python/documentai/latest/_modules/google/cloud/documentai_v1beta3/types/document.html#Document

    # Read the text recognition output from the processor
    print("The document contains the following paragraphs:")
    for page in document_pages:
        paragraphs = page.paragraphs
        for paragraph in paragraphs:
            print(paragraph)
            paragraph_text = get_text(paragraph.layout, document)
            print(f"Paragraph text: {paragraph_text}")

def get_text(doc_element: dict, document: dict):
    """
    Document AI identifies form fields by their offsets
    in document text. This function converts offsets
    to text snippets.
    """
    response = ""
    # If a text segment spans several lines, it will
    # be stored in different text segments.
    for segment in doc_element.text_anchor.text_segments:
        start_index = (
            int(segment.start_index)
            if segment in doc_element.text_anchor.text_segments
            else 0
        )
        end_index = int(segment.end_index)
        response += document.text[start_index:end_index]
    return response

def main():
    quickstart(project_id=project_id, location=location,
               processor_id=processor_id, file_path=file_path)

if __name__ == '__main__':
    main()
FYI, the Google Cloud website states that the endpoint is:
https://us-documentai.googleapis.com/v1/projects/123456789/locations/us/processors/1a23345gh823892:process
I can use the web interface to run Document AI, so the service itself is working. I just have the problem with the Python code.
Any suggestion is appreciated.
I suspect the GRPC_DNS_RESOLVER environment variable is the root cause. Did you try with that line commented out? Why was it added to your code?
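A quick way to test that suggestion (a sketch, not a verified fix): drop the GRPC_DNS_RESOLVER override and run the call again. Note also that the name built in the question ends with ':process'; in the linked quickstart sample the resource name is just projects/.../processors/<id>, and the client library appends ':process' to the request path itself, so the suffix should be dropped from name.
from google.cloud import documentai_v1 as documentai

# Deliberately NOT setting os.environ['GRPC_DNS_RESOLVER'] = 'native',
# so gRPC uses its default DNS resolver.

# Resource name without the ':process' suffix (placeholders from the question).
name = f"projects/{project_id}/locations/{location}/processors/{processor_id}"

client = documentai.DocumentProcessorServiceClient()
with open(file_path, "rb") as image:
    image_content = image.read()

request = {
    "name": name,
    "raw_document": {"content": image_content, "mime_type": "image/jpeg"},
}
result = client.process_document(request=request)
print(result.document.text[:200])  # first 200 characters of recognized text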
Hello, I'm new to Django and I'm trying to build a web app. I have a working back end, but it currently only runs on the CLI, and I have to turn it into a web app.
def testing(request):
    ksize = 6
    somsize = 10
    csvname = "input.csv"
    testcap = "testing.pcap"
    pl.csv5("chap/a", testcap)
    tmparr = []
    for filename in os.listdir("chap"):
        if filename.endswith(".csv"):
            tmparr.append(filename)
            continue
        else:
            continue
    tmparr.sort()
    visual_list = natsort.natsorted(tmparr)
    csv = sl.opencsv(csvname)
    norm = sl.normalize(csv)
    weights = sl.som(norm, somsize)
    label = sl.kmeans(ksize, weights)
    #for x in range(2, 21):
    #    label = sl.kmeans(x, weights)
    #    print("K is", x, "Score is ", label[1])
    lblarr = np.reshape(label, (somsize, somsize))
    #sl.dispcolor(lblarr)
    classess = sl.cluster_coloring(weights, norm, csv)
    classpercluster = sl.determine_cluster(classess, lblarr, ksize)
    classpercent = sl.toperc(classpercluster)
    print(classpercent)
    #print(classpercluster)
    for x in visual_list:
        temp = "chap/" + x
        tests = sl.opencsv(temp)
        print(tests)
        hits = sl.som_hits(weights, tests)
        name = "img/" + x + ".png"
        sl.disp(lblarr, name, hits)
    return render(request, 'visualization/detail.html')
I get the error "The system cannot find the path specified: 'chap'". I'm not sure whether I should put the chap folder inside the templates folder or in the app folder. Thank you in advance!
It looks like you're using relative paths here. Change them to absolute paths:
dirpath = os.path.dirname(os.path.abspath(__file__))
chap_dirpath = os.path.join(dirpath, "chap")  # absolute path to the chap folder
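Applied to the view above, a minimal sketch (assuming the chap folder sits next to the views.py that defines testing):
import os

# Assumption: the "chap" folder lives next to this views.py file.
APP_DIR = os.path.dirname(os.path.abspath(__file__))
CHAP_DIR = os.path.join(APP_DIR, "chap")

def testing(request):
    # ... same setup as above ...
    pl.csv5(os.path.join(CHAP_DIR, "a"), testcap)
    tmparr = [f for f in os.listdir(CHAP_DIR) if f.endswith(".csv")]
    # ... and later, when opening each per-chapter csv:
    # temp = os.path.join(CHAP_DIR, x)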
I use the blogger2wordpress Python script that Google released back in 2010 (https://code.google.com/archive/p/google-blog-converters-appengine/downloads) to convert a 95 MB Blogger export file to WordPress WXR format.
However, the script has this code:
#!/usr/bin/env python
# Copyright 2008 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0.txt
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os.path
import logging
import re
import sys
import time
from xml.sax.saxutils import unescape
import BeautifulSoup
import gdata
from gdata import atom
import iso8601
import wordpress
__author__ = 'JJ Lueck (EMAIL#gmail.com)'
###########################
# Constants
###########################
BLOGGER_URL = 'http://www.blogger.com/'
BLOGGER_NS = 'http://www.blogger.com/atom/ns#'
KIND_SCHEME = 'http://schemas.google.com/g/2005#kind'
YOUTUBE_RE = re.compile('http://www.youtube.com/v/([^&]+)&?.*')
YOUTUBE_FMT = r'[youtube=http://www.youtube.com/watch?v=\1]'
GOOGLEVIDEO_RE = re.compile('(http://video.google.com/googleplayer.swf.*)')
GOOGLEVIDEO_FMT = r'[googlevideo=\1]'
DAILYMOTION_RE = re.compile('http://www.dailymotion.com/swf/(.*)')
DAILYMOTION_FMT = r'[dailymotion id=\1]'
###########################
# Translation class
###########################
class Blogger2Wordpress(object):
  """Performs the translation of a Blogger export document to WordPress WXR."""

  def __init__(self, doc):
    """Constructs a translator for a Blogger export file.

    Args:
      doc: The WXR file as a string
    """
    # Ensure UTF8 chars get through correctly by ensuring we have a
    # compliant UTF8 input doc.
    self.doc = doc.decode('utf-8', 'replace').encode('utf-8')
    # Read the incoming document as a GData Atom feed.
    self.feed = atom.FeedFromString(self.doc)
    self.next_id = 1

  def Translate(self):
    """Performs the actual translation to WordPress WXR export format.

    Returns:
      A WordPress WXR export document as a string, or None on error.
    """
    # Create the top-level document and the channel associated with it.
    channel = wordpress.Channel(
        title = self.feed.title.text,
        link = self.feed.GetAlternateLink().href,
        base_blog_url = self.feed.GetAlternateLink().href,
        pubDate = self._ConvertPubDate(self.feed.updated.text))

    posts_map = {}

    for entry in self.feed.entry:
      # Grab the information about the entry kind
      entry_kind = ""
      for category in entry.category:
        if category.scheme == KIND_SCHEME:
          entry_kind = category.term

      if entry_kind.endswith("#comment"):
        # This entry is a comment; grab the post that it belongs to
        in_reply_to = entry.FindExtensions('in-reply-to')
        post_item = None
        # Check to see that the comment has a corresponding post entry
        if in_reply_to:
          post_id = self._ParsePostId(in_reply_to[0].attributes['ref'])
          post_item = posts_map.get(post_id, None)

        # Found the post for the comment, add the comment to it
        if post_item:
          # The author email may not be included in the file
          author_email = ''
          if entry.author[0].email:
            author_email = entry.author[0].email.text

          # Same for the author's url
          author_url = ''
          if entry.author[0].uri:
            author_url = entry.author[0].uri.text

          post_item.comments.append(wordpress.Comment(
              comment_id = self._GetNextId(),
              author = entry.author[0].name.text,
              author_email = author_email,
              author_url = author_url,
              date = self._ConvertDate(entry.published.text),
              content = self._ConvertContent(entry.content.text)))

      elif entry_kind.endswith('#post'):
        # This entry is a post
        post_item = self._ConvertEntry(entry, False)
        posts_map[self._ParsePostId(entry.id.text)] = post_item
        channel.items.append(post_item)

      elif entry_kind.endswith('#page'):
        # This entry is a static page
        page_item = self._ConvertEntry(entry, True)
        posts_map[self._ParsePageId(entry.id.text)] = page_item
        channel.items.append(page_item)

    wxr = wordpress.WordPressWxr(channel=channel)
    return wxr.WriteXml()

  def _ConvertEntry(self, entry, is_page):
    """Converts the contents of an Atom entry into a WXR post Item element."""
    # A post may have an empty title, in which case the text element is None.
    title = ''
    if entry.title.text:
      title = entry.title.text

    # Check here to see if the entry points to a draft or regular post
    status = 'publish'
    if entry.control and entry.control.draft:
      status = 'draft'

    # If no link is present in the Blogger entry, just link to Blogger
    if entry.GetAlternateLink():
      link = entry.GetAlternateLink().href
    else:
      link = BLOGGER_URL

    # Declare whether this is a post or a page
    post_type = 'post'
    if is_page:
      post_type = 'page'

    blogger_blog = ''
    blogger_permalink = ''
    if entry.GetAlternateLink():
      blogger_path_full = entry.GetAlternateLink().href.replace('http://', '')
      blogger_blog = blogger_path_full.split('/')[0]
      blogger_permalink = blogger_path_full[len(blogger_blog):]

    # Create the actual item element
    post_item = wordpress.Item(
        title = title,
        link = link,
        pubDate = self._ConvertPubDate(entry.published.text),
        creator = entry.author[0].name.text,
        content = self._ConvertContent(entry.content.text),
        post_id = self._GetNextId(),
        post_date = self._ConvertDate(entry.published.text),
        status = status,
        post_type = post_type,
        blogger_blog = blogger_blog,
        blogger_permalink = blogger_permalink,
        blogger_author = entry.author[0].name.text)

    # Convert the categories which specify labels into wordpress labels
    for category in entry.category:
      if category.scheme == BLOGGER_NS:
        post_item.labels.append(category.term)

    return post_item

  def _ConvertContent(self, text):
    """Unescapes the post/comment text body and replaces video content.

    All <object> and <embed> tags in the post that relate to video must be
    changed into the WordPress tags for embedding video,
    e.g. [youtube=http://www.youtube.com/...]

    If no text is provided, the empty string is returned.
    """
    if not text:
      return ''

    # First unescape all XML tags as they'll be escaped by the XML emitter
    content = unescape(text)

    # Use an HTML parser on the body to look for video content
    content_tree = BeautifulSoup.BeautifulSoup(content)

    # Find the object tags
    objs = content_tree.findAll('object')
    for obj_tag in objs:
      # Find the param tag which contains the URL to the movie
      param_tag = obj_tag.find('param', { 'name': 'movie' })
      if not param_tag:
        continue

      # Get the video URL
      video = param_tag.attrMap.get('value', None)
      if not video:
        continue

      # Convert the video URL if necessary
      video = YOUTUBE_RE.subn(YOUTUBE_FMT, video)[0]
      video = GOOGLEVIDEO_RE.subn(GOOGLEVIDEO_FMT, video)[0]
      video = DAILYMOTION_RE.subn(DAILYMOTION_FMT, video)[0]

      # Replace the portion of the contents with the video
      obj_tag.replaceWith(video)

    return str(content_tree)

  def _ConvertPubDate(self, date):
    """Translates to a pubDate element's time/date format."""
    date_tuple = iso8601.parse_date(date)
    return date_tuple.strftime('%a, %d %b %Y %H:%M:%S %z')

  def _ConvertDate(self, date):
    """Translates to a wordpress date element's time/date format."""
    date_tuple = iso8601.parse_date(date)
    return date_tuple.strftime('%Y-%m-%d %H:%M:%S')

  def _GetNextId(self):
    """Returns the next identifier to use in the export document as a string."""
    next_id = self.next_id
    self.next_id += 1
    return str(next_id)

  def _ParsePostId(self, text):
    """Extracts the post identifier from a Blogger entry ID."""
    matcher = re.compile(r'post-(\d+)')
    matches = matcher.search(text)
    return matches.group(1)

  def _ParsePageId(self, text):
    """Extracts the page identifier from a Blogger entry ID."""
    matcher = re.compile(r'page-(\d+)')
    matches = matcher.search(text)
    return matches.group(1)

if __name__ == '__main__':
  if len(sys.argv) <= 1:
    print 'Usage: %s <blogger_export_file>' % os.path.basename(sys.argv[0])
    print
    print '  Outputs the converted WordPress export file to standard out.'
    sys.exit(-1)

  wp_xml_file = open(sys.argv[1])
  wp_xml_doc = wp_xml_file.read()
  translator = Blogger2Wordpress(wp_xml_doc)
  print translator.Translate()
  wp_xml_file.close()
This script outputs the WXR file to the terminal window, which is useless for me when the export file has tons of entries.
As I am not familiar with Python, how can I modify the script to output the data into an .xml file?
Edit:
I changed the end of the script to:
wp_xml_file = open(sys.argv[1])
wp_xml_doc = wp_xml_file.read()
translator = Blogger2Wordpress(wp_xml_doc)
print translator.Translate()
fh = open("testoutput.xml", "w")
fh.write(wp_xml_doc);
fh.close();
wp_xml_file.close()
But the produced file is an "invalid wxr file" :/
Can anybody help? Thanks!
Quick and dirty answer:
Output to stdout is normal behaviour.
You might want to redirect it to a file, for instance:
python2 blogger2wordpress your_blogger_export_file > backup
The output will be saved in the file named backup.
Or you can replace print translator.Translate() by
with open('output_file', 'w') as fd:
    fd.write(translator.Translate())
This should do the trick (haven't tried).
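On the edit in the question: the output comes out as an invalid WXR file because fh.write(wp_xml_doc) writes the Blogger input that was just read, not the translated result. A corrected ending, in the same Python 2 style as the script (untested), would be:
wp_xml_file = open(sys.argv[1])
wp_xml_doc = wp_xml_file.read()
wp_xml_file.close()

translator = Blogger2Wordpress(wp_xml_doc)

# Write the *translated* document, not the raw Blogger input.
fh = open("testoutput.xml", "w")
fh.write(translator.Translate())
fh.close()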
So far I have the following code to force an .ods file to recalculate all values in the spreadsheet and save it into several formats.
But I cannot find the filter name for XLS.
Does anyone have an idea how to find it?
import uno
from com.sun.star.beans import PropertyValue
def _toProperties(**args):
    props = []
    for key in args:
        prop = PropertyValue()
        prop.Name = key
        prop.Value = args[key]
        props.append(prop)
    return tuple(props)
# start first
# libreoffice --headless --accept="socket,host=0,port=8001,tcpNoDelay=1;urp"
inputFile = 'file:///home/user/Downloads/in.ods'
outputFile = 'file:///home/user/Downloads/out.xls'
# import the OpenOffice component context
local = uno.getComponentContext()
# access the UnoUrlResolver service - this will allow to connect to OpenOffice.org program
resolver = local.ServiceManager.createInstanceWithContext("com.sun.star.bridge.UnoUrlResolver", local)
# load the context and you are now connected - you can access OpenOffice via its API mechanism
context = resolver.resolve("uno:socket,host=localhost,port=8001;urp;StarOffice.ServiceManager")
remoteContext = context.getPropertyValue("DefaultContext")
# service responsible for the current document called desktop
desktop = context.createInstanceWithContext("com.sun.star.frame.Desktop", remoteContext)
document = desktop.getCurrentComponent()
# load, calculateAll(), save
document = desktop.loadComponentFromURL(inputFile, "_blank", 0, ())
document.calculateAll()
# ods
# document.storeAsURL(outputFile, ())
# pdf
#document.storeToURL(outputFile, _toProperties(FilterName="calc_pdf_Export"))
# csv
#document.storeToURL(outputFile, _toProperties(FilterName="Text - txt - csv (StarCalc)"))
# xls
document.storeToURL(outputFile, _toProperties(FilterName="calc_MS_Excel_40"))
# xlsx
#document.storeToURL(outputFile, _toProperties(FilterName="Calc Office Open XML"))
document.dispose()
Use the following Basic macro to get a list of all available filter names:
' DannyB Tue Oct 28, 2003 9:49 am
' http://www.oooforum.org/forum/viewtopic.phtml?t=3549
Sub writer_dumpFilterNames
    oFF = createUnoService( "com.sun.star.document.FilterFactory" )
    oFilterNames = oFF.getElementNames()

    ' Now print the filter names.
    ' For i = LBound( oFilterNames ) To UBound( oFilterNames )
    '     Print oFilterNames(i)
    ' Next

    ' Create a Writer doc and save the filter names to it.
    oDoc = StarDesktop.loadComponentFromURL( "private:factory/swriter", "_blank", 0, Array() )
    oText = oDoc.getText()
    oCursor = oText.createTextCursor()
    oCursor.gotoEnd( False )

    ' Print the filter names into a Writer document.
    For i = LBound( oFilterNames ) To UBound( oFilterNames )
        oText.insertString( oCursor, oFilterNames(i), False )
        oText.insertControlCharacter( oCursor, com.sun.star.text.ControlCharacter.PARAGRAPH_BREAK, False )
    Next
End Sub
The macro creates a new Writer document and puts the filter names there (the original version only printed the names; see the commented-out Print loop).
Does this help you?
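If you would rather stay in Python, the same filter list can be pulled over the UNO bridge you already have open. A sketch reusing the context and remoteContext objects from the question's code; on a typical LibreOffice install the classic .xls filter is listed as "MS Excel 97":
# List every available filter name over the existing UNO bridge.
filter_factory = context.createInstanceWithContext(
    "com.sun.star.document.FilterFactory", remoteContext)
for name in filter_factory.getElementNames():
    print(name)

# The entry for classic .xls is typically "MS Excel 97", so:
# document.storeToURL(outputFile, _toProperties(FilterName="MS Excel 97"))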
I'm using the csv module in Python to create a download from one of the datastore tables in Google App Engine. The download works all right, but you have to manually add an extension so that you can open it in Excel. I can't figure out how to modify the response so that the downloaded file has a .csv extension. I could leave it like this; however, this web app is meant for a broad audience, so I wanted to make it as easy as possible for them to use.
class fuCheckUp(webapp2.RequestHandler):
    def get(self):
        schedule_query = emailSchedule.all()
        follow_up_num = schedule_query[0].follow_up_num
        email_job_query = emailJobs.all()
        email_job_query.order('consent_date')

        header_tuple = ('last_modified', 'trigger_id', 'recipient_id',
                        'test_data', 'unsubscribe', 'start_date_local',
                        'consent_date', 'fu_period', 'last_fu_sent')
        data_tuples = ()
        variable_list = []
        for i in range(1, follow_up_num + 1):
            i = str(i)
            fu_due = 'fu' + i
            fu_sent = 'fu' + i + '_email_sent'
            variable_list.append(fu_due)
            variable_list.append(fu_sent)
            data_tuples = data_tuples + (fu_due, fu_sent)
        final_data_tuple = header_tuple + data_tuples

        data = [final_data_tuple]
        for part in email_job_query:
            last_modified = str(part.last_modified)
            trigger_id = str(part.trigger_id)
            recipient_id = str(part.recipient_id)
            test_data = str(part.test_data)
            unsubscribed = str(part.unsubscribed)
            start_date_local = str(part.start_date_local)
            consent_date = str(part.consent_date)
            fu_period = str(part.fu_period)
            last_fu_sent = str(part.last_fu_sent)

            var_list = []
            for var in variable_list:
                fu_var = getattr(part, var)
                var_list.append(str(fu_var))
            var_tuple = tuple(var_list)

            fixed_tuple = (last_modified, trigger_id, recipient_id, test_data,
                           unsubscribed, start_date_local, consent_date,
                           fu_period, last_fu_sent)
            csv_tuple = fixed_tuple + var_tuple
            data.append(csv_tuple)

        self.response.headers['Content-Type'] = 'application/csv'
        writer = csv.writer(self.response.out)
        for item in data:
            writer.writerow(item)
Add another response header like this:
Content-Disposition: attachment;filename=example.csv
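In webapp2 that is just another entry in response.headers, set alongside the Content-Type. A minimal sketch (the filename example.csv is a placeholder; text/csv is the registered MIME type for CSV):
self.response.headers['Content-Type'] = 'text/csv'
self.response.headers['Content-Disposition'] = 'attachment; filename=example.csv'

writer = csv.writer(self.response.out)
for item in data:
    writer.writerow(item)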