Tried accessing the OpenAI example "Explain code".
But it shows this error:
InvalidRequestError: Engine not found
response = openai.Completion.create(
engine="code-davinci-002",
prompt="class Log:\n def __init__(self, path):\n dirname = os.path.dirname(path)\n os.makedirs(dirname, exist_ok=True)\n f = open(path, \"a+\")\n\n # Check that the file is newline-terminated\n size = os.path.getsize(path)\n if size > 0:\n f.seek(size - 1)\n end = f.read(1)\n if end != \"\\n\":\n f.write(\"\\n\")\n self.f = f\n self.path = path\n\n def log(self, event):\n event[\"_event_id\"] = str(uuid.uuid4())\n json.dump(event, self.f)\n self.f.write(\"\\n\")\n\n def state(self):\n state = {\"complete\": set(), \"last\": None}\n for line in open(self.path):\n event = json.loads(line)\n if event[\"type\"] == \"submit\" and event[\"success\"]:\n state[\"complete\"].add(event[\"id\"])\n state[\"last\"] = event\n return state\n\n\"\"\"\nHere's what the above class is doing:\n1.",
temperature=0,
max_tokens=64,
top_p=1.0,
frequency_penalty=0.0,
presence_penalty=0.0,
stop=["\"\"\""]
)
I've been trying to access the engine named code-davinci-002, which is a private beta engine, so without access it isn't reachable. It seems only the GPT-3 models are available for public use. We need to join the OpenAI Codex Private Beta Waitlist in order to access the Codex models through the API.
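As a quick check (a sketch using the same pre-v1 openai Python client as above), you can list the engines your API key can actually reach; code-davinci-002 will only appear once Codex access has been granted:

import openai

openai.api_key = "sk-..."  # your API key

# List the engines this account has access to.
engines = openai.Engine.list()
print([engine.id for engine in engines.data])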
Please note that your code is not very readable.
However, from the given error, I think it has to do with the missing colon : in the engine name.
Change this line from:
engine="code-davinci-002",
to
engine="code-davinci:002",
If you are using a finetuned model instead of an engine, you'd want to use model= instead of engine=.
response = openai.Completion.create(
    model="<finetuned model>",
    prompt=
I am trying the example from the Google repo:
https://github.com/googleapis/python-documentai/blob/HEAD/samples/snippets/quickstart_sample.py
I have an error:
metadata=[('x-goog-request-params', 'name=projects/my_proj_id/locations/us/processors/my_processor_id'), ('x-goog-api-client', 'gl-python/3.8.10 grpc/1.38.1 gax/1.30.0 gapic/1.0.0')]), last exception: 503 DNS resolution failed for service: https://us-documentai.googleapis.com/v1/
My full code:
from google.cloud import documentai_v1 as documentai
import os

# TODO(developer): Uncomment these variables before running the sample.
project_id = '123456789'
location = 'us'  # Format is 'us' or 'eu'
processor_id = '1a23345gh823892'  # Create processor in Cloud Console
file_path = 'document.jpg'

os.environ['GRPC_DNS_RESOLVER'] = 'native'


def quickstart(project_id: str, location: str, processor_id: str, file_path: str):
    # You must set the api_endpoint if you use a location other than 'us', e.g.:
    opts = {}
    if location == "eu":
        opts = {"api_endpoint": "eu-documentai.googleapis.com"}

    client = documentai.DocumentProcessorServiceClient(client_options=opts)

    # The full resource name of the processor, e.g.:
    # projects/project-id/locations/location/processor/processor-id
    # You must create new processors in the Cloud Console first
    name = f"projects/{project_id}/locations/{location}/processors/{processor_id}:process"

    # Read the file into memory
    with open(file_path, "rb") as image:
        image_content = image.read()

    document = {"content": image_content, "mime_type": "image/jpeg"}

    # Configure the process request
    request = {"name": name, "raw_document": document}

    result = client.process_document(request=request)
    document = result.document
    document_pages = document.pages

    # For a full list of Document object attributes, please reference this page:
    # https://googleapis.dev/python/documentai/latest/_modules/google/cloud/documentai_v1beta3/types/document.html#Document

    # Read the text recognition output from the processor
    print("The document contains the following paragraphs:")
    for page in document_pages:
        paragraphs = page.paragraphs
        for paragraph in paragraphs:
            print(paragraph)
            paragraph_text = get_text(paragraph.layout, document)
            print(f"Paragraph text: {paragraph_text}")


def get_text(doc_element: dict, document: dict):
    """
    Document AI identifies form fields by their offsets
    in document text. This function converts offsets
    to text snippets.
    """
    response = ""
    # If a text segment spans several lines, it will
    # be stored in different text segments.
    for segment in doc_element.text_anchor.text_segments:
        start_index = (
            int(segment.start_index)
            if segment in doc_element.text_anchor.text_segments
            else 0
        )
        end_index = int(segment.end_index)
        response += document.text[start_index:end_index]
    return response


def main():
    quickstart(project_id=project_id, location=location,
               processor_id=processor_id, file_path=file_path)


if __name__ == '__main__':
    main()
FYI, on the Google Cloud website it states that the endpoint is:
https://us-documentai.googleapis.com/v1/projects/123456789/locations/us/processors/1a23345gh823892:process
I can use the web interface to run Document AI, so that part is working; I just have the problem with the Python code.
Any suggestion is appreciated.
I would suspect the GRPC_DNS_RESOLVER environment variable to be the root cause. Did you try with the corresponding line commented out? Why was it added in your code?
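For example (a sketch only), you could re-run the same quickstart with that line left out and see whether the 503 DNS error persists:

# Let gRPC use its default DNS resolver instead of forcing the native one.
# os.environ['GRPC_DNS_RESOLVER'] = 'native'   # <- line under suspicion, commented out

quickstart(project_id=project_id, location=location,
           processor_id=processor_id, file_path=file_path)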
Background:
Look at the code below:
class Adt:
    # I avoided a constructor with dependencies
    def generate_document(self, date_from, date_to):
        try:
            adt_data = self.repository.read_adt_data(date_from, date_to)  # <- adt_data may be null
            document_body = self.__prepare_document_body(adt_data)
            doc_id = self.__generate_document(document_body)
            return doc_id
        except Exception:
            self.logger.exception("generate_document")
            raise
And below you can see client code:
doc_id = adt.generate_document(date_from,date_to)
email_sender_client.send_document_as_email(doc_id)
Explanation and problem:
There is a normal business state in which we have no adt_data, so this variable can sometimes be None.
The straightforward solution is just to add an if:
adt_data = self.repository.read_adt_data(date_from, date_to)
if not adt_data:
    return None
And corrected client code:
doc_id = adt.generate_document(date_from, date_to)
if doc_id:
    email_sender_client.send_document_as_email(doc_id)
Question:
Is there any typical mechanism to avoid such ifs? I've read about the Null Object pattern. Perhaps the repository could return not None but an object with empty fields? I'd like to ask experts about possible solutions.
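For what it's worth, here is a minimal sketch of that idea; the class and repository names are hypothetical, not from the original code. The repository returns an "empty" object instead of None, so callers ask the object itself rather than checking for None:

# Hypothetical names, for illustration only.
class AdtData:
    def __init__(self, rows):
        self.rows = rows

    def is_empty(self):
        return not self.rows


class EmptyAdtData(AdtData):
    """Null Object returned by the repository instead of None."""
    def __init__(self):
        super().__init__(rows=[])


class AdtRepository:
    def __init__(self, db_rows):
        self._db_rows = db_rows  # stand-in for the real data source

    def read_adt_data(self, date_from, date_to):
        rows = [r for r in self._db_rows if date_from <= r["date"] <= date_to]
        return AdtData(rows) if rows else EmptyAdtData()

generate_document can then ask adt_data.is_empty() (or simply produce an empty document), and the decision no longer leaks into every caller as a None check.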
I have a BigQuery table that I need to bring down and populate an MSSQL table with. Since I can't find a BigQuery-to-MSSQL operator, I'm doing this by hand.
I've been able to export the table to a series of <>_001.txt, <>_002.txt, etc. files and store them in GCS, but now I need to get them down onto the Airflow server.
I'm attempting to use the GoogleCloudStorageDownloadOperator, but it seems to have an issue I cannot get past.
Export_to_Local = GoogleCloudStorageDownloadOperator(
    task_id='Export_GCS_to_Airflow_Staging',
    bucket='offrs',
    object='TAX_ASSESSOR_LIVE_*.txt',
    filename=Variable.get("temp_directory") + "TAL/*",
    google_cloud_storage_conn_id='GCP_Mother_Staging',
    dag=dag
)
The above code results in this error:
google.resumable_media.common.InvalidResponse: ('Request failed with status code', 404, 'Expected one of', <HTTPStatus.OK: 200>, <HTTPStatus.PARTIAL_CONTENT: 206>)
Am I missing something? I don't know what the problem is.
Thanks
GoogleCloudStorageDownloadOperator does not support wildcards, unfortunately.
The quickest option would be to use the gsutil command in a BashOperator, if your VM is already authorized to access that bucket.
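For example (a sketch, not from the original answer; it assumes gsutil is installed on the Airflow worker and the machine's service account can read the bucket), the download from the question could look like this:

from airflow.operators.bash_operator import BashOperator
from airflow.models import Variable

# gsutil expands the wildcard itself, so no operator-side globbing is needed.
Export_to_Local = BashOperator(
    task_id='Export_GCS_to_Airflow_Staging',
    bash_command=(
        'gsutil -m cp "gs://offrs/TAX_ASSESSOR_LIVE_*.txt" '
        + Variable.get("temp_directory") + 'TAL/'
    ),
    dag=dag,
)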
The other option is to use the following Custom Operator:
import os

from airflow.contrib.hooks.gcs_hook import GoogleCloudStorageHook
from airflow.models import BaseOperator
from airflow.utils.decorators import apply_defaults
from airflow.exceptions import AirflowException

WILDCARD = '*'


class CustomGcsDownloadOperator(BaseOperator):

    template_fields = ('source_bucket', 'source_object', 'destination_folder',
                       'destination_object',)
    ui_color = '#f0eee4'

    @apply_defaults
    def __init__(self,
                 source_bucket,
                 source_object,
                 destination_folder,
                 destination_object=None,
                 google_cloud_storage_conn_id='google_cloud_default',
                 delegate_to=None,
                 last_modified_time=None,
                 *args,
                 **kwargs):
        super(CustomGcsDownloadOperator, self).__init__(*args, **kwargs)
        self.source_bucket = source_bucket
        self.source_object = source_object
        self.destination_folder = destination_folder
        self.destination_object = destination_object
        self.google_cloud_storage_conn_id = google_cloud_storage_conn_id
        self.delegate_to = delegate_to
        self.last_modified_time = last_modified_time

    def execute(self, context):
        hook = GoogleCloudStorageHook(
            google_cloud_storage_conn_id=self.google_cloud_storage_conn_id,
            delegate_to=self.delegate_to
        )

        if WILDCARD in self.source_object:
            total_wildcards = self.source_object.count(WILDCARD)
            if total_wildcards > 1:
                error_msg = "Only one wildcard '*' is allowed in source_object parameter. " \
                            "Found {} in {}.".format(total_wildcards, self.source_object)
                raise AirflowException(error_msg)

            prefix, delimiter = self.source_object.split(WILDCARD, 1)
            objects = hook.list(self.source_bucket, prefix=prefix, delimiter=delimiter)

            for source_object in objects:
                if self.destination_object is None:
                    destination_object = source_object
                else:
                    destination_object = source_object.replace(prefix,
                                                               self.destination_object, 1)
                self._download_single_object(hook=hook, source_object=source_object,
                                             destination_object=destination_object)
        else:
            self._download_single_object(hook=hook, source_object=self.source_object,
                                         destination_object=self.destination_object)

    def _download_single_object(self, hook, source_object, destination_object):
        if self.last_modified_time is not None:
            # Only download if the object was modified after last_modified_time
            if hook.is_updated_after(self.source_bucket,
                                     source_object,
                                     self.last_modified_time):
                self.log.debug("Object has been modified after %s ", self.last_modified_time)
            else:
                return

        if destination_object is None:
            destination_object = source_object

        self.log.info('Executing copy of gs://%s/%s to file://%s/%s',
                      self.source_bucket, source_object,
                      self.destination_folder, destination_object)
        # Download into destination_folder (join the folder and the object name)
        hook.download(self.source_bucket, source_object,
                      os.path.join(self.destination_folder, destination_object))
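A possible way to wire the custom operator into the DAG from the question (a sketch; it reuses the bucket, connection id and Variable from the earlier snippet):

Export_to_Local = CustomGcsDownloadOperator(
    task_id='Export_GCS_to_Airflow_Staging',
    source_bucket='offrs',
    source_object='TAX_ASSESSOR_LIVE_*.txt',
    destination_folder=Variable.get("temp_directory") + "TAL/",
    google_cloud_storage_conn_id='GCP_Mother_Staging',
    dag=dag,
)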
I am teaching myself how to use Python and Django to access the Google Places API to make nearby searches for different types of gyms.
I was only taught how to use Python and Django with databases you build locally.
I wrote out a full GET request for the four different searches I am doing. I looked up examples, but none seem to work for me.
allgyms = requests.get('https://maps.googleapis.com/maps/api/place/nearbysearch/json?location=38.9208,-77.036&radius=2500&type=gym&key=AIzaSyDOwVK7bGap6b5Mpct1cjKMp7swFGi3uGg')
all_text = allgyms.text
alljson = json.loads(all_text)
healthclubs = requests.get('https://maps.googleapis.com/maps/api/place/nearbysearch/json?location=38.9208,-77.036&radius=2500&type=gym&keyword=healthclub&key=AIzaSyDOwVK7bGap6b5Mpct1cjKMp7swFGi3uGg')
health_text = healthclubs.text
healthjson = json.loads(health_text)
crossfit = requests.get('https://maps.googleapis.com/maps/api/place/nearbysearch/json?location=38.9208,-77.036&radius=2500&type=gym&keyword=crossfit&key=AIzaSyDOwVK7bGap6b5Mpct1cjKMp7swFGi3uGg')
cross_text = crossfit.text
crossjson = json.loads(cross_text)
I really would like to be pointed in the right direction on how to have the API key referenced only once while changing the keywords.
Try this for better readability and reusability:
BASE_URL = 'https://maps.googleapis.com/maps/api/place/nearbysearch/json?'
LOCATION = '38.9208,-77.036'
RADIUS = '2500'
TYPE = 'gym'
API_KEY = 'AIzaSyDOwVK7bGap6b5Mpct1cjKMp7swFGi3uGg'
KEYWORDS = ''
allgyms = requests.get(BASE_URL+'location='+LOCATION+'&radius='+RADIUS+'&type='+TYPE+'&key='+API_KEY)
all_text = allgyms.text
alljson = json.loads(all_text)
KEYWORDS = 'healthclub'
healthclubs = requests.get(BASE_URL+'location='+LOCATION+'&radius='+RADIUS+'&type='+TYPE+'&keyword='+KEYWORDS+'&key='+API_KEY)
health_text = healthclubs.text
healthjson = json.loads(health_text)
KEYWORDS = 'crossfit'
crossfit = requests.get(BASE_URL+'location='+LOCATION+'&radius='+RADIUS+'&type='+TYPE+'&keyword='+KEYWORDS+'&key='+API_KEY)
cross_text = crossfit.text
crossjson = json.loads(cross_text)
As V-R suggested in a comment, you can go further and define a function, which makes things more reusable and lets you use that function in other places of your application.
Function implementation
def makeRequest(location, radius, type, keywords):
    BASE_URL = 'https://maps.googleapis.com/maps/api/place/nearbysearch/json?'
    API_KEY = 'AIzaSyDOwVK7bGap6b5Mpct1cjKMp7swFGi3uGg'
    result = requests.get(BASE_URL+'location='+location+'&radius='+radius+'&type='+type+'&keyword='+keywords+'&key='+API_KEY)
    jsonResult = json.loads(result.text)
    return jsonResult
Function invocation
result = makeRequest('38.9208,-77.036', '2500', 'gym', '')
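If you prefer, the requests library can also assemble and URL-encode the query string for you via its params argument; here is a sketch of the same helper written that way (not part of the original answer):

import requests

def make_request(location, radius, place_type, keyword=''):
    # requests builds and URL-encodes the query string from this dict.
    params = {
        'location': location,
        'radius': radius,
        'type': place_type,
        'keyword': keyword,
        'key': 'AIzaSyDOwVK7bGap6b5Mpct1cjKMp7swFGi3uGg',
    }
    response = requests.get(
        'https://maps.googleapis.com/maps/api/place/nearbysearch/json',
        params=params,
    )
    return response.json()

crossjson = make_request('38.9208,-77.036', '2500', 'gym', 'crossfit')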
Let me know if there is an issue
I want to use Lucene from Python.
I used StandardAnalyzer for both indexing and searching. It works fine, but now my requirement has changed: I need to use KeywordAnalyzer.
Code for StandardAnalyzer:
# Importing packages
import lucene
lucene.initVM()
from java.io import File
from org.apache.lucene import analysis, document, index, queryparser, search, store, util

# Initialize parameters
analyzer = analysis.standard.StandardAnalyzer(util.Version.LUCENE_CURRENT)
config = index.IndexWriterConfig(util.Version.LUCENE_CURRENT, analyzer)
directory = store.FSDirectory.open(File(<path_where_to_index>))
iwriter = index.IndexWriter(directory, config)

# Indexing part
doc = document.Document()
doc.add(document.Field("fieldname", entity, document.Field.Store.YES, document.Field.Index.ANALYZED))
doc.add(document.Field("category", category, document.Field.Store.YES, document.Field.Index.ANALYZED))
iwriter.addDocument(doc)
iwriter.commit()

# Searching part
ireader = index.IndexReader.open(directory)
isearcher = search.IndexSearcher(ireader)
parser = queryparser.classic.QueryParser(util.Version.LUCENE_CURRENT, "fieldname", analyzer)
query = parser.parse(entity)
hits = isearcher.search(query, None, 100).scoreDocs
print hits
for hit in hits:
    hitDoc = isearcher.doc(hit.doc)
    print hitDoc
The above code uses StandardAnalyzer. I want to use KeywordAnalyzer instead of StandardAnalyzer.
I have changed the analyzer in the code below. It uses KeywordAnalyzer, but searching does not work.
Code for KeywordAnalyzer:
# Importing packages
import lucene
lucene.initVM()
from java.io import File
from org.apache.lucene import analysis, document, index, queryparser, search, store, util

# Initialize parameters
analyzer = analysis.core.KeywordAnalyzer()
config = index.IndexWriterConfig(util.Version.LUCENE_CURRENT, analyzer)
directory = store.FSDirectory.open(File(<path_where_to_index>))
iwriter = index.IndexWriter(directory, config)

# Indexing part
doc = document.Document()
doc.add(document.Field("fieldname", entity, document.Field.Store.YES, document.Field.Index.ANALYZED))
doc.add(document.Field("category", category, document.Field.Store.YES, document.Field.Index.ANALYZED))
iwriter.addDocument(doc)
iwriter.commit()

# Searching part
ireader = index.IndexReader.open(directory)
isearcher = search.IndexSearcher(ireader)
parser = queryparser.classic.QueryParser(util.Version.LUCENE_CURRENT, "fieldname", analyzer)
query = parser.parse(entity)
hits = isearcher.search(query, None, 100).scoreDocs
print hits
for hit in hits:
    hitDoc = isearcher.doc(hit.doc)
    print hitDoc
Any help?
I found the solution to my question.
To use KeywordAnalyzer, I need analysis.core. I can't use the queryparser for searching, because it is mostly geared towards StandardAnalyzer. To search with KeywordAnalyzer, I need to use index.Term and search.TermQuery.
Searching Code:
term_parser = index.Term("fieldname", entity)
query = search.TermQuery(term_parser)
hits = isearcher.search(query, None, 10).scoreDocs