The following Python script exports Salesforce KB articles and automatically publishes them to a Fluid Topics portal.
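This script relies on the following Salesforce web services to export Salesforce KB articles:
- Articles List: GET /services/data/{version}/support/knowledgeArticles
- Articles Details: GET /services/data/{version}/support/knowledgeArticles/{articleId}
These web services require the use of an access token, which the script requests from /services/oauth2/token.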
import os
import sys
import json
import shutil
import requests
from pathlib import Path
# --- Salesforce settings ---
# Host name only, without scheme: "https://" is prepended when the token is requested.
SF_URL = "salesforce_url.com"
CLIENT_ID = ""
CLIENT_SECRET = ""
USER_NAME = ""
PASSWORD = ""  # concatenate the password and security token
VERSION = ""  # example v55.0

# --- Fluid Topics settings ---
# Host name only, without scheme (example docs.mydomain.com): "https://" is
# prepended when the upload URL is built.
FT_URL = ""
FT_API = ""  # API key, sent as a Bearer token when uploading
SOURCE_ID = "salesforce"  # example salesforce

# Only articles whose lastPublishedDate is greater than this value are exported.
# Overridden by the first command-line argument when one is given.
GET_SINCE = "1901-01-01"
def get_access_token_and_instance_url():
    """Request a Salesforce access token with the OAuth password grant.

    Posts the module-level credentials (``CLIENT_ID``, ``CLIENT_SECRET``,
    ``USER_NAME``, ``PASSWORD``) to ``/services/oauth2/token`` on ``SF_URL``.

    Returns:
        An ``(access_token, instance_url)`` tuple read from the JSON response,
        or ``(None, None)`` when Salesforce answers with an error status.
    """
    # SF_URL may be configured with or without a scheme; drop any scheme so the
    # request never targets "https://https://...".
    host = SF_URL.split("://", 1)[-1].rstrip("/")
    # security token must be provided if IP address is outside your company's trusted IP range
    r = requests.post(
        f"https://{host}/services/oauth2/token",
        data={
            'grant_type': 'password',
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'username': USER_NAME,
            'password': PASSWORD,
        },
    )
    if not r.ok:
        # Report the failure like the other request helpers do instead of
        # failing silently.
        print(f'Error while retrieving access token: {r.text}')
        return None, None
    payload = r.json()  # decode the body once
    return payload.get('access_token'), payload.get('instance_url')
def get_kb_articles_list(access_token, instance_url, page=1):
    """Fetch one page of the Salesforce Knowledge articles list.

    Args:
        access_token: OAuth token sent as a Bearer ``Authorization`` header.
        instance_url: Base URL of the Salesforce instance.
        page: Value sent as the ``pageNumber`` query parameter.

    Returns:
        The decoded JSON response, or an empty dict when the request fails
        (the response body is printed in that case).
    """
    # https://developer.salesforce.com/docs/atlas.en-us.api_rest.meta/api_rest/resources_knowledge_support_artlist.htm
    # Params can include filters for the request : q, channel, categories, queryMethod, sort, order
    # e.g. {'pageNumber': page, 'q': '', 'channel': '', 'categories': '', 'queryMethod': '', 'sort': '', 'order': ''}
    # NB : /support/knowledgeArticles does not get archived documents, but /query does
    params = {'pageNumber': page}
    r = requests.get(
        f'{instance_url}/services/data/{VERSION}/support/knowledgeArticles',
        params=params,
        headers={
            'Authorization': f'Bearer {access_token}',
            'Accept': 'application/json',
            'Accept-language': 'en-US',
        },
    )
    if not r.ok:
        print(f'Error while retrieving articles list: {r.text}')
        # Return an empty dict rather than a list: callers use .get() on the
        # result, which a list does not support.
        return {}
    return r.json()
def get_kb_article_detail(access_token, instance_url, article_id):
    """Fetch the detail of a single Salesforce Knowledge article.

    Args:
        access_token: OAuth token sent as a Bearer ``Authorization`` header.
        instance_url: Base URL of the Salesforce instance.
        article_id: Identifier appended to the ``knowledgeArticles`` resource URL.

    Returns:
        The decoded JSON response, or ``None`` when the request fails
        (the response body is printed in that case).
    """
    # https://developer.salesforce.com/docs/atlas.en-us.api_rest.meta/api_rest/resources_knowledge_support_artdetails.htm
    endpoint = f'{instance_url}/services/data/{VERSION}/support/knowledgeArticles/{article_id}'
    request_headers = {
        'Authorization': f'Bearer {access_token}',
        'Accept': 'application/json',
        'Accept-language': 'en-US',
    }
    response = requests.get(endpoint, headers=request_headers)
    if response.ok:
        return response.json()
    print(f'Error while retrieving article with id {article_id}: {response.text}')
    return None
def upload_to_ft(zip):
    """Upload an archive to the Fluid Topics source identified by ``SOURCE_ID``.

    Args:
        zip: Path of the archive to send. The name shadows the ``zip`` builtin;
            it is kept so that existing keyword callers keep working.

    Returns:
        ``False`` when Fluid Topics answers with an error status, ``True``
        otherwise. ``True`` is also returned when the connection drops during
        the upload: a message then asks the operator to check the result.
    """
    # FT_URL may be configured with or without a scheme; drop any scheme so the
    # request never targets "https://https://...".
    host = FT_URL.split("://", 1)[-1].rstrip("/")
    url = f"https://{host}/api/admin/khub/sources/{SOURCE_ID}/upload"
    with open(zip, "rb") as file:
        try:
            r = requests.post(url, files={"file": file}, auth=BearerAuth(FT_API))
        except (requests.exceptions.ConnectionError, ConnectionError):
            # requests raises its own ConnectionError class, which is not a
            # subclass of the builtin one, so both are listed.
            print(f'Connection reset while sending archive to {FT_URL}, please check that file uploaded correctly')
            return True
    if not r.ok:
        print(f'Failed to upload unstructured documents\nPOST {url}\n{r.content}')
        return False
    return True
def serialize(doc, id):
    """Write ``doc`` as indented JSON to ``./json_to_upload/<id>.json``.

    The output directory is created on first use.

    Args:
        doc: JSON-serializable object to store.
        id: String used as the file name (without the ``.json`` extension).
    """
    output_dir = Path("./json_to_upload")
    output_dir.mkdir(parents=True, exist_ok=True)
    target = './json_to_upload/' + id + '.json'
    with open(target, 'w') as out:
        rendered = json.dumps(doc, indent=1)
        out.write(rendered)
def get_documents(access_token, instance_url):
    """Export every article published after ``GET_SINCE`` to a JSON file.

    Walks the paginated articles list, fetches the detail of each article whose
    ``lastPublishedDate`` is greater than ``GET_SINCE`` and writes it with
    ``serialize``. Paging stops when a page has no ``nextPageUrl`` or when a
    page cannot be retrieved.

    Args:
        access_token: OAuth token passed to the request helpers.
        instance_url: Base URL of the Salesforce instance.
    """
    page = 1
    while True:
        articles_list = get_kb_articles_list(access_token, instance_url, page)
        if not articles_list:
            # The request failed (the helper already printed the error) or the
            # payload is empty: stop instead of crashing on .get().
            break
        # 'articles' may be missing or null in the payload.
        for article in articles_list.get('articles') or []:
            published = article.get('lastPublishedDate')
            # Plain string comparison; presumably both values are ISO-8601
            # dates so that it matches chronological order -- TODO confirm.
            # Articles without a publication date are skipped.
            if not published or not published > GET_SINCE:
                continue
            doc = get_kb_article_detail(access_token, instance_url, article.get('id'))
            if doc:
                serialize(doc, article.get('id'))
        if not articles_list.get('nextPageUrl'):
            # no more items
            break
        page += 1
class BearerAuth(requests.auth.AuthBase):
    """Auth hook for ``requests`` that adds a Bearer ``authorization`` header."""

    def __init__(self, token):
        """Store the token to send with each request."""
        self.token = token

    def __call__(self, request):
        """Set the ``authorization`` header on ``request`` and return it."""
        header_value = "Bearer " + self.token
        request.headers["authorization"] = header_value
        return request
if __name__ == "__main__":
    # Optional single argument: only articles published after this date are exported.
    if len(sys.argv) == 2:
        GET_SINCE = sys.argv[1]
        print(f'Will crawl from {GET_SINCE}')
    else:
        print(f'No date argument given, will crawl from {GET_SINCE}')

    access_token, instance_url = get_access_token_and_instance_url()
    if not (access_token and instance_url):
        # Without a token every later request would fail: stop here.
        sys.exit('Could not authenticate against Salesforce, nothing was exported')

    get_documents(access_token, instance_url)

    # The directory only exists when at least one article was serialized.
    if os.path.isdir('./json_to_upload/'):
        shutil.make_archive('to_upload_in_FT', 'zip', './json_to_upload/')
        upload_to_ft('to_upload_in_FT.zip')
        os.remove('./to_upload_in_FT.zip')  # Remove this line to keep archives
        shutil.rmtree('./json_to_upload/')
    else:
        print('No article to publish')
The following alternative script exports Salesforce KB articles without automatically publishing them to a Fluid Topics portal. Instead, it creates a ZIP archive named to_upload_in_FT.zip in the current working directory. To publish this archive, see Publish Salesforce content.
import os
import sys
import json
import shutil
import requests
from pathlib import Path
# --- Salesforce settings ---
# Host name only, without scheme: "https://" is prepended when the token is requested.
SF_URL = "salesforce_url.com"
CLIENT_ID = ""
CLIENT_SECRET = ""
USER_NAME = ""
PASSWORD = ""  # concatenate the password and security token
VERSION = ""  # example v55.0

# Only articles whose lastPublishedDate is greater than this value are exported.
# Overridden by the first command-line argument when one is given.
GET_SINCE = "1901-01-01"
def get_access_token_and_instance_url():
    """Request a Salesforce access token with the OAuth password grant.

    Posts the module-level credentials (``CLIENT_ID``, ``CLIENT_SECRET``,
    ``USER_NAME``, ``PASSWORD``) to ``/services/oauth2/token`` on ``SF_URL``.

    Returns:
        An ``(access_token, instance_url)`` tuple read from the JSON response,
        or ``(None, None)`` when Salesforce answers with an error status.
    """
    # Accept SF_URL with or without a scheme so the request never targets
    # "https://https://...".
    host = SF_URL.split("://", 1)[-1].rstrip("/")
    credentials = {
        'grant_type': 'password',
        'client_id': CLIENT_ID,
        'client_secret': CLIENT_SECRET,
        'username': USER_NAME,
        # security token must be provided if IP address is outside your company's trusted IP range
        'password': PASSWORD,
    }
    r = requests.post(f"https://{host}/services/oauth2/token", data=credentials)
    if not r.ok:
        # Report the failure like the other request helpers do instead of
        # failing silently.
        print(f'Error while retrieving access token: {r.text}')
        return None, None
    payload = r.json()  # decode the body once
    return payload.get('access_token'), payload.get('instance_url')
def get_kb_articles_list(access_token, instance_url, page=1):
    """Fetch one page of the Salesforce Knowledge articles list.

    Args:
        access_token: OAuth token sent as a Bearer ``Authorization`` header.
        instance_url: Base URL of the Salesforce instance.
        page: Value sent as the ``pageNumber`` query parameter.

    Returns:
        The decoded JSON response, or an empty dict when the request fails
        (the response body is printed in that case).
    """
    params = {'pageNumber': page}
    r = requests.get(
        f'{instance_url}/services/data/{VERSION}/support/knowledgeArticles',
        params=params,
        headers={
            'Authorization': f'Bearer {access_token}',
            'Accept': 'application/json',
            'Accept-language': 'en-US',
        },
    )
    if not r.ok:
        print(f'Error while retrieving articles list: {r.text}')
        # Return an empty dict rather than a list: callers use .get() on the
        # result, which a list does not support.
        return {}
    return r.json()
def get_kb_article_detail(access_token, instance_url, article_id):
    """Retrieve one Salesforce Knowledge article by its identifier.

    Args:
        access_token: OAuth token sent as a Bearer ``Authorization`` header.
        instance_url: Base URL of the Salesforce instance.
        article_id: Identifier appended to the ``knowledgeArticles`` resource URL.

    Returns:
        The decoded JSON response, or ``None`` when the request fails
        (the response body is printed in that case).
    """
    detail_url = f'{instance_url}/services/data/{VERSION}/support/knowledgeArticles/{article_id}'
    reply = requests.get(
        detail_url,
        headers={
            'Authorization': f'Bearer {access_token}',
            'Accept': 'application/json',
            'Accept-language': 'en-US',
        },
    )
    if reply.ok:
        return reply.json()
    print(f'Error while retrieving article with id {article_id}: {reply.text}')
    return None
def serialize(doc, id):
    """Store ``doc`` as indented JSON in ``./json_to_upload/<id>.json``.

    The output directory is created when it does not exist yet.

    Args:
        doc: JSON-serializable object to store.
        id: String used as the file name (without the ``.json`` extension).
    """
    Path("./json_to_upload").mkdir(parents=True, exist_ok=True)
    destination = './json_to_upload/' + id + '.json'
    with open(destination, 'w') as handle:
        as_text = json.dumps(doc, indent=1)
        handle.write(as_text)
def get_documents(access_token, instance_url):
    """Export every article published after ``GET_SINCE`` to a JSON file.

    Walks the paginated articles list, fetches the detail of each article whose
    ``lastPublishedDate`` is greater than ``GET_SINCE`` and writes it with
    ``serialize``. Paging stops when a page has no ``nextPageUrl`` or when a
    page cannot be retrieved.

    Args:
        access_token: OAuth token passed to the request helpers.
        instance_url: Base URL of the Salesforce instance.
    """
    page = 1
    while True:
        articles_list = get_kb_articles_list(access_token, instance_url, page)
        if not articles_list:
            # The request failed (the helper already printed the error) or the
            # payload is empty: stop instead of crashing on .get().
            break
        # 'articles' may be missing or null in the payload.
        for article in articles_list.get('articles') or []:
            published = article.get('lastPublishedDate')
            # Plain string comparison; presumably both values are ISO-8601
            # dates so that it matches chronological order -- TODO confirm.
            # Articles without a publication date are skipped.
            if published and published > GET_SINCE:
                article_id = article.get('id')
                doc = get_kb_article_detail(access_token, instance_url, article_id)
                if doc:
                    serialize(doc, article_id)
        if not articles_list.get('nextPageUrl'):
            # no more items
            break
        page += 1
if __name__ == "__main__":
    # Optional single argument: only articles published after this date are exported.
    if len(sys.argv) == 2:
        GET_SINCE = sys.argv[1]
        print(f'Will crawl from {GET_SINCE}')
    else:
        print(f'No date argument given, will crawl from {GET_SINCE}')

    access_token, instance_url = get_access_token_and_instance_url()
    if not (access_token and instance_url):
        # Make the failure visible instead of exiting silently with no archive.
        sys.exit('Could not authenticate against Salesforce, nothing was exported')

    get_documents(access_token, instance_url)

    # The directory only exists when at least one article was serialized.
    if os.path.isdir('./json_to_upload/'):
        # to_upload_in_FT.zip is the output of this script, so it is kept;
        # only the intermediate JSON files are removed.
        shutil.make_archive('to_upload_in_FT', 'zip', './json_to_upload/')
        shutil.rmtree('./json_to_upload/')
    else:
        print('No article to export, no archive was created')