Content export script - Fluid Topics - Latest

Salesforce Connector Reference Guide

Category
Reference Guides
Audience
public
Version
Latest

The following Python script exports Salesforce KB articles and automatically publishes them to a Fluid Topics portal.

This script relies on the Salesforce Knowledge Support REST resources (the article list and article details web services) to export Salesforce KB articles.

These web services require the use of an access token.

import os
import sys
import json
import shutil
import requests
from pathlib import Path

# --- Configuration: fill in before running the script -------------------------
# NOTE(review): the login call prepends "https://" to SF_URL — confirm whether
# this placeholder is meant to include the scheme.
SF_URL = "https://salesforce_url.com"
CLIENT_ID = ""      # OAuth client id of the Salesforce connected app
CLIENT_SECRET = ""  # OAuth client secret of the Salesforce connected app
USER_NAME = ""      # Salesforce username used for the password grant
PASSWORD = ""       # concatenate the password and security token
VERSION = ""        # example v55.0
FT_URL = ""         # https://docs.mydomain.com
FT_API = ""         # Fluid Topics API key, sent as a Bearer token (see BearerAuth)
SOURCE_ID = "salesforce"  # example salesforce
GET_SINCE = "1901-01-01"  # default export cutoff date; overridden by the CLI argument

def get_access_token_and_instance_url():
    """Authenticate against Salesforce with the OAuth2 username-password flow.

    Returns:
        (access_token, instance_url) on success, (None, None) on failure.
    """
    # The security token must be appended to the password if the caller's IP
    # address is outside the company's trusted IP range.
    # Bug fix: SF_URL's example value includes a scheme, so blindly prepending
    # "https://" produced "https://https://..."; strip any scheme first.
    host = SF_URL
    for scheme in ('https://', 'http://'):
        if host.startswith(scheme):
            host = host[len(scheme):]
            break
    r = requests.post(f"https://{host}/services/oauth2/token",
                      data={'grant_type': 'password',
                            'client_id': CLIENT_ID,
                            'client_secret': CLIENT_SECRET,
                            'username': USER_NAME,
                            'password': PASSWORD})

    if r.ok:
        body = r.json()  # parse the response once instead of twice
        return body.get('access_token'), body.get('instance_url')
    return None, None

def get_kb_articles_list(access_token, instance_url, page=1):
    """Fetch one page of the Salesforce KB article list.

    https://developer.salesforce.com/docs/atlas.en-us.api_rest.meta/api_rest/resources_knowledge_support_artlist.htm

    Returns:
        The parsed JSON response dict. On HTTP error, an empty page dict
        carrying the keys callers read ('articles', 'nextPageUrl').
    """
    params = {'pageNumber': page}
    # Params can include filters for the request : q, channel, categories, queryMethod, sort, order
    # params = {'pageNumber': page, q: '', channel: '', categories: '', queryMethod:'', sort: '', order: ''}

    # NB : /support/knowledgeArticles does not get archived documents, but /query does
    r = requests.get(f'{instance_url}/services/data/{VERSION}/support/knowledgeArticles',
                     params=params,
                     headers={
                         'Authorization': f'Bearer {access_token}',
                         'Accept': 'application/json',
                         'Accept-language': 'en-US'
                     })
    if not r.ok:
        print(f'Error while retrieving articles list: {r.text}')
        # Bug fix: this used to return a list ([]), but get_documents calls
        # .get('articles') on the result, which raised AttributeError on error.
        return {'articles': [], 'nextPageUrl': None}
    return r.json()

def get_kb_article_detail(access_token, instance_url, article_id):
    """Return the full detail of one KB article as a dict, or None on failure.

    See: https://developer.salesforce.com/docs/atlas.en-us.api_rest.meta/api_rest/resources_knowledge_support_artdetails.htm
    """
    endpoint = (f'{instance_url}/services/data/{VERSION}'
                f'/support/knowledgeArticles/{article_id}')
    request_headers = {
        'Authorization': f'Bearer {access_token}',
        'Accept': 'application/json',
        'Accept-language': 'en-US',
    }
    r = requests.get(endpoint, headers=request_headers)
    if r.ok:
        return r.json()
    print(f'Error while retrieving article with id {article_id}: {r.text}')
    return None

def upload_to_ft(zip):
    """POST the given ZIP archive to the Fluid Topics source upload endpoint.

    Returns:
        False when the server rejects the upload; True otherwise — including
        after a connection reset, where the upload may still have succeeded
        server-side (the printed message asks the operator to verify).
    """
    # Strip any scheme from FT_URL (its example value includes "https://") so
    # the rebuilt URL is not "https://https://...".
    host = FT_URL
    for scheme in ('https://', 'http://'):
        if host.startswith(scheme):
            host = host[len(scheme):]
            break
    url = f"https://{host}/api/admin/khub/sources/{SOURCE_ID}/upload"
    with open(zip, "rb") as file:
        files = {"file": file}
        try:
            r = requests.post(
                url, files=files, auth=BearerAuth(FT_API)
            )
            if not r.ok:
                print(f'Failed to upload unstructured documents\nPOST {url}\n{r.content}')
                return False
        except requests.exceptions.ConnectionError:
            # Bug fix: the builtin ConnectionError never matched here —
            # requests raises requests.exceptions.ConnectionError, which
            # derives from RequestException, not from the builtin class.
            print(f'Connection reset while sending archive to {FT_URL}, please check that file uploaded correctly')
    return True

def serialize(doc, id):
    """Persist one article as an indented JSON file under ./json_to_upload/."""
    target_dir = Path("./json_to_upload")
    target_dir.mkdir(parents=True, exist_ok=True)
    payload = json.dumps(doc, indent=1)
    with open(target_dir / (id + '.json'), 'w') as out:
        out.write(payload)

def get_documents(access_token, instance_url):
    """Walk every page of the KB article list and serialize recent articles.

    Only articles whose lastPublishedDate is strictly after the GET_SINCE
    cutoff are fetched in detail and written to disk.
    """
    page = 1
    more_pages = True
    while more_pages:
        listing = get_kb_articles_list(access_token, instance_url, page)

        for article in listing.get('articles'):
            if article.get('lastPublishedDate') > GET_SINCE:
                detail = get_kb_article_detail(access_token, instance_url, article.get('id'))
                if detail:
                    serialize(detail, article.get('id'))

        # A nextPageUrl in the response means more pages remain to crawl.
        more_pages = bool(listing.get('nextPageUrl'))
        if more_pages:
            page += 1

class BearerAuth(requests.auth.AuthBase):
    """Attach an 'Authorization: Bearer <token>' header to outgoing requests."""

    def __init__(self, token):
        self.token = token

    def __call__(self, r):
        r.headers["authorization"] = f"Bearer {self.token}"
        return r


if __name__ == "__main__":
    # Optional CLI argument: export only articles published after this date.
    if len(sys.argv) == 2:
        GET_SINCE = sys.argv[1]
        print(f'Will crawl from {GET_SINCE}')
    else:
        print(f'No date argument given, will crawl from {GET_SINCE}')
    access_token, instance_url = get_access_token_and_instance_url()
    # Bug fix: guard against failed authentication — the alternative script in
    # this file already checks this; without it the export crashes on None.
    if access_token and instance_url:
        get_documents(access_token, instance_url)
        shutil.make_archive('to_upload_in_FT', 'zip', './json_to_upload/')
        upload_to_ft('to_upload_in_FT.zip')

        os.remove('./to_upload_in_FT.zip') # Remove this line to keep archives

        shutil.rmtree('./json_to_upload/')
    else:
        print('Could not authenticate to Salesforce, nothing exported')

The following is an alternative script which exports Salesforce KB articles, without automatically publishing them to a Fluid Topics portal. It creates a ZIP file called to_upload_in_FT.zip. To publish this archive, see Publish Salesforce content.

import os
import sys
import json
import shutil
import requests
from pathlib import Path

# --- Configuration: fill in before running the script -------------------------
# NOTE(review): the login call prepends "https://" to SF_URL — confirm whether
# this placeholder is meant to include the scheme.
SF_URL = "https://salesforce_url.com"
CLIENT_ID = ""      # OAuth client id of the Salesforce connected app
CLIENT_SECRET = ""  # OAuth client secret of the Salesforce connected app
USER_NAME = ""      # Salesforce username used for the password grant
PASSWORD = ""       # concatenate the password and security token
VERSION = ""        # example v55.0
GET_SINCE = "1901-01-01"  # default export cutoff date; overridden by the CLI argument

def get_access_token_and_instance_url():
    """Authenticate against Salesforce with the OAuth2 username-password flow.

    Returns:
        (access_token, instance_url) on success, (None, None) on failure.
    """
    # The security token must be appended to the password if the caller's IP
    # address is outside the company's trusted IP range.
    # Bug fix: SF_URL's example value includes a scheme, so blindly prepending
    # "https://" produced "https://https://..."; strip any scheme first.
    host = SF_URL
    for scheme in ('https://', 'http://'):
        if host.startswith(scheme):
            host = host[len(scheme):]
            break
    r = requests.post(f"https://{host}/services/oauth2/token",
                      data={'grant_type': 'password',
                            'client_id': CLIENT_ID,
                            'client_secret': CLIENT_SECRET,
                            'username': USER_NAME,
                            'password': PASSWORD})

    if r.ok:
        body = r.json()  # parse the response once instead of twice
        return body.get('access_token'), body.get('instance_url')
    return None, None

def get_kb_articles_list(access_token, instance_url, page=1):
    """Fetch one page of the Salesforce KB article list.

    Returns:
        The parsed JSON response dict. On HTTP error, an empty page dict
        carrying the keys callers read ('articles', 'nextPageUrl').
    """
    params = {'pageNumber': page}
    r = requests.get(f'{instance_url}/services/data/{VERSION}/support/knowledgeArticles',
                     params=params,
                     headers={
                         'Authorization': f'Bearer {access_token}',
                         'Accept': 'application/json',
                         'Accept-language': 'en-US'
                     })
    if not r.ok:
        print(f'Error while retrieving articles list: {r.text}')
        # Bug fix: this used to return a list ([]), but get_documents calls
        # .get('articles') on the result, which raised AttributeError on error.
        return {'articles': [], 'nextPageUrl': None}
    return r.json()

def get_kb_article_detail(access_token, instance_url, article_id):
    """Return the full detail of one KB article as a dict, or None on failure."""
    endpoint = (f'{instance_url}/services/data/{VERSION}'
                f'/support/knowledgeArticles/{article_id}')
    request_headers = {
        'Authorization': f'Bearer {access_token}',
        'Accept': 'application/json',
        'Accept-language': 'en-US',
    }
    r = requests.get(endpoint, headers=request_headers)
    if r.ok:
        return r.json()
    print(f'Error while retrieving article with id {article_id}: {r.text}')
    return None

def serialize(doc, id):
    """Persist one article as an indented JSON file under ./json_to_upload/."""
    target_dir = Path("./json_to_upload")
    target_dir.mkdir(parents=True, exist_ok=True)
    payload = json.dumps(doc, indent=1)
    with open(target_dir / (id + '.json'), 'w') as out:
        out.write(payload)

def get_documents(access_token, instance_url):
    """Walk every page of the KB article list and serialize recent articles.

    Only articles whose lastPublishedDate is strictly after the GET_SINCE
    cutoff are fetched in detail and written to disk.
    """
    page = 1
    more_pages = True
    while more_pages:
        listing = get_kb_articles_list(access_token, instance_url, page)

        for article in listing.get('articles'):
            if article.get('lastPublishedDate') > GET_SINCE:
                detail = get_kb_article_detail(access_token, instance_url, article.get('id'))
                if detail:
                    serialize(detail, article.get('id'))

        # A nextPageUrl in the response means more pages remain to crawl.
        more_pages = bool(listing.get('nextPageUrl'))
        if more_pages:
            page += 1

if __name__ == "__main__":
    # An optional single CLI argument overrides the GET_SINCE cutoff date.
    if len(sys.argv) == 2:
        GET_SINCE = sys.argv[1]
        message = f'Will crawl from {GET_SINCE}'
    else:
        message = f'No date argument given, will crawl from {GET_SINCE}'
    print(message)
    access_token, instance_url = get_access_token_and_instance_url()
    # Only proceed when authentication succeeded.
    if access_token and instance_url:
        get_documents(access_token, instance_url)
        shutil.make_archive('to_upload_in_FT', 'zip', './json_to_upload/')
        # If you don't want to delete the archive, comment out or remove this line
        # os.remove('./to_upload_in_FT.zip')

        shutil.rmtree('./json_to_upload/')