From a0599267ead9f1cb0adf7a00538011eea49800d3 Mon Sep 17 00:00:00 2001
From: Jakub Debski <jakub.debski@intel.com>
Date: Tue, 18 Jan 2022 13:03:09 +0100
Subject: [PATCH] Nbdoc change source (#9739)

* doc fixes

* doc fix

* doc fix

* Add suggestion how to change source of download

* Fix typo

Co-authored-by: Nikolay Tyukaev <nikolay.tyukaev@intel.com>
---
 docs/nbdoc/README.md        |  10 +--
 docs/nbdoc/consts.py        |   7 +-
 docs/nbdoc/nbdoc.py         | 160 +++++++++++++-----------------------
 docs/nbdoc/requirements.txt |   3 +-
 docs/nbdoc/utils.py         |  54 ------------
 5 files changed, 70 insertions(+), 164 deletions(-)

diff --git a/docs/nbdoc/README.md b/docs/nbdoc/README.md
index ea655fa32b5..90f0efb5636 100644
--- a/docs/nbdoc/README.md
+++ b/docs/nbdoc/README.md
@@ -29,19 +29,15 @@ venv/Scripts/activate
 python -m pip install -r requirements.txt
 ```
 
-## Step 2. Download personal token from github account
-
-[Tutorial how to do it.](https://docs.github.com/en/github/authenticating-to-github/keeping-your-account-and-data-secure/creating-a-personal-access-token). By default configuration you should place it on path ../.secret.
-
-## Step 3. Configure consts to meet project directions
+## Step 2. Configure consts to meet project directions
 
 [Consts file](consts.py) contains multiple variables that might differ for different environments.
 
-## Step 4. Add classes with methods to makefile or other executed file
+## Step 3. Add classes with methods to makefile or other executed file
 
 [Main file](main.py) contains example usecases of auto generator for notebooks. Informations placed in [main](main.py) should be further used to add it to makefile and possibly fully automate notebook documentation process.
 
-## Step 5. Run python file (optional)
+## Step 4. Run python file (optional)
 
-If step 4 was skipped use command
+If step 3 was skipped use command
 
diff --git a/docs/nbdoc/consts.py b/docs/nbdoc/consts.py
index 0fd5053cb6b..0311d32c50a 100644
--- a/docs/nbdoc/consts.py
+++ b/docs/nbdoc/consts.py
@@ -8,7 +8,12 @@ repo_owner = "openvinotoolkit"
 
 repo_name = "openvino_notebooks"
 
-section_names = ["Getting Started", "Convert & Optimize", "Model Demos", "Model Training", "Live Demos"]
+artifacts_link = "https://repository.toolbox.iotg.sclab.intel.com/projects/ov-notebook/0.1.0-latest/latest/dist/rst_files/"
+
+blacklisted_extensions = ['.xml', '.bin']
+
+section_names = ["Getting Started", "Convert & Optimize",
+                 "Model Demos", "Model Training", "Live Demos"]
 
 # Templates
 
diff --git a/docs/nbdoc/nbdoc.py b/docs/nbdoc/nbdoc.py
index e936093e229..828e3a02c13 100644
--- a/docs/nbdoc/nbdoc.py
+++ b/docs/nbdoc/nbdoc.py
@@ -3,123 +3,86 @@ from pathlib import Path
 from utils import (
     create_content,
     add_content_below,
-    load_secret,
     process_notebook_name,
-    find_latest_artifact,
     verify_notebook_name,
-    generate_artifact_link,
-    remove_existing,
     split_notebooks_into_sections,
 )
 from consts import (
+    artifacts_link,
     binder_template,
-    no_binder_template,
-    rst_template,
-    notebooks_path,
-    repo_owner,
-    repo_name,
-    repo_directory,
+    blacklisted_extensions,
     notebooks_docs,
-    section_names
+    notebooks_path,
+    no_binder_template,
+    repo_directory,
+    repo_name,
+    repo_owner,
+    rst_template,
+    section_names,
 )
 from notebook import Notebook
 from section import Section
-from io import BytesIO
 from glob import glob
+from lxml import html
 from jinja2 import Template
+from urllib.request import urlretrieve
 from requests import get
-from zipfile import ZipFile
 import os
 
 
-class NbDownloader:
-    """Class responsible for downloading and extracting notebooks"""
-
-    def __init__(self, secret_path: str) -> None:
-        self.secret = load_secret(secret_path)
-        self.headers = {
-            "Accept": "application/vnd.github.v3+json",
-            "Authorization": f"token {self.secret}",
-        }
-        self.artifact_link = generate_artifact_link(repo_owner, repo_name)
-
-    def default_pipeline(self, path: str = notebooks_path) -> bool:
-        """Default pipeline for fetching, downloading and extracting rst files
-
-        :param path: Path to folder that will contain notebooks. Defaults to notebooks_path.
-        :type path: str
-        :returns: Returns if status is sucessful
-        :rtype: bool
+class NbTravisDownloader:
+    @staticmethod
+    def download_from_jenkins(path: str = notebooks_path, artifact_link: str = artifacts_link):
+        """Function for downloading files from jenkins artifacts
 
+        :param path: path where notebooks files will be placed, defaults to notebooks_path
+        :type path: str, optional
+        :param artifact_link: link of notebooks artifacts rst files, defaults to artifacts_link
+        :type artifact_link: str, optional
         """
-        artifacts = self.fetch_artifacts()
-        latest_artifact = find_latest_artifact(artifacts)
-        download_link = self.generate_artifact_download_link(latest_artifact)
-        zipfile = self.download_rst_files(download_link)
-        if zipfile.testzip() is None:
-            remove_existing(path)
-        return self.extract_artifacts(zipfile, path=path)
+        def is_directory(path: str) -> bool:
+            """Helper function for checking whether path leads to subdirectory
 
-    def fetch_artifacts(self) -> dict:
-        """Fetching artifcats from github actions
+            :param path: Href taken from the listing page; may be an empty string
+            :type path: str
+            :return: True if path ends with '/' and is not the parent link '../', otherwise False
+            :rtype: bool
+            """
+            return path.endswith('/') and path != '../'
 
-        :returns: Artifacts in repo
-        :rtype: dict
+        def traverse(path: Path, link: str, blacklisted_extensions: list = blacklisted_extensions):
+            """Traverse recursively to download all directories with their subfolders, within given link.
 
-        """
-        return get(self.artifact_link, headers=self.headers).json()
+            :param path: Path to directory that file will be saved to.
+            :type path: Path
+            :param link: Link to hosted resources
+            :type link: str
+            :param blacklisted_extensions: File suffixes that are skipped instead of downloaded
+            :type blacklisted_extensions: list
+            """
+            path.mkdir(exist_ok=True)
+            page = get(link, verify=False).content  # NOTE(review): TLS certificate verification is disabled
+            # collect the href of every anchor on the listing page
+            hrefs = map(str, html.fromstring(page).xpath('//a[@*]/@href'))
+            for file in hrefs:
+                if is_directory(file):
+                    traverse(path.joinpath(file), link + file, blacklisted_extensions)
+                elif len(Path(file).suffix) > 0 and Path(file).suffix not in blacklisted_extensions:
+                    urlretrieve(link + file, path.joinpath(file))
 
-    def generate_artifact_download_link(self, artifact_id: int) -> str:
-        """Generate link based on link and latest artifact id containing rst files
-
-        :param artifact_id: Latest artifact id containing rst files
-        :type artifact_id: int
-        :returns: Link to download rst files
-        :rtype: str
-
-        """
-        return f"{self.artifact_link}/{artifact_id}/zip"
-
-    def download_rst_files(self, artifact_download_link: str) -> ZipFile:
-        """Downloading rst files
-
-        :param artifact_download_link: Generated link for downloading rst
-        :type artifact_download_link: str
-        :returns: Zipped archive of rst files
-        :rtype: ZipFile
-
-        """
-        artifact = get(artifact_download_link, headers=self.headers)
-        return ZipFile(BytesIO(artifact.content))
-
-    def extract_artifacts(self, zipfile: ZipFile, path: str) -> bool:
-        """Extracting all artifacts from zipped archive
-
-        :param zipfile: zipped rst files
-        :type zipfile: ZipFile
-        :param path: path to extract files to
-        :type path: str
-        :returns: Returns if status is sucessful
-        :rtype: bool
-
-        """
-        try:
-            zipfile.extractall(path=path)
-            return True
-        except ValueError:
-            return False
+        traverse(Path(path), artifact_link)
 
 
 class NbProcessor:
     def __init__(self, nb_path: str = notebooks_path):
         self.nb_path = nb_path
         notebooks = [
-                Notebook(
-                    name=process_notebook_name(notebook),
-                    path=notebook,
-                )
-                for notebook in os.listdir(self.nb_path)
-                if verify_notebook_name(notebook)
+            Notebook(
+                name=process_notebook_name(notebook),
+                path=notebook,
+            )
+            for notebook in os.listdir(self.nb_path)
+            if verify_notebook_name(notebook)
         ]
         notebooks = split_notebooks_into_sections(notebooks)
         self.rst_data = {
@@ -127,7 +90,7 @@ class NbProcessor:
                 Section(name=section_name, notebooks=section_notebooks)
                 for section_name, section_notebooks in zip(section_names, notebooks)
             ]
-            
+
         }
         self.binder_data = {
             "owner": repo_owner,
@@ -167,11 +130,13 @@ class NbProcessor:
             nb for nb in os.listdir(self.nb_path) if verify_notebook_name(nb)
         ]:
             if '-'.join(notebook.split('-')[:-2]) in buttons_list:
-                button_text = create_content(template_with_binder, self.binder_data, notebook)
+                button_text = create_content(
+                    template_with_binder, self.binder_data, notebook)
                 if not add_content_below(button_text, f"{self.nb_path}/{notebook}"):
                     raise FileNotFoundError("Unable to modify file")
             else:
-                button_text = create_content(template_without_binder, self.binder_data, notebook)
+                button_text = create_content(
+                    template_without_binder, self.binder_data, notebook)
                 if not add_content_below(button_text, f"{self.nb_path}/{notebook}"):
                     raise FileNotFoundError("Unable to modify file")
 
@@ -190,19 +155,12 @@ class NbProcessor:
 
 def main():
     parser = argparse.ArgumentParser()
-    parser.add_argument('secret', type=Path)
     parser.add_argument('outdir', type=Path)
     args = parser.parse_args()
-    secret = args.secret
     outdir = args.outdir
     outdir.mkdir(parents=True, exist_ok=True)
-    # Step 1. Create secret file
-    # link: https://docs.github.com/en/github/authenticating-to-github/keeping-your-account-and-data-secure/creating-a-personal-access-token
-    # For this notebooks purpose only repo -> public_repo box is required
-    nbd = NbDownloader(secret)
     # Step 2. Run default pipeline for downloading
-    if not nbd.default_pipeline(outdir):
-        raise FileExistsError("Files not downloaded")
+    NbTravisDownloader.download_from_jenkins(outdir)
     # Step 3. Run processing on downloaded file
     nbp = NbProcessor(outdir)
     buttons_list = nbp.fetch_binder_list('txt')
@@ -211,4 +169,4 @@ def main():
 
 
 if __name__ == '__main__':
-    main()
+    main()
\ No newline at end of file
diff --git a/docs/nbdoc/requirements.txt b/docs/nbdoc/requirements.txt
index 20f9b475d25..9aab7c04704 100644
--- a/docs/nbdoc/requirements.txt
+++ b/docs/nbdoc/requirements.txt
@@ -1,2 +1,3 @@
 jinja2
-requests
\ No newline at end of file
+requests
+lxml
\ No newline at end of file
diff --git a/docs/nbdoc/utils.py b/docs/nbdoc/utils.py
index c00524b63f4..8ddd34c4ec4 100644
--- a/docs/nbdoc/utils.py
+++ b/docs/nbdoc/utils.py
@@ -45,19 +45,6 @@ def add_content_below(text: str, path: str, line=3) -> bool:
         return False
 
 
-def load_secret(path: str = "../.secret") -> str:
-    """Loading secret file
-
-    :param path: Path to secret file. Defaults to "../.secret".
-    :type path: str
-    :returns: Secret key
-    :rtype: str
-
-    """
-    with open(path, "r+") as file:
-        return file.readline().strip()
-
-
 def process_notebook_name(notebook_name: str) -> str:
     """Processes notebook name
 
@@ -77,20 +64,6 @@ def process_notebook_name(notebook_name: str) -> str:
     )
 
 
-def find_latest_artifact(artifacts_dict: dict, name: str = "rst_files") -> int:
-    """Finds id of latest artifact that can be downloaded
-
-    :param artifacts_dict: Fetched github actions
-    :type artifacts_dict: dict
-    :param name: Name of searched artifact. Defaults to "rst_files".
-    :type name: str
-    :returns: Id of latest artifact containing rst files
-    :rtype: int
-
-    """
-    return max([r["id"] for r in artifacts_dict["artifacts"] if r["name"] == name])
-
-
 def verify_notebook_name(notebook_name: str) -> bool:
     """Verification based on notebook name
 
@@ -105,33 +78,6 @@ def verify_notebook_name(notebook_name: str) -> bool:
     return notebook_name[:3].isdigit() and notebook_name[-4:] == ".rst"
 
 
-def generate_artifact_link(owner: str, name: str) -> str:
-    """Generate link for downloading artifacts
-
-    :param owner: Github repo owner name
-    :type owner: str
-    :param name: Github repo name
-    :type name: str
-    :returns: Link to api to download artifacts
-    :rtype: str
-
-    """
-    return f"https://api.github.com/repos/{owner}/{name}/actions/artifacts"
-
-
-def remove_existing(notebooks_path: str) -> None:
-    """Removes file if already existed
-
-    :param notebooks_path: path to file to be removed
-    :type notebooks_path: str
-
-    """
-    if path.exists(notebooks_path):
-        if path.isdir(notebooks_path):
-            rmtree(notebooks_path)
-        else:
-            remove(notebooks_path)
-
 def split_notebooks_into_sections(notebooks: list) -> list:
     series = [list() for _ in range(5)]
     for notebook in notebooks: