From a0599267ead9f1cb0adf7a00538011eea49800d3 Mon Sep 17 00:00:00 2001 From: Jakub Debski Date: Tue, 18 Jan 2022 13:03:09 +0100 Subject: [PATCH] Nbdoc change source (#9739) * doc fixes * doc fix * doc fix * Add suggestion how to change source of download * Fix typo Co-authored-by: Nikolay Tyukaev --- docs/nbdoc/README.md | 10 +-- docs/nbdoc/consts.py | 7 +- docs/nbdoc/nbdoc.py | 160 +++++++++++++----------------------- docs/nbdoc/requirements.txt | 3 +- docs/nbdoc/utils.py | 54 ------------ 5 files changed, 70 insertions(+), 164 deletions(-) diff --git a/docs/nbdoc/README.md b/docs/nbdoc/README.md index ea655fa32b5..90f0efb5636 100644 --- a/docs/nbdoc/README.md +++ b/docs/nbdoc/README.md @@ -29,19 +29,15 @@ venv/Scripts/activate python -m pip install -r requirements.txt ``` -## Step 2. Download personal token from github account - -[Tutorial how to do it.](https://docs.github.com/en/github/authenticating-to-github/keeping-your-account-and-data-secure/creating-a-personal-access-token). By default configuration you should place it on path ../.secret. - -## Step 3. Configure consts to meet project directions +## Step 2. Configure consts to meet project directions [Consts file](consts.py) contains multiple variables that might differ for different environments. -## Step 4. Add classes with methods to makefile or other executed file +## Step 3. Add classes with methods to makefile or other executed file [Main file](main.py) contains example usecases of auto generator for notebooks. Informations placed in [main](main.py) should be further used to add it to makefile and possibly fully automate notebook documentation process. -## Step 5. Run python file (optional) +## Step 4. 
Run python file (optional) If step 4 was skipped use command diff --git a/docs/nbdoc/consts.py b/docs/nbdoc/consts.py index 0fd5053cb6b..0311d32c50a 100644 --- a/docs/nbdoc/consts.py +++ b/docs/nbdoc/consts.py @@ -8,7 +8,12 @@ repo_owner = "openvinotoolkit" repo_name = "openvino_notebooks" -section_names = ["Getting Started", "Convert & Optimize", "Model Demos", "Model Training", "Live Demos"] +artifacts_link = "https://repository.toolbox.iotg.sclab.intel.com/projects/ov-notebook/0.1.0-latest/latest/dist/rst_files/" + +blacklisted_extensions = ['.xml', '.bin'] + +section_names = ["Getting Started", "Convert & Optimize", + "Model Demos", "Model Training", "Live Demos"] # Templates diff --git a/docs/nbdoc/nbdoc.py b/docs/nbdoc/nbdoc.py index e936093e229..828e3a02c13 100644 --- a/docs/nbdoc/nbdoc.py +++ b/docs/nbdoc/nbdoc.py @@ -3,123 +3,86 @@ from pathlib import Path from utils import ( create_content, add_content_below, - load_secret, process_notebook_name, - find_latest_artifact, verify_notebook_name, - generate_artifact_link, - remove_existing, split_notebooks_into_sections, ) from consts import ( + artifacts_link, binder_template, - no_binder_template, - rst_template, - notebooks_path, - repo_owner, - repo_name, - repo_directory, + blacklisted_extensions, notebooks_docs, - section_names + notebooks_path, + no_binder_template, + repo_directory, + repo_name, + repo_owner, + rst_template, + section_names, ) from notebook import Notebook from section import Section -from io import BytesIO from glob import glob +from lxml import html from jinja2 import Template +from urllib.request import urlretrieve from requests import get -from zipfile import ZipFile import os -class NbDownloader: - """Class responsible for downloading and extracting notebooks""" - - def __init__(self, secret_path: str) -> None: - self.secret = load_secret(secret_path) - self.headers = { - "Accept": "application/vnd.github.v3+json", - "Authorization": f"token {self.secret}", - } - 
self.artifact_link = generate_artifact_link(repo_owner, repo_name) - - def default_pipeline(self, path: str = notebooks_path) -> bool: - """Default pipeline for fetching, downloading and extracting rst files - - :param path: Path to folder that will contain notebooks. Defaults to notebooks_path. - :type path: str - :returns: Returns if status is sucessful - :rtype: bool +class NbTravisDownloader: + @staticmethod + def download_from_jenkins(path: str = notebooks_path, artifact_link: str = artifacts_link): + """Function for downloading files from jenkins artifacts + :param path: path where notebooks files will be placed, defaults to notebooks_path + :type path: str, optional + :param artifact_link: link of notebooks artifacts rst files, defaults to artifacts_link + :type artifact_link: str, optional """ - artifacts = self.fetch_artifacts() - latest_artifact = find_latest_artifact(artifacts) - download_link = self.generate_artifact_download_link(latest_artifact) - zipfile = self.download_rst_files(download_link) - if zipfile.testzip() is None: - remove_existing(path) - return self.extract_artifacts(zipfile, path=path) + def is_directory(path: str) -> bool: + """Helper function for checking whether path leads to subdirectory - def fetch_artifacts(self) -> dict: - """Fetching artifcats from github actions + :param path: Path to traversed file or directory + :type path: str + :return: Returns True if path leads to directory, otherwise False + :rtype: bool + """ + return path[-1] == '/' and path != '../' - def traverse(path: Path, link: str, blacklisted_extensions: list = blacklisted_extensions): + """Traverse recursively to download all directories with their subfolders, within given link. - """ - return get(self.artifact_link, headers=self.headers).json() + :param path: Path to directory that file will be saved to. 
+ :type path: Path + :param link: Link to hosted resources + :type link: str + """ + path.mkdir(exist_ok=True) + page = get(link, verify=False).content + tree = html.fromstring(page) + # retrieve all links on page returning their content + tree = tree.xpath('//a[@*]/@href') + files = map(str, tree) + for file in files: + if is_directory(file): + traverse(path.joinpath(file), link + file) + elif len(Path(file).suffix) > 0 and Path(file).suffix not in blacklisted_extensions: + urlretrieve(link + file, path.joinpath(file)) - def generate_artifact_download_link(self, artifact_id: int) -> str: - """Generate link based on link and latest artifact id containing rst files - - :param artifact_id: Latest artifact id containing rst files - :type artifact_id: int - :returns: Link to download rst files - :rtype: str - - """ - return f"{self.artifact_link}/{artifact_id}/zip" - - def download_rst_files(self, artifact_download_link: str) -> ZipFile: - """Downloading rst files - - :param artifact_download_link: Generated link for downloading rst - :type artifact_download_link: str - :returns: Zipped archive of rst files - :rtype: ZipFile - - """ - artifact = get(artifact_download_link, headers=self.headers) - return ZipFile(BytesIO(artifact.content)) - - def extract_artifacts(self, zipfile: ZipFile, path: str) -> bool: - """Extracting all artifacts from zipped archive - - :param zipfile: zipped rst files - :type zipfile: ZipFile - :param path: path to extract files to - :type path: str - :returns: Returns if status is sucessful - :rtype: bool - - """ - try: - zipfile.extractall(path=path) - return True - except ValueError: - return False + traverse(Path(path), artifact_link) class NbProcessor: def __init__(self, nb_path: str = notebooks_path): self.nb_path = nb_path notebooks = [ - Notebook( - name=process_notebook_name(notebook), - path=notebook, - ) - for notebook in os.listdir(self.nb_path) - if verify_notebook_name(notebook) + Notebook( + name=process_notebook_name(notebook), + 
path=notebook, + ) + for notebook in os.listdir(self.nb_path) + if verify_notebook_name(notebook) ] notebooks = split_notebooks_into_sections(notebooks) self.rst_data = { @@ -127,7 +90,7 @@ class NbProcessor: Section(name=section_name, notebooks=section_notebooks) for section_name, section_notebooks in zip(section_names, notebooks) ] - + } self.binder_data = { "owner": repo_owner, @@ -167,11 +130,13 @@ class NbProcessor: nb for nb in os.listdir(self.nb_path) if verify_notebook_name(nb) ]: if '-'.join(notebook.split('-')[:-2]) in buttons_list: - button_text = create_content(template_with_binder, self.binder_data, notebook) + button_text = create_content( + template_with_binder, self.binder_data, notebook) if not add_content_below(button_text, f"{self.nb_path}/{notebook}"): raise FileNotFoundError("Unable to modify file") else: - button_text = create_content(template_without_binder, self.binder_data, notebook) + button_text = create_content( + template_without_binder, self.binder_data, notebook) if not add_content_below(button_text, f"{self.nb_path}/{notebook}"): raise FileNotFoundError("Unable to modify file") @@ -190,19 +155,12 @@ class NbProcessor: def main(): parser = argparse.ArgumentParser() - parser.add_argument('secret', type=Path) parser.add_argument('outdir', type=Path) args = parser.parse_args() - secret = args.secret outdir = args.outdir outdir.mkdir(parents=True, exist_ok=True) - # Step 1. Create secret file - # link: https://docs.github.com/en/github/authenticating-to-github/keeping-your-account-and-data-secure/creating-a-personal-access-token - # For this notebooks purpose only repo -> public_repo box is required - nbd = NbDownloader(secret) # Step 2. Run default pipeline for downloading - if not nbd.default_pipeline(outdir): - raise FileExistsError("Files not downloaded") + NbTravisDownloader.download_from_jenkins(outdir) # Step 3. 
Run processing on downloaded file nbp = NbProcessor(outdir) buttons_list = nbp.fetch_binder_list('txt') @@ -211,4 +169,4 @@ def main(): if __name__ == '__main__': - main() + main() \ No newline at end of file diff --git a/docs/nbdoc/requirements.txt b/docs/nbdoc/requirements.txt index 20f9b475d25..9aab7c04704 100644 --- a/docs/nbdoc/requirements.txt +++ b/docs/nbdoc/requirements.txt @@ -1,2 +1,3 @@ jinja2 -requests \ No newline at end of file +requests +lxml \ No newline at end of file diff --git a/docs/nbdoc/utils.py b/docs/nbdoc/utils.py index c00524b63f4..8ddd34c4ec4 100644 --- a/docs/nbdoc/utils.py +++ b/docs/nbdoc/utils.py @@ -45,19 +45,6 @@ def add_content_below(text: str, path: str, line=3) -> bool: return False -def load_secret(path: str = "../.secret") -> str: - """Loading secret file - - :param path: Path to secret file. Defaults to "../.secret". - :type path: str - :returns: Secret key - :rtype: str - - """ - with open(path, "r+") as file: - return file.readline().strip() - - def process_notebook_name(notebook_name: str) -> str: """Processes notebook name @@ -77,20 +64,6 @@ def process_notebook_name(notebook_name: str) -> str: ) -def find_latest_artifact(artifacts_dict: dict, name: str = "rst_files") -> int: - """Finds id of latest artifact that can be downloaded - - :param artifacts_dict: Fetched github actions - :type artifacts_dict: dict - :param name: Name of searched artifact. Defaults to "rst_files". 
- :type name: str - :returns: Id of latest artifact containing rst files - :rtype: int - - """ - return max([r["id"] for r in artifacts_dict["artifacts"] if r["name"] == name]) - - def verify_notebook_name(notebook_name: str) -> bool: """Verification based on notebook name @@ -105,33 +78,6 @@ def verify_notebook_name(notebook_name: str) -> bool: return notebook_name[:3].isdigit() and notebook_name[-4:] == ".rst" -def generate_artifact_link(owner: str, name: str) -> str: - """Generate link for downloading artifacts - - :param owner: Github repo owner name - :type owner: str - :param name: Github repo name - :type name: str - :returns: Link to api to download artifacts - :rtype: str - - """ - return f"https://api.github.com/repos/{owner}/{name}/actions/artifacts" - - -def remove_existing(notebooks_path: str) -> None: - """Removes file if already existed - - :param notebooks_path: path to file to be removed - :type notebooks_path: str - - """ - if path.exists(notebooks_path): - if path.isdir(notebooks_path): - rmtree(notebooks_path) - else: - remove(notebooks_path) - def split_notebooks_into_sections(notebooks: list) -> list: series = [list() for _ in range(5)] for notebook in notebooks: