From fd1bde76beeaf07ea30709f4a5ecd836996f9756 Mon Sep 17 00:00:00 2001 From: hay-kot Date: Fri, 30 Apr 2021 11:54:54 -0800 Subject: [PATCH] remove old script --- dev/scripts/scrape_recipe.py | 27 --------------------------- 1 file changed, 27 deletions(-) delete mode 100755 dev/scripts/scrape_recipe.py diff --git a/dev/scripts/scrape_recipe.py b/dev/scripts/scrape_recipe.py deleted file mode 100755 index ce5f119ef..000000000 --- a/dev/scripts/scrape_recipe.py +++ /dev/null @@ -1,27 +0,0 @@ -""" -Helper script to download raw recipe data from a URL and dump it to disk. -The resulting files can be used as test input data. -""" - -import sys, json, pprint -import requests -import extruct -from scrape_schema_recipe import scrape_url -from w3lib.html import get_base_url - -for url in sys.argv[1:]: - try: - data = scrape_url(url)[0] - slug = list(filter(None, url.split("/")))[-1] - filename = f"{slug}.json" - with open(filename, "w") as f: - json.dump(data, f, indent=4, default=str) - print(f"Saved {filename}") - except Exception as e: - print(f"Error for {url}: {e}") - print("Trying extruct instead") - pp = pprint.PrettyPrinter(indent=2) - r = requests.get(url) - base_url = get_base_url(r.text, r.url) - data = extruct.extract(r.text, base_url=base_url) - pp.pprint(data)