caoscrawler.structure_elements.structure_elements module

class caoscrawler.structure_elements.structure_elements.StructureElement(name: str)

Bases: object

Base class for elements in the hierarchical data structure.

Parameters:

name (str) – The name of the StructureElement. May be used for pattern matching by CFood rules.

get_name()
class caoscrawler.structure_elements.structure_elements.FileSystemStructureElement(name: str, path: str)

Bases: StructureElement

StructureElement representing an element of a file system, like a directory or a simple file.

Parameters:
  • name (str) – The name of the StructureElement. May be used for pattern matching by CFood rules.

  • path (str) – The path to the file or directory.

class caoscrawler.structure_elements.structure_elements.NoneElement(name: str)

Bases: StructureElement

class caoscrawler.structure_elements.structure_elements.Directory(name: str, path: str)

Bases: FileSystemStructureElement

class caoscrawler.structure_elements.structure_elements.File(name: str, path: str)

Bases: FileSystemStructureElement

StructureElement representing a file.

class caoscrawler.structure_elements.structure_elements.JSONFile(name: str, path: str)

Bases: File

class caoscrawler.structure_elements.structure_elements.DictElement(name: str, value: dict)

Bases: StructureElement

class caoscrawler.structure_elements.structure_elements.TextElement(name: str, value: str)

Bases: StructureElement

class caoscrawler.structure_elements.structure_elements.DictTextElement(*args, **kwargs)

Bases: TextElement

class caoscrawler.structure_elements.structure_elements.IntegerElement(name: str, value: int)

Bases: StructureElement

class caoscrawler.structure_elements.structure_elements.DictIntegerElement(*args, **kwargs)

Bases: IntegerElement

class caoscrawler.structure_elements.structure_elements.BooleanElement(name: str, value: bool)

Bases: StructureElement

class caoscrawler.structure_elements.structure_elements.DictBooleanElement(*args, **kwargs)

Bases: BooleanElement

class caoscrawler.structure_elements.structure_elements.ListElement(name: str, value: list)

Bases: StructureElement

class caoscrawler.structure_elements.structure_elements.DictListElement(*args, **kwargs)

Bases: ListElement

class caoscrawler.structure_elements.structure_elements.FloatElement(name: str, value: float)

Bases: StructureElement

class caoscrawler.structure_elements.structure_elements.DictFloatElement(*args, **kwargs)

Bases: FloatElement

class caoscrawler.structure_elements.structure_elements.Dict(*args, **kwargs)

Bases: DictElement

class caoscrawler.structure_elements.structure_elements.DictDictElement(*args, **kwargs)

Bases: DictElement

class caoscrawler.structure_elements.structure_elements.XMLTagElement(element: Element)

Bases: StructureElement

Stores elements of an XML tree.

class caoscrawler.structure_elements.structure_elements.XMLTextNode(element: Element)

Bases: StructureElement

Stores text nodes of XML trees.

class caoscrawler.structure_elements.structure_elements.XMLAttributeNode(element: Element, key: str)

Bases: StructureElement

Stores attribute nodes of XML trees, i.e. a single attribute of an XML element, identified by its key.