id
int32
repo
string
path
string
func_name
string
original_string
string
language
string
code
string
code_tokens
sequence
docstring
string
docstring_tokens
sequence
sha
string
url
string
docstring_summary
string
parameters
string
return_statement
string
argument_list
string
identifier
string
nwo
string
score
float32
0
"proycon/pynlpl"
"pynlpl/formats/folia.py"
"AbstractElement.settext"
"def settext(self, text, cls='current'): """Set the text for this element. Arguments: text (str): The text cls (str): The class of the text, defaults to ``current`` (leave this unless you know what you are doing). There may be only one text content element of each class associated with the element. """ self.replace(TextContent, value=text, cls=cls)"
"python"
"def settext(self, text, cls='current'): """Set the text for this element. Arguments: text (str): The text cls (str): The class of the text, defaults to ``current`` (leave this unless you know what you are doing). There may be only one text content element of each class associated with the element. """ self.replace(TextContent, value=text, cls=cls)"
[ "def", "settext", "(", "self", ",", "text", ",", "cls", "=", "'current'", ")", ":", "self", ".", "replace", "(", "TextContent", ",", "value", "=", "text", ",", "cls", "=", "cls", ")" ]
"Set the text for this element. Arguments: text (str): The text cls (str): The class of the text, defaults to ``current`` (leave this unless you know what you are doing). There may be only one text content element of each class associated with the element."
[ "Set", "the", "text", "for", "this", "element", "." ]
"7707f69a91caaa6cde037f0d0379f1d42500a68b"
"https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L1357-L1364"
""
""
""
""
""
""
-1
1
"proycon/pynlpl"
"pynlpl/formats/folia.py"
"AbstractElement.setdocument"
"def setdocument(self, doc): """Associate a document with this element. Arguments: doc (:class:`Document`): A document Each element must be associated with a FoLiA document. """ assert isinstance(doc, Document) if not self.doc: self.doc = doc if self.id: if self.id in doc: raise DuplicateIDError(self.id) else: self.doc.index[id] = self for e in self: #recursive for all children if isinstance(e,AbstractElement): e.setdocument(doc)"
"python"
"def setdocument(self, doc): """Associate a document with this element. Arguments: doc (:class:`Document`): A document Each element must be associated with a FoLiA document. """ assert isinstance(doc, Document) if not self.doc: self.doc = doc if self.id: if self.id in doc: raise DuplicateIDError(self.id) else: self.doc.index[id] = self for e in self: #recursive for all children if isinstance(e,AbstractElement): e.setdocument(doc)"
[ "def", "setdocument", "(", "self", ",", "doc", ")", ":", "assert", "isinstance", "(", "doc", ",", "Document", ")", "if", "not", "self", ".", "doc", ":", "self", ".", "doc", "=", "doc", "if", "self", ".", "id", ":", "if", "self", ".", "id", "in", "doc", ":", "raise", "DuplicateIDError", "(", "self", ".", "id", ")", "else", ":", "self", ".", "doc", ".", "index", "[", "id", "]", "=", "self", "for", "e", "in", "self", ":", "#recursive for all children", "if", "isinstance", "(", "e", ",", "AbstractElement", ")", ":", "e", ".", "setdocument", "(", "doc", ")" ]
"Associate a document with this element. Arguments: doc (:class:`Document`): A document Each element must be associated with a FoLiA document."
[ "Associate", "a", "document", "with", "this", "element", "." ]
"7707f69a91caaa6cde037f0d0379f1d42500a68b"
"https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L1366-L1385"
""
""
""
""
""
""
-1
2
"proycon/pynlpl"
"pynlpl/formats/folia.py"
"AbstractElement.addable"
"def addable(Class, parent, set=None, raiseexceptions=True): """Tests whether a new element of this class can be added to the parent. This method is mostly for internal use. This will use the ``OCCURRENCES`` property, but may be overidden by subclasses for more customised behaviour. Parameters: parent (:class:`AbstractElement`): The element that is being added to set (str or None): The set raiseexceptions (bool): Raise an exception if the element can't be added? Returns: bool Raises: ValueError """ if not parent.__class__.accepts(Class, raiseexceptions, parent): return False if Class.OCCURRENCES > 0: #check if the parent doesn't have too many already count = parent.count(Class,None,True,[True, AbstractStructureElement]) #never descend into embedded structure annotatioton if count >= Class.OCCURRENCES: if raiseexceptions: if parent.id: extra = ' (id=' + parent.id + ')' else: extra = '' raise DuplicateAnnotationError("Unable to add another object of type " + Class.__name__ + " to " + parent.__class__.__name__ + " " + extra + ". There are already " + str(count) + " instances of this class, which is the maximum.") else: return False if Class.OCCURRENCES_PER_SET > 0 and set and Class.REQUIRED_ATTRIBS and Attrib.CLASS in Class.REQUIRED_ATTRIBS: count = parent.count(Class,set,True, [True, AbstractStructureElement]) if count >= Class.OCCURRENCES_PER_SET: if raiseexceptions: if parent.id: extra = ' (id=' + parent.id + ')' else: extra = '' raise DuplicateAnnotationError("Unable to add another object of set " + set + " and type " + Class.__name__ + " to " + parent.__class__.__name__ + " " + extra + ". There are already " + str(count) + " instances of this class, which is the maximum for the set.") else: return False return True"
"python"
"def addable(Class, parent, set=None, raiseexceptions=True): """Tests whether a new element of this class can be added to the parent. This method is mostly for internal use. This will use the ``OCCURRENCES`` property, but may be overidden by subclasses for more customised behaviour. Parameters: parent (:class:`AbstractElement`): The element that is being added to set (str or None): The set raiseexceptions (bool): Raise an exception if the element can't be added? Returns: bool Raises: ValueError """ if not parent.__class__.accepts(Class, raiseexceptions, parent): return False if Class.OCCURRENCES > 0: #check if the parent doesn't have too many already count = parent.count(Class,None,True,[True, AbstractStructureElement]) #never descend into embedded structure annotatioton if count >= Class.OCCURRENCES: if raiseexceptions: if parent.id: extra = ' (id=' + parent.id + ')' else: extra = '' raise DuplicateAnnotationError("Unable to add another object of type " + Class.__name__ + " to " + parent.__class__.__name__ + " " + extra + ". There are already " + str(count) + " instances of this class, which is the maximum.") else: return False if Class.OCCURRENCES_PER_SET > 0 and set and Class.REQUIRED_ATTRIBS and Attrib.CLASS in Class.REQUIRED_ATTRIBS: count = parent.count(Class,set,True, [True, AbstractStructureElement]) if count >= Class.OCCURRENCES_PER_SET: if raiseexceptions: if parent.id: extra = ' (id=' + parent.id + ')' else: extra = '' raise DuplicateAnnotationError("Unable to add another object of set " + set + " and type " + Class.__name__ + " to " + parent.__class__.__name__ + " " + extra + ". There are already " + str(count) + " instances of this class, which is the maximum for the set.") else: return False return True"
[ "def", "addable", "(", "Class", ",", "parent", ",", "set", "=", "None", ",", "raiseexceptions", "=", "True", ")", ":", "if", "not", "parent", ".", "__class__", ".", "accepts", "(", "Class", ",", "raiseexceptions", ",", "parent", ")", ":", "return", "False", "if", "Class", ".", "OCCURRENCES", ">", "0", ":", "#check if the parent doesn't have too many already", "count", "=", "parent", ".", "count", "(", "Class", ",", "None", ",", "True", ",", "[", "True", ",", "AbstractStructureElement", "]", ")", "#never descend into embedded structure annotatioton", "if", "count", ">=", "Class", ".", "OCCURRENCES", ":", "if", "raiseexceptions", ":", "if", "parent", ".", "id", ":", "extra", "=", "' (id='", "+", "parent", ".", "id", "+", "')'", "else", ":", "extra", "=", "''", "raise", "DuplicateAnnotationError", "(", "\"Unable to add another object of type \"", "+", "Class", ".", "__name__", "+", "\" to \"", "+", "parent", ".", "__class__", ".", "__name__", "+", "\" \"", "+", "extra", "+", "\". There are already \"", "+", "str", "(", "count", ")", "+", "\" instances of this class, which is the maximum.\"", ")", "else", ":", "return", "False", "if", "Class", ".", "OCCURRENCES_PER_SET", ">", "0", "and", "set", "and", "Class", ".", "REQUIRED_ATTRIBS", "and", "Attrib", ".", "CLASS", "in", "Class", ".", "REQUIRED_ATTRIBS", ":", "count", "=", "parent", ".", "count", "(", "Class", ",", "set", ",", "True", ",", "[", "True", ",", "AbstractStructureElement", "]", ")", "if", "count", ">=", "Class", ".", "OCCURRENCES_PER_SET", ":", "if", "raiseexceptions", ":", "if", "parent", ".", "id", ":", "extra", "=", "' (id='", "+", "parent", ".", "id", "+", "')'", "else", ":", "extra", "=", "''", "raise", "DuplicateAnnotationError", "(", "\"Unable to add another object of set \"", "+", "set", "+", "\" and type \"", "+", "Class", ".", "__name__", "+", "\" to \"", "+", "parent", ".", "__class__", ".", "__name__", "+", "\" \"", "+", "extra", "+", "\". 
There are already \"", "+", "str", "(", "count", ")", "+", "\" instances of this class, which is the maximum for the set.\"", ")", "else", ":", "return", "False", "return", "True" ]
"Tests whether a new element of this class can be added to the parent. This method is mostly for internal use. This will use the ``OCCURRENCES`` property, but may be overidden by subclasses for more customised behaviour. Parameters: parent (:class:`AbstractElement`): The element that is being added to set (str or None): The set raiseexceptions (bool): Raise an exception if the element can't be added? Returns: bool Raises: ValueError"
[ "Tests", "whether", "a", "new", "element", "of", "this", "class", "can", "be", "added", "to", "the", "parent", "." ]
"7707f69a91caaa6cde037f0d0379f1d42500a68b"
"https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L1406-L1455"
""
""
""
""
""
""
-1
3
"proycon/pynlpl"
"pynlpl/formats/folia.py"
"AbstractElement.postappend"
"def postappend(self): """This method will be called after an element is added to another and does some checks. It can do extra checks and if necessary raise exceptions to prevent addition. By default makes sure the right document is associated. This method is mostly for internal use. """ #If the element was not associated with a document yet, do so now (and for all unassociated children: if not self.doc and self.parent.doc: self.setdocument(self.parent.doc) if self.doc and self.doc.deepvalidation: self.deepvalidation()"
"python"
"def postappend(self): """This method will be called after an element is added to another and does some checks. It can do extra checks and if necessary raise exceptions to prevent addition. By default makes sure the right document is associated. This method is mostly for internal use. """ #If the element was not associated with a document yet, do so now (and for all unassociated children: if not self.doc and self.parent.doc: self.setdocument(self.parent.doc) if self.doc and self.doc.deepvalidation: self.deepvalidation()"
[ "def", "postappend", "(", "self", ")", ":", "#If the element was not associated with a document yet, do so now (and for all unassociated children:", "if", "not", "self", ".", "doc", "and", "self", ".", "parent", ".", "doc", ":", "self", ".", "setdocument", "(", "self", ".", "parent", ".", "doc", ")", "if", "self", ".", "doc", "and", "self", ".", "doc", ".", "deepvalidation", ":", "self", ".", "deepvalidation", "(", ")" ]
"This method will be called after an element is added to another and does some checks. It can do extra checks and if necessary raise exceptions to prevent addition. By default makes sure the right document is associated. This method is mostly for internal use."
[ "This", "method", "will", "be", "called", "after", "an", "element", "is", "added", "to", "another", "and", "does", "some", "checks", "." ]
"7707f69a91caaa6cde037f0d0379f1d42500a68b"
"https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L1458-L1471"
""
""
""
""
""
""
-1
4
"proycon/pynlpl"
"pynlpl/formats/folia.py"
"AbstractElement.updatetext"
"def updatetext(self): """Recompute textual value based on the text content of the children. Only supported on elements that are a ``TEXTCONTAINER``""" if self.TEXTCONTAINER: s = "" for child in self: if isinstance(child, AbstractElement): child.updatetext() s += child.text() elif isstring(child): s += child self.data = [s]"
"python"
"def updatetext(self): """Recompute textual value based on the text content of the children. Only supported on elements that are a ``TEXTCONTAINER``""" if self.TEXTCONTAINER: s = "" for child in self: if isinstance(child, AbstractElement): child.updatetext() s += child.text() elif isstring(child): s += child self.data = [s]"
[ "def", "updatetext", "(", "self", ")", ":", "if", "self", ".", "TEXTCONTAINER", ":", "s", "=", "\"\"", "for", "child", "in", "self", ":", "if", "isinstance", "(", "child", ",", "AbstractElement", ")", ":", "child", ".", "updatetext", "(", ")", "s", "+=", "child", ".", "text", "(", ")", "elif", "isstring", "(", "child", ")", ":", "s", "+=", "child", "self", ".", "data", "=", "[", "s", "]" ]
"Recompute textual value based on the text content of the children. Only supported on elements that are a ``TEXTCONTAINER``"
[ "Recompute", "textual", "value", "based", "on", "the", "text", "content", "of", "the", "children", ".", "Only", "supported", "on", "elements", "that", "are", "a", "TEXTCONTAINER" ]
"7707f69a91caaa6cde037f0d0379f1d42500a68b"
"https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L1772-L1782"
""
""
""
""
""
""
-1
5
"proycon/pynlpl"
"pynlpl/formats/folia.py"
"AbstractElement.ancestors"
"def ancestors(self, Class=None): """Generator yielding all ancestors of this element, effectively back-tracing its path to the root element. A tuple of multiple classes may be specified. Arguments: *Class: The class or classes (:class:`AbstractElement` or subclasses). Not instances! Yields: elements (instances derived from :class:`AbstractElement`) """ e = self while e: if e.parent: e = e.parent if not Class or isinstance(e,Class): yield e elif isinstance(Class, tuple): for C in Class: if isinstance(e,C): yield e else: break"
"python"
"def ancestors(self, Class=None): """Generator yielding all ancestors of this element, effectively back-tracing its path to the root element. A tuple of multiple classes may be specified. Arguments: *Class: The class or classes (:class:`AbstractElement` or subclasses). Not instances! Yields: elements (instances derived from :class:`AbstractElement`) """ e = self while e: if e.parent: e = e.parent if not Class or isinstance(e,Class): yield e elif isinstance(Class, tuple): for C in Class: if isinstance(e,C): yield e else: break"
[ "def", "ancestors", "(", "self", ",", "Class", "=", "None", ")", ":", "e", "=", "self", "while", "e", ":", "if", "e", ".", "parent", ":", "e", "=", "e", ".", "parent", "if", "not", "Class", "or", "isinstance", "(", "e", ",", "Class", ")", ":", "yield", "e", "elif", "isinstance", "(", "Class", ",", "tuple", ")", ":", "for", "C", "in", "Class", ":", "if", "isinstance", "(", "e", ",", "C", ")", ":", "yield", "e", "else", ":", "break" ]
"Generator yielding all ancestors of this element, effectively back-tracing its path to the root element. A tuple of multiple classes may be specified. Arguments: *Class: The class or classes (:class:`AbstractElement` or subclasses). Not instances! Yields: elements (instances derived from :class:`AbstractElement`)"
[ "Generator", "yielding", "all", "ancestors", "of", "this", "element", "effectively", "back", "-", "tracing", "its", "path", "to", "the", "root", "element", ".", "A", "tuple", "of", "multiple", "classes", "may", "be", "specified", "." ]
"7707f69a91caaa6cde037f0d0379f1d42500a68b"
"https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L1840-L1860"
""
""
""
""
""
""
-1
6
"proycon/pynlpl"
"pynlpl/formats/folia.py"
"AbstractElement.ancestor"
"def ancestor(self, *Classes): """Find the most immediate ancestor of the specified type, multiple classes may be specified. Arguments: *Classes: The possible classes (:class:`AbstractElement` or subclasses) to select from. Not instances! Example:: paragraph = word.ancestor(folia.Paragraph) """ for e in self.ancestors(tuple(Classes)): return e raise NoSuchAnnotation"
"python"
"def ancestor(self, *Classes): """Find the most immediate ancestor of the specified type, multiple classes may be specified. Arguments: *Classes: The possible classes (:class:`AbstractElement` or subclasses) to select from. Not instances! Example:: paragraph = word.ancestor(folia.Paragraph) """ for e in self.ancestors(tuple(Classes)): return e raise NoSuchAnnotation"
[ "def", "ancestor", "(", "self", ",", "*", "Classes", ")", ":", "for", "e", "in", "self", ".", "ancestors", "(", "tuple", "(", "Classes", ")", ")", ":", "return", "e", "raise", "NoSuchAnnotation" ]
"Find the most immediate ancestor of the specified type, multiple classes may be specified. Arguments: *Classes: The possible classes (:class:`AbstractElement` or subclasses) to select from. Not instances! Example:: paragraph = word.ancestor(folia.Paragraph)"
[ "Find", "the", "most", "immediate", "ancestor", "of", "the", "specified", "type", "multiple", "classes", "may", "be", "specified", "." ]
"7707f69a91caaa6cde037f0d0379f1d42500a68b"
"https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L1862-L1874"
""
""
""
""
""
""
-1
7
"proycon/pynlpl"
"pynlpl/formats/folia.py"
"AbstractElement.json"
"def json(self, attribs=None, recurse=True, ignorelist=False): """Serialises the FoLiA element and all its contents to a Python dictionary suitable for serialisation to JSON. Example:: import json json.dumps(word.json()) Returns: dict """ jsonnode = {} jsonnode['type'] = self.XMLTAG if self.id: jsonnode['id'] = self.id if self.set: jsonnode['set'] = self.set if self.cls: jsonnode['class'] = self.cls if self.annotator: jsonnode['annotator'] = self.annotator if self.annotatortype: if self.annotatortype == AnnotatorType.AUTO: jsonnode['annotatortype'] = "auto" elif self.annotatortype == AnnotatorType.MANUAL: jsonnode['annotatortype'] = "manual" if self.confidence is not None: jsonnode['confidence'] = self.confidence if self.n: jsonnode['n'] = self.n if self.auth: jsonnode['auth'] = self.auth if self.datetime: jsonnode['datetime'] = self.datetime.strftime("%Y-%m-%dT%H:%M:%S") if recurse: #pylint: disable=too-many-nested-blocks jsonnode['children'] = [] if self.TEXTCONTAINER: jsonnode['text'] = self.text() if self.PHONCONTAINER: jsonnode['phon'] = self.phon() for child in self: if self.TEXTCONTAINER and isstring(child): jsonnode['children'].append(child) elif not self.PHONCONTAINER: #check ignore list ignore = False if ignorelist: for e in ignorelist: if isinstance(child,e): ignore = True break if not ignore: jsonnode['children'].append(child.json(attribs,recurse,ignorelist)) if attribs: for attrib in attribs: jsonnode[attrib] = attribs return jsonnode"
"python"
"def json(self, attribs=None, recurse=True, ignorelist=False): """Serialises the FoLiA element and all its contents to a Python dictionary suitable for serialisation to JSON. Example:: import json json.dumps(word.json()) Returns: dict """ jsonnode = {} jsonnode['type'] = self.XMLTAG if self.id: jsonnode['id'] = self.id if self.set: jsonnode['set'] = self.set if self.cls: jsonnode['class'] = self.cls if self.annotator: jsonnode['annotator'] = self.annotator if self.annotatortype: if self.annotatortype == AnnotatorType.AUTO: jsonnode['annotatortype'] = "auto" elif self.annotatortype == AnnotatorType.MANUAL: jsonnode['annotatortype'] = "manual" if self.confidence is not None: jsonnode['confidence'] = self.confidence if self.n: jsonnode['n'] = self.n if self.auth: jsonnode['auth'] = self.auth if self.datetime: jsonnode['datetime'] = self.datetime.strftime("%Y-%m-%dT%H:%M:%S") if recurse: #pylint: disable=too-many-nested-blocks jsonnode['children'] = [] if self.TEXTCONTAINER: jsonnode['text'] = self.text() if self.PHONCONTAINER: jsonnode['phon'] = self.phon() for child in self: if self.TEXTCONTAINER and isstring(child): jsonnode['children'].append(child) elif not self.PHONCONTAINER: #check ignore list ignore = False if ignorelist: for e in ignorelist: if isinstance(child,e): ignore = True break if not ignore: jsonnode['children'].append(child.json(attribs,recurse,ignorelist)) if attribs: for attrib in attribs: jsonnode[attrib] = attribs return jsonnode"
[ "def", "json", "(", "self", ",", "attribs", "=", "None", ",", "recurse", "=", "True", ",", "ignorelist", "=", "False", ")", ":", "jsonnode", "=", "{", "}", "jsonnode", "[", "'type'", "]", "=", "self", ".", "XMLTAG", "if", "self", ".", "id", ":", "jsonnode", "[", "'id'", "]", "=", "self", ".", "id", "if", "self", ".", "set", ":", "jsonnode", "[", "'set'", "]", "=", "self", ".", "set", "if", "self", ".", "cls", ":", "jsonnode", "[", "'class'", "]", "=", "self", ".", "cls", "if", "self", ".", "annotator", ":", "jsonnode", "[", "'annotator'", "]", "=", "self", ".", "annotator", "if", "self", ".", "annotatortype", ":", "if", "self", ".", "annotatortype", "==", "AnnotatorType", ".", "AUTO", ":", "jsonnode", "[", "'annotatortype'", "]", "=", "\"auto\"", "elif", "self", ".", "annotatortype", "==", "AnnotatorType", ".", "MANUAL", ":", "jsonnode", "[", "'annotatortype'", "]", "=", "\"manual\"", "if", "self", ".", "confidence", "is", "not", "None", ":", "jsonnode", "[", "'confidence'", "]", "=", "self", ".", "confidence", "if", "self", ".", "n", ":", "jsonnode", "[", "'n'", "]", "=", "self", ".", "n", "if", "self", ".", "auth", ":", "jsonnode", "[", "'auth'", "]", "=", "self", ".", "auth", "if", "self", ".", "datetime", ":", "jsonnode", "[", "'datetime'", "]", "=", "self", ".", "datetime", ".", "strftime", "(", "\"%Y-%m-%dT%H:%M:%S\"", ")", "if", "recurse", ":", "#pylint: disable=too-many-nested-blocks", "jsonnode", "[", "'children'", "]", "=", "[", "]", "if", "self", ".", "TEXTCONTAINER", ":", "jsonnode", "[", "'text'", "]", "=", "self", ".", "text", "(", ")", "if", "self", ".", "PHONCONTAINER", ":", "jsonnode", "[", "'phon'", "]", "=", "self", ".", "phon", "(", ")", "for", "child", "in", "self", ":", "if", "self", ".", "TEXTCONTAINER", "and", "isstring", "(", "child", ")", ":", "jsonnode", "[", "'children'", "]", ".", "append", "(", "child", ")", "elif", "not", "self", ".", "PHONCONTAINER", ":", "#check ignore list", "ignore", "=", "False", "if", "ignorelist", ":", "for", 
"e", "in", "ignorelist", ":", "if", "isinstance", "(", "child", ",", "e", ")", ":", "ignore", "=", "True", "break", "if", "not", "ignore", ":", "jsonnode", "[", "'children'", "]", ".", "append", "(", "child", ".", "json", "(", "attribs", ",", "recurse", ",", "ignorelist", ")", ")", "if", "attribs", ":", "for", "attrib", "in", "attribs", ":", "jsonnode", "[", "attrib", "]", "=", "attribs", "return", "jsonnode" ]
"Serialises the FoLiA element and all its contents to a Python dictionary suitable for serialisation to JSON. Example:: import json json.dumps(word.json()) Returns: dict"
[ "Serialises", "the", "FoLiA", "element", "and", "all", "its", "contents", "to", "a", "Python", "dictionary", "suitable", "for", "serialisation", "to", "JSON", "." ]
"7707f69a91caaa6cde037f0d0379f1d42500a68b"
"https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L2050-L2110"
""
""
""
""
""
""
-1
8
"proycon/pynlpl"
"pynlpl/formats/folia.py"
"AbstractElement.xmlstring"
"def xmlstring(self, pretty_print=False): """Serialises this FoLiA element and all its contents to XML. Returns: str: a string with XML representation for this element and all its children""" s = ElementTree.tostring(self.xml(), xml_declaration=False, pretty_print=pretty_print, encoding='utf-8') if sys.version < '3': if isinstance(s, str): s = unicode(s,'utf-8') #pylint: disable=undefined-variable else: if isinstance(s,bytes): s = str(s,'utf-8') s = s.replace('ns0:','') #ugly patch to get rid of namespace prefix s = s.replace(':ns0','') return s"
"python"
"def xmlstring(self, pretty_print=False): """Serialises this FoLiA element and all its contents to XML. Returns: str: a string with XML representation for this element and all its children""" s = ElementTree.tostring(self.xml(), xml_declaration=False, pretty_print=pretty_print, encoding='utf-8') if sys.version < '3': if isinstance(s, str): s = unicode(s,'utf-8') #pylint: disable=undefined-variable else: if isinstance(s,bytes): s = str(s,'utf-8') s = s.replace('ns0:','') #ugly patch to get rid of namespace prefix s = s.replace(':ns0','') return s"
[ "def", "xmlstring", "(", "self", ",", "pretty_print", "=", "False", ")", ":", "s", "=", "ElementTree", ".", "tostring", "(", "self", ".", "xml", "(", ")", ",", "xml_declaration", "=", "False", ",", "pretty_print", "=", "pretty_print", ",", "encoding", "=", "'utf-8'", ")", "if", "sys", ".", "version", "<", "'3'", ":", "if", "isinstance", "(", "s", ",", "str", ")", ":", "s", "=", "unicode", "(", "s", ",", "'utf-8'", ")", "#pylint: disable=undefined-variable", "else", ":", "if", "isinstance", "(", "s", ",", "bytes", ")", ":", "s", "=", "str", "(", "s", ",", "'utf-8'", ")", "s", "=", "s", ".", "replace", "(", "'ns0:'", ",", "''", ")", "#ugly patch to get rid of namespace prefix", "s", "=", "s", ".", "replace", "(", "':ns0'", ",", "''", ")", "return", "s" ]
"Serialises this FoLiA element and all its contents to XML. Returns: str: a string with XML representation for this element and all its children"
[ "Serialises", "this", "FoLiA", "element", "and", "all", "its", "contents", "to", "XML", "." ]
"7707f69a91caaa6cde037f0d0379f1d42500a68b"
"https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L2114-L2129"
""
""
""
""
""
""
-1
9
"proycon/pynlpl"
"pynlpl/formats/folia.py"
"AbstractElement.select"
"def select(self, Class, set=None, recursive=True, ignore=True, node=None): #pylint: disable=bad-classmethod-argument,redefined-builtin """Select child elements of the specified class. A further restriction can be made based on set. Arguments: Class (class): The class to select; any python class (not instance) subclassed off :class:`AbstractElement` Set (str): The set to match against, only elements pertaining to this set will be returned. If set to None (default), all elements regardless of set will be returned. recursive (bool): Select recursively? Descending into child elements? Defaults to ``True``. ignore: A list of Classes to ignore, if set to ``True`` instead of a list, all non-authoritative elements will be skipped (this is the default behaviour and corresponds to the following elements: :class:`Alternative`, :class:`AlternativeLayer`, :class:`Suggestion`, and :class:`folia.Original`. These elements and those contained within are never *authorative*. You may also include the boolean True as a member of a list, if you want to skip additional tags along the predefined non-authoritative ones. * ``node``: Reserved for internal usage, used in recursion. Yields: Elements (instances derived from :class:`AbstractElement`) Example:: for sense in text.select(folia.Sense, 'cornetto', True, [folia.Original, folia.Suggestion, folia.Alternative] ): .. """ #if ignorelist is True: # ignorelist = default_ignore if not node: node = self for e in self.data: #pylint: disable=too-many-nested-blocks if (not self.TEXTCONTAINER and not self.PHONCONTAINER) or isinstance(e, AbstractElement): if ignore is True: try: if not e.auth: continue except AttributeError: #not all elements have auth attribute.. pass elif ignore: #list doignore = False for c in ignore: if c is True: try: if not e.auth: doignore =True break except AttributeError: #not all elements have auth attribute.. 
pass elif c == e.__class__ or issubclass(e.__class__,c): doignore = True break if doignore: continue if isinstance(e, Class): if not set is None: try: if e.set != set: continue except AttributeError: continue yield e if recursive: for e2 in e.select(Class, set, recursive, ignore, e): if not set is None: try: if e2.set != set: continue except AttributeError: continue yield e2"
"python"
"def select(self, Class, set=None, recursive=True, ignore=True, node=None): #pylint: disable=bad-classmethod-argument,redefined-builtin """Select child elements of the specified class. A further restriction can be made based on set. Arguments: Class (class): The class to select; any python class (not instance) subclassed off :class:`AbstractElement` Set (str): The set to match against, only elements pertaining to this set will be returned. If set to None (default), all elements regardless of set will be returned. recursive (bool): Select recursively? Descending into child elements? Defaults to ``True``. ignore: A list of Classes to ignore, if set to ``True`` instead of a list, all non-authoritative elements will be skipped (this is the default behaviour and corresponds to the following elements: :class:`Alternative`, :class:`AlternativeLayer`, :class:`Suggestion`, and :class:`folia.Original`. These elements and those contained within are never *authorative*. You may also include the boolean True as a member of a list, if you want to skip additional tags along the predefined non-authoritative ones. * ``node``: Reserved for internal usage, used in recursion. Yields: Elements (instances derived from :class:`AbstractElement`) Example:: for sense in text.select(folia.Sense, 'cornetto', True, [folia.Original, folia.Suggestion, folia.Alternative] ): .. """ #if ignorelist is True: # ignorelist = default_ignore if not node: node = self for e in self.data: #pylint: disable=too-many-nested-blocks if (not self.TEXTCONTAINER and not self.PHONCONTAINER) or isinstance(e, AbstractElement): if ignore is True: try: if not e.auth: continue except AttributeError: #not all elements have auth attribute.. pass elif ignore: #list doignore = False for c in ignore: if c is True: try: if not e.auth: doignore =True break except AttributeError: #not all elements have auth attribute.. 
pass elif c == e.__class__ or issubclass(e.__class__,c): doignore = True break if doignore: continue if isinstance(e, Class): if not set is None: try: if e.set != set: continue except AttributeError: continue yield e if recursive: for e2 in e.select(Class, set, recursive, ignore, e): if not set is None: try: if e2.set != set: continue except AttributeError: continue yield e2"
[ "def", "select", "(", "self", ",", "Class", ",", "set", "=", "None", ",", "recursive", "=", "True", ",", "ignore", "=", "True", ",", "node", "=", "None", ")", ":", "#pylint: disable=bad-classmethod-argument,redefined-builtin", "#if ignorelist is True:", "# ignorelist = default_ignore", "if", "not", "node", ":", "node", "=", "self", "for", "e", "in", "self", ".", "data", ":", "#pylint: disable=too-many-nested-blocks", "if", "(", "not", "self", ".", "TEXTCONTAINER", "and", "not", "self", ".", "PHONCONTAINER", ")", "or", "isinstance", "(", "e", ",", "AbstractElement", ")", ":", "if", "ignore", "is", "True", ":", "try", ":", "if", "not", "e", ".", "auth", ":", "continue", "except", "AttributeError", ":", "#not all elements have auth attribute..", "pass", "elif", "ignore", ":", "#list", "doignore", "=", "False", "for", "c", "in", "ignore", ":", "if", "c", "is", "True", ":", "try", ":", "if", "not", "e", ".", "auth", ":", "doignore", "=", "True", "break", "except", "AttributeError", ":", "#not all elements have auth attribute..", "pass", "elif", "c", "==", "e", ".", "__class__", "or", "issubclass", "(", "e", ".", "__class__", ",", "c", ")", ":", "doignore", "=", "True", "break", "if", "doignore", ":", "continue", "if", "isinstance", "(", "e", ",", "Class", ")", ":", "if", "not", "set", "is", "None", ":", "try", ":", "if", "e", ".", "set", "!=", "set", ":", "continue", "except", "AttributeError", ":", "continue", "yield", "e", "if", "recursive", ":", "for", "e2", "in", "e", ".", "select", "(", "Class", ",", "set", ",", "recursive", ",", "ignore", ",", "e", ")", ":", "if", "not", "set", "is", "None", ":", "try", ":", "if", "e2", ".", "set", "!=", "set", ":", "continue", "except", "AttributeError", ":", "continue", "yield", "e2" ]
"Select child elements of the specified class. A further restriction can be made based on set. Arguments: Class (class): The class to select; any python class (not instance) subclassed off :class:`AbstractElement` Set (str): The set to match against, only elements pertaining to this set will be returned. If set to None (default), all elements regardless of set will be returned. recursive (bool): Select recursively? Descending into child elements? Defaults to ``True``. ignore: A list of Classes to ignore, if set to ``True`` instead of a list, all non-authoritative elements will be skipped (this is the default behaviour and corresponds to the following elements: :class:`Alternative`, :class:`AlternativeLayer`, :class:`Suggestion`, and :class:`folia.Original`. These elements and those contained within are never *authorative*. You may also include the boolean True as a member of a list, if you want to skip additional tags along the predefined non-authoritative ones. * ``node``: Reserved for internal usage, used in recursion. Yields: Elements (instances derived from :class:`AbstractElement`) Example:: for sense in text.select(folia.Sense, 'cornetto', True, [folia.Original, folia.Suggestion, folia.Alternative] ): .."
[ "Select", "child", "elements", "of", "the", "specified", "class", "." ]
"7707f69a91caaa6cde037f0d0379f1d42500a68b"
"https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L2132-L2201"
""
""
""
""
""
""
-1
10
"proycon/pynlpl"
"pynlpl/formats/folia.py"
"AbstractElement.getmetadata"
"def getmetadata(self, key=None): """Get the metadata that applies to this element, automatically inherited from parent elements""" if self.metadata: d = self.doc.submetadata[self.metadata] elif self.parent: d = self.parent.getmetadata() elif self.doc: d = self.doc.metadata else: return None if key: return d[key] else: return d"
"python"
"def getmetadata(self, key=None): """Get the metadata that applies to this element, automatically inherited from parent elements""" if self.metadata: d = self.doc.submetadata[self.metadata] elif self.parent: d = self.parent.getmetadata() elif self.doc: d = self.doc.metadata else: return None if key: return d[key] else: return d"
[ "def", "getmetadata", "(", "self", ",", "key", "=", "None", ")", ":", "if", "self", ".", "metadata", ":", "d", "=", "self", ".", "doc", ".", "submetadata", "[", "self", ".", "metadata", "]", "elif", "self", ".", "parent", ":", "d", "=", "self", ".", "parent", ".", "getmetadata", "(", ")", "elif", "self", ".", "doc", ":", "d", "=", "self", ".", "doc", ".", "metadata", "else", ":", "return", "None", "if", "key", ":", "return", "d", "[", "key", "]", "else", ":", "return", "d" ]
"Get the metadata that applies to this element, automatically inherited from parent elements"
[ "Get", "the", "metadata", "that", "applies", "to", "this", "element", "automatically", "inherited", "from", "parent", "elements" ]
"7707f69a91caaa6cde037f0d0379f1d42500a68b"
"https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L2221-L2234"
""
""
""
""
""
""
-1
11
"proycon/pynlpl"
"pynlpl/formats/folia.py"
"AbstractElement.getindex"
"def getindex(self, child, recursive=True, ignore=True): """Get the index at which an element occurs, recursive by default! Returns: int """ #breadth first search for i, c in enumerate(self.data): if c is child: return i if recursive: #pylint: disable=too-many-nested-blocks for i, c in enumerate(self.data): if ignore is True: try: if not c.auth: continue except AttributeError: #not all elements have auth attribute.. pass elif ignore: #list doignore = False for e in ignore: if e is True: try: if not c.auth: doignore =True break except AttributeError: #not all elements have auth attribute.. pass elif e == c.__class__ or issubclass(c.__class__,e): doignore = True break if doignore: continue if isinstance(c, AbstractElement): j = c.getindex(child, recursive) if j != -1: return i #yes, i ... not j! return -1"
"python"
"def getindex(self, child, recursive=True, ignore=True): """Get the index at which an element occurs, recursive by default! Returns: int """ #breadth first search for i, c in enumerate(self.data): if c is child: return i if recursive: #pylint: disable=too-many-nested-blocks for i, c in enumerate(self.data): if ignore is True: try: if not c.auth: continue except AttributeError: #not all elements have auth attribute.. pass elif ignore: #list doignore = False for e in ignore: if e is True: try: if not c.auth: doignore =True break except AttributeError: #not all elements have auth attribute.. pass elif e == c.__class__ or issubclass(c.__class__,e): doignore = True break if doignore: continue if isinstance(c, AbstractElement): j = c.getindex(child, recursive) if j != -1: return i #yes, i ... not j! return -1"
[ "def", "getindex", "(", "self", ",", "child", ",", "recursive", "=", "True", ",", "ignore", "=", "True", ")", ":", "#breadth first search", "for", "i", ",", "c", "in", "enumerate", "(", "self", ".", "data", ")", ":", "if", "c", "is", "child", ":", "return", "i", "if", "recursive", ":", "#pylint: disable=too-many-nested-blocks", "for", "i", ",", "c", "in", "enumerate", "(", "self", ".", "data", ")", ":", "if", "ignore", "is", "True", ":", "try", ":", "if", "not", "c", ".", "auth", ":", "continue", "except", "AttributeError", ":", "#not all elements have auth attribute..", "pass", "elif", "ignore", ":", "#list", "doignore", "=", "False", "for", "e", "in", "ignore", ":", "if", "e", "is", "True", ":", "try", ":", "if", "not", "c", ".", "auth", ":", "doignore", "=", "True", "break", "except", "AttributeError", ":", "#not all elements have auth attribute..", "pass", "elif", "e", "==", "c", ".", "__class__", "or", "issubclass", "(", "c", ".", "__class__", ",", "e", ")", ":", "doignore", "=", "True", "break", "if", "doignore", ":", "continue", "if", "isinstance", "(", "c", ",", "AbstractElement", ")", ":", "j", "=", "c", ".", "getindex", "(", "child", ",", "recursive", ")", "if", "j", "!=", "-", "1", ":", "return", "i", "#yes, i ... not j!", "return", "-", "1" ]
"Get the index at which an element occurs, recursive by default! Returns: int"
[ "Get", "the", "index", "at", "which", "an", "element", "occurs", "recursive", "by", "default!" ]
"7707f69a91caaa6cde037f0d0379f1d42500a68b"
"https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L2238-L2278"
""
""
""
""
""
""
-1
12
"proycon/pynlpl"
"pynlpl/formats/folia.py"
"AbstractElement.precedes"
"def precedes(self, other): """Returns a boolean indicating whether this element precedes the other element""" try: ancestor = next(commonancestors(AbstractElement, self, other)) except StopIteration: raise Exception("Elements share no common ancestor") #now we just do a depth first search and see who comes first def callback(e): if e is self: return True elif e is other: return False return None result = ancestor.depthfirstsearch(callback) if result is None: raise Exception("Unable to find relation between elements! (shouldn't happen)") return result"
"python"
"def precedes(self, other): """Returns a boolean indicating whether this element precedes the other element""" try: ancestor = next(commonancestors(AbstractElement, self, other)) except StopIteration: raise Exception("Elements share no common ancestor") #now we just do a depth first search and see who comes first def callback(e): if e is self: return True elif e is other: return False return None result = ancestor.depthfirstsearch(callback) if result is None: raise Exception("Unable to find relation between elements! (shouldn't happen)") return result"
[ "def", "precedes", "(", "self", ",", "other", ")", ":", "try", ":", "ancestor", "=", "next", "(", "commonancestors", "(", "AbstractElement", ",", "self", ",", "other", ")", ")", "except", "StopIteration", ":", "raise", "Exception", "(", "\"Elements share no common ancestor\"", ")", "#now we just do a depth first search and see who comes first", "def", "callback", "(", "e", ")", ":", "if", "e", "is", "self", ":", "return", "True", "elif", "e", "is", "other", ":", "return", "False", "return", "None", "result", "=", "ancestor", ".", "depthfirstsearch", "(", "callback", ")", "if", "result", "is", "None", ":", "raise", "Exception", "(", "\"Unable to find relation between elements! (shouldn't happen)\"", ")", "return", "result" ]
"Returns a boolean indicating whether this element precedes the other element"
[ "Returns", "a", "boolean", "indicating", "whether", "this", "element", "precedes", "the", "other", "element" ]
"7707f69a91caaa6cde037f0d0379f1d42500a68b"
"https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L2280-L2296"
""
""
""
""
""
""
-1
13
"proycon/pynlpl"
"pynlpl/formats/folia.py"
"AbstractElement.depthfirstsearch"
"def depthfirstsearch(self, function): """Generic depth first search algorithm using a callback function, continues as long as the callback function returns None""" result = function(self) if result is not None: return result for e in self: result = e.depthfirstsearch(function) if result is not None: return result return None"
"python"
"def depthfirstsearch(self, function): """Generic depth first search algorithm using a callback function, continues as long as the callback function returns None""" result = function(self) if result is not None: return result for e in self: result = e.depthfirstsearch(function) if result is not None: return result return None"
[ "def", "depthfirstsearch", "(", "self", ",", "function", ")", ":", "result", "=", "function", "(", "self", ")", "if", "result", "is", "not", "None", ":", "return", "result", "for", "e", "in", "self", ":", "result", "=", "e", ".", "depthfirstsearch", "(", "function", ")", "if", "result", "is", "not", "None", ":", "return", "result", "return", "None" ]
"Generic depth first search algorithm using a callback function, continues as long as the callback function returns None"
[ "Generic", "depth", "first", "search", "algorithm", "using", "a", "callback", "function", "continues", "as", "long", "as", "the", "callback", "function", "returns", "None" ]
"7707f69a91caaa6cde037f0d0379f1d42500a68b"
"https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L2299-L2308"
""
""
""
""
""
""
-1
14
"proycon/pynlpl"
"pynlpl/formats/folia.py"
"AbstractElement.next"
"def next(self, Class=True, scope=True, reverse=False): """Returns the next element, if it is of the specified type and if it does not cross the boundary of the defined scope. Returns None if no next element is found. Non-authoritative elements are never returned. Arguments: * ``Class``: The class to select; any python class subclassed off `'AbstractElement``, may also be a tuple of multiple classes. Set to ``True`` to constrain to the same class as that of the current instance, set to ``None`` to not constrain at all * ``scope``: A list of classes which are never crossed looking for a next element. Set to ``True`` to constrain to a default list of structure elements (Sentence,Paragraph,Division,Event, ListItem,Caption), set to ``None`` to not constrain at all. """ if Class is True: Class = self.__class__ if scope is True: scope = STRUCTURESCOPE structural = Class is not None and issubclass(Class,AbstractStructureElement) if reverse: order = reversed descendindex = -1 else: order = lambda x: x #pylint: disable=redefined-variable-type descendindex = 0 child = self parent = self.parent while parent: #pylint: disable=too-many-nested-blocks if len(parent) > 1: returnnext = False for e in order(parent): if e is child: #we found the current item, next item will be the one to return returnnext = True elif returnnext and e.auth and not isinstance(e,AbstractAnnotationLayer) and (not structural or (structural and (not isinstance(e,(AbstractTokenAnnotation,TextContent)) ) )): if structural and isinstance(e,Correction): if not list(e.select(AbstractStructureElement)): #skip-over non-structural correction continue if Class is None or (isinstance(Class,tuple) and (any(isinstance(e,C) for C in Class))) or isinstance(e,Class): return e else: #this is not yet the element of the type we are looking for, we are going to descend again in the very leftmost (rightmost if reversed) branch only while e.data: e = e.data[descendindex] if not isinstance(e, AbstractElement): return None 
#we've gone too far if e.auth and not isinstance(e,AbstractAnnotationLayer): if Class is None or (isinstance(Class,tuple) and (any(isinstance(e,C) for C in Class))) or isinstance(e,Class): return e else: #descend deeper continue return None #generational iteration child = parent if scope is not None and child.__class__ in scope: #you shall not pass! break parent = parent.parent return None"
"python"
"def next(self, Class=True, scope=True, reverse=False): """Returns the next element, if it is of the specified type and if it does not cross the boundary of the defined scope. Returns None if no next element is found. Non-authoritative elements are never returned. Arguments: * ``Class``: The class to select; any python class subclassed off `'AbstractElement``, may also be a tuple of multiple classes. Set to ``True`` to constrain to the same class as that of the current instance, set to ``None`` to not constrain at all * ``scope``: A list of classes which are never crossed looking for a next element. Set to ``True`` to constrain to a default list of structure elements (Sentence,Paragraph,Division,Event, ListItem,Caption), set to ``None`` to not constrain at all. """ if Class is True: Class = self.__class__ if scope is True: scope = STRUCTURESCOPE structural = Class is not None and issubclass(Class,AbstractStructureElement) if reverse: order = reversed descendindex = -1 else: order = lambda x: x #pylint: disable=redefined-variable-type descendindex = 0 child = self parent = self.parent while parent: #pylint: disable=too-many-nested-blocks if len(parent) > 1: returnnext = False for e in order(parent): if e is child: #we found the current item, next item will be the one to return returnnext = True elif returnnext and e.auth and not isinstance(e,AbstractAnnotationLayer) and (not structural or (structural and (not isinstance(e,(AbstractTokenAnnotation,TextContent)) ) )): if structural and isinstance(e,Correction): if not list(e.select(AbstractStructureElement)): #skip-over non-structural correction continue if Class is None or (isinstance(Class,tuple) and (any(isinstance(e,C) for C in Class))) or isinstance(e,Class): return e else: #this is not yet the element of the type we are looking for, we are going to descend again in the very leftmost (rightmost if reversed) branch only while e.data: e = e.data[descendindex] if not isinstance(e, AbstractElement): return None 
#we've gone too far if e.auth and not isinstance(e,AbstractAnnotationLayer): if Class is None or (isinstance(Class,tuple) and (any(isinstance(e,C) for C in Class))) or isinstance(e,Class): return e else: #descend deeper continue return None #generational iteration child = parent if scope is not None and child.__class__ in scope: #you shall not pass! break parent = parent.parent return None"
[ "def", "next", "(", "self", ",", "Class", "=", "True", ",", "scope", "=", "True", ",", "reverse", "=", "False", ")", ":", "if", "Class", "is", "True", ":", "Class", "=", "self", ".", "__class__", "if", "scope", "is", "True", ":", "scope", "=", "STRUCTURESCOPE", "structural", "=", "Class", "is", "not", "None", "and", "issubclass", "(", "Class", ",", "AbstractStructureElement", ")", "if", "reverse", ":", "order", "=", "reversed", "descendindex", "=", "-", "1", "else", ":", "order", "=", "lambda", "x", ":", "x", "#pylint: disable=redefined-variable-type", "descendindex", "=", "0", "child", "=", "self", "parent", "=", "self", ".", "parent", "while", "parent", ":", "#pylint: disable=too-many-nested-blocks", "if", "len", "(", "parent", ")", ">", "1", ":", "returnnext", "=", "False", "for", "e", "in", "order", "(", "parent", ")", ":", "if", "e", "is", "child", ":", "#we found the current item, next item will be the one to return", "returnnext", "=", "True", "elif", "returnnext", "and", "e", ".", "auth", "and", "not", "isinstance", "(", "e", ",", "AbstractAnnotationLayer", ")", "and", "(", "not", "structural", "or", "(", "structural", "and", "(", "not", "isinstance", "(", "e", ",", "(", "AbstractTokenAnnotation", ",", "TextContent", ")", ")", ")", ")", ")", ":", "if", "structural", "and", "isinstance", "(", "e", ",", "Correction", ")", ":", "if", "not", "list", "(", "e", ".", "select", "(", "AbstractStructureElement", ")", ")", ":", "#skip-over non-structural correction", "continue", "if", "Class", "is", "None", "or", "(", "isinstance", "(", "Class", ",", "tuple", ")", "and", "(", "any", "(", "isinstance", "(", "e", ",", "C", ")", "for", "C", "in", "Class", ")", ")", ")", "or", "isinstance", "(", "e", ",", "Class", ")", ":", "return", "e", "else", ":", "#this is not yet the element of the type we are looking for, we are going to descend again in the very leftmost (rightmost if reversed) branch only", "while", "e", ".", "data", ":", "e", "=", "e", ".", "data", "[", 
"descendindex", "]", "if", "not", "isinstance", "(", "e", ",", "AbstractElement", ")", ":", "return", "None", "#we've gone too far", "if", "e", ".", "auth", "and", "not", "isinstance", "(", "e", ",", "AbstractAnnotationLayer", ")", ":", "if", "Class", "is", "None", "or", "(", "isinstance", "(", "Class", ",", "tuple", ")", "and", "(", "any", "(", "isinstance", "(", "e", ",", "C", ")", "for", "C", "in", "Class", ")", ")", ")", "or", "isinstance", "(", "e", ",", "Class", ")", ":", "return", "e", "else", ":", "#descend deeper", "continue", "return", "None", "#generational iteration", "child", "=", "parent", "if", "scope", "is", "not", "None", "and", "child", ".", "__class__", "in", "scope", ":", "#you shall not pass!", "break", "parent", "=", "parent", ".", "parent", "return", "None" ]
"Returns the next element, if it is of the specified type and if it does not cross the boundary of the defined scope. Returns None if no next element is found. Non-authoritative elements are never returned. Arguments: * ``Class``: The class to select; any python class subclassed off `'AbstractElement``, may also be a tuple of multiple classes. Set to ``True`` to constrain to the same class as that of the current instance, set to ``None`` to not constrain at all * ``scope``: A list of classes which are never crossed looking for a next element. Set to ``True`` to constrain to a default list of structure elements (Sentence,Paragraph,Division,Event, ListItem,Caption), set to ``None`` to not constrain at all."
[ "Returns", "the", "next", "element", "if", "it", "is", "of", "the", "specified", "type", "and", "if", "it", "does", "not", "cross", "the", "boundary", "of", "the", "defined", "scope", ".", "Returns", "None", "if", "no", "next", "element", "is", "found", ".", "Non", "-", "authoritative", "elements", "are", "never", "returned", "." ]
"7707f69a91caaa6cde037f0d0379f1d42500a68b"
"https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L2310-L2367"
""
""
""
""
""
""
-1
15
"proycon/pynlpl"
"pynlpl/formats/folia.py"
"AbstractElement.previous"
"def previous(self, Class=True, scope=True): """Returns the previous element, if it is of the specified type and if it does not cross the boundary of the defined scope. Returns None if no next element is found. Non-authoritative elements are never returned. Arguments: * ``Class``: The class to select; any python class subclassed off `'AbstractElement``. Set to ``True`` to constrain to the same class as that of the current instance, set to ``None`` to not constrain at all * ``scope``: A list of classes which are never crossed looking for a next element. Set to ``True`` to constrain to a default list of structure elements (Sentence,Paragraph,Division,Event, ListItem,Caption), set to ``None`` to not constrain at all. """ return self.next(Class,scope, True)"
"python"
"def previous(self, Class=True, scope=True): """Returns the previous element, if it is of the specified type and if it does not cross the boundary of the defined scope. Returns None if no next element is found. Non-authoritative elements are never returned. Arguments: * ``Class``: The class to select; any python class subclassed off `'AbstractElement``. Set to ``True`` to constrain to the same class as that of the current instance, set to ``None`` to not constrain at all * ``scope``: A list of classes which are never crossed looking for a next element. Set to ``True`` to constrain to a default list of structure elements (Sentence,Paragraph,Division,Event, ListItem,Caption), set to ``None`` to not constrain at all. """ return self.next(Class,scope, True)"
[ "def", "previous", "(", "self", ",", "Class", "=", "True", ",", "scope", "=", "True", ")", ":", "return", "self", ".", "next", "(", "Class", ",", "scope", ",", "True", ")" ]
"Returns the previous element, if it is of the specified type and if it does not cross the boundary of the defined scope. Returns None if no next element is found. Non-authoritative elements are never returned. Arguments: * ``Class``: The class to select; any python class subclassed off `'AbstractElement``. Set to ``True`` to constrain to the same class as that of the current instance, set to ``None`` to not constrain at all * ``scope``: A list of classes which are never crossed looking for a next element. Set to ``True`` to constrain to a default list of structure elements (Sentence,Paragraph,Division,Event, ListItem,Caption), set to ``None`` to not constrain at all."
[ "Returns", "the", "previous", "element", "if", "it", "is", "of", "the", "specified", "type", "and", "if", "it", "does", "not", "cross", "the", "boundary", "of", "the", "defined", "scope", ".", "Returns", "None", "if", "no", "next", "element", "is", "found", ".", "Non", "-", "authoritative", "elements", "are", "never", "returned", "." ]
"7707f69a91caaa6cde037f0d0379f1d42500a68b"
"https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L2371-L2379"
""
""
""
""
""
""
-1
16
"proycon/pynlpl"
"pynlpl/formats/folia.py"
"AbstractElement.remove"
"def remove(self, child): """Removes the child element""" if not isinstance(child, AbstractElement): raise ValueError("Expected AbstractElement, got " + str(type(child))) if child.parent == self: child.parent = None self.data.remove(child) #delete from index if child.id and self.doc and child.id in self.doc.index: del self.doc.index[child.id]"
"python"
"def remove(self, child): """Removes the child element""" if not isinstance(child, AbstractElement): raise ValueError("Expected AbstractElement, got " + str(type(child))) if child.parent == self: child.parent = None self.data.remove(child) #delete from index if child.id and self.doc and child.id in self.doc.index: del self.doc.index[child.id]"
[ "def", "remove", "(", "self", ",", "child", ")", ":", "if", "not", "isinstance", "(", "child", ",", "AbstractElement", ")", ":", "raise", "ValueError", "(", "\"Expected AbstractElement, got \"", "+", "str", "(", "type", "(", "child", ")", ")", ")", "if", "child", ".", "parent", "==", "self", ":", "child", ".", "parent", "=", "None", "self", ".", "data", ".", "remove", "(", "child", ")", "#delete from index", "if", "child", ".", "id", "and", "self", ".", "doc", "and", "child", ".", "id", "in", "self", ".", "doc", ".", "index", ":", "del", "self", ".", "doc", ".", "index", "[", "child", ".", "id", "]" ]
"Removes the child element"
[ "Removes", "the", "child", "element" ]
"7707f69a91caaa6cde037f0d0379f1d42500a68b"
"https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L2729-L2738"
""
""
""
""
""
""
-1
17
"proycon/pynlpl"
"pynlpl/formats/folia.py"
"AllowTokenAnnotation.hasannotation"
"def hasannotation(self,Class,set=None): """Returns an integer indicating whether such as annotation exists, and if so, how many. See :meth:`AllowTokenAnnotation.annotations`` for a description of the parameters.""" return sum( 1 for _ in self.select(Class,set,True,default_ignore_annotations))"
"python"
"def hasannotation(self,Class,set=None): """Returns an integer indicating whether such as annotation exists, and if so, how many. See :meth:`AllowTokenAnnotation.annotations`` for a description of the parameters.""" return sum( 1 for _ in self.select(Class,set,True,default_ignore_annotations))"
[ "def", "hasannotation", "(", "self", ",", "Class", ",", "set", "=", "None", ")", ":", "return", "sum", "(", "1", "for", "_", "in", "self", ".", "select", "(", "Class", ",", "set", ",", "True", ",", "default_ignore_annotations", ")", ")" ]
"Returns an integer indicating whether such as annotation exists, and if so, how many. See :meth:`AllowTokenAnnotation.annotations`` for a description of the parameters."
[ "Returns", "an", "integer", "indicating", "whether", "such", "as", "annotation", "exists", "and", "if", "so", "how", "many", "." ]
"7707f69a91caaa6cde037f0d0379f1d42500a68b"
"https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L3046-L3050"
""
""
""
""
""
""
-1
18
"proycon/pynlpl"
"pynlpl/formats/folia.py"
"AllowTokenAnnotation.annotation"
"def annotation(self, type, set=None): """Obtain a single annotation element. A further restriction can be made based on set. Arguments: Class (class): The class to select; any python class (not instance) subclassed off :class:`AbstractElement` Set (str): The set to match against, only elements pertaining to this set will be returned. If set to None (default), all elements regardless of set will be returned. Returns: An element (instance derived from :class:`AbstractElement`) Example:: sense = word.annotation(folia.Sense, 'http://some/path/cornetto').cls See also: :meth:`AllowTokenAnnotation.annotations` :meth:`AbstractElement.select` Raises: :class:`NoSuchAnnotation` if no such annotation exists """ """Will return a **single** annotation (even if there are multiple). Raises a ``NoSuchAnnotation`` exception if none was found""" for e in self.select(type,set,True,default_ignore_annotations): return e raise NoSuchAnnotation()"
"python"
"def annotation(self, type, set=None): """Obtain a single annotation element. A further restriction can be made based on set. Arguments: Class (class): The class to select; any python class (not instance) subclassed off :class:`AbstractElement` Set (str): The set to match against, only elements pertaining to this set will be returned. If set to None (default), all elements regardless of set will be returned. Returns: An element (instance derived from :class:`AbstractElement`) Example:: sense = word.annotation(folia.Sense, 'http://some/path/cornetto').cls See also: :meth:`AllowTokenAnnotation.annotations` :meth:`AbstractElement.select` Raises: :class:`NoSuchAnnotation` if no such annotation exists """ """Will return a **single** annotation (even if there are multiple). Raises a ``NoSuchAnnotation`` exception if none was found""" for e in self.select(type,set,True,default_ignore_annotations): return e raise NoSuchAnnotation()"
[ "def", "annotation", "(", "self", ",", "type", ",", "set", "=", "None", ")", ":", "\"\"\"Will return a **single** annotation (even if there are multiple). Raises a ``NoSuchAnnotation`` exception if none was found\"\"\"", "for", "e", "in", "self", ".", "select", "(", "type", ",", "set", ",", "True", ",", "default_ignore_annotations", ")", ":", "return", "e", "raise", "NoSuchAnnotation", "(", ")" ]
"Obtain a single annotation element. A further restriction can be made based on set. Arguments: Class (class): The class to select; any python class (not instance) subclassed off :class:`AbstractElement` Set (str): The set to match against, only elements pertaining to this set will be returned. If set to None (default), all elements regardless of set will be returned. Returns: An element (instance derived from :class:`AbstractElement`) Example:: sense = word.annotation(folia.Sense, 'http://some/path/cornetto').cls See also: :meth:`AllowTokenAnnotation.annotations` :meth:`AbstractElement.select` Raises: :class:`NoSuchAnnotation` if no such annotation exists"
[ "Obtain", "a", "single", "annotation", "element", "." ]
"7707f69a91caaa6cde037f0d0379f1d42500a68b"
"https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L3052-L3078"
""
""
""
""
""
""
-1
19
"proycon/pynlpl"
"pynlpl/formats/folia.py"
"AbstractStructureElement.hasannotationlayer"
"def hasannotationlayer(self, annotationtype=None,set=None): """Does the specified annotation layer exist?""" l = self.layers(annotationtype, set) return (len(l) > 0)"
"python"
"def hasannotationlayer(self, annotationtype=None,set=None): """Does the specified annotation layer exist?""" l = self.layers(annotationtype, set) return (len(l) > 0)"
[ "def", "hasannotationlayer", "(", "self", ",", "annotationtype", "=", "None", ",", "set", "=", "None", ")", ":", "l", "=", "self", ".", "layers", "(", "annotationtype", ",", "set", ")", "return", "(", "len", "(", "l", ")", ">", "0", ")" ]
"Does the specified annotation layer exist?"
[ "Does", "the", "specified", "annotation", "layer", "exist?" ]
"7707f69a91caaa6cde037f0d0379f1d42500a68b"
"https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L3268-L3271"
""
""
""
""
""
""
-1
20
"proycon/pynlpl"
"pynlpl/formats/folia.py"
"TextContent.getreference"
"def getreference(self, validate=True): """Returns and validates the Text Content's reference. Raises UnresolvableTextContent when invalid""" if self.offset is None: return None #nothing to test if self.ref: ref = self.doc[self.ref] else: ref = self.finddefaultreference() if not ref: raise UnresolvableTextContent("Default reference for textcontent not found!") elif not ref.hastext(self.cls): raise UnresolvableTextContent("Reference (ID " + str(ref.id) + ") has no such text (class=" + self.cls+")") elif validate and self.text() != ref.textcontent(self.cls).text()[self.offset:self.offset+len(self.data[0])]: raise UnresolvableTextContent("Reference (ID " + str(ref.id) + ", class=" + self.cls+") found but no text match at specified offset ("+str(self.offset)+")! Expected '" + self.text() + "', got '" + ref.textcontent(self.cls).text()[self.offset:self.offset+len(self.data[0])] +"'") else: #finally, we made it! return ref"
"python"
"def getreference(self, validate=True): """Returns and validates the Text Content's reference. Raises UnresolvableTextContent when invalid""" if self.offset is None: return None #nothing to test if self.ref: ref = self.doc[self.ref] else: ref = self.finddefaultreference() if not ref: raise UnresolvableTextContent("Default reference for textcontent not found!") elif not ref.hastext(self.cls): raise UnresolvableTextContent("Reference (ID " + str(ref.id) + ") has no such text (class=" + self.cls+")") elif validate and self.text() != ref.textcontent(self.cls).text()[self.offset:self.offset+len(self.data[0])]: raise UnresolvableTextContent("Reference (ID " + str(ref.id) + ", class=" + self.cls+") found but no text match at specified offset ("+str(self.offset)+")! Expected '" + self.text() + "', got '" + ref.textcontent(self.cls).text()[self.offset:self.offset+len(self.data[0])] +"'") else: #finally, we made it! return ref"
[ "def", "getreference", "(", "self", ",", "validate", "=", "True", ")", ":", "if", "self", ".", "offset", "is", "None", ":", "return", "None", "#nothing to test", "if", "self", ".", "ref", ":", "ref", "=", "self", ".", "doc", "[", "self", ".", "ref", "]", "else", ":", "ref", "=", "self", ".", "finddefaultreference", "(", ")", "if", "not", "ref", ":", "raise", "UnresolvableTextContent", "(", "\"Default reference for textcontent not found!\"", ")", "elif", "not", "ref", ".", "hastext", "(", "self", ".", "cls", ")", ":", "raise", "UnresolvableTextContent", "(", "\"Reference (ID \"", "+", "str", "(", "ref", ".", "id", ")", "+", "\") has no such text (class=\"", "+", "self", ".", "cls", "+", "\")\"", ")", "elif", "validate", "and", "self", ".", "text", "(", ")", "!=", "ref", ".", "textcontent", "(", "self", ".", "cls", ")", ".", "text", "(", ")", "[", "self", ".", "offset", ":", "self", ".", "offset", "+", "len", "(", "self", ".", "data", "[", "0", "]", ")", "]", ":", "raise", "UnresolvableTextContent", "(", "\"Reference (ID \"", "+", "str", "(", "ref", ".", "id", ")", "+", "\", class=\"", "+", "self", ".", "cls", "+", "\") found but no text match at specified offset (\"", "+", "str", "(", "self", ".", "offset", ")", "+", "\")! Expected '\"", "+", "self", ".", "text", "(", ")", "+", "\"', got '\"", "+", "ref", ".", "textcontent", "(", "self", ".", "cls", ")", ".", "text", "(", ")", "[", "self", ".", "offset", ":", "self", ".", "offset", "+", "len", "(", "self", ".", "data", "[", "0", "]", ")", "]", "+", "\"'\"", ")", "else", ":", "#finally, we made it!", "return", "ref" ]
"Returns and validates the Text Content's reference. Raises UnresolvableTextContent when invalid"
[ "Returns", "and", "validates", "the", "Text", "Content", "s", "reference", ".", "Raises", "UnresolvableTextContent", "when", "invalid" ]
"7707f69a91caaa6cde037f0d0379f1d42500a68b"
"https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L3502-L3519"
""
""
""
""
""
""
-1
21
"proycon/pynlpl"
"pynlpl/formats/folia.py"
"PhonContent.getreference"
"def getreference(self, validate=True): """Return and validate the Phonetic Content's reference. Raises UnresolvableTextContent when invalid""" if self.offset is None: return None #nothing to test if self.ref: ref = self.doc[self.ref] else: ref = self.finddefaultreference() if not ref: raise UnresolvableTextContent("Default reference for phonetic content not found!") elif not ref.hasphon(self.cls): raise UnresolvableTextContent("Reference has no such phonetic content (class=" + self.cls+")") elif validate and self.phon() != ref.textcontent(self.cls).phon()[self.offset:self.offset+len(self.data[0])]: raise UnresolvableTextContent("Reference (class=" + self.cls+") found but no phonetic match at specified offset ("+str(self.offset)+")! Expected '" + self.text() + "', got '" + ref.textcontent(self.cls).text()[self.offset:self.offset+len(self.data[0])] +"'") else: #finally, we made it! return ref"
"python"
"def getreference(self, validate=True): """Return and validate the Phonetic Content's reference. Raises UnresolvableTextContent when invalid""" if self.offset is None: return None #nothing to test if self.ref: ref = self.doc[self.ref] else: ref = self.finddefaultreference() if not ref: raise UnresolvableTextContent("Default reference for phonetic content not found!") elif not ref.hasphon(self.cls): raise UnresolvableTextContent("Reference has no such phonetic content (class=" + self.cls+")") elif validate and self.phon() != ref.textcontent(self.cls).phon()[self.offset:self.offset+len(self.data[0])]: raise UnresolvableTextContent("Reference (class=" + self.cls+") found but no phonetic match at specified offset ("+str(self.offset)+")! Expected '" + self.text() + "', got '" + ref.textcontent(self.cls).text()[self.offset:self.offset+len(self.data[0])] +"'") else: #finally, we made it! return ref"
[ "def", "getreference", "(", "self", ",", "validate", "=", "True", ")", ":", "if", "self", ".", "offset", "is", "None", ":", "return", "None", "#nothing to test", "if", "self", ".", "ref", ":", "ref", "=", "self", ".", "doc", "[", "self", ".", "ref", "]", "else", ":", "ref", "=", "self", ".", "finddefaultreference", "(", ")", "if", "not", "ref", ":", "raise", "UnresolvableTextContent", "(", "\"Default reference for phonetic content not found!\"", ")", "elif", "not", "ref", ".", "hasphon", "(", "self", ".", "cls", ")", ":", "raise", "UnresolvableTextContent", "(", "\"Reference has no such phonetic content (class=\"", "+", "self", ".", "cls", "+", "\")\"", ")", "elif", "validate", "and", "self", ".", "phon", "(", ")", "!=", "ref", ".", "textcontent", "(", "self", ".", "cls", ")", ".", "phon", "(", ")", "[", "self", ".", "offset", ":", "self", ".", "offset", "+", "len", "(", "self", ".", "data", "[", "0", "]", ")", "]", ":", "raise", "UnresolvableTextContent", "(", "\"Reference (class=\"", "+", "self", ".", "cls", "+", "\") found but no phonetic match at specified offset (\"", "+", "str", "(", "self", ".", "offset", ")", "+", "\")! Expected '\"", "+", "self", ".", "text", "(", ")", "+", "\"', got '\"", "+", "ref", ".", "textcontent", "(", "self", ".", "cls", ")", ".", "text", "(", ")", "[", "self", ".", "offset", ":", "self", ".", "offset", "+", "len", "(", "self", ".", "data", "[", "0", "]", ")", "]", "+", "\"'\"", ")", "else", ":", "#finally, we made it!", "return", "ref" ]
"Return and validate the Phonetic Content's reference. Raises UnresolvableTextContent when invalid"
[ "Return", "and", "validate", "the", "Phonetic", "Content", "s", "reference", ".", "Raises", "UnresolvableTextContent", "when", "invalid" ]
"7707f69a91caaa6cde037f0d0379f1d42500a68b"
"https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L3715-L3732"
""
""
""
""
""
""
-1
22
"proycon/pynlpl"
"pynlpl/formats/folia.py"
"Word.findspans"
"def findspans(self, type,set=None): """Yields span annotation elements of the specified type that include this word. Arguments: type: The annotation type, can be passed as using any of the :class:`AnnotationType` member, or by passing the relevant :class:`AbstractSpanAnnotation` or :class:`AbstractAnnotationLayer` class. set (str or None): Constrain by set Example:: for chunk in word.findspans(folia.Chunk): print(" Chunk class=", chunk.cls, " words=") for word2 in chunk.wrefs(): #print all words in the chunk (of which the word is a part) print(word2, end="") print() Yields: Matching span annotation instances (derived from :class:`AbstractSpanAnnotation`) """ if issubclass(type, AbstractAnnotationLayer): layerclass = type else: layerclass = ANNOTATIONTYPE2LAYERCLASS[type.ANNOTATIONTYPE] e = self while True: if not e.parent: break e = e.parent for layer in e.select(layerclass,set,False): if type is layerclass: for e2 in layer.select(AbstractSpanAnnotation,set,True, (True, Word, Morpheme)): if not isinstance(e2, AbstractSpanRole) and self in e2.wrefs(): yield e2 else: for e2 in layer.select(type,set,True, (True, Word, Morpheme)): if not isinstance(e2, AbstractSpanRole) and self in e2.wrefs(): yield e2"
"python"
"def findspans(self, type,set=None): """Yields span annotation elements of the specified type that include this word. Arguments: type: The annotation type, can be passed as using any of the :class:`AnnotationType` member, or by passing the relevant :class:`AbstractSpanAnnotation` or :class:`AbstractAnnotationLayer` class. set (str or None): Constrain by set Example:: for chunk in word.findspans(folia.Chunk): print(" Chunk class=", chunk.cls, " words=") for word2 in chunk.wrefs(): #print all words in the chunk (of which the word is a part) print(word2, end="") print() Yields: Matching span annotation instances (derived from :class:`AbstractSpanAnnotation`) """ if issubclass(type, AbstractAnnotationLayer): layerclass = type else: layerclass = ANNOTATIONTYPE2LAYERCLASS[type.ANNOTATIONTYPE] e = self while True: if not e.parent: break e = e.parent for layer in e.select(layerclass,set,False): if type is layerclass: for e2 in layer.select(AbstractSpanAnnotation,set,True, (True, Word, Morpheme)): if not isinstance(e2, AbstractSpanRole) and self in e2.wrefs(): yield e2 else: for e2 in layer.select(type,set,True, (True, Word, Morpheme)): if not isinstance(e2, AbstractSpanRole) and self in e2.wrefs(): yield e2"
[ "def", "findspans", "(", "self", ",", "type", ",", "set", "=", "None", ")", ":", "if", "issubclass", "(", "type", ",", "AbstractAnnotationLayer", ")", ":", "layerclass", "=", "type", "else", ":", "layerclass", "=", "ANNOTATIONTYPE2LAYERCLASS", "[", "type", ".", "ANNOTATIONTYPE", "]", "e", "=", "self", "while", "True", ":", "if", "not", "e", ".", "parent", ":", "break", "e", "=", "e", ".", "parent", "for", "layer", "in", "e", ".", "select", "(", "layerclass", ",", "set", ",", "False", ")", ":", "if", "type", "is", "layerclass", ":", "for", "e2", "in", "layer", ".", "select", "(", "AbstractSpanAnnotation", ",", "set", ",", "True", ",", "(", "True", ",", "Word", ",", "Morpheme", ")", ")", ":", "if", "not", "isinstance", "(", "e2", ",", "AbstractSpanRole", ")", "and", "self", "in", "e2", ".", "wrefs", "(", ")", ":", "yield", "e2", "else", ":", "for", "e2", "in", "layer", ".", "select", "(", "type", ",", "set", ",", "True", ",", "(", "True", ",", "Word", ",", "Morpheme", ")", ")", ":", "if", "not", "isinstance", "(", "e2", ",", "AbstractSpanRole", ")", "and", "self", "in", "e2", ".", "wrefs", "(", ")", ":", "yield", "e2" ]
"Yields span annotation elements of the specified type that include this word. Arguments: type: The annotation type, can be passed as using any of the :class:`AnnotationType` member, or by passing the relevant :class:`AbstractSpanAnnotation` or :class:`AbstractAnnotationLayer` class. set (str or None): Constrain by set Example:: for chunk in word.findspans(folia.Chunk): print(" Chunk class=", chunk.cls, " words=") for word2 in chunk.wrefs(): #print all words in the chunk (of which the word is a part) print(word2, end="") print() Yields: Matching span annotation instances (derived from :class:`AbstractSpanAnnotation`)"
[ "Yields", "span", "annotation", "elements", "of", "the", "specified", "type", "that", "include", "this", "word", "." ]
"7707f69a91caaa6cde037f0d0379f1d42500a68b"
"https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L4178-L4213"
""
""
""
""
""
""
-1
23
"proycon/pynlpl"
"pynlpl/formats/folia.py"
"AbstractSpanAnnotation.setspan"
"def setspan(self, *args): """Sets the span of the span element anew, erases all data inside. Arguments: *args: Instances of :class:`Word`, :class:`Morpheme` or :class:`Phoneme` """ self.data = [] for child in args: self.append(child)"
"python"
"def setspan(self, *args): """Sets the span of the span element anew, erases all data inside. Arguments: *args: Instances of :class:`Word`, :class:`Morpheme` or :class:`Phoneme` """ self.data = [] for child in args: self.append(child)"
[ "def", "setspan", "(", "self", ",", "*", "args", ")", ":", "self", ".", "data", "=", "[", "]", "for", "child", "in", "args", ":", "self", ".", "append", "(", "child", ")" ]
"Sets the span of the span element anew, erases all data inside. Arguments: *args: Instances of :class:`Word`, :class:`Morpheme` or :class:`Phoneme`"
[ "Sets", "the", "span", "of", "the", "span", "element", "anew", "erases", "all", "data", "inside", "." ]
"7707f69a91caaa6cde037f0d0379f1d42500a68b"
"https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L4373-L4381"
""
""
""
""
""
""
-1
24
"proycon/pynlpl"
"pynlpl/formats/folia.py"
"AbstractSpanAnnotation._helper_wrefs"
"def _helper_wrefs(self, targets, recurse=True): """Internal helper function""" for c in self: if isinstance(c,Word) or isinstance(c,Morpheme) or isinstance(c, Phoneme): targets.append(c) elif isinstance(c,WordReference): try: targets.append(self.doc[c.id]) #try to resolve except KeyError: targets.append(c) #add unresolved elif isinstance(c, AbstractSpanAnnotation) and recurse: #recursion c._helper_wrefs(targets) #pylint: disable=protected-access elif isinstance(c, Correction) and c.auth: #recurse into corrections for e in c: if isinstance(e, AbstractCorrectionChild) and e.auth: for e2 in e: if isinstance(e2, AbstractSpanAnnotation): #recursion e2._helper_wrefs(targets)"
"python"
"def _helper_wrefs(self, targets, recurse=True): """Internal helper function""" for c in self: if isinstance(c,Word) or isinstance(c,Morpheme) or isinstance(c, Phoneme): targets.append(c) elif isinstance(c,WordReference): try: targets.append(self.doc[c.id]) #try to resolve except KeyError: targets.append(c) #add unresolved elif isinstance(c, AbstractSpanAnnotation) and recurse: #recursion c._helper_wrefs(targets) #pylint: disable=protected-access elif isinstance(c, Correction) and c.auth: #recurse into corrections for e in c: if isinstance(e, AbstractCorrectionChild) and e.auth: for e2 in e: if isinstance(e2, AbstractSpanAnnotation): #recursion e2._helper_wrefs(targets)"
[ "def", "_helper_wrefs", "(", "self", ",", "targets", ",", "recurse", "=", "True", ")", ":", "for", "c", "in", "self", ":", "if", "isinstance", "(", "c", ",", "Word", ")", "or", "isinstance", "(", "c", ",", "Morpheme", ")", "or", "isinstance", "(", "c", ",", "Phoneme", ")", ":", "targets", ".", "append", "(", "c", ")", "elif", "isinstance", "(", "c", ",", "WordReference", ")", ":", "try", ":", "targets", ".", "append", "(", "self", ".", "doc", "[", "c", ".", "id", "]", ")", "#try to resolve", "except", "KeyError", ":", "targets", ".", "append", "(", "c", ")", "#add unresolved", "elif", "isinstance", "(", "c", ",", "AbstractSpanAnnotation", ")", "and", "recurse", ":", "#recursion", "c", ".", "_helper_wrefs", "(", "targets", ")", "#pylint: disable=protected-access", "elif", "isinstance", "(", "c", ",", "Correction", ")", "and", "c", ".", "auth", ":", "#recurse into corrections", "for", "e", "in", "c", ":", "if", "isinstance", "(", "e", ",", "AbstractCorrectionChild", ")", "and", "e", ".", "auth", ":", "for", "e2", "in", "e", ":", "if", "isinstance", "(", "e2", ",", "AbstractSpanAnnotation", ")", ":", "#recursion", "e2", ".", "_helper_wrefs", "(", "targets", ")" ]
"Internal helper function"
[ "Internal", "helper", "function" ]
"7707f69a91caaa6cde037f0d0379f1d42500a68b"
"https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L4418-L4437"
""
""
""
""
""
""
-1
25
"proycon/pynlpl"
"pynlpl/formats/folia.py"
"AbstractSpanAnnotation.wrefs"
"def wrefs(self, index = None, recurse=True): """Returns a list of word references, these can be Words but also Morphemes or Phonemes. Arguments: index (int or None): If set to an integer, will retrieve and return the n'th element (starting at 0) instead of returning the list of all """ targets =[] self._helper_wrefs(targets, recurse) if index is None: return targets else: return targets[index]"
"python"
"def wrefs(self, index = None, recurse=True): """Returns a list of word references, these can be Words but also Morphemes or Phonemes. Arguments: index (int or None): If set to an integer, will retrieve and return the n'th element (starting at 0) instead of returning the list of all """ targets =[] self._helper_wrefs(targets, recurse) if index is None: return targets else: return targets[index]"
[ "def", "wrefs", "(", "self", ",", "index", "=", "None", ",", "recurse", "=", "True", ")", ":", "targets", "=", "[", "]", "self", ".", "_helper_wrefs", "(", "targets", ",", "recurse", ")", "if", "index", "is", "None", ":", "return", "targets", "else", ":", "return", "targets", "[", "index", "]" ]
"Returns a list of word references, these can be Words but also Morphemes or Phonemes. Arguments: index (int or None): If set to an integer, will retrieve and return the n'th element (starting at 0) instead of returning the list of all"
[ "Returns", "a", "list", "of", "word", "references", "these", "can", "be", "Words", "but", "also", "Morphemes", "or", "Phonemes", "." ]
"7707f69a91caaa6cde037f0d0379f1d42500a68b"
"https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L4439-L4450"
""
""
""
""
""
""
-1
26
"proycon/pynlpl"
"pynlpl/formats/folia.py"
"AbstractSpanAnnotation.copychildren"
"def copychildren(self, newdoc=None, idsuffix=""): """Generator creating a deep copy of the children of this element. If idsuffix is a string, if set to True, a random idsuffix will be generated including a random 32-bit hash""" if idsuffix is True: idsuffix = ".copy." + "%08x" % random.getrandbits(32) #random 32-bit hash for each copy, same one will be reused for all children for c in self: if isinstance(c, Word): yield WordReference(newdoc, id=c.id) else: yield c.copy(newdoc,idsuffix)"
"python"
"def copychildren(self, newdoc=None, idsuffix=""): """Generator creating a deep copy of the children of this element. If idsuffix is a string, if set to True, a random idsuffix will be generated including a random 32-bit hash""" if idsuffix is True: idsuffix = ".copy." + "%08x" % random.getrandbits(32) #random 32-bit hash for each copy, same one will be reused for all children for c in self: if isinstance(c, Word): yield WordReference(newdoc, id=c.id) else: yield c.copy(newdoc,idsuffix)"
[ "def", "copychildren", "(", "self", ",", "newdoc", "=", "None", ",", "idsuffix", "=", "\"\"", ")", ":", "if", "idsuffix", "is", "True", ":", "idsuffix", "=", "\".copy.\"", "+", "\"%08x\"", "%", "random", ".", "getrandbits", "(", "32", ")", "#random 32-bit hash for each copy, same one will be reused for all children", "for", "c", "in", "self", ":", "if", "isinstance", "(", "c", ",", "Word", ")", ":", "yield", "WordReference", "(", "newdoc", ",", "id", "=", "c", ".", "id", ")", "else", ":", "yield", "c", ".", "copy", "(", "newdoc", ",", "idsuffix", ")" ]
"Generator creating a deep copy of the children of this element. If idsuffix is a string, if set to True, a random idsuffix will be generated including a random 32-bit hash"
[ "Generator", "creating", "a", "deep", "copy", "of", "the", "children", "of", "this", "element", ".", "If", "idsuffix", "is", "a", "string", "if", "set", "to", "True", "a", "random", "idsuffix", "will", "be", "generated", "including", "a", "random", "32", "-", "bit", "hash" ]
"7707f69a91caaa6cde037f0d0379f1d42500a68b"
"https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L4465-L4472"
""
""
""
""
""
""
-1
27
"proycon/pynlpl"
"pynlpl/formats/folia.py"
"AbstractAnnotationLayer.alternatives"
"def alternatives(self, Class=None, set=None): """Generator over alternatives, either all or only of a specific annotation type, and possibly restrained also by set. Arguments: * ``Class`` - The Class you want to retrieve (e.g. PosAnnotation). Or set to None to select all alternatives regardless of what type they are. * ``set`` - The set you want to retrieve (defaults to None, which selects irregardless of set) Returns: Generator over Alternative elements """ for e in self.select(AlternativeLayers,None, True, ['Original','Suggestion']): #pylint: disable=too-many-nested-blocks if Class is None: yield e elif len(e) >= 1: #child elements? for e2 in e: try: if isinstance(e2, Class): try: if set is None or e2.set == set: yield e #not e2 break #yield an alternative only once (in case there are multiple matches) except AttributeError: continue except AttributeError: continue"
"python"
"def alternatives(self, Class=None, set=None): """Generator over alternatives, either all or only of a specific annotation type, and possibly restrained also by set. Arguments: * ``Class`` - The Class you want to retrieve (e.g. PosAnnotation). Or set to None to select all alternatives regardless of what type they are. * ``set`` - The set you want to retrieve (defaults to None, which selects irregardless of set) Returns: Generator over Alternative elements """ for e in self.select(AlternativeLayers,None, True, ['Original','Suggestion']): #pylint: disable=too-many-nested-blocks if Class is None: yield e elif len(e) >= 1: #child elements? for e2 in e: try: if isinstance(e2, Class): try: if set is None or e2.set == set: yield e #not e2 break #yield an alternative only once (in case there are multiple matches) except AttributeError: continue except AttributeError: continue"
[ "def", "alternatives", "(", "self", ",", "Class", "=", "None", ",", "set", "=", "None", ")", ":", "for", "e", "in", "self", ".", "select", "(", "AlternativeLayers", ",", "None", ",", "True", ",", "[", "'Original'", ",", "'Suggestion'", "]", ")", ":", "#pylint: disable=too-many-nested-blocks", "if", "Class", "is", "None", ":", "yield", "e", "elif", "len", "(", "e", ")", ">=", "1", ":", "#child elements?", "for", "e2", "in", "e", ":", "try", ":", "if", "isinstance", "(", "e2", ",", "Class", ")", ":", "try", ":", "if", "set", "is", "None", "or", "e2", ".", "set", "==", "set", ":", "yield", "e", "#not e2", "break", "#yield an alternative only once (in case there are multiple matches)", "except", "AttributeError", ":", "continue", "except", "AttributeError", ":", "continue" ]
"Generator over alternatives, either all or only of a specific annotation type, and possibly restrained also by set. Arguments: * ``Class`` - The Class you want to retrieve (e.g. PosAnnotation). Or set to None to select all alternatives regardless of what type they are. * ``set`` - The set you want to retrieve (defaults to None, which selects irregardless of set) Returns: Generator over Alternative elements"
[ "Generator", "over", "alternatives", "either", "all", "or", "only", "of", "a", "specific", "annotation", "type", "and", "possibly", "restrained", "also", "by", "set", "." ]
"7707f69a91caaa6cde037f0d0379f1d42500a68b"
"https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L4574-L4599"
""
""
""
""
""
""
-1
28
"proycon/pynlpl"
"pynlpl/formats/folia.py"
"AbstractAnnotationLayer.findspan"
"def findspan(self, *words): """Returns the span element which spans over the specified words or morphemes. See also: :meth:`Word.findspans` """ for span in self.select(AbstractSpanAnnotation,None,True): if tuple(span.wrefs()) == words: return span raise NoSuchAnnotation"
"python"
"def findspan(self, *words): """Returns the span element which spans over the specified words or morphemes. See also: :meth:`Word.findspans` """ for span in self.select(AbstractSpanAnnotation,None,True): if tuple(span.wrefs()) == words: return span raise NoSuchAnnotation"
[ "def", "findspan", "(", "self", ",", "*", "words", ")", ":", "for", "span", "in", "self", ".", "select", "(", "AbstractSpanAnnotation", ",", "None", ",", "True", ")", ":", "if", "tuple", "(", "span", ".", "wrefs", "(", ")", ")", "==", "words", ":", "return", "span", "raise", "NoSuchAnnotation" ]
"Returns the span element which spans over the specified words or morphemes. See also: :meth:`Word.findspans`"
[ "Returns", "the", "span", "element", "which", "spans", "over", "the", "specified", "words", "or", "morphemes", "." ]
"7707f69a91caaa6cde037f0d0379f1d42500a68b"
"https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L4601-L4611"
""
""
""
""
""
""
-1
29
"proycon/pynlpl"
"pynlpl/formats/folia.py"
"Correction.hasnew"
"def hasnew(self,allowempty=False): """Does the correction define new corrected annotations?""" for e in self.select(New,None,False, False): if not allowempty and len(e) == 0: continue return True return False"
"python"
"def hasnew(self,allowempty=False): """Does the correction define new corrected annotations?""" for e in self.select(New,None,False, False): if not allowempty and len(e) == 0: continue return True return False"
[ "def", "hasnew", "(", "self", ",", "allowempty", "=", "False", ")", ":", "for", "e", "in", "self", ".", "select", "(", "New", ",", "None", ",", "False", ",", "False", ")", ":", "if", "not", "allowempty", "and", "len", "(", "e", ")", "==", "0", ":", "continue", "return", "True", "return", "False" ]
"Does the correction define new corrected annotations?"
[ "Does", "the", "correction", "define", "new", "corrected", "annotations?" ]
"7707f69a91caaa6cde037f0d0379f1d42500a68b"
"https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L4982-L4987"
""
""
""
""
""
""
-1
30
"proycon/pynlpl"
"pynlpl/formats/folia.py"
"Correction.hasoriginal"
"def hasoriginal(self,allowempty=False): """Does the correction record the old annotations prior to correction?""" for e in self.select(Original,None,False, False): if not allowempty and len(e) == 0: continue return True return False"
"python"
"def hasoriginal(self,allowempty=False): """Does the correction record the old annotations prior to correction?""" for e in self.select(Original,None,False, False): if not allowempty and len(e) == 0: continue return True return False"
[ "def", "hasoriginal", "(", "self", ",", "allowempty", "=", "False", ")", ":", "for", "e", "in", "self", ".", "select", "(", "Original", ",", "None", ",", "False", ",", "False", ")", ":", "if", "not", "allowempty", "and", "len", "(", "e", ")", "==", "0", ":", "continue", "return", "True", "return", "False" ]
"Does the correction record the old annotations prior to correction?"
[ "Does", "the", "correction", "record", "the", "old", "annotations", "prior", "to", "correction?" ]
"7707f69a91caaa6cde037f0d0379f1d42500a68b"
"https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L4989-L4994"
""
""
""
""
""
""
-1
31
"proycon/pynlpl"
"pynlpl/formats/folia.py"
"Correction.hassuggestions"
"def hassuggestions(self,allowempty=False): """Does the correction propose suggestions for correction?""" for e in self.select(Suggestion,None,False, False): if not allowempty and len(e) == 0: continue return True return False"
"python"
"def hassuggestions(self,allowempty=False): """Does the correction propose suggestions for correction?""" for e in self.select(Suggestion,None,False, False): if not allowempty and len(e) == 0: continue return True return False"
[ "def", "hassuggestions", "(", "self", ",", "allowempty", "=", "False", ")", ":", "for", "e", "in", "self", ".", "select", "(", "Suggestion", ",", "None", ",", "False", ",", "False", ")", ":", "if", "not", "allowempty", "and", "len", "(", "e", ")", "==", "0", ":", "continue", "return", "True", "return", "False" ]
"Does the correction propose suggestions for correction?"
[ "Does", "the", "correction", "propose", "suggestions", "for", "correction?" ]
"7707f69a91caaa6cde037f0d0379f1d42500a68b"
"https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L5003-L5008"
""
""
""
""
""
""
-1
32
"proycon/pynlpl"
"pynlpl/formats/folia.py"
"Correction.new"
"def new(self,index = None): """Get the new corrected annotation. This returns only one annotation if multiple exist, use `index` to select another in the sequence. Returns: an annotation element (:class:`AbstractElement`) Raises: :class:`NoSuchAnnotation` """ if index is None: try: return next(self.select(New,None,False)) except StopIteration: raise NoSuchAnnotation else: for e in self.select(New,None,False): return e[index] raise NoSuchAnnotation"
"python"
"def new(self,index = None): """Get the new corrected annotation. This returns only one annotation if multiple exist, use `index` to select another in the sequence. Returns: an annotation element (:class:`AbstractElement`) Raises: :class:`NoSuchAnnotation` """ if index is None: try: return next(self.select(New,None,False)) except StopIteration: raise NoSuchAnnotation else: for e in self.select(New,None,False): return e[index] raise NoSuchAnnotation"
[ "def", "new", "(", "self", ",", "index", "=", "None", ")", ":", "if", "index", "is", "None", ":", "try", ":", "return", "next", "(", "self", ".", "select", "(", "New", ",", "None", ",", "False", ")", ")", "except", "StopIteration", ":", "raise", "NoSuchAnnotation", "else", ":", "for", "e", "in", "self", ".", "select", "(", "New", ",", "None", ",", "False", ")", ":", "return", "e", "[", "index", "]", "raise", "NoSuchAnnotation" ]
"Get the new corrected annotation. This returns only one annotation if multiple exist, use `index` to select another in the sequence. Returns: an annotation element (:class:`AbstractElement`) Raises: :class:`NoSuchAnnotation`"
[ "Get", "the", "new", "corrected", "annotation", "." ]
"7707f69a91caaa6cde037f0d0379f1d42500a68b"
"https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L5106-L5126"
""
""
""
""
""
""
-1
33
"proycon/pynlpl"
"pynlpl/formats/folia.py"
"Correction.original"
"def original(self,index=None): """Get the old annotation prior to correction. This returns only one annotation if multiple exist, use `index` to select another in the sequence. Returns: an annotation element (:class:`AbstractElement`) Raises: :class:`NoSuchAnnotation` """ if index is None: try: return next(self.select(Original,None,False, False)) except StopIteration: raise NoSuchAnnotation else: for e in self.select(Original,None,False, False): return e[index] raise NoSuchAnnotation"
"python"
"def original(self,index=None): """Get the old annotation prior to correction. This returns only one annotation if multiple exist, use `index` to select another in the sequence. Returns: an annotation element (:class:`AbstractElement`) Raises: :class:`NoSuchAnnotation` """ if index is None: try: return next(self.select(Original,None,False, False)) except StopIteration: raise NoSuchAnnotation else: for e in self.select(Original,None,False, False): return e[index] raise NoSuchAnnotation"
[ "def", "original", "(", "self", ",", "index", "=", "None", ")", ":", "if", "index", "is", "None", ":", "try", ":", "return", "next", "(", "self", ".", "select", "(", "Original", ",", "None", ",", "False", ",", "False", ")", ")", "except", "StopIteration", ":", "raise", "NoSuchAnnotation", "else", ":", "for", "e", "in", "self", ".", "select", "(", "Original", ",", "None", ",", "False", ",", "False", ")", ":", "return", "e", "[", "index", "]", "raise", "NoSuchAnnotation" ]
"Get the old annotation prior to correction. This returns only one annotation if multiple exist, use `index` to select another in the sequence. Returns: an annotation element (:class:`AbstractElement`) Raises: :class:`NoSuchAnnotation`"
[ "Get", "the", "old", "annotation", "prior", "to", "correction", "." ]
"7707f69a91caaa6cde037f0d0379f1d42500a68b"
"https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L5128-L5147"
""
""
""
""
""
""
-1
34
"proycon/pynlpl"
"pynlpl/formats/folia.py"
"Correction.suggestions"
"def suggestions(self,index=None): """Get suggestions for correction. Yields: :class:`Suggestion` element that encapsulate the suggested annotations (if index is ``None``, default) Returns: a :class:`Suggestion` element that encapsulate the suggested annotations (if index is set) Raises: :class:`IndexError` """ if index is None: return self.select(Suggestion,None,False, False) else: for i, e in enumerate(self.select(Suggestion,None,False, False)): if index == i: return e raise IndexError"
"python"
"def suggestions(self,index=None): """Get suggestions for correction. Yields: :class:`Suggestion` element that encapsulate the suggested annotations (if index is ``None``, default) Returns: a :class:`Suggestion` element that encapsulate the suggested annotations (if index is set) Raises: :class:`IndexError` """ if index is None: return self.select(Suggestion,None,False, False) else: for i, e in enumerate(self.select(Suggestion,None,False, False)): if index == i: return e raise IndexError"
[ "def", "suggestions", "(", "self", ",", "index", "=", "None", ")", ":", "if", "index", "is", "None", ":", "return", "self", ".", "select", "(", "Suggestion", ",", "None", ",", "False", ",", "False", ")", "else", ":", "for", "i", ",", "e", "in", "enumerate", "(", "self", ".", "select", "(", "Suggestion", ",", "None", ",", "False", ",", "False", ")", ")", ":", "if", "index", "==", "i", ":", "return", "e", "raise", "IndexError" ]
"Get suggestions for correction. Yields: :class:`Suggestion` element that encapsulate the suggested annotations (if index is ``None``, default) Returns: a :class:`Suggestion` element that encapsulate the suggested annotations (if index is set) Raises: :class:`IndexError`"
[ "Get", "suggestions", "for", "correction", "." ]
"7707f69a91caaa6cde037f0d0379f1d42500a68b"
"https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L5170-L5188"
""
""
""
""
""
""
-1
35
"proycon/pynlpl"
"pynlpl/formats/folia.py"
"Morpheme.findspans"
"def findspans(self, type,set=None): """Find span annotation of the specified type that include this word""" if issubclass(type, AbstractAnnotationLayer): layerclass = type else: layerclass = ANNOTATIONTYPE2LAYERCLASS[type.ANNOTATIONTYPE] e = self while True: if not e.parent: break e = e.parent for layer in e.select(layerclass,set,False): for e2 in layer: if isinstance(e2, AbstractSpanAnnotation): if self in e2.wrefs(): yield e2"
"python"
"def findspans(self, type,set=None): """Find span annotation of the specified type that include this word""" if issubclass(type, AbstractAnnotationLayer): layerclass = type else: layerclass = ANNOTATIONTYPE2LAYERCLASS[type.ANNOTATIONTYPE] e = self while True: if not e.parent: break e = e.parent for layer in e.select(layerclass,set,False): for e2 in layer: if isinstance(e2, AbstractSpanAnnotation): if self in e2.wrefs(): yield e2"
[ "def", "findspans", "(", "self", ",", "type", ",", "set", "=", "None", ")", ":", "if", "issubclass", "(", "type", ",", "AbstractAnnotationLayer", ")", ":", "layerclass", "=", "type", "else", ":", "layerclass", "=", "ANNOTATIONTYPE2LAYERCLASS", "[", "type", ".", "ANNOTATIONTYPE", "]", "e", "=", "self", "while", "True", ":", "if", "not", "e", ".", "parent", ":", "break", "e", "=", "e", ".", "parent", "for", "layer", "in", "e", ".", "select", "(", "layerclass", ",", "set", ",", "False", ")", ":", "for", "e2", "in", "layer", ":", "if", "isinstance", "(", "e2", ",", "AbstractSpanAnnotation", ")", ":", "if", "self", "in", "e2", ".", "wrefs", "(", ")", ":", "yield", "e2" ]
"Find span annotation of the specified type that include this word"
[ "Find", "span", "annotation", "of", "the", "specified", "type", "that", "include", "this", "word" ]
"7707f69a91caaa6cde037f0d0379f1d42500a68b"
"https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L5528-L5542"
""
""
""
""
""
""
-1
36
"proycon/pynlpl"
"pynlpl/formats/folia.py"
"Pattern.resolve"
"def resolve(self,size, distribution): """Resolve a variable sized pattern to all patterns of a certain fixed size""" if not self.variablesize(): raise Exception("Can only resize patterns with * wildcards") nrofwildcards = 0 for x in self.sequence: if x == '*': nrofwildcards += 1 assert (len(distribution) == nrofwildcards) wildcardnr = 0 newsequence = [] for x in self.sequence: if x == '*': newsequence += [True] * distribution[wildcardnr] wildcardnr += 1 else: newsequence.append(x) d = { 'matchannotation':self.matchannotation, 'matchannotationset':self.matchannotationset, 'casesensitive':self.casesensitive } yield Pattern(*newsequence, **d )"
"python"
"def resolve(self,size, distribution): """Resolve a variable sized pattern to all patterns of a certain fixed size""" if not self.variablesize(): raise Exception("Can only resize patterns with * wildcards") nrofwildcards = 0 for x in self.sequence: if x == '*': nrofwildcards += 1 assert (len(distribution) == nrofwildcards) wildcardnr = 0 newsequence = [] for x in self.sequence: if x == '*': newsequence += [True] * distribution[wildcardnr] wildcardnr += 1 else: newsequence.append(x) d = { 'matchannotation':self.matchannotation, 'matchannotationset':self.matchannotationset, 'casesensitive':self.casesensitive } yield Pattern(*newsequence, **d )"
[ "def", "resolve", "(", "self", ",", "size", ",", "distribution", ")", ":", "if", "not", "self", ".", "variablesize", "(", ")", ":", "raise", "Exception", "(", "\"Can only resize patterns with * wildcards\"", ")", "nrofwildcards", "=", "0", "for", "x", "in", "self", ".", "sequence", ":", "if", "x", "==", "'*'", ":", "nrofwildcards", "+=", "1", "assert", "(", "len", "(", "distribution", ")", "==", "nrofwildcards", ")", "wildcardnr", "=", "0", "newsequence", "=", "[", "]", "for", "x", "in", "self", ".", "sequence", ":", "if", "x", "==", "'*'", ":", "newsequence", "+=", "[", "True", "]", "*", "distribution", "[", "wildcardnr", "]", "wildcardnr", "+=", "1", "else", ":", "newsequence", ".", "append", "(", "x", ")", "d", "=", "{", "'matchannotation'", ":", "self", ".", "matchannotation", ",", "'matchannotationset'", ":", "self", ".", "matchannotationset", ",", "'casesensitive'", ":", "self", ".", "casesensitive", "}", "yield", "Pattern", "(", "*", "newsequence", ",", "*", "*", "d", ")" ]
"Resolve a variable sized pattern to all patterns of a certain fixed size"
[ "Resolve", "a", "variable", "sized", "pattern", "to", "all", "patterns", "of", "a", "certain", "fixed", "size" ]
"7707f69a91caaa6cde037f0d0379f1d42500a68b"
"https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L6221-L6242"
""
""
""
""
""
""
-1
37
"proycon/pynlpl"
"pynlpl/formats/folia.py"
"Document.load"
"def load(self, filename): """Load a FoLiA XML file. Argument: filename (str): The file to load """ #if LXE and self.mode != Mode.XPATH: # #workaround for xml:id problem (disabled) # #f = open(filename) # #s = f.read().replace(' xml:id=', ' id=') # #f.close() # self.tree = ElementTree.parse(filename) #else: self.tree = xmltreefromfile(filename) self.parsexml(self.tree.getroot()) if self.mode != Mode.XPATH: #XML Tree is now obsolete (only needed when partially loaded for xpath queries) self.tree = None"
"python"
"def load(self, filename): """Load a FoLiA XML file. Argument: filename (str): The file to load """ #if LXE and self.mode != Mode.XPATH: # #workaround for xml:id problem (disabled) # #f = open(filename) # #s = f.read().replace(' xml:id=', ' id=') # #f.close() # self.tree = ElementTree.parse(filename) #else: self.tree = xmltreefromfile(filename) self.parsexml(self.tree.getroot()) if self.mode != Mode.XPATH: #XML Tree is now obsolete (only needed when partially loaded for xpath queries) self.tree = None"
[ "def", "load", "(", "self", ",", "filename", ")", ":", "#if LXE and self.mode != Mode.XPATH:", "# #workaround for xml:id problem (disabled)", "# #f = open(filename)", "# #s = f.read().replace(' xml:id=', ' id=')", "# #f.close()", "# self.tree = ElementTree.parse(filename)", "#else:", "self", ".", "tree", "=", "xmltreefromfile", "(", "filename", ")", "self", ".", "parsexml", "(", "self", ".", "tree", ".", "getroot", "(", ")", ")", "if", "self", ".", "mode", "!=", "Mode", ".", "XPATH", ":", "#XML Tree is now obsolete (only needed when partially loaded for xpath queries)", "self", ".", "tree", "=", "None" ]
"Load a FoLiA XML file. Argument: filename (str): The file to load"
[ "Load", "a", "FoLiA", "XML", "file", "." ]
"7707f69a91caaa6cde037f0d0379f1d42500a68b"
"https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L6495-L6512"
""
""
""
""
""
""
-1
38
"proycon/pynlpl"
"pynlpl/formats/folia.py"
"Document.items"
"def items(self): """Returns a depth-first flat list of all items in the document""" l = [] for e in self.data: l += e.items() return l"
"python"
"def items(self): """Returns a depth-first flat list of all items in the document""" l = [] for e in self.data: l += e.items() return l"
[ "def", "items", "(", "self", ")", ":", "l", "=", "[", "]", "for", "e", "in", "self", ".", "data", ":", "l", "+=", "e", ".", "items", "(", ")", "return", "l" ]
"Returns a depth-first flat list of all items in the document"
[ "Returns", "a", "depth", "-", "first", "flat", "list", "of", "all", "items", "in", "the", "document" ]
"7707f69a91caaa6cde037f0d0379f1d42500a68b"
"https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L6514-L6519"
""
""
""
""
""
""
-1
39
"proycon/pynlpl"
"pynlpl/formats/folia.py"
"Document.save"
"def save(self, filename=None): """Save the document to file. Arguments: * filename (str): The filename to save to. If not set (``None``, default), saves to the same file as loaded from. """ if not filename: filename = self.filename if not filename: raise Exception("No filename specified") if filename[-4:].lower() == '.bz2': f = bz2.BZ2File(filename,'wb') f.write(self.xmlstring().encode('utf-8')) f.close() elif filename[-3:].lower() == '.gz': f = gzip.GzipFile(filename,'wb') #pylint: disable=redefined-variable-type f.write(self.xmlstring().encode('utf-8')) f.close() else: f = io.open(filename,'w',encoding='utf-8') f.write(self.xmlstring()) f.close()"
"python"
"def save(self, filename=None): """Save the document to file. Arguments: * filename (str): The filename to save to. If not set (``None``, default), saves to the same file as loaded from. """ if not filename: filename = self.filename if not filename: raise Exception("No filename specified") if filename[-4:].lower() == '.bz2': f = bz2.BZ2File(filename,'wb') f.write(self.xmlstring().encode('utf-8')) f.close() elif filename[-3:].lower() == '.gz': f = gzip.GzipFile(filename,'wb') #pylint: disable=redefined-variable-type f.write(self.xmlstring().encode('utf-8')) f.close() else: f = io.open(filename,'w',encoding='utf-8') f.write(self.xmlstring()) f.close()"
[ "def", "save", "(", "self", ",", "filename", "=", "None", ")", ":", "if", "not", "filename", ":", "filename", "=", "self", ".", "filename", "if", "not", "filename", ":", "raise", "Exception", "(", "\"No filename specified\"", ")", "if", "filename", "[", "-", "4", ":", "]", ".", "lower", "(", ")", "==", "'.bz2'", ":", "f", "=", "bz2", ".", "BZ2File", "(", "filename", ",", "'wb'", ")", "f", ".", "write", "(", "self", ".", "xmlstring", "(", ")", ".", "encode", "(", "'utf-8'", ")", ")", "f", ".", "close", "(", ")", "elif", "filename", "[", "-", "3", ":", "]", ".", "lower", "(", ")", "==", "'.gz'", ":", "f", "=", "gzip", ".", "GzipFile", "(", "filename", ",", "'wb'", ")", "#pylint: disable=redefined-variable-type", "f", ".", "write", "(", "self", ".", "xmlstring", "(", ")", ".", "encode", "(", "'utf-8'", ")", ")", "f", ".", "close", "(", ")", "else", ":", "f", "=", "io", ".", "open", "(", "filename", ",", "'w'", ",", "encoding", "=", "'utf-8'", ")", "f", ".", "write", "(", "self", ".", "xmlstring", "(", ")", ")", "f", ".", "close", "(", ")" ]
"Save the document to file. Arguments: * filename (str): The filename to save to. If not set (``None``, default), saves to the same file as loaded from."
[ "Save", "the", "document", "to", "file", "." ]
"7707f69a91caaa6cde037f0d0379f1d42500a68b"
"https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L6547-L6568"
""
""
""
""
""
""
-1
40
"proycon/pynlpl"
"pynlpl/formats/folia.py"
"Document.xmldeclarations"
"def xmldeclarations(self): """Internal method to generate XML nodes for all declarations""" l = [] E = ElementMaker(namespace="http://ilk.uvt.nl/folia",nsmap={None: "http://ilk.uvt.nl/folia", 'xml' : "http://www.w3.org/XML/1998/namespace"}) for annotationtype, set in self.annotations: label = None #Find the 'label' for the declarations dynamically (aka: AnnotationType --> String) for key, value in vars(AnnotationType).items(): if value == annotationtype: label = key break #gather attribs if (annotationtype == AnnotationType.TEXT or annotationtype == AnnotationType.PHON) and set == 'undefined' and len(self.annotationdefaults[annotationtype][set]) == 0: #this is the implicit TextContent declaration, no need to output it explicitly continue attribs = {} if set and set != 'undefined': attribs['{' + NSFOLIA + '}set'] = set for key, value in self.annotationdefaults[annotationtype][set].items(): if key == 'annotatortype': if value == AnnotatorType.MANUAL: attribs['{' + NSFOLIA + '}' + key] = 'manual' elif value == AnnotatorType.AUTO: attribs['{' + NSFOLIA + '}' + key] = 'auto' elif key == 'datetime': attribs['{' + NSFOLIA + '}' + key] = value.strftime("%Y-%m-%dT%H:%M:%S") #proper iso-formatting elif value: attribs['{' + NSFOLIA + '}' + key] = value if label: l.append( makeelement(E,'{' + NSFOLIA + '}' + label.lower() + '-annotation', **attribs) ) else: raise Exception("Invalid annotation type") return l"
"python"
"def xmldeclarations(self): """Internal method to generate XML nodes for all declarations""" l = [] E = ElementMaker(namespace="http://ilk.uvt.nl/folia",nsmap={None: "http://ilk.uvt.nl/folia", 'xml' : "http://www.w3.org/XML/1998/namespace"}) for annotationtype, set in self.annotations: label = None #Find the 'label' for the declarations dynamically (aka: AnnotationType --> String) for key, value in vars(AnnotationType).items(): if value == annotationtype: label = key break #gather attribs if (annotationtype == AnnotationType.TEXT or annotationtype == AnnotationType.PHON) and set == 'undefined' and len(self.annotationdefaults[annotationtype][set]) == 0: #this is the implicit TextContent declaration, no need to output it explicitly continue attribs = {} if set and set != 'undefined': attribs['{' + NSFOLIA + '}set'] = set for key, value in self.annotationdefaults[annotationtype][set].items(): if key == 'annotatortype': if value == AnnotatorType.MANUAL: attribs['{' + NSFOLIA + '}' + key] = 'manual' elif value == AnnotatorType.AUTO: attribs['{' + NSFOLIA + '}' + key] = 'auto' elif key == 'datetime': attribs['{' + NSFOLIA + '}' + key] = value.strftime("%Y-%m-%dT%H:%M:%S") #proper iso-formatting elif value: attribs['{' + NSFOLIA + '}' + key] = value if label: l.append( makeelement(E,'{' + NSFOLIA + '}' + label.lower() + '-annotation', **attribs) ) else: raise Exception("Invalid annotation type") return l"
[ "def", "xmldeclarations", "(", "self", ")", ":", "l", "=", "[", "]", "E", "=", "ElementMaker", "(", "namespace", "=", "\"http://ilk.uvt.nl/folia\"", ",", "nsmap", "=", "{", "None", ":", "\"http://ilk.uvt.nl/folia\"", ",", "'xml'", ":", "\"http://www.w3.org/XML/1998/namespace\"", "}", ")", "for", "annotationtype", ",", "set", "in", "self", ".", "annotations", ":", "label", "=", "None", "#Find the 'label' for the declarations dynamically (aka: AnnotationType --> String)", "for", "key", ",", "value", "in", "vars", "(", "AnnotationType", ")", ".", "items", "(", ")", ":", "if", "value", "==", "annotationtype", ":", "label", "=", "key", "break", "#gather attribs", "if", "(", "annotationtype", "==", "AnnotationType", ".", "TEXT", "or", "annotationtype", "==", "AnnotationType", ".", "PHON", ")", "and", "set", "==", "'undefined'", "and", "len", "(", "self", ".", "annotationdefaults", "[", "annotationtype", "]", "[", "set", "]", ")", "==", "0", ":", "#this is the implicit TextContent declaration, no need to output it explicitly", "continue", "attribs", "=", "{", "}", "if", "set", "and", "set", "!=", "'undefined'", ":", "attribs", "[", "'{'", "+", "NSFOLIA", "+", "'}set'", "]", "=", "set", "for", "key", ",", "value", "in", "self", ".", "annotationdefaults", "[", "annotationtype", "]", "[", "set", "]", ".", "items", "(", ")", ":", "if", "key", "==", "'annotatortype'", ":", "if", "value", "==", "AnnotatorType", ".", "MANUAL", ":", "attribs", "[", "'{'", "+", "NSFOLIA", "+", "'}'", "+", "key", "]", "=", "'manual'", "elif", "value", "==", "AnnotatorType", ".", "AUTO", ":", "attribs", "[", "'{'", "+", "NSFOLIA", "+", "'}'", "+", "key", "]", "=", "'auto'", "elif", "key", "==", "'datetime'", ":", "attribs", "[", "'{'", "+", "NSFOLIA", "+", "'}'", "+", "key", "]", "=", "value", ".", "strftime", "(", "\"%Y-%m-%dT%H:%M:%S\"", ")", "#proper iso-formatting", "elif", "value", ":", "attribs", "[", "'{'", "+", "NSFOLIA", "+", "'}'", "+", "key", "]", "=", "value", "if", "label", ":", "l", 
".", "append", "(", "makeelement", "(", "E", ",", "'{'", "+", "NSFOLIA", "+", "'}'", "+", "label", ".", "lower", "(", ")", "+", "'-annotation'", ",", "*", "*", "attribs", ")", ")", "else", ":", "raise", "Exception", "(", "\"Invalid annotation type\"", ")", "return", "l" ]
"Internal method to generate XML nodes for all declarations"
[ "Internal", "method", "to", "generate", "XML", "nodes", "for", "all", "declarations" ]
"7707f69a91caaa6cde037f0d0379f1d42500a68b"
"https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L6653-L6690"
""
""
""
""
""
""
-1
41
"proycon/pynlpl"
"pynlpl/formats/folia.py"
"Document.jsondeclarations"
"def jsondeclarations(self): """Return all declarations in a form ready to be serialised to JSON. Returns: list of dict """ l = [] for annotationtype, set in self.annotations: label = None #Find the 'label' for the declarations dynamically (aka: AnnotationType --> String) for key, value in vars(AnnotationType).items(): if value == annotationtype: label = key break #gather attribs if (annotationtype == AnnotationType.TEXT or annotationtype == AnnotationType.PHON) and set == 'undefined' and len(self.annotationdefaults[annotationtype][set]) == 0: #this is the implicit TextContent declaration, no need to output it explicitly continue jsonnode = {'annotationtype': label.lower()} if set and set != 'undefined': jsonnode['set'] = set for key, value in self.annotationdefaults[annotationtype][set].items(): if key == 'annotatortype': if value == AnnotatorType.MANUAL: jsonnode[key] = 'manual' elif value == AnnotatorType.AUTO: jsonnode[key] = 'auto' elif key == 'datetime': jsonnode[key] = value.strftime("%Y-%m-%dT%H:%M:%S") #proper iso-formatting elif value: jsonnode[key] = value if label: l.append( jsonnode ) else: raise Exception("Invalid annotation type") return l"
"python"
"def jsondeclarations(self): """Return all declarations in a form ready to be serialised to JSON. Returns: list of dict """ l = [] for annotationtype, set in self.annotations: label = None #Find the 'label' for the declarations dynamically (aka: AnnotationType --> String) for key, value in vars(AnnotationType).items(): if value == annotationtype: label = key break #gather attribs if (annotationtype == AnnotationType.TEXT or annotationtype == AnnotationType.PHON) and set == 'undefined' and len(self.annotationdefaults[annotationtype][set]) == 0: #this is the implicit TextContent declaration, no need to output it explicitly continue jsonnode = {'annotationtype': label.lower()} if set and set != 'undefined': jsonnode['set'] = set for key, value in self.annotationdefaults[annotationtype][set].items(): if key == 'annotatortype': if value == AnnotatorType.MANUAL: jsonnode[key] = 'manual' elif value == AnnotatorType.AUTO: jsonnode[key] = 'auto' elif key == 'datetime': jsonnode[key] = value.strftime("%Y-%m-%dT%H:%M:%S") #proper iso-formatting elif value: jsonnode[key] = value if label: l.append( jsonnode ) else: raise Exception("Invalid annotation type") return l"
[ "def", "jsondeclarations", "(", "self", ")", ":", "l", "=", "[", "]", "for", "annotationtype", ",", "set", "in", "self", ".", "annotations", ":", "label", "=", "None", "#Find the 'label' for the declarations dynamically (aka: AnnotationType --> String)", "for", "key", ",", "value", "in", "vars", "(", "AnnotationType", ")", ".", "items", "(", ")", ":", "if", "value", "==", "annotationtype", ":", "label", "=", "key", "break", "#gather attribs", "if", "(", "annotationtype", "==", "AnnotationType", ".", "TEXT", "or", "annotationtype", "==", "AnnotationType", ".", "PHON", ")", "and", "set", "==", "'undefined'", "and", "len", "(", "self", ".", "annotationdefaults", "[", "annotationtype", "]", "[", "set", "]", ")", "==", "0", ":", "#this is the implicit TextContent declaration, no need to output it explicitly", "continue", "jsonnode", "=", "{", "'annotationtype'", ":", "label", ".", "lower", "(", ")", "}", "if", "set", "and", "set", "!=", "'undefined'", ":", "jsonnode", "[", "'set'", "]", "=", "set", "for", "key", ",", "value", "in", "self", ".", "annotationdefaults", "[", "annotationtype", "]", "[", "set", "]", ".", "items", "(", ")", ":", "if", "key", "==", "'annotatortype'", ":", "if", "value", "==", "AnnotatorType", ".", "MANUAL", ":", "jsonnode", "[", "key", "]", "=", "'manual'", "elif", "value", "==", "AnnotatorType", ".", "AUTO", ":", "jsonnode", "[", "key", "]", "=", "'auto'", "elif", "key", "==", "'datetime'", ":", "jsonnode", "[", "key", "]", "=", "value", ".", "strftime", "(", "\"%Y-%m-%dT%H:%M:%S\"", ")", "#proper iso-formatting", "elif", "value", ":", "jsonnode", "[", "key", "]", "=", "value", "if", "label", ":", "l", ".", "append", "(", "jsonnode", ")", "else", ":", "raise", "Exception", "(", "\"Invalid annotation type\"", ")", "return", "l" ]
"Return all declarations in a form ready to be serialised to JSON. Returns: list of dict"
[ "Return", "all", "declarations", "in", "a", "form", "ready", "to", "be", "serialised", "to", "JSON", "." ]
"7707f69a91caaa6cde037f0d0379f1d42500a68b"
"https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L6692-L6731"
""
""
""
""
""
""
-1
42
"proycon/pynlpl"
"pynlpl/formats/folia.py"
"Document.xml"
"def xml(self): """Serialise the document to XML. Returns: lxml.etree.Element See also: :meth:`Document.xmlstring` """ self.pendingvalidation() E = ElementMaker(namespace="http://ilk.uvt.nl/folia",nsmap={'xml' : "http://www.w3.org/XML/1998/namespace", 'xlink':"http://www.w3.org/1999/xlink"}) attribs = {} attribs['{http://www.w3.org/XML/1998/namespace}id'] = self.id #if self.version: # attribs['version'] = self.version #else: attribs['version'] = FOLIAVERSION attribs['generator'] = 'pynlpl.formats.folia-v' + LIBVERSION metadataattribs = {} metadataattribs['{' + NSFOLIA + '}type'] = self.metadatatype if isinstance(self.metadata, ExternalMetaData): metadataattribs['{' + NSFOLIA + '}src'] = self.metadata.url e = E.FoLiA( E.metadata( E.annotations( *self.xmldeclarations() ), *self.xmlmetadata(), **metadataattribs ) , **attribs) for text in self.data: e.append(text.xml()) return e"
"python"
"def xml(self): """Serialise the document to XML. Returns: lxml.etree.Element See also: :meth:`Document.xmlstring` """ self.pendingvalidation() E = ElementMaker(namespace="http://ilk.uvt.nl/folia",nsmap={'xml' : "http://www.w3.org/XML/1998/namespace", 'xlink':"http://www.w3.org/1999/xlink"}) attribs = {} attribs['{http://www.w3.org/XML/1998/namespace}id'] = self.id #if self.version: # attribs['version'] = self.version #else: attribs['version'] = FOLIAVERSION attribs['generator'] = 'pynlpl.formats.folia-v' + LIBVERSION metadataattribs = {} metadataattribs['{' + NSFOLIA + '}type'] = self.metadatatype if isinstance(self.metadata, ExternalMetaData): metadataattribs['{' + NSFOLIA + '}src'] = self.metadata.url e = E.FoLiA( E.metadata( E.annotations( *self.xmldeclarations() ), *self.xmlmetadata(), **metadataattribs ) , **attribs) for text in self.data: e.append(text.xml()) return e"
[ "def", "xml", "(", "self", ")", ":", "self", ".", "pendingvalidation", "(", ")", "E", "=", "ElementMaker", "(", "namespace", "=", "\"http://ilk.uvt.nl/folia\"", ",", "nsmap", "=", "{", "'xml'", ":", "\"http://www.w3.org/XML/1998/namespace\"", ",", "'xlink'", ":", "\"http://www.w3.org/1999/xlink\"", "}", ")", "attribs", "=", "{", "}", "attribs", "[", "'{http://www.w3.org/XML/1998/namespace}id'", "]", "=", "self", ".", "id", "#if self.version:", "# attribs['version'] = self.version", "#else:", "attribs", "[", "'version'", "]", "=", "FOLIAVERSION", "attribs", "[", "'generator'", "]", "=", "'pynlpl.formats.folia-v'", "+", "LIBVERSION", "metadataattribs", "=", "{", "}", "metadataattribs", "[", "'{'", "+", "NSFOLIA", "+", "'}type'", "]", "=", "self", ".", "metadatatype", "if", "isinstance", "(", "self", ".", "metadata", ",", "ExternalMetaData", ")", ":", "metadataattribs", "[", "'{'", "+", "NSFOLIA", "+", "'}src'", "]", "=", "self", ".", "metadata", ".", "url", "e", "=", "E", ".", "FoLiA", "(", "E", ".", "metadata", "(", "E", ".", "annotations", "(", "*", "self", ".", "xmldeclarations", "(", ")", ")", ",", "*", "self", ".", "xmlmetadata", "(", ")", ",", "*", "*", "metadataattribs", ")", ",", "*", "*", "attribs", ")", "for", "text", "in", "self", ".", "data", ":", "e", ".", "append", "(", "text", ".", "xml", "(", ")", ")", "return", "e" ]
"Serialise the document to XML. Returns: lxml.etree.Element See also: :meth:`Document.xmlstring`"
[ "Serialise", "the", "document", "to", "XML", "." ]
"7707f69a91caaa6cde037f0d0379f1d42500a68b"
"https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L6733-L6773"
""
""
""
""
""
""
-1
43
"proycon/pynlpl"
"pynlpl/formats/folia.py"
"Document.json"
"def json(self): """Serialise the document to a ``dict`` ready for serialisation to JSON. Example:: import json jsondoc = json.dumps(doc.json()) """ self.pendingvalidation() jsondoc = {'id': self.id, 'children': [], 'declarations': self.jsondeclarations() } if self.version: jsondoc['version'] = self.version else: jsondoc['version'] = FOLIAVERSION jsondoc['generator'] = 'pynlpl.formats.folia-v' + LIBVERSION for text in self.data: jsondoc['children'].append(text.json()) return jsondoc"
"python"
"def json(self): """Serialise the document to a ``dict`` ready for serialisation to JSON. Example:: import json jsondoc = json.dumps(doc.json()) """ self.pendingvalidation() jsondoc = {'id': self.id, 'children': [], 'declarations': self.jsondeclarations() } if self.version: jsondoc['version'] = self.version else: jsondoc['version'] = FOLIAVERSION jsondoc['generator'] = 'pynlpl.formats.folia-v' + LIBVERSION for text in self.data: jsondoc['children'].append(text.json()) return jsondoc"
[ "def", "json", "(", "self", ")", ":", "self", ".", "pendingvalidation", "(", ")", "jsondoc", "=", "{", "'id'", ":", "self", ".", "id", ",", "'children'", ":", "[", "]", ",", "'declarations'", ":", "self", ".", "jsondeclarations", "(", ")", "}", "if", "self", ".", "version", ":", "jsondoc", "[", "'version'", "]", "=", "self", ".", "version", "else", ":", "jsondoc", "[", "'version'", "]", "=", "FOLIAVERSION", "jsondoc", "[", "'generator'", "]", "=", "'pynlpl.formats.folia-v'", "+", "LIBVERSION", "for", "text", "in", "self", ".", "data", ":", "jsondoc", "[", "'children'", "]", ".", "append", "(", "text", ".", "json", "(", ")", ")", "return", "jsondoc" ]
"Serialise the document to a ``dict`` ready for serialisation to JSON. Example:: import json jsondoc = json.dumps(doc.json())"
[ "Serialise", "the", "document", "to", "a", "dict", "ready", "for", "serialisation", "to", "JSON", "." ]
"7707f69a91caaa6cde037f0d0379f1d42500a68b"
"https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L6775-L6794"
""
""
""
""
""
""
-1
44
"proycon/pynlpl"
"pynlpl/formats/folia.py"
"Document.xmlmetadata"
"def xmlmetadata(self): """Internal method to serialize metadata to XML""" E = ElementMaker(namespace="http://ilk.uvt.nl/folia",nsmap={None: "http://ilk.uvt.nl/folia", 'xml' : "http://www.w3.org/XML/1998/namespace"}) elements = [] if self.metadatatype == "native": if isinstance(self.metadata, NativeMetaData): for key, value in self.metadata.items(): elements.append(E.meta(value,id=key) ) else: if isinstance(self.metadata, ForeignData): #in-document m = self.metadata while m is not None: elements.append(m.xml()) m = m.next for metadata_id, submetadata in self.submetadata.items(): subelements = [] attribs = { "{http://www.w3.org/XML/1998/namespace}id": metadata_id, "type": self.submetadatatype[metadata_id] } if isinstance(submetadata, NativeMetaData): for key, value in submetadata.items(): subelements.append(E.meta(value,id=key) ) elif isinstance(submetadata, ExternalMetaData): attribs['src'] = submetadata.url elif isinstance(submetadata, ForeignData): #in-document m = submetadata while m is not None: subelements.append(m.xml()) m = m.next elements.append( E.submetadata(*subelements, **attribs)) return elements"
"python"
"def xmlmetadata(self): """Internal method to serialize metadata to XML""" E = ElementMaker(namespace="http://ilk.uvt.nl/folia",nsmap={None: "http://ilk.uvt.nl/folia", 'xml' : "http://www.w3.org/XML/1998/namespace"}) elements = [] if self.metadatatype == "native": if isinstance(self.metadata, NativeMetaData): for key, value in self.metadata.items(): elements.append(E.meta(value,id=key) ) else: if isinstance(self.metadata, ForeignData): #in-document m = self.metadata while m is not None: elements.append(m.xml()) m = m.next for metadata_id, submetadata in self.submetadata.items(): subelements = [] attribs = { "{http://www.w3.org/XML/1998/namespace}id": metadata_id, "type": self.submetadatatype[metadata_id] } if isinstance(submetadata, NativeMetaData): for key, value in submetadata.items(): subelements.append(E.meta(value,id=key) ) elif isinstance(submetadata, ExternalMetaData): attribs['src'] = submetadata.url elif isinstance(submetadata, ForeignData): #in-document m = submetadata while m is not None: subelements.append(m.xml()) m = m.next elements.append( E.submetadata(*subelements, **attribs)) return elements"
[ "def", "xmlmetadata", "(", "self", ")", ":", "E", "=", "ElementMaker", "(", "namespace", "=", "\"http://ilk.uvt.nl/folia\"", ",", "nsmap", "=", "{", "None", ":", "\"http://ilk.uvt.nl/folia\"", ",", "'xml'", ":", "\"http://www.w3.org/XML/1998/namespace\"", "}", ")", "elements", "=", "[", "]", "if", "self", ".", "metadatatype", "==", "\"native\"", ":", "if", "isinstance", "(", "self", ".", "metadata", ",", "NativeMetaData", ")", ":", "for", "key", ",", "value", "in", "self", ".", "metadata", ".", "items", "(", ")", ":", "elements", ".", "append", "(", "E", ".", "meta", "(", "value", ",", "id", "=", "key", ")", ")", "else", ":", "if", "isinstance", "(", "self", ".", "metadata", ",", "ForeignData", ")", ":", "#in-document", "m", "=", "self", ".", "metadata", "while", "m", "is", "not", "None", ":", "elements", ".", "append", "(", "m", ".", "xml", "(", ")", ")", "m", "=", "m", ".", "next", "for", "metadata_id", ",", "submetadata", "in", "self", ".", "submetadata", ".", "items", "(", ")", ":", "subelements", "=", "[", "]", "attribs", "=", "{", "\"{http://www.w3.org/XML/1998/namespace}id\"", ":", "metadata_id", ",", "\"type\"", ":", "self", ".", "submetadatatype", "[", "metadata_id", "]", "}", "if", "isinstance", "(", "submetadata", ",", "NativeMetaData", ")", ":", "for", "key", ",", "value", "in", "submetadata", ".", "items", "(", ")", ":", "subelements", ".", "append", "(", "E", ".", "meta", "(", "value", ",", "id", "=", "key", ")", ")", "elif", "isinstance", "(", "submetadata", ",", "ExternalMetaData", ")", ":", "attribs", "[", "'src'", "]", "=", "submetadata", ".", "url", "elif", "isinstance", "(", "submetadata", ",", "ForeignData", ")", ":", "#in-document", "m", "=", "submetadata", "while", "m", "is", "not", "None", ":", "subelements", ".", "append", "(", "m", ".", "xml", "(", ")", ")", "m", "=", "m", ".", "next", "elements", ".", "append", "(", "E", ".", "submetadata", "(", "*", "subelements", ",", "*", "*", "attribs", ")", ")", "return", "elements" ]
"Internal method to serialize metadata to XML"
[ "Internal", "method", "to", "serialize", "metadata", "to", "XML" ]
"7707f69a91caaa6cde037f0d0379f1d42500a68b"
"https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L6796-L6828"
""
""
""
""
""
""
-1
45
"proycon/pynlpl"
"pynlpl/formats/folia.py"
"Document.declare"
"def declare(self, annotationtype, set, **kwargs): """Declare a new annotation type to be used in the document. Keyword arguments can be used to set defaults for any annotation of this type and set. Arguments: annotationtype: The type of annotation, this is conveyed by passing the corresponding annototion class (such as :class:`PosAnnotation` for example), or a member of :class:`AnnotationType`, such as ``AnnotationType.POS``. set (str): the set, should formally be a URL pointing to the set definition Keyword Arguments: annotator (str): Sets a default annotator annotatortype: Should be either ``AnnotatorType.MANUAL`` or ``AnnotatorType.AUTO``, indicating whether the annotation was performed manually or by an automated process. datetime (datetime.datetime): Sets the default datetime alias (str): Defines alias that may be used in set attribute of elements instead of the full set name Example:: doc.declare(folia.PosAnnotation, 'http://some/path/brown-tag-set', annotator="mytagger", annotatortype=folia.AnnotatorType.AUTO) """ if (sys.version > '3' and not isinstance(set,str)) or (sys.version < '3' and not isinstance(set,(str,unicode))): raise ValueError("Set parameter for declare() must be a string") if inspect.isclass(annotationtype): annotationtype = annotationtype.ANNOTATIONTYPE if annotationtype in self.alias_set and set in self.alias_set[annotationtype]: raise ValueError("Set " + set + " conflicts with alias, may not be equal!") if not (annotationtype, set) in self.annotations: self.annotations.append( (annotationtype,set) ) if set and self.loadsetdefinitions and not set in self.setdefinitions: if set[:7] == "http://" or set[:8] == "https://" or set[:6] == "ftp://": self.setdefinitions[set] = SetDefinition(set,verbose=self.verbose) #will raise exception on error if not annotationtype in self.annotationdefaults: self.annotationdefaults[annotationtype] = {} self.annotationdefaults[annotationtype][set] = kwargs if 'alias' in kwargs: if annotationtype in 
self.set_alias and set in self.set_alias[annotationtype] and self.set_alias[annotationtype][set] != kwargs['alias']: raise ValueError("Redeclaring set " + set + " with another alias ('"+kwargs['alias']+"') is not allowed!") if annotationtype in self.alias_set and kwargs['alias'] in self.alias_set[annotationtype] and self.alias_set[annotationtype][kwargs['alias']] != set: raise ValueError("Redeclaring alias " + kwargs['alias'] + " with another set ('"+set+"') is not allowed!") if annotationtype in self.set_alias and kwargs['alias'] in self.set_alias[annotationtype]: raise ValueError("Alias " + kwargs['alias'] + " conflicts with set name, may not be equal!") if annotationtype not in self.alias_set: self.alias_set[annotationtype] = {} if annotationtype not in self.set_alias: self.set_alias[annotationtype] = {} self.alias_set[annotationtype][kwargs['alias']] = set self.set_alias[annotationtype][set] = kwargs['alias']"
"python"
"def declare(self, annotationtype, set, **kwargs): """Declare a new annotation type to be used in the document. Keyword arguments can be used to set defaults for any annotation of this type and set. Arguments: annotationtype: The type of annotation, this is conveyed by passing the corresponding annototion class (such as :class:`PosAnnotation` for example), or a member of :class:`AnnotationType`, such as ``AnnotationType.POS``. set (str): the set, should formally be a URL pointing to the set definition Keyword Arguments: annotator (str): Sets a default annotator annotatortype: Should be either ``AnnotatorType.MANUAL`` or ``AnnotatorType.AUTO``, indicating whether the annotation was performed manually or by an automated process. datetime (datetime.datetime): Sets the default datetime alias (str): Defines alias that may be used in set attribute of elements instead of the full set name Example:: doc.declare(folia.PosAnnotation, 'http://some/path/brown-tag-set', annotator="mytagger", annotatortype=folia.AnnotatorType.AUTO) """ if (sys.version > '3' and not isinstance(set,str)) or (sys.version < '3' and not isinstance(set,(str,unicode))): raise ValueError("Set parameter for declare() must be a string") if inspect.isclass(annotationtype): annotationtype = annotationtype.ANNOTATIONTYPE if annotationtype in self.alias_set and set in self.alias_set[annotationtype]: raise ValueError("Set " + set + " conflicts with alias, may not be equal!") if not (annotationtype, set) in self.annotations: self.annotations.append( (annotationtype,set) ) if set and self.loadsetdefinitions and not set in self.setdefinitions: if set[:7] == "http://" or set[:8] == "https://" or set[:6] == "ftp://": self.setdefinitions[set] = SetDefinition(set,verbose=self.verbose) #will raise exception on error if not annotationtype in self.annotationdefaults: self.annotationdefaults[annotationtype] = {} self.annotationdefaults[annotationtype][set] = kwargs if 'alias' in kwargs: if annotationtype in 
self.set_alias and set in self.set_alias[annotationtype] and self.set_alias[annotationtype][set] != kwargs['alias']: raise ValueError("Redeclaring set " + set + " with another alias ('"+kwargs['alias']+"') is not allowed!") if annotationtype in self.alias_set and kwargs['alias'] in self.alias_set[annotationtype] and self.alias_set[annotationtype][kwargs['alias']] != set: raise ValueError("Redeclaring alias " + kwargs['alias'] + " with another set ('"+set+"') is not allowed!") if annotationtype in self.set_alias and kwargs['alias'] in self.set_alias[annotationtype]: raise ValueError("Alias " + kwargs['alias'] + " conflicts with set name, may not be equal!") if annotationtype not in self.alias_set: self.alias_set[annotationtype] = {} if annotationtype not in self.set_alias: self.set_alias[annotationtype] = {} self.alias_set[annotationtype][kwargs['alias']] = set self.set_alias[annotationtype][set] = kwargs['alias']"
[ "def", "declare", "(", "self", ",", "annotationtype", ",", "set", ",", "*", "*", "kwargs", ")", ":", "if", "(", "sys", ".", "version", ">", "'3'", "and", "not", "isinstance", "(", "set", ",", "str", ")", ")", "or", "(", "sys", ".", "version", "<", "'3'", "and", "not", "isinstance", "(", "set", ",", "(", "str", ",", "unicode", ")", ")", ")", ":", "raise", "ValueError", "(", "\"Set parameter for declare() must be a string\"", ")", "if", "inspect", ".", "isclass", "(", "annotationtype", ")", ":", "annotationtype", "=", "annotationtype", ".", "ANNOTATIONTYPE", "if", "annotationtype", "in", "self", ".", "alias_set", "and", "set", "in", "self", ".", "alias_set", "[", "annotationtype", "]", ":", "raise", "ValueError", "(", "\"Set \"", "+", "set", "+", "\" conflicts with alias, may not be equal!\"", ")", "if", "not", "(", "annotationtype", ",", "set", ")", "in", "self", ".", "annotations", ":", "self", ".", "annotations", ".", "append", "(", "(", "annotationtype", ",", "set", ")", ")", "if", "set", "and", "self", ".", "loadsetdefinitions", "and", "not", "set", "in", "self", ".", "setdefinitions", ":", "if", "set", "[", ":", "7", "]", "==", "\"http://\"", "or", "set", "[", ":", "8", "]", "==", "\"https://\"", "or", "set", "[", ":", "6", "]", "==", "\"ftp://\"", ":", "self", ".", "setdefinitions", "[", "set", "]", "=", "SetDefinition", "(", "set", ",", "verbose", "=", "self", ".", "verbose", ")", "#will raise exception on error", "if", "not", "annotationtype", "in", "self", ".", "annotationdefaults", ":", "self", ".", "annotationdefaults", "[", "annotationtype", "]", "=", "{", "}", "self", ".", "annotationdefaults", "[", "annotationtype", "]", "[", "set", "]", "=", "kwargs", "if", "'alias'", "in", "kwargs", ":", "if", "annotationtype", "in", "self", ".", "set_alias", "and", "set", "in", "self", ".", "set_alias", "[", "annotationtype", "]", "and", "self", ".", "set_alias", "[", "annotationtype", "]", "[", "set", "]", "!=", "kwargs", "[", "'alias'", "]", ":", "raise", 
"ValueError", "(", "\"Redeclaring set \"", "+", "set", "+", "\" with another alias ('\"", "+", "kwargs", "[", "'alias'", "]", "+", "\"') is not allowed!\"", ")", "if", "annotationtype", "in", "self", ".", "alias_set", "and", "kwargs", "[", "'alias'", "]", "in", "self", ".", "alias_set", "[", "annotationtype", "]", "and", "self", ".", "alias_set", "[", "annotationtype", "]", "[", "kwargs", "[", "'alias'", "]", "]", "!=", "set", ":", "raise", "ValueError", "(", "\"Redeclaring alias \"", "+", "kwargs", "[", "'alias'", "]", "+", "\" with another set ('\"", "+", "set", "+", "\"') is not allowed!\"", ")", "if", "annotationtype", "in", "self", ".", "set_alias", "and", "kwargs", "[", "'alias'", "]", "in", "self", ".", "set_alias", "[", "annotationtype", "]", ":", "raise", "ValueError", "(", "\"Alias \"", "+", "kwargs", "[", "'alias'", "]", "+", "\" conflicts with set name, may not be equal!\"", ")", "if", "annotationtype", "not", "in", "self", ".", "alias_set", ":", "self", ".", "alias_set", "[", "annotationtype", "]", "=", "{", "}", "if", "annotationtype", "not", "in", "self", ".", "set_alias", ":", "self", ".", "set_alias", "[", "annotationtype", "]", "=", "{", "}", "self", ".", "alias_set", "[", "annotationtype", "]", "[", "kwargs", "[", "'alias'", "]", "]", "=", "set", "self", ".", "set_alias", "[", "annotationtype", "]", "[", "set", "]", "=", "kwargs", "[", "'alias'", "]" ]
"Declare a new annotation type to be used in the document. Keyword arguments can be used to set defaults for any annotation of this type and set. Arguments: annotationtype: The type of annotation, this is conveyed by passing the corresponding annototion class (such as :class:`PosAnnotation` for example), or a member of :class:`AnnotationType`, such as ``AnnotationType.POS``. set (str): the set, should formally be a URL pointing to the set definition Keyword Arguments: annotator (str): Sets a default annotator annotatortype: Should be either ``AnnotatorType.MANUAL`` or ``AnnotatorType.AUTO``, indicating whether the annotation was performed manually or by an automated process. datetime (datetime.datetime): Sets the default datetime alias (str): Defines alias that may be used in set attribute of elements instead of the full set name Example:: doc.declare(folia.PosAnnotation, 'http://some/path/brown-tag-set', annotator="mytagger", annotatortype=folia.AnnotatorType.AUTO)"
[ "Declare", "a", "new", "annotation", "type", "to", "be", "used", "in", "the", "document", "." ]
"7707f69a91caaa6cde037f0d0379f1d42500a68b"
"https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L6972-L7018"
""
""
""
""
""
""
-1
46
"proycon/pynlpl"
"pynlpl/formats/folia.py"
"Document.defaultset"
"def defaultset(self, annotationtype): """Obtain the default set for the specified annotation type. Arguments: annotationtype: The type of annotation, this is conveyed by passing the corresponding annototion class (such as :class:`PosAnnotation` for example), or a member of :class:`AnnotationType`, such as ``AnnotationType.POS``. Returns: the set (str) Raises: :class:`NoDefaultError` if the annotation type does not exist or if there is ambiguity (multiple sets for the same type) """ if inspect.isclass(annotationtype) or isinstance(annotationtype,AbstractElement): annotationtype = annotationtype.ANNOTATIONTYPE try: return list(self.annotationdefaults[annotationtype].keys())[0] except KeyError: raise NoDefaultError except IndexError: raise NoDefaultError"
"python"
"def defaultset(self, annotationtype): """Obtain the default set for the specified annotation type. Arguments: annotationtype: The type of annotation, this is conveyed by passing the corresponding annototion class (such as :class:`PosAnnotation` for example), or a member of :class:`AnnotationType`, such as ``AnnotationType.POS``. Returns: the set (str) Raises: :class:`NoDefaultError` if the annotation type does not exist or if there is ambiguity (multiple sets for the same type) """ if inspect.isclass(annotationtype) or isinstance(annotationtype,AbstractElement): annotationtype = annotationtype.ANNOTATIONTYPE try: return list(self.annotationdefaults[annotationtype].keys())[0] except KeyError: raise NoDefaultError except IndexError: raise NoDefaultError"
[ "def", "defaultset", "(", "self", ",", "annotationtype", ")", ":", "if", "inspect", ".", "isclass", "(", "annotationtype", ")", "or", "isinstance", "(", "annotationtype", ",", "AbstractElement", ")", ":", "annotationtype", "=", "annotationtype", ".", "ANNOTATIONTYPE", "try", ":", "return", "list", "(", "self", ".", "annotationdefaults", "[", "annotationtype", "]", ".", "keys", "(", ")", ")", "[", "0", "]", "except", "KeyError", ":", "raise", "NoDefaultError", "except", "IndexError", ":", "raise", "NoDefaultError" ]
"Obtain the default set for the specified annotation type. Arguments: annotationtype: The type of annotation, this is conveyed by passing the corresponding annototion class (such as :class:`PosAnnotation` for example), or a member of :class:`AnnotationType`, such as ``AnnotationType.POS``. Returns: the set (str) Raises: :class:`NoDefaultError` if the annotation type does not exist or if there is ambiguity (multiple sets for the same type)"
[ "Obtain", "the", "default", "set", "for", "the", "specified", "annotation", "type", "." ]
"7707f69a91caaa6cde037f0d0379f1d42500a68b"
"https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L7039-L7058"
""
""
""
""
""
""
-1
47
"proycon/pynlpl"
"pynlpl/formats/folia.py"
"Document.defaultannotator"
"def defaultannotator(self, annotationtype, set=None): """Obtain the default annotator for the specified annotation type and set. Arguments: annotationtype: The type of annotation, this is conveyed by passing the corresponding annototion class (such as :class:`PosAnnotation` for example), or a member of :class:`AnnotationType`, such as ``AnnotationType.POS``. set (str): the set, should formally be a URL pointing to the set definition Returns: the set (str) Raises: :class:`NoDefaultError` if the annotation type does not exist or if there is ambiguity (multiple sets for the same type) """ if inspect.isclass(annotationtype) or isinstance(annotationtype,AbstractElement): annotationtype = annotationtype.ANNOTATIONTYPE if not set: set = self.defaultset(annotationtype) try: return self.annotationdefaults[annotationtype][set]['annotator'] except KeyError: raise NoDefaultError"
"python"
"def defaultannotator(self, annotationtype, set=None): """Obtain the default annotator for the specified annotation type and set. Arguments: annotationtype: The type of annotation, this is conveyed by passing the corresponding annototion class (such as :class:`PosAnnotation` for example), or a member of :class:`AnnotationType`, such as ``AnnotationType.POS``. set (str): the set, should formally be a URL pointing to the set definition Returns: the set (str) Raises: :class:`NoDefaultError` if the annotation type does not exist or if there is ambiguity (multiple sets for the same type) """ if inspect.isclass(annotationtype) or isinstance(annotationtype,AbstractElement): annotationtype = annotationtype.ANNOTATIONTYPE if not set: set = self.defaultset(annotationtype) try: return self.annotationdefaults[annotationtype][set]['annotator'] except KeyError: raise NoDefaultError"
[ "def", "defaultannotator", "(", "self", ",", "annotationtype", ",", "set", "=", "None", ")", ":", "if", "inspect", ".", "isclass", "(", "annotationtype", ")", "or", "isinstance", "(", "annotationtype", ",", "AbstractElement", ")", ":", "annotationtype", "=", "annotationtype", ".", "ANNOTATIONTYPE", "if", "not", "set", ":", "set", "=", "self", ".", "defaultset", "(", "annotationtype", ")", "try", ":", "return", "self", ".", "annotationdefaults", "[", "annotationtype", "]", "[", "set", "]", "[", "'annotator'", "]", "except", "KeyError", ":", "raise", "NoDefaultError" ]
"Obtain the default annotator for the specified annotation type and set. Arguments: annotationtype: The type of annotation, this is conveyed by passing the corresponding annototion class (such as :class:`PosAnnotation` for example), or a member of :class:`AnnotationType`, such as ``AnnotationType.POS``. set (str): the set, should formally be a URL pointing to the set definition Returns: the set (str) Raises: :class:`NoDefaultError` if the annotation type does not exist or if there is ambiguity (multiple sets for the same type)"
[ "Obtain", "the", "default", "annotator", "for", "the", "specified", "annotation", "type", "and", "set", "." ]
"7707f69a91caaa6cde037f0d0379f1d42500a68b"
"https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L7061-L7080"
""
""
""
""
""
""
-1
48
"proycon/pynlpl"
"pynlpl/formats/folia.py"
"Document.parsemetadata"
"def parsemetadata(self, node): """Internal method to parse metadata""" if 'type' in node.attrib: self.metadatatype = node.attrib['type'] else: #no type specified, default to native self.metadatatype = "native" if 'src' in node.attrib: self.metadata = ExternalMetaData(node.attrib['src']) elif self.metadatatype == "native": self.metadata = NativeMetaData() else: self.metadata = None #may be set below to ForeignData for subnode in node: if subnode.tag == '{' + NSFOLIA + '}annotations': self.parsexmldeclarations(subnode) elif subnode.tag == '{' + NSFOLIA + '}meta': if self.metadatatype == "native": if subnode.text: self.metadata[subnode.attrib['id']] = subnode.text else: raise MetaDataError("Encountered a meta element but metadata type is not native!") elif subnode.tag == '{' + NSFOLIA + '}provenance': #forward compatibility with FoLiA 2.0; ignore provenance print("WARNING: Ignoring provenance data. Use foliapy instead of pynlpl.formats.folia for FoLiA v2.0 compatibility!",file=sys.stderr) pass elif subnode.tag == '{' + NSFOLIA + '}foreign-data': if self.metadatatype == "native": raise MetaDataError("Encountered a foreign-data element but metadata type is native!") elif self.metadata is not None: #multiple foreign-data elements, chain: e = self.metadata while e.next is not None: e = e.next e.next = ForeignData(self, node=subnode) else: self.metadata = ForeignData(self, node=subnode) elif subnode.tag == '{' + NSFOLIA + '}submetadata': self.parsesubmetadata(subnode) elif subnode.tag == '{http://www.mpi.nl/IMDI/Schema/IMDI}METATRANSCRIPT': #backward-compatibility for old IMDI without foreign-key E = ElementMaker(namespace=NSFOLIA,nsmap={None: NSFOLIA, 'xml' : "http://www.w3.org/XML/1998/namespace"}) self.metadatatype = "imdi" self.metadata = ForeignData(self, node=subnode)"
"python"
"def parsemetadata(self, node): """Internal method to parse metadata""" if 'type' in node.attrib: self.metadatatype = node.attrib['type'] else: #no type specified, default to native self.metadatatype = "native" if 'src' in node.attrib: self.metadata = ExternalMetaData(node.attrib['src']) elif self.metadatatype == "native": self.metadata = NativeMetaData() else: self.metadata = None #may be set below to ForeignData for subnode in node: if subnode.tag == '{' + NSFOLIA + '}annotations': self.parsexmldeclarations(subnode) elif subnode.tag == '{' + NSFOLIA + '}meta': if self.metadatatype == "native": if subnode.text: self.metadata[subnode.attrib['id']] = subnode.text else: raise MetaDataError("Encountered a meta element but metadata type is not native!") elif subnode.tag == '{' + NSFOLIA + '}provenance': #forward compatibility with FoLiA 2.0; ignore provenance print("WARNING: Ignoring provenance data. Use foliapy instead of pynlpl.formats.folia for FoLiA v2.0 compatibility!",file=sys.stderr) pass elif subnode.tag == '{' + NSFOLIA + '}foreign-data': if self.metadatatype == "native": raise MetaDataError("Encountered a foreign-data element but metadata type is native!") elif self.metadata is not None: #multiple foreign-data elements, chain: e = self.metadata while e.next is not None: e = e.next e.next = ForeignData(self, node=subnode) else: self.metadata = ForeignData(self, node=subnode) elif subnode.tag == '{' + NSFOLIA + '}submetadata': self.parsesubmetadata(subnode) elif subnode.tag == '{http://www.mpi.nl/IMDI/Schema/IMDI}METATRANSCRIPT': #backward-compatibility for old IMDI without foreign-key E = ElementMaker(namespace=NSFOLIA,nsmap={None: NSFOLIA, 'xml' : "http://www.w3.org/XML/1998/namespace"}) self.metadatatype = "imdi" self.metadata = ForeignData(self, node=subnode)"
[ "def", "parsemetadata", "(", "self", ",", "node", ")", ":", "if", "'type'", "in", "node", ".", "attrib", ":", "self", ".", "metadatatype", "=", "node", ".", "attrib", "[", "'type'", "]", "else", ":", "#no type specified, default to native", "self", ".", "metadatatype", "=", "\"native\"", "if", "'src'", "in", "node", ".", "attrib", ":", "self", ".", "metadata", "=", "ExternalMetaData", "(", "node", ".", "attrib", "[", "'src'", "]", ")", "elif", "self", ".", "metadatatype", "==", "\"native\"", ":", "self", ".", "metadata", "=", "NativeMetaData", "(", ")", "else", ":", "self", ".", "metadata", "=", "None", "#may be set below to ForeignData", "for", "subnode", "in", "node", ":", "if", "subnode", ".", "tag", "==", "'{'", "+", "NSFOLIA", "+", "'}annotations'", ":", "self", ".", "parsexmldeclarations", "(", "subnode", ")", "elif", "subnode", ".", "tag", "==", "'{'", "+", "NSFOLIA", "+", "'}meta'", ":", "if", "self", ".", "metadatatype", "==", "\"native\"", ":", "if", "subnode", ".", "text", ":", "self", ".", "metadata", "[", "subnode", ".", "attrib", "[", "'id'", "]", "]", "=", "subnode", ".", "text", "else", ":", "raise", "MetaDataError", "(", "\"Encountered a meta element but metadata type is not native!\"", ")", "elif", "subnode", ".", "tag", "==", "'{'", "+", "NSFOLIA", "+", "'}provenance'", ":", "#forward compatibility with FoLiA 2.0; ignore provenance", "print", "(", "\"WARNING: Ignoring provenance data. 
Use foliapy instead of pynlpl.formats.folia for FoLiA v2.0 compatibility!\"", ",", "file", "=", "sys", ".", "stderr", ")", "pass", "elif", "subnode", ".", "tag", "==", "'{'", "+", "NSFOLIA", "+", "'}foreign-data'", ":", "if", "self", ".", "metadatatype", "==", "\"native\"", ":", "raise", "MetaDataError", "(", "\"Encountered a foreign-data element but metadata type is native!\"", ")", "elif", "self", ".", "metadata", "is", "not", "None", ":", "#multiple foreign-data elements, chain:", "e", "=", "self", ".", "metadata", "while", "e", ".", "next", "is", "not", "None", ":", "e", "=", "e", ".", "next", "e", ".", "next", "=", "ForeignData", "(", "self", ",", "node", "=", "subnode", ")", "else", ":", "self", ".", "metadata", "=", "ForeignData", "(", "self", ",", "node", "=", "subnode", ")", "elif", "subnode", ".", "tag", "==", "'{'", "+", "NSFOLIA", "+", "'}submetadata'", ":", "self", ".", "parsesubmetadata", "(", "subnode", ")", "elif", "subnode", ".", "tag", "==", "'{http://www.mpi.nl/IMDI/Schema/IMDI}METATRANSCRIPT'", ":", "#backward-compatibility for old IMDI without foreign-key", "E", "=", "ElementMaker", "(", "namespace", "=", "NSFOLIA", ",", "nsmap", "=", "{", "None", ":", "NSFOLIA", ",", "'xml'", ":", "\"http://www.w3.org/XML/1998/namespace\"", "}", ")", "self", ".", "metadatatype", "=", "\"imdi\"", "self", ".", "metadata", "=", "ForeignData", "(", "self", ",", "node", "=", "subnode", ")" ]
"Internal method to parse metadata"
[ "Internal", "method", "to", "parse", "metadata" ]
"7707f69a91caaa6cde037f0d0379f1d42500a68b"
"https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L7216-L7261"
""
""
""
""
""
""
-1
49
"proycon/pynlpl"
"pynlpl/formats/folia.py"
"Document.pendingvalidation"
"def pendingvalidation(self, warnonly=None): """Perform any pending validations Parameters: warnonly (bool): Warn only (True) or raise exceptions (False). If set to None then this value will be determined based on the document's FoLiA version (Warn only before FoLiA v1.5) Returns: bool """ if self.debug: print("[PyNLPl FoLiA DEBUG] Processing pending validations (if any)",file=stderr) if warnonly is None and self and self.version: warnonly = (checkversion(self.version, '1.5.0') < 0) #warn only for documents older than FoLiA v1.5 if self.textvalidation: while self.offsetvalidationbuffer: structureelement, textclass = self.offsetvalidationbuffer.pop() if self.debug: print("[PyNLPl FoLiA DEBUG] Performing offset validation on " + repr(structureelement) + " textclass " + textclass,file=stderr) #validate offsets tc = structureelement.textcontent(textclass) if tc.offset is not None: try: tc.getreference(validate=True) except UnresolvableTextContent: msg = "Text for " + structureelement.__class__.__name__ + ", ID " + str(structureelement.id) + ", textclass " + textclass + ", has incorrect offset " + str(tc.offset) + " or invalid reference" print("TEXT VALIDATION ERROR: " + msg,file=sys.stderr) if not warnonly: raise"
"python"
"def pendingvalidation(self, warnonly=None): """Perform any pending validations Parameters: warnonly (bool): Warn only (True) or raise exceptions (False). If set to None then this value will be determined based on the document's FoLiA version (Warn only before FoLiA v1.5) Returns: bool """ if self.debug: print("[PyNLPl FoLiA DEBUG] Processing pending validations (if any)",file=stderr) if warnonly is None and self and self.version: warnonly = (checkversion(self.version, '1.5.0') < 0) #warn only for documents older than FoLiA v1.5 if self.textvalidation: while self.offsetvalidationbuffer: structureelement, textclass = self.offsetvalidationbuffer.pop() if self.debug: print("[PyNLPl FoLiA DEBUG] Performing offset validation on " + repr(structureelement) + " textclass " + textclass,file=stderr) #validate offsets tc = structureelement.textcontent(textclass) if tc.offset is not None: try: tc.getreference(validate=True) except UnresolvableTextContent: msg = "Text for " + structureelement.__class__.__name__ + ", ID " + str(structureelement.id) + ", textclass " + textclass + ", has incorrect offset " + str(tc.offset) + " or invalid reference" print("TEXT VALIDATION ERROR: " + msg,file=sys.stderr) if not warnonly: raise"
[ "def", "pendingvalidation", "(", "self", ",", "warnonly", "=", "None", ")", ":", "if", "self", ".", "debug", ":", "print", "(", "\"[PyNLPl FoLiA DEBUG] Processing pending validations (if any)\"", ",", "file", "=", "stderr", ")", "if", "warnonly", "is", "None", "and", "self", "and", "self", ".", "version", ":", "warnonly", "=", "(", "checkversion", "(", "self", ".", "version", ",", "'1.5.0'", ")", "<", "0", ")", "#warn only for documents older than FoLiA v1.5", "if", "self", ".", "textvalidation", ":", "while", "self", ".", "offsetvalidationbuffer", ":", "structureelement", ",", "textclass", "=", "self", ".", "offsetvalidationbuffer", ".", "pop", "(", ")", "if", "self", ".", "debug", ":", "print", "(", "\"[PyNLPl FoLiA DEBUG] Performing offset validation on \"", "+", "repr", "(", "structureelement", ")", "+", "\" textclass \"", "+", "textclass", ",", "file", "=", "stderr", ")", "#validate offsets", "tc", "=", "structureelement", ".", "textcontent", "(", "textclass", ")", "if", "tc", ".", "offset", "is", "not", "None", ":", "try", ":", "tc", ".", "getreference", "(", "validate", "=", "True", ")", "except", "UnresolvableTextContent", ":", "msg", "=", "\"Text for \"", "+", "structureelement", ".", "__class__", ".", "__name__", "+", "\", ID \"", "+", "str", "(", "structureelement", ".", "id", ")", "+", "\", textclass \"", "+", "textclass", "+", "\", has incorrect offset \"", "+", "str", "(", "tc", ".", "offset", ")", "+", "\" or invalid reference\"", "print", "(", "\"TEXT VALIDATION ERROR: \"", "+", "msg", ",", "file", "=", "sys", ".", "stderr", ")", "if", "not", "warnonly", ":", "raise" ]
"Perform any pending validations Parameters: warnonly (bool): Warn only (True) or raise exceptions (False). If set to None then this value will be determined based on the document's FoLiA version (Warn only before FoLiA v1.5) Returns: bool"
[ "Perform", "any", "pending", "validations" ]
"7707f69a91caaa6cde037f0d0379f1d42500a68b"
"https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L7396-L7424"
""
""
""
""
""
""
-1
50
"proycon/pynlpl"
"pynlpl/formats/folia.py"
"Document.paragraphs"
"def paragraphs(self, index = None): """Return a generator of all paragraphs found in the document. If an index is specified, return the n'th paragraph only (starting at 0)""" if index is None: return self.select(Paragraph) else: if index < 0: index = sum(t.count(Paragraph) for t in self.data) + index for t in self.data: for i,e in enumerate(t.select(Paragraph)) : if i == index: return e raise IndexError"
"python"
"def paragraphs(self, index = None): """Return a generator of all paragraphs found in the document. If an index is specified, return the n'th paragraph only (starting at 0)""" if index is None: return self.select(Paragraph) else: if index < 0: index = sum(t.count(Paragraph) for t in self.data) + index for t in self.data: for i,e in enumerate(t.select(Paragraph)) : if i == index: return e raise IndexError"
[ "def", "paragraphs", "(", "self", ",", "index", "=", "None", ")", ":", "if", "index", "is", "None", ":", "return", "self", ".", "select", "(", "Paragraph", ")", "else", ":", "if", "index", "<", "0", ":", "index", "=", "sum", "(", "t", ".", "count", "(", "Paragraph", ")", "for", "t", "in", "self", ".", "data", ")", "+", "index", "for", "t", "in", "self", ".", "data", ":", "for", "i", ",", "e", "in", "enumerate", "(", "t", ".", "select", "(", "Paragraph", ")", ")", ":", "if", "i", "==", "index", ":", "return", "e", "raise", "IndexError" ]
"Return a generator of all paragraphs found in the document. If an index is specified, return the n'th paragraph only (starting at 0)"
[ "Return", "a", "generator", "of", "all", "paragraphs", "found", "in", "the", "document", "." ]
"7707f69a91caaa6cde037f0d0379f1d42500a68b"
"https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L7445-L7458"
""
""
""
""
""
""
-1
51
"proycon/pynlpl"
"pynlpl/formats/folia.py"
"Document.sentences"
"def sentences(self, index = None): """Return a generator of all sentence found in the document. Except for sentences in quotes. If an index is specified, return the n'th sentence only (starting at 0)""" if index is None: return self.select(Sentence,None,True,[Quote]) else: if index < 0: index = sum(t.count(Sentence,None,True,[Quote]) for t in self.data) + index for t in self.data: for i,e in enumerate(t.select(Sentence,None,True,[Quote])) : if i == index: return e raise IndexError"
"python"
"def sentences(self, index = None): """Return a generator of all sentence found in the document. Except for sentences in quotes. If an index is specified, return the n'th sentence only (starting at 0)""" if index is None: return self.select(Sentence,None,True,[Quote]) else: if index < 0: index = sum(t.count(Sentence,None,True,[Quote]) for t in self.data) + index for t in self.data: for i,e in enumerate(t.select(Sentence,None,True,[Quote])) : if i == index: return e raise IndexError"
[ "def", "sentences", "(", "self", ",", "index", "=", "None", ")", ":", "if", "index", "is", "None", ":", "return", "self", ".", "select", "(", "Sentence", ",", "None", ",", "True", ",", "[", "Quote", "]", ")", "else", ":", "if", "index", "<", "0", ":", "index", "=", "sum", "(", "t", ".", "count", "(", "Sentence", ",", "None", ",", "True", ",", "[", "Quote", "]", ")", "for", "t", "in", "self", ".", "data", ")", "+", "index", "for", "t", "in", "self", ".", "data", ":", "for", "i", ",", "e", "in", "enumerate", "(", "t", ".", "select", "(", "Sentence", ",", "None", ",", "True", ",", "[", "Quote", "]", ")", ")", ":", "if", "i", "==", "index", ":", "return", "e", "raise", "IndexError" ]
"Return a generator of all sentence found in the document. Except for sentences in quotes. If an index is specified, return the n'th sentence only (starting at 0)"
[ "Return", "a", "generator", "of", "all", "sentence", "found", "in", "the", "document", ".", "Except", "for", "sentences", "in", "quotes", "." ]
"7707f69a91caaa6cde037f0d0379f1d42500a68b"
"https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L7460-L7473"
""
""
""
""
""
""
-1

Dataset Card for "code_x_glue_tc_nl_code_search_adv"

Dataset Summary

CodeXGLUE NL-code-search-Adv dataset, available at https://github.com/microsoft/CodeXGLUE/tree/main/Text-Code/NL-code-search-Adv

The dataset we use comes from CodeSearchNet, and we filter it as follows:

  • Remove examples whose code cannot be parsed into an abstract syntax tree.
  • Remove examples whose documents contain fewer than 3 or more than 256 tokens.
  • Remove examples whose documents contain special tokens (e.g. <img ...> or https:...).
  • Remove examples whose documents are not in English.

Supported Tasks and Leaderboards

  • document-retrieval: The dataset can be used to train a model for retrieving the top-k code snippets for a given English natural language query.

Languages

  • Python programming language
  • English natural language

Dataset Structure

Data Instances

An example of 'validation' looks as follows.

{
    "argument_list": "", 
    "code": "def Func(arg_0, arg_1='.', arg_2=True, arg_3=False, **arg_4):\n    \"\"\"Downloads Dailymotion videos by URL.\n    \"\"\"\n\n    arg_5 = get_content(rebuilt_url(arg_0))\n    arg_6 = json.loads(match1(arg_5, r'qualities\":({.+?}),\"'))\n    arg_7 = match1(arg_5, r'\"video_title\"\\s*:\\s*\"([^\"]+)\"') or \\\n            match1(arg_5, r'\"title\"\\s*:\\s*\"([^\"]+)\"')\n    arg_7 = unicodize(arg_7)\n\n    for arg_8 in ['1080','720','480','380','240','144','auto']:\n        try:\n            arg_9 = arg_6[arg_8][1][\"url\"]\n            if arg_9:\n                break\n        except KeyError:\n            pass\n\n    arg_10, arg_11, arg_12 = url_info(arg_9)\n\n    print_info(site_info, arg_7, arg_10, arg_12)\n    if not arg_3:\n        download_urls([arg_9], arg_7, arg_11, arg_12, arg_1=arg_1, arg_2=arg_2)", 
    "code_tokens": ["def", "Func", "(", "arg_0", ",", "arg_1", "=", "'.'", ",", "arg_2", "=", "True", ",", "arg_3", "=", "False", ",", "**", "arg_4", ")", ":", "arg_5", "=", "get_content", "(", "rebuilt_url", "(", "arg_0", ")", ")", "arg_6", "=", "json", ".", "loads", "(", "match1", "(", "arg_5", ",", "r'qualities\":({.+?}),\"'", ")", ")", "arg_7", "=", "match1", "(", "arg_5", ",", "r'\"video_title\"\\s*:\\s*\"([^\"]+)\"'", ")", "or", "match1", "(", "arg_5", ",", "r'\"title\"\\s*:\\s*\"([^\"]+)\"'", ")", "arg_7", "=", "unicodize", "(", "arg_7", ")", "for", "arg_8", "in", "[", "'1080'", ",", "'720'", ",", "'480'", ",", "'380'", ",", "'240'", ",", "'144'", ",", "'auto'", "]", ":", "try", ":", "arg_9", "=", "arg_6", "[", "arg_8", "]", "[", "1", "]", "[", "\"url\"", "]", "if", "arg_9", ":", "break", "except", "KeyError", ":", "pass", "arg_10", ",", "arg_11", ",", "arg_12", "=", "url_info", "(", "arg_9", ")", "print_info", "(", "site_info", ",", "arg_7", ",", "arg_10", ",", "arg_12", ")", "if", "not", "arg_3", ":", "download_urls", "(", "[", "arg_9", "]", ",", "arg_7", ",", "arg_11", ",", "arg_12", ",", "arg_1", "=", "arg_1", ",", "arg_2", "=", "arg_2", ")"], 
    "docstring": "Downloads Dailymotion videos by URL.", 
    "docstring_summary": "Downloads Dailymotion videos by URL.", 
    "docstring_tokens": ["Downloads", "Dailymotion", "videos", "by", "URL", "."], 
    "func_name": "", 
    "id": 0, 
    "identifier": "dailymotion_download", 
    "language": "python", 
    "nwo": "soimort/you-get", 
    "original_string": "", 
    "parameters": "(url, output_dir='.', merge=True, info_only=False, **kwargs)", 
    "path": "src/you_get/extractors/dailymotion.py", 
    "repo": "", 
    "return_statement": "", 
    "score": 0.9997601509094238, 
    "sha": "b746ac01c9f39de94cac2d56f665285b0523b974", 
    "url": "https://github.com/soimort/you-get/blob/b746ac01c9f39de94cac2d56f665285b0523b974/src/you_get/extractors/dailymotion.py#L13-L35"
}

Data Fields

In the following, each data field is explained for each config. The data fields are the same among all splits.

default

field name type description
id int32 Index of the sample
repo string repo: the owner/repo
path string path: the full path to the original file
func_name string func_name: the function or method name
original_string string original_string: the raw string before tokenization or parsing
language string language: the programming language
code string code/function: the part of the original_string that is code
code_tokens Sequence[string] code_tokens/function_tokens: tokenized version of code
docstring string docstring: the top-level comment or docstring, if it exists in the original string
docstring_tokens Sequence[string] docstring_tokens: tokenized version of docstring
sha string sha of the file
url string url of the file
docstring_summary string Summary of the docstring
parameters string parameters of the function
return_statement string return statement
argument_list string list of arguments of the function
identifier string identifier: the name of the function or method
nwo string nwo: the repository name with owner (owner/repo)
score float32 score: the score for this search

Data Splits

name train validation test
default 251820 9604 19210

Dataset Creation

Curation Rationale

[More Information Needed]

Source Data

Initial Data Collection and Normalization

Data from the CodeSearchNet Challenge dataset. [More Information Needed]

Who are the source language producers?

Software Engineering developers.

Annotations

Annotation process

[More Information Needed]

Who are the annotators?

[More Information Needed]

Personal and Sensitive Information

[More Information Needed]

Considerations for Using the Data

Social Impact of Dataset

[More Information Needed]

Discussion of Biases

[More Information Needed]

Other Known Limitations

[More Information Needed]

Additional Information

Dataset Curators

https://github.com/microsoft, https://github.com/madlag

Licensing Information

Computational Use of Data Agreement (C-UDA) License.

Citation Information

@article{DBLP:journals/corr/abs-2102-04664,
  author    = {Shuai Lu and
               Daya Guo and
               Shuo Ren and
               Junjie Huang and
               Alexey Svyatkovskiy and
               Ambrosio Blanco and
               Colin B. Clement and
               Dawn Drain and
               Daxin Jiang and
               Duyu Tang and
               Ge Li and
               Lidong Zhou and
               Linjun Shou and
               Long Zhou and
               Michele Tufano and
               Ming Gong and
               Ming Zhou and
               Nan Duan and
               Neel Sundaresan and
               Shao Kun Deng and
               Shengyu Fu and
               Shujie Liu},
  title     = {CodeXGLUE: {A} Machine Learning Benchmark Dataset for Code Understanding
               and Generation},
  journal   = {CoRR},
  volume    = {abs/2102.04664},
  year      = {2021}
}
@article{husain2019codesearchnet,
  title={Codesearchnet challenge: Evaluating the state of semantic code search},
  author={Husain, Hamel and Wu, Ho-Hsiang and Gazit, Tiferet and Allamanis, Miltiadis and Brockschmidt, Marc},
  journal={arXiv preprint arXiv:1909.09436},
  year={2019}
}

Contributions

Thanks to @madlag (and partly also @ncoop57) for adding this dataset.

Downloads last month
499
Edit dataset card
Evaluate models HF Leaderboard