Datasets:
id
int32
| repo
string
| path
string
| func_name
string
| original_string
string
| language
string
| code
string
| code_tokens
sequence
| docstring
string
| docstring_tokens
sequence
| sha
string
| url
string
| docstring_summary
string
| parameters
string
| return_statement
string
| argument_list
string
| identifier
string
| nwo
string
| score
float32
|
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | "proycon/pynlpl" | "pynlpl/formats/folia.py" | "AbstractElement.settext" | "def settext(self, text, cls='current'):
"""Set the text for this element.
Arguments:
text (str): The text
cls (str): The class of the text, defaults to ``current`` (leave this unless you know what you are doing). There may be only one text content element of each class associated with the element.
"""
self.replace(TextContent, value=text, cls=cls)" | "python" | "def settext(self, text, cls='current'):
"""Set the text for this element.
Arguments:
text (str): The text
cls (str): The class of the text, defaults to ``current`` (leave this unless you know what you are doing). There may be only one text content element of each class associated with the element.
"""
self.replace(TextContent, value=text, cls=cls)" | [
"def",
"settext",
"(",
"self",
",",
"text",
",",
"cls",
"=",
"'current'",
")",
":",
"self",
".",
"replace",
"(",
"TextContent",
",",
"value",
"=",
"text",
",",
"cls",
"=",
"cls",
")"
] | "Set the text for this element.
Arguments:
text (str): The text
cls (str): The class of the text, defaults to ``current`` (leave this unless you know what you are doing). There may be only one text content element of each class associated with the element." | [
"Set",
"the",
"text",
"for",
"this",
"element",
"."
] | "7707f69a91caaa6cde037f0d0379f1d42500a68b" | "https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L1357-L1364" | "" | "" | "" | "" | "" | "" | -1 |
1 | "proycon/pynlpl" | "pynlpl/formats/folia.py" | "AbstractElement.setdocument" | "def setdocument(self, doc):
"""Associate a document with this element.
Arguments:
doc (:class:`Document`): A document
Each element must be associated with a FoLiA document.
"""
assert isinstance(doc, Document)
if not self.doc:
self.doc = doc
if self.id:
if self.id in doc:
raise DuplicateIDError(self.id)
else:
self.doc.index[id] = self
for e in self: #recursive for all children
if isinstance(e,AbstractElement): e.setdocument(doc)" | "python" | "def setdocument(self, doc):
"""Associate a document with this element.
Arguments:
doc (:class:`Document`): A document
Each element must be associated with a FoLiA document.
"""
assert isinstance(doc, Document)
if not self.doc:
self.doc = doc
if self.id:
if self.id in doc:
raise DuplicateIDError(self.id)
else:
self.doc.index[id] = self
for e in self: #recursive for all children
if isinstance(e,AbstractElement): e.setdocument(doc)" | [
"def",
"setdocument",
"(",
"self",
",",
"doc",
")",
":",
"assert",
"isinstance",
"(",
"doc",
",",
"Document",
")",
"if",
"not",
"self",
".",
"doc",
":",
"self",
".",
"doc",
"=",
"doc",
"if",
"self",
".",
"id",
":",
"if",
"self",
".",
"id",
"in",
"doc",
":",
"raise",
"DuplicateIDError",
"(",
"self",
".",
"id",
")",
"else",
":",
"self",
".",
"doc",
".",
"index",
"[",
"id",
"]",
"=",
"self",
"for",
"e",
"in",
"self",
":",
"#recursive for all children",
"if",
"isinstance",
"(",
"e",
",",
"AbstractElement",
")",
":",
"e",
".",
"setdocument",
"(",
"doc",
")"
] | "Associate a document with this element.
Arguments:
doc (:class:`Document`): A document
Each element must be associated with a FoLiA document." | [
"Associate",
"a",
"document",
"with",
"this",
"element",
"."
] | "7707f69a91caaa6cde037f0d0379f1d42500a68b" | "https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L1366-L1385" | "" | "" | "" | "" | "" | "" | -1 |
2 | "proycon/pynlpl" | "pynlpl/formats/folia.py" | "AbstractElement.addable" | "def addable(Class, parent, set=None, raiseexceptions=True):
"""Tests whether a new element of this class can be added to the parent.
This method is mostly for internal use.
This will use the ``OCCURRENCES`` property, but may be overidden by subclasses for more customised behaviour.
Parameters:
parent (:class:`AbstractElement`): The element that is being added to
set (str or None): The set
raiseexceptions (bool): Raise an exception if the element can't be added?
Returns:
bool
Raises:
ValueError
"""
if not parent.__class__.accepts(Class, raiseexceptions, parent):
return False
if Class.OCCURRENCES > 0:
#check if the parent doesn't have too many already
count = parent.count(Class,None,True,[True, AbstractStructureElement]) #never descend into embedded structure annotatioton
if count >= Class.OCCURRENCES:
if raiseexceptions:
if parent.id:
extra = ' (id=' + parent.id + ')'
else:
extra = ''
raise DuplicateAnnotationError("Unable to add another object of type " + Class.__name__ + " to " + parent.__class__.__name__ + " " + extra + ". There are already " + str(count) + " instances of this class, which is the maximum.")
else:
return False
if Class.OCCURRENCES_PER_SET > 0 and set and Class.REQUIRED_ATTRIBS and Attrib.CLASS in Class.REQUIRED_ATTRIBS:
count = parent.count(Class,set,True, [True, AbstractStructureElement])
if count >= Class.OCCURRENCES_PER_SET:
if raiseexceptions:
if parent.id:
extra = ' (id=' + parent.id + ')'
else:
extra = ''
raise DuplicateAnnotationError("Unable to add another object of set " + set + " and type " + Class.__name__ + " to " + parent.__class__.__name__ + " " + extra + ". There are already " + str(count) + " instances of this class, which is the maximum for the set.")
else:
return False
return True" | "python" | "def addable(Class, parent, set=None, raiseexceptions=True):
"""Tests whether a new element of this class can be added to the parent.
This method is mostly for internal use.
This will use the ``OCCURRENCES`` property, but may be overidden by subclasses for more customised behaviour.
Parameters:
parent (:class:`AbstractElement`): The element that is being added to
set (str or None): The set
raiseexceptions (bool): Raise an exception if the element can't be added?
Returns:
bool
Raises:
ValueError
"""
if not parent.__class__.accepts(Class, raiseexceptions, parent):
return False
if Class.OCCURRENCES > 0:
#check if the parent doesn't have too many already
count = parent.count(Class,None,True,[True, AbstractStructureElement]) #never descend into embedded structure annotatioton
if count >= Class.OCCURRENCES:
if raiseexceptions:
if parent.id:
extra = ' (id=' + parent.id + ')'
else:
extra = ''
raise DuplicateAnnotationError("Unable to add another object of type " + Class.__name__ + " to " + parent.__class__.__name__ + " " + extra + ". There are already " + str(count) + " instances of this class, which is the maximum.")
else:
return False
if Class.OCCURRENCES_PER_SET > 0 and set and Class.REQUIRED_ATTRIBS and Attrib.CLASS in Class.REQUIRED_ATTRIBS:
count = parent.count(Class,set,True, [True, AbstractStructureElement])
if count >= Class.OCCURRENCES_PER_SET:
if raiseexceptions:
if parent.id:
extra = ' (id=' + parent.id + ')'
else:
extra = ''
raise DuplicateAnnotationError("Unable to add another object of set " + set + " and type " + Class.__name__ + " to " + parent.__class__.__name__ + " " + extra + ". There are already " + str(count) + " instances of this class, which is the maximum for the set.")
else:
return False
return True" | [
"def",
"addable",
"(",
"Class",
",",
"parent",
",",
"set",
"=",
"None",
",",
"raiseexceptions",
"=",
"True",
")",
":",
"if",
"not",
"parent",
".",
"__class__",
".",
"accepts",
"(",
"Class",
",",
"raiseexceptions",
",",
"parent",
")",
":",
"return",
"False",
"if",
"Class",
".",
"OCCURRENCES",
">",
"0",
":",
"#check if the parent doesn't have too many already",
"count",
"=",
"parent",
".",
"count",
"(",
"Class",
",",
"None",
",",
"True",
",",
"[",
"True",
",",
"AbstractStructureElement",
"]",
")",
"#never descend into embedded structure annotatioton",
"if",
"count",
">=",
"Class",
".",
"OCCURRENCES",
":",
"if",
"raiseexceptions",
":",
"if",
"parent",
".",
"id",
":",
"extra",
"=",
"' (id='",
"+",
"parent",
".",
"id",
"+",
"')'",
"else",
":",
"extra",
"=",
"''",
"raise",
"DuplicateAnnotationError",
"(",
"\"Unable to add another object of type \"",
"+",
"Class",
".",
"__name__",
"+",
"\" to \"",
"+",
"parent",
".",
"__class__",
".",
"__name__",
"+",
"\" \"",
"+",
"extra",
"+",
"\". There are already \"",
"+",
"str",
"(",
"count",
")",
"+",
"\" instances of this class, which is the maximum.\"",
")",
"else",
":",
"return",
"False",
"if",
"Class",
".",
"OCCURRENCES_PER_SET",
">",
"0",
"and",
"set",
"and",
"Class",
".",
"REQUIRED_ATTRIBS",
"and",
"Attrib",
".",
"CLASS",
"in",
"Class",
".",
"REQUIRED_ATTRIBS",
":",
"count",
"=",
"parent",
".",
"count",
"(",
"Class",
",",
"set",
",",
"True",
",",
"[",
"True",
",",
"AbstractStructureElement",
"]",
")",
"if",
"count",
">=",
"Class",
".",
"OCCURRENCES_PER_SET",
":",
"if",
"raiseexceptions",
":",
"if",
"parent",
".",
"id",
":",
"extra",
"=",
"' (id='",
"+",
"parent",
".",
"id",
"+",
"')'",
"else",
":",
"extra",
"=",
"''",
"raise",
"DuplicateAnnotationError",
"(",
"\"Unable to add another object of set \"",
"+",
"set",
"+",
"\" and type \"",
"+",
"Class",
".",
"__name__",
"+",
"\" to \"",
"+",
"parent",
".",
"__class__",
".",
"__name__",
"+",
"\" \"",
"+",
"extra",
"+",
"\". There are already \"",
"+",
"str",
"(",
"count",
")",
"+",
"\" instances of this class, which is the maximum for the set.\"",
")",
"else",
":",
"return",
"False",
"return",
"True"
] | "Tests whether a new element of this class can be added to the parent.
This method is mostly for internal use.
This will use the ``OCCURRENCES`` property, but may be overidden by subclasses for more customised behaviour.
Parameters:
parent (:class:`AbstractElement`): The element that is being added to
set (str or None): The set
raiseexceptions (bool): Raise an exception if the element can't be added?
Returns:
bool
Raises:
ValueError" | [
"Tests",
"whether",
"a",
"new",
"element",
"of",
"this",
"class",
"can",
"be",
"added",
"to",
"the",
"parent",
"."
] | "7707f69a91caaa6cde037f0d0379f1d42500a68b" | "https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L1406-L1455" | "" | "" | "" | "" | "" | "" | -1 |
3 | "proycon/pynlpl" | "pynlpl/formats/folia.py" | "AbstractElement.postappend" | "def postappend(self):
"""This method will be called after an element is added to another and does some checks.
It can do extra checks and if necessary raise exceptions to prevent addition. By default makes sure the right document is associated.
This method is mostly for internal use.
"""
#If the element was not associated with a document yet, do so now (and for all unassociated children:
if not self.doc and self.parent.doc:
self.setdocument(self.parent.doc)
if self.doc and self.doc.deepvalidation:
self.deepvalidation()" | "python" | "def postappend(self):
"""This method will be called after an element is added to another and does some checks.
It can do extra checks and if necessary raise exceptions to prevent addition. By default makes sure the right document is associated.
This method is mostly for internal use.
"""
#If the element was not associated with a document yet, do so now (and for all unassociated children:
if not self.doc and self.parent.doc:
self.setdocument(self.parent.doc)
if self.doc and self.doc.deepvalidation:
self.deepvalidation()" | [
"def",
"postappend",
"(",
"self",
")",
":",
"#If the element was not associated with a document yet, do so now (and for all unassociated children:",
"if",
"not",
"self",
".",
"doc",
"and",
"self",
".",
"parent",
".",
"doc",
":",
"self",
".",
"setdocument",
"(",
"self",
".",
"parent",
".",
"doc",
")",
"if",
"self",
".",
"doc",
"and",
"self",
".",
"doc",
".",
"deepvalidation",
":",
"self",
".",
"deepvalidation",
"(",
")"
] | "This method will be called after an element is added to another and does some checks.
It can do extra checks and if necessary raise exceptions to prevent addition. By default makes sure the right document is associated.
This method is mostly for internal use." | [
"This",
"method",
"will",
"be",
"called",
"after",
"an",
"element",
"is",
"added",
"to",
"another",
"and",
"does",
"some",
"checks",
"."
] | "7707f69a91caaa6cde037f0d0379f1d42500a68b" | "https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L1458-L1471" | "" | "" | "" | "" | "" | "" | -1 |
4 | "proycon/pynlpl" | "pynlpl/formats/folia.py" | "AbstractElement.updatetext" | "def updatetext(self):
"""Recompute textual value based on the text content of the children. Only supported on elements that are a ``TEXTCONTAINER``"""
if self.TEXTCONTAINER:
s = ""
for child in self:
if isinstance(child, AbstractElement):
child.updatetext()
s += child.text()
elif isstring(child):
s += child
self.data = [s]" | "python" | "def updatetext(self):
"""Recompute textual value based on the text content of the children. Only supported on elements that are a ``TEXTCONTAINER``"""
if self.TEXTCONTAINER:
s = ""
for child in self:
if isinstance(child, AbstractElement):
child.updatetext()
s += child.text()
elif isstring(child):
s += child
self.data = [s]" | [
"def",
"updatetext",
"(",
"self",
")",
":",
"if",
"self",
".",
"TEXTCONTAINER",
":",
"s",
"=",
"\"\"",
"for",
"child",
"in",
"self",
":",
"if",
"isinstance",
"(",
"child",
",",
"AbstractElement",
")",
":",
"child",
".",
"updatetext",
"(",
")",
"s",
"+=",
"child",
".",
"text",
"(",
")",
"elif",
"isstring",
"(",
"child",
")",
":",
"s",
"+=",
"child",
"self",
".",
"data",
"=",
"[",
"s",
"]"
] | "Recompute textual value based on the text content of the children. Only supported on elements that are a ``TEXTCONTAINER``" | [
"Recompute",
"textual",
"value",
"based",
"on",
"the",
"text",
"content",
"of",
"the",
"children",
".",
"Only",
"supported",
"on",
"elements",
"that",
"are",
"a",
"TEXTCONTAINER"
] | "7707f69a91caaa6cde037f0d0379f1d42500a68b" | "https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L1772-L1782" | "" | "" | "" | "" | "" | "" | -1 |
5 | "proycon/pynlpl" | "pynlpl/formats/folia.py" | "AbstractElement.ancestors" | "def ancestors(self, Class=None):
"""Generator yielding all ancestors of this element, effectively back-tracing its path to the root element. A tuple of multiple classes may be specified.
Arguments:
*Class: The class or classes (:class:`AbstractElement` or subclasses). Not instances!
Yields:
elements (instances derived from :class:`AbstractElement`)
"""
e = self
while e:
if e.parent:
e = e.parent
if not Class or isinstance(e,Class):
yield e
elif isinstance(Class, tuple):
for C in Class:
if isinstance(e,C):
yield e
else:
break" | "python" | "def ancestors(self, Class=None):
"""Generator yielding all ancestors of this element, effectively back-tracing its path to the root element. A tuple of multiple classes may be specified.
Arguments:
*Class: The class or classes (:class:`AbstractElement` or subclasses). Not instances!
Yields:
elements (instances derived from :class:`AbstractElement`)
"""
e = self
while e:
if e.parent:
e = e.parent
if not Class or isinstance(e,Class):
yield e
elif isinstance(Class, tuple):
for C in Class:
if isinstance(e,C):
yield e
else:
break" | [
"def",
"ancestors",
"(",
"self",
",",
"Class",
"=",
"None",
")",
":",
"e",
"=",
"self",
"while",
"e",
":",
"if",
"e",
".",
"parent",
":",
"e",
"=",
"e",
".",
"parent",
"if",
"not",
"Class",
"or",
"isinstance",
"(",
"e",
",",
"Class",
")",
":",
"yield",
"e",
"elif",
"isinstance",
"(",
"Class",
",",
"tuple",
")",
":",
"for",
"C",
"in",
"Class",
":",
"if",
"isinstance",
"(",
"e",
",",
"C",
")",
":",
"yield",
"e",
"else",
":",
"break"
] | "Generator yielding all ancestors of this element, effectively back-tracing its path to the root element. A tuple of multiple classes may be specified.
Arguments:
*Class: The class or classes (:class:`AbstractElement` or subclasses). Not instances!
Yields:
elements (instances derived from :class:`AbstractElement`)" | [
"Generator",
"yielding",
"all",
"ancestors",
"of",
"this",
"element",
"effectively",
"back",
"-",
"tracing",
"its",
"path",
"to",
"the",
"root",
"element",
".",
"A",
"tuple",
"of",
"multiple",
"classes",
"may",
"be",
"specified",
"."
] | "7707f69a91caaa6cde037f0d0379f1d42500a68b" | "https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L1840-L1860" | "" | "" | "" | "" | "" | "" | -1 |
6 | "proycon/pynlpl" | "pynlpl/formats/folia.py" | "AbstractElement.ancestor" | "def ancestor(self, *Classes):
"""Find the most immediate ancestor of the specified type, multiple classes may be specified.
Arguments:
*Classes: The possible classes (:class:`AbstractElement` or subclasses) to select from. Not instances!
Example::
paragraph = word.ancestor(folia.Paragraph)
"""
for e in self.ancestors(tuple(Classes)):
return e
raise NoSuchAnnotation" | "python" | "def ancestor(self, *Classes):
"""Find the most immediate ancestor of the specified type, multiple classes may be specified.
Arguments:
*Classes: The possible classes (:class:`AbstractElement` or subclasses) to select from. Not instances!
Example::
paragraph = word.ancestor(folia.Paragraph)
"""
for e in self.ancestors(tuple(Classes)):
return e
raise NoSuchAnnotation" | [
"def",
"ancestor",
"(",
"self",
",",
"*",
"Classes",
")",
":",
"for",
"e",
"in",
"self",
".",
"ancestors",
"(",
"tuple",
"(",
"Classes",
")",
")",
":",
"return",
"e",
"raise",
"NoSuchAnnotation"
] | "Find the most immediate ancestor of the specified type, multiple classes may be specified.
Arguments:
*Classes: The possible classes (:class:`AbstractElement` or subclasses) to select from. Not instances!
Example::
paragraph = word.ancestor(folia.Paragraph)" | [
"Find",
"the",
"most",
"immediate",
"ancestor",
"of",
"the",
"specified",
"type",
"multiple",
"classes",
"may",
"be",
"specified",
"."
] | "7707f69a91caaa6cde037f0d0379f1d42500a68b" | "https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L1862-L1874" | "" | "" | "" | "" | "" | "" | -1 |
7 | "proycon/pynlpl" | "pynlpl/formats/folia.py" | "AbstractElement.json" | "def json(self, attribs=None, recurse=True, ignorelist=False):
"""Serialises the FoLiA element and all its contents to a Python dictionary suitable for serialisation to JSON.
Example::
import json
json.dumps(word.json())
Returns:
dict
"""
jsonnode = {}
jsonnode['type'] = self.XMLTAG
if self.id:
jsonnode['id'] = self.id
if self.set:
jsonnode['set'] = self.set
if self.cls:
jsonnode['class'] = self.cls
if self.annotator:
jsonnode['annotator'] = self.annotator
if self.annotatortype:
if self.annotatortype == AnnotatorType.AUTO:
jsonnode['annotatortype'] = "auto"
elif self.annotatortype == AnnotatorType.MANUAL:
jsonnode['annotatortype'] = "manual"
if self.confidence is not None:
jsonnode['confidence'] = self.confidence
if self.n:
jsonnode['n'] = self.n
if self.auth:
jsonnode['auth'] = self.auth
if self.datetime:
jsonnode['datetime'] = self.datetime.strftime("%Y-%m-%dT%H:%M:%S")
if recurse: #pylint: disable=too-many-nested-blocks
jsonnode['children'] = []
if self.TEXTCONTAINER:
jsonnode['text'] = self.text()
if self.PHONCONTAINER:
jsonnode['phon'] = self.phon()
for child in self:
if self.TEXTCONTAINER and isstring(child):
jsonnode['children'].append(child)
elif not self.PHONCONTAINER:
#check ignore list
ignore = False
if ignorelist:
for e in ignorelist:
if isinstance(child,e):
ignore = True
break
if not ignore:
jsonnode['children'].append(child.json(attribs,recurse,ignorelist))
if attribs:
for attrib in attribs:
jsonnode[attrib] = attribs
return jsonnode" | "python" | "def json(self, attribs=None, recurse=True, ignorelist=False):
"""Serialises the FoLiA element and all its contents to a Python dictionary suitable for serialisation to JSON.
Example::
import json
json.dumps(word.json())
Returns:
dict
"""
jsonnode = {}
jsonnode['type'] = self.XMLTAG
if self.id:
jsonnode['id'] = self.id
if self.set:
jsonnode['set'] = self.set
if self.cls:
jsonnode['class'] = self.cls
if self.annotator:
jsonnode['annotator'] = self.annotator
if self.annotatortype:
if self.annotatortype == AnnotatorType.AUTO:
jsonnode['annotatortype'] = "auto"
elif self.annotatortype == AnnotatorType.MANUAL:
jsonnode['annotatortype'] = "manual"
if self.confidence is not None:
jsonnode['confidence'] = self.confidence
if self.n:
jsonnode['n'] = self.n
if self.auth:
jsonnode['auth'] = self.auth
if self.datetime:
jsonnode['datetime'] = self.datetime.strftime("%Y-%m-%dT%H:%M:%S")
if recurse: #pylint: disable=too-many-nested-blocks
jsonnode['children'] = []
if self.TEXTCONTAINER:
jsonnode['text'] = self.text()
if self.PHONCONTAINER:
jsonnode['phon'] = self.phon()
for child in self:
if self.TEXTCONTAINER and isstring(child):
jsonnode['children'].append(child)
elif not self.PHONCONTAINER:
#check ignore list
ignore = False
if ignorelist:
for e in ignorelist:
if isinstance(child,e):
ignore = True
break
if not ignore:
jsonnode['children'].append(child.json(attribs,recurse,ignorelist))
if attribs:
for attrib in attribs:
jsonnode[attrib] = attribs
return jsonnode" | [
"def",
"json",
"(",
"self",
",",
"attribs",
"=",
"None",
",",
"recurse",
"=",
"True",
",",
"ignorelist",
"=",
"False",
")",
":",
"jsonnode",
"=",
"{",
"}",
"jsonnode",
"[",
"'type'",
"]",
"=",
"self",
".",
"XMLTAG",
"if",
"self",
".",
"id",
":",
"jsonnode",
"[",
"'id'",
"]",
"=",
"self",
".",
"id",
"if",
"self",
".",
"set",
":",
"jsonnode",
"[",
"'set'",
"]",
"=",
"self",
".",
"set",
"if",
"self",
".",
"cls",
":",
"jsonnode",
"[",
"'class'",
"]",
"=",
"self",
".",
"cls",
"if",
"self",
".",
"annotator",
":",
"jsonnode",
"[",
"'annotator'",
"]",
"=",
"self",
".",
"annotator",
"if",
"self",
".",
"annotatortype",
":",
"if",
"self",
".",
"annotatortype",
"==",
"AnnotatorType",
".",
"AUTO",
":",
"jsonnode",
"[",
"'annotatortype'",
"]",
"=",
"\"auto\"",
"elif",
"self",
".",
"annotatortype",
"==",
"AnnotatorType",
".",
"MANUAL",
":",
"jsonnode",
"[",
"'annotatortype'",
"]",
"=",
"\"manual\"",
"if",
"self",
".",
"confidence",
"is",
"not",
"None",
":",
"jsonnode",
"[",
"'confidence'",
"]",
"=",
"self",
".",
"confidence",
"if",
"self",
".",
"n",
":",
"jsonnode",
"[",
"'n'",
"]",
"=",
"self",
".",
"n",
"if",
"self",
".",
"auth",
":",
"jsonnode",
"[",
"'auth'",
"]",
"=",
"self",
".",
"auth",
"if",
"self",
".",
"datetime",
":",
"jsonnode",
"[",
"'datetime'",
"]",
"=",
"self",
".",
"datetime",
".",
"strftime",
"(",
"\"%Y-%m-%dT%H:%M:%S\"",
")",
"if",
"recurse",
":",
"#pylint: disable=too-many-nested-blocks",
"jsonnode",
"[",
"'children'",
"]",
"=",
"[",
"]",
"if",
"self",
".",
"TEXTCONTAINER",
":",
"jsonnode",
"[",
"'text'",
"]",
"=",
"self",
".",
"text",
"(",
")",
"if",
"self",
".",
"PHONCONTAINER",
":",
"jsonnode",
"[",
"'phon'",
"]",
"=",
"self",
".",
"phon",
"(",
")",
"for",
"child",
"in",
"self",
":",
"if",
"self",
".",
"TEXTCONTAINER",
"and",
"isstring",
"(",
"child",
")",
":",
"jsonnode",
"[",
"'children'",
"]",
".",
"append",
"(",
"child",
")",
"elif",
"not",
"self",
".",
"PHONCONTAINER",
":",
"#check ignore list",
"ignore",
"=",
"False",
"if",
"ignorelist",
":",
"for",
"e",
"in",
"ignorelist",
":",
"if",
"isinstance",
"(",
"child",
",",
"e",
")",
":",
"ignore",
"=",
"True",
"break",
"if",
"not",
"ignore",
":",
"jsonnode",
"[",
"'children'",
"]",
".",
"append",
"(",
"child",
".",
"json",
"(",
"attribs",
",",
"recurse",
",",
"ignorelist",
")",
")",
"if",
"attribs",
":",
"for",
"attrib",
"in",
"attribs",
":",
"jsonnode",
"[",
"attrib",
"]",
"=",
"attribs",
"return",
"jsonnode"
] | "Serialises the FoLiA element and all its contents to a Python dictionary suitable for serialisation to JSON.
Example::
import json
json.dumps(word.json())
Returns:
dict" | [
"Serialises",
"the",
"FoLiA",
"element",
"and",
"all",
"its",
"contents",
"to",
"a",
"Python",
"dictionary",
"suitable",
"for",
"serialisation",
"to",
"JSON",
"."
] | "7707f69a91caaa6cde037f0d0379f1d42500a68b" | "https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L2050-L2110" | "" | "" | "" | "" | "" | "" | -1 |
8 | "proycon/pynlpl" | "pynlpl/formats/folia.py" | "AbstractElement.xmlstring" | "def xmlstring(self, pretty_print=False):
"""Serialises this FoLiA element and all its contents to XML.
Returns:
str: a string with XML representation for this element and all its children"""
s = ElementTree.tostring(self.xml(), xml_declaration=False, pretty_print=pretty_print, encoding='utf-8')
if sys.version < '3':
if isinstance(s, str):
s = unicode(s,'utf-8') #pylint: disable=undefined-variable
else:
if isinstance(s,bytes):
s = str(s,'utf-8')
s = s.replace('ns0:','') #ugly patch to get rid of namespace prefix
s = s.replace(':ns0','')
return s" | "python" | "def xmlstring(self, pretty_print=False):
"""Serialises this FoLiA element and all its contents to XML.
Returns:
str: a string with XML representation for this element and all its children"""
s = ElementTree.tostring(self.xml(), xml_declaration=False, pretty_print=pretty_print, encoding='utf-8')
if sys.version < '3':
if isinstance(s, str):
s = unicode(s,'utf-8') #pylint: disable=undefined-variable
else:
if isinstance(s,bytes):
s = str(s,'utf-8')
s = s.replace('ns0:','') #ugly patch to get rid of namespace prefix
s = s.replace(':ns0','')
return s" | [
"def",
"xmlstring",
"(",
"self",
",",
"pretty_print",
"=",
"False",
")",
":",
"s",
"=",
"ElementTree",
".",
"tostring",
"(",
"self",
".",
"xml",
"(",
")",
",",
"xml_declaration",
"=",
"False",
",",
"pretty_print",
"=",
"pretty_print",
",",
"encoding",
"=",
"'utf-8'",
")",
"if",
"sys",
".",
"version",
"<",
"'3'",
":",
"if",
"isinstance",
"(",
"s",
",",
"str",
")",
":",
"s",
"=",
"unicode",
"(",
"s",
",",
"'utf-8'",
")",
"#pylint: disable=undefined-variable",
"else",
":",
"if",
"isinstance",
"(",
"s",
",",
"bytes",
")",
":",
"s",
"=",
"str",
"(",
"s",
",",
"'utf-8'",
")",
"s",
"=",
"s",
".",
"replace",
"(",
"'ns0:'",
",",
"''",
")",
"#ugly patch to get rid of namespace prefix",
"s",
"=",
"s",
".",
"replace",
"(",
"':ns0'",
",",
"''",
")",
"return",
"s"
] | "Serialises this FoLiA element and all its contents to XML.
Returns:
str: a string with XML representation for this element and all its children" | [
"Serialises",
"this",
"FoLiA",
"element",
"and",
"all",
"its",
"contents",
"to",
"XML",
"."
] | "7707f69a91caaa6cde037f0d0379f1d42500a68b" | "https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L2114-L2129" | "" | "" | "" | "" | "" | "" | -1 |
9 | "proycon/pynlpl" | "pynlpl/formats/folia.py" | "AbstractElement.select" | "def select(self, Class, set=None, recursive=True, ignore=True, node=None): #pylint: disable=bad-classmethod-argument,redefined-builtin
"""Select child elements of the specified class.
A further restriction can be made based on set.
Arguments:
Class (class): The class to select; any python class (not instance) subclassed off :class:`AbstractElement`
Set (str): The set to match against, only elements pertaining to this set will be returned. If set to None (default), all elements regardless of set will be returned.
recursive (bool): Select recursively? Descending into child elements? Defaults to ``True``.
ignore: A list of Classes to ignore, if set to ``True`` instead of a list, all non-authoritative elements will be skipped (this is the default behaviour and corresponds to the following elements: :class:`Alternative`, :class:`AlternativeLayer`, :class:`Suggestion`, and :class:`folia.Original`. These elements and those contained within are never *authorative*. You may also include the boolean True as a member of a list, if you want to skip additional tags along the predefined non-authoritative ones.
* ``node``: Reserved for internal usage, used in recursion.
Yields:
Elements (instances derived from :class:`AbstractElement`)
Example::
for sense in text.select(folia.Sense, 'cornetto', True, [folia.Original, folia.Suggestion, folia.Alternative] ):
..
"""
#if ignorelist is True:
# ignorelist = default_ignore
if not node:
node = self
for e in self.data: #pylint: disable=too-many-nested-blocks
if (not self.TEXTCONTAINER and not self.PHONCONTAINER) or isinstance(e, AbstractElement):
if ignore is True:
try:
if not e.auth:
continue
except AttributeError:
#not all elements have auth attribute..
pass
elif ignore: #list
doignore = False
for c in ignore:
if c is True:
try:
if not e.auth:
doignore =True
break
except AttributeError:
#not all elements have auth attribute..
pass
elif c == e.__class__ or issubclass(e.__class__,c):
doignore = True
break
if doignore:
continue
if isinstance(e, Class):
if not set is None:
try:
if e.set != set:
continue
except AttributeError:
continue
yield e
if recursive:
for e2 in e.select(Class, set, recursive, ignore, e):
if not set is None:
try:
if e2.set != set:
continue
except AttributeError:
continue
yield e2" | "python" | "def select(self, Class, set=None, recursive=True, ignore=True, node=None): #pylint: disable=bad-classmethod-argument,redefined-builtin
"""Select child elements of the specified class.
A further restriction can be made based on set.
Arguments:
Class (class): The class to select; any python class (not instance) subclassed off :class:`AbstractElement`
Set (str): The set to match against, only elements pertaining to this set will be returned. If set to None (default), all elements regardless of set will be returned.
recursive (bool): Select recursively? Descending into child elements? Defaults to ``True``.
ignore: A list of Classes to ignore, if set to ``True`` instead of a list, all non-authoritative elements will be skipped (this is the default behaviour and corresponds to the following elements: :class:`Alternative`, :class:`AlternativeLayer`, :class:`Suggestion`, and :class:`folia.Original`. These elements and those contained within are never *authorative*. You may also include the boolean True as a member of a list, if you want to skip additional tags along the predefined non-authoritative ones.
* ``node``: Reserved for internal usage, used in recursion.
Yields:
Elements (instances derived from :class:`AbstractElement`)
Example::
for sense in text.select(folia.Sense, 'cornetto', True, [folia.Original, folia.Suggestion, folia.Alternative] ):
..
"""
#if ignorelist is True:
# ignorelist = default_ignore
if not node:
node = self
for e in self.data: #pylint: disable=too-many-nested-blocks
if (not self.TEXTCONTAINER and not self.PHONCONTAINER) or isinstance(e, AbstractElement):
if ignore is True:
try:
if not e.auth:
continue
except AttributeError:
#not all elements have auth attribute..
pass
elif ignore: #list
doignore = False
for c in ignore:
if c is True:
try:
if not e.auth:
doignore =True
break
except AttributeError:
#not all elements have auth attribute..
pass
elif c == e.__class__ or issubclass(e.__class__,c):
doignore = True
break
if doignore:
continue
if isinstance(e, Class):
if not set is None:
try:
if e.set != set:
continue
except AttributeError:
continue
yield e
if recursive:
for e2 in e.select(Class, set, recursive, ignore, e):
if not set is None:
try:
if e2.set != set:
continue
except AttributeError:
continue
yield e2" | [
"def",
"select",
"(",
"self",
",",
"Class",
",",
"set",
"=",
"None",
",",
"recursive",
"=",
"True",
",",
"ignore",
"=",
"True",
",",
"node",
"=",
"None",
")",
":",
"#pylint: disable=bad-classmethod-argument,redefined-builtin",
"#if ignorelist is True:",
"# ignorelist = default_ignore",
"if",
"not",
"node",
":",
"node",
"=",
"self",
"for",
"e",
"in",
"self",
".",
"data",
":",
"#pylint: disable=too-many-nested-blocks",
"if",
"(",
"not",
"self",
".",
"TEXTCONTAINER",
"and",
"not",
"self",
".",
"PHONCONTAINER",
")",
"or",
"isinstance",
"(",
"e",
",",
"AbstractElement",
")",
":",
"if",
"ignore",
"is",
"True",
":",
"try",
":",
"if",
"not",
"e",
".",
"auth",
":",
"continue",
"except",
"AttributeError",
":",
"#not all elements have auth attribute..",
"pass",
"elif",
"ignore",
":",
"#list",
"doignore",
"=",
"False",
"for",
"c",
"in",
"ignore",
":",
"if",
"c",
"is",
"True",
":",
"try",
":",
"if",
"not",
"e",
".",
"auth",
":",
"doignore",
"=",
"True",
"break",
"except",
"AttributeError",
":",
"#not all elements have auth attribute..",
"pass",
"elif",
"c",
"==",
"e",
".",
"__class__",
"or",
"issubclass",
"(",
"e",
".",
"__class__",
",",
"c",
")",
":",
"doignore",
"=",
"True",
"break",
"if",
"doignore",
":",
"continue",
"if",
"isinstance",
"(",
"e",
",",
"Class",
")",
":",
"if",
"not",
"set",
"is",
"None",
":",
"try",
":",
"if",
"e",
".",
"set",
"!=",
"set",
":",
"continue",
"except",
"AttributeError",
":",
"continue",
"yield",
"e",
"if",
"recursive",
":",
"for",
"e2",
"in",
"e",
".",
"select",
"(",
"Class",
",",
"set",
",",
"recursive",
",",
"ignore",
",",
"e",
")",
":",
"if",
"not",
"set",
"is",
"None",
":",
"try",
":",
"if",
"e2",
".",
"set",
"!=",
"set",
":",
"continue",
"except",
"AttributeError",
":",
"continue",
"yield",
"e2"
] | "Select child elements of the specified class.
A further restriction can be made based on set.
Arguments:
Class (class): The class to select; any python class (not instance) subclassed off :class:`AbstractElement`
Set (str): The set to match against, only elements pertaining to this set will be returned. If set to None (default), all elements regardless of set will be returned.
recursive (bool): Select recursively? Descending into child elements? Defaults to ``True``.
ignore: A list of Classes to ignore, if set to ``True`` instead of a list, all non-authoritative elements will be skipped (this is the default behaviour and corresponds to the following elements: :class:`Alternative`, :class:`AlternativeLayer`, :class:`Suggestion`, and :class:`folia.Original`. These elements and those contained within are never *authorative*. You may also include the boolean True as a member of a list, if you want to skip additional tags along the predefined non-authoritative ones.
* ``node``: Reserved for internal usage, used in recursion.
Yields:
Elements (instances derived from :class:`AbstractElement`)
Example::
for sense in text.select(folia.Sense, 'cornetto', True, [folia.Original, folia.Suggestion, folia.Alternative] ):
.." | [
"Select",
"child",
"elements",
"of",
"the",
"specified",
"class",
"."
] | "7707f69a91caaa6cde037f0d0379f1d42500a68b" | "https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L2132-L2201" | "" | "" | "" | "" | "" | "" | -1 |
10 | "proycon/pynlpl" | "pynlpl/formats/folia.py" | "AbstractElement.getmetadata" | "def getmetadata(self, key=None):
"""Get the metadata that applies to this element, automatically inherited from parent elements"""
if self.metadata:
d = self.doc.submetadata[self.metadata]
elif self.parent:
d = self.parent.getmetadata()
elif self.doc:
d = self.doc.metadata
else:
return None
if key:
return d[key]
else:
return d" | "python" | "def getmetadata(self, key=None):
"""Get the metadata that applies to this element, automatically inherited from parent elements"""
if self.metadata:
d = self.doc.submetadata[self.metadata]
elif self.parent:
d = self.parent.getmetadata()
elif self.doc:
d = self.doc.metadata
else:
return None
if key:
return d[key]
else:
return d" | [
"def",
"getmetadata",
"(",
"self",
",",
"key",
"=",
"None",
")",
":",
"if",
"self",
".",
"metadata",
":",
"d",
"=",
"self",
".",
"doc",
".",
"submetadata",
"[",
"self",
".",
"metadata",
"]",
"elif",
"self",
".",
"parent",
":",
"d",
"=",
"self",
".",
"parent",
".",
"getmetadata",
"(",
")",
"elif",
"self",
".",
"doc",
":",
"d",
"=",
"self",
".",
"doc",
".",
"metadata",
"else",
":",
"return",
"None",
"if",
"key",
":",
"return",
"d",
"[",
"key",
"]",
"else",
":",
"return",
"d"
] | "Get the metadata that applies to this element, automatically inherited from parent elements" | [
"Get",
"the",
"metadata",
"that",
"applies",
"to",
"this",
"element",
"automatically",
"inherited",
"from",
"parent",
"elements"
] | "7707f69a91caaa6cde037f0d0379f1d42500a68b" | "https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L2221-L2234" | "" | "" | "" | "" | "" | "" | -1 |
11 | "proycon/pynlpl" | "pynlpl/formats/folia.py" | "AbstractElement.getindex" | "def getindex(self, child, recursive=True, ignore=True):
"""Get the index at which an element occurs, recursive by default!
Returns:
int
"""
#breadth first search
for i, c in enumerate(self.data):
if c is child:
return i
if recursive: #pylint: disable=too-many-nested-blocks
for i, c in enumerate(self.data):
if ignore is True:
try:
if not c.auth:
continue
except AttributeError:
#not all elements have auth attribute..
pass
elif ignore: #list
doignore = False
for e in ignore:
if e is True:
try:
if not c.auth:
doignore =True
break
except AttributeError:
#not all elements have auth attribute..
pass
elif e == c.__class__ or issubclass(c.__class__,e):
doignore = True
break
if doignore:
continue
if isinstance(c, AbstractElement):
j = c.getindex(child, recursive)
if j != -1:
return i #yes, i ... not j!
return -1" | "python" | "def getindex(self, child, recursive=True, ignore=True):
"""Get the index at which an element occurs, recursive by default!
Returns:
int
"""
#breadth first search
for i, c in enumerate(self.data):
if c is child:
return i
if recursive: #pylint: disable=too-many-nested-blocks
for i, c in enumerate(self.data):
if ignore is True:
try:
if not c.auth:
continue
except AttributeError:
#not all elements have auth attribute..
pass
elif ignore: #list
doignore = False
for e in ignore:
if e is True:
try:
if not c.auth:
doignore =True
break
except AttributeError:
#not all elements have auth attribute..
pass
elif e == c.__class__ or issubclass(c.__class__,e):
doignore = True
break
if doignore:
continue
if isinstance(c, AbstractElement):
j = c.getindex(child, recursive)
if j != -1:
return i #yes, i ... not j!
return -1" | [
"def",
"getindex",
"(",
"self",
",",
"child",
",",
"recursive",
"=",
"True",
",",
"ignore",
"=",
"True",
")",
":",
"#breadth first search",
"for",
"i",
",",
"c",
"in",
"enumerate",
"(",
"self",
".",
"data",
")",
":",
"if",
"c",
"is",
"child",
":",
"return",
"i",
"if",
"recursive",
":",
"#pylint: disable=too-many-nested-blocks",
"for",
"i",
",",
"c",
"in",
"enumerate",
"(",
"self",
".",
"data",
")",
":",
"if",
"ignore",
"is",
"True",
":",
"try",
":",
"if",
"not",
"c",
".",
"auth",
":",
"continue",
"except",
"AttributeError",
":",
"#not all elements have auth attribute..",
"pass",
"elif",
"ignore",
":",
"#list",
"doignore",
"=",
"False",
"for",
"e",
"in",
"ignore",
":",
"if",
"e",
"is",
"True",
":",
"try",
":",
"if",
"not",
"c",
".",
"auth",
":",
"doignore",
"=",
"True",
"break",
"except",
"AttributeError",
":",
"#not all elements have auth attribute..",
"pass",
"elif",
"e",
"==",
"c",
".",
"__class__",
"or",
"issubclass",
"(",
"c",
".",
"__class__",
",",
"e",
")",
":",
"doignore",
"=",
"True",
"break",
"if",
"doignore",
":",
"continue",
"if",
"isinstance",
"(",
"c",
",",
"AbstractElement",
")",
":",
"j",
"=",
"c",
".",
"getindex",
"(",
"child",
",",
"recursive",
")",
"if",
"j",
"!=",
"-",
"1",
":",
"return",
"i",
"#yes, i ... not j!",
"return",
"-",
"1"
] | "Get the index at which an element occurs, recursive by default!
Returns:
int" | [
"Get",
"the",
"index",
"at",
"which",
"an",
"element",
"occurs",
"recursive",
"by",
"default!"
] | "7707f69a91caaa6cde037f0d0379f1d42500a68b" | "https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L2238-L2278" | "" | "" | "" | "" | "" | "" | -1 |
12 | "proycon/pynlpl" | "pynlpl/formats/folia.py" | "AbstractElement.precedes" | "def precedes(self, other):
"""Returns a boolean indicating whether this element precedes the other element"""
try:
ancestor = next(commonancestors(AbstractElement, self, other))
except StopIteration:
raise Exception("Elements share no common ancestor")
#now we just do a depth first search and see who comes first
def callback(e):
if e is self:
return True
elif e is other:
return False
return None
result = ancestor.depthfirstsearch(callback)
if result is None:
raise Exception("Unable to find relation between elements! (shouldn't happen)")
return result" | "python" | "def precedes(self, other):
"""Returns a boolean indicating whether this element precedes the other element"""
try:
ancestor = next(commonancestors(AbstractElement, self, other))
except StopIteration:
raise Exception("Elements share no common ancestor")
#now we just do a depth first search and see who comes first
def callback(e):
if e is self:
return True
elif e is other:
return False
return None
result = ancestor.depthfirstsearch(callback)
if result is None:
raise Exception("Unable to find relation between elements! (shouldn't happen)")
return result" | [
"def",
"precedes",
"(",
"self",
",",
"other",
")",
":",
"try",
":",
"ancestor",
"=",
"next",
"(",
"commonancestors",
"(",
"AbstractElement",
",",
"self",
",",
"other",
")",
")",
"except",
"StopIteration",
":",
"raise",
"Exception",
"(",
"\"Elements share no common ancestor\"",
")",
"#now we just do a depth first search and see who comes first",
"def",
"callback",
"(",
"e",
")",
":",
"if",
"e",
"is",
"self",
":",
"return",
"True",
"elif",
"e",
"is",
"other",
":",
"return",
"False",
"return",
"None",
"result",
"=",
"ancestor",
".",
"depthfirstsearch",
"(",
"callback",
")",
"if",
"result",
"is",
"None",
":",
"raise",
"Exception",
"(",
"\"Unable to find relation between elements! (shouldn't happen)\"",
")",
"return",
"result"
] | "Returns a boolean indicating whether this element precedes the other element" | [
"Returns",
"a",
"boolean",
"indicating",
"whether",
"this",
"element",
"precedes",
"the",
"other",
"element"
] | "7707f69a91caaa6cde037f0d0379f1d42500a68b" | "https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L2280-L2296" | "" | "" | "" | "" | "" | "" | -1 |
13 | "proycon/pynlpl" | "pynlpl/formats/folia.py" | "AbstractElement.depthfirstsearch" | "def depthfirstsearch(self, function):
"""Generic depth first search algorithm using a callback function, continues as long as the callback function returns None"""
result = function(self)
if result is not None:
return result
for e in self:
result = e.depthfirstsearch(function)
if result is not None:
return result
return None" | "python" | "def depthfirstsearch(self, function):
"""Generic depth first search algorithm using a callback function, continues as long as the callback function returns None"""
result = function(self)
if result is not None:
return result
for e in self:
result = e.depthfirstsearch(function)
if result is not None:
return result
return None" | [
"def",
"depthfirstsearch",
"(",
"self",
",",
"function",
")",
":",
"result",
"=",
"function",
"(",
"self",
")",
"if",
"result",
"is",
"not",
"None",
":",
"return",
"result",
"for",
"e",
"in",
"self",
":",
"result",
"=",
"e",
".",
"depthfirstsearch",
"(",
"function",
")",
"if",
"result",
"is",
"not",
"None",
":",
"return",
"result",
"return",
"None"
] | "Generic depth first search algorithm using a callback function, continues as long as the callback function returns None" | [
"Generic",
"depth",
"first",
"search",
"algorithm",
"using",
"a",
"callback",
"function",
"continues",
"as",
"long",
"as",
"the",
"callback",
"function",
"returns",
"None"
] | "7707f69a91caaa6cde037f0d0379f1d42500a68b" | "https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L2299-L2308" | "" | "" | "" | "" | "" | "" | -1 |
14 | "proycon/pynlpl" | "pynlpl/formats/folia.py" | "AbstractElement.next" | "def next(self, Class=True, scope=True, reverse=False):
"""Returns the next element, if it is of the specified type and if it does not cross the boundary of the defined scope. Returns None if no next element is found. Non-authoritative elements are never returned.
Arguments:
* ``Class``: The class to select; any python class subclassed off `'AbstractElement``, may also be a tuple of multiple classes. Set to ``True`` to constrain to the same class as that of the current instance, set to ``None`` to not constrain at all
* ``scope``: A list of classes which are never crossed looking for a next element. Set to ``True`` to constrain to a default list of structure elements (Sentence,Paragraph,Division,Event, ListItem,Caption), set to ``None`` to not constrain at all.
"""
if Class is True: Class = self.__class__
if scope is True: scope = STRUCTURESCOPE
structural = Class is not None and issubclass(Class,AbstractStructureElement)
if reverse:
order = reversed
descendindex = -1
else:
order = lambda x: x #pylint: disable=redefined-variable-type
descendindex = 0
child = self
parent = self.parent
while parent: #pylint: disable=too-many-nested-blocks
if len(parent) > 1:
returnnext = False
for e in order(parent):
if e is child:
#we found the current item, next item will be the one to return
returnnext = True
elif returnnext and e.auth and not isinstance(e,AbstractAnnotationLayer) and (not structural or (structural and (not isinstance(e,(AbstractTokenAnnotation,TextContent)) ) )):
if structural and isinstance(e,Correction):
if not list(e.select(AbstractStructureElement)): #skip-over non-structural correction
continue
if Class is None or (isinstance(Class,tuple) and (any(isinstance(e,C) for C in Class))) or isinstance(e,Class):
return e
else:
#this is not yet the element of the type we are looking for, we are going to descend again in the very leftmost (rightmost if reversed) branch only
while e.data:
e = e.data[descendindex]
if not isinstance(e, AbstractElement):
return None #we've gone too far
if e.auth and not isinstance(e,AbstractAnnotationLayer):
if Class is None or (isinstance(Class,tuple) and (any(isinstance(e,C) for C in Class))) or isinstance(e,Class):
return e
else:
#descend deeper
continue
return None
#generational iteration
child = parent
if scope is not None and child.__class__ in scope:
#you shall not pass!
break
parent = parent.parent
return None" | "python" | "def next(self, Class=True, scope=True, reverse=False):
"""Returns the next element, if it is of the specified type and if it does not cross the boundary of the defined scope. Returns None if no next element is found. Non-authoritative elements are never returned.
Arguments:
* ``Class``: The class to select; any python class subclassed off `'AbstractElement``, may also be a tuple of multiple classes. Set to ``True`` to constrain to the same class as that of the current instance, set to ``None`` to not constrain at all
* ``scope``: A list of classes which are never crossed looking for a next element. Set to ``True`` to constrain to a default list of structure elements (Sentence,Paragraph,Division,Event, ListItem,Caption), set to ``None`` to not constrain at all.
"""
if Class is True: Class = self.__class__
if scope is True: scope = STRUCTURESCOPE
structural = Class is not None and issubclass(Class,AbstractStructureElement)
if reverse:
order = reversed
descendindex = -1
else:
order = lambda x: x #pylint: disable=redefined-variable-type
descendindex = 0
child = self
parent = self.parent
while parent: #pylint: disable=too-many-nested-blocks
if len(parent) > 1:
returnnext = False
for e in order(parent):
if e is child:
#we found the current item, next item will be the one to return
returnnext = True
elif returnnext and e.auth and not isinstance(e,AbstractAnnotationLayer) and (not structural or (structural and (not isinstance(e,(AbstractTokenAnnotation,TextContent)) ) )):
if structural and isinstance(e,Correction):
if not list(e.select(AbstractStructureElement)): #skip-over non-structural correction
continue
if Class is None or (isinstance(Class,tuple) and (any(isinstance(e,C) for C in Class))) or isinstance(e,Class):
return e
else:
#this is not yet the element of the type we are looking for, we are going to descend again in the very leftmost (rightmost if reversed) branch only
while e.data:
e = e.data[descendindex]
if not isinstance(e, AbstractElement):
return None #we've gone too far
if e.auth and not isinstance(e,AbstractAnnotationLayer):
if Class is None or (isinstance(Class,tuple) and (any(isinstance(e,C) for C in Class))) or isinstance(e,Class):
return e
else:
#descend deeper
continue
return None
#generational iteration
child = parent
if scope is not None and child.__class__ in scope:
#you shall not pass!
break
parent = parent.parent
return None" | [
"def",
"next",
"(",
"self",
",",
"Class",
"=",
"True",
",",
"scope",
"=",
"True",
",",
"reverse",
"=",
"False",
")",
":",
"if",
"Class",
"is",
"True",
":",
"Class",
"=",
"self",
".",
"__class__",
"if",
"scope",
"is",
"True",
":",
"scope",
"=",
"STRUCTURESCOPE",
"structural",
"=",
"Class",
"is",
"not",
"None",
"and",
"issubclass",
"(",
"Class",
",",
"AbstractStructureElement",
")",
"if",
"reverse",
":",
"order",
"=",
"reversed",
"descendindex",
"=",
"-",
"1",
"else",
":",
"order",
"=",
"lambda",
"x",
":",
"x",
"#pylint: disable=redefined-variable-type",
"descendindex",
"=",
"0",
"child",
"=",
"self",
"parent",
"=",
"self",
".",
"parent",
"while",
"parent",
":",
"#pylint: disable=too-many-nested-blocks",
"if",
"len",
"(",
"parent",
")",
">",
"1",
":",
"returnnext",
"=",
"False",
"for",
"e",
"in",
"order",
"(",
"parent",
")",
":",
"if",
"e",
"is",
"child",
":",
"#we found the current item, next item will be the one to return",
"returnnext",
"=",
"True",
"elif",
"returnnext",
"and",
"e",
".",
"auth",
"and",
"not",
"isinstance",
"(",
"e",
",",
"AbstractAnnotationLayer",
")",
"and",
"(",
"not",
"structural",
"or",
"(",
"structural",
"and",
"(",
"not",
"isinstance",
"(",
"e",
",",
"(",
"AbstractTokenAnnotation",
",",
"TextContent",
")",
")",
")",
")",
")",
":",
"if",
"structural",
"and",
"isinstance",
"(",
"e",
",",
"Correction",
")",
":",
"if",
"not",
"list",
"(",
"e",
".",
"select",
"(",
"AbstractStructureElement",
")",
")",
":",
"#skip-over non-structural correction",
"continue",
"if",
"Class",
"is",
"None",
"or",
"(",
"isinstance",
"(",
"Class",
",",
"tuple",
")",
"and",
"(",
"any",
"(",
"isinstance",
"(",
"e",
",",
"C",
")",
"for",
"C",
"in",
"Class",
")",
")",
")",
"or",
"isinstance",
"(",
"e",
",",
"Class",
")",
":",
"return",
"e",
"else",
":",
"#this is not yet the element of the type we are looking for, we are going to descend again in the very leftmost (rightmost if reversed) branch only",
"while",
"e",
".",
"data",
":",
"e",
"=",
"e",
".",
"data",
"[",
"descendindex",
"]",
"if",
"not",
"isinstance",
"(",
"e",
",",
"AbstractElement",
")",
":",
"return",
"None",
"#we've gone too far",
"if",
"e",
".",
"auth",
"and",
"not",
"isinstance",
"(",
"e",
",",
"AbstractAnnotationLayer",
")",
":",
"if",
"Class",
"is",
"None",
"or",
"(",
"isinstance",
"(",
"Class",
",",
"tuple",
")",
"and",
"(",
"any",
"(",
"isinstance",
"(",
"e",
",",
"C",
")",
"for",
"C",
"in",
"Class",
")",
")",
")",
"or",
"isinstance",
"(",
"e",
",",
"Class",
")",
":",
"return",
"e",
"else",
":",
"#descend deeper",
"continue",
"return",
"None",
"#generational iteration",
"child",
"=",
"parent",
"if",
"scope",
"is",
"not",
"None",
"and",
"child",
".",
"__class__",
"in",
"scope",
":",
"#you shall not pass!",
"break",
"parent",
"=",
"parent",
".",
"parent",
"return",
"None"
] | "Returns the next element, if it is of the specified type and if it does not cross the boundary of the defined scope. Returns None if no next element is found. Non-authoritative elements are never returned.
Arguments:
* ``Class``: The class to select; any python class subclassed off `'AbstractElement``, may also be a tuple of multiple classes. Set to ``True`` to constrain to the same class as that of the current instance, set to ``None`` to not constrain at all
* ``scope``: A list of classes which are never crossed looking for a next element. Set to ``True`` to constrain to a default list of structure elements (Sentence,Paragraph,Division,Event, ListItem,Caption), set to ``None`` to not constrain at all." | [
"Returns",
"the",
"next",
"element",
"if",
"it",
"is",
"of",
"the",
"specified",
"type",
"and",
"if",
"it",
"does",
"not",
"cross",
"the",
"boundary",
"of",
"the",
"defined",
"scope",
".",
"Returns",
"None",
"if",
"no",
"next",
"element",
"is",
"found",
".",
"Non",
"-",
"authoritative",
"elements",
"are",
"never",
"returned",
"."
] | "7707f69a91caaa6cde037f0d0379f1d42500a68b" | "https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L2310-L2367" | "" | "" | "" | "" | "" | "" | -1 |
15 | "proycon/pynlpl" | "pynlpl/formats/folia.py" | "AbstractElement.previous" | "def previous(self, Class=True, scope=True):
"""Returns the previous element, if it is of the specified type and if it does not cross the boundary of the defined scope. Returns None if no next element is found. Non-authoritative elements are never returned.
Arguments:
* ``Class``: The class to select; any python class subclassed off `'AbstractElement``. Set to ``True`` to constrain to the same class as that of the current instance, set to ``None`` to not constrain at all
* ``scope``: A list of classes which are never crossed looking for a next element. Set to ``True`` to constrain to a default list of structure elements (Sentence,Paragraph,Division,Event, ListItem,Caption), set to ``None`` to not constrain at all.
"""
return self.next(Class,scope, True)" | "python" | "def previous(self, Class=True, scope=True):
"""Returns the previous element, if it is of the specified type and if it does not cross the boundary of the defined scope. Returns None if no next element is found. Non-authoritative elements are never returned.
Arguments:
* ``Class``: The class to select; any python class subclassed off `'AbstractElement``. Set to ``True`` to constrain to the same class as that of the current instance, set to ``None`` to not constrain at all
* ``scope``: A list of classes which are never crossed looking for a next element. Set to ``True`` to constrain to a default list of structure elements (Sentence,Paragraph,Division,Event, ListItem,Caption), set to ``None`` to not constrain at all.
"""
return self.next(Class,scope, True)" | [
"def",
"previous",
"(",
"self",
",",
"Class",
"=",
"True",
",",
"scope",
"=",
"True",
")",
":",
"return",
"self",
".",
"next",
"(",
"Class",
",",
"scope",
",",
"True",
")"
] | "Returns the previous element, if it is of the specified type and if it does not cross the boundary of the defined scope. Returns None if no next element is found. Non-authoritative elements are never returned.
Arguments:
* ``Class``: The class to select; any python class subclassed off `'AbstractElement``. Set to ``True`` to constrain to the same class as that of the current instance, set to ``None`` to not constrain at all
* ``scope``: A list of classes which are never crossed looking for a next element. Set to ``True`` to constrain to a default list of structure elements (Sentence,Paragraph,Division,Event, ListItem,Caption), set to ``None`` to not constrain at all." | [
"Returns",
"the",
"previous",
"element",
"if",
"it",
"is",
"of",
"the",
"specified",
"type",
"and",
"if",
"it",
"does",
"not",
"cross",
"the",
"boundary",
"of",
"the",
"defined",
"scope",
".",
"Returns",
"None",
"if",
"no",
"next",
"element",
"is",
"found",
".",
"Non",
"-",
"authoritative",
"elements",
"are",
"never",
"returned",
"."
] | "7707f69a91caaa6cde037f0d0379f1d42500a68b" | "https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L2371-L2379" | "" | "" | "" | "" | "" | "" | -1 |
16 | "proycon/pynlpl" | "pynlpl/formats/folia.py" | "AbstractElement.remove" | "def remove(self, child):
"""Removes the child element"""
if not isinstance(child, AbstractElement):
raise ValueError("Expected AbstractElement, got " + str(type(child)))
if child.parent == self:
child.parent = None
self.data.remove(child)
#delete from index
if child.id and self.doc and child.id in self.doc.index:
del self.doc.index[child.id]" | "python" | "def remove(self, child):
"""Removes the child element"""
if not isinstance(child, AbstractElement):
raise ValueError("Expected AbstractElement, got " + str(type(child)))
if child.parent == self:
child.parent = None
self.data.remove(child)
#delete from index
if child.id and self.doc and child.id in self.doc.index:
del self.doc.index[child.id]" | [
"def",
"remove",
"(",
"self",
",",
"child",
")",
":",
"if",
"not",
"isinstance",
"(",
"child",
",",
"AbstractElement",
")",
":",
"raise",
"ValueError",
"(",
"\"Expected AbstractElement, got \"",
"+",
"str",
"(",
"type",
"(",
"child",
")",
")",
")",
"if",
"child",
".",
"parent",
"==",
"self",
":",
"child",
".",
"parent",
"=",
"None",
"self",
".",
"data",
".",
"remove",
"(",
"child",
")",
"#delete from index",
"if",
"child",
".",
"id",
"and",
"self",
".",
"doc",
"and",
"child",
".",
"id",
"in",
"self",
".",
"doc",
".",
"index",
":",
"del",
"self",
".",
"doc",
".",
"index",
"[",
"child",
".",
"id",
"]"
] | "Removes the child element" | [
"Removes",
"the",
"child",
"element"
] | "7707f69a91caaa6cde037f0d0379f1d42500a68b" | "https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L2729-L2738" | "" | "" | "" | "" | "" | "" | -1 |
17 | "proycon/pynlpl" | "pynlpl/formats/folia.py" | "AllowTokenAnnotation.hasannotation" | "def hasannotation(self,Class,set=None):
"""Returns an integer indicating whether such as annotation exists, and if so, how many.
See :meth:`AllowTokenAnnotation.annotations`` for a description of the parameters."""
return sum( 1 for _ in self.select(Class,set,True,default_ignore_annotations))" | "python" | "def hasannotation(self,Class,set=None):
"""Returns an integer indicating whether such as annotation exists, and if so, how many.
See :meth:`AllowTokenAnnotation.annotations`` for a description of the parameters."""
return sum( 1 for _ in self.select(Class,set,True,default_ignore_annotations))" | [
"def",
"hasannotation",
"(",
"self",
",",
"Class",
",",
"set",
"=",
"None",
")",
":",
"return",
"sum",
"(",
"1",
"for",
"_",
"in",
"self",
".",
"select",
"(",
"Class",
",",
"set",
",",
"True",
",",
"default_ignore_annotations",
")",
")"
] | "Returns an integer indicating whether such as annotation exists, and if so, how many.
See :meth:`AllowTokenAnnotation.annotations`` for a description of the parameters." | [
"Returns",
"an",
"integer",
"indicating",
"whether",
"such",
"as",
"annotation",
"exists",
"and",
"if",
"so",
"how",
"many",
"."
] | "7707f69a91caaa6cde037f0d0379f1d42500a68b" | "https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L3046-L3050" | "" | "" | "" | "" | "" | "" | -1 |
18 | "proycon/pynlpl" | "pynlpl/formats/folia.py" | "AllowTokenAnnotation.annotation" | "def annotation(self, type, set=None):
"""Obtain a single annotation element.
A further restriction can be made based on set.
Arguments:
Class (class): The class to select; any python class (not instance) subclassed off :class:`AbstractElement`
Set (str): The set to match against, only elements pertaining to this set will be returned. If set to None (default), all elements regardless of set will be returned.
Returns:
An element (instance derived from :class:`AbstractElement`)
Example::
sense = word.annotation(folia.Sense, 'http://some/path/cornetto').cls
See also:
:meth:`AllowTokenAnnotation.annotations`
:meth:`AbstractElement.select`
Raises:
:class:`NoSuchAnnotation` if no such annotation exists
"""
"""Will return a **single** annotation (even if there are multiple). Raises a ``NoSuchAnnotation`` exception if none was found"""
for e in self.select(type,set,True,default_ignore_annotations):
return e
raise NoSuchAnnotation()" | "python" | "def annotation(self, type, set=None):
"""Obtain a single annotation element.
A further restriction can be made based on set.
Arguments:
Class (class): The class to select; any python class (not instance) subclassed off :class:`AbstractElement`
Set (str): The set to match against, only elements pertaining to this set will be returned. If set to None (default), all elements regardless of set will be returned.
Returns:
An element (instance derived from :class:`AbstractElement`)
Example::
sense = word.annotation(folia.Sense, 'http://some/path/cornetto').cls
See also:
:meth:`AllowTokenAnnotation.annotations`
:meth:`AbstractElement.select`
Raises:
:class:`NoSuchAnnotation` if no such annotation exists
"""
"""Will return a **single** annotation (even if there are multiple). Raises a ``NoSuchAnnotation`` exception if none was found"""
for e in self.select(type,set,True,default_ignore_annotations):
return e
raise NoSuchAnnotation()" | [
"def",
"annotation",
"(",
"self",
",",
"type",
",",
"set",
"=",
"None",
")",
":",
"\"\"\"Will return a **single** annotation (even if there are multiple). Raises a ``NoSuchAnnotation`` exception if none was found\"\"\"",
"for",
"e",
"in",
"self",
".",
"select",
"(",
"type",
",",
"set",
",",
"True",
",",
"default_ignore_annotations",
")",
":",
"return",
"e",
"raise",
"NoSuchAnnotation",
"(",
")"
] | "Obtain a single annotation element.
A further restriction can be made based on set.
Arguments:
Class (class): The class to select; any python class (not instance) subclassed off :class:`AbstractElement`
Set (str): The set to match against, only elements pertaining to this set will be returned. If set to None (default), all elements regardless of set will be returned.
Returns:
An element (instance derived from :class:`AbstractElement`)
Example::
sense = word.annotation(folia.Sense, 'http://some/path/cornetto').cls
See also:
:meth:`AllowTokenAnnotation.annotations`
:meth:`AbstractElement.select`
Raises:
:class:`NoSuchAnnotation` if no such annotation exists" | [
"Obtain",
"a",
"single",
"annotation",
"element",
"."
] | "7707f69a91caaa6cde037f0d0379f1d42500a68b" | "https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L3052-L3078" | "" | "" | "" | "" | "" | "" | -1 |
19 | "proycon/pynlpl" | "pynlpl/formats/folia.py" | "AbstractStructureElement.hasannotationlayer" | "def hasannotationlayer(self, annotationtype=None,set=None):
"""Does the specified annotation layer exist?"""
l = self.layers(annotationtype, set)
return (len(l) > 0)" | "python" | "def hasannotationlayer(self, annotationtype=None,set=None):
"""Does the specified annotation layer exist?"""
l = self.layers(annotationtype, set)
return (len(l) > 0)" | [
"def",
"hasannotationlayer",
"(",
"self",
",",
"annotationtype",
"=",
"None",
",",
"set",
"=",
"None",
")",
":",
"l",
"=",
"self",
".",
"layers",
"(",
"annotationtype",
",",
"set",
")",
"return",
"(",
"len",
"(",
"l",
")",
">",
"0",
")"
] | "Does the specified annotation layer exist?" | [
"Does",
"the",
"specified",
"annotation",
"layer",
"exist?"
] | "7707f69a91caaa6cde037f0d0379f1d42500a68b" | "https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L3268-L3271" | "" | "" | "" | "" | "" | "" | -1 |
20 | "proycon/pynlpl" | "pynlpl/formats/folia.py" | "TextContent.getreference" | "def getreference(self, validate=True):
"""Returns and validates the Text Content's reference. Raises UnresolvableTextContent when invalid"""
if self.offset is None: return None #nothing to test
if self.ref:
ref = self.doc[self.ref]
else:
ref = self.finddefaultreference()
if not ref:
raise UnresolvableTextContent("Default reference for textcontent not found!")
elif not ref.hastext(self.cls):
raise UnresolvableTextContent("Reference (ID " + str(ref.id) + ") has no such text (class=" + self.cls+")")
elif validate and self.text() != ref.textcontent(self.cls).text()[self.offset:self.offset+len(self.data[0])]:
raise UnresolvableTextContent("Reference (ID " + str(ref.id) + ", class=" + self.cls+") found but no text match at specified offset ("+str(self.offset)+")! Expected '" + self.text() + "', got '" + ref.textcontent(self.cls).text()[self.offset:self.offset+len(self.data[0])] +"'")
else:
#finally, we made it!
return ref" | "python" | "def getreference(self, validate=True):
"""Returns and validates the Text Content's reference. Raises UnresolvableTextContent when invalid"""
if self.offset is None: return None #nothing to test
if self.ref:
ref = self.doc[self.ref]
else:
ref = self.finddefaultreference()
if not ref:
raise UnresolvableTextContent("Default reference for textcontent not found!")
elif not ref.hastext(self.cls):
raise UnresolvableTextContent("Reference (ID " + str(ref.id) + ") has no such text (class=" + self.cls+")")
elif validate and self.text() != ref.textcontent(self.cls).text()[self.offset:self.offset+len(self.data[0])]:
raise UnresolvableTextContent("Reference (ID " + str(ref.id) + ", class=" + self.cls+") found but no text match at specified offset ("+str(self.offset)+")! Expected '" + self.text() + "', got '" + ref.textcontent(self.cls).text()[self.offset:self.offset+len(self.data[0])] +"'")
else:
#finally, we made it!
return ref" | [
"def",
"getreference",
"(",
"self",
",",
"validate",
"=",
"True",
")",
":",
"if",
"self",
".",
"offset",
"is",
"None",
":",
"return",
"None",
"#nothing to test",
"if",
"self",
".",
"ref",
":",
"ref",
"=",
"self",
".",
"doc",
"[",
"self",
".",
"ref",
"]",
"else",
":",
"ref",
"=",
"self",
".",
"finddefaultreference",
"(",
")",
"if",
"not",
"ref",
":",
"raise",
"UnresolvableTextContent",
"(",
"\"Default reference for textcontent not found!\"",
")",
"elif",
"not",
"ref",
".",
"hastext",
"(",
"self",
".",
"cls",
")",
":",
"raise",
"UnresolvableTextContent",
"(",
"\"Reference (ID \"",
"+",
"str",
"(",
"ref",
".",
"id",
")",
"+",
"\") has no such text (class=\"",
"+",
"self",
".",
"cls",
"+",
"\")\"",
")",
"elif",
"validate",
"and",
"self",
".",
"text",
"(",
")",
"!=",
"ref",
".",
"textcontent",
"(",
"self",
".",
"cls",
")",
".",
"text",
"(",
")",
"[",
"self",
".",
"offset",
":",
"self",
".",
"offset",
"+",
"len",
"(",
"self",
".",
"data",
"[",
"0",
"]",
")",
"]",
":",
"raise",
"UnresolvableTextContent",
"(",
"\"Reference (ID \"",
"+",
"str",
"(",
"ref",
".",
"id",
")",
"+",
"\", class=\"",
"+",
"self",
".",
"cls",
"+",
"\") found but no text match at specified offset (\"",
"+",
"str",
"(",
"self",
".",
"offset",
")",
"+",
"\")! Expected '\"",
"+",
"self",
".",
"text",
"(",
")",
"+",
"\"', got '\"",
"+",
"ref",
".",
"textcontent",
"(",
"self",
".",
"cls",
")",
".",
"text",
"(",
")",
"[",
"self",
".",
"offset",
":",
"self",
".",
"offset",
"+",
"len",
"(",
"self",
".",
"data",
"[",
"0",
"]",
")",
"]",
"+",
"\"'\"",
")",
"else",
":",
"#finally, we made it!",
"return",
"ref"
] | "Returns and validates the Text Content's reference. Raises UnresolvableTextContent when invalid" | [
"Returns",
"and",
"validates",
"the",
"Text",
"Content",
"s",
"reference",
".",
"Raises",
"UnresolvableTextContent",
"when",
"invalid"
] | "7707f69a91caaa6cde037f0d0379f1d42500a68b" | "https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L3502-L3519" | "" | "" | "" | "" | "" | "" | -1 |
21 | "proycon/pynlpl" | "pynlpl/formats/folia.py" | "PhonContent.getreference" | "def getreference(self, validate=True):
"""Return and validate the Phonetic Content's reference. Raises UnresolvableTextContent when invalid"""
if self.offset is None: return None #nothing to test
if self.ref:
ref = self.doc[self.ref]
else:
ref = self.finddefaultreference()
if not ref:
raise UnresolvableTextContent("Default reference for phonetic content not found!")
elif not ref.hasphon(self.cls):
raise UnresolvableTextContent("Reference has no such phonetic content (class=" + self.cls+")")
elif validate and self.phon() != ref.textcontent(self.cls).phon()[self.offset:self.offset+len(self.data[0])]:
raise UnresolvableTextContent("Reference (class=" + self.cls+") found but no phonetic match at specified offset ("+str(self.offset)+")! Expected '" + self.text() + "', got '" + ref.textcontent(self.cls).text()[self.offset:self.offset+len(self.data[0])] +"'")
else:
#finally, we made it!
return ref" | "python" | "def getreference(self, validate=True):
"""Return and validate the Phonetic Content's reference. Raises UnresolvableTextContent when invalid"""
if self.offset is None: return None #nothing to test
if self.ref:
ref = self.doc[self.ref]
else:
ref = self.finddefaultreference()
if not ref:
raise UnresolvableTextContent("Default reference for phonetic content not found!")
elif not ref.hasphon(self.cls):
raise UnresolvableTextContent("Reference has no such phonetic content (class=" + self.cls+")")
elif validate and self.phon() != ref.textcontent(self.cls).phon()[self.offset:self.offset+len(self.data[0])]:
raise UnresolvableTextContent("Reference (class=" + self.cls+") found but no phonetic match at specified offset ("+str(self.offset)+")! Expected '" + self.text() + "', got '" + ref.textcontent(self.cls).text()[self.offset:self.offset+len(self.data[0])] +"'")
else:
#finally, we made it!
return ref" | [
"def",
"getreference",
"(",
"self",
",",
"validate",
"=",
"True",
")",
":",
"if",
"self",
".",
"offset",
"is",
"None",
":",
"return",
"None",
"#nothing to test",
"if",
"self",
".",
"ref",
":",
"ref",
"=",
"self",
".",
"doc",
"[",
"self",
".",
"ref",
"]",
"else",
":",
"ref",
"=",
"self",
".",
"finddefaultreference",
"(",
")",
"if",
"not",
"ref",
":",
"raise",
"UnresolvableTextContent",
"(",
"\"Default reference for phonetic content not found!\"",
")",
"elif",
"not",
"ref",
".",
"hasphon",
"(",
"self",
".",
"cls",
")",
":",
"raise",
"UnresolvableTextContent",
"(",
"\"Reference has no such phonetic content (class=\"",
"+",
"self",
".",
"cls",
"+",
"\")\"",
")",
"elif",
"validate",
"and",
"self",
".",
"phon",
"(",
")",
"!=",
"ref",
".",
"textcontent",
"(",
"self",
".",
"cls",
")",
".",
"phon",
"(",
")",
"[",
"self",
".",
"offset",
":",
"self",
".",
"offset",
"+",
"len",
"(",
"self",
".",
"data",
"[",
"0",
"]",
")",
"]",
":",
"raise",
"UnresolvableTextContent",
"(",
"\"Reference (class=\"",
"+",
"self",
".",
"cls",
"+",
"\") found but no phonetic match at specified offset (\"",
"+",
"str",
"(",
"self",
".",
"offset",
")",
"+",
"\")! Expected '\"",
"+",
"self",
".",
"text",
"(",
")",
"+",
"\"', got '\"",
"+",
"ref",
".",
"textcontent",
"(",
"self",
".",
"cls",
")",
".",
"text",
"(",
")",
"[",
"self",
".",
"offset",
":",
"self",
".",
"offset",
"+",
"len",
"(",
"self",
".",
"data",
"[",
"0",
"]",
")",
"]",
"+",
"\"'\"",
")",
"else",
":",
"#finally, we made it!",
"return",
"ref"
] | "Return and validate the Phonetic Content's reference. Raises UnresolvableTextContent when invalid" | [
"Return",
"and",
"validate",
"the",
"Phonetic",
"Content",
"s",
"reference",
".",
"Raises",
"UnresolvableTextContent",
"when",
"invalid"
] | "7707f69a91caaa6cde037f0d0379f1d42500a68b" | "https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L3715-L3732" | "" | "" | "" | "" | "" | "" | -1 |
22 | "proycon/pynlpl" | "pynlpl/formats/folia.py" | "Word.findspans" | "def findspans(self, type,set=None):
"""Yields span annotation elements of the specified type that include this word.
Arguments:
type: The annotation type, can be passed as using any of the :class:`AnnotationType` member, or by passing the relevant :class:`AbstractSpanAnnotation` or :class:`AbstractAnnotationLayer` class.
set (str or None): Constrain by set
Example::
for chunk in word.findspans(folia.Chunk):
print(" Chunk class=", chunk.cls, " words=")
for word2 in chunk.wrefs(): #print all words in the chunk (of which the word is a part)
print(word2, end="")
print()
Yields:
Matching span annotation instances (derived from :class:`AbstractSpanAnnotation`)
"""
if issubclass(type, AbstractAnnotationLayer):
layerclass = type
else:
layerclass = ANNOTATIONTYPE2LAYERCLASS[type.ANNOTATIONTYPE]
e = self
while True:
if not e.parent: break
e = e.parent
for layer in e.select(layerclass,set,False):
if type is layerclass:
for e2 in layer.select(AbstractSpanAnnotation,set,True, (True, Word, Morpheme)):
if not isinstance(e2, AbstractSpanRole) and self in e2.wrefs():
yield e2
else:
for e2 in layer.select(type,set,True, (True, Word, Morpheme)):
if not isinstance(e2, AbstractSpanRole) and self in e2.wrefs():
yield e2" | "python" | "def findspans(self, type,set=None):
"""Yields span annotation elements of the specified type that include this word.
Arguments:
type: The annotation type, can be passed as using any of the :class:`AnnotationType` member, or by passing the relevant :class:`AbstractSpanAnnotation` or :class:`AbstractAnnotationLayer` class.
set (str or None): Constrain by set
Example::
for chunk in word.findspans(folia.Chunk):
print(" Chunk class=", chunk.cls, " words=")
for word2 in chunk.wrefs(): #print all words in the chunk (of which the word is a part)
print(word2, end="")
print()
Yields:
Matching span annotation instances (derived from :class:`AbstractSpanAnnotation`)
"""
if issubclass(type, AbstractAnnotationLayer):
layerclass = type
else:
layerclass = ANNOTATIONTYPE2LAYERCLASS[type.ANNOTATIONTYPE]
e = self
while True:
if not e.parent: break
e = e.parent
for layer in e.select(layerclass,set,False):
if type is layerclass:
for e2 in layer.select(AbstractSpanAnnotation,set,True, (True, Word, Morpheme)):
if not isinstance(e2, AbstractSpanRole) and self in e2.wrefs():
yield e2
else:
for e2 in layer.select(type,set,True, (True, Word, Morpheme)):
if not isinstance(e2, AbstractSpanRole) and self in e2.wrefs():
yield e2" | [
"def",
"findspans",
"(",
"self",
",",
"type",
",",
"set",
"=",
"None",
")",
":",
"if",
"issubclass",
"(",
"type",
",",
"AbstractAnnotationLayer",
")",
":",
"layerclass",
"=",
"type",
"else",
":",
"layerclass",
"=",
"ANNOTATIONTYPE2LAYERCLASS",
"[",
"type",
".",
"ANNOTATIONTYPE",
"]",
"e",
"=",
"self",
"while",
"True",
":",
"if",
"not",
"e",
".",
"parent",
":",
"break",
"e",
"=",
"e",
".",
"parent",
"for",
"layer",
"in",
"e",
".",
"select",
"(",
"layerclass",
",",
"set",
",",
"False",
")",
":",
"if",
"type",
"is",
"layerclass",
":",
"for",
"e2",
"in",
"layer",
".",
"select",
"(",
"AbstractSpanAnnotation",
",",
"set",
",",
"True",
",",
"(",
"True",
",",
"Word",
",",
"Morpheme",
")",
")",
":",
"if",
"not",
"isinstance",
"(",
"e2",
",",
"AbstractSpanRole",
")",
"and",
"self",
"in",
"e2",
".",
"wrefs",
"(",
")",
":",
"yield",
"e2",
"else",
":",
"for",
"e2",
"in",
"layer",
".",
"select",
"(",
"type",
",",
"set",
",",
"True",
",",
"(",
"True",
",",
"Word",
",",
"Morpheme",
")",
")",
":",
"if",
"not",
"isinstance",
"(",
"e2",
",",
"AbstractSpanRole",
")",
"and",
"self",
"in",
"e2",
".",
"wrefs",
"(",
")",
":",
"yield",
"e2"
] | "Yields span annotation elements of the specified type that include this word.
Arguments:
type: The annotation type, can be passed as using any of the :class:`AnnotationType` member, or by passing the relevant :class:`AbstractSpanAnnotation` or :class:`AbstractAnnotationLayer` class.
set (str or None): Constrain by set
Example::
for chunk in word.findspans(folia.Chunk):
print(" Chunk class=", chunk.cls, " words=")
for word2 in chunk.wrefs(): #print all words in the chunk (of which the word is a part)
print(word2, end="")
print()
Yields:
Matching span annotation instances (derived from :class:`AbstractSpanAnnotation`)" | [
"Yields",
"span",
"annotation",
"elements",
"of",
"the",
"specified",
"type",
"that",
"include",
"this",
"word",
"."
] | "7707f69a91caaa6cde037f0d0379f1d42500a68b" | "https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L4178-L4213" | "" | "" | "" | "" | "" | "" | -1 |
23 | "proycon/pynlpl" | "pynlpl/formats/folia.py" | "AbstractSpanAnnotation.setspan" | "def setspan(self, *args):
"""Sets the span of the span element anew, erases all data inside.
Arguments:
*args: Instances of :class:`Word`, :class:`Morpheme` or :class:`Phoneme`
"""
self.data = []
for child in args:
self.append(child)" | "python" | "def setspan(self, *args):
"""Sets the span of the span element anew, erases all data inside.
Arguments:
*args: Instances of :class:`Word`, :class:`Morpheme` or :class:`Phoneme`
"""
self.data = []
for child in args:
self.append(child)" | [
"def",
"setspan",
"(",
"self",
",",
"*",
"args",
")",
":",
"self",
".",
"data",
"=",
"[",
"]",
"for",
"child",
"in",
"args",
":",
"self",
".",
"append",
"(",
"child",
")"
] | "Sets the span of the span element anew, erases all data inside.
Arguments:
*args: Instances of :class:`Word`, :class:`Morpheme` or :class:`Phoneme`" | [
"Sets",
"the",
"span",
"of",
"the",
"span",
"element",
"anew",
"erases",
"all",
"data",
"inside",
"."
] | "7707f69a91caaa6cde037f0d0379f1d42500a68b" | "https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L4373-L4381" | "" | "" | "" | "" | "" | "" | -1 |
24 | "proycon/pynlpl" | "pynlpl/formats/folia.py" | "AbstractSpanAnnotation._helper_wrefs" | "def _helper_wrefs(self, targets, recurse=True):
"""Internal helper function"""
for c in self:
if isinstance(c,Word) or isinstance(c,Morpheme) or isinstance(c, Phoneme):
targets.append(c)
elif isinstance(c,WordReference):
try:
targets.append(self.doc[c.id]) #try to resolve
except KeyError:
targets.append(c) #add unresolved
elif isinstance(c, AbstractSpanAnnotation) and recurse:
#recursion
c._helper_wrefs(targets) #pylint: disable=protected-access
elif isinstance(c, Correction) and c.auth: #recurse into corrections
for e in c:
if isinstance(e, AbstractCorrectionChild) and e.auth:
for e2 in e:
if isinstance(e2, AbstractSpanAnnotation):
#recursion
e2._helper_wrefs(targets)" | "python" | "def _helper_wrefs(self, targets, recurse=True):
"""Internal helper function"""
for c in self:
if isinstance(c,Word) or isinstance(c,Morpheme) or isinstance(c, Phoneme):
targets.append(c)
elif isinstance(c,WordReference):
try:
targets.append(self.doc[c.id]) #try to resolve
except KeyError:
targets.append(c) #add unresolved
elif isinstance(c, AbstractSpanAnnotation) and recurse:
#recursion
c._helper_wrefs(targets) #pylint: disable=protected-access
elif isinstance(c, Correction) and c.auth: #recurse into corrections
for e in c:
if isinstance(e, AbstractCorrectionChild) and e.auth:
for e2 in e:
if isinstance(e2, AbstractSpanAnnotation):
#recursion
e2._helper_wrefs(targets)" | [
"def",
"_helper_wrefs",
"(",
"self",
",",
"targets",
",",
"recurse",
"=",
"True",
")",
":",
"for",
"c",
"in",
"self",
":",
"if",
"isinstance",
"(",
"c",
",",
"Word",
")",
"or",
"isinstance",
"(",
"c",
",",
"Morpheme",
")",
"or",
"isinstance",
"(",
"c",
",",
"Phoneme",
")",
":",
"targets",
".",
"append",
"(",
"c",
")",
"elif",
"isinstance",
"(",
"c",
",",
"WordReference",
")",
":",
"try",
":",
"targets",
".",
"append",
"(",
"self",
".",
"doc",
"[",
"c",
".",
"id",
"]",
")",
"#try to resolve",
"except",
"KeyError",
":",
"targets",
".",
"append",
"(",
"c",
")",
"#add unresolved",
"elif",
"isinstance",
"(",
"c",
",",
"AbstractSpanAnnotation",
")",
"and",
"recurse",
":",
"#recursion",
"c",
".",
"_helper_wrefs",
"(",
"targets",
")",
"#pylint: disable=protected-access",
"elif",
"isinstance",
"(",
"c",
",",
"Correction",
")",
"and",
"c",
".",
"auth",
":",
"#recurse into corrections",
"for",
"e",
"in",
"c",
":",
"if",
"isinstance",
"(",
"e",
",",
"AbstractCorrectionChild",
")",
"and",
"e",
".",
"auth",
":",
"for",
"e2",
"in",
"e",
":",
"if",
"isinstance",
"(",
"e2",
",",
"AbstractSpanAnnotation",
")",
":",
"#recursion",
"e2",
".",
"_helper_wrefs",
"(",
"targets",
")"
] | "Internal helper function" | [
"Internal",
"helper",
"function"
] | "7707f69a91caaa6cde037f0d0379f1d42500a68b" | "https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L4418-L4437" | "" | "" | "" | "" | "" | "" | -1 |
25 | "proycon/pynlpl" | "pynlpl/formats/folia.py" | "AbstractSpanAnnotation.wrefs" | "def wrefs(self, index = None, recurse=True):
"""Returns a list of word references, these can be Words but also Morphemes or Phonemes.
Arguments:
index (int or None): If set to an integer, will retrieve and return the n'th element (starting at 0) instead of returning the list of all
"""
targets =[]
self._helper_wrefs(targets, recurse)
if index is None:
return targets
else:
return targets[index]" | "python" | "def wrefs(self, index = None, recurse=True):
"""Returns a list of word references, these can be Words but also Morphemes or Phonemes.
Arguments:
index (int or None): If set to an integer, will retrieve and return the n'th element (starting at 0) instead of returning the list of all
"""
targets =[]
self._helper_wrefs(targets, recurse)
if index is None:
return targets
else:
return targets[index]" | [
"def",
"wrefs",
"(",
"self",
",",
"index",
"=",
"None",
",",
"recurse",
"=",
"True",
")",
":",
"targets",
"=",
"[",
"]",
"self",
".",
"_helper_wrefs",
"(",
"targets",
",",
"recurse",
")",
"if",
"index",
"is",
"None",
":",
"return",
"targets",
"else",
":",
"return",
"targets",
"[",
"index",
"]"
] | "Returns a list of word references, these can be Words but also Morphemes or Phonemes.
Arguments:
index (int or None): If set to an integer, will retrieve and return the n'th element (starting at 0) instead of returning the list of all" | [
"Returns",
"a",
"list",
"of",
"word",
"references",
"these",
"can",
"be",
"Words",
"but",
"also",
"Morphemes",
"or",
"Phonemes",
"."
] | "7707f69a91caaa6cde037f0d0379f1d42500a68b" | "https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L4439-L4450" | "" | "" | "" | "" | "" | "" | -1 |
26 | "proycon/pynlpl" | "pynlpl/formats/folia.py" | "AbstractSpanAnnotation.copychildren" | "def copychildren(self, newdoc=None, idsuffix=""):
"""Generator creating a deep copy of the children of this element. If idsuffix is a string, if set to True, a random idsuffix will be generated including a random 32-bit hash"""
if idsuffix is True: idsuffix = ".copy." + "%08x" % random.getrandbits(32) #random 32-bit hash for each copy, same one will be reused for all children
for c in self:
if isinstance(c, Word):
yield WordReference(newdoc, id=c.id)
else:
yield c.copy(newdoc,idsuffix)" | "python" | "def copychildren(self, newdoc=None, idsuffix=""):
"""Generator creating a deep copy of the children of this element. If idsuffix is a string, if set to True, a random idsuffix will be generated including a random 32-bit hash"""
if idsuffix is True: idsuffix = ".copy." + "%08x" % random.getrandbits(32) #random 32-bit hash for each copy, same one will be reused for all children
for c in self:
if isinstance(c, Word):
yield WordReference(newdoc, id=c.id)
else:
yield c.copy(newdoc,idsuffix)" | [
"def",
"copychildren",
"(",
"self",
",",
"newdoc",
"=",
"None",
",",
"idsuffix",
"=",
"\"\"",
")",
":",
"if",
"idsuffix",
"is",
"True",
":",
"idsuffix",
"=",
"\".copy.\"",
"+",
"\"%08x\"",
"%",
"random",
".",
"getrandbits",
"(",
"32",
")",
"#random 32-bit hash for each copy, same one will be reused for all children",
"for",
"c",
"in",
"self",
":",
"if",
"isinstance",
"(",
"c",
",",
"Word",
")",
":",
"yield",
"WordReference",
"(",
"newdoc",
",",
"id",
"=",
"c",
".",
"id",
")",
"else",
":",
"yield",
"c",
".",
"copy",
"(",
"newdoc",
",",
"idsuffix",
")"
] | "Generator creating a deep copy of the children of this element. If idsuffix is a string, if set to True, a random idsuffix will be generated including a random 32-bit hash" | [
"Generator",
"creating",
"a",
"deep",
"copy",
"of",
"the",
"children",
"of",
"this",
"element",
".",
"If",
"idsuffix",
"is",
"a",
"string",
"if",
"set",
"to",
"True",
"a",
"random",
"idsuffix",
"will",
"be",
"generated",
"including",
"a",
"random",
"32",
"-",
"bit",
"hash"
] | "7707f69a91caaa6cde037f0d0379f1d42500a68b" | "https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L4465-L4472" | "" | "" | "" | "" | "" | "" | -1 |
27 | "proycon/pynlpl" | "pynlpl/formats/folia.py" | "AbstractAnnotationLayer.alternatives" | "def alternatives(self, Class=None, set=None):
"""Generator over alternatives, either all or only of a specific annotation type, and possibly restrained also by set.
Arguments:
* ``Class`` - The Class you want to retrieve (e.g. PosAnnotation). Or set to None to select all alternatives regardless of what type they are.
* ``set`` - The set you want to retrieve (defaults to None, which selects irregardless of set)
Returns:
Generator over Alternative elements
"""
for e in self.select(AlternativeLayers,None, True, ['Original','Suggestion']): #pylint: disable=too-many-nested-blocks
if Class is None:
yield e
elif len(e) >= 1: #child elements?
for e2 in e:
try:
if isinstance(e2, Class):
try:
if set is None or e2.set == set:
yield e #not e2
break #yield an alternative only once (in case there are multiple matches)
except AttributeError:
continue
except AttributeError:
continue" | "python" | "def alternatives(self, Class=None, set=None):
"""Generator over alternatives, either all or only of a specific annotation type, and possibly restrained also by set.
Arguments:
* ``Class`` - The Class you want to retrieve (e.g. PosAnnotation). Or set to None to select all alternatives regardless of what type they are.
* ``set`` - The set you want to retrieve (defaults to None, which selects irregardless of set)
Returns:
Generator over Alternative elements
"""
for e in self.select(AlternativeLayers,None, True, ['Original','Suggestion']): #pylint: disable=too-many-nested-blocks
if Class is None:
yield e
elif len(e) >= 1: #child elements?
for e2 in e:
try:
if isinstance(e2, Class):
try:
if set is None or e2.set == set:
yield e #not e2
break #yield an alternative only once (in case there are multiple matches)
except AttributeError:
continue
except AttributeError:
continue" | [
"def",
"alternatives",
"(",
"self",
",",
"Class",
"=",
"None",
",",
"set",
"=",
"None",
")",
":",
"for",
"e",
"in",
"self",
".",
"select",
"(",
"AlternativeLayers",
",",
"None",
",",
"True",
",",
"[",
"'Original'",
",",
"'Suggestion'",
"]",
")",
":",
"#pylint: disable=too-many-nested-blocks",
"if",
"Class",
"is",
"None",
":",
"yield",
"e",
"elif",
"len",
"(",
"e",
")",
">=",
"1",
":",
"#child elements?",
"for",
"e2",
"in",
"e",
":",
"try",
":",
"if",
"isinstance",
"(",
"e2",
",",
"Class",
")",
":",
"try",
":",
"if",
"set",
"is",
"None",
"or",
"e2",
".",
"set",
"==",
"set",
":",
"yield",
"e",
"#not e2",
"break",
"#yield an alternative only once (in case there are multiple matches)",
"except",
"AttributeError",
":",
"continue",
"except",
"AttributeError",
":",
"continue"
] | "Generator over alternatives, either all or only of a specific annotation type, and possibly restrained also by set.
Arguments:
* ``Class`` - The Class you want to retrieve (e.g. PosAnnotation). Or set to None to select all alternatives regardless of what type they are.
* ``set`` - The set you want to retrieve (defaults to None, which selects irregardless of set)
Returns:
Generator over Alternative elements" | [
"Generator",
"over",
"alternatives",
"either",
"all",
"or",
"only",
"of",
"a",
"specific",
"annotation",
"type",
"and",
"possibly",
"restrained",
"also",
"by",
"set",
"."
] | "7707f69a91caaa6cde037f0d0379f1d42500a68b" | "https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L4574-L4599" | "" | "" | "" | "" | "" | "" | -1 |
28 | "proycon/pynlpl" | "pynlpl/formats/folia.py" | "AbstractAnnotationLayer.findspan" | "def findspan(self, *words):
"""Returns the span element which spans over the specified words or morphemes.
See also:
:meth:`Word.findspans`
"""
for span in self.select(AbstractSpanAnnotation,None,True):
if tuple(span.wrefs()) == words:
return span
raise NoSuchAnnotation" | "python" | "def findspan(self, *words):
"""Returns the span element which spans over the specified words or morphemes.
See also:
:meth:`Word.findspans`
"""
for span in self.select(AbstractSpanAnnotation,None,True):
if tuple(span.wrefs()) == words:
return span
raise NoSuchAnnotation" | [
"def",
"findspan",
"(",
"self",
",",
"*",
"words",
")",
":",
"for",
"span",
"in",
"self",
".",
"select",
"(",
"AbstractSpanAnnotation",
",",
"None",
",",
"True",
")",
":",
"if",
"tuple",
"(",
"span",
".",
"wrefs",
"(",
")",
")",
"==",
"words",
":",
"return",
"span",
"raise",
"NoSuchAnnotation"
] | "Returns the span element which spans over the specified words or morphemes.
See also:
:meth:`Word.findspans`" | [
"Returns",
"the",
"span",
"element",
"which",
"spans",
"over",
"the",
"specified",
"words",
"or",
"morphemes",
"."
] | "7707f69a91caaa6cde037f0d0379f1d42500a68b" | "https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L4601-L4611" | "" | "" | "" | "" | "" | "" | -1 |
29 | "proycon/pynlpl" | "pynlpl/formats/folia.py" | "Correction.hasnew" | "def hasnew(self,allowempty=False):
"""Does the correction define new corrected annotations?"""
for e in self.select(New,None,False, False):
if not allowempty and len(e) == 0: continue
return True
return False" | "python" | "def hasnew(self,allowempty=False):
"""Does the correction define new corrected annotations?"""
for e in self.select(New,None,False, False):
if not allowempty and len(e) == 0: continue
return True
return False" | [
"def",
"hasnew",
"(",
"self",
",",
"allowempty",
"=",
"False",
")",
":",
"for",
"e",
"in",
"self",
".",
"select",
"(",
"New",
",",
"None",
",",
"False",
",",
"False",
")",
":",
"if",
"not",
"allowempty",
"and",
"len",
"(",
"e",
")",
"==",
"0",
":",
"continue",
"return",
"True",
"return",
"False"
] | "Does the correction define new corrected annotations?" | [
"Does",
"the",
"correction",
"define",
"new",
"corrected",
"annotations?"
] | "7707f69a91caaa6cde037f0d0379f1d42500a68b" | "https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L4982-L4987" | "" | "" | "" | "" | "" | "" | -1 |
30 | "proycon/pynlpl" | "pynlpl/formats/folia.py" | "Correction.hasoriginal" | "def hasoriginal(self,allowempty=False):
"""Does the correction record the old annotations prior to correction?"""
for e in self.select(Original,None,False, False):
if not allowempty and len(e) == 0: continue
return True
return False" | "python" | "def hasoriginal(self,allowempty=False):
"""Does the correction record the old annotations prior to correction?"""
for e in self.select(Original,None,False, False):
if not allowempty and len(e) == 0: continue
return True
return False" | [
"def",
"hasoriginal",
"(",
"self",
",",
"allowempty",
"=",
"False",
")",
":",
"for",
"e",
"in",
"self",
".",
"select",
"(",
"Original",
",",
"None",
",",
"False",
",",
"False",
")",
":",
"if",
"not",
"allowempty",
"and",
"len",
"(",
"e",
")",
"==",
"0",
":",
"continue",
"return",
"True",
"return",
"False"
] | "Does the correction record the old annotations prior to correction?" | [
"Does",
"the",
"correction",
"record",
"the",
"old",
"annotations",
"prior",
"to",
"correction?"
] | "7707f69a91caaa6cde037f0d0379f1d42500a68b" | "https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L4989-L4994" | "" | "" | "" | "" | "" | "" | -1 |
31 | "proycon/pynlpl" | "pynlpl/formats/folia.py" | "Correction.hassuggestions" | "def hassuggestions(self,allowempty=False):
"""Does the correction propose suggestions for correction?"""
for e in self.select(Suggestion,None,False, False):
if not allowempty and len(e) == 0: continue
return True
return False" | "python" | "def hassuggestions(self,allowempty=False):
"""Does the correction propose suggestions for correction?"""
for e in self.select(Suggestion,None,False, False):
if not allowempty and len(e) == 0: continue
return True
return False" | [
"def",
"hassuggestions",
"(",
"self",
",",
"allowempty",
"=",
"False",
")",
":",
"for",
"e",
"in",
"self",
".",
"select",
"(",
"Suggestion",
",",
"None",
",",
"False",
",",
"False",
")",
":",
"if",
"not",
"allowempty",
"and",
"len",
"(",
"e",
")",
"==",
"0",
":",
"continue",
"return",
"True",
"return",
"False"
] | "Does the correction propose suggestions for correction?" | [
"Does",
"the",
"correction",
"propose",
"suggestions",
"for",
"correction?"
] | "7707f69a91caaa6cde037f0d0379f1d42500a68b" | "https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L5003-L5008" | "" | "" | "" | "" | "" | "" | -1 |
32 | "proycon/pynlpl" | "pynlpl/formats/folia.py" | "Correction.new" | "def new(self,index = None):
"""Get the new corrected annotation.
This returns only one annotation if multiple exist, use `index` to select another in the sequence.
Returns:
an annotation element (:class:`AbstractElement`)
Raises:
:class:`NoSuchAnnotation`
"""
if index is None:
try:
return next(self.select(New,None,False))
except StopIteration:
raise NoSuchAnnotation
else:
for e in self.select(New,None,False):
return e[index]
raise NoSuchAnnotation" | "python" | "def new(self,index = None):
"""Get the new corrected annotation.
This returns only one annotation if multiple exist, use `index` to select another in the sequence.
Returns:
an annotation element (:class:`AbstractElement`)
Raises:
:class:`NoSuchAnnotation`
"""
if index is None:
try:
return next(self.select(New,None,False))
except StopIteration:
raise NoSuchAnnotation
else:
for e in self.select(New,None,False):
return e[index]
raise NoSuchAnnotation" | [
"def",
"new",
"(",
"self",
",",
"index",
"=",
"None",
")",
":",
"if",
"index",
"is",
"None",
":",
"try",
":",
"return",
"next",
"(",
"self",
".",
"select",
"(",
"New",
",",
"None",
",",
"False",
")",
")",
"except",
"StopIteration",
":",
"raise",
"NoSuchAnnotation",
"else",
":",
"for",
"e",
"in",
"self",
".",
"select",
"(",
"New",
",",
"None",
",",
"False",
")",
":",
"return",
"e",
"[",
"index",
"]",
"raise",
"NoSuchAnnotation"
] | "Get the new corrected annotation.
This returns only one annotation if multiple exist, use `index` to select another in the sequence.
Returns:
an annotation element (:class:`AbstractElement`)
Raises:
:class:`NoSuchAnnotation`" | [
"Get",
"the",
"new",
"corrected",
"annotation",
"."
] | "7707f69a91caaa6cde037f0d0379f1d42500a68b" | "https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L5106-L5126" | "" | "" | "" | "" | "" | "" | -1 |
33 | "proycon/pynlpl" | "pynlpl/formats/folia.py" | "Correction.original" | "def original(self,index=None):
"""Get the old annotation prior to correction.
This returns only one annotation if multiple exist, use `index` to select another in the sequence.
Returns:
an annotation element (:class:`AbstractElement`)
Raises:
:class:`NoSuchAnnotation`
"""
if index is None:
try:
return next(self.select(Original,None,False, False))
except StopIteration:
raise NoSuchAnnotation
else:
for e in self.select(Original,None,False, False):
return e[index]
raise NoSuchAnnotation" | "python" | "def original(self,index=None):
"""Get the old annotation prior to correction.
This returns only one annotation if multiple exist, use `index` to select another in the sequence.
Returns:
an annotation element (:class:`AbstractElement`)
Raises:
:class:`NoSuchAnnotation`
"""
if index is None:
try:
return next(self.select(Original,None,False, False))
except StopIteration:
raise NoSuchAnnotation
else:
for e in self.select(Original,None,False, False):
return e[index]
raise NoSuchAnnotation" | [
"def",
"original",
"(",
"self",
",",
"index",
"=",
"None",
")",
":",
"if",
"index",
"is",
"None",
":",
"try",
":",
"return",
"next",
"(",
"self",
".",
"select",
"(",
"Original",
",",
"None",
",",
"False",
",",
"False",
")",
")",
"except",
"StopIteration",
":",
"raise",
"NoSuchAnnotation",
"else",
":",
"for",
"e",
"in",
"self",
".",
"select",
"(",
"Original",
",",
"None",
",",
"False",
",",
"False",
")",
":",
"return",
"e",
"[",
"index",
"]",
"raise",
"NoSuchAnnotation"
] | "Get the old annotation prior to correction.
This returns only one annotation if multiple exist, use `index` to select another in the sequence.
Returns:
an annotation element (:class:`AbstractElement`)
Raises:
:class:`NoSuchAnnotation`" | [
"Get",
"the",
"old",
"annotation",
"prior",
"to",
"correction",
"."
] | "7707f69a91caaa6cde037f0d0379f1d42500a68b" | "https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L5128-L5147" | "" | "" | "" | "" | "" | "" | -1 |
34 | "proycon/pynlpl" | "pynlpl/formats/folia.py" | "Correction.suggestions" | "def suggestions(self,index=None):
"""Get suggestions for correction.
Yields:
:class:`Suggestion` element that encapsulate the suggested annotations (if index is ``None``, default)
Returns:
a :class:`Suggestion` element that encapsulate the suggested annotations (if index is set)
Raises:
:class:`IndexError`
"""
if index is None:
return self.select(Suggestion,None,False, False)
else:
for i, e in enumerate(self.select(Suggestion,None,False, False)):
if index == i:
return e
raise IndexError" | "python" | "def suggestions(self,index=None):
"""Get suggestions for correction.
Yields:
:class:`Suggestion` element that encapsulate the suggested annotations (if index is ``None``, default)
Returns:
a :class:`Suggestion` element that encapsulate the suggested annotations (if index is set)
Raises:
:class:`IndexError`
"""
if index is None:
return self.select(Suggestion,None,False, False)
else:
for i, e in enumerate(self.select(Suggestion,None,False, False)):
if index == i:
return e
raise IndexError" | [
"def",
"suggestions",
"(",
"self",
",",
"index",
"=",
"None",
")",
":",
"if",
"index",
"is",
"None",
":",
"return",
"self",
".",
"select",
"(",
"Suggestion",
",",
"None",
",",
"False",
",",
"False",
")",
"else",
":",
"for",
"i",
",",
"e",
"in",
"enumerate",
"(",
"self",
".",
"select",
"(",
"Suggestion",
",",
"None",
",",
"False",
",",
"False",
")",
")",
":",
"if",
"index",
"==",
"i",
":",
"return",
"e",
"raise",
"IndexError"
] | "Get suggestions for correction.
Yields:
:class:`Suggestion` element that encapsulate the suggested annotations (if index is ``None``, default)
Returns:
a :class:`Suggestion` element that encapsulate the suggested annotations (if index is set)
Raises:
:class:`IndexError`" | [
"Get",
"suggestions",
"for",
"correction",
"."
] | "7707f69a91caaa6cde037f0d0379f1d42500a68b" | "https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L5170-L5188" | "" | "" | "" | "" | "" | "" | -1 |
35 | "proycon/pynlpl" | "pynlpl/formats/folia.py" | "Morpheme.findspans" | "def findspans(self, type,set=None):
"""Find span annotation of the specified type that include this word"""
if issubclass(type, AbstractAnnotationLayer):
layerclass = type
else:
layerclass = ANNOTATIONTYPE2LAYERCLASS[type.ANNOTATIONTYPE]
e = self
while True:
if not e.parent: break
e = e.parent
for layer in e.select(layerclass,set,False):
for e2 in layer:
if isinstance(e2, AbstractSpanAnnotation):
if self in e2.wrefs():
yield e2" | "python" | "def findspans(self, type,set=None):
"""Find span annotation of the specified type that include this word"""
if issubclass(type, AbstractAnnotationLayer):
layerclass = type
else:
layerclass = ANNOTATIONTYPE2LAYERCLASS[type.ANNOTATIONTYPE]
e = self
while True:
if not e.parent: break
e = e.parent
for layer in e.select(layerclass,set,False):
for e2 in layer:
if isinstance(e2, AbstractSpanAnnotation):
if self in e2.wrefs():
yield e2" | [
"def",
"findspans",
"(",
"self",
",",
"type",
",",
"set",
"=",
"None",
")",
":",
"if",
"issubclass",
"(",
"type",
",",
"AbstractAnnotationLayer",
")",
":",
"layerclass",
"=",
"type",
"else",
":",
"layerclass",
"=",
"ANNOTATIONTYPE2LAYERCLASS",
"[",
"type",
".",
"ANNOTATIONTYPE",
"]",
"e",
"=",
"self",
"while",
"True",
":",
"if",
"not",
"e",
".",
"parent",
":",
"break",
"e",
"=",
"e",
".",
"parent",
"for",
"layer",
"in",
"e",
".",
"select",
"(",
"layerclass",
",",
"set",
",",
"False",
")",
":",
"for",
"e2",
"in",
"layer",
":",
"if",
"isinstance",
"(",
"e2",
",",
"AbstractSpanAnnotation",
")",
":",
"if",
"self",
"in",
"e2",
".",
"wrefs",
"(",
")",
":",
"yield",
"e2"
] | "Find span annotation of the specified type that include this word" | [
"Find",
"span",
"annotation",
"of",
"the",
"specified",
"type",
"that",
"include",
"this",
"word"
] | "7707f69a91caaa6cde037f0d0379f1d42500a68b" | "https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L5528-L5542" | "" | "" | "" | "" | "" | "" | -1 |
36 | "proycon/pynlpl" | "pynlpl/formats/folia.py" | "Pattern.resolve" | "def resolve(self,size, distribution):
"""Resolve a variable sized pattern to all patterns of a certain fixed size"""
if not self.variablesize():
raise Exception("Can only resize patterns with * wildcards")
nrofwildcards = 0
for x in self.sequence:
if x == '*':
nrofwildcards += 1
assert (len(distribution) == nrofwildcards)
wildcardnr = 0
newsequence = []
for x in self.sequence:
if x == '*':
newsequence += [True] * distribution[wildcardnr]
wildcardnr += 1
else:
newsequence.append(x)
d = { 'matchannotation':self.matchannotation, 'matchannotationset':self.matchannotationset, 'casesensitive':self.casesensitive }
yield Pattern(*newsequence, **d )" | "python" | "def resolve(self,size, distribution):
"""Resolve a variable sized pattern to all patterns of a certain fixed size"""
if not self.variablesize():
raise Exception("Can only resize patterns with * wildcards")
nrofwildcards = 0
for x in self.sequence:
if x == '*':
nrofwildcards += 1
assert (len(distribution) == nrofwildcards)
wildcardnr = 0
newsequence = []
for x in self.sequence:
if x == '*':
newsequence += [True] * distribution[wildcardnr]
wildcardnr += 1
else:
newsequence.append(x)
d = { 'matchannotation':self.matchannotation, 'matchannotationset':self.matchannotationset, 'casesensitive':self.casesensitive }
yield Pattern(*newsequence, **d )" | [
"def",
"resolve",
"(",
"self",
",",
"size",
",",
"distribution",
")",
":",
"if",
"not",
"self",
".",
"variablesize",
"(",
")",
":",
"raise",
"Exception",
"(",
"\"Can only resize patterns with * wildcards\"",
")",
"nrofwildcards",
"=",
"0",
"for",
"x",
"in",
"self",
".",
"sequence",
":",
"if",
"x",
"==",
"'*'",
":",
"nrofwildcards",
"+=",
"1",
"assert",
"(",
"len",
"(",
"distribution",
")",
"==",
"nrofwildcards",
")",
"wildcardnr",
"=",
"0",
"newsequence",
"=",
"[",
"]",
"for",
"x",
"in",
"self",
".",
"sequence",
":",
"if",
"x",
"==",
"'*'",
":",
"newsequence",
"+=",
"[",
"True",
"]",
"*",
"distribution",
"[",
"wildcardnr",
"]",
"wildcardnr",
"+=",
"1",
"else",
":",
"newsequence",
".",
"append",
"(",
"x",
")",
"d",
"=",
"{",
"'matchannotation'",
":",
"self",
".",
"matchannotation",
",",
"'matchannotationset'",
":",
"self",
".",
"matchannotationset",
",",
"'casesensitive'",
":",
"self",
".",
"casesensitive",
"}",
"yield",
"Pattern",
"(",
"*",
"newsequence",
",",
"*",
"*",
"d",
")"
] | "Resolve a variable sized pattern to all patterns of a certain fixed size" | [
"Resolve",
"a",
"variable",
"sized",
"pattern",
"to",
"all",
"patterns",
"of",
"a",
"certain",
"fixed",
"size"
] | "7707f69a91caaa6cde037f0d0379f1d42500a68b" | "https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L6221-L6242" | "" | "" | "" | "" | "" | "" | -1 |
37 | "proycon/pynlpl" | "pynlpl/formats/folia.py" | "Document.load" | "def load(self, filename):
"""Load a FoLiA XML file.
Argument:
filename (str): The file to load
"""
#if LXE and self.mode != Mode.XPATH:
# #workaround for xml:id problem (disabled)
# #f = open(filename)
# #s = f.read().replace(' xml:id=', ' id=')
# #f.close()
# self.tree = ElementTree.parse(filename)
#else:
self.tree = xmltreefromfile(filename)
self.parsexml(self.tree.getroot())
if self.mode != Mode.XPATH:
#XML Tree is now obsolete (only needed when partially loaded for xpath queries)
self.tree = None" | "python" | "def load(self, filename):
"""Load a FoLiA XML file.
Argument:
filename (str): The file to load
"""
#if LXE and self.mode != Mode.XPATH:
# #workaround for xml:id problem (disabled)
# #f = open(filename)
# #s = f.read().replace(' xml:id=', ' id=')
# #f.close()
# self.tree = ElementTree.parse(filename)
#else:
self.tree = xmltreefromfile(filename)
self.parsexml(self.tree.getroot())
if self.mode != Mode.XPATH:
#XML Tree is now obsolete (only needed when partially loaded for xpath queries)
self.tree = None" | [
"def",
"load",
"(",
"self",
",",
"filename",
")",
":",
"#if LXE and self.mode != Mode.XPATH:",
"# #workaround for xml:id problem (disabled)",
"# #f = open(filename)",
"# #s = f.read().replace(' xml:id=', ' id=')",
"# #f.close()",
"# self.tree = ElementTree.parse(filename)",
"#else:",
"self",
".",
"tree",
"=",
"xmltreefromfile",
"(",
"filename",
")",
"self",
".",
"parsexml",
"(",
"self",
".",
"tree",
".",
"getroot",
"(",
")",
")",
"if",
"self",
".",
"mode",
"!=",
"Mode",
".",
"XPATH",
":",
"#XML Tree is now obsolete (only needed when partially loaded for xpath queries)",
"self",
".",
"tree",
"=",
"None"
] | "Load a FoLiA XML file.
Argument:
filename (str): The file to load" | [
"Load",
"a",
"FoLiA",
"XML",
"file",
"."
] | "7707f69a91caaa6cde037f0d0379f1d42500a68b" | "https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L6495-L6512" | "" | "" | "" | "" | "" | "" | -1 |
38 | "proycon/pynlpl" | "pynlpl/formats/folia.py" | "Document.items" | "def items(self):
"""Returns a depth-first flat list of all items in the document"""
l = []
for e in self.data:
l += e.items()
return l" | "python" | "def items(self):
"""Returns a depth-first flat list of all items in the document"""
l = []
for e in self.data:
l += e.items()
return l" | [
"def",
"items",
"(",
"self",
")",
":",
"l",
"=",
"[",
"]",
"for",
"e",
"in",
"self",
".",
"data",
":",
"l",
"+=",
"e",
".",
"items",
"(",
")",
"return",
"l"
] | "Returns a depth-first flat list of all items in the document" | [
"Returns",
"a",
"depth",
"-",
"first",
"flat",
"list",
"of",
"all",
"items",
"in",
"the",
"document"
] | "7707f69a91caaa6cde037f0d0379f1d42500a68b" | "https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L6514-L6519" | "" | "" | "" | "" | "" | "" | -1 |
39 | "proycon/pynlpl" | "pynlpl/formats/folia.py" | "Document.save" | "def save(self, filename=None):
"""Save the document to file.
Arguments:
* filename (str): The filename to save to. If not set (``None``, default), saves to the same file as loaded from.
"""
if not filename:
filename = self.filename
if not filename:
raise Exception("No filename specified")
if filename[-4:].lower() == '.bz2':
f = bz2.BZ2File(filename,'wb')
f.write(self.xmlstring().encode('utf-8'))
f.close()
elif filename[-3:].lower() == '.gz':
f = gzip.GzipFile(filename,'wb') #pylint: disable=redefined-variable-type
f.write(self.xmlstring().encode('utf-8'))
f.close()
else:
f = io.open(filename,'w',encoding='utf-8')
f.write(self.xmlstring())
f.close()" | "python" | "def save(self, filename=None):
"""Save the document to file.
Arguments:
* filename (str): The filename to save to. If not set (``None``, default), saves to the same file as loaded from.
"""
if not filename:
filename = self.filename
if not filename:
raise Exception("No filename specified")
if filename[-4:].lower() == '.bz2':
f = bz2.BZ2File(filename,'wb')
f.write(self.xmlstring().encode('utf-8'))
f.close()
elif filename[-3:].lower() == '.gz':
f = gzip.GzipFile(filename,'wb') #pylint: disable=redefined-variable-type
f.write(self.xmlstring().encode('utf-8'))
f.close()
else:
f = io.open(filename,'w',encoding='utf-8')
f.write(self.xmlstring())
f.close()" | [
"def",
"save",
"(",
"self",
",",
"filename",
"=",
"None",
")",
":",
"if",
"not",
"filename",
":",
"filename",
"=",
"self",
".",
"filename",
"if",
"not",
"filename",
":",
"raise",
"Exception",
"(",
"\"No filename specified\"",
")",
"if",
"filename",
"[",
"-",
"4",
":",
"]",
".",
"lower",
"(",
")",
"==",
"'.bz2'",
":",
"f",
"=",
"bz2",
".",
"BZ2File",
"(",
"filename",
",",
"'wb'",
")",
"f",
".",
"write",
"(",
"self",
".",
"xmlstring",
"(",
")",
".",
"encode",
"(",
"'utf-8'",
")",
")",
"f",
".",
"close",
"(",
")",
"elif",
"filename",
"[",
"-",
"3",
":",
"]",
".",
"lower",
"(",
")",
"==",
"'.gz'",
":",
"f",
"=",
"gzip",
".",
"GzipFile",
"(",
"filename",
",",
"'wb'",
")",
"#pylint: disable=redefined-variable-type",
"f",
".",
"write",
"(",
"self",
".",
"xmlstring",
"(",
")",
".",
"encode",
"(",
"'utf-8'",
")",
")",
"f",
".",
"close",
"(",
")",
"else",
":",
"f",
"=",
"io",
".",
"open",
"(",
"filename",
",",
"'w'",
",",
"encoding",
"=",
"'utf-8'",
")",
"f",
".",
"write",
"(",
"self",
".",
"xmlstring",
"(",
")",
")",
"f",
".",
"close",
"(",
")"
] | "Save the document to file.
Arguments:
* filename (str): The filename to save to. If not set (``None``, default), saves to the same file as loaded from." | [
"Save",
"the",
"document",
"to",
"file",
"."
] | "7707f69a91caaa6cde037f0d0379f1d42500a68b" | "https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L6547-L6568" | "" | "" | "" | "" | "" | "" | -1 |
40 | "proycon/pynlpl" | "pynlpl/formats/folia.py" | "Document.xmldeclarations" | "def xmldeclarations(self):
"""Internal method to generate XML nodes for all declarations"""
l = []
E = ElementMaker(namespace="http://ilk.uvt.nl/folia",nsmap={None: "http://ilk.uvt.nl/folia", 'xml' : "http://www.w3.org/XML/1998/namespace"})
for annotationtype, set in self.annotations:
label = None
#Find the 'label' for the declarations dynamically (aka: AnnotationType --> String)
for key, value in vars(AnnotationType).items():
if value == annotationtype:
label = key
break
#gather attribs
if (annotationtype == AnnotationType.TEXT or annotationtype == AnnotationType.PHON) and set == 'undefined' and len(self.annotationdefaults[annotationtype][set]) == 0:
#this is the implicit TextContent declaration, no need to output it explicitly
continue
attribs = {}
if set and set != 'undefined':
attribs['{' + NSFOLIA + '}set'] = set
for key, value in self.annotationdefaults[annotationtype][set].items():
if key == 'annotatortype':
if value == AnnotatorType.MANUAL:
attribs['{' + NSFOLIA + '}' + key] = 'manual'
elif value == AnnotatorType.AUTO:
attribs['{' + NSFOLIA + '}' + key] = 'auto'
elif key == 'datetime':
attribs['{' + NSFOLIA + '}' + key] = value.strftime("%Y-%m-%dT%H:%M:%S") #proper iso-formatting
elif value:
attribs['{' + NSFOLIA + '}' + key] = value
if label:
l.append( makeelement(E,'{' + NSFOLIA + '}' + label.lower() + '-annotation', **attribs) )
else:
raise Exception("Invalid annotation type")
return l" | "python" | "def xmldeclarations(self):
"""Internal method to generate XML nodes for all declarations"""
l = []
E = ElementMaker(namespace="http://ilk.uvt.nl/folia",nsmap={None: "http://ilk.uvt.nl/folia", 'xml' : "http://www.w3.org/XML/1998/namespace"})
for annotationtype, set in self.annotations:
label = None
#Find the 'label' for the declarations dynamically (aka: AnnotationType --> String)
for key, value in vars(AnnotationType).items():
if value == annotationtype:
label = key
break
#gather attribs
if (annotationtype == AnnotationType.TEXT or annotationtype == AnnotationType.PHON) and set == 'undefined' and len(self.annotationdefaults[annotationtype][set]) == 0:
#this is the implicit TextContent declaration, no need to output it explicitly
continue
attribs = {}
if set and set != 'undefined':
attribs['{' + NSFOLIA + '}set'] = set
for key, value in self.annotationdefaults[annotationtype][set].items():
if key == 'annotatortype':
if value == AnnotatorType.MANUAL:
attribs['{' + NSFOLIA + '}' + key] = 'manual'
elif value == AnnotatorType.AUTO:
attribs['{' + NSFOLIA + '}' + key] = 'auto'
elif key == 'datetime':
attribs['{' + NSFOLIA + '}' + key] = value.strftime("%Y-%m-%dT%H:%M:%S") #proper iso-formatting
elif value:
attribs['{' + NSFOLIA + '}' + key] = value
if label:
l.append( makeelement(E,'{' + NSFOLIA + '}' + label.lower() + '-annotation', **attribs) )
else:
raise Exception("Invalid annotation type")
return l" | [
"def",
"xmldeclarations",
"(",
"self",
")",
":",
"l",
"=",
"[",
"]",
"E",
"=",
"ElementMaker",
"(",
"namespace",
"=",
"\"http://ilk.uvt.nl/folia\"",
",",
"nsmap",
"=",
"{",
"None",
":",
"\"http://ilk.uvt.nl/folia\"",
",",
"'xml'",
":",
"\"http://www.w3.org/XML/1998/namespace\"",
"}",
")",
"for",
"annotationtype",
",",
"set",
"in",
"self",
".",
"annotations",
":",
"label",
"=",
"None",
"#Find the 'label' for the declarations dynamically (aka: AnnotationType --> String)",
"for",
"key",
",",
"value",
"in",
"vars",
"(",
"AnnotationType",
")",
".",
"items",
"(",
")",
":",
"if",
"value",
"==",
"annotationtype",
":",
"label",
"=",
"key",
"break",
"#gather attribs",
"if",
"(",
"annotationtype",
"==",
"AnnotationType",
".",
"TEXT",
"or",
"annotationtype",
"==",
"AnnotationType",
".",
"PHON",
")",
"and",
"set",
"==",
"'undefined'",
"and",
"len",
"(",
"self",
".",
"annotationdefaults",
"[",
"annotationtype",
"]",
"[",
"set",
"]",
")",
"==",
"0",
":",
"#this is the implicit TextContent declaration, no need to output it explicitly",
"continue",
"attribs",
"=",
"{",
"}",
"if",
"set",
"and",
"set",
"!=",
"'undefined'",
":",
"attribs",
"[",
"'{'",
"+",
"NSFOLIA",
"+",
"'}set'",
"]",
"=",
"set",
"for",
"key",
",",
"value",
"in",
"self",
".",
"annotationdefaults",
"[",
"annotationtype",
"]",
"[",
"set",
"]",
".",
"items",
"(",
")",
":",
"if",
"key",
"==",
"'annotatortype'",
":",
"if",
"value",
"==",
"AnnotatorType",
".",
"MANUAL",
":",
"attribs",
"[",
"'{'",
"+",
"NSFOLIA",
"+",
"'}'",
"+",
"key",
"]",
"=",
"'manual'",
"elif",
"value",
"==",
"AnnotatorType",
".",
"AUTO",
":",
"attribs",
"[",
"'{'",
"+",
"NSFOLIA",
"+",
"'}'",
"+",
"key",
"]",
"=",
"'auto'",
"elif",
"key",
"==",
"'datetime'",
":",
"attribs",
"[",
"'{'",
"+",
"NSFOLIA",
"+",
"'}'",
"+",
"key",
"]",
"=",
"value",
".",
"strftime",
"(",
"\"%Y-%m-%dT%H:%M:%S\"",
")",
"#proper iso-formatting",
"elif",
"value",
":",
"attribs",
"[",
"'{'",
"+",
"NSFOLIA",
"+",
"'}'",
"+",
"key",
"]",
"=",
"value",
"if",
"label",
":",
"l",
".",
"append",
"(",
"makeelement",
"(",
"E",
",",
"'{'",
"+",
"NSFOLIA",
"+",
"'}'",
"+",
"label",
".",
"lower",
"(",
")",
"+",
"'-annotation'",
",",
"*",
"*",
"attribs",
")",
")",
"else",
":",
"raise",
"Exception",
"(",
"\"Invalid annotation type\"",
")",
"return",
"l"
] | "Internal method to generate XML nodes for all declarations" | [
"Internal",
"method",
"to",
"generate",
"XML",
"nodes",
"for",
"all",
"declarations"
] | "7707f69a91caaa6cde037f0d0379f1d42500a68b" | "https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L6653-L6690" | "" | "" | "" | "" | "" | "" | -1 |
41 | "proycon/pynlpl" | "pynlpl/formats/folia.py" | "Document.jsondeclarations" | "def jsondeclarations(self):
"""Return all declarations in a form ready to be serialised to JSON.
Returns:
list of dict
"""
l = []
for annotationtype, set in self.annotations:
label = None
#Find the 'label' for the declarations dynamically (aka: AnnotationType --> String)
for key, value in vars(AnnotationType).items():
if value == annotationtype:
label = key
break
#gather attribs
if (annotationtype == AnnotationType.TEXT or annotationtype == AnnotationType.PHON) and set == 'undefined' and len(self.annotationdefaults[annotationtype][set]) == 0:
#this is the implicit TextContent declaration, no need to output it explicitly
continue
jsonnode = {'annotationtype': label.lower()}
if set and set != 'undefined':
jsonnode['set'] = set
for key, value in self.annotationdefaults[annotationtype][set].items():
if key == 'annotatortype':
if value == AnnotatorType.MANUAL:
jsonnode[key] = 'manual'
elif value == AnnotatorType.AUTO:
jsonnode[key] = 'auto'
elif key == 'datetime':
jsonnode[key] = value.strftime("%Y-%m-%dT%H:%M:%S") #proper iso-formatting
elif value:
jsonnode[key] = value
if label:
l.append( jsonnode )
else:
raise Exception("Invalid annotation type")
return l" | "python" | "def jsondeclarations(self):
"""Return all declarations in a form ready to be serialised to JSON.
Returns:
list of dict
"""
l = []
for annotationtype, set in self.annotations:
label = None
#Find the 'label' for the declarations dynamically (aka: AnnotationType --> String)
for key, value in vars(AnnotationType).items():
if value == annotationtype:
label = key
break
#gather attribs
if (annotationtype == AnnotationType.TEXT or annotationtype == AnnotationType.PHON) and set == 'undefined' and len(self.annotationdefaults[annotationtype][set]) == 0:
#this is the implicit TextContent declaration, no need to output it explicitly
continue
jsonnode = {'annotationtype': label.lower()}
if set and set != 'undefined':
jsonnode['set'] = set
for key, value in self.annotationdefaults[annotationtype][set].items():
if key == 'annotatortype':
if value == AnnotatorType.MANUAL:
jsonnode[key] = 'manual'
elif value == AnnotatorType.AUTO:
jsonnode[key] = 'auto'
elif key == 'datetime':
jsonnode[key] = value.strftime("%Y-%m-%dT%H:%M:%S") #proper iso-formatting
elif value:
jsonnode[key] = value
if label:
l.append( jsonnode )
else:
raise Exception("Invalid annotation type")
return l" | [
"def",
"jsondeclarations",
"(",
"self",
")",
":",
"l",
"=",
"[",
"]",
"for",
"annotationtype",
",",
"set",
"in",
"self",
".",
"annotations",
":",
"label",
"=",
"None",
"#Find the 'label' for the declarations dynamically (aka: AnnotationType --> String)",
"for",
"key",
",",
"value",
"in",
"vars",
"(",
"AnnotationType",
")",
".",
"items",
"(",
")",
":",
"if",
"value",
"==",
"annotationtype",
":",
"label",
"=",
"key",
"break",
"#gather attribs",
"if",
"(",
"annotationtype",
"==",
"AnnotationType",
".",
"TEXT",
"or",
"annotationtype",
"==",
"AnnotationType",
".",
"PHON",
")",
"and",
"set",
"==",
"'undefined'",
"and",
"len",
"(",
"self",
".",
"annotationdefaults",
"[",
"annotationtype",
"]",
"[",
"set",
"]",
")",
"==",
"0",
":",
"#this is the implicit TextContent declaration, no need to output it explicitly",
"continue",
"jsonnode",
"=",
"{",
"'annotationtype'",
":",
"label",
".",
"lower",
"(",
")",
"}",
"if",
"set",
"and",
"set",
"!=",
"'undefined'",
":",
"jsonnode",
"[",
"'set'",
"]",
"=",
"set",
"for",
"key",
",",
"value",
"in",
"self",
".",
"annotationdefaults",
"[",
"annotationtype",
"]",
"[",
"set",
"]",
".",
"items",
"(",
")",
":",
"if",
"key",
"==",
"'annotatortype'",
":",
"if",
"value",
"==",
"AnnotatorType",
".",
"MANUAL",
":",
"jsonnode",
"[",
"key",
"]",
"=",
"'manual'",
"elif",
"value",
"==",
"AnnotatorType",
".",
"AUTO",
":",
"jsonnode",
"[",
"key",
"]",
"=",
"'auto'",
"elif",
"key",
"==",
"'datetime'",
":",
"jsonnode",
"[",
"key",
"]",
"=",
"value",
".",
"strftime",
"(",
"\"%Y-%m-%dT%H:%M:%S\"",
")",
"#proper iso-formatting",
"elif",
"value",
":",
"jsonnode",
"[",
"key",
"]",
"=",
"value",
"if",
"label",
":",
"l",
".",
"append",
"(",
"jsonnode",
")",
"else",
":",
"raise",
"Exception",
"(",
"\"Invalid annotation type\"",
")",
"return",
"l"
] | "Return all declarations in a form ready to be serialised to JSON.
Returns:
list of dict" | [
"Return",
"all",
"declarations",
"in",
"a",
"form",
"ready",
"to",
"be",
"serialised",
"to",
"JSON",
"."
] | "7707f69a91caaa6cde037f0d0379f1d42500a68b" | "https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L6692-L6731" | "" | "" | "" | "" | "" | "" | -1 |
42 | "proycon/pynlpl" | "pynlpl/formats/folia.py" | "Document.xml" | "def xml(self):
"""Serialise the document to XML.
Returns:
lxml.etree.Element
See also:
:meth:`Document.xmlstring`
"""
self.pendingvalidation()
E = ElementMaker(namespace="http://ilk.uvt.nl/folia",nsmap={'xml' : "http://www.w3.org/XML/1998/namespace", 'xlink':"http://www.w3.org/1999/xlink"})
attribs = {}
attribs['{http://www.w3.org/XML/1998/namespace}id'] = self.id
#if self.version:
# attribs['version'] = self.version
#else:
attribs['version'] = FOLIAVERSION
attribs['generator'] = 'pynlpl.formats.folia-v' + LIBVERSION
metadataattribs = {}
metadataattribs['{' + NSFOLIA + '}type'] = self.metadatatype
if isinstance(self.metadata, ExternalMetaData):
metadataattribs['{' + NSFOLIA + '}src'] = self.metadata.url
e = E.FoLiA(
E.metadata(
E.annotations(
*self.xmldeclarations()
),
*self.xmlmetadata(),
**metadataattribs
)
, **attribs)
for text in self.data:
e.append(text.xml())
return e" | "python" | "def xml(self):
"""Serialise the document to XML.
Returns:
lxml.etree.Element
See also:
:meth:`Document.xmlstring`
"""
self.pendingvalidation()
E = ElementMaker(namespace="http://ilk.uvt.nl/folia",nsmap={'xml' : "http://www.w3.org/XML/1998/namespace", 'xlink':"http://www.w3.org/1999/xlink"})
attribs = {}
attribs['{http://www.w3.org/XML/1998/namespace}id'] = self.id
#if self.version:
# attribs['version'] = self.version
#else:
attribs['version'] = FOLIAVERSION
attribs['generator'] = 'pynlpl.formats.folia-v' + LIBVERSION
metadataattribs = {}
metadataattribs['{' + NSFOLIA + '}type'] = self.metadatatype
if isinstance(self.metadata, ExternalMetaData):
metadataattribs['{' + NSFOLIA + '}src'] = self.metadata.url
e = E.FoLiA(
E.metadata(
E.annotations(
*self.xmldeclarations()
),
*self.xmlmetadata(),
**metadataattribs
)
, **attribs)
for text in self.data:
e.append(text.xml())
return e" | [
"def",
"xml",
"(",
"self",
")",
":",
"self",
".",
"pendingvalidation",
"(",
")",
"E",
"=",
"ElementMaker",
"(",
"namespace",
"=",
"\"http://ilk.uvt.nl/folia\"",
",",
"nsmap",
"=",
"{",
"'xml'",
":",
"\"http://www.w3.org/XML/1998/namespace\"",
",",
"'xlink'",
":",
"\"http://www.w3.org/1999/xlink\"",
"}",
")",
"attribs",
"=",
"{",
"}",
"attribs",
"[",
"'{http://www.w3.org/XML/1998/namespace}id'",
"]",
"=",
"self",
".",
"id",
"#if self.version:",
"# attribs['version'] = self.version",
"#else:",
"attribs",
"[",
"'version'",
"]",
"=",
"FOLIAVERSION",
"attribs",
"[",
"'generator'",
"]",
"=",
"'pynlpl.formats.folia-v'",
"+",
"LIBVERSION",
"metadataattribs",
"=",
"{",
"}",
"metadataattribs",
"[",
"'{'",
"+",
"NSFOLIA",
"+",
"'}type'",
"]",
"=",
"self",
".",
"metadatatype",
"if",
"isinstance",
"(",
"self",
".",
"metadata",
",",
"ExternalMetaData",
")",
":",
"metadataattribs",
"[",
"'{'",
"+",
"NSFOLIA",
"+",
"'}src'",
"]",
"=",
"self",
".",
"metadata",
".",
"url",
"e",
"=",
"E",
".",
"FoLiA",
"(",
"E",
".",
"metadata",
"(",
"E",
".",
"annotations",
"(",
"*",
"self",
".",
"xmldeclarations",
"(",
")",
")",
",",
"*",
"self",
".",
"xmlmetadata",
"(",
")",
",",
"*",
"*",
"metadataattribs",
")",
",",
"*",
"*",
"attribs",
")",
"for",
"text",
"in",
"self",
".",
"data",
":",
"e",
".",
"append",
"(",
"text",
".",
"xml",
"(",
")",
")",
"return",
"e"
] | "Serialise the document to XML.
Returns:
lxml.etree.Element
See also:
:meth:`Document.xmlstring`" | [
"Serialise",
"the",
"document",
"to",
"XML",
"."
] | "7707f69a91caaa6cde037f0d0379f1d42500a68b" | "https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L6733-L6773" | "" | "" | "" | "" | "" | "" | -1 |
43 | "proycon/pynlpl" | "pynlpl/formats/folia.py" | "Document.json" | "def json(self):
"""Serialise the document to a ``dict`` ready for serialisation to JSON.
Example::
import json
jsondoc = json.dumps(doc.json())
"""
self.pendingvalidation()
jsondoc = {'id': self.id, 'children': [], 'declarations': self.jsondeclarations() }
if self.version:
jsondoc['version'] = self.version
else:
jsondoc['version'] = FOLIAVERSION
jsondoc['generator'] = 'pynlpl.formats.folia-v' + LIBVERSION
for text in self.data:
jsondoc['children'].append(text.json())
return jsondoc" | "python" | "def json(self):
"""Serialise the document to a ``dict`` ready for serialisation to JSON.
Example::
import json
jsondoc = json.dumps(doc.json())
"""
self.pendingvalidation()
jsondoc = {'id': self.id, 'children': [], 'declarations': self.jsondeclarations() }
if self.version:
jsondoc['version'] = self.version
else:
jsondoc['version'] = FOLIAVERSION
jsondoc['generator'] = 'pynlpl.formats.folia-v' + LIBVERSION
for text in self.data:
jsondoc['children'].append(text.json())
return jsondoc" | [
"def",
"json",
"(",
"self",
")",
":",
"self",
".",
"pendingvalidation",
"(",
")",
"jsondoc",
"=",
"{",
"'id'",
":",
"self",
".",
"id",
",",
"'children'",
":",
"[",
"]",
",",
"'declarations'",
":",
"self",
".",
"jsondeclarations",
"(",
")",
"}",
"if",
"self",
".",
"version",
":",
"jsondoc",
"[",
"'version'",
"]",
"=",
"self",
".",
"version",
"else",
":",
"jsondoc",
"[",
"'version'",
"]",
"=",
"FOLIAVERSION",
"jsondoc",
"[",
"'generator'",
"]",
"=",
"'pynlpl.formats.folia-v'",
"+",
"LIBVERSION",
"for",
"text",
"in",
"self",
".",
"data",
":",
"jsondoc",
"[",
"'children'",
"]",
".",
"append",
"(",
"text",
".",
"json",
"(",
")",
")",
"return",
"jsondoc"
] | "Serialise the document to a ``dict`` ready for serialisation to JSON.
Example::
import json
jsondoc = json.dumps(doc.json())" | [
"Serialise",
"the",
"document",
"to",
"a",
"dict",
"ready",
"for",
"serialisation",
"to",
"JSON",
"."
] | "7707f69a91caaa6cde037f0d0379f1d42500a68b" | "https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L6775-L6794" | "" | "" | "" | "" | "" | "" | -1 |
44 | "proycon/pynlpl" | "pynlpl/formats/folia.py" | "Document.xmlmetadata" | "def xmlmetadata(self):
"""Internal method to serialize metadata to XML"""
E = ElementMaker(namespace="http://ilk.uvt.nl/folia",nsmap={None: "http://ilk.uvt.nl/folia", 'xml' : "http://www.w3.org/XML/1998/namespace"})
elements = []
if self.metadatatype == "native":
if isinstance(self.metadata, NativeMetaData):
for key, value in self.metadata.items():
elements.append(E.meta(value,id=key) )
else:
if isinstance(self.metadata, ForeignData):
#in-document
m = self.metadata
while m is not None:
elements.append(m.xml())
m = m.next
for metadata_id, submetadata in self.submetadata.items():
subelements = []
attribs = {
"{http://www.w3.org/XML/1998/namespace}id": metadata_id,
"type": self.submetadatatype[metadata_id] }
if isinstance(submetadata, NativeMetaData):
for key, value in submetadata.items():
subelements.append(E.meta(value,id=key) )
elif isinstance(submetadata, ExternalMetaData):
attribs['src'] = submetadata.url
elif isinstance(submetadata, ForeignData):
#in-document
m = submetadata
while m is not None:
subelements.append(m.xml())
m = m.next
elements.append( E.submetadata(*subelements, **attribs))
return elements" | "python" | "def xmlmetadata(self):
"""Internal method to serialize metadata to XML"""
E = ElementMaker(namespace="http://ilk.uvt.nl/folia",nsmap={None: "http://ilk.uvt.nl/folia", 'xml' : "http://www.w3.org/XML/1998/namespace"})
elements = []
if self.metadatatype == "native":
if isinstance(self.metadata, NativeMetaData):
for key, value in self.metadata.items():
elements.append(E.meta(value,id=key) )
else:
if isinstance(self.metadata, ForeignData):
#in-document
m = self.metadata
while m is not None:
elements.append(m.xml())
m = m.next
for metadata_id, submetadata in self.submetadata.items():
subelements = []
attribs = {
"{http://www.w3.org/XML/1998/namespace}id": metadata_id,
"type": self.submetadatatype[metadata_id] }
if isinstance(submetadata, NativeMetaData):
for key, value in submetadata.items():
subelements.append(E.meta(value,id=key) )
elif isinstance(submetadata, ExternalMetaData):
attribs['src'] = submetadata.url
elif isinstance(submetadata, ForeignData):
#in-document
m = submetadata
while m is not None:
subelements.append(m.xml())
m = m.next
elements.append( E.submetadata(*subelements, **attribs))
return elements" | [
"def",
"xmlmetadata",
"(",
"self",
")",
":",
"E",
"=",
"ElementMaker",
"(",
"namespace",
"=",
"\"http://ilk.uvt.nl/folia\"",
",",
"nsmap",
"=",
"{",
"None",
":",
"\"http://ilk.uvt.nl/folia\"",
",",
"'xml'",
":",
"\"http://www.w3.org/XML/1998/namespace\"",
"}",
")",
"elements",
"=",
"[",
"]",
"if",
"self",
".",
"metadatatype",
"==",
"\"native\"",
":",
"if",
"isinstance",
"(",
"self",
".",
"metadata",
",",
"NativeMetaData",
")",
":",
"for",
"key",
",",
"value",
"in",
"self",
".",
"metadata",
".",
"items",
"(",
")",
":",
"elements",
".",
"append",
"(",
"E",
".",
"meta",
"(",
"value",
",",
"id",
"=",
"key",
")",
")",
"else",
":",
"if",
"isinstance",
"(",
"self",
".",
"metadata",
",",
"ForeignData",
")",
":",
"#in-document",
"m",
"=",
"self",
".",
"metadata",
"while",
"m",
"is",
"not",
"None",
":",
"elements",
".",
"append",
"(",
"m",
".",
"xml",
"(",
")",
")",
"m",
"=",
"m",
".",
"next",
"for",
"metadata_id",
",",
"submetadata",
"in",
"self",
".",
"submetadata",
".",
"items",
"(",
")",
":",
"subelements",
"=",
"[",
"]",
"attribs",
"=",
"{",
"\"{http://www.w3.org/XML/1998/namespace}id\"",
":",
"metadata_id",
",",
"\"type\"",
":",
"self",
".",
"submetadatatype",
"[",
"metadata_id",
"]",
"}",
"if",
"isinstance",
"(",
"submetadata",
",",
"NativeMetaData",
")",
":",
"for",
"key",
",",
"value",
"in",
"submetadata",
".",
"items",
"(",
")",
":",
"subelements",
".",
"append",
"(",
"E",
".",
"meta",
"(",
"value",
",",
"id",
"=",
"key",
")",
")",
"elif",
"isinstance",
"(",
"submetadata",
",",
"ExternalMetaData",
")",
":",
"attribs",
"[",
"'src'",
"]",
"=",
"submetadata",
".",
"url",
"elif",
"isinstance",
"(",
"submetadata",
",",
"ForeignData",
")",
":",
"#in-document",
"m",
"=",
"submetadata",
"while",
"m",
"is",
"not",
"None",
":",
"subelements",
".",
"append",
"(",
"m",
".",
"xml",
"(",
")",
")",
"m",
"=",
"m",
".",
"next",
"elements",
".",
"append",
"(",
"E",
".",
"submetadata",
"(",
"*",
"subelements",
",",
"*",
"*",
"attribs",
")",
")",
"return",
"elements"
] | "Internal method to serialize metadata to XML" | [
"Internal",
"method",
"to",
"serialize",
"metadata",
"to",
"XML"
] | "7707f69a91caaa6cde037f0d0379f1d42500a68b" | "https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L6796-L6828" | "" | "" | "" | "" | "" | "" | -1 |
45 | "proycon/pynlpl" | "pynlpl/formats/folia.py" | "Document.declare" | "def declare(self, annotationtype, set, **kwargs):
"""Declare a new annotation type to be used in the document.
Keyword arguments can be used to set defaults for any annotation of this type and set.
Arguments:
annotationtype: The type of annotation, this is conveyed by passing the corresponding annototion class (such as :class:`PosAnnotation` for example), or a member of :class:`AnnotationType`, such as ``AnnotationType.POS``.
set (str): the set, should formally be a URL pointing to the set definition
Keyword Arguments:
annotator (str): Sets a default annotator
annotatortype: Should be either ``AnnotatorType.MANUAL`` or ``AnnotatorType.AUTO``, indicating whether the annotation was performed manually or by an automated process.
datetime (datetime.datetime): Sets the default datetime
alias (str): Defines alias that may be used in set attribute of elements instead of the full set name
Example::
doc.declare(folia.PosAnnotation, 'http://some/path/brown-tag-set', annotator="mytagger", annotatortype=folia.AnnotatorType.AUTO)
"""
if (sys.version > '3' and not isinstance(set,str)) or (sys.version < '3' and not isinstance(set,(str,unicode))):
raise ValueError("Set parameter for declare() must be a string")
if inspect.isclass(annotationtype):
annotationtype = annotationtype.ANNOTATIONTYPE
if annotationtype in self.alias_set and set in self.alias_set[annotationtype]:
raise ValueError("Set " + set + " conflicts with alias, may not be equal!")
if not (annotationtype, set) in self.annotations:
self.annotations.append( (annotationtype,set) )
if set and self.loadsetdefinitions and not set in self.setdefinitions:
if set[:7] == "http://" or set[:8] == "https://" or set[:6] == "ftp://":
self.setdefinitions[set] = SetDefinition(set,verbose=self.verbose) #will raise exception on error
if not annotationtype in self.annotationdefaults:
self.annotationdefaults[annotationtype] = {}
self.annotationdefaults[annotationtype][set] = kwargs
if 'alias' in kwargs:
if annotationtype in self.set_alias and set in self.set_alias[annotationtype] and self.set_alias[annotationtype][set] != kwargs['alias']:
raise ValueError("Redeclaring set " + set + " with another alias ('"+kwargs['alias']+"') is not allowed!")
if annotationtype in self.alias_set and kwargs['alias'] in self.alias_set[annotationtype] and self.alias_set[annotationtype][kwargs['alias']] != set:
raise ValueError("Redeclaring alias " + kwargs['alias'] + " with another set ('"+set+"') is not allowed!")
if annotationtype in self.set_alias and kwargs['alias'] in self.set_alias[annotationtype]:
raise ValueError("Alias " + kwargs['alias'] + " conflicts with set name, may not be equal!")
if annotationtype not in self.alias_set:
self.alias_set[annotationtype] = {}
if annotationtype not in self.set_alias:
self.set_alias[annotationtype] = {}
self.alias_set[annotationtype][kwargs['alias']] = set
self.set_alias[annotationtype][set] = kwargs['alias']" | "python" | "def declare(self, annotationtype, set, **kwargs):
"""Declare a new annotation type to be used in the document.
Keyword arguments can be used to set defaults for any annotation of this type and set.
Arguments:
annotationtype: The type of annotation, this is conveyed by passing the corresponding annototion class (such as :class:`PosAnnotation` for example), or a member of :class:`AnnotationType`, such as ``AnnotationType.POS``.
set (str): the set, should formally be a URL pointing to the set definition
Keyword Arguments:
annotator (str): Sets a default annotator
annotatortype: Should be either ``AnnotatorType.MANUAL`` or ``AnnotatorType.AUTO``, indicating whether the annotation was performed manually or by an automated process.
datetime (datetime.datetime): Sets the default datetime
alias (str): Defines alias that may be used in set attribute of elements instead of the full set name
Example::
doc.declare(folia.PosAnnotation, 'http://some/path/brown-tag-set', annotator="mytagger", annotatortype=folia.AnnotatorType.AUTO)
"""
if (sys.version > '3' and not isinstance(set,str)) or (sys.version < '3' and not isinstance(set,(str,unicode))):
raise ValueError("Set parameter for declare() must be a string")
if inspect.isclass(annotationtype):
annotationtype = annotationtype.ANNOTATIONTYPE
if annotationtype in self.alias_set and set in self.alias_set[annotationtype]:
raise ValueError("Set " + set + " conflicts with alias, may not be equal!")
if not (annotationtype, set) in self.annotations:
self.annotations.append( (annotationtype,set) )
if set and self.loadsetdefinitions and not set in self.setdefinitions:
if set[:7] == "http://" or set[:8] == "https://" or set[:6] == "ftp://":
self.setdefinitions[set] = SetDefinition(set,verbose=self.verbose) #will raise exception on error
if not annotationtype in self.annotationdefaults:
self.annotationdefaults[annotationtype] = {}
self.annotationdefaults[annotationtype][set] = kwargs
if 'alias' in kwargs:
if annotationtype in self.set_alias and set in self.set_alias[annotationtype] and self.set_alias[annotationtype][set] != kwargs['alias']:
raise ValueError("Redeclaring set " + set + " with another alias ('"+kwargs['alias']+"') is not allowed!")
if annotationtype in self.alias_set and kwargs['alias'] in self.alias_set[annotationtype] and self.alias_set[annotationtype][kwargs['alias']] != set:
raise ValueError("Redeclaring alias " + kwargs['alias'] + " with another set ('"+set+"') is not allowed!")
if annotationtype in self.set_alias and kwargs['alias'] in self.set_alias[annotationtype]:
raise ValueError("Alias " + kwargs['alias'] + " conflicts with set name, may not be equal!")
if annotationtype not in self.alias_set:
self.alias_set[annotationtype] = {}
if annotationtype not in self.set_alias:
self.set_alias[annotationtype] = {}
self.alias_set[annotationtype][kwargs['alias']] = set
self.set_alias[annotationtype][set] = kwargs['alias']" | [
"def",
"declare",
"(",
"self",
",",
"annotationtype",
",",
"set",
",",
"*",
"*",
"kwargs",
")",
":",
"if",
"(",
"sys",
".",
"version",
">",
"'3'",
"and",
"not",
"isinstance",
"(",
"set",
",",
"str",
")",
")",
"or",
"(",
"sys",
".",
"version",
"<",
"'3'",
"and",
"not",
"isinstance",
"(",
"set",
",",
"(",
"str",
",",
"unicode",
")",
")",
")",
":",
"raise",
"ValueError",
"(",
"\"Set parameter for declare() must be a string\"",
")",
"if",
"inspect",
".",
"isclass",
"(",
"annotationtype",
")",
":",
"annotationtype",
"=",
"annotationtype",
".",
"ANNOTATIONTYPE",
"if",
"annotationtype",
"in",
"self",
".",
"alias_set",
"and",
"set",
"in",
"self",
".",
"alias_set",
"[",
"annotationtype",
"]",
":",
"raise",
"ValueError",
"(",
"\"Set \"",
"+",
"set",
"+",
"\" conflicts with alias, may not be equal!\"",
")",
"if",
"not",
"(",
"annotationtype",
",",
"set",
")",
"in",
"self",
".",
"annotations",
":",
"self",
".",
"annotations",
".",
"append",
"(",
"(",
"annotationtype",
",",
"set",
")",
")",
"if",
"set",
"and",
"self",
".",
"loadsetdefinitions",
"and",
"not",
"set",
"in",
"self",
".",
"setdefinitions",
":",
"if",
"set",
"[",
":",
"7",
"]",
"==",
"\"http://\"",
"or",
"set",
"[",
":",
"8",
"]",
"==",
"\"https://\"",
"or",
"set",
"[",
":",
"6",
"]",
"==",
"\"ftp://\"",
":",
"self",
".",
"setdefinitions",
"[",
"set",
"]",
"=",
"SetDefinition",
"(",
"set",
",",
"verbose",
"=",
"self",
".",
"verbose",
")",
"#will raise exception on error",
"if",
"not",
"annotationtype",
"in",
"self",
".",
"annotationdefaults",
":",
"self",
".",
"annotationdefaults",
"[",
"annotationtype",
"]",
"=",
"{",
"}",
"self",
".",
"annotationdefaults",
"[",
"annotationtype",
"]",
"[",
"set",
"]",
"=",
"kwargs",
"if",
"'alias'",
"in",
"kwargs",
":",
"if",
"annotationtype",
"in",
"self",
".",
"set_alias",
"and",
"set",
"in",
"self",
".",
"set_alias",
"[",
"annotationtype",
"]",
"and",
"self",
".",
"set_alias",
"[",
"annotationtype",
"]",
"[",
"set",
"]",
"!=",
"kwargs",
"[",
"'alias'",
"]",
":",
"raise",
"ValueError",
"(",
"\"Redeclaring set \"",
"+",
"set",
"+",
"\" with another alias ('\"",
"+",
"kwargs",
"[",
"'alias'",
"]",
"+",
"\"') is not allowed!\"",
")",
"if",
"annotationtype",
"in",
"self",
".",
"alias_set",
"and",
"kwargs",
"[",
"'alias'",
"]",
"in",
"self",
".",
"alias_set",
"[",
"annotationtype",
"]",
"and",
"self",
".",
"alias_set",
"[",
"annotationtype",
"]",
"[",
"kwargs",
"[",
"'alias'",
"]",
"]",
"!=",
"set",
":",
"raise",
"ValueError",
"(",
"\"Redeclaring alias \"",
"+",
"kwargs",
"[",
"'alias'",
"]",
"+",
"\" with another set ('\"",
"+",
"set",
"+",
"\"') is not allowed!\"",
")",
"if",
"annotationtype",
"in",
"self",
".",
"set_alias",
"and",
"kwargs",
"[",
"'alias'",
"]",
"in",
"self",
".",
"set_alias",
"[",
"annotationtype",
"]",
":",
"raise",
"ValueError",
"(",
"\"Alias \"",
"+",
"kwargs",
"[",
"'alias'",
"]",
"+",
"\" conflicts with set name, may not be equal!\"",
")",
"if",
"annotationtype",
"not",
"in",
"self",
".",
"alias_set",
":",
"self",
".",
"alias_set",
"[",
"annotationtype",
"]",
"=",
"{",
"}",
"if",
"annotationtype",
"not",
"in",
"self",
".",
"set_alias",
":",
"self",
".",
"set_alias",
"[",
"annotationtype",
"]",
"=",
"{",
"}",
"self",
".",
"alias_set",
"[",
"annotationtype",
"]",
"[",
"kwargs",
"[",
"'alias'",
"]",
"]",
"=",
"set",
"self",
".",
"set_alias",
"[",
"annotationtype",
"]",
"[",
"set",
"]",
"=",
"kwargs",
"[",
"'alias'",
"]"
] | "Declare a new annotation type to be used in the document.
Keyword arguments can be used to set defaults for any annotation of this type and set.
Arguments:
annotationtype: The type of annotation, this is conveyed by passing the corresponding annototion class (such as :class:`PosAnnotation` for example), or a member of :class:`AnnotationType`, such as ``AnnotationType.POS``.
set (str): the set, should formally be a URL pointing to the set definition
Keyword Arguments:
annotator (str): Sets a default annotator
annotatortype: Should be either ``AnnotatorType.MANUAL`` or ``AnnotatorType.AUTO``, indicating whether the annotation was performed manually or by an automated process.
datetime (datetime.datetime): Sets the default datetime
alias (str): Defines alias that may be used in set attribute of elements instead of the full set name
Example::
doc.declare(folia.PosAnnotation, 'http://some/path/brown-tag-set', annotator="mytagger", annotatortype=folia.AnnotatorType.AUTO)" | [
"Declare",
"a",
"new",
"annotation",
"type",
"to",
"be",
"used",
"in",
"the",
"document",
"."
] | "7707f69a91caaa6cde037f0d0379f1d42500a68b" | "https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L6972-L7018" | "" | "" | "" | "" | "" | "" | -1 |
46 | "proycon/pynlpl" | "pynlpl/formats/folia.py" | "Document.defaultset" | "def defaultset(self, annotationtype):
"""Obtain the default set for the specified annotation type.
Arguments:
annotationtype: The type of annotation, this is conveyed by passing the corresponding annototion class (such as :class:`PosAnnotation` for example), or a member of :class:`AnnotationType`, such as ``AnnotationType.POS``.
Returns:
the set (str)
Raises:
:class:`NoDefaultError` if the annotation type does not exist or if there is ambiguity (multiple sets for the same type)
"""
if inspect.isclass(annotationtype) or isinstance(annotationtype,AbstractElement): annotationtype = annotationtype.ANNOTATIONTYPE
try:
return list(self.annotationdefaults[annotationtype].keys())[0]
except KeyError:
raise NoDefaultError
except IndexError:
raise NoDefaultError" | "python" | "def defaultset(self, annotationtype):
"""Obtain the default set for the specified annotation type.
Arguments:
annotationtype: The type of annotation, this is conveyed by passing the corresponding annototion class (such as :class:`PosAnnotation` for example), or a member of :class:`AnnotationType`, such as ``AnnotationType.POS``.
Returns:
the set (str)
Raises:
:class:`NoDefaultError` if the annotation type does not exist or if there is ambiguity (multiple sets for the same type)
"""
if inspect.isclass(annotationtype) or isinstance(annotationtype,AbstractElement): annotationtype = annotationtype.ANNOTATIONTYPE
try:
return list(self.annotationdefaults[annotationtype].keys())[0]
except KeyError:
raise NoDefaultError
except IndexError:
raise NoDefaultError" | [
"def",
"defaultset",
"(",
"self",
",",
"annotationtype",
")",
":",
"if",
"inspect",
".",
"isclass",
"(",
"annotationtype",
")",
"or",
"isinstance",
"(",
"annotationtype",
",",
"AbstractElement",
")",
":",
"annotationtype",
"=",
"annotationtype",
".",
"ANNOTATIONTYPE",
"try",
":",
"return",
"list",
"(",
"self",
".",
"annotationdefaults",
"[",
"annotationtype",
"]",
".",
"keys",
"(",
")",
")",
"[",
"0",
"]",
"except",
"KeyError",
":",
"raise",
"NoDefaultError",
"except",
"IndexError",
":",
"raise",
"NoDefaultError"
] | "Obtain the default set for the specified annotation type.
Arguments:
annotationtype: The type of annotation, this is conveyed by passing the corresponding annototion class (such as :class:`PosAnnotation` for example), or a member of :class:`AnnotationType`, such as ``AnnotationType.POS``.
Returns:
the set (str)
Raises:
:class:`NoDefaultError` if the annotation type does not exist or if there is ambiguity (multiple sets for the same type)" | [
"Obtain",
"the",
"default",
"set",
"for",
"the",
"specified",
"annotation",
"type",
"."
] | "7707f69a91caaa6cde037f0d0379f1d42500a68b" | "https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L7039-L7058" | "" | "" | "" | "" | "" | "" | -1 |
47 | "proycon/pynlpl" | "pynlpl/formats/folia.py" | "Document.defaultannotator" | "def defaultannotator(self, annotationtype, set=None):
"""Obtain the default annotator for the specified annotation type and set.
Arguments:
annotationtype: The type of annotation, this is conveyed by passing the corresponding annototion class (such as :class:`PosAnnotation` for example), or a member of :class:`AnnotationType`, such as ``AnnotationType.POS``.
set (str): the set, should formally be a URL pointing to the set definition
Returns:
the set (str)
Raises:
:class:`NoDefaultError` if the annotation type does not exist or if there is ambiguity (multiple sets for the same type)
"""
if inspect.isclass(annotationtype) or isinstance(annotationtype,AbstractElement): annotationtype = annotationtype.ANNOTATIONTYPE
if not set: set = self.defaultset(annotationtype)
try:
return self.annotationdefaults[annotationtype][set]['annotator']
except KeyError:
raise NoDefaultError" | "python" | "def defaultannotator(self, annotationtype, set=None):
"""Obtain the default annotator for the specified annotation type and set.
Arguments:
annotationtype: The type of annotation, this is conveyed by passing the corresponding annototion class (such as :class:`PosAnnotation` for example), or a member of :class:`AnnotationType`, such as ``AnnotationType.POS``.
set (str): the set, should formally be a URL pointing to the set definition
Returns:
the set (str)
Raises:
:class:`NoDefaultError` if the annotation type does not exist or if there is ambiguity (multiple sets for the same type)
"""
if inspect.isclass(annotationtype) or isinstance(annotationtype,AbstractElement): annotationtype = annotationtype.ANNOTATIONTYPE
if not set: set = self.defaultset(annotationtype)
try:
return self.annotationdefaults[annotationtype][set]['annotator']
except KeyError:
raise NoDefaultError" | [
"def",
"defaultannotator",
"(",
"self",
",",
"annotationtype",
",",
"set",
"=",
"None",
")",
":",
"if",
"inspect",
".",
"isclass",
"(",
"annotationtype",
")",
"or",
"isinstance",
"(",
"annotationtype",
",",
"AbstractElement",
")",
":",
"annotationtype",
"=",
"annotationtype",
".",
"ANNOTATIONTYPE",
"if",
"not",
"set",
":",
"set",
"=",
"self",
".",
"defaultset",
"(",
"annotationtype",
")",
"try",
":",
"return",
"self",
".",
"annotationdefaults",
"[",
"annotationtype",
"]",
"[",
"set",
"]",
"[",
"'annotator'",
"]",
"except",
"KeyError",
":",
"raise",
"NoDefaultError"
] | "Obtain the default annotator for the specified annotation type and set.
Arguments:
annotationtype: The type of annotation, this is conveyed by passing the corresponding annototion class (such as :class:`PosAnnotation` for example), or a member of :class:`AnnotationType`, such as ``AnnotationType.POS``.
set (str): the set, should formally be a URL pointing to the set definition
Returns:
the set (str)
Raises:
:class:`NoDefaultError` if the annotation type does not exist or if there is ambiguity (multiple sets for the same type)" | [
"Obtain",
"the",
"default",
"annotator",
"for",
"the",
"specified",
"annotation",
"type",
"and",
"set",
"."
] | "7707f69a91caaa6cde037f0d0379f1d42500a68b" | "https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L7061-L7080" | "" | "" | "" | "" | "" | "" | -1 |
48 | "proycon/pynlpl" | "pynlpl/formats/folia.py" | "Document.parsemetadata" | "def parsemetadata(self, node):
"""Internal method to parse metadata"""
if 'type' in node.attrib:
self.metadatatype = node.attrib['type']
else:
#no type specified, default to native
self.metadatatype = "native"
if 'src' in node.attrib:
self.metadata = ExternalMetaData(node.attrib['src'])
elif self.metadatatype == "native":
self.metadata = NativeMetaData()
else:
self.metadata = None #may be set below to ForeignData
for subnode in node:
if subnode.tag == '{' + NSFOLIA + '}annotations':
self.parsexmldeclarations(subnode)
elif subnode.tag == '{' + NSFOLIA + '}meta':
if self.metadatatype == "native":
if subnode.text:
self.metadata[subnode.attrib['id']] = subnode.text
else:
raise MetaDataError("Encountered a meta element but metadata type is not native!")
elif subnode.tag == '{' + NSFOLIA + '}provenance':
#forward compatibility with FoLiA 2.0; ignore provenance
print("WARNING: Ignoring provenance data. Use foliapy instead of pynlpl.formats.folia for FoLiA v2.0 compatibility!",file=sys.stderr)
pass
elif subnode.tag == '{' + NSFOLIA + '}foreign-data':
if self.metadatatype == "native":
raise MetaDataError("Encountered a foreign-data element but metadata type is native!")
elif self.metadata is not None:
#multiple foreign-data elements, chain:
e = self.metadata
while e.next is not None:
e = e.next
e.next = ForeignData(self, node=subnode)
else:
self.metadata = ForeignData(self, node=subnode)
elif subnode.tag == '{' + NSFOLIA + '}submetadata':
self.parsesubmetadata(subnode)
elif subnode.tag == '{http://www.mpi.nl/IMDI/Schema/IMDI}METATRANSCRIPT': #backward-compatibility for old IMDI without foreign-key
E = ElementMaker(namespace=NSFOLIA,nsmap={None: NSFOLIA, 'xml' : "http://www.w3.org/XML/1998/namespace"})
self.metadatatype = "imdi"
self.metadata = ForeignData(self, node=subnode)" | "python" | "def parsemetadata(self, node):
"""Internal method to parse metadata"""
if 'type' in node.attrib:
self.metadatatype = node.attrib['type']
else:
#no type specified, default to native
self.metadatatype = "native"
if 'src' in node.attrib:
self.metadata = ExternalMetaData(node.attrib['src'])
elif self.metadatatype == "native":
self.metadata = NativeMetaData()
else:
self.metadata = None #may be set below to ForeignData
for subnode in node:
if subnode.tag == '{' + NSFOLIA + '}annotations':
self.parsexmldeclarations(subnode)
elif subnode.tag == '{' + NSFOLIA + '}meta':
if self.metadatatype == "native":
if subnode.text:
self.metadata[subnode.attrib['id']] = subnode.text
else:
raise MetaDataError("Encountered a meta element but metadata type is not native!")
elif subnode.tag == '{' + NSFOLIA + '}provenance':
#forward compatibility with FoLiA 2.0; ignore provenance
print("WARNING: Ignoring provenance data. Use foliapy instead of pynlpl.formats.folia for FoLiA v2.0 compatibility!",file=sys.stderr)
pass
elif subnode.tag == '{' + NSFOLIA + '}foreign-data':
if self.metadatatype == "native":
raise MetaDataError("Encountered a foreign-data element but metadata type is native!")
elif self.metadata is not None:
#multiple foreign-data elements, chain:
e = self.metadata
while e.next is not None:
e = e.next
e.next = ForeignData(self, node=subnode)
else:
self.metadata = ForeignData(self, node=subnode)
elif subnode.tag == '{' + NSFOLIA + '}submetadata':
self.parsesubmetadata(subnode)
elif subnode.tag == '{http://www.mpi.nl/IMDI/Schema/IMDI}METATRANSCRIPT': #backward-compatibility for old IMDI without foreign-key
E = ElementMaker(namespace=NSFOLIA,nsmap={None: NSFOLIA, 'xml' : "http://www.w3.org/XML/1998/namespace"})
self.metadatatype = "imdi"
self.metadata = ForeignData(self, node=subnode)" | [
"def",
"parsemetadata",
"(",
"self",
",",
"node",
")",
":",
"if",
"'type'",
"in",
"node",
".",
"attrib",
":",
"self",
".",
"metadatatype",
"=",
"node",
".",
"attrib",
"[",
"'type'",
"]",
"else",
":",
"#no type specified, default to native",
"self",
".",
"metadatatype",
"=",
"\"native\"",
"if",
"'src'",
"in",
"node",
".",
"attrib",
":",
"self",
".",
"metadata",
"=",
"ExternalMetaData",
"(",
"node",
".",
"attrib",
"[",
"'src'",
"]",
")",
"elif",
"self",
".",
"metadatatype",
"==",
"\"native\"",
":",
"self",
".",
"metadata",
"=",
"NativeMetaData",
"(",
")",
"else",
":",
"self",
".",
"metadata",
"=",
"None",
"#may be set below to ForeignData",
"for",
"subnode",
"in",
"node",
":",
"if",
"subnode",
".",
"tag",
"==",
"'{'",
"+",
"NSFOLIA",
"+",
"'}annotations'",
":",
"self",
".",
"parsexmldeclarations",
"(",
"subnode",
")",
"elif",
"subnode",
".",
"tag",
"==",
"'{'",
"+",
"NSFOLIA",
"+",
"'}meta'",
":",
"if",
"self",
".",
"metadatatype",
"==",
"\"native\"",
":",
"if",
"subnode",
".",
"text",
":",
"self",
".",
"metadata",
"[",
"subnode",
".",
"attrib",
"[",
"'id'",
"]",
"]",
"=",
"subnode",
".",
"text",
"else",
":",
"raise",
"MetaDataError",
"(",
"\"Encountered a meta element but metadata type is not native!\"",
")",
"elif",
"subnode",
".",
"tag",
"==",
"'{'",
"+",
"NSFOLIA",
"+",
"'}provenance'",
":",
"#forward compatibility with FoLiA 2.0; ignore provenance",
"print",
"(",
"\"WARNING: Ignoring provenance data. Use foliapy instead of pynlpl.formats.folia for FoLiA v2.0 compatibility!\"",
",",
"file",
"=",
"sys",
".",
"stderr",
")",
"pass",
"elif",
"subnode",
".",
"tag",
"==",
"'{'",
"+",
"NSFOLIA",
"+",
"'}foreign-data'",
":",
"if",
"self",
".",
"metadatatype",
"==",
"\"native\"",
":",
"raise",
"MetaDataError",
"(",
"\"Encountered a foreign-data element but metadata type is native!\"",
")",
"elif",
"self",
".",
"metadata",
"is",
"not",
"None",
":",
"#multiple foreign-data elements, chain:",
"e",
"=",
"self",
".",
"metadata",
"while",
"e",
".",
"next",
"is",
"not",
"None",
":",
"e",
"=",
"e",
".",
"next",
"e",
".",
"next",
"=",
"ForeignData",
"(",
"self",
",",
"node",
"=",
"subnode",
")",
"else",
":",
"self",
".",
"metadata",
"=",
"ForeignData",
"(",
"self",
",",
"node",
"=",
"subnode",
")",
"elif",
"subnode",
".",
"tag",
"==",
"'{'",
"+",
"NSFOLIA",
"+",
"'}submetadata'",
":",
"self",
".",
"parsesubmetadata",
"(",
"subnode",
")",
"elif",
"subnode",
".",
"tag",
"==",
"'{http://www.mpi.nl/IMDI/Schema/IMDI}METATRANSCRIPT'",
":",
"#backward-compatibility for old IMDI without foreign-key",
"E",
"=",
"ElementMaker",
"(",
"namespace",
"=",
"NSFOLIA",
",",
"nsmap",
"=",
"{",
"None",
":",
"NSFOLIA",
",",
"'xml'",
":",
"\"http://www.w3.org/XML/1998/namespace\"",
"}",
")",
"self",
".",
"metadatatype",
"=",
"\"imdi\"",
"self",
".",
"metadata",
"=",
"ForeignData",
"(",
"self",
",",
"node",
"=",
"subnode",
")"
] | "Internal method to parse metadata" | [
"Internal",
"method",
"to",
"parse",
"metadata"
] | "7707f69a91caaa6cde037f0d0379f1d42500a68b" | "https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L7216-L7261" | "" | "" | "" | "" | "" | "" | -1 |
49 | "proycon/pynlpl" | "pynlpl/formats/folia.py" | "Document.pendingvalidation" | "def pendingvalidation(self, warnonly=None):
"""Perform any pending validations
Parameters:
warnonly (bool): Warn only (True) or raise exceptions (False). If set to None then this value will be determined based on the document's FoLiA version (Warn only before FoLiA v1.5)
Returns:
bool
"""
if self.debug: print("[PyNLPl FoLiA DEBUG] Processing pending validations (if any)",file=stderr)
if warnonly is None and self and self.version:
warnonly = (checkversion(self.version, '1.5.0') < 0) #warn only for documents older than FoLiA v1.5
if self.textvalidation:
while self.offsetvalidationbuffer:
structureelement, textclass = self.offsetvalidationbuffer.pop()
if self.debug: print("[PyNLPl FoLiA DEBUG] Performing offset validation on " + repr(structureelement) + " textclass " + textclass,file=stderr)
#validate offsets
tc = structureelement.textcontent(textclass)
if tc.offset is not None:
try:
tc.getreference(validate=True)
except UnresolvableTextContent:
msg = "Text for " + structureelement.__class__.__name__ + ", ID " + str(structureelement.id) + ", textclass " + textclass + ", has incorrect offset " + str(tc.offset) + " or invalid reference"
print("TEXT VALIDATION ERROR: " + msg,file=sys.stderr)
if not warnonly:
raise" | "python" | "def pendingvalidation(self, warnonly=None):
"""Perform any pending validations
Parameters:
warnonly (bool): Warn only (True) or raise exceptions (False). If set to None then this value will be determined based on the document's FoLiA version (Warn only before FoLiA v1.5)
Returns:
bool
"""
if self.debug: print("[PyNLPl FoLiA DEBUG] Processing pending validations (if any)",file=stderr)
if warnonly is None and self and self.version:
warnonly = (checkversion(self.version, '1.5.0') < 0) #warn only for documents older than FoLiA v1.5
if self.textvalidation:
while self.offsetvalidationbuffer:
structureelement, textclass = self.offsetvalidationbuffer.pop()
if self.debug: print("[PyNLPl FoLiA DEBUG] Performing offset validation on " + repr(structureelement) + " textclass " + textclass,file=stderr)
#validate offsets
tc = structureelement.textcontent(textclass)
if tc.offset is not None:
try:
tc.getreference(validate=True)
except UnresolvableTextContent:
msg = "Text for " + structureelement.__class__.__name__ + ", ID " + str(structureelement.id) + ", textclass " + textclass + ", has incorrect offset " + str(tc.offset) + " or invalid reference"
print("TEXT VALIDATION ERROR: " + msg,file=sys.stderr)
if not warnonly:
raise" | [
"def",
"pendingvalidation",
"(",
"self",
",",
"warnonly",
"=",
"None",
")",
":",
"if",
"self",
".",
"debug",
":",
"print",
"(",
"\"[PyNLPl FoLiA DEBUG] Processing pending validations (if any)\"",
",",
"file",
"=",
"stderr",
")",
"if",
"warnonly",
"is",
"None",
"and",
"self",
"and",
"self",
".",
"version",
":",
"warnonly",
"=",
"(",
"checkversion",
"(",
"self",
".",
"version",
",",
"'1.5.0'",
")",
"<",
"0",
")",
"#warn only for documents older than FoLiA v1.5",
"if",
"self",
".",
"textvalidation",
":",
"while",
"self",
".",
"offsetvalidationbuffer",
":",
"structureelement",
",",
"textclass",
"=",
"self",
".",
"offsetvalidationbuffer",
".",
"pop",
"(",
")",
"if",
"self",
".",
"debug",
":",
"print",
"(",
"\"[PyNLPl FoLiA DEBUG] Performing offset validation on \"",
"+",
"repr",
"(",
"structureelement",
")",
"+",
"\" textclass \"",
"+",
"textclass",
",",
"file",
"=",
"stderr",
")",
"#validate offsets",
"tc",
"=",
"structureelement",
".",
"textcontent",
"(",
"textclass",
")",
"if",
"tc",
".",
"offset",
"is",
"not",
"None",
":",
"try",
":",
"tc",
".",
"getreference",
"(",
"validate",
"=",
"True",
")",
"except",
"UnresolvableTextContent",
":",
"msg",
"=",
"\"Text for \"",
"+",
"structureelement",
".",
"__class__",
".",
"__name__",
"+",
"\", ID \"",
"+",
"str",
"(",
"structureelement",
".",
"id",
")",
"+",
"\", textclass \"",
"+",
"textclass",
"+",
"\", has incorrect offset \"",
"+",
"str",
"(",
"tc",
".",
"offset",
")",
"+",
"\" or invalid reference\"",
"print",
"(",
"\"TEXT VALIDATION ERROR: \"",
"+",
"msg",
",",
"file",
"=",
"sys",
".",
"stderr",
")",
"if",
"not",
"warnonly",
":",
"raise"
] | "Perform any pending validations
Parameters:
warnonly (bool): Warn only (True) or raise exceptions (False). If set to None then this value will be determined based on the document's FoLiA version (Warn only before FoLiA v1.5)
Returns:
bool" | [
"Perform",
"any",
"pending",
"validations"
] | "7707f69a91caaa6cde037f0d0379f1d42500a68b" | "https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L7396-L7424" | "" | "" | "" | "" | "" | "" | -1 |
50 | "proycon/pynlpl" | "pynlpl/formats/folia.py" | "Document.paragraphs" | "def paragraphs(self, index = None):
"""Return a generator of all paragraphs found in the document.
If an index is specified, return the n'th paragraph only (starting at 0)"""
if index is None:
return self.select(Paragraph)
else:
if index < 0:
index = sum(t.count(Paragraph) for t in self.data) + index
for t in self.data:
for i,e in enumerate(t.select(Paragraph)) :
if i == index:
return e
raise IndexError" | "python" | "def paragraphs(self, index = None):
"""Return a generator of all paragraphs found in the document.
If an index is specified, return the n'th paragraph only (starting at 0)"""
if index is None:
return self.select(Paragraph)
else:
if index < 0:
index = sum(t.count(Paragraph) for t in self.data) + index
for t in self.data:
for i,e in enumerate(t.select(Paragraph)) :
if i == index:
return e
raise IndexError" | [
"def",
"paragraphs",
"(",
"self",
",",
"index",
"=",
"None",
")",
":",
"if",
"index",
"is",
"None",
":",
"return",
"self",
".",
"select",
"(",
"Paragraph",
")",
"else",
":",
"if",
"index",
"<",
"0",
":",
"index",
"=",
"sum",
"(",
"t",
".",
"count",
"(",
"Paragraph",
")",
"for",
"t",
"in",
"self",
".",
"data",
")",
"+",
"index",
"for",
"t",
"in",
"self",
".",
"data",
":",
"for",
"i",
",",
"e",
"in",
"enumerate",
"(",
"t",
".",
"select",
"(",
"Paragraph",
")",
")",
":",
"if",
"i",
"==",
"index",
":",
"return",
"e",
"raise",
"IndexError"
] | "Return a generator of all paragraphs found in the document.
If an index is specified, return the n'th paragraph only (starting at 0)" | [
"Return",
"a",
"generator",
"of",
"all",
"paragraphs",
"found",
"in",
"the",
"document",
"."
] | "7707f69a91caaa6cde037f0d0379f1d42500a68b" | "https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L7445-L7458" | "" | "" | "" | "" | "" | "" | -1 |
51 | "proycon/pynlpl" | "pynlpl/formats/folia.py" | "Document.sentences" | "def sentences(self, index = None):
"""Return a generator of all sentence found in the document. Except for sentences in quotes.
If an index is specified, return the n'th sentence only (starting at 0)"""
if index is None:
return self.select(Sentence,None,True,[Quote])
else:
if index < 0:
index = sum(t.count(Sentence,None,True,[Quote]) for t in self.data) + index
for t in self.data:
for i,e in enumerate(t.select(Sentence,None,True,[Quote])) :
if i == index:
return e
raise IndexError" | "python" | "def sentences(self, index = None):
"""Return a generator of all sentence found in the document. Except for sentences in quotes.
If an index is specified, return the n'th sentence only (starting at 0)"""
if index is None:
return self.select(Sentence,None,True,[Quote])
else:
if index < 0:
index = sum(t.count(Sentence,None,True,[Quote]) for t in self.data) + index
for t in self.data:
for i,e in enumerate(t.select(Sentence,None,True,[Quote])) :
if i == index:
return e
raise IndexError" | [
"def",
"sentences",
"(",
"self",
",",
"index",
"=",
"None",
")",
":",
"if",
"index",
"is",
"None",
":",
"return",
"self",
".",
"select",
"(",
"Sentence",
",",
"None",
",",
"True",
",",
"[",
"Quote",
"]",
")",
"else",
":",
"if",
"index",
"<",
"0",
":",
"index",
"=",
"sum",
"(",
"t",
".",
"count",
"(",
"Sentence",
",",
"None",
",",
"True",
",",
"[",
"Quote",
"]",
")",
"for",
"t",
"in",
"self",
".",
"data",
")",
"+",
"index",
"for",
"t",
"in",
"self",
".",
"data",
":",
"for",
"i",
",",
"e",
"in",
"enumerate",
"(",
"t",
".",
"select",
"(",
"Sentence",
",",
"None",
",",
"True",
",",
"[",
"Quote",
"]",
")",
")",
":",
"if",
"i",
"==",
"index",
":",
"return",
"e",
"raise",
"IndexError"
] | "Return a generator of all sentence found in the document. Except for sentences in quotes.
If an index is specified, return the n'th sentence only (starting at 0)" | [
"Return",
"a",
"generator",
"of",
"all",
"sentence",
"found",
"in",
"the",
"document",
".",
"Except",
"for",
"sentences",
"in",
"quotes",
"."
] | "7707f69a91caaa6cde037f0d0379f1d42500a68b" | "https://github.com/proycon/pynlpl/blob/7707f69a91caaa6cde037f0d0379f1d42500a68b/pynlpl/formats/folia.py#L7460-L7473" | "" | "" | "" | "" | "" | "" | -1 |
Dataset Card for "code_x_glue_tc_nl_code_search_adv"
Dataset Summary
CodeXGLUE NL-code-search-Adv dataset, available at https://github.com/microsoft/CodeXGLUE/tree/main/Text-Code/NL-code-search-Adv
The dataset we use comes from CodeSearchNet, and we filter it as follows:
- Remove examples whose code cannot be parsed into an abstract syntax tree.
- Remove examples whose documents contain fewer than 3 or more than 256 tokens.
- Remove examples whose documents contain special tokens (e.g. <img ...> or https:...).
- Remove examples whose documents are not in English.
Supported Tasks and Leaderboards
document-retrieval
: The dataset can be used to train a model that retrieves the top-k code snippets for a given English natural language query.
Languages
- Python programming language
- English natural language
Dataset Structure
Data Instances
An example of 'validation' looks as follows.
{
"argument_list": "",
"code": "def Func(arg_0, arg_1='.', arg_2=True, arg_3=False, **arg_4):\n \"\"\"Downloads Dailymotion videos by URL.\n \"\"\"\n\n arg_5 = get_content(rebuilt_url(arg_0))\n arg_6 = json.loads(match1(arg_5, r'qualities\":({.+?}),\"'))\n arg_7 = match1(arg_5, r'\"video_title\"\\s*:\\s*\"([^\"]+)\"') or \\\n match1(arg_5, r'\"title\"\\s*:\\s*\"([^\"]+)\"')\n arg_7 = unicodize(arg_7)\n\n for arg_8 in ['1080','720','480','380','240','144','auto']:\n try:\n arg_9 = arg_6[arg_8][1][\"url\"]\n if arg_9:\n break\n except KeyError:\n pass\n\n arg_10, arg_11, arg_12 = url_info(arg_9)\n\n print_info(site_info, arg_7, arg_10, arg_12)\n if not arg_3:\n download_urls([arg_9], arg_7, arg_11, arg_12, arg_1=arg_1, arg_2=arg_2)",
"code_tokens": ["def", "Func", "(", "arg_0", ",", "arg_1", "=", "'.'", ",", "arg_2", "=", "True", ",", "arg_3", "=", "False", ",", "**", "arg_4", ")", ":", "arg_5", "=", "get_content", "(", "rebuilt_url", "(", "arg_0", ")", ")", "arg_6", "=", "json", ".", "loads", "(", "match1", "(", "arg_5", ",", "r'qualities\":({.+?}),\"'", ")", ")", "arg_7", "=", "match1", "(", "arg_5", ",", "r'\"video_title\"\\s*:\\s*\"([^\"]+)\"'", ")", "or", "match1", "(", "arg_5", ",", "r'\"title\"\\s*:\\s*\"([^\"]+)\"'", ")", "arg_7", "=", "unicodize", "(", "arg_7", ")", "for", "arg_8", "in", "[", "'1080'", ",", "'720'", ",", "'480'", ",", "'380'", ",", "'240'", ",", "'144'", ",", "'auto'", "]", ":", "try", ":", "arg_9", "=", "arg_6", "[", "arg_8", "]", "[", "1", "]", "[", "\"url\"", "]", "if", "arg_9", ":", "break", "except", "KeyError", ":", "pass", "arg_10", ",", "arg_11", ",", "arg_12", "=", "url_info", "(", "arg_9", ")", "print_info", "(", "site_info", ",", "arg_7", ",", "arg_10", ",", "arg_12", ")", "if", "not", "arg_3", ":", "download_urls", "(", "[", "arg_9", "]", ",", "arg_7", ",", "arg_11", ",", "arg_12", ",", "arg_1", "=", "arg_1", ",", "arg_2", "=", "arg_2", ")"],
"docstring": "Downloads Dailymotion videos by URL.",
"docstring_summary": "Downloads Dailymotion videos by URL.",
"docstring_tokens": ["Downloads", "Dailymotion", "videos", "by", "URL", "."],
"func_name": "",
"id": 0,
"identifier": "dailymotion_download",
"language": "python",
"nwo": "soimort/you-get",
"original_string": "",
"parameters": "(url, output_dir='.', merge=True, info_only=False, **kwargs)",
"path": "src/you_get/extractors/dailymotion.py",
"repo": "",
"return_statement": "",
"score": 0.9997601509094238,
"sha": "b746ac01c9f39de94cac2d56f665285b0523b974",
"url": "https://github.com/soimort/you-get/blob/b746ac01c9f39de94cac2d56f665285b0523b974/src/you_get/extractors/dailymotion.py#L13-L35"
}
Data Fields
In the following, each data field is explained for each config. The data fields are the same among all splits.
default
field name | type | description |
---|---|---|
id | int32 | Index of the sample |
repo | string | repo: the owner/repo |
path | string | path: the full path to the original file |
func_name | string | func_name: the function or method name |
original_string | string | original_string: the raw string before tokenization or parsing |
language | string | language: the programming language |
code | string | code/function: the part of the original_string that is code |
code_tokens | Sequence[string] | code_tokens/function_tokens: tokenized version of code |
docstring | string | docstring: the top-level comment or docstring, if it exists in the original string |
docstring_tokens | Sequence[string] | docstring_tokens: tokenized version of docstring |
sha | string | sha of the file |
url | string | url of the file |
docstring_summary | string | Summary of the docstring |
parameters | string | parameters of the function |
return_statement | string | return statement |
argument_list | string | list of arguments of the function |
identifier | string | identifier |
nwo | string | nwo |
score | datasets.Value("float") | score for this search |
Data Splits
name | train | validation | test |
---|---|---|---|
default | 251820 | 9604 | 19210 |
Dataset Creation
Curation Rationale
[More Information Needed]
Source Data
Initial Data Collection and Normalization
Data from the CodeSearchNet Challenge dataset. [More Information Needed]
Who are the source language producers?
Software Engineering developers.
Annotations
Annotation process
[More Information Needed]
Who are the annotators?
[More Information Needed]
Personal and Sensitive Information
[More Information Needed]
Considerations for Using the Data
Social Impact of Dataset
[More Information Needed]
Discussion of Biases
[More Information Needed]
Other Known Limitations
[More Information Needed]
Additional Information
Dataset Curators
https://github.com/microsoft, https://github.com/madlag
Licensing Information
Computational Use of Data Agreement (C-UDA) License.
Citation Information
@article{DBLP:journals/corr/abs-2102-04664,
author = {Shuai Lu and
Daya Guo and
Shuo Ren and
Junjie Huang and
Alexey Svyatkovskiy and
Ambrosio Blanco and
Colin B. Clement and
Dawn Drain and
Daxin Jiang and
Duyu Tang and
Ge Li and
Lidong Zhou and
Linjun Shou and
Long Zhou and
Michele Tufano and
Ming Gong and
Ming Zhou and
Nan Duan and
Neel Sundaresan and
Shao Kun Deng and
Shengyu Fu and
Shujie Liu},
title = {CodeXGLUE: {A} Machine Learning Benchmark Dataset for Code Understanding
and Generation},
journal = {CoRR},
volume = {abs/2102.04664},
year = {2021}
}
@article{husain2019codesearchnet,
title={Codesearchnet challenge: Evaluating the state of semantic code search},
author={Husain, Hamel and Wu, Ho-Hsiang and Gazit, Tiferet and Allamanis, Miltiadis and Brockschmidt, Marc},
journal={arXiv preprint arXiv:1909.09436},
year={2019}
}
Contributions
Thanks to @madlag (and partly also @ncoop57) for adding this dataset.
- Downloads last month
- 499