Ajoutez des fichiers projet.

This commit is contained in:
Ambulance Clerc
2021-12-18 18:43:17 +01:00
parent 3c4d48ed26
commit 46254605fc
4842 changed files with 732322 additions and 0 deletions

View File

@@ -0,0 +1,245 @@
"""
Interfaces for serializing Django objects.
Usage::
from django.core import serializers
json = serializers.serialize("json", some_queryset)
objects = list(serializers.deserialize("json", json))
To add your own serializers, use the SERIALIZATION_MODULES setting::
SERIALIZATION_MODULES = {
"csv": "path.to.csv.serializer",
"txt": "path.to.txt.serializer",
}
"""
import importlib
from django.apps import apps
from django.conf import settings
from django.core.serializers.base import SerializerDoesNotExist
# Built-in serializers
BUILTIN_SERIALIZERS = {
"xml": "django.core.serializers.xml_serializer",
"python": "django.core.serializers.python",
"json": "django.core.serializers.json",
"yaml": "django.core.serializers.pyyaml",
"jsonl": "django.core.serializers.jsonl",
}
_serializers = {}
class BadSerializer:
"""
Stub serializer to hold exception raised during registration
This allows the serializer registration to cache serializers and if there
is an error raised in the process of creating a serializer it will be
raised and passed along to the caller when the serializer is used.
"""
internal_use_only = False
def __init__(self, exception):
self.exception = exception
def __call__(self, *args, **kwargs):
raise self.exception
def register_serializer(format, serializer_module, serializers=None):
"""Register a new serializer.
``serializer_module`` should be the fully qualified module name
for the serializer.
If ``serializers`` is provided, the registration will be added
to the provided dictionary.
If ``serializers`` is not provided, the registration will be made
directly into the global register of serializers. Adding serializers
directly is not a thread-safe operation.
"""
if serializers is None and not _serializers:
_load_serializers()
try:
module = importlib.import_module(serializer_module)
except ImportError as exc:
bad_serializer = BadSerializer(exc)
module = type('BadSerializerModule', (), {
'Deserializer': bad_serializer,
'Serializer': bad_serializer,
})
if serializers is None:
_serializers[format] = module
else:
serializers[format] = module
def unregister_serializer(format):
"Unregister a given serializer. This is not a thread-safe operation."
if not _serializers:
_load_serializers()
if format not in _serializers:
raise SerializerDoesNotExist(format)
del _serializers[format]
def get_serializer(format):
if not _serializers:
_load_serializers()
if format not in _serializers:
raise SerializerDoesNotExist(format)
return _serializers[format].Serializer
def get_serializer_formats():
if not _serializers:
_load_serializers()
return list(_serializers)
def get_public_serializer_formats():
if not _serializers:
_load_serializers()
return [k for k, v in _serializers.items() if not v.Serializer.internal_use_only]
def get_deserializer(format):
if not _serializers:
_load_serializers()
if format not in _serializers:
raise SerializerDoesNotExist(format)
return _serializers[format].Deserializer
def serialize(format, queryset, **options):
"""
Serialize a queryset (or any iterator that returns database objects) using
a certain serializer.
"""
s = get_serializer(format)()
s.serialize(queryset, **options)
return s.getvalue()
def deserialize(format, stream_or_string, **options):
"""
Deserialize a stream or a string. Return an iterator that yields ``(obj,
m2m_relation_dict)``, where ``obj`` is an instantiated -- but *unsaved* --
object, and ``m2m_relation_dict`` is a dictionary of ``{m2m_field_name :
list_of_related_objects}``.
"""
d = get_deserializer(format)
return d(stream_or_string, **options)
def _load_serializers():
"""
Register built-in and settings-defined serializers. This is done lazily so
that user code has a chance to (e.g.) set up custom settings without
needing to be careful of import order.
"""
global _serializers
serializers = {}
for format in BUILTIN_SERIALIZERS:
register_serializer(format, BUILTIN_SERIALIZERS[format], serializers)
if hasattr(settings, "SERIALIZATION_MODULES"):
for format in settings.SERIALIZATION_MODULES:
register_serializer(format, settings.SERIALIZATION_MODULES[format], serializers)
_serializers = serializers
def sort_dependencies(app_list, allow_cycles=False):
"""Sort a list of (app_config, models) pairs into a single list of models.
The single list of models is sorted so that any model with a natural key
is serialized before a normal model, and any model with a natural key
dependency has it's dependencies serialized first.
If allow_cycles is True, return the best-effort ordering that will respect
most of dependencies but ignore some of them to break the cycles.
"""
# Process the list of models, and get the list of dependencies
model_dependencies = []
models = set()
for app_config, model_list in app_list:
if model_list is None:
model_list = app_config.get_models()
for model in model_list:
models.add(model)
# Add any explicitly defined dependencies
if hasattr(model, 'natural_key'):
deps = getattr(model.natural_key, 'dependencies', [])
if deps:
deps = [apps.get_model(dep) for dep in deps]
else:
deps = []
# Now add a dependency for any FK relation with a model that
# defines a natural key
for field in model._meta.fields:
if field.remote_field:
rel_model = field.remote_field.model
if hasattr(rel_model, 'natural_key') and rel_model != model:
deps.append(rel_model)
# Also add a dependency for any simple M2M relation with a model
# that defines a natural key. M2M relations with explicit through
# models don't count as dependencies.
for field in model._meta.many_to_many:
if field.remote_field.through._meta.auto_created:
rel_model = field.remote_field.model
if hasattr(rel_model, 'natural_key') and rel_model != model:
deps.append(rel_model)
model_dependencies.append((model, deps))
model_dependencies.reverse()
# Now sort the models to ensure that dependencies are met. This
# is done by repeatedly iterating over the input list of models.
# If all the dependencies of a given model are in the final list,
# that model is promoted to the end of the final list. This process
# continues until the input list is empty, or we do a full iteration
# over the input models without promoting a model to the final list.
# If we do a full iteration without a promotion, that means there are
# circular dependencies in the list.
model_list = []
while model_dependencies:
skipped = []
changed = False
while model_dependencies:
model, deps = model_dependencies.pop()
# If all of the models in the dependency list are either already
# on the final model list, or not on the original serialization list,
# then we've found another model with all it's dependencies satisfied.
if all(d not in models or d in model_list for d in deps):
model_list.append(model)
changed = True
else:
skipped.append((model, deps))
if not changed:
if allow_cycles:
# If cycles are allowed, add the last skipped model and ignore
# its dependencies. This could be improved by some graph
# analysis to ignore as few dependencies as possible.
model, _ = skipped.pop()
model_list.append(model)
else:
raise RuntimeError(
"Can't resolve dependencies for %s in serialized app list."
% ', '.join(
model._meta.label
for model, deps in sorted(skipped, key=lambda obj: obj[0].__name__)
),
)
model_dependencies = skipped
return model_list

View File

@@ -0,0 +1,338 @@
"""
Module for abstract serializer/unserializer base classes.
"""
import pickle
from io import StringIO
from django.core.exceptions import ObjectDoesNotExist
from django.db import models
DEFER_FIELD = object()
class PickleSerializer:
"""
Simple wrapper around pickle to be used in signing.dumps()/loads() and
cache backends.
"""
def __init__(self, protocol=None):
self.protocol = pickle.HIGHEST_PROTOCOL if protocol is None else protocol
def dumps(self, obj):
return pickle.dumps(obj, self.protocol)
def loads(self, data):
return pickle.loads(data)
class SerializerDoesNotExist(KeyError):
"""The requested serializer was not found."""
pass
class SerializationError(Exception):
"""Something bad happened during serialization."""
pass
class DeserializationError(Exception):
"""Something bad happened during deserialization."""
@classmethod
def WithData(cls, original_exc, model, fk, field_value):
"""
Factory method for creating a deserialization error which has a more
explanatory message.
"""
return cls("%s: (%s:pk=%s) field_value was '%s'" % (original_exc, model, fk, field_value))
class M2MDeserializationError(Exception):
"""Something bad happened during deserialization of a ManyToManyField."""
def __init__(self, original_exc, pk):
self.original_exc = original_exc
self.pk = pk
class ProgressBar:
progress_width = 75
def __init__(self, output, total_count):
self.output = output
self.total_count = total_count
self.prev_done = 0
def update(self, count):
if not self.output:
return
perc = count * 100 // self.total_count
done = perc * self.progress_width // 100
if self.prev_done >= done:
return
self.prev_done = done
cr = '' if self.total_count == 1 else '\r'
self.output.write(cr + '[' + '.' * done + ' ' * (self.progress_width - done) + ']')
if done == self.progress_width:
self.output.write('\n')
self.output.flush()
class Serializer:
"""
Abstract serializer base class.
"""
# Indicates if the implemented serializer is only available for
# internal Django use.
internal_use_only = False
progress_class = ProgressBar
stream_class = StringIO
def serialize(self, queryset, *, stream=None, fields=None, use_natural_foreign_keys=False,
use_natural_primary_keys=False, progress_output=None, object_count=0, **options):
"""
Serialize a queryset.
"""
self.options = options
self.stream = stream if stream is not None else self.stream_class()
self.selected_fields = fields
self.use_natural_foreign_keys = use_natural_foreign_keys
self.use_natural_primary_keys = use_natural_primary_keys
progress_bar = self.progress_class(progress_output, object_count)
self.start_serialization()
self.first = True
for count, obj in enumerate(queryset, start=1):
self.start_object(obj)
# Use the concrete parent class' _meta instead of the object's _meta
# This is to avoid local_fields problems for proxy models. Refs #17717.
concrete_model = obj._meta.concrete_model
# When using natural primary keys, retrieve the pk field of the
# parent for multi-table inheritance child models. That field must
# be serialized, otherwise deserialization isn't possible.
if self.use_natural_primary_keys:
pk = concrete_model._meta.pk
pk_parent = pk if pk.remote_field and pk.remote_field.parent_link else None
else:
pk_parent = None
for field in concrete_model._meta.local_fields:
if field.serialize or field is pk_parent:
if field.remote_field is None:
if self.selected_fields is None or field.attname in self.selected_fields:
self.handle_field(obj, field)
else:
if self.selected_fields is None or field.attname[:-3] in self.selected_fields:
self.handle_fk_field(obj, field)
for field in concrete_model._meta.local_many_to_many:
if field.serialize:
if self.selected_fields is None or field.attname in self.selected_fields:
self.handle_m2m_field(obj, field)
self.end_object(obj)
progress_bar.update(count)
self.first = self.first and False
self.end_serialization()
return self.getvalue()
def start_serialization(self):
"""
Called when serializing of the queryset starts.
"""
raise NotImplementedError('subclasses of Serializer must provide a start_serialization() method')
def end_serialization(self):
"""
Called when serializing of the queryset ends.
"""
pass
def start_object(self, obj):
"""
Called when serializing of an object starts.
"""
raise NotImplementedError('subclasses of Serializer must provide a start_object() method')
def end_object(self, obj):
"""
Called when serializing of an object ends.
"""
pass
def handle_field(self, obj, field):
"""
Called to handle each individual (non-relational) field on an object.
"""
raise NotImplementedError('subclasses of Serializer must provide a handle_field() method')
def handle_fk_field(self, obj, field):
"""
Called to handle a ForeignKey field.
"""
raise NotImplementedError('subclasses of Serializer must provide a handle_fk_field() method')
def handle_m2m_field(self, obj, field):
"""
Called to handle a ManyToManyField.
"""
raise NotImplementedError('subclasses of Serializer must provide a handle_m2m_field() method')
def getvalue(self):
"""
Return the fully serialized queryset (or None if the output stream is
not seekable).
"""
if callable(getattr(self.stream, 'getvalue', None)):
return self.stream.getvalue()
class Deserializer:
"""
Abstract base deserializer class.
"""
def __init__(self, stream_or_string, **options):
"""
Init this serializer given a stream or a string
"""
self.options = options
if isinstance(stream_or_string, str):
self.stream = StringIO(stream_or_string)
else:
self.stream = stream_or_string
def __iter__(self):
return self
def __next__(self):
"""Iteration interface -- return the next item in the stream"""
raise NotImplementedError('subclasses of Deserializer must provide a __next__() method')
class DeserializedObject:
"""
A deserialized model.
Basically a container for holding the pre-saved deserialized data along
with the many-to-many data saved with the object.
Call ``save()`` to save the object (with the many-to-many data) to the
database; call ``save(save_m2m=False)`` to save just the object fields
(and not touch the many-to-many stuff.)
"""
def __init__(self, obj, m2m_data=None, deferred_fields=None):
self.object = obj
self.m2m_data = m2m_data
self.deferred_fields = deferred_fields
def __repr__(self):
return "<%s: %s(pk=%s)>" % (
self.__class__.__name__,
self.object._meta.label,
self.object.pk,
)
def save(self, save_m2m=True, using=None, **kwargs):
# Call save on the Model baseclass directly. This bypasses any
# model-defined save. The save is also forced to be raw.
# raw=True is passed to any pre/post_save signals.
models.Model.save_base(self.object, using=using, raw=True, **kwargs)
if self.m2m_data and save_m2m:
for accessor_name, object_list in self.m2m_data.items():
getattr(self.object, accessor_name).set(object_list)
# prevent a second (possibly accidental) call to save() from saving
# the m2m data twice.
self.m2m_data = None
def save_deferred_fields(self, using=None):
self.m2m_data = {}
for field, field_value in self.deferred_fields.items():
opts = self.object._meta
label = opts.app_label + '.' + opts.model_name
if isinstance(field.remote_field, models.ManyToManyRel):
try:
values = deserialize_m2m_values(field, field_value, using, handle_forward_references=False)
except M2MDeserializationError as e:
raise DeserializationError.WithData(e.original_exc, label, self.object.pk, e.pk)
self.m2m_data[field.name] = values
elif isinstance(field.remote_field, models.ManyToOneRel):
try:
value = deserialize_fk_value(field, field_value, using, handle_forward_references=False)
except Exception as e:
raise DeserializationError.WithData(e, label, self.object.pk, field_value)
setattr(self.object, field.attname, value)
self.save()
def build_instance(Model, data, db):
"""
Build a model instance.
If the model instance doesn't have a primary key and the model supports
natural keys, try to retrieve it from the database.
"""
default_manager = Model._meta.default_manager
pk = data.get(Model._meta.pk.attname)
if (pk is None and hasattr(default_manager, 'get_by_natural_key') and
hasattr(Model, 'natural_key')):
natural_key = Model(**data).natural_key()
try:
data[Model._meta.pk.attname] = Model._meta.pk.to_python(
default_manager.db_manager(db).get_by_natural_key(*natural_key).pk
)
except Model.DoesNotExist:
pass
return Model(**data)
def deserialize_m2m_values(field, field_value, using, handle_forward_references):
model = field.remote_field.model
if hasattr(model._default_manager, 'get_by_natural_key'):
def m2m_convert(value):
if hasattr(value, '__iter__') and not isinstance(value, str):
return model._default_manager.db_manager(using).get_by_natural_key(*value).pk
else:
return model._meta.pk.to_python(value)
else:
def m2m_convert(v):
return model._meta.pk.to_python(v)
try:
pks_iter = iter(field_value)
except TypeError as e:
raise M2MDeserializationError(e, field_value)
try:
values = []
for pk in pks_iter:
values.append(m2m_convert(pk))
return values
except Exception as e:
if isinstance(e, ObjectDoesNotExist) and handle_forward_references:
return DEFER_FIELD
else:
raise M2MDeserializationError(e, pk)
def deserialize_fk_value(field, field_value, using, handle_forward_references):
if field_value is None:
return None
model = field.remote_field.model
default_manager = model._default_manager
field_name = field.remote_field.field_name
if (hasattr(default_manager, 'get_by_natural_key') and
hasattr(field_value, '__iter__') and not isinstance(field_value, str)):
try:
obj = default_manager.db_manager(using).get_by_natural_key(*field_value)
except ObjectDoesNotExist:
if handle_forward_references:
return DEFER_FIELD
else:
raise
value = getattr(obj, field_name)
# If this is a natural foreign key to an object that has a FK/O2O as
# the foreign key, use the FK value.
if model._meta.pk.remote_field:
value = value.pk
return value
return model._meta.get_field(field_name).to_python(field_value)

View File

@@ -0,0 +1,105 @@
"""
Serialize data to/from JSON
"""
import datetime
import decimal
import json
import uuid
from django.core.serializers.base import DeserializationError
from django.core.serializers.python import (
Deserializer as PythonDeserializer, Serializer as PythonSerializer,
)
from django.utils.duration import duration_iso_string
from django.utils.functional import Promise
from django.utils.timezone import is_aware
class Serializer(PythonSerializer):
"""Convert a queryset to JSON."""
internal_use_only = False
def _init_options(self):
self._current = None
self.json_kwargs = self.options.copy()
self.json_kwargs.pop('stream', None)
self.json_kwargs.pop('fields', None)
if self.options.get('indent'):
# Prevent trailing spaces
self.json_kwargs['separators'] = (',', ': ')
self.json_kwargs.setdefault('cls', DjangoJSONEncoder)
self.json_kwargs.setdefault('ensure_ascii', False)
def start_serialization(self):
self._init_options()
self.stream.write("[")
def end_serialization(self):
if self.options.get("indent"):
self.stream.write("\n")
self.stream.write("]")
if self.options.get("indent"):
self.stream.write("\n")
def end_object(self, obj):
# self._current has the field data
indent = self.options.get("indent")
if not self.first:
self.stream.write(",")
if not indent:
self.stream.write(" ")
if indent:
self.stream.write("\n")
json.dump(self.get_dump_object(obj), self.stream, **self.json_kwargs)
self._current = None
def getvalue(self):
# Grandparent super
return super(PythonSerializer, self).getvalue()
def Deserializer(stream_or_string, **options):
"""Deserialize a stream or string of JSON data."""
if not isinstance(stream_or_string, (bytes, str)):
stream_or_string = stream_or_string.read()
if isinstance(stream_or_string, bytes):
stream_or_string = stream_or_string.decode()
try:
objects = json.loads(stream_or_string)
yield from PythonDeserializer(objects, **options)
except (GeneratorExit, DeserializationError):
raise
except Exception as exc:
raise DeserializationError() from exc
class DjangoJSONEncoder(json.JSONEncoder):
"""
JSONEncoder subclass that knows how to encode date/time, decimal types, and
UUIDs.
"""
def default(self, o):
# See "Date Time String Format" in the ECMA-262 specification.
if isinstance(o, datetime.datetime):
r = o.isoformat()
if o.microsecond:
r = r[:23] + r[26:]
if r.endswith('+00:00'):
r = r[:-6] + 'Z'
return r
elif isinstance(o, datetime.date):
return o.isoformat()
elif isinstance(o, datetime.time):
if is_aware(o):
raise ValueError("JSON can't represent timezone-aware times.")
r = o.isoformat()
if o.microsecond:
r = r[:12]
return r
elif isinstance(o, datetime.timedelta):
return duration_iso_string(o)
elif isinstance(o, (decimal.Decimal, uuid.UUID, Promise)):
return str(o)
else:
return super().default(o)

View File

@@ -0,0 +1,57 @@
"""
Serialize data to/from JSON Lines
"""
import json
from django.core.serializers.base import DeserializationError
from django.core.serializers.json import DjangoJSONEncoder
from django.core.serializers.python import (
Deserializer as PythonDeserializer, Serializer as PythonSerializer,
)
class Serializer(PythonSerializer):
"""Convert a queryset to JSON Lines."""
internal_use_only = False
def _init_options(self):
self._current = None
self.json_kwargs = self.options.copy()
self.json_kwargs.pop('stream', None)
self.json_kwargs.pop('fields', None)
self.json_kwargs.pop('indent', None)
self.json_kwargs['separators'] = (',', ': ')
self.json_kwargs.setdefault('cls', DjangoJSONEncoder)
self.json_kwargs.setdefault('ensure_ascii', False)
def start_serialization(self):
self._init_options()
def end_object(self, obj):
# self._current has the field data
json.dump(self.get_dump_object(obj), self.stream, **self.json_kwargs)
self.stream.write("\n")
self._current = None
def getvalue(self):
# Grandparent super
return super(PythonSerializer, self).getvalue()
def Deserializer(stream_or_string, **options):
"""Deserialize a stream or string of JSON data."""
if isinstance(stream_or_string, bytes):
stream_or_string = stream_or_string.decode()
if isinstance(stream_or_string, (bytes, str)):
stream_or_string = stream_or_string.split("\n")
for line in stream_or_string:
if not line.strip():
continue
try:
yield from PythonDeserializer([json.loads(line)], **options)
except (GeneratorExit, DeserializationError):
raise
except Exception as exc:
raise DeserializationError() from exc

View File

@@ -0,0 +1,157 @@
"""
A Python "serializer". Doesn't do much serializing per se -- just converts to
and from basic Python data types (lists, dicts, strings, etc.). Useful as a basis for
other serializers.
"""
from django.apps import apps
from django.core.serializers import base
from django.db import DEFAULT_DB_ALIAS, models
from django.utils.encoding import is_protected_type
class Serializer(base.Serializer):
"""
Serialize a QuerySet to basic Python objects.
"""
internal_use_only = True
def start_serialization(self):
self._current = None
self.objects = []
def end_serialization(self):
pass
def start_object(self, obj):
self._current = {}
def end_object(self, obj):
self.objects.append(self.get_dump_object(obj))
self._current = None
def get_dump_object(self, obj):
data = {'model': str(obj._meta)}
if not self.use_natural_primary_keys or not hasattr(obj, 'natural_key'):
data["pk"] = self._value_from_field(obj, obj._meta.pk)
data['fields'] = self._current
return data
def _value_from_field(self, obj, field):
value = field.value_from_object(obj)
# Protected types (i.e., primitives like None, numbers, dates,
# and Decimals) are passed through as is. All other values are
# converted to string first.
return value if is_protected_type(value) else field.value_to_string(obj)
def handle_field(self, obj, field):
self._current[field.name] = self._value_from_field(obj, field)
def handle_fk_field(self, obj, field):
if self.use_natural_foreign_keys and hasattr(field.remote_field.model, 'natural_key'):
related = getattr(obj, field.name)
if related:
value = related.natural_key()
else:
value = None
else:
value = self._value_from_field(obj, field)
self._current[field.name] = value
def handle_m2m_field(self, obj, field):
if field.remote_field.through._meta.auto_created:
if self.use_natural_foreign_keys and hasattr(field.remote_field.model, 'natural_key'):
def m2m_value(value):
return value.natural_key()
else:
def m2m_value(value):
return self._value_from_field(value, value._meta.pk)
m2m_iter = getattr(obj, '_prefetched_objects_cache', {}).get(
field.name,
getattr(obj, field.name).iterator(),
)
self._current[field.name] = [m2m_value(related) for related in m2m_iter]
def getvalue(self):
return self.objects
def Deserializer(object_list, *, using=DEFAULT_DB_ALIAS, ignorenonexistent=False, **options):
"""
Deserialize simple Python objects back into Django ORM instances.
It's expected that you pass the Python objects themselves (instead of a
stream or a string) to the constructor
"""
handle_forward_references = options.pop('handle_forward_references', False)
field_names_cache = {} # Model: <list of field_names>
for d in object_list:
# Look up the model and starting build a dict of data for it.
try:
Model = _get_model(d["model"])
except base.DeserializationError:
if ignorenonexistent:
continue
else:
raise
data = {}
if 'pk' in d:
try:
data[Model._meta.pk.attname] = Model._meta.pk.to_python(d.get('pk'))
except Exception as e:
raise base.DeserializationError.WithData(e, d['model'], d.get('pk'), None)
m2m_data = {}
deferred_fields = {}
if Model not in field_names_cache:
field_names_cache[Model] = {f.name for f in Model._meta.get_fields()}
field_names = field_names_cache[Model]
# Handle each field
for (field_name, field_value) in d["fields"].items():
if ignorenonexistent and field_name not in field_names:
# skip fields no longer on model
continue
field = Model._meta.get_field(field_name)
# Handle M2M relations
if field.remote_field and isinstance(field.remote_field, models.ManyToManyRel):
try:
values = base.deserialize_m2m_values(field, field_value, using, handle_forward_references)
except base.M2MDeserializationError as e:
raise base.DeserializationError.WithData(e.original_exc, d['model'], d.get('pk'), e.pk)
if values == base.DEFER_FIELD:
deferred_fields[field] = field_value
else:
m2m_data[field.name] = values
# Handle FK fields
elif field.remote_field and isinstance(field.remote_field, models.ManyToOneRel):
try:
value = base.deserialize_fk_value(field, field_value, using, handle_forward_references)
except Exception as e:
raise base.DeserializationError.WithData(e, d['model'], d.get('pk'), field_value)
if value == base.DEFER_FIELD:
deferred_fields[field] = field_value
else:
data[field.attname] = value
# Handle all other fields
else:
try:
data[field.name] = field.to_python(field_value)
except Exception as e:
raise base.DeserializationError.WithData(e, d['model'], d.get('pk'), field_value)
obj = base.build_instance(Model, data, using)
yield base.DeserializedObject(obj, m2m_data, deferred_fields)
def _get_model(model_identifier):
"""Look up a model from an "app_label.model_name" string."""
try:
return apps.get_model(model_identifier)
except (LookupError, TypeError):
raise base.DeserializationError("Invalid model identifier: '%s'" % model_identifier)

View File

@@ -0,0 +1,80 @@
"""
YAML serializer.
Requires PyYaml (https://pyyaml.org/), but that's checked for in __init__.
"""
import collections
import decimal
from io import StringIO
import yaml
from django.core.serializers.base import DeserializationError
from django.core.serializers.python import (
Deserializer as PythonDeserializer, Serializer as PythonSerializer,
)
from django.db import models
# Use the C (faster) implementation if possible
try:
from yaml import CSafeDumper as SafeDumper, CSafeLoader as SafeLoader
except ImportError:
from yaml import SafeDumper, SafeLoader
class DjangoSafeDumper(SafeDumper):
def represent_decimal(self, data):
return self.represent_scalar('tag:yaml.org,2002:str', str(data))
def represent_ordered_dict(self, data):
return self.represent_mapping('tag:yaml.org,2002:map', data.items())
DjangoSafeDumper.add_representer(decimal.Decimal, DjangoSafeDumper.represent_decimal)
DjangoSafeDumper.add_representer(collections.OrderedDict, DjangoSafeDumper.represent_ordered_dict)
# Workaround to represent dictionaries in insertion order.
# See https://github.com/yaml/pyyaml/pull/143.
DjangoSafeDumper.add_representer(dict, DjangoSafeDumper.represent_ordered_dict)
class Serializer(PythonSerializer):
"""Convert a queryset to YAML."""
internal_use_only = False
def handle_field(self, obj, field):
# A nasty special case: base YAML doesn't support serialization of time
# types (as opposed to dates or datetimes, which it does support). Since
# we want to use the "safe" serializer for better interoperability, we
# need to do something with those pesky times. Converting 'em to strings
# isn't perfect, but it's better than a "!!python/time" type which would
# halt deserialization under any other language.
if isinstance(field, models.TimeField) and getattr(obj, field.name) is not None:
self._current[field.name] = str(getattr(obj, field.name))
else:
super().handle_field(obj, field)
def end_serialization(self):
self.options.setdefault('allow_unicode', True)
yaml.dump(self.objects, self.stream, Dumper=DjangoSafeDumper, **self.options)
def getvalue(self):
# Grandparent super
return super(PythonSerializer, self).getvalue()
def Deserializer(stream_or_string, **options):
"""Deserialize a stream or string of YAML data."""
if isinstance(stream_or_string, bytes):
stream_or_string = stream_or_string.decode()
if isinstance(stream_or_string, str):
stream = StringIO(stream_or_string)
else:
stream = stream_or_string
try:
yield from PythonDeserializer(yaml.load(stream, Loader=SafeLoader), **options)
except (GeneratorExit, DeserializationError):
raise
except Exception as exc:
raise DeserializationError() from exc

View File

@@ -0,0 +1,432 @@
"""
XML serializer.
"""
import json
from xml.dom import pulldom
from xml.sax import handler
from xml.sax.expatreader import ExpatParser as _ExpatParser
from django.apps import apps
from django.conf import settings
from django.core.exceptions import ObjectDoesNotExist
from django.core.serializers import base
from django.db import DEFAULT_DB_ALIAS, models
from django.utils.xmlutils import (
SimplerXMLGenerator, UnserializableContentError,
)
class Serializer(base.Serializer):
"""Serialize a QuerySet to XML."""
def indent(self, level):
if self.options.get('indent') is not None:
self.xml.ignorableWhitespace('\n' + ' ' * self.options.get('indent') * level)
def start_serialization(self):
"""
Start serialization -- open the XML document and the root element.
"""
self.xml = SimplerXMLGenerator(self.stream, self.options.get("encoding", settings.DEFAULT_CHARSET))
self.xml.startDocument()
self.xml.startElement("django-objects", {"version": "1.0"})
def end_serialization(self):
"""
End serialization -- end the document.
"""
self.indent(0)
self.xml.endElement("django-objects")
self.xml.endDocument()
def start_object(self, obj):
"""
Called as each object is handled.
"""
if not hasattr(obj, "_meta"):
raise base.SerializationError("Non-model object (%s) encountered during serialization" % type(obj))
self.indent(1)
attrs = {'model': str(obj._meta)}
if not self.use_natural_primary_keys or not hasattr(obj, 'natural_key'):
obj_pk = obj.pk
if obj_pk is not None:
attrs['pk'] = str(obj_pk)
self.xml.startElement("object", attrs)
def end_object(self, obj):
"""
Called after handling all fields for an object.
"""
self.indent(1)
self.xml.endElement("object")
def handle_field(self, obj, field):
"""
Handle each field on an object (except for ForeignKeys and
ManyToManyFields).
"""
self.indent(2)
self.xml.startElement('field', {
'name': field.name,
'type': field.get_internal_type(),
})
# Get a "string version" of the object's data.
if getattr(obj, field.name) is not None:
value = field.value_to_string(obj)
if field.get_internal_type() == 'JSONField':
# Dump value since JSONField.value_to_string() doesn't output
# strings.
value = json.dumps(value, cls=field.encoder)
try:
self.xml.characters(value)
except UnserializableContentError:
raise ValueError("%s.%s (pk:%s) contains unserializable characters" % (
obj.__class__.__name__, field.name, obj.pk))
else:
self.xml.addQuickElement("None")
self.xml.endElement("field")
def handle_fk_field(self, obj, field):
"""
Handle a ForeignKey (they need to be treated slightly
differently from regular fields).
"""
self._start_relational_field(field)
related_att = getattr(obj, field.get_attname())
if related_att is not None:
if self.use_natural_foreign_keys and hasattr(field.remote_field.model, 'natural_key'):
related = getattr(obj, field.name)
# If related object has a natural key, use it
related = related.natural_key()
# Iterable natural keys are rolled out as subelements
for key_value in related:
self.xml.startElement("natural", {})
self.xml.characters(str(key_value))
self.xml.endElement("natural")
else:
self.xml.characters(str(related_att))
else:
self.xml.addQuickElement("None")
self.xml.endElement("field")
def handle_m2m_field(self, obj, field):
"""
Handle a ManyToManyField. Related objects are only serialized as
references to the object's PK (i.e. the related *data* is not dumped,
just the relation).
"""
if field.remote_field.through._meta.auto_created:
self._start_relational_field(field)
if self.use_natural_foreign_keys and hasattr(field.remote_field.model, 'natural_key'):
# If the objects in the m2m have a natural key, use it
def handle_m2m(value):
natural = value.natural_key()
# Iterable natural keys are rolled out as subelements
self.xml.startElement("object", {})
for key_value in natural:
self.xml.startElement("natural", {})
self.xml.characters(str(key_value))
self.xml.endElement("natural")
self.xml.endElement("object")
else:
def handle_m2m(value):
self.xml.addQuickElement("object", attrs={
'pk': str(value.pk)
})
m2m_iter = getattr(obj, '_prefetched_objects_cache', {}).get(
field.name,
getattr(obj, field.name).iterator(),
)
for relobj in m2m_iter:
handle_m2m(relobj)
self.xml.endElement("field")
def _start_relational_field(self, field):
"""Output the <field> element for relational fields."""
self.indent(2)
self.xml.startElement('field', {
'name': field.name,
'rel': field.remote_field.__class__.__name__,
'to': str(field.remote_field.model._meta),
})
class Deserializer(base.Deserializer):
"""Deserialize XML."""
def __init__(self, stream_or_string, *, using=DEFAULT_DB_ALIAS, ignorenonexistent=False, **options):
super().__init__(stream_or_string, **options)
self.handle_forward_references = options.pop('handle_forward_references', False)
self.event_stream = pulldom.parse(self.stream, self._make_parser())
self.db = using
self.ignore = ignorenonexistent
def _make_parser(self):
"""Create a hardened XML parser (no custom/external entities)."""
return DefusedExpatParser()
def __next__(self):
for event, node in self.event_stream:
if event == "START_ELEMENT" and node.nodeName == "object":
self.event_stream.expandNode(node)
return self._handle_object(node)
raise StopIteration
def _handle_object(self, node):
"""Convert an <object> node to a DeserializedObject."""
# Look up the model using the model loading mechanism. If this fails,
# bail.
Model = self._get_model_from_node(node, "model")
# Start building a data dictionary from the object.
data = {}
if node.hasAttribute('pk'):
data[Model._meta.pk.attname] = Model._meta.pk.to_python(
node.getAttribute('pk'))
# Also start building a dict of m2m data (this is saved as
# {m2m_accessor_attribute : [list_of_related_objects]})
m2m_data = {}
deferred_fields = {}
field_names = {f.name for f in Model._meta.get_fields()}
# Deserialize each field.
for field_node in node.getElementsByTagName("field"):
# If the field is missing the name attribute, bail (are you
# sensing a pattern here?)
field_name = field_node.getAttribute("name")
if not field_name:
raise base.DeserializationError("<field> node is missing the 'name' attribute")
# Get the field from the Model. This will raise a
# FieldDoesNotExist if, well, the field doesn't exist, which will
# be propagated correctly unless ignorenonexistent=True is used.
if self.ignore and field_name not in field_names:
continue
field = Model._meta.get_field(field_name)
# As is usually the case, relation fields get the special treatment.
if field.remote_field and isinstance(field.remote_field, models.ManyToManyRel):
value = self._handle_m2m_field_node(field_node, field)
if value == base.DEFER_FIELD:
deferred_fields[field] = [
[
getInnerText(nat_node).strip()
for nat_node in obj_node.getElementsByTagName('natural')
]
for obj_node in field_node.getElementsByTagName('object')
]
else:
m2m_data[field.name] = value
elif field.remote_field and isinstance(field.remote_field, models.ManyToOneRel):
value = self._handle_fk_field_node(field_node, field)
if value == base.DEFER_FIELD:
deferred_fields[field] = [
getInnerText(k).strip()
for k in field_node.getElementsByTagName('natural')
]
else:
data[field.attname] = value
else:
if field_node.getElementsByTagName('None'):
value = None
else:
value = field.to_python(getInnerText(field_node).strip())
# Load value since JSONField.to_python() outputs strings.
if field.get_internal_type() == 'JSONField':
value = json.loads(value, cls=field.decoder)
data[field.name] = value
obj = base.build_instance(Model, data, self.db)
# Return a DeserializedObject so that the m2m data has a place to live.
return base.DeserializedObject(obj, m2m_data, deferred_fields)
def _handle_fk_field_node(self, node, field):
"""
Handle a <field> node for a ForeignKey
"""
# Check if there is a child node named 'None', returning None if so.
if node.getElementsByTagName('None'):
return None
else:
model = field.remote_field.model
if hasattr(model._default_manager, 'get_by_natural_key'):
keys = node.getElementsByTagName('natural')
if keys:
# If there are 'natural' subelements, it must be a natural key
field_value = [getInnerText(k).strip() for k in keys]
try:
obj = model._default_manager.db_manager(self.db).get_by_natural_key(*field_value)
except ObjectDoesNotExist:
if self.handle_forward_references:
return base.DEFER_FIELD
else:
raise
obj_pk = getattr(obj, field.remote_field.field_name)
# If this is a natural foreign key to an object that
# has a FK/O2O as the foreign key, use the FK value
if field.remote_field.model._meta.pk.remote_field:
obj_pk = obj_pk.pk
else:
# Otherwise, treat like a normal PK
field_value = getInnerText(node).strip()
obj_pk = model._meta.get_field(field.remote_field.field_name).to_python(field_value)
return obj_pk
else:
field_value = getInnerText(node).strip()
return model._meta.get_field(field.remote_field.field_name).to_python(field_value)
def _handle_m2m_field_node(self, node, field):
"""
Handle a <field> node for a ManyToManyField.
"""
model = field.remote_field.model
default_manager = model._default_manager
if hasattr(default_manager, 'get_by_natural_key'):
def m2m_convert(n):
keys = n.getElementsByTagName('natural')
if keys:
# If there are 'natural' subelements, it must be a natural key
field_value = [getInnerText(k).strip() for k in keys]
obj_pk = default_manager.db_manager(self.db).get_by_natural_key(*field_value).pk
else:
# Otherwise, treat like a normal PK value.
obj_pk = model._meta.pk.to_python(n.getAttribute('pk'))
return obj_pk
else:
def m2m_convert(n):
return model._meta.pk.to_python(n.getAttribute('pk'))
values = []
try:
for c in node.getElementsByTagName('object'):
values.append(m2m_convert(c))
except Exception as e:
if isinstance(e, ObjectDoesNotExist) and self.handle_forward_references:
return base.DEFER_FIELD
else:
raise base.M2MDeserializationError(e, c)
else:
return values
def _get_model_from_node(self, node, attr):
"""
Look up a model from a <object model=...> or a <field rel=... to=...>
node.
"""
model_identifier = node.getAttribute(attr)
if not model_identifier:
raise base.DeserializationError(
"<%s> node is missing the required '%s' attribute"
% (node.nodeName, attr))
try:
return apps.get_model(model_identifier)
except (LookupError, TypeError):
raise base.DeserializationError(
"<%s> node has invalid model identifier: '%s'"
% (node.nodeName, model_identifier))
def getInnerText(node):
"""Get all the inner text of a DOM node (recursively)."""
# inspired by https://mail.python.org/pipermail/xml-sig/2005-March/011022.html
inner_text = []
for child in node.childNodes:
if child.nodeType == child.TEXT_NODE or child.nodeType == child.CDATA_SECTION_NODE:
inner_text.append(child.data)
elif child.nodeType == child.ELEMENT_NODE:
inner_text.extend(getInnerText(child))
else:
pass
return "".join(inner_text)
# Below code based on Christian Heimes' defusedxml
class DefusedExpatParser(_ExpatParser):
"""
An expat parser hardened against XML bomb attacks.
Forbid DTDs, external entity references
"""
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.setFeature(handler.feature_external_ges, False)
self.setFeature(handler.feature_external_pes, False)
def start_doctype_decl(self, name, sysid, pubid, has_internal_subset):
raise DTDForbidden(name, sysid, pubid)
def entity_decl(self, name, is_parameter_entity, value, base,
sysid, pubid, notation_name):
raise EntitiesForbidden(name, value, base, sysid, pubid, notation_name)
def unparsed_entity_decl(self, name, base, sysid, pubid, notation_name):
# expat 1.2
raise EntitiesForbidden(name, None, base, sysid, pubid, notation_name)
def external_entity_ref_handler(self, context, base, sysid, pubid):
raise ExternalReferenceForbidden(context, base, sysid, pubid)
def reset(self):
_ExpatParser.reset(self)
parser = self._parser
parser.StartDoctypeDeclHandler = self.start_doctype_decl
parser.EntityDeclHandler = self.entity_decl
parser.UnparsedEntityDeclHandler = self.unparsed_entity_decl
parser.ExternalEntityRefHandler = self.external_entity_ref_handler
class DefusedXmlException(ValueError):
"""Base exception."""
def __repr__(self):
return str(self)
class DTDForbidden(DefusedXmlException):
"""Document type definition is forbidden."""
def __init__(self, name, sysid, pubid):
super().__init__()
self.name = name
self.sysid = sysid
self.pubid = pubid
def __str__(self):
tpl = "DTDForbidden(name='{}', system_id={!r}, public_id={!r})"
return tpl.format(self.name, self.sysid, self.pubid)
class EntitiesForbidden(DefusedXmlException):
"""Entity definition is forbidden."""
def __init__(self, name, value, base, sysid, pubid, notation_name):
super().__init__()
self.name = name
self.value = value
self.base = base
self.sysid = sysid
self.pubid = pubid
self.notation_name = notation_name
def __str__(self):
tpl = "EntitiesForbidden(name='{}', system_id={!r}, public_id={!r})"
return tpl.format(self.name, self.sysid, self.pubid)
class ExternalReferenceForbidden(DefusedXmlException):
"""Resolving an external reference is forbidden."""
def __init__(self, context, base, sysid, pubid):
super().__init__()
self.context = context
self.base = base
self.sysid = sysid
self.pubid = pubid
def __str__(self):
tpl = "ExternalReferenceForbidden(system_id='{}', public_id={})"
return tpl.format(self.sysid, self.pubid)