diff --git a/Pipfile b/Pipfile index cb2e780c..5810d5c1 100644 --- a/Pipfile +++ b/Pipfile @@ -19,6 +19,7 @@ Authlib = "*" bson = "*" python-frontmatter = "*" bs4 = "*" +transliterate = "*" [dev-packages] diff --git a/migrate.py b/migrate.py index c9135010..92354091 100644 --- a/migrate.py +++ b/migrate.py @@ -48,22 +48,31 @@ def shouts(limit): data = json.loads(open('migration/data/content_items.json').read()) newdata = {} print(str(len(data)) + ' entries was loaded. now migrating...') + errored = [] + for entry in data: - oid = entry['_id'] - newdata[oid] = migrateShout(entry) - counter += 1 - author = newdata[oid]['authors'][0]['slug'] - if author == 'discours': - discoursAuthor += 1 - line = str(counter) + ': ' + newdata[oid]['slug'] + " @" + author - print(line) - open('./shouts.id.log','a').write(line + '\n') - if counter > limit: - break + try: + oid = entry['_id'] + newdata[oid] = migrateShout(entry) + counter += 1 + + author = newdata[oid]['authors'][0]['slug'] + if author == 'discours': + discoursAuthor += 1 + line = str(counter) + ': ' + newdata[oid]['slug'] + " @" + str(author) + print(line) + open('./shouts.id.log','a').write(line + '\n') + if counter > limit: + break + except Exception: + print(entry['_id']) + errored.append(entry) + raise Exception open('migration/data/shouts.dict.json','w').write( json.dumps(newdata, cls=DateTimeEncoder) ) print(str(counter) + ' shouts were migrated') - print(str(discoursAuthor) + ' from them by uknown users') + print(str(discoursAuthor) + ' from them by @discours') + print(str(len(errored)) + ' shouts without authors') if __name__ == '__main__': import sys diff --git a/migration/html2text.py b/migration/html2text.py index 4bdb8c77..88253d93 100644 --- a/migration/html2text.py +++ b/migration/html2text.py @@ -18,18 +18,10 @@ def has_key(x, y): if hasattr(x, 'has_key'): return x.has_key(y) else: return y in x -try: - import htmlentitydefs - import urlparse - import HTMLParser -except ImportError: #Python3 - import html.entities as htmlentitydefs - import urllib.parse as urlparse - import html.parser as HTMLParser -try: #Python3 - import urllib.request as urllib -except: - import urllib +import html.entities as htmlentitydefs +import urllib.parse as urlparse +import html.parser as HTMLParser +import urllib.request as urllib import optparse, re, sys, codecs, types try: from textwrap import wrap @@ -45,11 +37,11 @@ ESCAPE_SNOB = 0 LINKS_EACH_PARAGRAPH = 0 # Wrap long lines at position. 0 for no wrapping. (Requires Python 2.3.) -BODY_WIDTH = 78 +BODY_WIDTH = 0 # Don't show internal links (href="#local-anchor") -- corresponding link targets # won't be visible in the plain text file anyway. -SKIP_INTERNAL_LINKS = True +SKIP_INTERNAL_LINKS = False # Use inline, rather than reference, formatting for images and links INLINE_LINKS = True diff --git a/migration/tables/comments.py b/migration/tables/comments.py index 4d1b6044..a5f63ac6 100644 --- a/migration/tables/comments.py +++ b/migration/tables/comments.py @@ -1,4 +1,4 @@ -# from html2md import Converter +from html2text import html2text import datetime # markdown = Converter() @@ -8,7 +8,7 @@ def migrate(entry): # is comment type Shout { org: String! - slug: String! + slug: String author: Int! body: String! createdAt: DateTime! @@ -28,7 +28,7 @@ def migrate(entry): ''' # TODO: implement comments migration return { - 'org': 'discours.io', + 'org_id': 0, 'slug': entry['slug'], 'createdAt': entry['createdAt'], 'body': html2text(entry['body']), diff --git a/migration/tables/content_items.py b/migration/tables/content_items.py index fee88d56..25c73165 100644 --- a/migration/tables/content_items.py +++ b/migration/tables/content_items.py @@ -1,16 +1,25 @@ -# from migration.html2md import Converter from dateutil.parser import parse +from orm import User from os.path import abspath import frontmatter import json from orm import Shout from bs4 import BeautifulSoup from migration.html2text import html2text +from transliterate import translit +from datetime import datetime +from sqlalchemy.exc import IntegrityError +from orm.base import local_session users_dict = json.loads(open(abspath('migration/data/users.dict.json')).read()) -users_dict['0'] = {'id': 9999999, 'slug': 'discours', 'viewname': 'Дискурс' } +users_dict['0'] = { + 'id': 9999999, + 'slug': 'discours', + 'viewname': 'Дискурс', + 'userpic': 'https://discours.io/images/logo-mini.svg' + } -# markdown = Converter() +ts = datetime.now() type2layout = { 'Article': 'article', @@ -20,7 +29,7 @@ type2layout = { 'Image': 'image' } -def migrate(entry, data=users_dict): +def migrate(entry, limit=3626, start=0): ''' type Shout { org_id: Int! @@ -45,21 +54,15 @@ def migrate(entry, data=users_dict): views: Int } ''' - try: - author = data[entry['createdBy']] - except KeyError: - author = data['0'] - - # print(author) r = { 'org_id': 0, 'layout': type2layout[entry['type']], 'title': entry['title'], - 'authors': [ { 'slug': author['slug'], 'name': author['viewname'], 'pic': author.get('userpic', '') }, ], + 'authors': [], 'topics': [], - 'published': entry['published'], - 'views': entry['views'], - 'rating': entry['rating'], + 'published': entry.get('published', False), + 'views': entry.get('views', 0), + 'rating': entry.get('rating', 0), 'ratings': [] } r['slug'] = entry.get('slug', '') @@ -76,12 +79,8 @@ def migrate(entry, data=users_dict): r['cover'] = entry['image']['url'] if entry.get('thumborId') is not None: r['cover'] = 'https://assets.discours.io/unsafe/1600x/' + entry['thumborId'] - if entry.get('publishedAt') is not None: - r['publishedAt'] = entry['publishedAt'] - if entry.get('createdAt') is not None: - r['createdAt'] = entry['createdAt'] if entry.get('updatedAt') is not None: - r['updatedAt'] = entry['updatedAt'] + r['updatedAt'] = parse(entry['updatedAt']) if entry.get('type') == 'Literature': media = entry.get('media', '') # print(media[0]['literatureBody']) @@ -91,10 +90,9 @@ def migrate(entry, data=users_dict): print('EMPTY BODY!') else: # body_html = str(BeautifulSoup(body_orig, features="html.parser")) - #markdown.feed(body_html) - body = html2text(body_orig).replace('****', '**') - r['body'] = body - # r['body2'] = markdown.md_file + # body = html2text(body_orig).replace('****', '**') + r['old_id'] = entry.get('_id') + r['body'] = body_orig else: print(r['slug'] + ': literature has no media') elif entry.get('type') == 'Video': @@ -105,7 +103,7 @@ def migrate(entry, data=users_dict): if videoUrl == '#': videoUrl = 'https://vimeo.com/' + vm if vm else '#' if videoUrl == '#': - print(m) + print(entry.get('media', 'NO MEDIA!')) # raise Exception r['body'] = '' + html2text(m.get('body', '')) # FIXME elif entry.get('type') == 'Music': @@ -113,30 +111,88 @@ def migrate(entry, data=users_dict): if r.get('body') is None: body_orig = entry.get('body', '') - # body_html = BeautifulSoup(body_orig, features="html.parser") - r['body'] = html2text(body_orig).replace('****', '**') - # markdown.feed(body_html) - # r['body2'] = markdown.md_file - if not r['body']: - r['body'] = entry.get('body') + body_html = BeautifulSoup(body_orig, features="html.parser") + r['body'] = body_html # html2text(body_orig).replace('****', '**') + r['old_id'] = entry.get('_id') + + body = r.get('body') + user = None + try: + userdata = users_dict[entry['createdBy']] + slug = userdata['slug'] + name = userdata['viewname'] + userpic = userdata['userpic'] + except KeyError: + app = entry.get('application') + if app is not None: + authordata = { + 'username': app['email'], + 'email': app['email'], + 'viewname': app['name'], + 'bio': app.get('bio', ''), + 'emailConfirmed': False, + 'slug': translit(app['name'], 'ru', reversed=True).replace(' ', '-').lower(), + 'createdAt': ts, + 'wasOnlineAt': ts + } + try: + user = User.create(**authordata) + except IntegrityError: + with local_session() as session: + user = session.query(User).filter(User.email == authordata['email']).first() + if user is None: + user = session.query(User).filter(User.slug == authordata['slug']).first() + + slug = user.slug + name = user.viewname + userpic = user.userpic + else: + # no application, no author! + slug = 'discours' + name = 'Дискурс' + userpic = 'https://discours.io/images/logo-mini.svg' + with local_session() as session: + user = session.query(User).filter(User.slug == slug).first() + r['authors'].append({ + 'slug': slug, + 'name': name, + 'pic': userpic + }) + metadata = {} metadata['title'] = r.get('title') metadata['authors'] = r.get('authors') if r.get('cover', False): metadata['cover'] = r.get('cover') - body = r.get('body') post = frontmatter.Post(body, **metadata) dumped = frontmatter.dumps(post) - # raise Exception - open('migration/content/' + entry['type'].lower() + '/' + r['slug'] + '.md', 'w').write(dumped) - # open('migration/content/' + entry['type'].lower() + '/' + r['slug'] + '.my.md', 'w').write(r['body2']) - #if body_orig: - # open('migration/content/' + entry['type'].lower() + '/' + r['slug'] + '.html', 'w').write(body_orig) - #markdown.related_data = [] - #markdown.md_file = '' - #markdown.reset() - r['body'] = dumped - # shout = Shout.create(**r.copy()) - # r['id'] = shout['id'] + if entry['published']: + if r.get('old_id', None): + ext = 'html' + content = str(body).replace('

', '').replace('

', '') + else: + ext = 'md' + content = dumped + open('migration/content/' + entry['type'].lower() + '/' + r['slug'] + '.' + ext, 'w').write(content) + + try: + shout_dict = r.copy() + shout_dict['authors'] = [user, ] + if entry.get('createdAt') is not None: + shout_dict['createdAt'] = parse(entry.get('createdAt')) + else: + shout_dict['createdAt'] = ts + if entry.get('published'): + if entry.get('publishedAt') is not None: + shout_dict['publishedAt'] = parse(entry.get('publishedAt')) + else: + shout_dict['publishedAt'] = ts + s = Shout.create(**shout_dict) + r['id'] = s.id + except: + r['body'] = 'body moved' + print(r) + # print(s) + raise Exception return r diff --git a/migration/tables/users.py b/migration/tables/users.py index 4dfa94b9..56b608d3 100644 --- a/migration/tables/users.py +++ b/migration/tables/users.py @@ -6,13 +6,7 @@ from migration.html2text import html2text # markdown = Converter() counter = 0 - -def add(data): - data.emailConfirmed = False - user = User.create(**data) - return user - -def migrate(entry): +def migrate(entry, limit=668): ''' type User { diff --git a/orm/like.py b/orm/like.py index cd077985..91996087 100644 --- a/orm/like.py +++ b/orm/like.py @@ -11,7 +11,7 @@ class Like(Base): id: int = None user_id: str = Column(ForeignKey("user.id"), comment="Author", primary_key = True) - shout: str = Column(String, ForeignKey("shout.slug"), comment="Liked shout slug", primary_key = True) + shout_id: int = Column(Integer, ForeignKey("shout.id"), comment="Liked shout id", primary_key = True) value: int = Column(Integer, nullable=False, comment="Value") # TODO: add resolvers, debug, etc. diff --git a/orm/proposal.py b/orm/proposal.py index 73232791..d84fee0a 100644 --- a/orm/proposal.py +++ b/orm/proposal.py @@ -10,9 +10,9 @@ class Proposal(Base): __tablename__ = 'proposal' author_id: int = Column(Integer, ForeignKey("user.id"), nullable=False, comment="Author") + shout_id: int = Column(Integer, ForeignKey("shout.id"), nullable=False, comment="Shout") body: str = Column(String, nullable=False, comment="Body") createdAt: str = Column(datetime, nullable=False, comment="Created at") - shout: str = Column(String, ForeignKey("shout.slug"), nullable=False, comment="Updated at") range: str = Column(String, nullable=True, comment="Range in format :") # TODO: debug, logix \ No newline at end of file diff --git a/orm/rating.py b/orm/rating.py deleted file mode 100644 index f1ea6876..00000000 --- a/orm/rating.py +++ /dev/null @@ -1,3 +0,0 @@ -from sqlalchemy import Column, Integer, String, ForeignKey -# from orm import Permission -from orm.base import Base diff --git a/orm/rbac.py b/orm/rbac.py index 55cdaebf..64c3d17a 100644 --- a/orm/rbac.py +++ b/orm/rbac.py @@ -24,7 +24,7 @@ class ClassType(TypeDecorator): def process_result_value(self, value, dialect): class_ = REGISTRY.get(value) if class_ is None: - warnings.warn(f"Can't find class <{value}>,find it yourself 😊", stacklevel=2) + warnings.warn(f"Can't find class <{value}>,find it yourself!", stacklevel=2) return class_ class Organization(Base): diff --git a/orm/shout.py b/orm/shout.py index d812276c..f54a6889 100644 --- a/orm/shout.py +++ b/orm/shout.py @@ -7,31 +7,46 @@ from orm.base import Base ShoutAuthors = Table('shout_authors', Base.metadata, - Column('shout', String, ForeignKey('shout.slug')), + Column('shout', Integer, ForeignKey('shout.id')), Column('user_id', Integer, ForeignKey('user.id')) ) ShoutTopics = Table('shout_topics', Base.metadata, - Column('shout', String, ForeignKey('shout.slug')), - Column('topic', String, ForeignKey('topic.slug')) + Column('shout', Integer, ForeignKey('shout.id')), + Column('topic', Integer, ForeignKey('topic.id')) ) +class ShoutRatings(Base): + __tablename__ = "user_ratings" + + id = None + rater_id = Column(ForeignKey('user.id'), primary_key = True) + shout_id = Column(ForeignKey('shout.id'), primary_key = True) + value = Column(Integer) + class Shout(Base): __tablename__ = 'shout' - slug: str = Column(String, primary_key=True) + # NOTE: automatic ID here + + slug: str = Column(String, nullable=False, unique=True) org_id: int = Column(Integer, ForeignKey("organization.id"), nullable=False, comment="Organization") body: str = Column(String, nullable=False, comment="Body") createdAt: str = Column(DateTime, nullable=False, default = datetime.now, comment="Created at") updatedAt: str = Column(DateTime, nullable=True, comment="Updated at") - replyTo: str = Column(ForeignKey("shout.slug"), nullable=True) - versionOf: str = Column(ForeignKey("shout.slug"), nullable=True) + replyTo: int = Column(ForeignKey("shout.id"), nullable=True) + versionOf: int = Column(ForeignKey("shout.id"), nullable=True) tags: str = Column(String, nullable=True) views: int = Column(Integer, default=0) published: bool = Column(Boolean, default=False) publishedAt: str = Column(DateTime, nullable=True) cover: str = Column(String, nullable = True) + title: str = Column(String, nullable = True) + subtitle: str = Column(String, nullable = True) layout: str = Column(String, nullable = True) authors = relationship(lambda: User, secondary=ShoutAuthors) # NOTE: multiple authors topics = relationship(lambda: Topic, secondary=ShoutTopics) + rating: int = Column(Integer, nullable=True, comment="Rating") + ratings = relationship(ShoutRatings, foreign_keys=ShoutRatings.shout_id) + old_id: str = Column(String, nullable = True) diff --git a/orm/topic.py b/orm/topic.py index cef52bc8..f8edca43 100644 --- a/orm/topic.py +++ b/orm/topic.py @@ -8,8 +8,8 @@ from orm.base import Base Connection = Table('topic_connections', Base.metadata, - Column('child', String, ForeignKey('topic.slug')), - Column('parent', String, ForeignKey('topic.slug')), + Column('child', Integer, ForeignKey('topic.id')), + Column('parent', Integer, ForeignKey('topic.id')), UniqueConstraint('parent', 'child', name='unique_usage') ) @@ -17,8 +17,7 @@ Connection = Table('topic_connections', class Topic(Base): __tablename__ = 'topic' - id: int = None - slug: str = Column(String, unique = True, nullable = False, primary_key=True) + slug: str = Column(String, unique = True, nullable = False) org_id: str = Column(ForeignKey("organization.id"), nullable=False) createdAt: str = Column(DateTime, nullable=False, default = datetime.now, comment="Created at") createdBy: str = Column(ForeignKey("user.id"), nullable=False, comment="Author") diff --git a/requirements.txt b/requirements.txt index ef38b755..e53568d3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,4 +12,4 @@ httpx psycopg2-binary bson python-frontmatter -bs4 \ No newline at end of file +transliterate \ No newline at end of file diff --git a/schema.graphql b/schema.graphql index 2acdb4fb..51e98c10 100644 --- a/schema.graphql +++ b/schema.graphql @@ -179,7 +179,7 @@ type Message { # is publication type Shout { org_id: Int! - slug: String! + slug: String authors: [Int!]! cover: String layout: String @@ -196,10 +196,12 @@ type Shout { tags: [String] # actual values topics: [String] # topic-slugs, order has matter title: String + subtitle: String versionOf: String visibleForRoles: [String] # role ids are strings visibleForUsers: [Int] views: Int + old_id: String } type Topic {