Browse Source

first commit

merge-requests/1/head
Carl Chenet 4 years ago
parent
commit
5f4d453765
27 changed files with 1864 additions and 0 deletions
  1. +2
    -0
      AUTHORS
  2. +3
    -0
      CHANGELOG
  3. +39
    -0
      LICENSE
  4. +59
    -0
      README.md
  5. +177
    -0
      docs/Makefile
  6. +4
    -0
      docs/source/authors.rst
  7. +261
    -0
      docs/source/conf.py
  8. +116
    -0
      docs/source/configure.rst
  9. +27
    -0
      docs/source/index.rst
  10. +36
    -0
      docs/source/install.rst
  11. +4
    -0
      docs/source/license.rst
  12. +38
    -0
      docs/source/plugins.rst
  13. +56
    -0
      docs/source/use.rst
  14. +36
    -0
      feed2toot.py
  15. +15
    -0
      feed2toot/__init__.py
  16. +52
    -0
      feed2toot/addtags.py
  17. +105
    -0
      feed2toot/cliparse.py
  18. +231
    -0
      feed2toot/confparse.py
  19. +90
    -0
      feed2toot/filterentry.py
  20. +204
    -0
      feed2toot/main.py
  21. +15
    -0
      feed2toot/plugins/__init__.py
  22. +42
    -0
      feed2toot/plugins/influxdbplugin.py
  23. +63
    -0
      feed2toot/removeduplicates.py
  24. +51
    -0
      feed2toot/tootpost.py
  25. +25
    -0
      scripts/feed2toot
  26. +64
    -0
      scripts/register_feed2toot_app
  27. +49
    -0
      setup.py

+ 2
- 0
AUTHORS View File

@ -0,0 +1,2 @@
Antoine Beaupré <anarcat@debian.org>
Carl Chenet <chaica@ohmytux.com>

+ 3
- 0
CHANGELOG View File

@ -0,0 +1,3 @@
## [0.1] - 2017-04-09
### Changed
- forking from feed2tweet

+ 39
- 0
LICENSE View File

@ -0,0 +1,39 @@
Copyright © 2017, Carl Chenet
Copyright © 2017 Carl Chenet <carl.chenet@ohmytux.com>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>
The original code was under:
MIT License
Copyright (c) 2012, Todd Eddy
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

+ 59
- 0
README.md View File

@ -0,0 +1,59 @@
### Feed2toot
Feed2toot automatically parses rss feeds, identifies new posts and posts them on Mastodon.
For the full documentation, [read it online](https://feed2toot.readthedocs.org/en/latest/).
If you would like, you can [support the development of this project on Liberapay](https://liberapay.com/carlchenet/).
Alternatively you can donate cryptocurrencies:
- BTC: 1BcdXCcLKN9PRpp6qw23FYkYuVp59dKZix
- XMR: 4Cxwvw9V6yUehv832FWPTF7FSVuWjuBarFd17QP163uxMaFyoqwmDf1aiRtS5jWgCiRsi73yqedNJJ6V1La2joznUDzkmvBr6KKHT7Dvzj
### Quick Install
* Install Feed2toot from PyPI
# pip3 install feed2toot
* Install Feed2toot from sources
*(see the installation guide for full details)
[Installation Guide](http://feed2toot.readthedocs.org/en/latest/install.html)*
# tar zxvf feed2toot-0.1.tar.gz
# cd feed2toot
# python3 setup.py install
# # or
# python3 setup.py install --install-scripts=/usr/bin
### Use Feed2toot
* Create or modify feed2toot.ini file in order to configure feed2toot:
[mastodon]
user_credentials=feed2toot_usercred.txt
client_credentials=feed2toot_clientcred.txt
[cache]
cachefile=cache.db
[rss]
uri=https://www.journalduhacker.net/rss
tweet={title} {link}
[hashtaglist]
several_words_hashtags_list=hashtags.txt
* Launch Feed2toot
$ feed2toot -c /path/to/feed2toot.ini
### Authors
* Carl Chenet <chaica@ohmytux.com>
* Antoine Beaupré <anarcat@debian.org>
* First developed by Todd Eddy
### License
This software comes under the terms of the GPLv3+. Previously under MIT license. See the LICENSE file for the complete text of the license.

+ 177
- 0
docs/Makefile View File

@ -0,0 +1,177 @@
# Makefile for Sphinx documentation
#
# You can set these variables from the command line.
SPHINXOPTS =
SPHINXBUILD = sphinx-build
PAPER =
BUILDDIR = build
# User-friendly check for sphinx-build
ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
endif
# Internal variables.
PAPEROPT_a4 = -D latex_paper_size=a4
PAPEROPT_letter = -D latex_paper_size=letter
ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source
# the i18n builder cannot share the environment and doctrees with the others
I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source
.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext
help:
@echo "Please use \`make <target>' where <target> is one of"
@echo " html to make standalone HTML files"
@echo " dirhtml to make HTML files named index.html in directories"
@echo " singlehtml to make a single large HTML file"
@echo " pickle to make pickle files"
@echo " json to make JSON files"
@echo " htmlhelp to make HTML files and a HTML help project"
@echo " qthelp to make HTML files and a qthelp project"
@echo " devhelp to make HTML files and a Devhelp project"
@echo " epub to make an epub"
@echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
@echo " latexpdf to make LaTeX files and run them through pdflatex"
@echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
@echo " text to make text files"
@echo " man to make manual pages"
@echo " texinfo to make Texinfo files"
@echo " info to make Texinfo files and run them through makeinfo"
@echo " gettext to make PO message catalogs"
@echo " changes to make an overview of all changed/added/deprecated items"
@echo " xml to make Docutils-native XML files"
@echo " pseudoxml to make pseudoxml-XML files for display purposes"
@echo " linkcheck to check all external links for integrity"
@echo " doctest to run all doctests embedded in the documentation (if enabled)"
clean:
rm -rf $(BUILDDIR)/*
html:
$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
@echo
@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
dirhtml:
$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
@echo
@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
singlehtml:
$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
@echo
@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
pickle:
$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
@echo
@echo "Build finished; now you can process the pickle files."
json:
$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
@echo
@echo "Build finished; now you can process the JSON files."
htmlhelp:
$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
@echo
@echo "Build finished; now you can run HTML Help Workshop with the" \
".hhp project file in $(BUILDDIR)/htmlhelp."
# Build the HTML files plus a Qt help project, then print how to compile
# and view it. The project files are named after the Sphinx project
# ("feed2toot" in docs/source/conf.py).
qthelp:
	$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
	@echo
	@echo "Build finished; now you can run "qcollectiongenerator" with the" \
	      ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/feed2toot.qhcp"
	@echo "To view the help file:"
	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/feed2toot.qhc"
# Build the HTML files plus a Devhelp project, then print how to make it
# visible to Devhelp under the feed2toot name.
devhelp:
	$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
	@echo
	@echo "Build finished."
	@echo "To view the help file:"
	@echo "# mkdir -p $$HOME/.local/share/devhelp/feed2toot"
	@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/feed2toot"
	@echo "# devhelp"
epub:
$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
@echo
@echo "Build finished. The epub file is in $(BUILDDIR)/epub."
latex:
$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
@echo
@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
@echo "Run \`make' in that directory to run these through (pdf)latex" \
"(use \`make latexpdf' here to do that automatically)."
latexpdf:
$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
@echo "Running LaTeX files through pdflatex..."
$(MAKE) -C $(BUILDDIR)/latex all-pdf
@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
latexpdfja:
$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
@echo "Running LaTeX files through platex and dvipdfmx..."
$(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
text:
$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
@echo
@echo "Build finished. The text files are in $(BUILDDIR)/text."
man:
$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
@echo
@echo "Build finished. The manual pages are in $(BUILDDIR)/man."
texinfo:
$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
@echo
@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
@echo "Run \`make' in that directory to run these through makeinfo" \
"(use \`make info' here to do that automatically)."
# Build the Texinfo files and run them through makeinfo.
info:
	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
	@echo "Running Texinfo files through makeinfo..."
	# Use $(MAKE) rather than a literal "make" so the sub-make inherits
	# the flags and jobserver of the parent, as the latexpdf targets do.
	$(MAKE) -C $(BUILDDIR)/texinfo info
	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
gettext:
$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
@echo
@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
changes:
$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
@echo
@echo "The overview file is in $(BUILDDIR)/changes."
linkcheck:
$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
@echo
@echo "Link check complete; look for any errors in the above output " \
"or in $(BUILDDIR)/linkcheck/output.txt."
doctest:
$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
@echo "Testing of doctests in the sources finished, look at the " \
"results in $(BUILDDIR)/doctest/output.txt."
xml:
$(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
@echo
@echo "Build finished. The XML files are in $(BUILDDIR)/xml."
pseudoxml:
$(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
@echo
@echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."

+ 4
- 0
docs/source/authors.rst View File

@ -0,0 +1,4 @@
Authors
=======
Carl Chenet <chaica@ohmytux.com>

+ 261
- 0
docs/source/conf.py View File

@ -0,0 +1,261 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# feed2toot documentation build configuration file, created by
# sphinx-quickstart on Wed Dec 17 18:25:26 2014.
#
# This file is execfile()d with the current directory set to its
# containing dir.
#
# Note that not all possible configuration values are present in this
# autogenerated file.
#
# All configuration values have a default; values that are commented out
# serve to show the default.
import sys
import os
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#sys.path.insert(0, os.path.abspath('.'))
# -- General configuration ------------------------------------------------
# If your documentation needs a minimal Sphinx version, state it here.
#needs_sphinx = '1.1'
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
'sphinx.ext.autodoc',
]
# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
# The suffix of source filenames.
source_suffix = '.rst'
# The encoding of source files.
#source_encoding = 'utf-8-sig'
# The master toctree document.
master_doc = 'index'
# General information about the project.
project = 'feed2toot'
copyright = '2017, Carl Chenet <chaica@ohmytux.com>'
# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The short X.Y version.
version = '1.0'
# The full version, including alpha/beta/rc tags.
release = '1.0'
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#language = None
# There are two options for replacing |today|: either, you set today to some
# non-false value, then it is used:
#today = ''
# Else, today_fmt is used as the format for a strftime call.
#today_fmt = '%B %d, %Y'
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
exclude_patterns = []
# The reST default role (used for this markup: `text`) to use for all
# documents.
#default_role = None
# If true, '()' will be appended to :func: etc. cross-reference text.
#add_function_parentheses = True
# If true, the current module name will be prepended to all description
# unit titles (such as .. function::).
#add_module_names = True
# If true, sectionauthor and moduleauthor directives will be shown in the
# output. They are ignored by default.
#show_authors = False
# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'
# A list of ignored prefixes for module index sorting.
#modindex_common_prefix = []
# If true, keep warnings as "system message" paragraphs in the built documents.
#keep_warnings = False
# -- Options for HTML output ----------------------------------------------
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
html_theme = 'default'
# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
#html_theme_options = {}
# Add any paths that contain custom themes here, relative to this directory.
#html_theme_path = []
# The name for this set of Sphinx documents. If None, it defaults to
# "<project> v<release> documentation".
#html_title = None
# A shorter title for the navigation bar. Default is the same as html_title.
#html_short_title = None
# The name of an image file (relative to this directory) to place at the top
# of the sidebar.
#html_logo = None
# The name of an image file (within the static path) to use as favicon of the
# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
# pixels large.
#html_favicon = None
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
# Add any extra paths that contain custom files (such as robots.txt or
# .htaccess) here, relative to this directory. These files are copied
# directly to the root of the documentation.
#html_extra_path = []
# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
# using the given strftime format.
#html_last_updated_fmt = '%b %d, %Y'
# If true, SmartyPants will be used to convert quotes and dashes to
# typographically correct entities.
#html_use_smartypants = True
# Custom sidebar templates, maps document names to template names.
#html_sidebars = {}
# Additional templates that should be rendered to pages, maps page names to
# template names.
#html_additional_pages = {}
# If false, no module index is generated.
#html_domain_indices = True
# If false, no index is generated.
#html_use_index = True
# If true, the index is split into individual pages for each letter.
#html_split_index = False
# If true, links to the reST sources are added to the pages.
#html_show_sourcelink = True
# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
#html_show_sphinx = True
# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
#html_show_copyright = True
# If true, an OpenSearch description file will be output, and all pages will
# contain a <link> tag referring to it. The value of this option must be the
# base URL from which the finished HTML is served.
#html_use_opensearch = ''
# This is the file name suffix for HTML files (e.g. ".xhtml").
#html_file_suffix = None
# Output file base name for HTML help builder.
htmlhelp_basename = 'feed2tootdoc'
# -- Options for LaTeX output ---------------------------------------------
latex_elements = {
# The paper size ('letterpaper' or 'a4paper').
#'papersize': 'letterpaper',
# The font size ('10pt', '11pt' or '12pt').
#'pointsize': '10pt',
# Additional stuff for the LaTeX preamble.
#'preamble': '',
}
# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
# author, documentclass [howto, manual, or own class]).
latex_documents = [
    # The author e-mail must be the same address used in the other
    # document descriptions of this file (no trailing ".org").
    ('index', 'feed2toot.tex', 'feed2toot Documentation',
     'Carl Chenet \\textless{}chaica@ohmytux.com\\textgreater{}', 'manual'),
]
# The name of an image file (relative to this directory) to place at the top of
# the title page.
#latex_logo = None
# For "manual" documents, if this is true, then toplevel headings are parts,
# not chapters.
#latex_use_parts = False
# If true, show page references after internal links.
#latex_show_pagerefs = False
# If true, show URL addresses after external links.
#latex_show_urls = False
# Documents to append as an appendix to all manuals.
#latex_appendices = []
# If false, no module index is generated.
#latex_domain_indices = True
# -- Options for manual page output ---------------------------------------
# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
('index', 'feed2toot', 'feed2toot Documentation',
['Carl Chenet <chaica@ohmytux.com>'], 1)
]
# If true, show URL addresses after external links.
#man_show_urls = False
# -- Options for Texinfo output -------------------------------------------
# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
# dir menu entry, description, category)
texinfo_documents = [
('index', 'feed2toot', 'feed2toot Documentation',
'Carl Chenet <chaica@ohmytux.com>', 'feed2toot', 'One line description of project.',
'Miscellaneous'),
]
# Documents to append as an appendix to all manuals.
#texinfo_appendices = []
# If false, no module index is generated.
#texinfo_domain_indices = True
# How to display URL addresses: 'footnote', 'no', or 'inline'.
#texinfo_show_urls = 'footnote'
# If true, do not generate a @detailmenu in the "Top" node's menu.
#texinfo_no_detailmenu = False

+ 116
- 0
docs/source/configure.rst View File

@ -0,0 +1,116 @@
Configure Feed2toot
===================
As a prerequisite to use Feed2toot, you need to authorize a Mastodon app for your account.
Just use the script register_feed2toot_app to register the feed2toot app for your account::
$ ./register_feed2toot_app
This app generates Mastodon app credentials needed by Feed2toot.
feed2toot_clientcred.txt and feed2toot_usercred.txt will be written in the current dir /home/chaica/progra/python/feed2toot.
One connection is initiated to create the app.
Your password is *not* stored.
Mastodon instance url (defaults to https://mastodon.social):
Mastodon login:chaica@ohmytux.com
Mastodon password:
The feed2toot app was added to your preferences=>authorized apps page
As described above, two files were created. You'll need them in the feed2toot configuration.
In order to configure Feed2toot, you need to create a feed2toot.ini file (or any name you prefer, finishing with the extension .ini) with the following parameters::
[mastodon]
; Here you need the two files created by register_feed2toot_app
user_credentials=/etc/feed2toot/credentials/feed2toot_usercred.txt
client_credentials=/etc/feed2toot/credentials/feed2toot_clientcred.txt
[cache]
cachefile=/var/lib/feed2toot/feed2toot.db
cache_limit=10000
[rss]
uri: https://www.journalduhacker.net/rss
uri_list: /etc/feed2toot/rsslist.txt
tweet: {title} {link}
title_pattern: Open Source
title_pattern_case_sensitive: true
no_uri_pattern_no_global_pattern=true
[hashtaglist]
several_words_hashtags_list: /etc/feed2toot/hashtags.txt
For the [mastodon] section:
- user_credentials: a file with the user credentials, generated by the command register_feed2toot_app
- client_credentials: a file with the client credentials, generated by the command register_feed2toot_app
For the [cache] section:
- cachefile: the path to the cache file storing ids of already tweeted links. Absolute path is mandatory. This file should always use the .db extension.
- cache_limit: length of the cache queue. defaults to 100.
For the [rss] section:
- uri: the url of the rss feed to parse
- uri_list: a path to a file with several addresses of rss feeds, one per line. Absolute path is mandatory.
- tweet: format of the tweet you want to post. It should use existing entries of the RSS fields like {title} or {link}. Launch it with this field empty to display all available entries.
- {one field of the rss feed}_pattern: takes a string representing a pattern to match for a specified field of each rss entry of the rss feed, like title_pattern or summary_pattern.
- {one field of the rss feed}_pattern_case_sensitive: whether the pattern matching for the specified field should be case sensitive or not. Defaults to true if not specified.
- no_uri_pattern_no_global_pattern: don't apply global pattern (see above) when no pattern-by-uri is defined in the uri_list. Allows to get all entries of a rss in the uri_list because no pattern is defined so we match them all. Defaults to false, meaning the global patterns will be tried on every rss in the uri_list NOT HAVING specific patterns and so ONLY entries from the specific uri in the uri_list matching the global patterns will be considered.
For the [hashtaglist] section:
- several_words_hashtags_list: a path to the file containing hashtags in two or more words. Absolute path is mandatory. By default Feed2toot adds a # before every word of a hashtag.
List of rss feeds
=================
Simple list of rss feeds
------------------------
With the parameter **uri_list**, you can define a list of uri to use. Starting from 0.10, Feed2toot is now able to match specific patterns for each of the rss feeds from this list. Consider the following rss section of the configuration file::
[rss]
uri_list=/home/john/feed2toot/rsslist.txt
tweet={title} {link}
Now let's have a look at the /home/john/feed2toot/rsslist.txt file::
https://www.journalduhacker.net/rss
https://carlchenet.com/feed
Each line of this file is a url to a rss feed. Pretty simple.
Match specific patterns of rss feeds in the uri_list files
----------------------------------------------------------
You can use specific pattern matching for uri in the uri_list file to filter some of the rss entries of a rss feed. Lets modify the previous file::
https://www.journalduhacker.net/rss|title|hacker,psql
https://carlchenet.com/feed|title|gitlab
Each line of this file starts with a uri, followed by a pipe (|), followed by the name of the available section to parse (see below), again followed by a pipe (|), followed by patterns, each pattern being separated from the next one by a comma (,).
In the example file above, we get every rss entry from the feed available at https://www.journalduhacker.net/rss where a substring in the title section of the entry matches either "hacker" or "psql". Specific patterns are not case sensitive. For the second line, we match every rss entry from the feed available at https://carlchenet.com/feed where a substring in the title section of the entry matches "gitlab".
Consider every entries of a rss feed from a uri in the uri_list file
--------------------------------------------------------------------
It is possible to get all entries from a rss feed available in the uri_list file. You need an option to deactivate the global pattern matching for uri in the uri_list NOT having specific patterns::
[rss]
...
no_uri_pattern_no_global_pattern=true
In your rsslist.txt, just don't give anything else than the needed feed url to get all the entries::
https://www.journalduhacker.net/rss|title|hacker,psql
https://carlchenet.com/feed|title|gitlab
https://blog.linuxjobs.fr/feed.php?rss
The last line of the file above only has the url of a rss feed. All entries from this feed will be tweeted.
How to display available sections of the rss feed
=================================================
Feed2toot offers the **--rss-sections** command line option to display the available section of the rss feed and exits::
$ feed2toot --rss-sections -c feed2toot.ini
The following sections are available in this RSS feed: ['title', 'comments', 'authors', 'link', 'author', 'summary', 'links', 'tags', 'id', 'author_detail', 'published'].

+ 27
- 0
docs/source/index.rst View File

@ -0,0 +1,27 @@
Documentation for the Feed2toot project
=======================================
Feed2toot parses a RSS feed, extracts the last entries and sends them to Mastodon.
You'll find below anything you need to install, configure or run Feed2toot.
Guide
=====
.. toctree::
:maxdepth: 2
install
configure
use
plugins
license
authors
Indices and tables
==================
* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`

+ 36
- 0
docs/source/install.rst View File

@ -0,0 +1,36 @@
How to install Feed2toot
========================
From PyPI
^^^^^^^^^
$ pip3 install feed2toot
From sources
^^^^^^^^^^^^
* You need at least Python 3.4.
* On some Linux Distribution **setuptools** package does not come with default python install, you need to install it.
* Install **PIP**::
$ wget https://bootstrap.pypa.io/get-pip.py -O - | sudo python3
* Install **setuptools** module::
$ wget https://bootstrap.pypa.io/ez_setup.py -O - | sudo python3
Alternatively, Setuptools may be installed to a user-local path::
$ wget https://bootstrap.pypa.io/ez_setup.py -O - | python3 - --user
* Untar the tarball and go to the source directory with the following commands::
$ tar zxvf feed2toot-0.1.tar.gz
$ cd feed2toot
* Next, to install Feed2toot on your computer, type the following command with the root user::
$ python3 setup.py install
$ # or
$ python3 setup.py install --install-scripts=/usr/bin

+ 4
- 0
docs/source/license.rst View File

@ -0,0 +1,4 @@
License
=======
This software comes under the terms of the **GPLv3+**. It was previously under the **MIT** license. See the LICENSE file for the complete history of the license and the full text of the past and current licenses.

+ 38
- 0
docs/source/plugins.rst View File

@ -0,0 +1,38 @@
Plugins
=======
Feed2toot supports plugins. Plugins offer optional features, not supported by default. Optional means you need a dedicated configuration and sometimes dedicated external dependencies. What you need for each plugin is specified below.
InfluxDB
--------
The InfluxDB plugin allows storing already published tweets in an InfluxDB database.
Install the InfluxDB plugin
^^^^^^^^^^^^^^^^^^^^^^^^^^^
To install Feed2toot with the InfluxDB plugin, execute the following command.
From scratch::
# pip3 install feed2toot[influxdb]
Upgrading from a previous version, execute the following command::
# pip3 install feed2toot[influxdb] --upgrade
Configuration
^^^^^^^^^^^^^
Below is the block of configuration to add in your feed2toot.ini::
[influxdb]
;host=127.0.0.1
;port=8086
user=influxuser
pass=V3ryS3cr3t
database=influxdb
measurement=tweets
- host: the host where the influxdb instance is. Defaults to 127.0.0.1
- port: the port where the influxdb instance is listening to. Defaults to 8086
- user: the user authorized to connect to the database. Mandatory (no default)
- pass: the password needed to connect to the database. Mandatory (no default)
- database: the name of the influxdb database to connect to. Mandatory (no default)
- measurement: the measurement to store the value into. Mandatory (no default)

+ 56
- 0
docs/source/use.rst View File

@ -0,0 +1,56 @@
Use Feed2toot
==============
After the configuration of Feed2toot, just launch the following command::
$ feed2toot -c /path/to/feed2toot.ini
Run Feed2toot on a regular basis
=================================
Feed2toot should be launched on a regular basis in order to efficiently send your new RSS entries to Mastodon. It is quite easy to achieve by adding a line to your user crontab, as described below::
@hourly feed2toot -c /path/to/feed2toot.ini
will execute feed2toot every hour. Or without the syntactic sugar in the global crontab file /etc/crontab::
0 * * * * johndoe feed2toot -c /path/to/feed2toot.ini
Test option
===========
In order to know what's going to be sent to Mastodon without actually doing it, use the **--dry-run** option::
$ feed2toot --dry-run -c /path/to/feed2toot.ini
Debug option
============
In order to increase the verbosity of what Feed2toot is doing, use the **--debug** option followed by the level of verbosity (see `the available levels <https://docs.python.org/3/library/logging.html>`_)::
$ feed2toot --debug -c /path/to/feed2toot.ini
Populate the cache file without posting tweets
==============================================
Starting from 0.8, Feed2toot offers the **--populate-cache** command line option to populate the cache file without posting to Mastodon::
$ feed2toot --populate-cache -c feed2toot.ini
populating RSS entry https://www.journalduhacker.net/s/65krkk
populating RSS entry https://www.journalduhacker.net/s/co2es0
populating RSS entry https://www.journalduhacker.net/s/la2ihl
populating RSS entry https://www.journalduhacker.net/s/stfwtx
populating RSS entry https://www.journalduhacker.net/s/qq1wte
populating RSS entry https://www.journalduhacker.net/s/y8mzrp
populating RSS entry https://www.journalduhacker.net/s/ozjqv0
populating RSS entry https://www.journalduhacker.net/s/6ev8jz
populating RSS entry https://www.journalduhacker.net/s/gezvnv
populating RSS entry https://www.journalduhacker.net/s/lqswmz
How to display available sections of the rss feed
=================================================
Starting from 0.8, Feed2toot offers the **--rss-sections** command line option to display the available section of the rss feed and exits::
$ feed2toot --rss-sections -c feed2toot.ini
The following sections are available in this RSS feed: ['title', 'comments', 'authors', 'link', 'author', 'summary', 'links', 'tags', 'id', 'author_detail', 'published'].
Using syslog
============
Feed2toot is able to send its log to syslog. You can use it with the following command::
$ feed2toot --syslog=WARN -c /path/to/feed2toot.ini

+ 36
- 0
feed2toot.py View File

@ -0,0 +1,36 @@
#!/usr/bin/env python3
# vim:ts=4:sw=4:ft=python:fileencoding=utf-8
# Copyright © 2015-2017 Carl Chenet <carl.chenet@ohmytux.com>
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>
# Launch Feed2toot
'''Launch Feed2toot'''
import sys
from feed2toot.main import Main
class Feed2Toot(object):
    '''Launcher object: creating an instance runs Feed2toot.'''

    def __init__(self):
        '''Start the application as soon as the launcher is created.'''
        self.main()

    def main(self):
        '''Instantiate Main, which drives the whole program.'''
        Main()
if __name__ == '__main__':
    # run the application, then leave with a success status
    Main()
    raise SystemExit(0)

+ 15
- 0
feed2toot/__init__.py View File

@ -0,0 +1,15 @@
#!/usr/bin/env python3
# vim:ts=4:sw=4:ft=python:fileencoding=utf-8
# Copyright © 2017 Carl Chenet <carl.chenet@ohmytux.com>
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>

+ 52
- 0
feed2toot/addtags.py View File

@ -0,0 +1,52 @@
#!/usr/bin/env python3
# vim:ts=4:sw=4:ft=python:fileencoding=utf-8
# Copyright © 2015-2017 Carl Chenet <carl.chenet@ohmytux.com>
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>
# Add as many tags as possible depending on the tweet length
'''Add as many tags as possible depending on the tweet length'''
# standard library imports
from operator import itemgetter
class AddTags(object):
    '''Append as many tags as possible to a text without exceeding a length limit.

    Tags are tried longest first. A tag is appended, preceded by a single
    space, only when the resulting text is not longer than ``maxlength``.
    '''

    def __init__(self, tweet, tags, maxlength=500):
        '''Constructor of AddTags class

        tweet -- the text the tags are appended to
        tags -- iterable of tag strings
        maxlength -- maximum length of the final text (default: 500)
        '''
        self.tags = tags
        self.tweet = tweet
        self.maxlength = maxlength
        self.main()

    def main(self):
        '''Append every tag that still fits, longest tags first.

        After this call ``self.tags`` is the list of tags sorted by
        decreasing length and ``self.tweet`` is the extended text.
        '''
        # stable sort: tags of equal length keep their original relative order
        self.tags = sorted(self.tags, key=len, reverse=True)
        tweetlength = len(self.tweet)
        # add tags if space is available
        for tag in self.tags:
            # + 1 accounts for the space separating the tag from the text
            newlength = tweetlength + len(tag) + 1
            if newlength <= self.maxlength:
                self.tweet = ' '.join([self.tweet, tag])
                tweetlength = newlength

    @property
    def finaltweet(self):
        '''return the final tweet with as many tags as possible'''
        return self.tweet

+ 105
- 0
feed2toot/cliparse.py View File

@ -0,0 +1,105 @@
# -*- coding: utf-8 -*-
# Copyright © 2015-2017 Carl Chenet <carl.chenet@ohmytux.com>
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>
# CLI parsing
'''CLI parsing'''
# standard library imports
from argparse import ArgumentParser
import glob
import logging
import os.path
import sys
__version__ = '0.1'


class CliParse(object):
    '''Parse the feed2toot command line and locate the configuration files.'''

    def __init__(self):
        '''Parse the command line immediately; read the result from ``options``.'''
        self.main()

    def main(self):
        '''Build the argument parser, parse sys.argv and resolve the config paths.

        On return ``self.opts`` holds the parsed options, plus a ``configs``
        attribute listing every configuration file found through -c/--config.
        Exits the program when a given path does not exist or when no
        configuration file could be found.
        '''
        feed2tootepilog = 'For more information: https://feed2toot.readthedocs.org'
        feed2tootdescription = 'Take rss feed and send it to Mastodon'
        parser = ArgumentParser(prog='feed2toot',
                                description=feed2tootdescription,
                                epilog=feed2tootepilog)
        parser.add_argument('--version', action='version', version=__version__)
        parser.add_argument('-c', '--config',
                            default=[os.path.join(os.getenv('XDG_CONFIG_HOME', '~/.config'),
                                                  'feed2toot.ini')],
                            nargs='+',
                            dest="config",
                            help='Location of config file (default: %(default)s)',
                            metavar='FILE')
        parser.add_argument('-a', '--all', action='store_true', default=False,
                            dest='all',
                            help='tweet all RSS items, regardless of cache')
        parser.add_argument('-l', '--limit', dest='limit', default=10, type=int,
                            help='tweet only LIMIT items (default: %(default)s)')
        parser.add_argument('--cachefile', dest='cachefile',
                            help='location of the cache file (default: %(default)s)')
        parser.add_argument('-n', '--dry-run', dest='dryrun',
                            action='store_true', default=False,
                            help='Do not actually post tweets')
        parser.add_argument('-v', '--verbose', '--info', dest='log_level',
                            action='store_const', const='info', default='warning',
                            help='enable informative (verbose) output, work on log level INFO')
        parser.add_argument('-d', '--debug', dest='log_level',
                            action='store_const', const='debug', default='warning',
                            help='enable debug output, work on log level DEBUG')
        # every level name known to the logging module except NOTSET
        levels = [i for i in logging._nameToLevel.keys()
                  if (isinstance(i, str) and i != 'NOTSET')]
        parser.add_argument('--syslog', nargs='?', default=None,
                            type=str.upper, action='store',
                            const='INFO', choices=levels,
                            help="""log to syslog facility, default: no
                            logging, INFO if --syslog is specified without
                            argument""")
        parser.add_argument('--hashtaglist', dest='hashtaglist',
                            help='a list of hashtag to match')
        parser.add_argument('-p', '--populate-cache', action='store_true', default=False,
                            dest='populate',
                            help='populate RSS entries in cache without actually posting them to Mastodon')
        parser.add_argument('-r', '--rss', help='the RSS feed URL to fetch items from',
                            dest='rss_uri', metavar='http://...')
        parser.add_argument('--rss-sections', action='store_true', default=False,
                            dest='rsssections',
                            help='print the available sections of the rss feed to be used in the tweet template')
        self.opts = parser.parse_args()
        # expand the path to the cache file if defined
        if self.opts.cachefile:
            self.opts.cachefile = os.path.expanduser(self.opts.cachefile)
        # get the different config files, from a directory or from a *.ini style
        self.opts.config = [os.path.expanduser(path) for path in self.opts.config]
        self.opts.configs = self._findconfigs(self.opts.config)
        # verify if a configuration file is provided
        if not self.opts.configs:
            sys.exit('no configuration file was found at the specified path(s) with the option -c')

    @staticmethod
    def _findconfigs(paths):
        '''Return the configuration files designated by every path in paths.

        A directory contributes its *.ini files, any other path is globbed.
        The results of all paths are accumulated, so several -c arguments
        are all honoured. Exits the program when a path does not exist.
        '''
        configs = []
        for element in paths:
            if element and not os.path.exists(element):
                sys.exit('You should provide an existing path for the config file: %s' % element)
            if os.path.isdir(element):
                # sorted: glob order is arbitrary, keep the processing order stable
                configs.extend(sorted(glob.glob(os.path.join(element, '*.ini'))))
            else:
                # trying to glob the path
                configs.extend(glob.glob(element))
        return configs

    @property
    def options(self):
        '''return the parsed command line options'''
        return self.opts

+ 231
- 0
feed2toot/confparse.py View File

@ -0,0 +1,231 @@
# -*- coding: utf-8 -*-
# Copyright © 2015-2017 Carl Chenet <carl.chenet@ohmytux.com>
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>
# Get values of the configuration file
'''Get values of the configuration file'''
# standard library imports
from configparser import SafeConfigParser, NoOptionError, NoSectionError
import logging
import os
import os.path
import socket
import sys
# 3rd party library imports
import feedparser
class ConfParse(object):
    '''Read the configuration files listed in the command line options.

    For every configuration file a tuple
    (options, config, tweetformat, feeds, plugins) is appended to the list
    returned by the ``confvalues`` property. Invalid or missing mandatory
    settings end the program through sys.exit with an explanatory message.
    '''

    # fields of a feed entry accepting a <field>_pattern option in [rss]
    patternfields = ['summary_detail', 'published_parsed', 'guidislink',
                     'authors', 'links', 'title_detail', 'author',
                     'author_detail', 'comments', 'published', 'summary',
                     'tags', 'title', 'link', 'id']

    def __init__(self, clioptions):
        '''Constructor of the ConfParse class

        clioptions -- parsed command line options; the attributes configs,
                      rss_uri, cachefile and hashtaglist are read
        '''
        self.clioptions = clioptions
        self.tweetformat = ''
        self.stringsep = ','
        self.confs = []
        self.main()

    def main(self):
        '''Parse every configuration file and store the result in self.confs'''
        # local import: ConfigParser replaces SafeConfigParser, which is
        # deprecated since Python 3.2 and removed in Python 3.12
        from configparser import ConfigParser
        for pathtoconfig in self.clioptions.configs:
            options = {}
            # read the configuration file
            config = ConfigParser()
            if not config.read(os.path.expanduser(pathtoconfig)):
                sys.exit('Could not read config file')
            feeds, feed = self._parserss(config, options)
            self._parsecache(config, options)
            self._parsehashtaglist(config, options)
            plugins = self._parseplugins(config)
            # create the returned object with previously parsed data
            if feeds:
                self.confs.append((options, config, self.tweetformat, feeds, plugins))
            else:
                self.confs.append((options, config, self.tweetformat, [{'feed': feed, 'patterns': [], 'rssobject': ''}], plugins))

    def _parserss(self, config, options):
        '''Parse the [rss] section; return (feeds from uri_list, single feed or None).'''
        section = 'rss'
        if not config.has_section(section):
            # without this check the parsing ended later in a NameError
            sys.exit('The [{section}] section of the configuration file is mandatory. Exiting.'.format(section=section))
        ############################
        # tweet option
        ############################
        confoption = 'tweet'
        if config.has_option(section, confoption):
            self.tweetformat = config.get(section, confoption)
        else:
            sys.exit('You should define a format for your tweet with the keyword "tweet" in the [rss] section')
        ############################
        # pattern format option
        ############################
        options['patterns'] = {}
        options['patternscasesensitive'] = {}
        for pattern in self.patternfields:
            currentoption = '{}_pattern'.format(pattern)
            if config.has_option(section, currentoption):
                tmppattern = config.get(section, currentoption)
                if self.stringsep in tmppattern:
                    options['patterns'][currentoption] = [i for i in tmppattern.split(self.stringsep) if i]
                else:
                    options['patterns'][currentoption] = [tmppattern]
            # pattern_case_sensitive format
            currentoption = '{}_pattern_case_sensitive'.format(pattern)
            if config.has_option(section, currentoption):
                try:
                    options['patternscasesensitive'][currentoption] = config.getboolean(section, currentoption)
                except ValueError as err:
                    # not a boolean: report it and fall back to case sensitive
                    print(err)
                    options['patternscasesensitive'][currentoption] = True
        ############################
        # rsslist
        ############################
        feeds = []
        currentoption = 'uri_list'
        if config.has_option(section, currentoption):
            feeds = self._parseurilist(config.get(section, currentoption))
        ############################
        # uri
        ############################
        if not feeds and not self.clioptions.rss_uri:
            confoption = 'uri'
            if config.has_option(section, confoption):
                options['rss_uri'] = config.get(section, confoption)
            else:
                sys.exit('{confoption} parameter in the [{section}] section of the configuration file is mandatory. Exiting.'.format(section=section, confoption=confoption))
        else:
            options['rss_uri'] = self.clioptions.rss_uri
        # the single feed is only used when uri_list gave nothing, so it is
        # only fetched in that case (rss_uri may be None otherwise)
        feed = None
        if not feeds:
            feed = feedparser.parse(options['rss_uri'])
            if not feed:
                sys.exit('Unable to parse the feed at the following url: {rss}'.format(rss=options['rss_uri']))
        #########################################
        # no_uri_pattern_no_global_pattern option
        #########################################
        currentoption = 'no_uri_pattern_no_global_pattern'
        # default value
        options['nopatternurinoglobalpattern'] = False
        if config.has_option(section, currentoption):
            options['nopatternurinoglobalpattern'] = config.getboolean(section, currentoption)
        return feeds, feed

    def _parseurilist(self, rssfile):
        '''Fetch the feeds listed in rssfile, one "uri" or "uri|field|patterns" per line.'''
        rssfile = os.path.expanduser(rssfile)
        if not os.path.isfile(rssfile):
            sys.exit('The path to the uri_list parameter is not valid: {rssfile}'.format(rssfile=rssfile))
        with open(rssfile, 'r') as rssfileobject:
            rsslist = rssfileobject.readlines()
        bozoexception = False
        feeds = []
        for line in rsslist:
            line = line.strip()
            if not line:
                # ignore blank lines
                continue
            # split each line in two parts, rss link and a string with the different patterns to look for
            confobjects = line.split('|')
            if len(confobjects) > 3 or len(confobjects) == 2:
                sys.exit('This line in the list of uri to parse is not formatted correctly: {line}'.format(line=line))
            if len(confobjects) == 3:
                rss, rssobject, patternstring = confobjects
            else:
                rss, rssobject, patternstring = confobjects[0], '', ''
            # split different searched patterns
            patterns = [i for i in patternstring.split(self.stringsep) if i]
            # retrieve the content of the rss
            feed = feedparser.parse(rss)
            if 'bozo_exception' in feed:
                bozoexception = True
                logging.warning(feed['bozo_exception'])
                continue
            # check if the rss feed and the rss entry are valid ones
            if 'entries' in feed:
                # the field can only be checked when the feed has an entry
                if rssobject and feed['entries'] and rssobject not in feed['entries'][0].keys():
                    sys.exit('The rss object {rssobject} could not be found in the feed {rss}'.format(rssobject=rssobject, rss=rss))
            else:
                sys.exit('The rss feed {rss} does not seem to be valid'.format(rss=rss))
            feeds.append({'feed': feed, 'patterns': patterns, 'rssobject': rssobject})
        # test if all feeds in the list were unsuccessfully retrieved and if so, leave
        if not feeds and bozoexception:
            sys.exit('No feed could be retrieved. Leaving.')
        return feeds

    def _parsecache(self, config, options):
        '''Fill options['cachefile'] and options['cache_limit'] from CLI or [cache].'''
        section = 'cache'
        if not self.clioptions.cachefile:
            confoption = 'cachefile'
            # has_option is also False when the section itself is missing
            if config.has_option(section, confoption):
                options['cachefile'] = config.get(section, confoption)
            else:
                sys.exit('You should provide a {confoption} parameter in the [{section}] section'.format(section=section, confoption=confoption))
            options['cachefile'] = os.path.expanduser(options['cachefile'])
            cachefileparent = os.path.dirname(options['cachefile'])
            if cachefileparent and not os.path.exists(cachefileparent):
                sys.exit('The parent directory of the cache file does not exist: {cachefileparent}'.format(cachefileparent=cachefileparent))
        else:
            options['cachefile'] = self.clioptions.cachefile
        ### cache limit
        confoption = 'cache_limit'
        # default value
        options['cache_limit'] = 100
        if config.has_option(section, confoption):
            try:
                options['cache_limit'] = int(config.get(section, confoption))
            except ValueError as err:
                sys.exit('Error in configuration with the {confoption} parameter in [{section}]: {err}'.format(confoption=confoption, section=section, err=err))

    def _parsehashtaglist(self, config, options):
        '''Fill options['hashtaglist'] with the path of the hashtag file, or False.'''
        section = 'hashtaglist'
        confoption = 'several_words_hashtags_list'
        if self.clioptions.hashtaglist:
            # NOTE(review): the command line value is treated as a path, the
            # same way --cachefile overrides [cache] -- confirm the intent
            options['hashtaglist'] = self.clioptions.hashtaglist
        elif config.has_option(section, confoption):
            options['hashtaglist'] = config.get(section, confoption)
        else:
            options['hashtaglist'] = False
        if options['hashtaglist']:
            options['hashtaglist'] = os.path.expanduser(options['hashtaglist'])
            if not os.path.isfile(options['hashtaglist']):
                sys.exit('The path to the several_words_hashtags_list parameter is not valid: {hashtaglist}'.format(hashtaglist=options['hashtaglist']))

    def _parseplugins(self, config):
        '''Return the plugin settings, currently only the [influxdb] section.'''
        plugins = {}
        section = 'influxdb'
        if config.has_section(section):
            #######################################################
            # host, port, user, pass, database, measurement options
            #######################################################
            plugins[section] = {}
            # measurement is read too, otherwise its default below could
            # never be overridden
            for currentoption in ['host', 'port', 'user', 'pass', 'database', 'measurement']:
                if config.has_option(section, currentoption):
                    plugins[section][currentoption] = config.get(section, currentoption)
            if 'host' not in plugins[section]:
                plugins[section]['host'] = '127.0.0.1'
            if 'port' not in plugins[section]:
                plugins[section]['port'] = 8086
            if 'measurement' not in plugins[section]:
                plugins[section]['measurement'] = 'tweets'
            for field in ['user', 'pass', 'database']:
                if field not in plugins[section]:
                    sys.exit('Parsing error for {field} in the [{section}] section: {field} is not defined'.format(field=field, section=section))
        return plugins

    @property
    def confvalues(self):
        '''Return the values of the different configuration files'''
        return self.confs

+ 90
- 0
feed2toot/filterentry.py View File

@ -0,0 +1,90 @@
# -*- coding: utf-8 -*-
# Copyright © 2015-2017 Carl Chenet <carl.chenet@ohmytux.com>
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>
# Filter an entry of the RSS feeds
'''Filter an entry of the RSS feeds'''
# standard library imports
from configparser import SafeConfigParser, NoOptionError, NoSectionError
import os
import os.path
import sys
# 3rd party library imports
import feedparser
class FilterEntry(object):
    '''Decide which requested fields of one feed entry are kept for publishing.

    The kept fields are exposed as a dictionary through ``finalentry``; the
    dictionary is empty when the entry does not satisfy the patterns.
    '''

    def __init__(self, elements, entry, options, byrsspatterns, rssobject):
        '''Constructor of the FilterEntry class

        elements -- names of the entry fields used by the tweet format
        entry -- the feed entry, a mapping of field names to values
        options -- parsed configuration; the keys patterns,
                   patternscasesensitive and nopatternurinoglobalpattern
                   are read
        byrsspatterns -- patterns specific to the feed the entry comes from
        rssobject -- name of the entry field the specific patterns apply to
        '''
        self.matching = {}
        self.entry = entry
        self.elements = elements
        self.options = options
        self.byrsspatterns = byrsspatterns
        self.rssobject = rssobject
        self.main()

    def main(self):
        '''Apply the relevant filters to every requested element.

        Exits the program when a requested element is not a field of the entry.
        '''
        for i in self.elements:
            if i not in self.entry:
                sys.exit('The element {} is not available in the RSS feed. The available ones are: {}'.format(i, list(self.entry)))
            # for the case if no pattern at all is defined
            if not self.options['patterns'] and not self.byrsspatterns and not self.rssobject:
                self.matching[i] = self.entry[i]
            # global filter only
            elif self.options['patterns'] and not self.byrsspatterns and not self.rssobject:
                if not self.options['nopatternurinoglobalpattern']:
                    self.applyglobalfilter(i)
                else:
                    # global patterns are disabled for feeds without specific patterns
                    self.matching[i] = self.entry[i]
            # global filter and then by rss filter
            elif self.options['patterns'] and self.byrsspatterns and self.rssobject:
                self.applyglobalfilter(i)
                self.applyspecificfilter(i)
            # by rss filter only
            elif not self.options['patterns'] and self.byrsspatterns and self.rssobject:
                self.applyspecificfilter(i)
            else:
                self.matching[i] = self.entry[i]

    def applyglobalfilter(self, i):
        '''Keep element i when a global pattern is found in the field it targets.

        The targeted field is the part of the option name before the first
        underscore. Matching is case sensitive unless the corresponding
        <option>_case_sensitive setting is false.
        '''
        for patternlist, patterns in self.options['patterns'].items():
            fieldname = patternlist.split('_')[0]
            if fieldname not in self.entry:
                # the entry has no such field, nothing to match against
                continue
            # an unset option means case sensitive, like an invalid value does
            casesensitive = self.options['patternscasesensitive'].get('{}_case_sensitive'.format(patternlist), True)
            fieldvalue = self.entry[fieldname]
            if not casesensitive:
                # not case sensitive, so we compare the lower case
                fieldvalue = fieldvalue.lower()
                patterns = [pattern.lower() for pattern in patterns]
            if any(pattern in fieldvalue for pattern in patterns):
                self.matching[i] = self.entry[i]

    def applyspecificfilter(self, i):
        '''Apply specific filters for by-rss pattern matching'''
        # always case insensitive
        fieldvalue = self.entry[self.rssobject].lower()
        if any(byrsspattern.lower() in fieldvalue for byrsspattern in self.byrsspatterns):
            self.matching[i] = self.entry[i]

    @property
    def finalentry(self):
        '''Return the processed entry'''
        return self.matching

+ 204
- 0
feed2toot/main.py View File

@ -0,0 +1,204 @@
#!/usr/bin/env python3
# vim:ts=4:sw=4:ft=python:fileencoding=utf-8
# Copyright © 2015-2017 Carl Chenet <carl.chenet@ohmytux.com>
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>
"""Checks an RSS feed and posts new entries to Mastodon."""
# standard libraries imports
import codecs
import importlib
import logging
import logging.handlers
import os
import sys
# 3rd party libraries imports
import feedparser
from persistentlist import PersistentList
import tweepy
# app libraries imports
from feed2toot.addtags import AddTags
from feed2toot.cliparse import CliParse
from feed2toot.confparse import ConfParse
from feed2toot.filterentry import FilterEntry
from feed2toot.removeduplicates import RemoveDuplicates
from feed2toot.tootpost import TootPost
class Main(object):
    '''Main class of Feed2toot'''

    def __init__(self):
        '''Run the whole program as soon as the object is created.'''
        self.main()

    def setup_logging(self, options):
        '''Attach the syslog (optional) and stream handlers to the root logger.

        options -- parsed command line options; syslog (level name or None)
                   and log_level (level name) are read
        Raises ValueError when options.syslog is not a known level name.
        '''
        if options.syslog:
            # convert syslog argument to a numeric value, before any handler
            # is created so that a bad level fails early
            loglevel = getattr(logging, options.syslog.upper(), None)
            if not isinstance(loglevel, int):
                raise ValueError('Invalid log level: %s' % options.syslog)
            sl = logging.handlers.SysLogHandler(address='/dev/log')
            sl.setFormatter(logging.Formatter('feed2toot[%(process)d]: %(message)s'))
            sl.setLevel(loglevel)
            logging.getLogger('').addHandler(sl)
            logging.debug('configured syslog level %s' % loglevel)
        # the root logger lets everything through, each handler filters
        logging.getLogger('').setLevel(logging.DEBUG)
        sh = logging.StreamHandler()
        sh.setLevel(options.log_level.upper())
        logging.getLogger('').addHandler(sh)
        logging.debug('configured stdout level %s' % sh.level)

    @staticmethod
    def _readhashtaglist(path):
        '''Return the non-empty lines of the hashtag file, or [] when path is falsy.'''
        if not path:
            return []
        with codecs.open(path, encoding='utf-8') as hashtagfile:
            lines = hashtagfile.readlines()
        # an empty line would be found in every tag, so it is ignored
        return [i.rstrip('\n') for i in lines if i.strip()]

    @staticmethod
    def _buildhashtags(tags, severalwordshashtags):
        '''Return the '#hashtag' strings built from the tags of a feed entry.

        tags -- sequence of mappings with a 'term' key
        severalwordshashtags -- known multi-word hashtags
        '''
        hashtags = []
        # NOTE(review): once a tag matched a multi-word hashtag, the flag
        # stays set for the remaining tags of the entry, as in the original
        # code -- confirm whether it should be reset for every tag
        severalwordsinhashtag = False
        for tag in tags:
            prehashtags = tag['term']
            tmphashtags = prehashtags
            for element in severalwordshashtags:
                if element in prehashtags:
                    severalwordsinhashtag = True
                    tmphashtags = prehashtags.replace(element,
                                                      ''.join(element.split()))
            if severalwordsinhashtag:
                # remove the characters stopping a word from being a hashtag
                for char in ("'", "-", ".", " "):
                    tmphashtags = tmphashtags.replace(char, "")
                hashtags.append('#{}'.format(tmphashtags))
            else:
                # remove space from hashtag
                hashtags.append('#{}'.format(prehashtags.replace(" ", "")))
        return hashtags

    @staticmethod
    def _formatelements(tweetformat):
        '''Return the field names written as {name} words in the tweet format.'''
        return [i.strip('{}') for i in tweetformat.split(' ')
                if i.startswith('{') and i.endswith('}')]

    @staticmethod
    def _runplugins(plugins, finaltweet):
        '''Load the module of every configured plugin and hand it the final text.'''
        for plugin in plugins:
            capitalizedplugin = plugin.title()
            pluginclassname = '{plugin}Plugin'.format(plugin=capitalizedplugin)
            pluginmodulename = 'feed2toot.plugins.{pluginmodule}'.format(pluginmodule=pluginclassname.lower())
            try:
                pluginmodule = importlib.import_module(pluginmodulename)
                pluginclass = getattr(pluginmodule, pluginclassname)
                pluginclass(plugins[plugin], finaltweet)
            except ImportError as err:
                print(err)

    def main(self):
        """The main function."""
        clip = CliParse()
        clioptions = clip.options
        self.setup_logging(clioptions)
        # iterating over the different configuration files
        cfgp = ConfParse(clioptions)
        for options, config, tweetformat, feeds, plugins in cfgp.confvalues:
            # open the persistent list
            # the last three characters of the configured path are dropped --
            # presumably a suffix added back by the storage; TODO confirm
            cache = PersistentList(options['cachefile'][0:-3], options['cache_limit'])
            try:
                # stays empty when no hashtag list is configured
                severalwordshashtags = self._readhashtaglist(options.get('hashtaglist'))
                for feed in feeds:
                    # reverse feed entries because most recent one should be sent as the last one in Mastodon
                    entries = feed['feed']['entries'][0:clioptions.limit]
                    entries.reverse()
                    # --rss-sections option: print rss sections and exit
                    if clioptions.rsssections:
                        if entries:
                            print('The following sections are available in this RSS feed: {}'.format([j for j in entries[0]]))
                            sys.exit(0)
                        else:
                            sys.exit('Could not parse the section of the rss feed')
                    # only consider entries whose id is not in the cache, unless --all
                    if not clioptions.all:
                        totweet = [i for i in entries if 'id' in i and i['id'] not in cache]
                    else:
                        totweet = entries
                    for entry in totweet:
                        if 'id' not in entry:
                            # malformed feed entry, skip
                            continue
                        logging.debug('found feed entry %s, %s', entry['id'], entry.get('title'))
                        rss = {
                            'id': entry['id'],
                        }
                        # lets see if the rss feed has hashtag
                        if 'tags' in entry:
                            rss['hashtags'] = self._buildhashtags(entry['tags'], severalwordshashtags)
                        elements = self._formatelements(tweetformat)
                        # match elements of the tweet format string with available element in the RSS feed
                        fe = FilterEntry(elements, entry, options, feed['patterns'], feed['rssobject'])
                        entrytosend = fe.finalentry
                        if entrytosend:
                            tweetwithnotag = tweetformat.format(**entrytosend)
                            # remove duplicates from the final tweet
                            dedup = RemoveDuplicates(tweetwithnotag)
                            # only append hashtags if they exist
                            # remove last tags if tweet too long
                            if 'hashtags' in rss:
                                addtag = AddTags(dedup.finaltweet, rss['hashtags'])
                                finaltweet = addtag.finaltweet
                            else:
                                finaltweet = dedup.finaltweet
                        if clioptions.dryrun:
                            if entrytosend:
                                logging.warning('Tweet should have been sent: {tweet}'.format(tweet=finaltweet))
                            else:
                                logging.debug('This rss entry did not meet pattern criteria. Should have not been sent')
                        else:
                            storeit = True
                            if entrytosend and not clioptions.populate:
                                logging.debug('sending the following tweet:{tweet}'.format(tweet=finaltweet))
                                twp = TootPost(config, finaltweet)
                                storeit = twp.storeit()
                            else:
                                logging.debug('populating RSS entry {}'.format(rss['id']))
                            # in both cases we store the id of the sent tweet
                            if storeit:
                                cache.append(rss['id'])
                                # plugins
                                if plugins and entrytosend:
                                    self._runplugins(plugins, finaltweet)
            finally:
                # do not forget to close cache (shelf object), even on early exit
                cache.close()

+ 15
- 0
feed2toot/plugins/__init__.py View File

@ -0,0 +1,15 @@
#!/usr/bin/env python3
# vim:ts=4:sw=4:ft=python:fileencoding=utf-8
# Copyright © 2017 Carl Chenet <carl.chenet@ohmytux.com>
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>

+ 42
- 0
feed2toot/plugins/influxdbplugin.py View File

@ -0,0 +1,42 @@
# -*- coding: utf-8 -*-
# Copyright © 2017 Carl Chenet <carl.chenet@ohmytux.com>
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>
# Push values to a influxdb database
'''Push values to a influxdb database'''
# standard libraries imports
import json
# 3rd party libraries imports
from influxdb import InfluxDBClient
class InfluxdbPlugin(object):
    '''Write the data handed to the plugin as one point in an InfluxDB database.'''

    def __init__(self, plugininfo, data):
        '''Keep the settings and the data, build the client and write the point.

        plugininfo -- mapping with the keys host, port, user, pass, database
                      and measurement
        data -- value stored in the 'value' field of the point
        '''
        self.plugininfo = plugininfo
        self.data = data
        self.datatoinfluxdb = []
        # positional arguments of the client, in the order it expects them
        connection = [self.plugininfo[key]
                      for key in ('host', 'port', 'user', 'pass', 'database')]
        self.client = InfluxDBClient(*connection)
        self.main()

    def main(self):
        '''Main of the InfluxdbPlugin class: queue the point and write it.'''
        point = {
            'measurement': self.plugininfo['measurement'],
            'fields': {'value': self.data},
        }
        self.datatoinfluxdb.append(point)
        self.client.write_points(self.datatoinfluxdb)

+ 63
- 0
feed2toot/removeduplicates.py View File

@ -0,0 +1,63 @@
#!/usr/bin/env python3
# vim:ts=4:sw=4:ft=python:fileencoding=utf-8
# Copyright © 2015-2017 Carl Chenet <carl.chenet@ohmytux.com>
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>
# Remove duplicates from the final string before sending the tweet
'''Remove duplicates from the final string before sending the tweet'''
class RemoveDuplicates(object):
'''Remove duplicates from the final string before sending the tweet'''
def __init__(self, tweet):
'''Constructor of RemoveDuplicates class'''
self.tweet = tweet
self.main()
def main(self):
'''Main of the RemoveDuplicates class'''
# identify duplicate links
links = []
for element in self.tweet.split():
if element != ' ' and (element.startswith('http://') or element.startswith('https://')):
newlink = True
# if we already found this link, increment the counter
for i,_ in enumerate(links):
if links[i]['link'] == element:
newlink = False
links[i]['count'] += 1
if newlink:
links.append({'link': element, 'count': 1})
# remove duplicates
validatedlinks = []
for i in range(len(links)):
if links[i]['count'] >= 2:
validatedlinks.append(links[i])
wildcard = 'FEED2TOOTWILDCARD'
for element in validatedlinks:
for i in range(element['count']):
# needed for not inversing the order of links if it is a duplicate
# and the second link is not one
if i ==