sfeed

Simple RSS and Atom feed parser
git clone https://git.sinitax.com/codemadness/sfeed
Log | Files | Refs | README | LICENSE | Upstream | sfeed.txt

commit ced0dd7f8a01dedad3ba16c4cf209aea673c3e82
parent 3f1d323497e5aefce65a3758d1bc408130d9bbe0
Author: Hiltjo Posthuma <hiltjo@codemadness.org>
Date:   Mon, 10 Apr 2023 17:03:57 +0200

sfeed_json: add JSON output format tool

This outputs the TSV data to JSON.

It uses a subset of JSON Feed 1.1:

	https://www.jsonfeed.org/version/1.1/

Diffstat:
MMakefile | 1+
MREADME | 1+
Asfeed_json.1 | 48++++++++++++++++++++++++++++++++++++++++++++++++
Asfeed_json.c | 171+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 221 insertions(+), 0 deletions(-)

diff --git a/Makefile b/Makefile @@ -51,6 +51,7 @@ BIN = \ sfeed_frames\ sfeed_gopher\ sfeed_html\ + sfeed_json\ sfeed_mbox\ sfeed_opml_import\ sfeed_plain\ diff --git a/README b/README @@ -186,6 +186,7 @@ sfeed_curses - Format feed data (TSV) to a curses interface. sfeed_frames - Format feed data (TSV) to HTML file(s) with frames. sfeed_gopher - Format feed data (TSV) to Gopher files. sfeed_html - Format feed data (TSV) to HTML. +sfeed_json - Format feed data (TSV) to JSON Feed. sfeed_opml_export - Generate an OPML XML file from a sfeedrc config file. sfeed_opml_import - Generate a sfeedrc config file from an OPML XML file. sfeed_markread - Mark items as read/unread, for use with sfeed_curses. diff --git a/sfeed_json.1 b/sfeed_json.1 @@ -0,0 +1,48 @@ +.Dd March 8, 2023 +.Dt SFEED_JSON 1 +.Os +.Sh NAME +.Nm sfeed_json +.Nd format feed data to JSON Feed +.Sh SYNOPSIS +.Nm +.Op Ar +.Sh DESCRIPTION +.Nm +formats feed data (TSV) from +.Xr sfeed 1 +from stdin or for each +.Ar file +to stdout as JSON Feed data. +If one or more +.Ar file +arguments are specified then the basename of the +.Ar file +is used as the feed name in the output. +If no +.Ar file +arguments are specified and so the data is read from stdin then the feed name +is empty. +If +.Nm +is reading from one or more +.Ar file +arguments it will prefix the entry title with "[feed name] ". +.Sh EXIT STATUS +.Ex -std +.Sh EXAMPLES +.Bd -literal +curl -s 'https://codemadness.org/atom.xml' | sfeed | sfeed_json +.Ed +.Sh SEE ALSO +.Xr sfeed 1 , +.Xr sfeed_atom 1 , +.Xr sfeed 5 +.Sh STANDARDS +.Rs +.%T JSON Feed Version 1.1 +.%U https://www.jsonfeed.org/version/1.1/ +.%D Nov, 2022 +.Re +.Sh AUTHORS +.An Hiltjo Posthuma Aq Mt hiltjo@codemadness.org diff --git a/sfeed_json.c b/sfeed_json.c @@ -0,0 +1,171 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "util.h" + +static char *line; +static size_t linesize; +static int firstitem = 1; + +/* Unescape / decode fields printed by string_print_encoded() */ +static void +printcontent(const char *s) +{ + for (; *s; s++) { + switch (*s) { + case '\\': + s++; + switch (*s) { + case 'n': fputs("\\n", stdout); break; + case '\\': fputs("\\\\", stdout); break; + case 't': fputs("\\t", stdout); break; + } + break; /* ignore invalid escape sequence */ + case '"': fputs("\\\"", stdout); break; + default: + putchar(*s); + break; + } + } +} + +static void +printfield(const char *s) +{ + for (; *s; s++) { + if (*s == '\\') + fputs("\\\\", stdout); + else if (*s == '"') + fputs("\\\"", stdout); + else + putchar(*s); + } +} + +static void +printfeed(FILE *fp, const char *feedname) +{ + char *fields[FieldLast], timebuf[32]; + struct tm parsedtm, *tm; + time_t parsedtime; + ssize_t linelen; + int ch; + char *p, *s; + + while ((linelen = getline(&line, &linesize, fp)) > 0 && + !ferror(stdout)) { + if (line[linelen - 1] == '\n') + line[--linelen] = '\0'; + parseline(line, fields); + + if (!firstitem) + fputs(",\n", stdout); + firstitem = 0; + + fputs("{\n\t\"id\": \"", stdout); + printfield(fields[FieldId]); + fputs("\"", stdout); + + parsedtime = 0; + if (!strtotime(fields[FieldUnixTimestamp], &parsedtime) && + (tm = gmtime_r(&parsedtime, &parsedtm)) && + strftime(timebuf, sizeof(timebuf), "%Y-%m-%dT%H:%M:%SZ", tm)) { + fputs(",\n\t\"date_published\": \"", stdout); + fputs(timebuf, stdout); + fputs("\"", stdout); + } + + fputs(",\n\t\"title\": \"", stdout); + if (feedname[0]) { + fputs("[", stdout); + printfield(feedname); + fputs("] ", stdout); + } + printfield(fields[FieldTitle]); + fputs("\"", stdout); + + if (fields[FieldLink][0]) { + fputs(",\n\t\"url\": \"", stdout); + printfield(fields[FieldLink]); + fputs("\"", stdout); + } + + if (fields[FieldAuthor][0]) { + fputs(",\n\t\"authors\": [{\"name\": \"", stdout); + printfield(fields[FieldAuthor]); + fputs("\"}]", stdout); + } + + if (fields[FieldCategory][0]) { + fputs(",\n\t\"tags\": [", stdout); + + for (p = s = fields[FieldCategory]; ; s++) { + if (*s == '|' || *s == '\0') { + if (p != fields[FieldCategory]) + fputs(", ", stdout); + ch = *s; + *s = '\0'; /* temporary NUL terminate */ + fputs("\"", stdout); + printfield(p); + fputs("\"", stdout); + *s = ch; /* restore */ + p = s + 1; + } + if (*s == '\0') + break; + } + fputs("]", stdout); + } + + if (fields[FieldEnclosure][0]) { + fputs(",\n\t\"attachments\": [{\"url:\": \"", stdout); + printfield(fields[FieldEnclosure]); + fputs("\"}]", stdout); + } + + if (!strcmp(fields[FieldContentType], "html")) + fputs(",\n\t\"content_html\": \"", stdout); + else + fputs(",\n\t\"content_text\": \"", stdout); + printcontent(fields[FieldContent]); + fputs("\"\n}", stdout); + } +} + +int +main(int argc, char *argv[]) +{ + FILE *fp; + char *name; + int i; + + if (pledge(argc == 1 ? "stdio" : "stdio rpath", NULL) == -1) + err(1, "pledge"); + + fputs("{\n" + "\"version\": \"https://jsonfeed.org/version/1.1\",\n" + "\"title\": \"Newsfeed\",\n" + "\"items\": [\n", stdout); + + if (argc == 1) { + printfeed(stdin, ""); + checkfileerror(stdin, "<stdin>", 'r'); + } else { + for (i = 1; i < argc; i++) { + if (!(fp = fopen(argv[i], "r"))) + err(1, "fopen: %s", argv[i]); + name = ((name = strrchr(argv[i], '/'))) ? name + 1 : argv[i]; + printfeed(fp, name); + checkfileerror(fp, argv[i], 'r'); + checkfileerror(stdout, "<stdout>", 'w'); + fclose(fp); + } + } + fputs("]\n}\n", stdout); + + checkfileerror(stdout, "<stdout>", 'w'); + + return 0; +}