advertising PeerTube's rather simple DNT policy

This commit is contained in:
Rigel Kent 2018-08-03 02:02:01 +02:00 committed by Chocobozzz
parent 9a12f169c1
commit aad0ec24e8
7 changed files with 287 additions and 5 deletions

View File

@ -100,6 +100,7 @@
"flat": "^4.1.0",
"fluent-ffmpeg": "^2.1.0",
"helmet": "^3.12.1",
"ip-anonymize": "^0.0.6",
"ipaddr.js": "https://github.com/whitequark/ipaddr.js.git#8e69afeb4053ee32447a101845f860848280eca5",
"is-cidr": "^2.0.5",
"iso-639-3": "^1.0.1",
@ -129,6 +130,7 @@
"sequelize-typescript": "0.6.6-beta.1",
"sharp": "^0.20.0",
"srt-to-vtt": "^1.1.2",
"useragent": "^2.3.0",
"uuid": "^3.1.0",
"validator": "^10.2.0",
"webfinger.js": "^2.6.6",

View File

@ -16,6 +16,8 @@ import * as morgan from 'morgan'
import * as cors from 'cors'
import * as cookieParser from 'cookie-parser'
import * as helmet from 'helmet'
import * as useragent from 'useragent'
import * as anonymise from 'ip-anonymize'
process.title = 'peertube'
@ -27,7 +29,7 @@ import { checkMissedConfig, checkFFmpeg, checkConfig, checkActivityPubUrls } fro
// Do not use barrels because we don't want to load all modules here (we need to initialize database first)
import { logger } from './server/helpers/logger'
import { API_VERSION, CONFIG, STATIC_PATHS, CACHE, REMOTE_SCHEME } from './server/initializers/constants'
import { API_VERSION, CONFIG, CACHE } from './server/initializers/constants'
const missed = checkMissedConfig()
if (missed.length !== 0) {
@ -85,6 +87,7 @@ import {
trackerRouter,
createWebsocketServer
} from './server/controllers'
import { advertiseDoNotTrack } from './server/middlewares/dnt'
import { Redis } from './server/lib/redis'
import { BadActorFollowScheduler } from './server/lib/schedulers/bad-actor-follow-scheduler'
import { RemoveOldJobsScheduler } from './server/lib/schedulers/remove-old-jobs-scheduler'
@ -103,8 +106,17 @@ if (isTestInstance()) {
credentials: true
}))
}
// For the logger
morgan.token('remote-addr', req => {
return (req.get('DNT') === '1') ?
anonymise(req.ip || (req.connection && req.connection.remoteAddress) || undefined,
16, // bitmask for IPv4
16 // bitmask for IPv6
) :
req.ip
})
morgan.token('user-agent', req => (req.get('DNT') === '1') ?
useragent.parse(req.get('user-agent')).family : req.get('user-agent'))
app.use(morgan('combined', {
stream: { write: logger.info.bind(logger) }
}))
@ -116,6 +128,8 @@ app.use(bodyParser.json({
}))
// Cookies
app.use(cookieParser())
// W3C DNT Tracking Status
app.use(advertiseDoNotTrack)
// ----------- Views, routes and static files -----------

View File

@ -1,4 +1,5 @@
import * as cors from 'cors'
import { createReadStream } from 'fs'
import * as express from 'express'
import { CONFIG, STATIC_DOWNLOAD_PATHS, STATIC_MAX_AGE, STATIC_PATHS, ROUTE_CACHE_LIFETIME } from '../initializers'
import { VideosPreviewCache } from '../lib/cache'
@ -93,10 +94,26 @@ staticRouter.use('/.well-known/nodeinfo',
}
)
staticRouter.use('/nodeinfo/:version.json',
// asyncMiddleware(cacheRoute(ROUTE_CACHE_LIFETIME.NODEINFO)),
asyncMiddleware(cacheRoute(ROUTE_CACHE_LIFETIME.NODEINFO)),
asyncMiddleware(generateNodeinfo)
)
// dnt-policy.txt service (see https://www.eff.org/dnt-policy)
staticRouter.use('/.well-known/dnt-policy.txt',
asyncMiddleware(cacheRoute(ROUTE_CACHE_LIFETIME.DNT_POLICY)),
(_, res: express.Response) => {
res.type('text/plain')
createReadStream('./server/static/dnt-policy/dnt-policy-1.0.txt').pipe(res)
}
)
// dnt service (see https://www.w3.org/TR/tracking-dnt/#status-resource)
staticRouter.use('/.well-known/dnt/',
(_, res: express.Response) => {
res.json({ tracking: 'N' })
}
)
// ---------------------------------------------------------------------------
export {

View File

@ -55,6 +55,7 @@ const ROUTE_CACHE_LIFETIME = {
FEEDS: '15 minutes',
ROBOTS: '2 hours',
NODEINFO: '10 minutes',
DNT_POLICY: '1 week',
ACTIVITY_PUB: {
VIDEOS: '1 second' // 1 second, cache concurrent requests after a broadcast for example
}

13
server/middlewares/dnt.ts Normal file
View File

@ -0,0 +1,13 @@
import * as ipaddr from 'ipaddr.js'
import { format } from 'util'
const advertiseDoNotTrack = (_, res, next) => {
res.setHeader('Tk', 'N')
return next()
}
// ---------------------------------------------------------------------------
export {
advertiseDoNotTrack
}

View File

@ -0,0 +1,218 @@
Do Not Track Compliance Policy
Version 1.0
This domain complies with user opt-outs from tracking via the "Do Not Track"
or "DNT" header [http://www.w3.org/TR/tracking-dnt/]. This file will always
be posted via HTTPS at https://example-domain.com/.well-known/dnt-policy.txt
to indicate this fact.
SCOPE
This policy document allows an operator of a Fully Qualified Domain Name
("domain") to declare that it respects Do Not Track as a meaningful privacy
opt-out of tracking, so that privacy-protecting software can better determine
whether to block or anonymize communications with this domain. This policy is
intended first and foremost to be posted on domains that publish ads, widgets,
images, scripts and other third-party embedded hypertext (for instance on
widgets.example.com), but it can be posted on any domain, including those users
visit directly (such as www.example.com). The policy may be applied to some
domains used by a company, site, or service, and not to others. Do Not Track
may be sent by any client that uses the HTTP protocol, including websites,
mobile apps, and smart devices like TVs. Do Not Track also works with all
protocols able to read HTTP headers, including SPDY.
NOTE: This policy contains both Requirements and Exceptions. Where possible
terms are defined in the text, but a few additional definitions are included
at the end.
REQUIREMENTS
When this domain receives Web requests from a user who enables DNT by actively
choosing an opt-out setting in their browser or by installing software that is
primarily designed to protect privacy ("DNT User"), we will take the following
measures with respect to those users' data, subject to the Exceptions, also
listed below:
1. END USER IDENTIFIERS:
a. If a DNT User has logged in to our service, all user identifiers, such as
unique or nearly unique cookies, "supercookies" and fingerprints are
discarded as soon as the HTTP(S) response is issued.
Data structures which associate user identifiers with accounts may be
employed to recognize logged in users per Exception 4 below, but may not
be associated with records of the user's activities unless otherwise
excepted.
b. If a DNT User is not logged in to our service, we will take steps to ensure
that no user identifiers are transmitted to us at all.
2. LOG RETENTION:
a. Logs with DNT Users' identifiers removed (but including IP addresses and
User Agent strings) may be retained for a period of 10 days or less,
unless an Exception (below) applies. This period of time balances privacy
concerns with the need to ensure that log processing systems have time to
operate; that operations engineers have time to monitor and fix technical
and performance problems; and that security and data aggregation systems
have time to operate.
b. These logs will not be used for any other purposes.
3. OTHER DOMAINS:
a. If this domain transfers identifiable user data about DNT Users to
contractors, affiliates or other parties, or embeds from or posts data to
other domains, we will either:
b. ensure that the operators of those domains abide by this policy overall
by posting it at /.well-known/dnt-policy.txt via HTTPS on the domains in
question,
OR
ensure that the recipient's policies and practices require the recipient
to respect the policy for our DNT Users' data.
OR
obtain a contractual commitment from the recipient to respect this policy
for our DNT Users' data.
NOTE: if an “Other Domain” does not receive identifiable user information
from the domain because such information has been removed, because the
Other Domain does not log that information, or for some other reason, these
requirements do not apply.
c. "Identifiable" means any records which are not Anonymized or otherwise
covered by the Exceptions below.
4. PERIODIC REASSERTION OF COMPLIANCE:
At least once every 12 months, we will take reasonable steps commensurate
with the size of our organization and the nature of our service to confirm
our ongoing compliance with this document, and we will publicly reassert our
compliance.
5. USER NOTIFICATION:
a. If we are required by law to retain or disclose user identifiers, we will
attempt to provide the users with notice (unless we are prohibited or it
would be futile) that a request for their information has been made in
order to give the users an opportunity to object to the retention or
disclosure.
b. We will attempt to provide this notice by email, if the users have given
us an email address, and by postal mail if the users have provided a
postal address.
c. If the users do not challenge the disclosure request, we may be legally
required to turn over their information.
d. We may delay notice if we, in good faith, believe that an emergency
involving danger of death or serious physical injury to any person
requires disclosure without delay of information relating to the
emergency.
EXCEPTIONS
Data from DNT Users collected by this domain may be logged or retained only in
the following specific situations:
1. CONSENT / "OPT BACK IN"
a. DNT Users are opting out from tracking across the Web. It is possible
that for some feature or functionality, we will need to ask a DNT User to
"opt back in" to be tracked by us across the entire Web.
b. If we do that, we will take reasonable steps to verify that the users who
select this option have genuinely intended to opt back in to tracking.
One way to do this is by performing scientifically reasonable user
studies with a representative sample of our users, but smaller
organizations can satisfy this requirement by other means.
c. Where we believe that we have opt back in consent, our server will
send a tracking value status header "Tk: C" as described in section 6.2
of the W3C Tracking Preference Expression draft:
http://www.w3.org/TR/tracking-dnt/#tracking-status-value
2. TRANSACTIONS
If a DNT User actively and knowingly enters a transaction with our
services (for instance, clicking on a clearly-labeled advertisement,
posting content to a widget, or purchasing an item), we will retain
necessary data for as long as required to perform the transaction. This
may for example include keeping auditing information for clicks on
advertising links; keeping a copy of posted content and the name of the
posting user; keeping server-side session IDs to recognize logged in
users; or keeping a copy of the physical address to which a purchased
item will be shipped. By their nature, some transactions will require data
to be retained indefinitely.
3. TECHNICAL AND SECURITY LOGGING:
a. If, during the processing of the initial request (for unique identifiers)
or during the subsequent 10 days (for IP addresses and User Agent strings),
we obtain specific information that causes our employees or systems to
believe that a request is, or is likely to be, part of a security attack,
spam submission, or fraudulent transaction, then logs of those requests
are not subject to this policy.
b. If we encounter technical problems with our site, then, in rare
circumstances, we may retain logs for longer than 10 days, if that is
necessary to diagnose and fix those problems, but this practice will not be
routinized and we will strive to delete such logs as soon as possible.
4. AGGREGATION:
a. We may retain and share anonymized datasets, such as aggregate records of
readership patterns; statistical models of user behavior; graphs of system
variables; data structures to count active users on monthly or yearly
bases; database tables mapping authentication cookies to logged in
accounts; non-unique data structures constructed within browsers for tasks
such as ad frequency capping or conversion tracking; or logs with truncated
and/or encrypted IP addresses and simplified User Agent strings.
b. "Anonymized" means we have conducted risk mitigation to ensure
that the dataset, plus any additional information that is in our
possession or likely to be available to us, does not allow the
reconstruction of reading habits, online or offline activity of groups of
fewer than 5000 individuals or devices.
c. If we generate anonymized datasets under this exception we will publicly
document our anonymization methods in sufficient detail to allow outside
experts to evaluate the effectiveness of those methods.
5. ERRORS:
From time to time, there may be errors by which user data is temporarily
logged or retained in violation of this policy. If such errors are
inadvertent, rare, and made in good faith, they do not constitute a breach
of this policy. We will delete such data as soon as practicable after we
become aware of any error and take steps to ensure that it is deleted by any
third-party who may have had access to the data.
ADDITIONAL DEFINITIONS
"Fully Qualified Domain Name" means a domain name that addresses a computer
connected to the Internet. For instance, example1.com; www.example1.com;
ads.example1.com; and widgets.example2.com are all distinct FQDNs.
"Supercookie" means any technology other than an HTTP Cookie which can be used
by a server to associate identifiers with the clients that visit it. Examples
of supercookies include Flash LSO cookies, DOM storage, HTML5 storage, or
tricks to store information in caches or etags.
"Risk mitigation" means an engineering process that evaluates the possibility
and likelihood of various adverse outcomes, considers the available methods of
making those adverse outcomes less likely, and deploys sufficient mitigations
to bring the probability and harm from adverse outcomes below an acceptable
threshold.
"Reading habits" includes amongst other things lists of visited DNS names, if
those domains pertain to specific topics or activities, but records of visited
DNS names are not reading habits if those domain names serve content of a very
diverse and general nature, thereby revealing minimal information about the
opinions, interests or activities of the user.

View File

@ -3531,6 +3531,10 @@ ioredis@^3.1.4:
redis-commands "^1.2.0"
redis-parser "^2.4.0"
ip-anonymize@^0.0.6:
version "0.0.6"
resolved "https://registry.yarnpkg.com/ip-anonymize/-/ip-anonymize-0.0.6.tgz#d2c513e448e874e8cc380d03404691b94b018e68"
ip-regex@^2.1.0:
version "2.1.0"
resolved "https://registry.yarnpkg.com/ip-regex/-/ip-regex-2.1.0.tgz#fa78bf5d2e6913c911ce9f819ee5146bb6d844e9"
@ -4492,7 +4496,7 @@ lowercase-keys@^1.0.0:
version "1.0.1"
resolved "https://registry.yarnpkg.com/lowercase-keys/-/lowercase-keys-1.0.1.tgz#6f9e30b47084d971a7c820ff15a6c5167b74c26f"
lru-cache@^4.0.1:
lru-cache@4.1.x, lru-cache@^4.0.1:
version "4.1.3"
resolved "https://registry.yarnpkg.com/lru-cache/-/lru-cache-4.1.3.tgz#a1175cf3496dfc8436c156c334b4955992bce69c"
dependencies:
@ -5256,7 +5260,7 @@ os-locale@^1.4.0:
dependencies:
lcid "^1.0.0"
os-tmpdir@^1.0.0, os-tmpdir@^1.0.1, os-tmpdir@~1.0.1:
os-tmpdir@^1.0.0, os-tmpdir@^1.0.1, os-tmpdir@~1.0.1, os-tmpdir@~1.0.2:
version "1.0.2"
resolved "https://registry.yarnpkg.com/os-tmpdir/-/os-tmpdir-1.0.2.tgz#bbe67406c79aa85c5cfec766fe5734555dfa1274"
@ -7201,6 +7205,12 @@ tmp@0.0.31:
dependencies:
os-tmpdir "~1.0.1"
tmp@0.0.x:
version "0.0.33"
resolved "https://registry.yarnpkg.com/tmp/-/tmp-0.0.33.tgz#6d34335889768d21b2bcda0aa277ced3b1bfadf9"
dependencies:
os-tmpdir "~1.0.2"
to-array@0.1.4:
version "0.1.4"
resolved "https://registry.yarnpkg.com/to-array/-/to-array-0.1.4.tgz#17e6c11f73dd4f3d74cda7a4ff3238e9ad9bf890"
@ -7564,6 +7574,13 @@ user-home@^2.0.0:
dependencies:
os-homedir "^1.0.0"
useragent@^2.3.0:
version "2.3.0"
resolved "https://registry.yarnpkg.com/useragent/-/useragent-2.3.0.tgz#217f943ad540cb2128658ab23fc960f6a88c9972"
dependencies:
lru-cache "4.1.x"
tmp "0.0.x"
ut_metadata@^3.0.8:
version "3.2.2"
resolved "https://registry.yarnpkg.com/ut_metadata/-/ut_metadata-3.2.2.tgz#189baf4ec690111ec242d7dfd954fa17242d4d70"