#!/usr/bin/python3
"""Grammar check file using LanguageTool HTTP API, local or remote

- Grammar check files from command line
- Completely offline when used with local LanguageTool server
- Free software (GPL 2), using LanguageTool under the LGPL-2.1 License 

## Installing local offline LanguageTool server ##

$ sudo docker pull erikvl87/languagetool
$ sudo docker run --detach --rm -p 8010:8010 erikvl87/languagetool

## Usage ##

$ echo 'See you at TeroKarvinen.com. Car drivinggg.'|tlangtool -
L0:33 B33       TYPOS: Possible spelling mistake found. (driving, drivings)
"See you at TeroKarvinen.com. Car drivinggg. "
                                  ^^^^^^^^^
$ tlangtool sample.txt 
L0:43 B257      REDUNDANCY: Specify a number, remove phrase, or simply use “many” or “numerous” (many, numerous)
"...es and use of new versions of software, a large number of dependencies on libraries in target sys..."
                                            ^^^^^^^^^^^^^^^^^
L0:43 B630      STYLE: This word has been used in one of the immediately preceding sentences. Using a synonym could make your text more interesting to read, unless the repetition is intentional. (issues, concerns, difficulties)
"...t itself is dependent on. There were no problems with installed libraries in the case st..."
                                            ^^^^^^^^

## Using a Public Online LanguageTool.org Instance

Use of the public API is subject to terms and restrictions, see https://dev.languagetool.org/public-http-api.html

	$ echo "Seee you at TeroKarvinen.com"|tlangtool --api https://api.languagetool.org/v2/check -
	L0:0 B0 TYPOS: Possible spelling mistake found. (See, Seen, Sees, Seed, Seek)
	"Seee you at TeroKarvinen.com "
	 ^^^^


Copyright 2022 Tero Karvinen http://TeroKarvinen.com , GNU General Public License 2
"""
# Module metadata: copyright and licence notice, and the version string of this tool.
__copyright__ = "Copyright 2022 Tero Karvinen http://TeroKarvinen.com , GNU General Public License 2 "
__version__ = "0.2.3"

import sys
import logging
from logging import info, debug, error, warning, INFO, WARNING, DEBUG
import argparse
import json

import requests
from requests import post

def parseArgs(argv=None):
	"""Build the command line parser and parse the arguments.

	argv -- optional list of argument strings. None (the default) lets
	        argparse read sys.argv[1:], so existing parseArgs() calls
	        behave as before.

	Returns an argparse.Namespace with the attributes log_level, api,
	disabled_categories, language, machine, with_file_name and file.
	Arguments may also be read from a file given as @filename.
	"""
	parser = argparse.ArgumentParser(
		fromfile_prefix_chars="@",
		epilog=__doc__, # the module docstring doubles as the long help text
		formatter_class=argparse.RawDescriptionHelpFormatter, # keep the epilog's line breaks
	)
	# Both verbosity flags store into log_level; WARNING applies when neither is given.
	parser.add_argument("-v", "--verbose", action="store_const", dest="log_level", const=INFO, default=WARNING, help="Print informational messages")
	parser.add_argument("-d", "--debug", action="store_const", dest="log_level", const=DEBUG, default=WARNING, help="Print debug messages")
	parser.add_argument("--api", help="URL of LanguageTool HTTP API, use --api https://api.languagetool.org/v2/check for LanguageTool public API", default="http://localhost:8010/v2/check")
	parser.add_argument("--disabled-categories", help="IDs of disabled rule categories, comma separated list. Example: TYPOS. Defined in LanguageTool API docs https://languagetool.org/http-api/swagger-ui/#!/default/post_check", default="TYPOS")
	parser.add_argument("--language", default="en-US")
	parser.add_argument("-m", "--machine", action="store_true", default=False, help="Output one thing per line machine readable output")
	parser.add_argument("-w", "--with-file-name", action="store_true", default=False, help="Print file name on each line")
	parser.add_argument("file", help='Input file. Use dash "-" for standard input')
	return parser.parse_args(argv)

def offset2line(text, offset):
	"Return the line number (first line is 1) of character position offset in text"
	# Each newline found before the offset pushes the position one line down.
	newlines_before = text.count("\n", 0, offset)
	return newlines_before + 1

def grammarCheck(text):
	"""Check text with the LanguageTool HTTP API and print every match found.

	Posts text to the URL in the module-level args.api, together with
	args.language and args.disabled_categories, and prints one summary line
	per match in the response:

	    <file or "L"><sep><line>:<context offset> B<offset><TAB><category>: <message> (<replacements>)

	Unless args.machine is set, the summary is followed by the quoted context
	text and a row of carets under the flagged span. At most five replacement
	suggestions are listed.

	Raises Exception when the HTTP status code is not 200.
	"""
	# NOTE(review): no timeout is passed to post(); confirm whether waiting
	# indefinitely on a stalled server is acceptable.
	form = {"text": text, "language": args.language, "disabledCategories": args.disabled_categories}
	r = post(args.api, data=form)
	debug(r.status_code)
	if r.status_code != 200:
		raise Exception(f"LanguageTool API call failed: HTTP status '{r.status_code}', url '{args.api}', response '{r.text}'")
	debug("LanguageTool API call succeeded.")

	# The line prefix is the same for every match: the file name followed by
	# a colon, or just the letter "L" in front of the line number.
	if args.with_file_name:
		prefix, separator = args.file, ":"
	else:
		prefix, separator = "L", ""

	# NOTE(review): the number after the colon is the offset inside the
	# context snippet, not a column within the source line.
	summary = '{file}{filesep}{line}:{contextOffset} B{offset}	{categoryId}: {message} ({replacements})'

	for match in r.json()["matches"]:
		context = match["context"]
		suggestions = ", ".join(item["value"] for item in match["replacements"][:5])
		# Flatten the nested fields into the match itself so one format() call
		# can use them and the debug dump below shows them too.
		match.update({
			"file": prefix,
			"filesep": separator,
			"categoryId": match["rule"]["category"]["id"],
			"line": offset2line(text, match["offset"]),
			"replacements": suggestions,
			"contextText": context["text"],
			"contextOffset": context["offset"],
			"contextLength": context["length"],
		})
		print(summary.format(**match))
		if not args.machine:
			print('"{contextText}"'.format(**match))
			# The leading space compensates for the opening quote printed above.
			underline = " " * match["contextOffset"] + "^" * match["contextLength"]
			print(" " + underline)
		debug(json.dumps(match, indent=4))

def main():
	"""Command line entry point: parse arguments, read the input, check it.

	Stores the parsed arguments in the module-level name args, which
	grammarCheck() reads, and configures logging from args.log_level.
	The input is read in full from standard input when the file argument
	is "-", otherwise from the named file.
	"""
	global args
	args = parseArgs()
	logging.basicConfig(
		level=args.log_level,
		format="%(funcName)s():%(lineno)i: %(message)s %(levelname)s",
	)

	# Read the whole input into memory; the API call takes it in one piece.
	if args.file != "-":
		with open(args.file, "r") as source:
			text = source.read()
	else:
		info("Reading standard input, ctrl-D to end...")
		text = sys.stdin.read()

	grammarCheck(text)

# Run the command line tool only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
	main()
